diff options
Diffstat (limited to 'src/formats/osml/OsmlStreamParser.hpp')
-rw-r--r-- | src/formats/osml/OsmlStreamParser.hpp | 85 |
1 files changed, 37 insertions, 48 deletions
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index dc3034c..453a2bb 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -29,17 +29,19 @@ #ifndef _OUSIA_OSML_STREAM_PARSER_HPP_ #define _OUSIA_OSML_STREAM_PARSER_HPP_ -#include <stack> +#include <memory> #include <core/common/Variant.hpp> +#include <core/common/Whitespace.hpp> #include <core/parser/utils/Tokenizer.hpp> +#include <core/parser/utils/TokenizedData.hpp> namespace ousia { // Forward declarations class CharReader; class Logger; -class DataHandler; +class OsmlStreamParserImpl; /** * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml @@ -137,26 +139,15 @@ public: Variant arguments; /** - * Set to true if this is a command with clear begin and end. - */ - bool hasRange : 1; - - /** - * Set to true if we are currently inside a field of this command. - */ - bool inField : 1; - - /** - * Set to true if we are currently in the range field of the command - * (implies inField being set to true). + * Vector used as stack for holding the number of opening/closing braces + * and the corresponding "isDefaultField" flag. */ - bool inRangeField : 1; + std::vector<bool> fields; /** - * Set to true if we are currently in a field that has been especially - * marked as default field (using the "|") syntax. + * Set to true if this is a command with clear begin and end. */ - bool inDefaultField : 1; + bool hasRange; /** * Default constructor. @@ -164,7 +155,6 @@ public: Command() : hasRange(false), inField(false), - inRangeField(false), inDefaultField() { } @@ -178,15 +168,10 @@ public: * command. * @param hasRange should be set to true if this is a command with * explicit range. - * @param inField is set to true if we currently are inside a field - * of this command. - * @param inRangeField is set to true if we currently are inside the - * outer field of a ranged command. 
* @param inDefaultField is set to true if we currently are in a * specially marked default field. */ - Command(Variant name, Variant arguments, bool hasRange, - bool inField, bool inRangeField, bool inDefaultField) + Command(Variant name, Variant arguments, bool hasRange) : name(std::move(name)), arguments(std::move(arguments)), hasRange(hasRange), @@ -215,25 +200,20 @@ private: Tokenizer tokenizer; /** - * Stack containing the current commands. - */ - std::stack<Command> commands; - - /** - * Variant containing the data that has been read (always is a string, - * contains the exact location of the data in the source file). + * Variant containing the tokenized data that was returned from the + * tokenizer as data. */ - Variant data; + TokenizedData data; /** - * Contains the location of the last token. + * Stack containing the current commands. */ - SourceLocation location; + std::stack<Command> commands; /** - * Contains the field index of the current command. + * Pointer at the internal OsmlStreamParserImpl instance. */ - size_t fieldIdx; + std::unique_ptr<OsmlStreamParserImpl> impl; /** * Function used internally to parse an identifier. @@ -291,12 +271,10 @@ private: /** * Checks whether there is any data pending to be issued, if yes, issues it. * - * @param handler is the data handler that contains the data that may be - * returned to the user. * @return true if there was any data and DATA should be returned by the * parse function, false otherwise. */ - bool checkIssueData(DataHandler &handler); + bool checkIssueData(); /** * Called before any data is appended to the internal data handler. Checks @@ -328,6 +306,12 @@ public: OsmlStreamParser(CharReader &reader, Logger &logger); /** + * Destructor of the OsmlStreamParser, needed to destroy the incomplete + * OsmlStreamParserImpl. + */ + ~OsmlStreamParser(); + + /** * Continues parsing. Returns one of the states defined in the State enum. * Callers should stop once the State::END state is reached. 
Use the getter * functions to get more information about the current state, such as the @@ -344,7 +328,19 @@ public: * @return a reference at a variant containing the data parsed by the * "parse" function. */ - const Variant &getData() const { return data; } + const TokenizedData &getData() const { return data; } + + /** + * Returns the complete content of the internal TokenizedData instance as + * a single string Variant. This method is mainly used in the unit tests for + * this class, it simply calls the text() method of TokenizedData. + * + * @param mode is the WhitespaceMode that should be used for returning the + * text. + * @return a string variant containing the text content of the internal + * TokenizedData instance or a nullptr variant if there is no text. + */ + Variant getText(WhitespaceMode mode = WhitespaceMode::COLLAPSE); /** * Returns a reference at the internally stored command name. Only valid if @@ -371,13 +367,6 @@ public: * syntax). */ bool inDefaultField() const; - - /** - * Returns a reference at the char reader. - * - * @return the last internal token location. - */ - const SourceLocation &getLocation() const { return location; } }; } |