diff options
Diffstat (limited to 'src/plugins/plain/PlainFormatStreamReader.hpp')
-rw-r--r-- | src/plugins/plain/PlainFormatStreamReader.hpp | 347 |
1 files changed, 0 insertions, 347 deletions
diff --git a/src/plugins/plain/PlainFormatStreamReader.hpp b/src/plugins/plain/PlainFormatStreamReader.hpp deleted file mode 100644 index 2ee261c..0000000 --- a/src/plugins/plain/PlainFormatStreamReader.hpp +++ /dev/null @@ -1,347 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file PlainFormatStreamReader.hpp - * - * Provides low-level classes for reading the plain TeX-esque format. The - * classes provided here do not build any model objects and do not implement - * the Parser interfaces. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_ -#define _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_ - -#include <stack> - -#include <core/common/Variant.hpp> - -#include "DynamicTokenizer.hpp" - -namespace ousia { - -// Forward declarations -class CharReader; -class Logger; -class DataHandler; - -/** - * The PlainFormatStreamReader class provides a low-level reader for the plain - * TeX-esque format. The parser is constructed around a "parse" function, which - * reads data from the underlying CharReader until a new state is reached and - * indicates this state in a return value. The calling code then has to pull - * corresponding data from the stream reader. 
The reader already handles some - * invalid cases, but recovers from most errors and happily continues parsing. - */ -class PlainFormatStreamReader { -public: - /** - * Enum used to indicate which state the PlainFormatStreamReader class is in - * after calling the "parse" function. - */ - enum class State { - /** - * State returned if a fully featured command has been read. A command - * consists of the command name and its arguments (which optionally - * include the name). - */ - COMMAND, - - /** - * State returned if data is given. The reader must decide which field - * or command this should be routed to. Trailing or leading whitespace - * has been removed. Only returned if the data is non-empty. - */ - DATA, - - /** - * A user-defined entity has been found. The entity sequence is stored - * in the command name. - */ - ENTITY, - - /** - * State returned if an annotation was started. An annotation consists - * of the command name and its arguments (which optionally include the - * name). - */ - ANNOTATION_START, - - /** - * State returned if an annotation ends. The reader indicates which - * annotation ends. - */ - ANNOTATION_END, - - /** - * State returned if a new field started. The reader assures that the - * current field ends before a new field is started and that the field - * is not started if data has been given outside of a field. The - * field number is set to the current field index. - */ - FIELD_START, - - /** - * State returned if the current field ends. The reader assures that a - * field was actually open. - */ - FIELD_END, - - /** - * The end of the stream has been reached. - */ - END, - - /** - * Returned from internal functions if nothing should be done. - */ - NONE, - - /** - * Returned from internal functions to indicate irrecoverable errors. - */ - ERROR - }; - - /** - * Entry used for the command stack. - */ - struct Command { - /** - * Name and location of the current command. 
- */ - Variant name; - - /** - * Arguments that were passed to the command. - */ - Variant arguments; - - /** - * Set to true if this is a command with clear begin and end. - */ - bool hasRange; - - /** - * Set to true if we are currently inside a field of this command. - */ - bool inField; - - /** - * Set to true if we are currently in the range field of the command - * (implies inField being set to true). - */ - bool inRangeField; - - /** - * Default constructor. - */ - Command() : hasRange(false), inField(false), inRangeField(false) {} - - /** - * Constructor of the Command class. - * - * @param name is a string variant with name and location of the - * command. - * @param arguments is a map variant with the arguments given to the - * command. - * @param hasRange should be set to true if this is a command with - * explicit range. - * @param inField is set to true if we currently are inside a field - * of this command. - * @param inRangeField is set to true if we are currently inside the - * outer field of the command. - */ - Command(Variant name, Variant arguments, bool hasRange, - bool inField, bool inRangeField) - : name(std::move(name)), - arguments(std::move(arguments)), - hasRange(hasRange), - inField(inField), - inRangeField(inRangeField) - { - } - }; - -private: - /** - * Reference to the CharReader instance from which the incoming bytes are - * read. - */ - CharReader &reader; - - /** - * Reference to the logger instance to which all error messages are sent. - */ - Logger &logger; - - /** - * Tokenizer instance used to read individual tokens from the text. - */ - DynamicTokenizer tokenizer; - - /** - * Stack containing the current commands. - */ - std::stack<Command> commands; - - /** - * Variant containing the data that has been read (always is a string, - * contains the exact location of the data in the source file). - */ - Variant data; - - /** - * Contains the location of the last token. 
- */ - SourceLocation location; - - /** - * Contains the field index of the current command. - */ - size_t fieldIdx; - - /** - * Function used internally to parse an identifier. - * - * @param start is the start byte offset of the identifier (including the - * backslash). - * @param allowNSSep should be set to true if the namespace separator is - * allowed in the identifier name. Issues error if the namespace separator - * is placed incorrectly. - */ - Variant parseIdentifier(size_t start, bool allowNSSep = false); - - /** - * Function used internally to handle the special "\begin" command. - */ - State parseBeginCommand(); - - /** - * Function used internally to handle the special "\end" command. - */ - State parseEndCommand(); - - /** - * Pushes the parsed command onto the command stack. - */ - void pushCommand(Variant commandName, Variant commandArguments, bool hasRange); - - /** - * Parses the command arguments. - */ - Variant parseCommandArguments(Variant commandArgName); - - /** - * Function used internally to parse a command. - * - * @param start is the start byte offset of the command (including the - * backslash) - * @return a State value describing the outcome of parsing; presumably - * State::COMMAND if a command was actually parsed (TODO confirm against the - * implementation). - */ - State parseCommand(size_t start); - - /** - * Function used internally to parse a block comment. - */ - void parseBlockComment(); - - /** - * Function used internally to parse a generic comment. - */ - void parseLineComment(); - - /** - * Checks whether there is any data pending to be issued, if yes, issues it. - * - * @param handler is the data handler that contains the data that may be - * returned to the user. - * @return true if there was any data and DATA should be returned by the - * parse function, false otherwise. - */ - bool checkIssueData(DataHandler &handler); - - /** - * Called before any data is appended to the internal data handler. 
Checks - * whether a new field should be started or implicitly ended. 
- * - * @return true if FIELD_START should be returned by the parse function. - */ - bool checkIssueFieldStart(); - -public: - /** - * Constructor of the PlainFormatStreamReader class. Attaches the new - * PlainFormatStreamReader to the given CharReader and Logger instances. - * - * @param reader is the reader instance from which incoming characters - * should be read. - * @param logger is the logger instance to which errors should be written. - */ - PlainFormatStreamReader(CharReader &reader, Logger &logger); - - /** - * Continues parsing. Returns one of the states defined in the State enum. - * Callers should stop once the State::END state is reached. Use the getter - * functions to get more information about the current state, such as the - * command name or the data or the current field index. - * - * @return the new state the parser has reached. - */ - State parse(); - - /** - * Returns a reference to the internally stored data. Only valid if - * State::DATA was returned by the "parse" function. - * - * @return a reference to a variant containing the data parsed by the - * "parse" function. - */ - const Variant &getData() { return data; } - - /** - * Returns a reference to the internally stored command name. Only valid if - * State::COMMAND was returned by the "parse" function. - * - * @return a reference to a variant containing name and location of the - * parsed command. - */ - const Variant &getCommandName(); - - /** - * Returns a reference to the internally stored command arguments. Only - * valid if State::COMMAND was returned by the "parse" function. - * - * @return a reference to a variant containing arguments given to the - * command. - */ - const Variant &getCommandArguments(); - - /** - * Returns a reference to the internally stored location of the last token. - * - * @return the last internal token location. - */ - SourceLocation &getLocation() {return location;} -}; -} - -#endif /* _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_ */ - |