summaryrefslogtreecommitdiff
path: root/src/plugins/plain/PlainFormatStreamReader.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/plain/PlainFormatStreamReader.hpp')
-rw-r--r--src/plugins/plain/PlainFormatStreamReader.hpp347
1 files changed, 0 insertions, 347 deletions
diff --git a/src/plugins/plain/PlainFormatStreamReader.hpp b/src/plugins/plain/PlainFormatStreamReader.hpp
deleted file mode 100644
index 2ee261c..0000000
--- a/src/plugins/plain/PlainFormatStreamReader.hpp
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file PlainFormatStreamReader.hpp
- *
- * Provides low-level classes for reading the plain TeX-esque format. The
- * classes provided here do not build any model objects and do not implement
- * the Parser interfaces.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_
-#define _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_
-
-#include <stack>
-
-#include <core/common/Variant.hpp>
-
-#include "DynamicTokenizer.hpp"
-
-namespace ousia {
-
-// Forward declarations
-class CharReader;
-class Logger;
-class DataHandler;
-
-/**
- * The PlainFormatStreamReader class provides a low-level reader for the plain
- * TeX-esque format. The parser is constructed around a "parse" function, which
- * reads data from the underlying CharReader until a new state is reached and
- * indicates this state in a return value. The calling code then has to pull
- * corresponding data from the stream reader. The reader already handles some
- * invalid cases, but recovers from most errors and happily continues parsing.
- */
-class PlainFormatStreamReader {
-public:
- /**
- * Enum used to indicate which state the PlainFormatStreamReader class is in
- * after calling the "parse" function.
- */
- enum class State {
- /**
- * State returned if a fully featured command has been read. A command
- * consists of the command name and its arguments (which optionally
- * includes the name).
- */
- COMMAND,
-
- /**
- * State returned if data is given. The reader must decide which field
- * or command this should be routed to. Trailing or leading whitespace
- * has been removed. Only returned if the data is non-empty.
- */
- DATA,
-
- /**
- * A user-defined entity has been found. The entity sequence is stored
- * in the command name.
- */
- ENTITY,
-
- /**
- * State returned if an annotation was started. An annotation consists
- * of the command name and its arguments (which optionally include the
- * name).
- */
- ANNOTATION_START,
-
- /**
- * State returned if an annotation ends. The reader indicates which
- * annotation ends.
- */
- ANNOTATION_END,
-
- /**
- * State returned if a new field started. The reader assures that the
- * current field ends before a new field is started and that the field
- * is not started if data has been given outside of a field. The
- * field number is set to the current field index.
- */
- FIELD_START,
-
- /**
- * State returned if the current field ends. The reader assures that a
- * field was actually open.
- */
- FIELD_END,
-
- /**
- * The end of the stream has been reached.
- */
- END,
-
- /**
- * Returned from internal functions if nothing should be done.
- */
- NONE,
-
- /**
- * Returned from internal functions to indicate irrecoverable errors.
- */
- ERROR
- };
-
- /**
- * Entry used for the command stack.
- */
- struct Command {
- /**
- * Name and location of the current command.
- */
- Variant name;
-
- /**
- * Arguments that were passed to the command.
- */
- Variant arguments;
-
- /**
- * Set to true if this is a command with clear begin and end.
- */
- bool hasRange;
-
- /**
- * Set to true if we are currently inside a field of this command.
- */
- bool inField;
-
- /**
- * Set to true if we are currently in the range field of the command
- * (implies inField being set to true).
- */
- bool inRangeField;
-
- /**
- * Default constructor.
- */
- Command() : hasRange(false), inField(false), inRangeField(false) {}
-
- /**
- * Constructor of the Command class.
- *
- * @param name is a string variant with name and location of the
- * command.
- * @param arguments is a map variant with the arguments given to the
- * command.
- * @param hasRange should be set to true if this is a command with
- * explicit range.
- * @param inField is set to true if we currently are inside a field
- * of this command.
- * @param inRangeField is set to true if we are currently inside the outer
- * field of the command.
- */
- Command(Variant name, Variant arguments, bool hasRange,
- bool inField, bool inRangeField)
- : name(std::move(name)),
- arguments(std::move(arguments)),
- hasRange(hasRange),
- inField(inField),
- inRangeField(inRangeField)
- {
- }
- };
-
-private:
- /**
- * Reference to the CharReader instance from which the incoming bytes are
- * read.
- */
- CharReader &reader;
-
- /**
- * Reference to the logger instance to which all error messages are sent.
- */
- Logger &logger;
-
- /**
- * Tokenizer instance used to read individual tokens from the text.
- */
- DynamicTokenizer tokenizer;
-
- /**
- * Stack containing the current commands.
- */
- std::stack<Command> commands;
-
- /**
- * Variant containing the data that has been read (always is a string,
- * contains the exact location of the data in the source file).
- */
- Variant data;
-
- /**
- * Contains the location of the last token.
- */
- SourceLocation location;
-
- /**
- * Contains the field index of the current command.
- */
- size_t fieldIdx;
-
- /**
- * Function used internally to parse an identifier.
- *
- * @param start is the start byte offset of the identifier (including the
- * backslash).
- * @param allowNSSep should be set to true if the namespace separator is
- * allowed in the identifier name. Issues error if the namespace separator
- * is placed incorrectly.
- */
- Variant parseIdentifier(size_t start, bool allowNSSep = false);
-
- /**
- * Function used internally to handle the special "\begin" command.
- */
- State parseBeginCommand();
-
- /**
- * Function used internally to handle the special "\end" command.
- */
- State parseEndCommand();
-
- /**
- * Pushes the parsed command onto the command stack.
- */
- void pushCommand(Variant commandName, Variant commandArguments, bool hasRange);
-
- /**
- * Parses the command arguments.
- */
- Variant parseCommandArguments(Variant commandArgName);
-
- /**
- * Function used internally to parse a command.
- *
- * @param start is the start byte offset of the command (including the
- * backslash)
- * @return a State value indicating the outcome of parsing the command.
- */
- State parseCommand(size_t start);
-
- /**
- * Function used internally to parse a block comment.
- */
- void parseBlockComment();
-
- /**
- * Function used internally to parse a line comment.
- */
- void parseLineComment();
-
- /**
- * Checks whether there is any data pending to be issued, if yes, issues it.
- *
- * @param handler is the data handler that contains the data that may be
- * returned to the user.
- * @return true if there was any data and DATA should be returned by the
- * parse function, false otherwise.
- */
- bool checkIssueData(DataHandler &handler);
-
- /**
- * Called before any data is appended to the internal data handler. Checks
- * whether a new field should be started or implicitly ended.
- *
- * @return true if FIELD_START should be returned by the parse function.
- */
- bool checkIssueFieldStart();
-
-public:
- /**
- * Constructor of the PlainFormatStreamReader class. Attaches the new
- * PlainFormatStreamReader to the given CharReader and Logger instances.
- *
- * @param reader is the reader instance from which incoming characters
- * should be read.
- * @param logger is the logger instance to which errors should be written.
- */
- PlainFormatStreamReader(CharReader &reader, Logger &logger);
-
- /**
- * Continues parsing. Returns one of the states defined in the State enum.
- * Callers should stop once the State::END state is reached. Use the getter
- * functions to get more information about the current state, such as the
- * command name or the data or the current field index.
- *
- * @return the new state the parser has reached.
- */
- State parse();
-
- /**
- * Returns a reference to the internally stored data. Only valid if
- * State::DATA was returned by the "parse" function.
- *
- * @return a reference to a variant containing the data parsed by the
- * "parse" function.
- */
- const Variant &getData() { return data; }
-
- /**
- * Returns a reference to the internally stored command name. Only valid if
- * State::COMMAND was returned by the "parse" function.
- *
- * @return a reference to a variant containing name and location of the
- * parsed command.
- */
- const Variant &getCommandName();
-
- /**
- * Returns a reference to the internally stored command arguments. Only valid
- * if State::COMMAND was returned by the "parse" function.
- *
- * @return a reference to a variant containing arguments given to the
- * command.
- */
- const Variant &getCommandArguments();
-
- /**
- * Returns a reference to the location of the last token.
- *
- * @return the last internal token location.
- */
- SourceLocation &getLocation() {return location;}
-};
-}
-
-#endif /* _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_ */
-