/*
Ousía
Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file OsmlStreamParser.hpp
*
* Provides low-level classes for reading the TeX-esque osml format. The class
* provided here does not build any model objects and does not implement the
* Parser interface.
*
* @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
*/
#ifndef _OUSIA_OSML_STREAM_PARSER_HPP_
#define _OUSIA_OSML_STREAM_PARSER_HPP_
#include
#include
#include
namespace ousia {
// Forward declarations
// These types are only used by reference or through a smart pointer in this
// header, so declaring them here is sufficient.
class CharReader;            // passed by reference to the OsmlStreamParser constructor
class Logger;                // passed by reference to the OsmlStreamParser constructor
class OsmlStreamParserImpl;  // internal implementation class (PIMPL), see "impl" member
class TokenizedData;         // returned by reference from getData()
class Variant;               // returned by reference from getCommandName()/getCommandArguments()
/**
* The OsmlStreamParser class provides a low-level reader for the TeX-esque osml
* format. The parser is constructed around a "parse" function, which reads data
* from the underlying CharReader until a new state is reached and indicates
* this state in a return value. The calling code then has to pull corresponding
* data from the stream reader. The reader makes sure the incoming stream is
* syntactically valid and tries to recover from most errors. If an error is
* irrecoverable (this is the case for errors with wrong nesting of commands or
* fields, as this would lead to too many consecutive errors) a
* LoggableException is thrown. In short, the OsmlStreamParser can be described
* as a SAX parser for OSML.
*/
class OsmlStreamParser : public parser_stack::ParserCallbacks {
public:
    /**
     * Enum used to indicate which state the OsmlStreamParser class is in
     * after calling the "parse" function.
     */
    enum class State : uint8_t {
        /**
         * State returned if the start of a command has been read. Use the
         * getCommandName(), getCommandArguments() and inRangeCommand()
         * functions to retrieve more information about the command that was
         * just started.
         */
        COMMAND_START = 0,

        /**
         * State returned if a range command or range annotation has just
         * ended. This state is not returned for non-range commands (as the
         * actual end of a command is context dependent).
         */
        RANGE_END = 1,

        /**
         * State returned if a new field started. The reader assures that the
         * current field ends before a new field is started and that the field
         * is not started if data has been given outside of a field. The
         * field number is set to the current field index.
         */
        FIELD_START = 2,

        /**
         * State returned if the current field ends. The reader assures that a
         * field was actually open.
         */
        FIELD_END = 3,

        /**
         * State returned if an annotation was started. An annotation consists
         * of the command name and its arguments (which optionally include the
         * name).
         */
        ANNOTATION_START = 4,

        /**
         * State returned if an annotation ends. The reader indicates which
         * annotation ends.
         */
        ANNOTATION_END = 5,

        /**
         * State returned if data is given. The reader must decide which field
         * or command this should be routed to. Trailing or leading whitespace
         * has been removed. Only called if the data is non-empty.
         */
        DATA = 6,

        /**
         * The end of the stream has been reached.
         */
        END = 7
    };

private:
    /**
     * Pointer at the class containing the internal implementation (according
     * to the PIMPL idiom). OsmlStreamParserImpl is only forward declared in
     * this header, which is why the destructor is declared explicitly below.
     */
    std::unique_ptr<OsmlStreamParserImpl> impl;

public:
    /**
     * Constructor of the OsmlStreamParser class. Attaches the new
     * OsmlStreamParser to the given CharReader and Logger instances.
     *
     * @param reader is the reader instance from which incoming characters
     * should be read.
     * @param logger is the logger instance to which errors should be written.
     */
    OsmlStreamParser(CharReader &reader, Logger &logger);

    /**
     * Destructor of the OsmlStreamParser, needed to destroy the incomplete
     * OsmlStreamParserImpl. It has to be defined in a translation unit in
     * which OsmlStreamParserImpl is a complete type, as std::unique_ptr
     * requires the complete type at the point of destruction.
     */
    ~OsmlStreamParser();

    /**
     * Continues parsing. Returns one of the states defined in the State enum.
     * Callers should stop once the State::END state is reached. Use the getter
     * functions to get more information about the current state, such as the
     * command name or the data or the current field index.
     *
     * @return the new state the parser has reached.
     */
    State parse();

    /**
     * Returns a reference at the internally stored command name. Only valid if
     * State::COMMAND_START, State::ANNOTATION_START or State::ANNOTATION_END
     * was returned by the "parse" function.
     *
     * @return a reference at a variant containing name and location of the
     * parsed command.
     */
    const Variant &getCommandName() const;

    /**
     * Returns a reference at the internally stored command arguments. Only
     * valid if State::COMMAND_START, State::ANNOTATION_START or
     * State::ANNOTATION_END was returned by the "parse" function.
     *
     * @return a reference at a variant containing arguments given to the
     * command.
     */
    const Variant &getCommandArguments() const;

    /**
     * Returns a reference at the internally stored data. Only valid if
     * State::DATA was returned by the "parse" function.
     *
     * @return a reference at the TokenizedData instance containing the data
     * parsed by the "parse" function.
     */
    const TokenizedData &getData() const;

    /**
     * Returns the location of the current token.
     *
     * @return a reference at the SourceLocation of the current token.
     */
    const SourceLocation &getLocation() const;

    /**
     * Returns true if the currently started command is a range command, only
     * valid if State::COMMAND_START or State::ANNOTATION_START was returned by
     * the "parse" function.
     *
     * @return true if the started command is a range command, false
     * otherwise.
     */
    bool inRangeCommand() const;

    /**
     * Returns true if the current field is the "default" field. This is true if
     * the parser either is in the outer range of a range command or inside a
     * field that has been especially marked as "default" field (using the "{!"
     * syntax). Only valid if State::FIELD_START was returned by the "parse"
     * function.
     *
     * @return true if the current field was marked as default field (using the
     * "{!" syntax).
     */
    bool inDefaultField() const;

    /**
     * Implementation of the registerToken function of the
     * parser_stack::ParserCallbacks interface.
     * NOTE(review): presumably makes the given token string known to the
     * parser so that it is recognised in the input stream -- confirm against
     * the ParserCallbacks documentation and the implementation.
     *
     * @param token is the token string that should be registered.
     * @return the TokenId associated with the given token.
     */
    TokenId registerToken(const std::string &token) override;

    /**
     * Implementation of the unregisterToken function of the
     * parser_stack::ParserCallbacks interface.
     * NOTE(review): presumably reverts a previous call to registerToken --
     * confirm against the ParserCallbacks documentation and the
     * implementation.
     *
     * @param token is the id of the token that should be unregistered.
     */
    void unregisterToken(TokenId token) override;
};
}
#endif /* _OUSIA_OSML_STREAM_PARSER_HPP_ */