summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-27 18:52:43 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-27 18:52:43 +0100
commit12e10d18810b7ea4ce142d76e846b4faf0c33488 (patch)
treee0e5fb04d0e24033f5c884f7821866ffde5d0fba
parent19dd5946125e90dcbd61966896c9f6cfc4451d80 (diff)
Made OsmlStreamParser ready for user defined tokens, started to adapt unit tests.
-rw-r--r--CMakeLists.txt32
-rw-r--r--src/formats/osml/OsmlStreamParser.cpp701
-rw-r--r--src/formats/osml/OsmlStreamParser.hpp298
-rw-r--r--test/formats/osml/OsmlStreamParserTest.cpp1542
4 files changed, 1355 insertions, 1218 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 75909e9..4e2d7f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -212,14 +212,14 @@ ADD_LIBRARY(ousia_core
# ousia_core
#)
-#ADD_LIBRARY(ousia_osml
+ADD_LIBRARY(ousia_osml
# src/formats/osml/OsmlParser
-# src/formats/osml/OsmlStreamParser
-#)
+ src/formats/osml/OsmlStreamParser
+)
-#TARGET_LINK_LIBRARIES(ousia_osml
-# ousia_core
-#)
+TARGET_LINK_LIBRARIES(ousia_osml
+ ousia_core
+)
ADD_LIBRARY(ousia_osxml
src/formats/osxml/OsxmlAttributeLocator
@@ -383,17 +383,17 @@ IF(TEST)
# ousia_mozjs
# )
-# ADD_EXECUTABLE(ousia_test_osml
+ ADD_EXECUTABLE(ousia_test_osml
# test/formats/osml/OsmlParserTest
-# test/formats/osml/OsmlStreamParserTest
-# )
+ test/formats/osml/OsmlStreamParserTest
+ )
-# TARGET_LINK_LIBRARIES(ousia_test_osml
-# ${GTEST_LIBRARIES}
-# ousia_core
-# ousia_osml
-# ousia_filesystem
-# )
+ TARGET_LINK_LIBRARIES(ousia_test_osml
+ ${GTEST_LIBRARIES}
+ ousia_core
+ ousia_osml
+ ousia_filesystem
+ )
# ADD_EXECUTABLE(ousia_test_osxml
# test/formats/osxml/OsxmlEventParserTest
@@ -423,7 +423,7 @@ IF(TEST)
ADD_TEST(ousia_test_filesystem ousia_test_filesystem)
ADD_TEST(ousia_test_html ousia_test_html)
# ADD_TEST(ousia_test_mozjs ousia_test_mozjs)
-# ADD_TEST(ousia_test_osml ousia_test_osml)
+ ADD_TEST(ousia_test_osml ousia_test_osml)
# ADD_TEST(ousia_test_osxml ousia_test_osxml)
ADD_TEST(ousia_test_xml ousia_test_xml)
ENDIF()
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index d4cdbf8..7e01a3c 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -19,92 +19,411 @@
#include <core/common/CharReader.hpp>
#include <core/common/Logger.hpp>
#include <core/common/Utils.hpp>
+#include <core/common/Variant.hpp>
#include <core/common/VariantReader.hpp>
+#include <core/parser/utils/Tokenizer.hpp>
+#include <core/parser/utils/TokenizedData.hpp>
+
#include "OsmlStreamParser.hpp"
+#include <stack>
+#include <vector>
+
namespace ousia {
+namespace {
/**
- * Plain format default tokenizer.
+ * Osml format default tokenizer. Registers the primary tokens in its
+ * constructor. A single, static instance of this class is created as
+ * "OsmlTokens", which is copied to the Tokenizer instance of
+ * OsmlStreamParserImpl.
*/
-class PlainFormatTokens : public Tokenizer {
+class OsmlFormatTokens : public Tokenizer {
public:
+ TokenId Backslash;
+ TokenId LineComment;
+ TokenId BlockCommentStart;
+ TokenId BlockCommentEnd;
+ TokenId FieldStart;
+ TokenId FieldEnd;
+ TokenId DefaultFieldStart;
+ TokenId AnnotationStart;
+ TokenId AnnotationEnd;
+
/**
- * Id of the backslash token.
+ * Registers the osml format tokens in the internal tokenizer.
*/
- TokenId Backslash;
+ OsmlFormatTokens()
+ {
+ Backslash = registerToken("\\");
+ LineComment = registerToken("%");
+ BlockCommentStart = registerToken("%{");
+ BlockCommentEnd = registerToken("}%");
+ FieldStart = registerToken("{");
+ FieldEnd = registerToken("}");
+ DefaultFieldStart = registerToken("{!");
+ AnnotationStart = registerToken("<\\");
+ AnnotationEnd = registerToken("\\>");
+ }
+};
+
+/**
+ * Instance of OsmlFormatTokens used to initialize the internal tokenizer
+ * instance of OsmlStreamParserImpl.
+ */
+static const OsmlFormatTokens OsmlTokens;
+/**
+ * Structure representing a field.
+ */
+struct Field {
/**
- * Id of the line comment token.
+ * Specifies whether this field was marked as default field.
*/
- TokenId LineComment;
+ bool defaultField;
/**
- * Id of the block comment start token.
+ * Location at which the field was started.
*/
- TokenId BlockCommentStart;
+ SourceLocation location;
/**
- * Id of the block comment end token.
+ * Constructor of the Field structure, initializes all member variables with
+ * the given values.
+ *
+ * @param defaultField is a flag specifying whether this field is a default
+ * field.
+ * @param location specifies the location at which the field was started.
*/
- TokenId BlockCommentEnd;
+ Field(bool defaultField = false,
+ const SourceLocation &location = SourceLocation{})
+ : defaultField(defaultField), location(location)
+ {
+ }
+};
+/**
+ * Entry used for the command stack.
+ */
+class Command {
+private:
/**
- * Id of the field start token.
+ * Name and location of the current command.
*/
- TokenId FieldStart;
+ Variant name;
/**
- * Id of the field end token.
+ * Arguments that were passed to the command.
*/
- TokenId FieldEnd;
+ Variant arguments;
/**
- * Id of the default field start token.
+ * Vector used as stack for holding the number of opening/closing braces
+ * and the corresponding "isDefaultField" flag.
*/
- TokenId DefaultFieldStart;
+ std::vector<Field> fields;
/**
- * Id of the annotation start token.
+ * Set to true if this is a command with clear begin and end.
*/
- TokenId AnnotationStart;
+ bool hasRange;
+public:
/**
- * Id of the annotation end token.
+ * Default constructor, marks this command as normal, non-range command.
*/
- TokenId AnnotationEnd;
+ Command() : hasRange(false) {}
/**
- * Registers the plain format tokens in the internal tokenizer.
+ * Constructor of the Command class.
+ *
+ * @param name is a string variant with name and location of the
+ * command.
+ * @param arguments is a map variant with the arguments given to the
+ * command.
+ * @param hasRange should be set to true if this is a command with
+ * explicit range.
*/
- PlainFormatTokens()
+ Command(Variant name, Variant arguments, bool hasRange)
+ : name(std::move(name)),
+ arguments(std::move(arguments)),
+ hasRange(hasRange)
{
- Backslash = registerToken("\\");
- LineComment = registerToken("%");
- BlockCommentStart = registerToken("%{");
- BlockCommentEnd = registerToken("}%");
- FieldStart = registerToken("{");
- FieldEnd = registerToken("}");
- DefaultFieldStart = registerToken("{!");
- AnnotationStart = registerToken("<\\");
- AnnotationEnd = registerToken("\\>");
+ }
+
+ /**
+ * Returns a reference at the variant representing name and location of the
+ * command.
+ *
+ * @return a variant containing name and location of the command.
+ */
+ const Variant &getName() const { return name; }
+
+ /**
+ * Returns a reference at the variant containing name, value and location of
+ * the arguments.
+ *
+ * @return the arguments stored for the command.
+ */
+ const Variant &getArguments() const { return arguments; }
+
+ /**
+ * Returns a reference at the internal field list. This list should be used
+ * for printing error messages when fields are still open although the outer
+ * range field closes.
+ *
+ * @return a const reference at the internal field vector.
+ */
+ const std::vector<Field> &getFields() const { return fields; }
+
+ /**
+ * Returns true if this command is currently in a default field.
+ *
+ * @return true if the current field on the field stack was explicitly
+ * marked as default field. If the field stack is empty, true is returned
+ * if this is a range command.
+ */
+ bool inDefaultField() const
+ {
+ return (!fields.empty() && fields.back().defaultField) ||
+ (fields.empty() && hasRange);
+ }
+
+ /**
+ * Returns true if this command currently is in any field.
+ *
+ * @return true if a field is on the stack or this is a range command.
+ * Range commands always are in a field.
+ */
+ bool inField() const { return !fields.empty() || hasRange; }
+
+ /**
+ * Returns true if this command currently is in a range field.
+ *
+ * @return true if the command has a range and no fields are on the field
+ * stack.
+ */
+ bool inRangeField() const { return fields.empty() && hasRange; }
+
+ /**
+ * Returns true if this command currently is in a non-range field.
+ *
+ * @return true if the command is in an explicitly opened field, i.e. not
+ * merely in the implicit field constructed by the range.
+ */
+ bool inNonRangeField() const { return !fields.empty(); }
+
+ /**
+ * Pushes another field onto the field stack of this command.
+ *
+ * @param defaultField if true, explicitly marks this field as default
+ * field.
+ * @param location is the source location at which the field was started.
+ * Used for error messages in which the user is notified about an error with
+ * too few closing fields.
+ */
+ void pushField(bool defaultField = false,
+ const SourceLocation &location = SourceLocation{})
+ {
+ fields.emplace_back(defaultField, location);
+ }
+
+ /**
+ * Removes another field from the field stack of this command, returns true
+ * if the operation was successful.
+ *
+ * @return true if there was a field to pop on the stack, false otherwise.
+ */
+ bool popField()
+ {
+ if (!fields.empty()) {
+ fields.pop_back();
+ return true;
+ }
+ return false;
}
};
+}
-static const PlainFormatTokens OsmlTokens;
+/* Class OsmlStreamParserImpl */
-OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger)
- : reader(reader),
- logger(logger),
- tokenizer(OsmlTokens),
- data(reader.getSourceId())
+/**
+ * Internal implementation of OsmlStreamParser.
+ */
+class OsmlStreamParserImpl {
+public:
+ /**
+ * State enum compatible with OsmlStreamParserState but extended by two more
+ * entries (END and NONE).
+ */
+ enum class State : uint8_t {
+ COMMAND_START = 0,
+ COMMAND_END = 1,
+ FIELD_START = 2,
+ FIELD_END = 3,
+ ANNOTATION_START = 4,
+ ANNOTATION_END = 5,
+ DATA = 6,
+ END = 7,
+ RECOVERABLE_ERROR = 8,
+ IRRECOVERABLE_ERROR = 9
+ };
+
+private:
+ /**
+ * Reference to the CharReader instance from which the incoming bytes are
+ * read.
+ */
+ CharReader &reader;
+
+ /**
+ * Reference at the logger instance to which all error messages are sent.
+ */
+ Logger &logger;
+
+ /**
+ * Tokenizer instance used to read individual tokens from the text.
+ */
+ Tokenizer tokenizer;
+
+ /**
+ * Stack containing the current commands.
+ */
+ std::stack<Command> commands;
+
+ /**
+ * Variant containing the tokenized data that was returned from the
+ * tokenizer as data.
+ */
+ TokenizedData data;
+
+ /**
+ * Variable containing the current location of the parser.
+ */
+ SourceLocation location;
+
+ /**
+ * Function used internally to parse an identifier.
+ *
+ * @param start is the start byte offset of the identifier (including the
+ * backslash).
+ * @param allowNSSep should be set to true if the namespace separator is
+ * allowed in the identifier name. Issues error if the namespace separator
+ * is placed incorrectly.
+ */
+ Variant parseIdentifier(size_t start, bool allowNSSep = false);
+
+ /**
+ * Function used internally to handle the special "\begin" command.
+ *
+ * @return an internal State specifying whether an error occurred (return
+ * values State::RECOVERABLE_ERROR or State::IRRECOVERABLE_ERROR) or a
+ * command was actually started (return value State::COMMAND_START).
+ */
+ State parseBeginCommand();
+
+ /**
+ * Function used internally to handle the special "\end" command.
+ *
+ * @return an internal State specifying whether an error occurred (return
+ * values State::RECOVERABLE_ERROR or State::IRRECOVERABLE_ERROR) or a
+ * command was actually ended (return value State::COMMAND_END).
+ */
+ State parseEndCommand();
+
+ /**
+ * Parses the command arguments. Handles errors if the name of the command
+ * was given using the hash notation and as a name field.
+ *
+ * @param commandArgName is the name argument that was given using the hash
+ * notation.
+ * @return a map variant containing the arguments.
+ */
+ Variant parseCommandArguments(Variant commandArgName);
+
+ /**
+ * Function used internally to parse a command.
+ *
+ * @param start is the start byte offset of the command (including the
+ * backslash)
+ * @param isAnnotation if true, the command is not returned as command, but
+ * as annotation start.
+ * @return an internal State describing whether a command was actually parsed.
+ */
+ State parseCommand(size_t start, bool isAnnotation);
+
+ /**
+ * Function used internally to parse a block comment.
+ */
+ void parseBlockComment();
+
+ /**
+ * Function used internally to parse a generic comment.
+ */
+ void parseLineComment();
+
+ /**
+ * Pushes the parsed command onto the command stack.
+ */
+ void pushCommand(Variant commandName, Variant commandArguments,
+ bool hasRange);
+
+ /**
+ * Checks whether there is any data pending to be issued, if yes, resets the
+ * currently peeked characters and returns true.
+ *
+ * @return true if there was any data and DATA should be returned by the
+ * parse function, false otherwise.
+ */
+ bool checkIssueData();
+
+ /**
+ * Returns a reference at the current command at the top of the command
+ * stack.
+ *
+ * @return a reference at the top command in the command stack.
+ */
+ Command &cmd() { return commands.top(); }
+
+ /**
+ * Returns a reference at the current command at the top of the command
+ * stack.
+ *
+ * @return a reference at the top command in the command stack.
+ */
+ const Command &cmd() const { return commands.top(); }
+
+public:
+ /**
+ * Constructor of the OsmlStreamParserImpl class. Attaches the new
+ * OsmlStreamParserImpl to the given CharReader and Logger instances.
+ *
+ * @param reader is the reader instance from which incoming characters
+ * should be read.
+ * @param logger is the logger instance to which errors should be written.
+ */
+ OsmlStreamParserImpl(CharReader &reader, Logger &logger);
+
+ State parse();
+
+ const TokenizedData &getData() const { return data; }
+ const Variant &getCommandName() const { return cmd().getName(); }
+ const Variant &getCommandArguments() const { return cmd().getArguments(); }
+ const SourceLocation &getLocation() const { return location; }
+ bool inRangeCommand() const { return cmd().inRangeField(); };
+ bool inDefaultField() const { return cmd().inDefaultField(); }
+};
+
+/* Class OsmlStreamParserImpl */
+
+OsmlStreamParserImpl::OsmlStreamParserImpl(CharReader &reader, Logger &logger)
+ : reader(reader), logger(logger), tokenizer(OsmlTokens)
{
- // Place an intial command representing the complete file on the stack
- commands.push(Command{"", Variant::mapType{}, true, true, true, false});
+ commands.emplace("", Variant::mapType{}, true);
}
-Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep)
+Variant OsmlStreamParserImpl::parseIdentifier(size_t start, bool allowNSSep)
{
bool first = true;
bool hasCharSinceNSSep = false;
@@ -147,20 +466,20 @@ Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep)
return res;
}
-OsmlStreamParser::State OsmlStreamParser::parseBeginCommand()
+OsmlStreamParserImpl::State OsmlStreamParserImpl::parseBeginCommand()
{
// Expect a '{' after the command
reader.consumeWhitespace();
if (!reader.expect('{')) {
logger.error("Expected \"{\" after \\begin", reader);
- return State::NONE;
+ return State::RECOVERABLE_ERROR;
}
// Parse the name of the command that should be opened
Variant commandName = parseIdentifier(reader.getOffset(), true);
if (commandName.asString().empty()) {
logger.error("Expected identifier", commandName);
- return State::ERROR;
+ return State::IRRECOVERABLE_ERROR;
}
// Check whether the next character is a '#', indicating the start of the
@@ -176,7 +495,7 @@ OsmlStreamParser::State OsmlStreamParser::parseBeginCommand()
if (!reader.expect('}')) {
logger.error("Expected \"}\"", reader);
- return State::ERROR;
+ return State::IRRECOVERABLE_ERROR;
}
// Parse the arguments
@@ -185,28 +504,15 @@ OsmlStreamParser::State OsmlStreamParser::parseBeginCommand()
// Push the command onto the command stack
pushCommand(std::move(commandName), std::move(commandArguments), true);
- return State::COMMAND;
-}
-
-static bool checkStillInField(const OsmlStreamParser::Command &cmd,
- const Variant &endName, Logger &logger)
-{
- if (cmd.inField && !cmd.inRangeField) {
- logger.error(std::string("\\end in open field of command \"") +
- cmd.name.asString() + std::string("\""),
- endName);
- logger.note(std::string("Open command started here:"), cmd.name);
- return true;
- }
- return false;
+ return State::COMMAND_START;
}
-OsmlStreamParser::State OsmlStreamParser::parseEndCommand()
+OsmlStreamParserImpl::State OsmlStreamParserImpl::parseEndCommand()
{
// Expect a '{' after the command
if (!reader.expect('{')) {
logger.error("Expected \"{\" after \\end", reader);
- return State::NONE;
+ return State::RECOVERABLE_ERROR;
}
// Fetch the name of the command that should be ended here
@@ -215,56 +521,58 @@ OsmlStreamParser::State OsmlStreamParser::parseEndCommand()
// Make sure the given command name is not empty
if (name.asString().empty()) {
logger.error("Expected identifier", name);
- return State::ERROR;
+ return State::IRRECOVERABLE_ERROR;
}
// Make sure the command name is terminated with a '}'
if (!reader.expect('}')) {
logger.error("Expected \"}\"", reader);
- return State::ERROR;
+ return State::IRRECOVERABLE_ERROR;
}
- // Unroll the command stack up to the last range command
- while (!commands.top().hasRange) {
- if (checkStillInField(commands.top(), name, logger)) {
- return State::ERROR;
+ // Unroll the command stack up to the last range command, make sure we do
+ // not intersect with any open field
+ while (!cmd().inRangeField()) {
+ if (cmd().inField()) {
+ logger.error(std::string("\\end in open field of command \"") +
+ cmd().getName().asString() + std::string("\""),
+ name);
+ const std::vector<Field> &fields = cmd().getFields();
+ for (const Field &field : fields) {
+ logger.note(std::string("Still open field started here: "),
+ field.location);
+ }
+ return State::IRRECOVERABLE_ERROR;
}
commands.pop();
}
- // Make sure we're not in an open field of this command
- if (checkStillInField(commands.top(), name, logger)) {
- return State::ERROR;
- }
-
// Special error message if the top-level command is reached
if (commands.size() == 1) {
logger.error(std::string("Cannot end command \"") + name.asString() +
std::string("\" here, no command open"),
name);
- return State::ERROR;
+ return State::IRRECOVERABLE_ERROR;
}
- // Inform the about command mismatches
- const Command &cmd = commands.top();
- if (commands.top().name.asString() != name.asString()) {
- logger.error(std::string("Trying to end command \"") +
- cmd.name.asString() +
+ // Inform the user about mismatches between the name given to \end and the
+ // name of the currently open command
+ if (getCommandName().asString() != name.asString()) {
+ logger.error(std::string("Trying to end command \"") + name.asString() +
std::string("\", but open command is \"") +
- name.asString() + std::string("\""),
+ getCommandName().asString() + std::string("\""),
name);
- logger.note("Last command was opened here:", cmd.name);
- return State::ERROR;
+ logger.note("Open command started here:", getCommandName());
+ return State::IRRECOVERABLE_ERROR;
}
- // Set the location to the location of the command that was ended, then end
- // the current command
+ // End the current command
location = name.getLocation();
commands.pop();
- return cmd.inRangeField ? State::FIELD_END : State::NONE;
+ return State::COMMAND_END;
}
-Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName)
+Variant OsmlStreamParserImpl::parseCommandArguments(Variant commandArgName)
{
// Parse the arguments using the universal VariantReader
Variant commandArguments;
@@ -290,29 +598,14 @@ Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName)
return commandArguments;
}
-void OsmlStreamParser::pushCommand(Variant commandName,
- Variant commandArguments, bool hasRange)
-{
- // Store the location on the stack
- location = commandName.getLocation();
-
- // Place the command on the command stack, remove the last commands if we're
- // not currently inside a field of these commands
- while (!commands.top().inField) {
- commands.pop();
- }
- commands.push(Command{std::move(commandName), std::move(commandArguments),
- hasRange, false, false, false});
-}
-
-OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start,
- bool isAnnotation)
+OsmlStreamParserImpl::State OsmlStreamParserImpl::parseCommand(
+ size_t start, bool isAnnotation)
{
// Parse the commandName as a first identifier
Variant commandName = parseIdentifier(start, true);
if (commandName.asString().empty()) {
logger.error("Empty command name", reader);
- return State::NONE;
+ return State::RECOVERABLE_ERROR;
}
// Handle the special "begin" and "end" commands
@@ -322,7 +615,7 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start,
const bool isEnd = commandNameComponents[0] == "end";
// Parse the begin or end command
- State res = State::COMMAND;
+ State res = State::COMMAND_START;
if (isBegin || isEnd) {
if (commandNameComponents.size() > 1) {
logger.error(
@@ -378,12 +671,13 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start,
} else {
// Make sure no arguments apart from the "name" argument are given
// to an annotation end
- Variant::mapType &map = commands.top().arguments.asMap();
+ const Variant::mapType &map = getCommandArguments().asMap();
if (!map.empty()) {
if (map.count("name") == 0 || map.size() > 1U) {
logger.error(
"An annotation end command may not have any arguments "
- "other than \"name\"");
+ "other than \"name\"",
+ reader);
return res;
}
}
@@ -397,13 +691,13 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start,
// If we're starting an annotation, return the command as annotation start
// instead of command
- if (isAnnotation && res == State::COMMAND) {
+ if (isAnnotation && res == State::COMMAND_START) {
return State::ANNOTATION_START;
}
return res;
}
-void OsmlStreamParser::parseBlockComment()
+void OsmlStreamParserImpl::parseBlockComment()
{
Token token;
size_t depth = 1;
@@ -426,7 +720,7 @@ void OsmlStreamParser::parseBlockComment()
logger.error("File ended while being in a block comment", reader);
}
-void OsmlStreamParser::parseLineComment()
+void OsmlStreamParserImpl::parseLineComment()
{
char c;
while (reader.read(c)) {
@@ -436,65 +730,34 @@ void OsmlStreamParser::parseLineComment()
}
}
-bool OsmlStreamParser::checkIssueData()
-{
- if (!data.empty()) {
- location = data.getLocation();
- reader.resetPeek();
- return true;
- }
- return false;
-}
-
-bool OsmlStreamParser::checkIssueFieldStart()
+void OsmlStreamParserImpl::pushCommand(Variant commandName,
+ Variant commandArguments, bool hasRange)
{
- // Fetch the current command, and check whether we're currently inside a
- // field of this command
- Command &cmd = commands.top();
- if (!cmd.inField) {
- // If this is a range command, we're now implicitly inside the field of
- // this command -- we'll have to issue a field start command!
- if (cmd.hasRange) {
- cmd.inField = true;
- cmd.inRangeField = true;
- reader.resetPeek();
- return true;
- }
+ // Store the location of the command
+ location = commandName.getLocation();
- // This was not a range command, so obviously we're now inside within
- // a field of some command -- so unroll the commands stack until a
- // command with open field is reached
- while (!commands.top().inField) {
- commands.pop();
- }
+ // Place the command on the command stack, remove the last commands if we're
+ // not currently inside a field of these commands
+ while (!cmd().inField()) {
+ commands.pop();
}
- return false;
+
+ // Push the new command onto the command stack
+ commands.emplace(std::move(commandName), std::move(commandArguments),
+ hasRange);
}
-bool OsmlStreamParser::closeField()
+bool OsmlStreamParserImpl::checkIssueData()
{
- // Try to end an open field of the current command -- if the current command
- // is not inside an open field, end this command and try to close the next
- // one
- for (int i = 0; i < 2 && commands.size() > 1; i++) {
- Command &cmd = commands.top();
- if (!cmd.inRangeField) {
- if (cmd.inField) {
- cmd.inField = false;
- if (cmd.inDefaultField) {
- commands.pop();
- }
- return true;
- }
- commands.pop();
- } else {
- return false;
- }
+ if (!data.empty()) {
+ location = data.getLocation();
+ reader.resetPeek();
+ return true;
}
return false;
}
-OsmlStreamParser::State OsmlStreamParser::parse()
+OsmlStreamParserImpl::State OsmlStreamParserImpl::parse()
{
// Reset the data handler
data.clear();
@@ -507,14 +770,6 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// Special handling for Backslash and Text
if (type == OsmlTokens.Backslash ||
type == OsmlTokens.AnnotationStart) {
- // Before appending anything to the output data or starting a new
- // command, check whether FIELD_START has to be issued, as the
- // current command is a command with range
- if (checkIssueFieldStart()) {
- location = token.location;
- return State::FIELD_START;
- }
-
// Check whether a command starts now, without advancing the peek
// cursor
char c;
@@ -535,11 +790,11 @@ OsmlStreamParser::State OsmlStreamParser::parse()
State res = parseCommand(token.location.getStart(),
type == OsmlTokens.AnnotationStart);
switch (res) {
- case State::ERROR:
+ case State::IRRECOVERABLE_ERROR:
throw LoggableException(
"Last error was irrecoverable, ending parsing "
"process");
- case State::NONE:
+ case State::RECOVERABLE_ERROR:
continue;
default:
return res;
@@ -558,15 +813,12 @@ OsmlStreamParser::State OsmlStreamParser::parse()
token.location.getStart() + 1);
}
- data.append(c, token.location.getStart(), reader.getPeekOffset());
+ // Append the character to the output data, mark it as protected
+ data.append(c, token.location.getStart(), reader.getPeekOffset(),
+ true);
reader.consumePeek();
continue;
} else if (type == Tokens::Data) {
- // Check whether FIELD_START has to be issued before appending text
- if (checkIssueFieldStart()) {
- location = token.location;
- return State::FIELD_START;
- }
reader.consumePeek();
continue;
}
@@ -580,7 +832,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// We will handle the token now, consume the peeked characters
reader.consumePeek();
- // Update the location to the current token location
+ // Synchronize the location with the current token location
location = token.location;
if (token.id == OsmlTokens.LineComment) {
@@ -588,39 +840,27 @@ OsmlStreamParser::State OsmlStreamParser::parse()
} else if (token.id == OsmlTokens.BlockCommentStart) {
parseBlockComment();
} else if (token.id == OsmlTokens.FieldStart) {
- Command &cmd = commands.top();
- if (!cmd.inField) {
- cmd.inField = true;
- }
+ cmd().pushField(false, token.location);
return State::FIELD_START;
-/* logger.error(
- "Got field start token \"{\", but no command for which to "
- "start the field. Write \"\\{\" to insert this sequence as "
- "text.",
- token);*/
} else if (token.id == OsmlTokens.FieldEnd) {
- closeField();
- return State::FIELD_END;
-/* if (closeField()) {
+ // Remove all commands from the list that currently are not in any
+ // field
+ while (!cmd().inField()) {
+ commands.pop();
+ }
+
+ // If the remaining command is in a non-range field, close that
+ // field
+ if (cmd().inNonRangeField()) {
+ cmd().popField();
return State::FIELD_END;
}
logger.error(
- "Got field end token \"}\", but there is no field to end. "
- "Write \"\\}\" to insert this sequence as text.",
- token);*/
+ "Got field end token \"}\", but there is no field to end.",
+ token);
} else if (token.id == OsmlTokens.DefaultFieldStart) {
- // Try to start a default field the first time the token is reached
- Command &topCmd = commands.top();
- if (!topCmd.inField) {
- topCmd.inField = true;
- topCmd.inDefaultField = true;
- }
+ cmd().pushField(true, token.location);
return State::FIELD_START;
-/* logger.error(
- "Got default field start token \"{!\", but no command for "
- "which to start the field. Write \"\\{!\" to insert this "
- "sequence as text",
- token);*/
} else if (token.id == OsmlTokens.AnnotationEnd) {
// We got a single annotation end token "\>" -- simply issue the
// ANNOTATION_END event
@@ -641,11 +881,25 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// Make sure all open commands and fields have been ended at the end of the
// stream
while (commands.size() > 1) {
- Command &cmd = commands.top();
- if (cmd.inField || cmd.hasRange) {
- logger.error("Reached end of stream, but command \"" +
- cmd.name.asString() + "\" has not been ended",
- cmd.name);
+ if (cmd().inField()) {
+ // If the stream ended with an open range field, issue information
+ // about the range field
+ if (cmd().inRangeField()) {
+ // Inform about the still open command itself
+ logger.error("Reached end of stream, but command \"" +
+ getCommandName().asString() +
+ "\" has not been ended",
+ getCommandName());
+ } else {
+ // Issue information about still open fields
+ const std::vector<Field> &fields = cmd().getFields();
+ if (!fields.empty()) {
+ logger.error(
+ std::string(
+ "Reached end of stream, but field is still open."),
+ fields.back().location);
+ }
+ }
}
commands.pop();
}
@@ -654,26 +908,45 @@ OsmlStreamParser::State OsmlStreamParser::parse()
return State::END;
}
-Variant OsmlStreamParser::getText(WhitespaceMode mode)
+/* Class OsmlStreamParser */
+
+OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger)
+ : impl(new OsmlStreamParserImpl(reader, logger))
+{
+}
+
+OsmlStreamParser::~OsmlStreamParser()
+{
+ // Stub needed because OsmlStreamParserImpl is incomplete in header
+}
+
+OsmlStreamParser::State OsmlStreamParser::parse()
+{
+ return static_cast<State>(impl->parse());
+}
+
+const TokenizedData &OsmlStreamParser::getData() const
{
- TokenizedData dataFork = data;
- Variant text = dataFork.text(mode);
- location = text.getLocation();
- return text;
+ return impl->getData();
}
const Variant &OsmlStreamParser::getCommandName() const
{
- return commands.top().name;
+ return impl->getCommandName();
}
const Variant &OsmlStreamParser::getCommandArguments() const
{
- return commands.top().arguments;
+ return impl->getCommandArguments();
}
-bool OsmlStreamParser::inDefaultField() const
+const SourceLocation &OsmlStreamParser::getLocation() const
{
- return commands.top().inRangeField || commands.top().inDefaultField;
+ return impl->getLocation();
}
+
+bool OsmlStreamParser::inDefaultField() const { return impl->inDefaultField(); }
+
+bool OsmlStreamParser::inRangeCommand() const { return impl->inRangeCommand(); }
+
}
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index 453a2bb..1fee90b 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -29,30 +29,29 @@
#ifndef _OUSIA_OSML_STREAM_PARSER_HPP_
#define _OUSIA_OSML_STREAM_PARSER_HPP_
+#include <cstdint>
#include <memory>
-#include <core/common/Variant.hpp>
-#include <core/common/Whitespace.hpp>
-#include <core/parser/utils/Tokenizer.hpp>
-#include <core/parser/utils/TokenizedData.hpp>
-
namespace ousia {
// Forward declarations
class CharReader;
class Logger;
class OsmlStreamParserImpl;
+class TokenizedData;
+class Variant;
/**
* The OsmlStreamParser class provides a low-level reader for the TeX-esque osml
* format. The parser is constructed around a "parse" function, which reads data
* from the underlying CharReader until a new state is reached and indicates
* this state in a return value. The calling code then has to pull corresponding
- * data from the stream reader. The reader makes sure the incommind file is
+ * data from the stream reader. The reader makes sure the incoming stream is
* syntactically valid and tries to recorver from most errors. If an error is
* irrecoverable (this is the case for errors with wrong nesting of commands or
* fields, as this would lead to too many consecutive errors) a
- * LoggableException is thrown.
+ * LoggableException is thrown. The OsmlStreamParser can be compared to a SAX
+ * parser for XML.
*/
class OsmlStreamParser {
public:
@@ -60,39 +59,21 @@ public:
* Enum used to indicate which state the OsmlStreamParser class is in
* after calling the "parse" function.
*/
- enum class State {
- /**
- * State returned if a fully featured command has been read. A command
- * consists of the command name and its arguments (which optionally
- * includes the name).
- */
- COMMAND,
-
- /**
- * State returned if data is given. The reader must decide which field
- * or command this should be routed to. Trailing or leading whitespace
- * has been removed. Only called if the data is non-empty.
- */
- DATA,
-
+ enum class State : uint8_t {
/**
- * A user-defined entity has been found. The entity sequence is stored
- * in the command name.
+ * State returned if the start of a command has been read. Use the
+ * getCommandName(), getCommandArguments() and inRangeCommand()
+ * functions to retrieve more information about the command that was
+ * just started.
*/
- ENTITY,
+ COMMAND_START = 0,
/**
- * State returned if an annotation was started. An annotation consists
- * of the command name and its arguments (which optionally include the
- * name).
+ * State returned if a range command has just ended. This state is not
+ * returned for non-range commands (as the actual end of a command is
+ * context dependent).
*/
- ANNOTATION_START,
-
- /**
- * State returned if an annotation ends. The reader indicates which
- * annotation ends.
- */
- ANNOTATION_END,
+ COMMAND_END = 1,
/**
* State returned if a new field started. The reader assures that the
@@ -100,200 +81,47 @@ public:
* is not started if data has been given outside of a field. The
* field number is set to the current field index.
*/
- FIELD_START,
+ FIELD_START = 2,
/**
* State returned if the current field ends. The reader assures that a
* field was actually open.
*/
- FIELD_END,
+ FIELD_END = 3,
/**
- * The end of the stream has been reached.
+ * State returned if an annotation was started. An annotation consists
+ * of the command name and its arguments (which optionally include the
+ * name).
*/
- END,
+ ANNOTATION_START = 4,
/**
- * Returned from internal functions if nothing should be done.
+ * State returned if an annotation ends. The reader indicates which
+ * annotation ends.
*/
- NONE,
+ ANNOTATION_END = 5,
/**
- * Returned from internal function to indicate irrecoverable errors.
+ * State returned if data is given. The reader must decide which field
+ * or command this should be routed to. Trailing or leading whitespace
+ * has been removed. Only called if the data is non-empty.
*/
- ERROR
- };
-
- /**
- * Entry used for the command stack.
- */
- struct Command {
- /**
- * Name and location of the current command.
- */
- Variant name;
-
- /**
- * Arguments that were passed to the command.
- */
- Variant arguments;
-
- /**
- * Vector used as stack for holding the number of opening/closing braces
- * and the corresponding "isDefaultField" flag.
- */
- std::vector<bool> fields;
-
- /**
- * Set to true if this is a command with clear begin and end.
- */
- bool hasRange;
-
- /**
- * Default constructor.
- */
- Command()
- : hasRange(false),
- inField(false),
- inDefaultField()
- {
- }
+ DATA = 6,
/**
- * Constructor of the Command class.
- *
- * @param name is a string variant with name and location of the
- * command.
- * @param arguments is a map variant with the arguments given to the
- * command.
- * @param hasRange should be set to true if this is a command with
- * explicit range.
- * @param inDefaultField is set to true if we currently are in a
- * specially marked default field.
- */
- Command(Variant name, Variant arguments, bool hasRange)
- : name(std::move(name)),
- arguments(std::move(arguments)),
- hasRange(hasRange),
- inField(inField),
- inRangeField(inRangeField),
- inDefaultField(inDefaultField)
- {
- }
+ * The end of the stream has been reached.
+ */
+ END = 7
};
private:
/**
- * Reference to the CharReader instance from which the incomming bytes are
- * read.
- */
- CharReader &reader;
-
- /**
- * Reference at the logger instance to which all error messages are sent.
- */
- Logger &logger;
-
- /**
- * Tokenizer instance used to read individual tokens from the text.
- */
- Tokenizer tokenizer;
-
- /**
- * Variant containing the tokenized data that was returned from the
- * tokenizer as data.
- */
- TokenizedData data;
-
- /**
- * Stack containing the current commands.
- */
- std::stack<Command> commands;
-
- /**
- * Pointer at
+ * Pointer at the class containing the internal implementation (according
+ * to the PIMPL idiom).
*/
std::unique_ptr<OsmlStreamParserImpl> impl;
- /**
- * Function used internall to parse an identifier.
- *
- * @param start is the start byte offset of the identifier (including the
- * backslash).
- * @param allowNSSep should be set to true if the namespace separator is
- * allowed in the identifier name. Issues error if the namespace separator
- * is placed incorrectly.
- */
- Variant parseIdentifier(size_t start, bool allowNSSep = false);
-
- /**
- * Function used internally to handle the special "\begin" command.
- */
- State parseBeginCommand();
-
- /**
- * Function used internally to handle the special "\end" command.
- */
- State parseEndCommand();
-
- /**
- * Pushes the parsed command onto the command stack.
- */
- void pushCommand(Variant commandName, Variant commandArguments,
- bool hasRange);
-
- /**
- * Parses the command arguments.
- */
- Variant parseCommandArguments(Variant commandArgName);
-
- /**
- * Function used internally to parse a command.
- *
- * @param start is the start byte offset of the command (including the
- * backslash)
- * @param isAnnotation if true, the command is not returned as command, but
- * as annotation start.
- * @return true if a command was actuall parsed, false otherwise.
- */
- State parseCommand(size_t start, bool isAnnotation);
-
- /**
- * Function used internally to parse a block comment.
- */
- void parseBlockComment();
-
- /**
- * Function used internally to parse a generic comment.
- */
- void parseLineComment();
-
- /**
- * Checks whether there is any data pending to be issued, if yes, issues it.
- *
- * @return true if there was any data and DATA should be returned by the
- * parse function, false otherwise.
- */
- bool checkIssueData();
-
- /**
- * Called before any data is appended to the internal data handler. Checks
- * whether a new field should be started or implicitly ended.
- *
- * @return true if FIELD_START should be returned by the parse function.
- */
- bool checkIssueFieldStart();
-
- /**
- * Closes a currently open field. Note that the command will be removed from
- * the internal command stack if the field that is being closed is a
- * field marked as default field.
- *
- * @return true if the field could be closed, false if there was no field
- * to close.
- */
- bool closeField();
-
public:
/**
* Constructor of the OsmlStreamParser class. Attaches the new
@@ -322,29 +150,9 @@ public:
State parse();
/**
- * Returns a reference at the internally stored data. Only valid if
- * State::DATA was returned by the "parse" function.
- *
- * @return a reference at a variant containing the data parsed by the
- * "parse" function.
- */
- const TokenizedData &getData() const { return data; }
-
- /**
- * Returns the complete content of the internal TokenizedData instance as
- * a single string Variant. This method is mainly used in the unit tests for
- * this class, it simply calls the text() method of TokenizedData.
- *
- * @param mode is the WhitespaceMode that should be used for returning the
- * text.
- * @return a string variant containing the text content of the internal
- * TokenizedData instance or a nullptr variant if there is no text.
- */
- Variant getText(WhitespaceMode mode = WhitespaceMode::COLLAPSE);
-
- /**
* Returns a reference at the internally stored command name. Only valid if
- * State::COMMAND was returned by the "parse" function.
+ * State::COMMAND_START, State::ANNOTATION_START or State::ANNOTATION_END
+ * was returned by the "parse" function.
*
* @return a reference at a variant containing name and location of the
* parsed command.
@@ -353,7 +161,8 @@ public:
/**
* Returns a reference at the internally stored command name. Only valid if
- * State::COMMAND was returned by the "parse" function.
+ * State::COMMAND_START, State::ANNOTATION_START or State::ANNOTATION_END
+ * was returned by the "parse" function.
*
* @return a reference at a variant containing arguments given to the
* command.
@@ -361,10 +170,37 @@ public:
const Variant &getCommandArguments() const;
/**
+ * Returns a reference at the internally stored data. Only valid if
+ * State::DATA was returned by the "parse" function.
+ *
+ * @return a reference at a variant containing the data parsed by the
+ * "parse" function.
+ */
+ const TokenizedData &getData() const;
+
+ /**
+ * Returns the location of the current token.
+ */
+ const SourceLocation &getLocation() const;
+
+ /**
+ * Returns true if the currently started command is a range command, only
+ * valid if State::COMMAND_START was returned by the "parse" function.
+ *
+ * @return true if the started command is a range command, false
+ * otherwise.
+ */
+ bool inRangeCommand() const;
+
+ /**
* Returns true if the current field is the "default" field. This is true if
* the parser either is in the outer range of a range command or inside a
- * field that has been especially marked as "default" field (using the "|"
- * syntax).
+ * field that has been especially marked as "default" field (using the "{!"
+ * syntax). Only valid if State::FIELD_START was returned by the "parse"
+ * function.
+ *
+ * @return true if the current field was marked as default field (using the
+ * "{!" syntax).
*/
bool inDefaultField() const;
};
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index 3d01007..8b64e51 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -21,7 +21,9 @@
#include <iostream>
#include <core/common/CharReader.hpp>
+#include <core/common/Variant.hpp>
#include <core/frontend/TerminalLogger.hpp>
+#include <core/parser/utils/TokenizedData.hpp>
#include <formats/osml/OsmlStreamParser.hpp>
@@ -30,147 +32,196 @@ namespace ousia {
static TerminalLogger logger(std::cerr, true);
// static ConcreteLogger logger;
-static OsmlStreamParser::State skipEmptyData(OsmlStreamParser &reader)
+static void assertCommandStart(OsmlStreamParser &parser,
+ const std::string &name,
+ bool rangeCommand,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset)
{
- OsmlStreamParser::State res = reader.parse();
- if (res == OsmlStreamParser::State::DATA) {
- EXPECT_FALSE(reader.getData().hasNonWhitespaceText());
- res = reader.parse();
- }
- return res;
-}
-
-static void assertCommand(OsmlStreamParser &reader, const std::string &name,
- SourceOffset start = InvalidSourceOffset,
- SourceOffset end = InvalidSourceOffset)
-{
- ASSERT_EQ(OsmlStreamParser::State::COMMAND, skipEmptyData(reader));
- EXPECT_EQ(name, reader.getCommandName().asString());
+ ASSERT_EQ(OsmlStreamParser::State::COMMAND_START, parser.parse());
+ EXPECT_EQ(name, parser.getCommandName().asString());
+ EXPECT_EQ(rangeCommand, parser.inRangeCommand());
if (start != InvalidSourceOffset) {
- EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
- EXPECT_EQ(start, reader.getLocation().getStart());
+ EXPECT_EQ(start, parser.getCommandName().getLocation().getStart());
+ EXPECT_EQ(start, parser.getLocation().getStart());
}
if (end != InvalidSourceOffset) {
- EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
- EXPECT_EQ(end, reader.getLocation().getEnd());
+ EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd());
+ EXPECT_EQ(end, parser.getLocation().getEnd());
}
}
-static void assertCommand(OsmlStreamParser &reader, const std::string &name,
- const Variant::mapType &args,
- SourceOffset start = InvalidSourceOffset,
- SourceOffset end = InvalidSourceOffset)
+static void assertCommandStart(OsmlStreamParser &parser,
+ const std::string &name,
+ bool rangeCommand,
+ const Variant::mapType &args,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset)
{
- assertCommand(reader, name, start, end);
- EXPECT_EQ(args, reader.getCommandArguments());
+ assertCommandStart(parser, name, rangeCommand, start, end);
+ EXPECT_EQ(args, parser.getCommandArguments());
}
-static void assertData(OsmlStreamParser &reader, const std::string &data,
- SourceOffset start = InvalidSourceOffset,
- SourceOffset end = InvalidSourceOffset,
- WhitespaceMode mode = WhitespaceMode::COLLAPSE)
+static void assertCommand(OsmlStreamParser &parser,
+ const std::string &name,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset)
{
- ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
- Variant text = reader.getText(mode);
- ASSERT_TRUE(text.isString());
- EXPECT_EQ(data, text.asString());
+ assertCommandStart(parser, name, false, Variant::mapType{}, start, end);
+}
+
+static void assertCommandEnd(OsmlStreamParser &parser,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset)
+{
+ ASSERT_EQ(OsmlStreamParser::State::COMMAND_END, parser.parse());
if (start != InvalidSourceOffset) {
- EXPECT_EQ(start, text.getLocation().getStart());
- EXPECT_EQ(start, reader.getLocation().getStart());
+ EXPECT_EQ(start, parser.getLocation().getStart());
}
if (end != InvalidSourceOffset) {
- EXPECT_EQ(end, text.getLocation().getEnd());
- EXPECT_EQ(end, reader.getLocation().getEnd());
+ EXPECT_EQ(end, parser.getLocation().getEnd());
+ }
+}
+
+static void assertTextData(OsmlStreamParser &parser, const std::string &text,
+ SourceOffset dataStart = InvalidSourceOffset,
+ SourceOffset dataEnd = InvalidSourceOffset,
+ SourceOffset textStart = InvalidSourceOffset,
+ SourceOffset textEnd = InvalidSourceOffset,
+ WhitespaceMode mode = WhitespaceMode::COLLAPSE)
+{
+ ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse());
+
+ const TokenizedData &data = parser.getData();
+ TokenizedDataReader dataReader = data.reader();
+
+ Token token;
+ ASSERT_TRUE(dataReader.read(token, TokenSet{}, mode));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ(text, token.content);
+ if (dataStart != InvalidSourceOffset) {
+ EXPECT_EQ(dataStart, data.getLocation().getStart());
+ EXPECT_EQ(dataStart, parser.getLocation().getStart());
+ }
+ if (dataEnd != InvalidSourceOffset) {
+ EXPECT_EQ(dataEnd, data.getLocation().getEnd());
+ EXPECT_EQ(dataEnd, parser.getLocation().getEnd());
+ }
+ if (textStart != InvalidSourceOffset) {
+ EXPECT_EQ(textStart, token.getLocation().getStart());
}
+ if (textEnd != InvalidSourceOffset) {
+ EXPECT_EQ(textEnd, token.getLocation().getEnd());
+ }
+}
+
+static void assertData(OsmlStreamParser &parser, const std::string &text,
+ SourceOffset textStart = InvalidSourceOffset,
+ SourceOffset textEnd = InvalidSourceOffset,
+ WhitespaceMode mode = WhitespaceMode::COLLAPSE)
+{
+ assertTextData(parser, text, InvalidSourceOffset, InvalidSourceOffset, textStart, textEnd, mode);
+}
+
+static void assertEmptyData(OsmlStreamParser &parser)
+{
+ ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse());
+
+ const TokenizedData &data = parser.getData();
+ TokenizedDataReader dataReader = data.reader();
+
+ Token token;
+ EXPECT_FALSE(dataReader.read(token, TokenSet{}, WhitespaceMode::TRIM));
}
-static void assertFieldStart(OsmlStreamParser &reader, bool defaultField,
+
+static void assertFieldStart(OsmlStreamParser &parser, bool defaultField,
SourceOffset start = InvalidSourceOffset,
SourceOffset end = InvalidSourceOffset)
{
- ASSERT_EQ(OsmlStreamParser::State::FIELD_START, skipEmptyData(reader));
- EXPECT_EQ(defaultField, reader.inDefaultField());
+ ASSERT_EQ(OsmlStreamParser::State::FIELD_START, parser.parse());
+ EXPECT_EQ(defaultField, parser.inDefaultField());
if (start != InvalidSourceOffset) {
- EXPECT_EQ(start, reader.getLocation().getStart());
+ EXPECT_EQ(start, parser.getLocation().getStart());
}
if (end != InvalidSourceOffset) {
- EXPECT_EQ(end, reader.getLocation().getEnd());
+ EXPECT_EQ(end, parser.getLocation().getEnd());
}
}
-static void assertFieldEnd(OsmlStreamParser &reader,
+static void assertFieldEnd(OsmlStreamParser &parser,
SourceOffset start = InvalidSourceOffset,
SourceOffset end = InvalidSourceOffset)
{
- ASSERT_EQ(OsmlStreamParser::State::FIELD_END, skipEmptyData(reader));
+ ASSERT_EQ(OsmlStreamParser::State::FIELD_END, parser.parse());
if (start != InvalidSourceOffset) {
- EXPECT_EQ(start, reader.getLocation().getStart());
+ EXPECT_EQ(start, parser.getLocation().getStart());
}
if (end != InvalidSourceOffset) {
- EXPECT_EQ(end, reader.getLocation().getEnd());
+ EXPECT_EQ(end, parser.getLocation().getEnd());
}
}
-static void assertAnnotationStart(OsmlStreamParser &reader,
+static void assertAnnotationStart(OsmlStreamParser &parser,
const std::string &name,
SourceOffset start = InvalidSourceOffset,
SourceOffset end = InvalidSourceOffset)
{
- ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, skipEmptyData(reader));
- EXPECT_EQ(name, reader.getCommandName().asString());
+ ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, parser.parse());
+ EXPECT_EQ(name, parser.getCommandName().asString());
if (start != InvalidSourceOffset) {
- EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
- EXPECT_EQ(start, reader.getLocation().getStart());
+ EXPECT_EQ(start, parser.getCommandName().getLocation().getStart());
+ EXPECT_EQ(start, parser.getLocation().getStart());
}
if (end != InvalidSourceOffset) {
- EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
- EXPECT_EQ(end, reader.getLocation().getEnd());
+ EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd());
+ EXPECT_EQ(end, parser.getLocation().getEnd());
}
}
-static void assertAnnotationStart(OsmlStreamParser &reader,
+static void assertAnnotationStart(OsmlStreamParser &parser,
const std::string &name,
const Variant::mapType &args,
SourceOffset start = InvalidSourceOffset,
SourceOffset end = InvalidSourceOffset)
{
- assertAnnotationStart(reader, name, start, end);
- EXPECT_EQ(args, reader.getCommandArguments());
+ assertAnnotationStart(parser, name, start, end);
+ EXPECT_EQ(args, parser.getCommandArguments());
}
-static void assertAnnotationEnd(OsmlStreamParser &reader,
+static void assertAnnotationEnd(OsmlStreamParser &parser,
const std::string &name,
const std::string &elementName,
SourceOffset start = InvalidSourceOffset,
SourceOffset end = InvalidSourceOffset)
{
- ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, skipEmptyData(reader));
- ASSERT_EQ(name, reader.getCommandName().asString());
+ ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, parser.parse());
+ ASSERT_EQ(name, parser.getCommandName().asString());
if (!elementName.empty()) {
- ASSERT_EQ(1U, reader.getCommandArguments().asMap().size());
- ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name"));
+ ASSERT_EQ(1U, parser.getCommandArguments().asMap().size());
+ ASSERT_EQ(1U, parser.getCommandArguments().asMap().count("name"));
- auto it = reader.getCommandArguments().asMap().find("name");
+ auto it = parser.getCommandArguments().asMap().find("name");
ASSERT_EQ(elementName, it->second.asString());
}
if (start != InvalidSourceOffset) {
- EXPECT_EQ(start, reader.getLocation().getStart());
+ EXPECT_EQ(start, parser.getLocation().getStart());
}
if (end != InvalidSourceOffset) {
- EXPECT_EQ(end, reader.getLocation().getEnd());
+ EXPECT_EQ(end, parser.getLocation().getEnd());
}
}
-static void assertEnd(OsmlStreamParser &reader,
+static void assertEnd(OsmlStreamParser &parser,
SourceOffset start = InvalidSourceOffset,
SourceOffset end = InvalidSourceOffset)
{
- ASSERT_EQ(OsmlStreamParser::State::END, skipEmptyData(reader));
+ ASSERT_EQ(OsmlStreamParser::State::END, parser.parse());
if (start != InvalidSourceOffset) {
- EXPECT_EQ(start, reader.getLocation().getStart());
+ EXPECT_EQ(start, parser.getLocation().getStart());
}
if (end != InvalidSourceOffset) {
- EXPECT_EQ(end, reader.getLocation().getEnd());
+ EXPECT_EQ(end, parser.getLocation().getEnd());
}
}
@@ -179,9 +230,9 @@ TEST(OsmlStreamParser, empty)
const char *testString = "";
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+ assertEnd(parser, 0, 0);
}
TEST(OsmlStreamParser, oneCharacter)
@@ -189,45 +240,102 @@ TEST(OsmlStreamParser, oneCharacter)
const char *testString = "a";
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
+
+ assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::COLLAPSE);
+ assertEnd(parser, 1, 1);
+}
+
+TEST(OsmlStreamParser, whitespacePreserve)
+{
+ const char *testString = " hello \t world ";
+ // 0123456 78901234
+ // 0 1
+ CharReader charReader(testString);
+
+ OsmlStreamParser parser(charReader, logger);
+
+ assertTextData(parser, " hello \t world ", 0, 15, 0, 15,
+ WhitespaceMode::PRESERVE);
+ assertEnd(parser, 15, 15);
+}
+
+TEST(OsmlStreamParser, whitespaceTrim)
+{
+ const char *testString = " hello \t world ";
+ // 0123456 78901234
+ // 0 1
+ CharReader charReader(testString);
+
+ OsmlStreamParser parser(charReader, logger);
- assertData(reader, "a", 0, 1);
+ assertTextData(parser, "hello \t world", 0, 15, 1, 14,
+ WhitespaceMode::TRIM);
+ assertEnd(parser, 15, 15);
}
-TEST(OsmlStreamParser, whitespaceElimination)
+TEST(OsmlStreamParser, whitespaceCollapse)
{
const char *testString = " hello \t world ";
// 0123456 78901234
// 0 1
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- assertData(reader, "hello world", 1, 14);
+ assertTextData(parser, "hello world", 0, 15, 1, 14,
+ WhitespaceMode::COLLAPSE);
+ assertEnd(parser, 15, 15);
}
-TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak)
+TEST(OsmlStreamParser, whitespaceCollapseLinebreak)
{
const char *testString = " hello \n world ";
// 0123456 78901234
// 0 1
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
+
+ assertTextData(parser, "hello world", 0, 15, 1, 14,
+ WhitespaceMode::COLLAPSE);
+ assertEnd(parser, 15, 15);
+}
+
+TEST(OsmlStreamParser, whitespaceCollapseProtected)
+{
+ const char *testString = " hello\\ \\ world ";
+ // 012345 67 89012345
+ // 0 1
+ CharReader charReader(testString);
+
+ OsmlStreamParser parser(charReader, logger);
+
+ assertTextData(parser, "hello world", 0, 16, 1, 15,
+ WhitespaceMode::COLLAPSE);
+ assertEnd(parser, 16, 16);
+}
+
+TEST(OsmlStreamParser, whitespaceCollapseProtected2)
+{
+ const char *testString = " hello \\ \\ world ";
+ // 012345 67 89012345
+ // 0 1
+ CharReader charReader(testString);
+
+ OsmlStreamParser parser(charReader, logger);
- assertData(reader, "hello world", 1, 14);
+ assertTextData(parser, "hello world", 0, 17, 1, 16,
+ WhitespaceMode::COLLAPSE);
+ assertEnd(parser, 17, 17);
}
static void testEscapeSpecialCharacter(const std::string &c)
{
CharReader charReader(std::string("\\") + c);
- OsmlStreamParser reader(charReader, logger);
- EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse());
- EXPECT_EQ(c, reader.getText().asString());
-
- SourceLocation loc = reader.getText().getLocation();
- EXPECT_EQ(0U, loc.getStart());
- EXPECT_EQ(1U + c.size(), loc.getEnd());
+ OsmlStreamParser parser(charReader, logger);
+ assertTextData(parser, c, 0, 2, 0, 2, WhitespaceMode::PRESERVE);
+ assertEnd(parser, 2, 2);
}
TEST(OsmlStreamParser, escapeSpecialCharacters)
@@ -240,9 +348,11 @@ TEST(OsmlStreamParser, escapeSpecialCharacters)
TEST(OsmlStreamParser, simpleSingleLineComment)
{
const char *testString = "% This is a single line comment";
+ // 0123456789012345678901234567890
+ // 0 1 2 3
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+ OsmlStreamParser parser(charReader, logger);
+ assertEnd(parser, 31, 31);
}
TEST(OsmlStreamParser, singleLineComment)
@@ -251,24 +361,11 @@ TEST(OsmlStreamParser, singleLineComment)
// 01234567890123456789012345678901 23
// 0 1 2 3
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
- {
- ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
- ASSERT_EQ("a", reader.getText().asString());
- SourceLocation loc = reader.getText().getLocation();
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(1U, loc.getEnd());
- }
-
- {
- ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
- ASSERT_EQ("b", reader.getText().asString());
- SourceLocation loc = reader.getText().getLocation();
- ASSERT_EQ(33U, loc.getStart());
- ASSERT_EQ(34U, loc.getEnd());
- }
+ OsmlStreamParser parser(charReader, logger);
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+ assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE);
+ assertTextData(parser, "b", 33, 34, 33, 34, WhitespaceMode::PRESERVE);
+ assertEnd(parser, 34, 34);
}
TEST(OsmlStreamParser, multilineComment)
@@ -277,24 +374,27 @@ TEST(OsmlStreamParser, multilineComment)
// 0123456789012 3 456789012345678901234567890
// 0 1 2 3 4
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
- {
- ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
- ASSERT_EQ("a", reader.getText().asString());
- SourceLocation loc = reader.getText().getLocation();
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(1U, loc.getEnd());
- }
+ OsmlStreamParser parser(charReader, logger);
- {
- ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
- ASSERT_EQ("b", reader.getText().asString());
- SourceLocation loc = reader.getText().getLocation();
- ASSERT_EQ(40U, loc.getStart());
- ASSERT_EQ(41U, loc.getEnd());
- }
+ assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE);
+ assertTextData(parser, "b", 40, 41, 40, 41, WhitespaceMode::PRESERVE);
+ assertEnd(parser, 41, 41);
+}
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+TEST(OsmlStreamParser, unfinishedMultilineComment)
+{
+ const char *testString = "a%{ This is a\n\n multiline line comment";
+ // 0123456789012 3 456789012345678901234567
+ // 0 1 2 3
+ CharReader charReader(testString);
+ OsmlStreamParser parser(charReader, logger);
+
+ logger.reset();
+
+ assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE);
+ ASSERT_FALSE(logger.hasError());
+ assertEnd(parser, 38, 38);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, nestedMultilineComment)
@@ -303,24 +403,11 @@ TEST(OsmlStreamParser, nestedMultilineComment)
// 0123456789012 3 456789012345678901234567890
// 0 1 2 3 4
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
- {
- ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
- ASSERT_EQ("a", reader.getText().asString());
- SourceLocation loc = reader.getText().getLocation();
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(1U, loc.getEnd());
- }
-
- {
- ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
- ASSERT_EQ("b", reader.getText().asString());
- SourceLocation loc = reader.getText().getLocation();
- ASSERT_EQ(40U, loc.getStart());
- ASSERT_EQ(41U, loc.getEnd());
- }
+ OsmlStreamParser parser(charReader, logger);
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+ assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE);
+ assertTextData(parser, "b", 40, 41, 40, 41, WhitespaceMode::PRESERVE);
+ assertEnd(parser, 41, 41);
}
TEST(OsmlStreamParser, simpleCommand)
@@ -328,45 +415,27 @@ TEST(OsmlStreamParser, simpleCommand)
const char *testString = "\\test";
// 0 12345
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
- ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+ OsmlStreamParser parser(charReader, logger);
- Variant commandName = reader.getCommandName();
- ASSERT_EQ("test", commandName.asString());
-
- SourceLocation loc = commandName.getLocation();
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(5U, loc.getEnd());
-
- ASSERT_EQ(0U, reader.getCommandArguments().asMap().size());
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+ assertCommand(parser, "test", 0, 5);
+ assertEnd(parser);
}
TEST(OsmlStreamParser, simpleCommandWithName)
{
- const char *testString = "\\test#bla";
- // 0 12345678
+ const char *testString = "\\test#foo";
+ // 012345678
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
- ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
-
- Variant commandName = reader.getCommandName();
- ASSERT_EQ("test", commandName.asString());
- SourceLocation loc = commandName.getLocation();
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(5U, loc.getEnd());
+ OsmlStreamParser parser(charReader, logger);
- Variant commandArguments = reader.getCommandArguments();
- ASSERT_TRUE(commandArguments.isMap());
- ASSERT_EQ(1U, commandArguments.asMap().size());
- ASSERT_EQ(1U, commandArguments.asMap().count("name"));
- ASSERT_EQ("bla", commandArguments.asMap()["name"].asString());
+ assertCommandStart(parser, "test", false, Variant::mapType{{"name", "foo"}},
+ 0, 5);
- loc = commandArguments.asMap()["name"].getLocation();
- ASSERT_EQ(5U, loc.getStart());
- ASSERT_EQ(9U, loc.getEnd());
+ Variant::mapType args = parser.getCommandArguments().asMap();
+ ASSERT_EQ(5U, args["name"].getLocation().getStart());
+ ASSERT_EQ(9U, args["name"].getLocation().getEnd());
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+ assertEnd(parser);
}
TEST(OsmlStreamParser, simpleCommandWithArguments)
@@ -375,38 +444,21 @@ TEST(OsmlStreamParser, simpleCommandWithArguments)
// 0 123456789012345 678901 2
// 0 1 2
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
- ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+ OsmlStreamParser parser(charReader, logger);
- Variant commandName = reader.getCommandName();
- ASSERT_EQ("test", commandName.asString());
- SourceLocation loc = commandName.getLocation();
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(5U, loc.getEnd());
+ assertCommandStart(parser, "test", false,
+ Variant::mapType{{"a", 1}, {"b", 2}, {"c", "test"}}, 0,
+ 5);
- Variant commandArguments = reader.getCommandArguments();
- ASSERT_TRUE(commandArguments.isMap());
- ASSERT_EQ(3U, commandArguments.asMap().size());
- ASSERT_EQ(1U, commandArguments.asMap().count("a"));
- ASSERT_EQ(1U, commandArguments.asMap().count("b"));
- ASSERT_EQ(1U, commandArguments.asMap().count("c"));
- ASSERT_EQ(1, commandArguments.asMap()["a"].asInt());
- ASSERT_EQ(2, commandArguments.asMap()["b"].asInt());
- ASSERT_EQ("test", commandArguments.asMap()["c"].asString());
+ Variant::mapType args = parser.getCommandArguments().asMap();
+ ASSERT_EQ(8U, args["a"].getLocation().getStart());
+ ASSERT_EQ(9U, args["a"].getLocation().getEnd());
+ ASSERT_EQ(12U, args["b"].getLocation().getStart());
+ ASSERT_EQ(13U, args["b"].getLocation().getEnd());
+ ASSERT_EQ(16U, args["c"].getLocation().getStart());
+ ASSERT_EQ(22U, args["c"].getLocation().getEnd());
- loc = commandArguments.asMap()["a"].getLocation();
- ASSERT_EQ(8U, loc.getStart());
- ASSERT_EQ(9U, loc.getEnd());
-
- loc = commandArguments.asMap()["b"].getLocation();
- ASSERT_EQ(12U, loc.getStart());
- ASSERT_EQ(13U, loc.getEnd());
-
- loc = commandArguments.asMap()["c"].getLocation();
- ASSERT_EQ(16U, loc.getStart());
- ASSERT_EQ(22U, loc.getEnd());
-
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+ assertEnd(parser);
}
TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName)
@@ -415,44 +467,24 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName)
// 0 1234567890123456789 01234 56
// 0 1 2
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
- ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
-
- Variant commandName = reader.getCommandName();
- ASSERT_EQ("test", commandName.asString());
- SourceLocation loc = commandName.getLocation();
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(5U, loc.getEnd());
-
- Variant commandArguments = reader.getCommandArguments();
- ASSERT_TRUE(commandArguments.isMap());
- ASSERT_EQ(4U, commandArguments.asMap().size());
- ASSERT_EQ(1U, commandArguments.asMap().count("a"));
- ASSERT_EQ(1U, commandArguments.asMap().count("b"));
- ASSERT_EQ(1U, commandArguments.asMap().count("c"));
- ASSERT_EQ(1U, commandArguments.asMap().count("name"));
- ASSERT_EQ(1, commandArguments.asMap()["a"].asInt());
- ASSERT_EQ(2, commandArguments.asMap()["b"].asInt());
- ASSERT_EQ("test", commandArguments.asMap()["c"].asString());
- ASSERT_EQ("bla", commandArguments.asMap()["name"].asString());
-
- loc = commandArguments.asMap()["a"].getLocation();
- ASSERT_EQ(12U, loc.getStart());
- ASSERT_EQ(13U, loc.getEnd());
-
- loc = commandArguments.asMap()["b"].getLocation();
- ASSERT_EQ(16U, loc.getStart());
- ASSERT_EQ(17U, loc.getEnd());
-
- loc = commandArguments.asMap()["c"].getLocation();
- ASSERT_EQ(20U, loc.getStart());
- ASSERT_EQ(26U, loc.getEnd());
-
- loc = commandArguments.asMap()["name"].getLocation();
- ASSERT_EQ(5U, loc.getStart());
- ASSERT_EQ(9U, loc.getEnd());
-
- ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+ OsmlStreamParser parser(charReader, logger);
+
+ assertCommandStart(
+ parser, "test", false,
+ Variant::mapType{{"name", "bla"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 0,
+ 5);
+
+ Variant::mapType args = parser.getCommandArguments().asMap();
+ ASSERT_EQ(5U, args["name"].getLocation().getStart());
+ ASSERT_EQ(9U, args["name"].getLocation().getEnd());
+ ASSERT_EQ(12U, args["a"].getLocation().getStart());
+ ASSERT_EQ(13U, args["a"].getLocation().getEnd());
+ ASSERT_EQ(16U, args["b"].getLocation().getStart());
+ ASSERT_EQ(17U, args["b"].getLocation().getEnd());
+ ASSERT_EQ(20U, args["c"].getLocation().getStart());
+ ASSERT_EQ(26U, args["c"].getLocation().getEnd());
+
+ assertEnd(parser);
}
TEST(OsmlStreamParser, fields)
@@ -461,21 +493,21 @@ TEST(OsmlStreamParser, fields)
// 01234567890123
// 0 1
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "test", 0, 5);
- assertFieldStart(reader, false, 5, 6);
- assertData(reader, "a", 6, 7);
- assertFieldEnd(reader, 7, 8);
+ assertCommand(parser, "test", 0, 5);
+ assertFieldStart(parser, false, 5, 6);
+ assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::PRESERVE);
+ assertFieldEnd(parser, 7, 8);
- assertFieldStart(reader, false, 8, 9);
- assertData(reader, "b", 9, 10);
- assertFieldEnd(reader, 10, 11);
+ assertFieldStart(parser, false, 8, 9);
+ assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::PRESERVE);
+ assertFieldEnd(parser, 10, 11);
- assertFieldStart(reader, false, 11, 12);
- assertData(reader, "c", 12, 13);
- assertFieldEnd(reader, 13, 14);
- assertEnd(reader, 14, 14);
+ assertFieldStart(parser, false, 11, 12);
+ assertTextData(parser, "c", 12, 13, 12, 13, WhitespaceMode::PRESERVE);
+ assertFieldEnd(parser, 13, 14);
+ assertEnd(parser, 14, 14);
}
TEST(OsmlStreamParser, dataOutsideField)
@@ -484,785 +516,781 @@ TEST(OsmlStreamParser, dataOutsideField)
// 0123456789012
// 0 1
CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "test", 0, 5);
- assertFieldStart(reader, false, 5, 6);
- assertData(reader, "a", 6, 7);
- assertFieldEnd(reader, 7, 8);
+ assertCommand(parser, "test", 0, 5);
+ assertFieldStart(parser, false, 5, 6);
+ assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::COLLAPSE);
+ assertFieldEnd(parser, 7, 8);
- assertFieldStart(reader, false, 8, 9);
- assertData(reader, "b", 9, 10);
- assertFieldEnd(reader, 10, 11);
+ assertFieldStart(parser, false, 8, 9);
+ assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::COLLAPSE);
+ assertFieldEnd(parser, 10, 11);
- assertData(reader, "c", 12, 13);
- assertEnd(reader, 13, 13);
+ assertTextData(parser, "c", 11, 13, 12, 13, WhitespaceMode::COLLAPSE);
+ assertEnd(parser, 13, 13);
}
TEST(OsmlStreamParser, nestedCommand)
{
- const char *testString = "\\test{a}{\\test2{b} c} d";
- // 012345678 90123456789012
- // 0 1 2
- CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ const char *testString = "\\test{a}{\\test2{b} c} d";
+ // 012345678 90123456789012
+ // 0 1 2
+ CharReader charReader(testString);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "test", 0, 5);
+ assertCommand(parser, "test", 0, 5);
+ assertFieldStart(parser, false, 5, 6);
+ assertData(parser, "a", 6, 7);
+ assertFieldEnd(parser, 7, 8);
- assertFieldStart(reader, false, 5, 6);
- assertData(reader, "a", 6, 7);
- assertFieldEnd(reader, 7, 8);
-
- assertFieldStart(reader, false, 8, 9);
- {
- assertCommand(reader, "test2", 9, 15);
- assertFieldStart(reader, false, 15, 16);
- assertData(reader, "b", 16, 17);
- assertFieldEnd(reader, 17, 18);
- }
- assertData(reader, "c", 19, 20);
- assertFieldEnd(reader, 20, 21);
- assertData(reader, "d", 22, 23);
- assertEnd(reader, 23, 23);
+ assertFieldStart(parser, false, 8, 9);
+ assertCommand(parser, "test2", 9, 15);
+ assertFieldStart(parser, false, 15, 16);
+ assertData(parser, "b", 16, 17);
+ assertFieldEnd(parser, 17, 18);
+ assertData(parser, "c", 19, 20);
+ assertFieldEnd(parser, 20, 21);
+ assertData(parser, "d", 22, 23);
+ assertEnd(parser, 23, 23);
}
+
TEST(OsmlStreamParser, nestedCommandImmediateEnd)
{
- const char *testString = "\\test{\\test2{b}} d";
- // 012345 678901234567
- // 0 1
- CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
-
- assertCommand(reader, "test", 0, 5);
- assertFieldStart(reader, false, 5, 6);
- {
- assertCommand(reader, "test2", 6, 12);
- assertFieldStart(reader, false, 12, 13);
- assertData(reader, "b", 13, 14);
- assertFieldEnd(reader, 14, 15);
- }
- assertFieldEnd(reader, 15, 16);
- assertData(reader, "d", 17, 18);
- assertEnd(reader, 18, 18);
+ const char *testString = "\\test{\\test2{b}} d";
+ // 012345 678901234567
+ // 0 1
+ CharReader charReader(testString);
+ OsmlStreamParser parser(charReader, logger);
+
+ assertCommand(parser, "test", 0, 5);
+ assertFieldStart(parser, false, 5, 6);
+ {
+ assertCommand(parser, "test2", 6, 12);
+ assertFieldStart(parser, false, 12, 13);
+ assertData(parser, "b", 13, 14);
+ assertFieldEnd(parser, 14, 15);
+ }
+ assertFieldEnd(parser, 15, 16);
+ assertData(parser, "d", 17, 18);
+ assertEnd(parser, 18, 18);
}
TEST(OsmlStreamParser, nestedCommandNoData)
{
- const char *testString = "\\test{\\test2}";
- // 012345 6789012
- CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ const char *testString = "\\test{\\test2}";
+ // 012345 6789012
+ CharReader charReader(testString);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "test", 0, 5);
- assertFieldStart(reader, false, 5, 6);
- assertCommand(reader, "test2", 6, 12);
- assertFieldEnd(reader, 12, 13);
- assertEnd(reader, 13, 13);
+ assertCommand(parser, "test", 0, 5);
+ assertFieldStart(parser, false, 5, 6);
+ assertCommand(parser, "test2", 6, 12);
+ assertFieldEnd(parser, 12, 13);
+ assertEnd(parser, 13, 13);
}
TEST(OsmlStreamParser, multipleCommands)
{
- const char *testString = "\\a \\b \\c \\d";
- // 012 345 678 90
- // 0 1
- CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ const char *testString = "\\a \\b \\c \\d";
+ // 012 345 678 90
+ // 0 1
+ CharReader charReader(testString);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "a", 0, 2);
- assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE);
- assertCommand(reader, "b", 3, 5);
- assertData(reader, " ", 5, 6, WhitespaceMode::PRESERVE);
- assertCommand(reader, "c", 6, 8);
- assertData(reader, " ", 8, 9, WhitespaceMode::PRESERVE);
- assertCommand(reader, "d", 9, 11);
- assertEnd(reader, 11, 11);
+ assertCommand(parser, "a", 0, 2);
+ assertEmptyData(parser);
+ assertCommand(parser, "b", 3, 5);
+ assertEmptyData(parser);
+ assertCommand(parser, "c", 6, 8);
+ assertEmptyData(parser);
+ assertCommand(parser, "d", 9, 11);
+ assertEnd(parser, 11, 11);
}
TEST(OsmlStreamParser, fieldsWithSpaces)
{
- const char *testString = "\\a {\\b \\c} \n\n {\\d}";
- // 0123 456 789012 3 456 789
- // 0 1
- CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
-
- assertCommand(reader, "a", 0, 2);
- assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE);
- assertFieldStart(reader, false, 3, 4);
- assertCommand(reader, "b", 4, 6);
- assertData(reader, " ", 6, 7, WhitespaceMode::PRESERVE);
- assertCommand(reader, "c", 7, 9);
- assertFieldEnd(reader, 9, 10);
- assertData(reader, " \n\n {", 10, 12, WhitespaceMode::PRESERVE);
- assertFieldStart(reader, false, 16, 17);
- assertCommand(reader, "d", 17, 19);
- assertFieldEnd(reader, 19, 20);
- assertEnd(reader, 20, 20);
-}
-
-TEST(OsmlStreamParser, errorNoFieldToStart)
-{
- const char *testString = "\\a b {";
- // 012345
- // 0
- CharReader charReader(testString);
+ const char *testString = "\\a {\\b \\c} \n\n {\\d}";
+ // 0123 456 789012 3 456 789
+ // 0 1
+ CharReader charReader(testString);
+ OsmlStreamParser parser(charReader, logger);
- OsmlStreamParser reader(charReader, logger);
+ assertCommand(parser, "a", 0, 2);
+ assertEmptyData(parser);
+ assertFieldStart(parser, false, 3, 4);
+ assertCommand(parser, "b", 4, 6);
+ assertEmptyData(parser);
+ assertCommand(parser, "c", 7, 9);
+ assertFieldEnd(parser, 9, 10);
+ assertEmptyData(parser);
+ assertFieldStart(parser, false, 16, 17);
+ assertCommand(parser, "d", 17, 19);
+ assertFieldEnd(parser, 19, 20);
+ assertEnd(parser, 20, 20);
+}
- logger.reset();
- assertCommand(reader, "a", 0, 2);
- assertData(reader, "b", 3, 4);
- ASSERT_FALSE(logger.hasError());
- assertEnd(reader, 6, 6);
- ASSERT_TRUE(logger.hasError());
+TEST(OsmlStreamParser, errorEndButOpenField)
+{
+ const char *testString = "\\a b {";
+ // 012345
+ // 0
+ CharReader charReader(testString);
+
+ OsmlStreamParser parser(charReader, logger);
+
+ logger.reset();
+ assertCommand(parser, "a", 0, 2);
+ assertData(parser, "b", 3, 4);
+ assertFieldStart(parser, false, 5, 6);
+ ASSERT_FALSE(logger.hasError());
+ assertEnd(parser, 6, 6);
+ ASSERT_TRUE(logger.hasError());
}
+
TEST(OsmlStreamParser, errorNoFieldToEnd)
{
- const char *testString = "\\a b }";
- // 012345
- // 0
- CharReader charReader(testString);
+ const char *testString = "\\a b }";
+ // 012345
+ // 0
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- logger.reset();
- assertCommand(reader, "a", 0, 2);
- assertData(reader, "b", 3, 4);
- ASSERT_FALSE(logger.hasError());
- assertEnd(reader, 6, 6);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ assertCommand(parser, "a", 0, 2);
+ assertData(parser, "b", 3, 4);
+ ASSERT_FALSE(logger.hasError());
+ assertEnd(parser, 6, 6);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorNoFieldEndNested)
{
- const char *testString = "\\test{\\test2{}}}";
- // 012345 6789012345
- // 0 1
- CharReader charReader(testString);
+ const char *testString = "\\test{\\test2{}}}";
+ // 012345 6789012345
+ // 0 1
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- logger.reset();
- assertCommand(reader, "test", 0, 5);
- assertFieldStart(reader, false, 5, 6);
- assertCommand(reader, "test2", 6, 12);
- assertFieldStart(reader, false, 12, 13);
- assertFieldEnd(reader, 13, 14);
- assertFieldEnd(reader, 14, 15);
- ASSERT_FALSE(logger.hasError());
- assertEnd(reader, 16, 16);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ assertCommand(parser, "test", 0, 5);
+ assertFieldStart(parser, false, 5, 6);
+ assertCommand(parser, "test2", 6, 12);
+ assertFieldStart(parser, false, 12, 13);
+ assertFieldEnd(parser, 13, 14);
+ assertFieldEnd(parser, 14, 15);
+ ASSERT_FALSE(logger.hasError());
+ assertEnd(parser, 16, 16);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorNoFieldEndNestedData)
{
- const char *testString = "\\test{\\test2{}}a}";
- // 012345 67890123456
- // 0 1
- CharReader charReader(testString);
+ const char *testString = "\\test{\\test2{}}a}";
+ // 012345 67890123456
+ // 0 1
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- logger.reset();
- assertCommand(reader, "test", 0, 5);
- assertFieldStart(reader, false, 5, 6);
- assertCommand(reader, "test2", 6, 12);
- assertFieldStart(reader, false, 12, 13);
- assertFieldEnd(reader, 13, 14);
- assertFieldEnd(reader, 14, 15);
- assertData(reader, "a", 15, 16);
- ASSERT_FALSE(logger.hasError());
- assertEnd(reader, 17, 17);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ assertCommand(parser, "test", 0, 5);
+ assertFieldStart(parser, false, 5, 6);
+ assertCommand(parser, "test2", 6, 12);
+ assertFieldStart(parser, false, 12, 13);
+ assertFieldEnd(parser, 13, 14);
+ assertFieldEnd(parser, 14, 15);
+ assertData(parser, "a", 15, 16);
+ ASSERT_FALSE(logger.hasError());
+ assertEnd(parser, 17, 17);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, beginEnd)
{
- const char *testString = "\\begin{book}\\end{book}";
- // 012345678901 2345678901
- // 0 1 2
- CharReader charReader(testString);
+ const char *testString = "\\begin{book}\\end{book}";
+ // 012345678901 2345678901
+ // 0 1 2
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "book", 7, 11);
- assertFieldStart(reader, true, 12, 13);
- assertFieldEnd(reader, 17, 21);
- assertEnd(reader, 22, 22);
+ assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11);
+ assertCommandEnd(parser, 17, 21);
+ assertEnd(parser, 22, 22);
}
TEST(OsmlStreamParser, beginEndWithName)
{
- const char *testString = "\\begin{book#a}\\end{book}";
- // 01234567890123 4567890123
- // 0 1 2
- CharReader charReader(testString);
+ const char *testString = "\\begin{book#a}\\end{book}";
+ // 01234567890123 4567890123
+ // 0 1 2
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "book", {{"name", "a"}}, 7, 11);
- assertFieldStart(reader, true, 14, 15);
- assertFieldEnd(reader, 19, 23);
- assertEnd(reader, 24, 24);
+ assertCommandStart(parser, "book", true, {{"name", "a"}}, 7, 11);
+ assertCommandEnd(parser, 19, 23);
+ assertEnd(parser, 24, 24);
}
TEST(OsmlStreamParser, beginEndWithNameAndArgs)
{
- const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}";
- // 0123456789012345678901234 56789 01 2345678901
- // 0 1 2 3 4
- CharReader charReader(testString);
+ const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}";
+ // 0123456789012345678901234 56789 01 2345678901
+ // 0 1 2 3 4
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "book",
- {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
- assertFieldStart(reader, true, 32, 33);
- assertFieldEnd(reader, 37, 41);
- assertEnd(reader, 42, 42);
+ assertCommandStart(parser, "book", true,
+ {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
+ assertCommandEnd(parser, 37, 41);
+ assertEnd(parser, 42, 42);
}
TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields)
{
- const char *testString =
- "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}";
- // 0123456789012345678901234 56789 01234 567890123 45678901 2345678901
- // 0 1 2 3 4 5 6
- CharReader charReader(testString);
-
- OsmlStreamParser reader(charReader, logger);
-
- assertCommand(reader, "book",
- {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
- assertFieldStart(reader, false, 32, 33);
- assertData(reader, "a", 33, 34);
- assertCommand(reader, "test", Variant::mapType{}, 35, 40);
- assertFieldEnd(reader, 40, 41);
- assertFieldStart(reader, false, 41, 42);
- assertData(reader, "b", 42, 43);
- assertCommand(reader, "test", Variant::mapType{}, 44, 49);
- assertFieldStart(reader, false, 49, 50);
- assertFieldEnd(reader, 50, 51);
- assertFieldEnd(reader, 51, 52);
- assertFieldStart(reader, true, 52, 53);
- assertFieldEnd(reader, 57, 61);
- assertEnd(reader, 62, 62);
+ const char *testString =
+ "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}";
+ // 0123456789012345678901234 56789 01234 567890123 45678901 2345678901
+ // 0 1 2 3 4 5 6
+ CharReader charReader(testString);
+
+ OsmlStreamParser parser(charReader, logger);
+
+ assertCommandStart(parser, "book", true,
+ {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
+ assertFieldStart(parser, false, 32, 33);
+ assertData(parser, "a", 33, 34);
+ assertCommand(parser, "test", 35, 40);
+ assertFieldEnd(parser, 40, 41);
+ assertFieldStart(parser, false, 41, 42);
+ assertData(parser, "b", 42, 43);
+ assertCommand(parser, "test", 44, 49);
+ assertFieldStart(parser, false, 49, 50);
+ assertFieldEnd(parser, 50, 51);
+ assertFieldEnd(parser, 51, 52);
+ assertCommandEnd(parser, 57, 61);
+ assertEnd(parser, 62, 62);
}
TEST(OsmlStreamParser, beginEndWithData)
{
- const char *testString = "\\begin{book}a\\end{book}";
- // 0123456789012 3456789012
- // 0 1 2
- CharReader charReader(testString);
+ const char *testString = "\\begin{book}a\\end{book}";
+ // 0123456789012 3456789012
+ // 0 1 2
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser parser(charReader, logger);
- assertCommand(reader, "book", 7, 11);
- assertFieldStart(reader, true, 12, 13);
- assertData(reader, "a", 12, 13);
- assertFieldEnd(reader, 18, 22);
- assertEnd(reader, 23, 23);
+ assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11);
+ assertData(parser, "a", 12, 13);
+ assertCommandEnd(parser, 18, 22);
+ assertEnd(parser, 23, 23);
}
-
+/*
TEST(OsmlStreamParser, beginEndNested)
{
- const char *testString =
- "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}";
- // 012345678901234 5678901234567890 123456 7890123 4567890
- // 0 1 2 3 4 5
- CharReader charReader(testString);
-
- OsmlStreamParser reader(charReader, logger);
-
- assertCommand(reader, "a", 7, 8);
- assertFieldStart(reader, false, 9, 10);
- assertData(reader, "b", 10, 11);
- assertFieldEnd(reader, 11, 12);
- assertFieldStart(reader, true, 13, 14);
- assertData(reader, "c", 13, 14);
- assertCommand(reader, "d", 22, 23);
- assertFieldStart(reader, false, 24, 25);
- assertData(reader, "e", 25, 26);
- assertFieldEnd(reader, 26, 27);
- assertFieldStart(reader, false, 27, 28);
- assertData(reader, "f", 28, 29);
- assertFieldEnd(reader, 29, 30);
- assertFieldStart(reader, true, 31, 32);
- assertCommand(reader, "g", 31, 33);
- assertFieldStart(reader, false, 33, 34);
- assertData(reader, "h", 34, 35);
- assertFieldEnd(reader, 35, 36);
- assertFieldEnd(reader, 42, 43);
- assertFieldEnd(reader, 49, 50);
- assertEnd(reader, 51, 51);
+ const char *testString =
+ "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}";
+ // 012345678901234 5678901234567890 123456 7890123 4567890
+ // 0 1 2 3 4 5
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertCommand(reader, "a", 7, 8);
+ assertFieldStart(reader, false, 9, 10);
+ assertData(reader, "b", 10, 11);
+ assertFieldEnd(reader, 11, 12);
+ assertFieldStart(reader, true, 13, 14);
+ assertData(reader, "c", 13, 14);
+ assertCommand(reader, "d", 22, 23);
+ assertFieldStart(reader, false, 24, 25);
+ assertData(reader, "e", 25, 26);
+ assertFieldEnd(reader, 26, 27);
+ assertFieldStart(reader, false, 27, 28);
+ assertData(reader, "f", 28, 29);
+ assertFieldEnd(reader, 29, 30);
+ assertFieldStart(reader, true, 31, 32);
+ assertCommand(reader, "g", 31, 33);
+ assertFieldStart(reader, false, 33, 34);
+ assertData(reader, "h", 34, 35);
+ assertFieldEnd(reader, 35, 36);
+ assertFieldEnd(reader, 42, 43);
+ assertFieldEnd(reader, 49, 50);
+ assertEnd(reader, 51, 51);
}
TEST(OsmlStreamParser, beginEndWithCommand)
{
- const char *testString = "\\begin{book}\\a{test}\\end{book}";
- // 012345678901 23456789 0123456789
- // 0 1 2
- CharReader charReader(testString);
+ const char *testString = "\\begin{book}\\a{test}\\end{book}";
+ // 012345678901 23456789 0123456789
+ // 0 1 2
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertCommand(reader, "book", 7, 11);
- assertFieldStart(reader, true, 12, 13);
- assertCommand(reader, "a", 12, 14);
- assertFieldStart(reader, false, 14, 15);
- assertData(reader, "test", 15, 19);
- assertFieldEnd(reader, 19, 20);
- assertFieldEnd(reader, 25, 29);
- assertEnd(reader, 30, 30);
+ assertCommand(reader, "book", 7, 11);
+ assertFieldStart(reader, true, 12, 13);
+ assertCommand(reader, "a", 12, 14);
+ assertFieldStart(reader, false, 14, 15);
+ assertData(reader, "test", 15, 19);
+ assertFieldEnd(reader, 19, 20);
+ assertFieldEnd(reader, 25, 29);
+ assertEnd(reader, 30, 30);
}
TEST(OsmlStreamParser, errorBeginNoBraceOpen)
{
- const char *testString = "\\begin a";
- // 01234567
- CharReader charReader(testString);
+ const char *testString = "\\begin a";
+ // 01234567
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- assertData(reader, "a", 7, 8);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertData(reader, "a", 7, 8);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorBeginNoIdentifier)
{
- const char *testString = "\\begin{!";
- CharReader charReader(testString);
+ const char *testString = "\\begin{!";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- ASSERT_THROW(reader.parse(), LoggableException);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ ASSERT_THROW(reader.parse(), LoggableException);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorBeginNoBraceClose)
{
- const char *testString = "\\begin{a";
- CharReader charReader(testString);
+ const char *testString = "\\begin{a";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- ASSERT_THROW(reader.parse(), LoggableException);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ ASSERT_THROW(reader.parse(), LoggableException);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorBeginNoName)
{
- const char *testString = "\\begin{a#}";
- CharReader charReader(testString);
+ const char *testString = "\\begin{a#}";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- assertCommand(reader, "a");
- ASSERT_TRUE(logger.hasError());
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- assertEnd(reader);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertCommand(reader, "a");
+ ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertEnd(reader);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorEndNoBraceOpen)
{
- const char *testString = "\\end a";
- // 012345
- CharReader charReader(testString);
+ const char *testString = "\\end a";
+ // 012345
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- assertData(reader, "a", 5, 6);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertData(reader, "a", 5, 6);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorEndNoIdentifier)
{
- const char *testString = "\\end{!";
- CharReader charReader(testString);
+ const char *testString = "\\end{!";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- ASSERT_THROW(reader.parse(), LoggableException);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ ASSERT_THROW(reader.parse(), LoggableException);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorEndNoBraceClose)
{
- const char *testString = "\\end{a";
- CharReader charReader(testString);
+ const char *testString = "\\end{a";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- ASSERT_THROW(reader.parse(), LoggableException);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ ASSERT_THROW(reader.parse(), LoggableException);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorEndNoBegin)
{
- const char *testString = "\\end{a}";
- CharReader charReader(testString);
+ const char *testString = "\\end{a}";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- ASSERT_THROW(reader.parse(), LoggableException);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ ASSERT_THROW(reader.parse(), LoggableException);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, errorBeginEndMismatch)
{
- const char *testString = "\\begin{a} \\begin{b} test \\end{a}";
- // 0123456789 012345678901234 5678901
- // 0 1 2 3
- CharReader charReader(testString);
+ const char *testString = "\\begin{a} \\begin{b} test \\end{a}";
+ // 0123456789 012345678901234 5678901
+ // 0 1 2 3
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- assertCommand(reader, "a", 7, 8);
- assertFieldStart(reader, true, 10, 11);
- assertCommand(reader, "b", 17, 18);
- assertFieldStart(reader, true, 20, 24);
- assertData(reader, "test", 20, 24);
- ASSERT_FALSE(logger.hasError());
- ASSERT_THROW(reader.parse(), LoggableException);
- ASSERT_TRUE(logger.hasError());
+ logger.reset();
+ assertCommand(reader, "a", 7, 8);
+ assertFieldStart(reader, true, 10, 11);
+ assertCommand(reader, "b", 17, 18);
+ assertFieldStart(reader, true, 20, 24);
+ assertData(reader, "test", 20, 24);
+ ASSERT_FALSE(logger.hasError());
+ ASSERT_THROW(reader.parse(), LoggableException);
+ ASSERT_TRUE(logger.hasError());
}
TEST(OsmlStreamParser, commandWithNSSep)
{
- const char *testString = "\\test1:test2";
- // 012345678901
- CharReader charReader(testString);
+ const char *testString = "\\test1:test2";
+ // 012345678901
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertCommand(reader, "test1:test2", 0, 12);
- assertEnd(reader, 12, 12);
+ assertCommand(reader, "test1:test2", 0, 12);
+ assertEnd(reader, 12, 12);
}
TEST(OsmlStreamParser, beginEndWithNSSep)
{
- const char *testString = "\\begin{test1:test2}\\end{test1:test2}";
- // 0123456789012345678 90123456789012345
- // 0 1 2 3
- CharReader charReader(testString);
+ const char *testString = "\\begin{test1:test2}\\end{test1:test2}";
+ // 0123456789012345678 90123456789012345
+ // 0 1 2 3
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertCommand(reader, "test1:test2", 7, 18);
- assertFieldStart(reader, true, 19, 20);
- assertFieldEnd(reader, 24, 35);
- assertEnd(reader, 36, 36);
+ assertCommand(reader, "test1:test2", 7, 18);
+ assertFieldStart(reader, true, 19, 20);
+ assertFieldEnd(reader, 24, 35);
+ assertEnd(reader, 36, 36);
}
TEST(OsmlStreamParser, errorBeginNSSep)
{
- const char *testString = "\\begin:test{blub}\\end{blub}";
- CharReader charReader(testString);
+ const char *testString = "\\begin:test{blub}\\end{blub}";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- assertCommand(reader, "blub");
- ASSERT_TRUE(logger.hasError());
- assertFieldStart(reader, true);
- assertFieldEnd(reader);
- assertEnd(reader);
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertCommand(reader, "blub");
+ ASSERT_TRUE(logger.hasError());
+ assertFieldStart(reader, true);
+ assertFieldEnd(reader);
+ assertEnd(reader);
}
TEST(OsmlStreamParser, errorEndNSSep)
{
- const char *testString = "\\begin{blub}\\end:test{blub}";
- CharReader charReader(testString);
+ const char *testString = "\\begin{blub}\\end:test{blub}";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- assertCommand(reader, "blub");
- assertFieldStart(reader, true);
- ASSERT_FALSE(logger.hasError());
- assertFieldEnd(reader);
- ASSERT_TRUE(logger.hasError());
- assertEnd(reader);
+ logger.reset();
+ assertCommand(reader, "blub");
+ assertFieldStart(reader, true);
+ ASSERT_FALSE(logger.hasError());
+ assertFieldEnd(reader);
+ ASSERT_TRUE(logger.hasError());
+ assertEnd(reader);
}
TEST(OsmlStreamParser, errorEmptyNs)
{
- const char *testString = "\\test:";
- CharReader charReader(testString);
+ const char *testString = "\\test:";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- assertCommand(reader, "test");
- ASSERT_TRUE(logger.hasError());
- assertData(reader, ":");
- assertEnd(reader);
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertCommand(reader, "test");
+ ASSERT_TRUE(logger.hasError());
+ assertData(reader, ":");
+ assertEnd(reader);
}
TEST(OsmlStreamParser, errorRepeatedNs)
{
- const char *testString = "\\test::";
- CharReader charReader(testString);
+ const char *testString = "\\test::";
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- assertCommand(reader, "test");
- ASSERT_TRUE(logger.hasError());
- assertData(reader, "::");
- assertEnd(reader);
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertCommand(reader, "test");
+ ASSERT_TRUE(logger.hasError());
+ assertData(reader, "::");
+ assertEnd(reader);
}
TEST(OsmlStreamParser, explicitDefaultField)
{
- const char *testString = "\\a{!b}c";
- // 01234567
- CharReader charReader(testString);
+ const char *testString = "\\a{!b}c";
+ // 01234567
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertCommand(reader, "a", 0, 2);
- assertFieldStart(reader, true, 2, 4);
- assertData(reader, "b", 4, 5);
- assertFieldEnd(reader, 5, 6);
- assertData(reader, "c", 6, 7);
- assertEnd(reader, 7, 7);
+ assertCommand(reader, "a", 0, 2);
+ assertFieldStart(reader, true, 2, 4);
+ assertData(reader, "b", 4, 5);
+ assertFieldEnd(reader, 5, 6);
+ assertData(reader, "c", 6, 7);
+ assertEnd(reader, 7, 7);
}
TEST(OsmlStreamParser, explicitDefaultFieldWithCommand)
{
- const char *testString = "\\a{!\\b}c";
- // 0123 4567
- CharReader charReader(testString);
+ const char *testString = "\\a{!\\b}c";
+ // 0123 4567
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertCommand(reader, "a", 0, 2);
- assertFieldStart(reader, true, 2, 4);
- assertCommand(reader, "b", 4, 6);
- assertFieldEnd(reader, 6, 7);
- assertData(reader, "c", 7, 8);
- assertEnd(reader, 8, 8);
+ assertCommand(reader, "a", 0, 2);
+ assertFieldStart(reader, true, 2, 4);
+ assertCommand(reader, "b", 4, 6);
+ assertFieldEnd(reader, 6, 7);
+ assertData(reader, "c", 7, 8);
+ assertEnd(reader, 8, 8);
}
TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField)
{
- const char *testString = "\\a{!\\b}{c}";
- // 0123 456789
- CharReader charReader(testString);
+ const char *testString = "\\a{!\\b}{c}";
+ // 0123 456789
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- assertCommand(reader, "a", 0, 2);
- assertFieldStart(reader, true, 2, 4);
- assertCommand(reader, "b", 4, 6);
- assertFieldEnd(reader, 6, 7);
- ASSERT_FALSE(logger.hasError());
- assertData(reader, "c", 8, 9);
- ASSERT_TRUE(logger.hasError());
- assertEnd(reader, 10, 10);
+ logger.reset();
+ assertCommand(reader, "a", 0, 2);
+ assertFieldStart(reader, true, 2, 4);
+ assertCommand(reader, "b", 4, 6);
+ assertFieldEnd(reader, 6, 7);
+ ASSERT_FALSE(logger.hasError());
+ assertData(reader, "c", 8, 9);
+ ASSERT_TRUE(logger.hasError());
+ assertEnd(reader, 10, 10);
}
TEST(OsmlStreamParser, annotationStart)
{
- const char *testString = "<\\a";
- // 0 12
+ const char *testString = "<\\a";
+ // 0 12
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
- assertEnd(reader, 3, 3);
+ assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
+ assertEnd(reader, 3, 3);
}
TEST(OsmlStreamParser, annotationStartWithName)
{
- const char *testString = "<\\annotationWithName#aName";
- // 0 1234567890123456789012345
- // 0 1 2
+ const char *testString = "<\\annotationWithName#aName";
+ // 0 1234567890123456789012345
+ // 0 1 2
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertAnnotationStart(reader, "annotationWithName",
- Variant::mapType{{"name", "aName"}}, 0, 20);
- assertEnd(reader, 26, 26);
+ assertAnnotationStart(reader, "annotationWithName",
+ Variant::mapType{{"name", "aName"}}, 0, 20);
+ assertEnd(reader, 26, 26);
}
TEST(OsmlStreamParser, annotationStartWithArguments)
{
- const char *testString = "<\\annotationWithName#aName[a=1,b=2]";
- // 0 1234567890123456789012345678901234
- // 0 1 2 3
+ const char *testString = "<\\annotationWithName#aName[a=1,b=2]";
+ // 0 1234567890123456789012345678901234
+ // 0 1 2 3
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertAnnotationStart(
- reader, "annotationWithName",
- Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20);
- assertEnd(reader, 35, 35);
+ assertAnnotationStart(
+ reader, "annotationWithName",
+ Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20);
+ assertEnd(reader, 35, 35);
}
TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd)
{
- const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>";
- // 0 123456789012345678901234567 89012345 67
- // 0 1 2 3
+ const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>";
+ // 0 123456789012345678901234567 89012345 67
+ // 0 1 2 3
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertAnnotationStart(
- reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8,
- 10);
- assertFieldStart(reader, true, 26, 27);
- assertData(reader, "a", 26, 27);
- assertFieldEnd(reader, 33, 35);
- assertAnnotationEnd(reader, "", "", 36, 38);
- assertEnd(reader, 38, 38);
+ assertAnnotationStart(
+ reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8,
+ 10);
+ assertFieldStart(reader, true, 26, 27);
+ assertData(reader, "a", 26, 27);
+ assertFieldEnd(reader, 33, 35);
+ assertAnnotationEnd(reader, "", "", 36, 38);
+ assertEnd(reader, 38, 38);
}
TEST(OsmlStreamParser, annotationEnd)
{
- const char *testString = "\\a>";
- // 012
+ const char *testString = "\\a>";
+ // 012
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertAnnotationEnd(reader, "a", "", 0, 2);
- assertEnd(reader, 3, 3);
+ assertAnnotationEnd(reader, "a", "", 0, 2);
+ assertEnd(reader, 3, 3);
}
TEST(OsmlStreamParser, annotationEndWithName)
{
- const char *testString = "\\a#name>";
- // 01234567
+ const char *testString = "\\a#name>";
+ // 01234567
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertAnnotationEnd(reader, "a", "name", 0, 2);
- assertEnd(reader, 8, 8);
+ assertAnnotationEnd(reader, "a", "name", 0, 2);
+ assertEnd(reader, 8, 8);
}
TEST(OsmlStreamParser, annotationEndWithNameAsArgs)
{
- const char *testString = "\\a[name=name]>";
- // 01234567890123
+ const char *testString = "\\a[name=name]>";
+ // 01234567890123
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertAnnotationEnd(reader, "a", "name", 0, 2);
- assertEnd(reader, 14, 14);
+ assertAnnotationEnd(reader, "a", "name", 0, 2);
+ assertEnd(reader, 14, 14);
}
TEST(OsmlStreamParser, errorAnnotationEndWithArguments)
{
- const char *testString = "\\a[foo=bar]>";
- // 012345678901
- // 0 1
+ const char *testString = "\\a[foo=bar]>";
+ // 012345678901
+ // 0 1
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- logger.reset();
- ASSERT_FALSE(logger.hasError());
- assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2);
- ASSERT_TRUE(logger.hasError());
- assertData(reader, ">", 11, 12);
- assertEnd(reader, 12, 12);
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2);
+ ASSERT_TRUE(logger.hasError());
+ assertData(reader, ">", 11, 12);
+ assertEnd(reader, 12, 12);
}
TEST(OsmlStreamParser, closingAnnotation)
{
- const char *testString = "<\\a>";
- // 0 123
+ const char *testString = "<\\a>";
+ // 0 123
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
- assertData(reader, ">", 3, 4);
- assertEnd(reader, 4, 4);
+ assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
+ assertData(reader, ">", 3, 4);
+ assertEnd(reader, 4, 4);
}
TEST(OsmlStreamParser, annotationWithFields)
{
- const char *testString = "a <\\b{c}{d}{!e} f \\> g";
- // 012 345678901234567 8901
- // 0 1 2
+ const char *testString = "a <\\b{c}{d}{!e} f \\> g";
+ // 012 345678901234567 8901
+ // 0 1 2
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
-
- assertData(reader, "a", 0, 1);
- assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5);
- assertFieldStart(reader, false, 5, 6);
- assertData(reader, "c", 6, 7);
- assertFieldEnd(reader, 7, 8);
- assertFieldStart(reader, false, 8, 9);
- assertData(reader, "d", 9, 10);
- assertFieldEnd(reader, 10, 11);
- assertFieldStart(reader, true, 11, 13);
- assertData(reader, "e", 13, 14);
- assertFieldEnd(reader, 14, 15);
- assertData(reader, "f", 16, 17);
- assertAnnotationEnd(reader, "", "", 18, 20);
- assertData(reader, "g", 21, 22);
- assertEnd(reader, 22, 22);
+ OsmlStreamParser reader(charReader, logger);
+
+ assertData(reader, "a", 0, 1);
+ assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5);
+ assertFieldStart(reader, false, 5, 6);
+ assertData(reader, "c", 6, 7);
+ assertFieldEnd(reader, 7, 8);
+ assertFieldStart(reader, false, 8, 9);
+ assertData(reader, "d", 9, 10);
+ assertFieldEnd(reader, 10, 11);
+ assertFieldStart(reader, true, 11, 13);
+ assertData(reader, "e", 13, 14);
+ assertFieldEnd(reader, 14, 15);
+ assertData(reader, "f", 16, 17);
+ assertAnnotationEnd(reader, "", "", 18, 20);
+ assertData(reader, "g", 21, 22);
+ assertEnd(reader, 22, 22);
}
TEST(OsmlStreamParser, annotationStartEscape)
{
- const char *testString = "<\\%test";
- // 0 123456
- // 0
+ const char *testString = "<\\%test";
+ // 0 123456
+ // 0
- CharReader charReader(testString);
+ CharReader charReader(testString);
- OsmlStreamParser reader(charReader, logger);
+ OsmlStreamParser reader(charReader, logger);
- assertData(reader, "<%test", 0, 7);
- assertEnd(reader, 7, 7);
+ assertData(reader, "<%test", 0, 7);
+ assertEnd(reader, 7, 7);
}
+*/
}