diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-27 18:52:43 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-27 18:52:43 +0100 |
commit | 12e10d18810b7ea4ce142d76e846b4faf0c33488 (patch) | |
tree | e0e5fb04d0e24033f5c884f7821866ffde5d0fba | |
parent | 19dd5946125e90dcbd61966896c9f6cfc4451d80 (diff) |
Made OsmlStreamParser ready for user defined tokens, started to adapt unit tests.
-rw-r--r-- | CMakeLists.txt | 32 | ||||
-rw-r--r-- | src/formats/osml/OsmlStreamParser.cpp | 701 | ||||
-rw-r--r-- | src/formats/osml/OsmlStreamParser.hpp | 298 | ||||
-rw-r--r-- | test/formats/osml/OsmlStreamParserTest.cpp | 1542 |
4 files changed, 1355 insertions, 1218 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 75909e9..4e2d7f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -212,14 +212,14 @@ ADD_LIBRARY(ousia_core # ousia_core #) -#ADD_LIBRARY(ousia_osml +ADD_LIBRARY(ousia_osml # src/formats/osml/OsmlParser -# src/formats/osml/OsmlStreamParser -#) + src/formats/osml/OsmlStreamParser +) -#TARGET_LINK_LIBRARIES(ousia_osml -# ousia_core -#) +TARGET_LINK_LIBRARIES(ousia_osml + ousia_core +) ADD_LIBRARY(ousia_osxml src/formats/osxml/OsxmlAttributeLocator @@ -383,17 +383,17 @@ IF(TEST) # ousia_mozjs # ) -# ADD_EXECUTABLE(ousia_test_osml + ADD_EXECUTABLE(ousia_test_osml # test/formats/osml/OsmlParserTest -# test/formats/osml/OsmlStreamParserTest -# ) + test/formats/osml/OsmlStreamParserTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_osml -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_osml -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_osml + ${GTEST_LIBRARIES} + ousia_core + ousia_osml + ousia_filesystem + ) # ADD_EXECUTABLE(ousia_test_osxml # test/formats/osxml/OsxmlEventParserTest @@ -423,7 +423,7 @@ IF(TEST) ADD_TEST(ousia_test_filesystem ousia_test_filesystem) ADD_TEST(ousia_test_html ousia_test_html) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) -# ADD_TEST(ousia_test_osml ousia_test_osml) + ADD_TEST(ousia_test_osml ousia_test_osml) # ADD_TEST(ousia_test_osxml ousia_test_osxml) ADD_TEST(ousia_test_xml ousia_test_xml) ENDIF() diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index d4cdbf8..7e01a3c 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -19,92 +19,411 @@ #include <core/common/CharReader.hpp> #include <core/common/Logger.hpp> #include <core/common/Utils.hpp> +#include <core/common/Variant.hpp> #include <core/common/VariantReader.hpp> +#include <core/parser/utils/Tokenizer.hpp> +#include <core/parser/utils/TokenizedData.hpp> + #include "OsmlStreamParser.hpp" +#include <stack> +#include <vector> + namespace ousia { +namespace { /** - * Plain format default tokenizer. + * Osml format default tokenizer. Registers the primary tokens in its + * constructor. A single, static instance of this class is created as + * "OsmlTokens", which is copied to the Tokenizer instance of + * OsmlStreamParserImpl. */ -class PlainFormatTokens : public Tokenizer { +class OsmlFormatTokens : public Tokenizer { public: + TokenId Backslash; + TokenId LineComment; + TokenId BlockCommentStart; + TokenId BlockCommentEnd; + TokenId FieldStart; + TokenId FieldEnd; + TokenId DefaultFieldStart; + TokenId AnnotationStart; + TokenId AnnotationEnd; + /** - * Id of the backslash token. + * Registers the plain format tokens in the internal tokenizer. */ - TokenId Backslash; + OsmlFormatTokens() + { + Backslash = registerToken("\\"); + LineComment = registerToken("%"); + BlockCommentStart = registerToken("%{"); + BlockCommentEnd = registerToken("}%"); + FieldStart = registerToken("{"); + FieldEnd = registerToken("}"); + DefaultFieldStart = registerToken("{!"); + AnnotationStart = registerToken("<\\"); + AnnotationEnd = registerToken("\\>"); + } +}; + +/** + * Instance of OsmlFormatTokens used to initialize the internal tokenizer + * instance of OsmlStreamParserImpl. + */ +static const OsmlFormatTokens OsmlTokens; +/** + * Structure representing a field. + */ +struct Field { /** - * Id of the line comment token. + * Specifies whether this field was marked as default field. */ - TokenId LineComment; + bool defaultField; /** - * Id of the block comment start token. + * Location at which the field was started. */ - TokenId BlockCommentStart; + SourceLocation location; /** - * Id of the block comment end token. + * Constructor of the Field structure, initializes all member variables with + * the given values. + * + * @param defaultField is a flag specifying whether this field is a default + * field. + * @param location specifies the location at which the field was started. */ - TokenId BlockCommentEnd; + Field(bool defaultField = false, + const SourceLocation &location = SourceLocation{}) + : defaultField(defaultField), location(location) + { + } +}; +/** + * Entry used for the command stack. + */ +class Command { +private: /** - * Id of the field start token. + * Name and location of the current command. */ - TokenId FieldStart; + Variant name; /** - * Id of the field end token. + * Arguments that were passed to the command. */ - TokenId FieldEnd; + Variant arguments; /** - * Id of the default field start token. + * Vector used as stack for holding the number of opening/closing braces + * and the corresponding "isDefaultField" flag. */ - TokenId DefaultFieldStart; + std::vector<Field> fields; /** - * Id of the annotation start token. + * Set to true if this is a command with clear begin and end. */ - TokenId AnnotationStart; + bool hasRange; +public: /** - * Id of the annotation end token. + * Default constructor, marks this command as normal, non-range command. */ - TokenId AnnotationEnd; + Command() : hasRange(false) {} /** - * Registers the plain format tokens in the internal tokenizer. + * Constructor of the Command class. + * + * @param name is a string variant with name and location of the + * command. + * @param arguments is a map variant with the arguments given to the + * command. + * @param hasRange should be set to true if this is a command with + * explicit range. */ - PlainFormatTokens() + Command(Variant name, Variant arguments, bool hasRange) + : name(std::move(name)), + arguments(std::move(arguments)), + hasRange(hasRange) { - Backslash = registerToken("\\"); - LineComment = registerToken("%"); - BlockCommentStart = registerToken("%{"); - BlockCommentEnd = registerToken("}%"); - FieldStart = registerToken("{"); - FieldEnd = registerToken("}"); - DefaultFieldStart = registerToken("{!"); - AnnotationStart = registerToken("<\\"); - AnnotationEnd = registerToken("\\>"); + } + + /** + * Returns a reference at the variant representing name and location of the + * command. + * + * @return a variant containing name and location of the command. + */ + const Variant &getName() const { return name; } + + /** + * Returns a reference at the variant containing name, value and location of + * the arguments. + * + * @return the arguments stored for the command. + */ + const Variant &getArguments() const { return arguments; } + + /** + * Returns a reference at the internal field list. This list should be used + * for printing error messages when fields are still open although the outer + * range field closes. + * + * @return a const reference at the internal field vector. + */ + const std::vector<Field> &getFields() const { return fields; } + + /** + * Returns true if this command is currently in a default field. + * + * @return true if the current field on the field stack was explicitly + * marked as default field. If the field stack is empty, true is returned + * if this is a range command. + */ + bool inDefaultField() const + { + return (!fields.empty() && fields.back().defaultField) || + (fields.empty() && hasRange); + } + + /** + * Returns true if this command currently is in any field. + * + * @return true if a field is on the stack or this is a range commands. + * Range commands always are in a field. + */ + bool inField() const { return !fields.empty() || hasRange; } + + /** + * Returns true if this command currently is in a range field. + * + * @return true if the command has a range and no other ranges are on the + * stack. + */ + bool inRangeField() const { return fields.empty() && hasRange; } + + /** + * Returns true if this command currently is in a non-range field. + * + * @return true if the command is in a field, but the field is not the field + * constructed by the "range" + */ + bool inNonRangeField() const { return !fields.empty(); } + + /** + * Pushes another field onto the field stack of this command. + * + * @param defaultField if true, explicitly marks this field as default + * field. + * @param location is the source location at which the field was started. + * Used for error messages in which the user is notified about an error with + * too few closing fields. + */ + void pushField(bool defaultField = false, + const SourceLocation &location = SourceLocation{}) + { + fields.emplace_back(defaultField, location); + } + + /** + * Removes another field from the field stack of this command, returns true + * if the operation was successful. + * + * @return true if there was a field to pop on the stack, false otherwise. + */ + bool popField() + { + if (!fields.empty()) { + fields.pop_back(); + return true; + } + return false; } }; +} -static const PlainFormatTokens OsmlTokens; +/* Class OsmlStreamParserImpl */ -OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) - : reader(reader), - logger(logger), - tokenizer(OsmlTokens), - data(reader.getSourceId()) +/** + * Internal implementation of OsmlStreamParser. + */ +class OsmlStreamParserImpl { +public: + /** + * State enum compatible with OsmlStreamParserState but extended by two more + * entries (END and NONE). + */ + enum class State : uint8_t { + COMMAND_START = 0, + COMMAND_END = 1, + FIELD_START = 2, + FIELD_END = 3, + ANNOTATION_START = 4, + ANNOTATION_END = 5, + DATA = 6, + END = 7, + RECOVERABLE_ERROR = 8, + IRRECOVERABLE_ERROR = 9 + }; + +private: + /** + * Reference to the CharReader instance from which the incomming bytes are + * read. + */ + CharReader &reader; + + /** + * Reference at the logger instance to which all error messages are sent. + */ + Logger &logger; + + /** + * Tokenizer instance used to read individual tokens from the text. + */ + Tokenizer tokenizer; + + /** + * Stack containing the current commands. + */ + std::stack<Command> commands; + + /** + * Variant containing the tokenized data that was returned from the + * tokenizer as data. + */ + TokenizedData data; + + /** + * Variable containing the current location of the parser. + */ + SourceLocation location; + + /** + * Function used internally to parse an identifier. + * + * @param start is the start byte offset of the identifier (including the + * backslash). + * @param allowNSSep should be set to true if the namespace separator is + * allowed in the identifier name. Issues error if the namespace separator + * is placed incorrectly. + */ + Variant parseIdentifier(size_t start, bool allowNSSep = false); + + /** + * Function used internally to handle the special "\begin" command. + * + * @return an internal State specifying whether an error occured (return + * values State::REOVERABLE_ERROR or State::IRRECOVERABLE_ERROR) or a + * command was actually started (return value State::COMMAND_START). + */ + State parseBeginCommand(); + + /** + * Function used internally to handle the special "\end" command. + * + * @return an internal State specifying whether an error occured (return + * values State::REOVERABLE_ERROR or State::IRRECOVERABLE_ERROR) or a + * command was actually ended (return value State::COMMAND_END). + */ + State parseEndCommand(); + + /** + * Parses the command arguments. Handles errors if the name of the command + * was given using the hash notation and as a name field. + * + * @param commandArgName is the name argument that was given using the hash + * notation. + * @return a map variant containing the arguments. + */ + Variant parseCommandArguments(Variant commandArgName); + + /** + * Function used internally to parse a command. + * + * @param start is the start byte offset of the command (including the + * backslash) + * @param isAnnotation if true, the command is not returned as command, but + * as annotation start. + * @return true if a command was actuall parsed, false otherwise. + */ + State parseCommand(size_t start, bool isAnnotation); + + /** + * Function used internally to parse a block comment. + */ + void parseBlockComment(); + + /** + * Function used internally to parse a generic comment. + */ + void parseLineComment(); + + /** + * Pushes the parsed command onto the command stack. + */ + void pushCommand(Variant commandName, Variant commandArguments, + bool hasRange); + + /** + * Checks whether there is any data pending to be issued, if yes, resets the + * currently peeked characters and returns true. + * + * @return true if there was any data and DATA should be returned by the + * parse function, false otherwise. + */ + bool checkIssueData(); + + /** + * Returns a reference at the current command at the top of the command + * stack. + * + * @return a reference at the top command in the command stack. + */ + Command &cmd() { return commands.top(); } + + /** + * Returns a reference at the current command at the top of the command + * stack. + * + * @return a reference at the top command in the command stack. + */ + const Command &cmd() const { return commands.top(); } + +public: + /** + * Constructor of the OsmlStreamParserImpl class. Attaches the new + * OsmlStreamParserImpl to the given CharReader and Logger instances. + * + * @param reader is the reader instance from which incomming characters + * should be read. + * @param logger is the logger instance to which errors should be written. + */ + OsmlStreamParserImpl(CharReader &reader, Logger &logger); + + State parse(); + + const TokenizedData &getData() const { return data; } + const Variant &getCommandName() const { return cmd().getName(); } + const Variant &getCommandArguments() const { return cmd().getArguments(); } + const SourceLocation &getLocation() const { return location; } + bool inRangeCommand() const { return cmd().inRangeField(); }; + bool inDefaultField() const { return cmd().inDefaultField(); } +}; + +/* Class OsmlStreamParserImpl */ + +OsmlStreamParserImpl::OsmlStreamParserImpl(CharReader &reader, Logger &logger) + : reader(reader), logger(logger), tokenizer(OsmlTokens) { - // Place an intial command representing the complete file on the stack - commands.push(Command{"", Variant::mapType{}, true, true, true, false}); + commands.emplace("", Variant::mapType{}, true); } -Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) +Variant OsmlStreamParserImpl::parseIdentifier(size_t start, bool allowNSSep) { bool first = true; bool hasCharSinceNSSep = false; @@ -147,20 +466,20 @@ Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) return res; } -OsmlStreamParser::State OsmlStreamParser::parseBeginCommand() +OsmlStreamParserImpl::State OsmlStreamParserImpl::parseBeginCommand() { // Expect a '{' after the command reader.consumeWhitespace(); if (!reader.expect('{')) { logger.error("Expected \"{\" after \\begin", reader); - return State::NONE; + return State::RECOVERABLE_ERROR; } // Parse the name of the command that should be opened Variant commandName = parseIdentifier(reader.getOffset(), true); if (commandName.asString().empty()) { logger.error("Expected identifier", commandName); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } // Check whether the next character is a '#', indicating the start of the @@ -176,7 +495,7 @@ OsmlStreamParser::State OsmlStreamParser::parseBeginCommand() if (!reader.expect('}')) { logger.error("Expected \"}\"", reader); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } // Parse the arguments @@ -185,28 +504,15 @@ OsmlStreamParser::State OsmlStreamParser::parseBeginCommand() // Push the command onto the command stack pushCommand(std::move(commandName), std::move(commandArguments), true); - return State::COMMAND; -} - -static bool checkStillInField(const OsmlStreamParser::Command &cmd, - const Variant &endName, Logger &logger) -{ - if (cmd.inField && !cmd.inRangeField) { - logger.error(std::string("\\end in open field of command \"") + - cmd.name.asString() + std::string("\""), - endName); - logger.note(std::string("Open command started here:"), cmd.name); - return true; - } - return false; + return State::COMMAND_START; } -OsmlStreamParser::State OsmlStreamParser::parseEndCommand() +OsmlStreamParserImpl::State OsmlStreamParserImpl::parseEndCommand() { // Expect a '{' after the command if (!reader.expect('{')) { logger.error("Expected \"{\" after \\end", reader); - return State::NONE; + return State::RECOVERABLE_ERROR; } // Fetch the name of the command that should be ended here @@ -215,56 +521,58 @@ OsmlStreamParser::State OsmlStreamParser::parseEndCommand() // Make sure the given command name is not empty if (name.asString().empty()) { logger.error("Expected identifier", name); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } // Make sure the command name is terminated with a '}' if (!reader.expect('}')) { logger.error("Expected \"}\"", reader); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } - // Unroll the command stack up to the last range command - while (!commands.top().hasRange) { - if (checkStillInField(commands.top(), name, logger)) { - return State::ERROR; + // Unroll the command stack up to the last range command, make sure we do + // not intersect with any open field + while (!cmd().inRangeField()) { + if (cmd().inField()) { + logger.error(std::string("\\end in open field of command \"") + + cmd().getName().asString() + std::string("\""), + name); + const std::vector<Field> &fields = cmd().getFields(); + for (const Field &field : fields) { + logger.note(std::string("Still open field started here: "), + field.location); + } + return State::IRRECOVERABLE_ERROR; } commands.pop(); } - // Make sure we're not in an open field of this command - if (checkStillInField(commands.top(), name, logger)) { - return State::ERROR; - } - // Special error message if the top-level command is reached if (commands.size() == 1) { logger.error(std::string("Cannot end command \"") + name.asString() + std::string("\" here, no command open"), name); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } - // Inform the about command mismatches - const Command &cmd = commands.top(); - if (commands.top().name.asString() != name.asString()) { - logger.error(std::string("Trying to end command \"") + - cmd.name.asString() + + // Inform the user about command mismatches, copy the current command + // descriptor before popping it from the stack + if (getCommandName().asString() != name.asString()) { + logger.error(std::string("Trying to end command \"") + name.asString() + std::string("\", but open command is \"") + - name.asString() + std::string("\""), + getCommandName().asString() + std::string("\""), name); - logger.note("Last command was opened here:", cmd.name); - return State::ERROR; + logger.note("Open command started here:", getCommandName()); + return State::IRRECOVERABLE_ERROR; } - // Set the location to the location of the command that was ended, then end - // the current command + // End the current command location = name.getLocation(); commands.pop(); - return cmd.inRangeField ? State::FIELD_END : State::NONE; + return State::COMMAND_END; } -Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName) +Variant OsmlStreamParserImpl::parseCommandArguments(Variant commandArgName) { // Parse the arguments using the universal VariantReader Variant commandArguments; @@ -290,29 +598,14 @@ Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName) return commandArguments; } -void OsmlStreamParser::pushCommand(Variant commandName, - Variant commandArguments, bool hasRange) -{ - // Store the location on the stack - location = commandName.getLocation(); - - // Place the command on the command stack, remove the last commands if we're - // not currently inside a field of these commands - while (!commands.top().inField) { - commands.pop(); - } - commands.push(Command{std::move(commandName), std::move(commandArguments), - hasRange, false, false, false}); -} - -OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, - bool isAnnotation) +OsmlStreamParserImpl::State OsmlStreamParserImpl::parseCommand( + size_t start, bool isAnnotation) { // Parse the commandName as a first identifier Variant commandName = parseIdentifier(start, true); if (commandName.asString().empty()) { logger.error("Empty command name", reader); - return State::NONE; + return State::RECOVERABLE_ERROR; } // Handle the special "begin" and "end" commands @@ -322,7 +615,7 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, const bool isEnd = commandNameComponents[0] == "end"; // Parse the begin or end command - State res = State::COMMAND; + State res = State::COMMAND_START; if (isBegin || isEnd) { if (commandNameComponents.size() > 1) { logger.error( @@ -378,12 +671,13 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, } else { // Make sure no arguments apart from the "name" argument are given // to an annotation end - Variant::mapType &map = commands.top().arguments.asMap(); + const Variant::mapType &map = getCommandArguments().asMap(); if (!map.empty()) { if (map.count("name") == 0 || map.size() > 1U) { logger.error( "An annotation end command may not have any arguments " - "other than \"name\""); + "other than \"name\"", + reader); return res; } } @@ -397,13 +691,13 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, // If we're starting an annotation, return the command as annotation start // instead of command - if (isAnnotation && res == State::COMMAND) { + if (isAnnotation && res == State::COMMAND_START) { return State::ANNOTATION_START; } return res; } -void OsmlStreamParser::parseBlockComment() +void OsmlStreamParserImpl::parseBlockComment() { Token token; size_t depth = 1; @@ -426,7 +720,7 @@ void OsmlStreamParser::parseBlockComment() logger.error("File ended while being in a block comment", reader); } -void OsmlStreamParser::parseLineComment() +void OsmlStreamParserImpl::parseLineComment() { char c; while (reader.read(c)) { @@ -436,65 +730,34 @@ void OsmlStreamParser::parseLineComment() } } -bool OsmlStreamParser::checkIssueData() -{ - if (!data.empty()) { - location = data.getLocation(); - reader.resetPeek(); - return true; - } - return false; -} - -bool OsmlStreamParser::checkIssueFieldStart() +void OsmlStreamParserImpl::pushCommand(Variant commandName, + Variant commandArguments, bool hasRange) { - // Fetch the current command, and check whether we're currently inside a - // field of this command - Command &cmd = commands.top(); - if (!cmd.inField) { - // If this is a range command, we're now implicitly inside the field of - // this command -- we'll have to issue a field start command! - if (cmd.hasRange) { - cmd.inField = true; - cmd.inRangeField = true; - reader.resetPeek(); - return true; - } + // Store the location of the command + location = commandName.getLocation(); - // This was not a range command, so obviously we're now inside within - // a field of some command -- so unroll the commands stack until a - // command with open field is reached - while (!commands.top().inField) { - commands.pop(); - } + // Place the command on the command stack, remove the last commands if we're + // not currently inside a field of these commands + while (!cmd().inField()) { + commands.pop(); } - return false; + + // Push the new command onto the command stack + commands.emplace(std::move(commandName), std::move(commandArguments), + hasRange); } -bool OsmlStreamParser::closeField() +bool OsmlStreamParserImpl::checkIssueData() { - // Try to end an open field of the current command -- if the current command - // is not inside an open field, end this command and try to close the next - // one - for (int i = 0; i < 2 && commands.size() > 1; i++) { - Command &cmd = commands.top(); - if (!cmd.inRangeField) { - if (cmd.inField) { - cmd.inField = false; - if (cmd.inDefaultField) { - commands.pop(); - } - return true; - } - commands.pop(); - } else { - return false; - } + if (!data.empty()) { + location = data.getLocation(); + reader.resetPeek(); + return true; } return false; } -OsmlStreamParser::State OsmlStreamParser::parse() +OsmlStreamParserImpl::State OsmlStreamParserImpl::parse() { // Reset the data handler data.clear(); @@ -507,14 +770,6 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Special handling for Backslash and Text if (type == OsmlTokens.Backslash || type == OsmlTokens.AnnotationStart) { - // Before appending anything to the output data or starting a new - // command, check whether FIELD_START has to be issued, as the - // current command is a command with range - if (checkIssueFieldStart()) { - location = token.location; - return State::FIELD_START; - } - // Check whether a command starts now, without advancing the peek // cursor char c; @@ -535,11 +790,11 @@ OsmlStreamParser::State OsmlStreamParser::parse() State res = parseCommand(token.location.getStart(), type == OsmlTokens.AnnotationStart); switch (res) { - case State::ERROR: + case State::IRRECOVERABLE_ERROR: throw LoggableException( "Last error was irrecoverable, ending parsing " "process"); - case State::NONE: + case State::RECOVERABLE_ERROR: continue; default: return res; @@ -558,15 +813,12 @@ OsmlStreamParser::State OsmlStreamParser::parse() token.location.getStart() + 1); } - data.append(c, token.location.getStart(), reader.getPeekOffset()); + // Append the character to the output data, mark it as protected + data.append(c, token.location.getStart(), reader.getPeekOffset(), + true); reader.consumePeek(); continue; } else if (type == Tokens::Data) { - // Check whether FIELD_START has to be issued before appending text - if (checkIssueFieldStart()) { - location = token.location; - return State::FIELD_START; - } reader.consumePeek(); continue; } @@ -580,7 +832,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() // We will handle the token now, consume the peeked characters reader.consumePeek(); - // Update the location to the current token location + // Synchronize the location with the current token location location = token.location; if (token.id == OsmlTokens.LineComment) { @@ -588,39 +840,27 @@ OsmlStreamParser::State OsmlStreamParser::parse() } else if (token.id == OsmlTokens.BlockCommentStart) { parseBlockComment(); } else if (token.id == OsmlTokens.FieldStart) { - Command &cmd = commands.top(); - if (!cmd.inField) { - cmd.inField = true; - } + cmd().pushField(false, token.location); return State::FIELD_START; -/* logger.error( - "Got field start token \"{\", but no command for which to " - "start the field. Write \"\\{\" to insert this sequence as " - "text.", - token);*/ } else if (token.id == OsmlTokens.FieldEnd) { - closeField(); - return State::FIELD_END; -/* if (closeField()) { + // Remove all commands from the list that currently are not in any + // field + while (!cmd().inField()) { + commands.pop(); + } + + // If the remaining command is not in a range field, remove this + // command + if (cmd().inNonRangeField()) { + cmd().popField(); return State::FIELD_END; } logger.error( - "Got field end token \"}\", but there is no field to end. " - "Write \"\\}\" to insert this sequence as text.", - token);*/ + "Got field end token \"}\", but there is no field to end.", + token); } else if (token.id == OsmlTokens.DefaultFieldStart) { - // Try to start a default field the first time the token is reached - Command &topCmd = commands.top(); - if (!topCmd.inField) { - topCmd.inField = true; - topCmd.inDefaultField = true; - } + cmd().pushField(true, token.location); return State::FIELD_START; -/* logger.error( - "Got default field start token \"{!\", but no command for " - "which to start the field. Write \"\\{!\" to insert this " - "sequence as text", - token);*/ } else if (token.id == OsmlTokens.AnnotationEnd) { // We got a single annotation end token "\>" -- simply issue the // ANNOTATION_END event @@ -641,11 +881,25 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Make sure all open commands and fields have been ended at the end of the // stream while (commands.size() > 1) { - Command &cmd = commands.top(); - if (cmd.inField || cmd.hasRange) { - logger.error("Reached end of stream, but command \"" + - cmd.name.asString() + "\" has not been ended", - cmd.name); + if (cmd().inField()) { + // If the stream ended with an open range field, issue information + // about the range field + if (cmd().inRangeField()) { + // Inform about the still open command itself + logger.error("Reached end of stream, but command \"" + + getCommandName().asString() + + "\" has not been ended", + getCommandName()); + } else { + // Issue information about still open fields + const std::vector<Field> &fields = cmd().getFields(); + if (!fields.empty()) { + logger.error( + std::string( + "Reached end of stream, but field is still open."), + fields.back().location); + } + } } commands.pop(); } @@ -654,26 +908,45 @@ OsmlStreamParser::State OsmlStreamParser::parse() return State::END; } -Variant OsmlStreamParser::getText(WhitespaceMode mode) +/* Class OsmlStreamParser */ + +OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) + : impl(new OsmlStreamParserImpl(reader, logger)) +{ +} + +OsmlStreamParser::~OsmlStreamParser() +{ + // Stub needed because OsmlStreamParserImpl is incomplete in header +} + +OsmlStreamParser::State OsmlStreamParser::parse() +{ + return static_cast<State>(impl->parse()); +} + +const TokenizedData &OsmlStreamParser::getData() const { - TokenizedData dataFork = data; - Variant text = dataFork.text(mode); - location = text.getLocation(); - return text; + return impl->getData(); } const Variant &OsmlStreamParser::getCommandName() const { - return commands.top().name; + return impl->getCommandName(); } const Variant &OsmlStreamParser::getCommandArguments() const { - return commands.top().arguments; + return impl->getCommandArguments(); } -bool OsmlStreamParser::inDefaultField() const +const SourceLocation &OsmlStreamParser::getLocation() const { - return commands.top().inRangeField || commands.top().inDefaultField; + return impl->getLocation(); } + +bool OsmlStreamParser::inDefaultField() const { return impl->inDefaultField(); } + +bool OsmlStreamParser::inRangeCommand() const { return impl->inRangeCommand(); } + } diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index 453a2bb..1fee90b 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -29,30 +29,29 @@ #ifndef _OUSIA_OSML_STREAM_PARSER_HPP_ #define _OUSIA_OSML_STREAM_PARSER_HPP_ +#include <cstdint> #include <memory> -#include <core/common/Variant.hpp> -#include <core/common/Whitespace.hpp> -#include <core/parser/utils/Tokenizer.hpp> -#include <core/parser/utils/TokenizedData.hpp> - namespace ousia { // Forward declarations class CharReader; class Logger; class OsmlStreamParserImpl; +class TokenizedData; +class Variant; /** * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml * format. The parser is constructed around a "parse" function, which reads data * from the underlying CharReader until a new state is reached and indicates * this state in a return value. The calling code then has to pull corresponding - * data from the stream reader. The reader makes sure the incommind file is + * data from the stream reader. The reader makes sure the incomming stream is * syntactically valid and tries to recorver from most errors. If an error is * irrecoverable (this is the case for errors with wrong nesting of commands or * fields, as this would lead to too many consecutive errors) a - * LoggableException is thrown. + * LoggableException is thrown. The OsmlStreamParser can be compared to a SAX + * parser for XML. */ class OsmlStreamParser { public: @@ -60,39 +59,21 @@ public: * Enum used to indicate which state the OsmlStreamParser class is in * after calling the "parse" function. */ - enum class State { - /** - * State returned if a fully featured command has been read. A command - * consists of the command name and its arguments (which optionally - * includes the name). - */ - COMMAND, - - /** - * State returned if data is given. The reader must decide which field - * or command this should be routed to. Trailing or leading whitespace - * has been removed. Only called if the data is non-empty. - */ - DATA, - + enum class State : uint8_t { /** - * A user-defined entity has been found. The entity sequence is stored - * in the command name. + * State returned if the start of a command has been read. Use the + * getCommandName(), getCommandArguments() and inRangeCommand() + * functions the retrieve more information about the command that was + * just started. */ - ENTITY, + COMMAND_START = 0, /** - * State returned if an annotation was started. An annotation consists - * of the command name and its arguments (which optionally include the - * name). + * State returned if a range command has just ended. This state is not + * returned for non-range commands (as the actual end of a command is + * context dependant). */ - ANNOTATION_START, - - /** - * State returned if an annotation ends. The reader indicates which - * annotation ends. - */ - ANNOTATION_END, + COMMAND_END = 1, /** * State returned if a new field started. The reader assures that the @@ -100,200 +81,47 @@ public: * is not started if data has been given outside of a field. The * field number is set to the current field index. */ - FIELD_START, + FIELD_START = 2, /** * State returned if the current field ends. The reader assures that a * field was actually open. */ - FIELD_END, + FIELD_END = 3, /** - * The end of the stream has been reached. + * State returned if an annotation was started. An annotation consists + * of the command name and its arguments (which optionally include the + * name). */ - END, + ANNOTATION_START = 4, /** - * Returned from internal functions if nothing should be done. + * State returned if an annotation ends. The reader indicates which + * annotation ends. */ - NONE, + ANNOTATION_END = 5, /** - * Returned from internal function to indicate irrecoverable errors. + * State returned if data is given. The reader must decide which field + * or command this should be routed to. Trailing or leading whitespace + * has been removed. Only called if the data is non-empty. */ - ERROR - }; - - /** - * Entry used for the command stack. - */ - struct Command { - /** - * Name and location of the current command. - */ - Variant name; - - /** - * Arguments that were passed to the command. - */ - Variant arguments; - - /** - * Vector used as stack for holding the number of opening/closing braces - * and the corresponding "isDefaultField" flag. - */ - std::vector<bool> fields; - - /** - * Set to true if this is a command with clear begin and end. - */ - bool hasRange; - - /** - * Default constructor. - */ - Command() - : hasRange(false), - inField(false), - inDefaultField() - { - } + DATA = 6, /** - * Constructor of the Command class. - * - * @param name is a string variant with name and location of the - * command. - * @param arguments is a map variant with the arguments given to the - * command. - * @param hasRange should be set to true if this is a command with - * explicit range. - * @param inDefaultField is set to true if we currently are in a - * specially marked default field. - */ - Command(Variant name, Variant arguments, bool hasRange) - : name(std::move(name)), - arguments(std::move(arguments)), - hasRange(hasRange), - inField(inField), - inRangeField(inRangeField), - inDefaultField(inDefaultField) - { - } + * The end of the stream has been reached. + */ + END = 7 }; private: /** - * Reference to the CharReader instance from which the incomming bytes are - * read. - */ - CharReader &reader; - - /** - * Reference at the logger instance to which all error messages are sent. - */ - Logger &logger; - - /** - * Tokenizer instance used to read individual tokens from the text. - */ - Tokenizer tokenizer; - - /** - * Variant containing the tokenized data that was returned from the - * tokenizer as data. - */ - TokenizedData data; - - /** - * Stack containing the current commands. - */ - std::stack<Command> commands; - - /** - * Pointer at + * Pointer at the class containing the internal implementation (according + * to the PIMPL idiom). */ std::unique_ptr<OsmlStreamParserImpl> impl; - /** - * Function used internall to parse an identifier. - * - * @param start is the start byte offset of the identifier (including the - * backslash). - * @param allowNSSep should be set to true if the namespace separator is - * allowed in the identifier name. Issues error if the namespace separator - * is placed incorrectly. - */ - Variant parseIdentifier(size_t start, bool allowNSSep = false); - - /** - * Function used internally to handle the special "\begin" command. - */ - State parseBeginCommand(); - - /** - * Function used internally to handle the special "\end" command. - */ - State parseEndCommand(); - - /** - * Pushes the parsed command onto the command stack. - */ - void pushCommand(Variant commandName, Variant commandArguments, - bool hasRange); - - /** - * Parses the command arguments. - */ - Variant parseCommandArguments(Variant commandArgName); - - /** - * Function used internally to parse a command. - * - * @param start is the start byte offset of the command (including the - * backslash) - * @param isAnnotation if true, the command is not returned as command, but - * as annotation start. - * @return true if a command was actuall parsed, false otherwise. - */ - State parseCommand(size_t start, bool isAnnotation); - - /** - * Function used internally to parse a block comment. - */ - void parseBlockComment(); - - /** - * Function used internally to parse a generic comment. - */ - void parseLineComment(); - - /** - * Checks whether there is any data pending to be issued, if yes, issues it. - * - * @return true if there was any data and DATA should be returned by the - * parse function, false otherwise. - */ - bool checkIssueData(); - - /** - * Called before any data is appended to the internal data handler. Checks - * whether a new field should be started or implicitly ended. - * - * @return true if FIELD_START should be returned by the parse function. - */ - bool checkIssueFieldStart(); - - /** - * Closes a currently open field. Note that the command will be removed from - * the internal command stack if the field that is being closed is a - * field marked as default field. - * - * @return true if the field could be closed, false if there was no field - * to close. - */ - bool closeField(); - public: /** * Constructor of the OsmlStreamParser class. Attaches the new @@ -322,29 +150,9 @@ public: State parse(); /** - * Returns a reference at the internally stored data. Only valid if - * State::DATA was returned by the "parse" function. - * - * @return a reference at a variant containing the data parsed by the - * "parse" function. - */ - const TokenizedData &getData() const { return data; } - - /** - * Returns the complete content of the internal TokenizedData instance as - * a single string Variant. This method is mainly used in the unit tests for - * this class, it simply calls the text() method of TokenizedData. - * - * @param mode is the WhitespaceMode that should be used for returning the - * text. - * @return a string variant containing the text content of the internal - * TokenizedData instance or a nullptr variant if there is no text. - */ - Variant getText(WhitespaceMode mode = WhitespaceMode::COLLAPSE); - - /** * Returns a reference at the internally stored command name. Only valid if - * State::COMMAND was returned by the "parse" function. + * State::COMMAND_START, State::ANNOTATION_START or State::ANNOTATION_END + * was returned by the "parse" function. * * @return a reference at a variant containing name and location of the * parsed command. @@ -353,7 +161,8 @@ public: /** * Returns a reference at the internally stored command name. Only valid if - * State::COMMAND was returned by the "parse" function. + * State::COMMAND_START, State::ANNOTATION_START or State::ANNOTATION_END + * was returned by the "parse" function. * * @return a reference at a variant containing arguments given to the * command. @@ -361,10 +170,37 @@ public: const Variant &getCommandArguments() const; /** + * Returns a reference at the internally stored data. Only valid if + * State::DATA was returned by the "parse" function. + * + * @return a reference at a variant containing the data parsed by the + * "parse" function. + */ + const TokenizedData &getData() const; + + /** + * Returns the location of the current token. + */ + const SourceLocation &getLocation() const; + + /** + * Returns true if the currently started command is a range command, only + * valid if State::COMMAND_START was returned by the "parse" function. + * + * @return true if the command is started is a range command, false + * otherwise. + */ + bool inRangeCommand() const; + + /** * Returns true if the current field is the "default" field. This is true if * the parser either is in the outer range of a range command or inside a - * field that has been especially marked as "default" field (using the "|" - * syntax). + * field that has been especially marked as "default" field (using the "{!" + * syntax). Only valid if State::FIELD_START was returned by the "parse" + * function. + * + * @return true if the current field was marked as default field (using the + * "{!" syntax). */ bool inDefaultField() const; }; diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 3d01007..8b64e51 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -21,7 +21,9 @@ #include <iostream> #include <core/common/CharReader.hpp> +#include <core/common/Variant.hpp> #include <core/frontend/TerminalLogger.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <formats/osml/OsmlStreamParser.hpp> @@ -30,147 +32,196 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); // static ConcreteLogger logger; -static OsmlStreamParser::State skipEmptyData(OsmlStreamParser &reader) +static void assertCommandStart(OsmlStreamParser &parser, + const std::string &name, + bool rangeCommand, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { - OsmlStreamParser::State res = reader.parse(); - if (res == OsmlStreamParser::State::DATA) { - EXPECT_FALSE(reader.getData().hasNonWhitespaceText()); - res = reader.parse(); - } - return res; -} - -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::COMMAND, skipEmptyData(reader)); - EXPECT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::COMMAND_START, parser.parse()); + EXPECT_EQ(name, parser.getCommandName().asString()); + EXPECT_EQ(rangeCommand, parser.inRangeCommand()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - const Variant::mapType &args, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) +static void assertCommandStart(OsmlStreamParser &parser, + const std::string &name, + bool rangeCommand, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { - assertCommand(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); + assertCommandStart(parser, name, rangeCommand, start, end); + EXPECT_EQ(args, parser.getCommandArguments()); } -static void assertData(OsmlStreamParser &reader, const std::string &data, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset, - WhitespaceMode mode = WhitespaceMode::COLLAPSE) +static void assertCommand(OsmlStreamParser &parser, + const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - Variant text = reader.getText(mode); - ASSERT_TRUE(text.isString()); - EXPECT_EQ(data, text.asString()); + assertCommandStart(parser, name, false, Variant::mapType{}, start, end); +} + +static void assertCommandEnd(OsmlStreamParser &parser, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::COMMAND_END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, text.getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, text.getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); + } +} + +static void assertTextData(OsmlStreamParser &parser, const std::string &text, + SourceOffset dataStart = InvalidSourceOffset, + SourceOffset dataEnd = InvalidSourceOffset, + SourceOffset textStart = InvalidSourceOffset, + SourceOffset textEnd = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + + const TokenizedData &data = parser.getData(); + TokenizedDataReader dataReader = data.reader(); + + Token token; + ASSERT_TRUE(dataReader.read(token, TokenSet{}, mode)); + EXPECT_EQ(Tokens::Data, token.id); + EXPECT_EQ(text, token.content); + if (dataStart != InvalidSourceOffset) { + EXPECT_EQ(dataStart, data.getLocation().getStart()); + EXPECT_EQ(dataStart, parser.getLocation().getStart()); + } + if (dataEnd != InvalidSourceOffset) { + EXPECT_EQ(dataEnd, data.getLocation().getEnd()); + EXPECT_EQ(dataEnd, parser.getLocation().getEnd()); + } + if (textStart != InvalidSourceOffset) { + EXPECT_EQ(textStart, token.getLocation().getStart()); } + if (textEnd != InvalidSourceOffset) { + EXPECT_EQ(textEnd, token.getLocation().getEnd()); + } +} + +static void assertData(OsmlStreamParser &parser, const std::string &text, + SourceOffset textStart = InvalidSourceOffset, + SourceOffset textEnd = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) +{ + assertTextData(parser, text, InvalidSourceOffset, InvalidSourceOffset, textStart, textEnd, mode); +} + +static void assertEmptyData(OsmlStreamParser &parser) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + + const TokenizedData &data = parser.getData(); + TokenizedDataReader dataReader = data.reader(); + + Token token; + EXPECT_FALSE(dataReader.read(token, TokenSet{}, WhitespaceMode::TRIM)); } -static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, + +static void assertFieldStart(OsmlStreamParser &parser, bool defaultField, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_START, skipEmptyData(reader)); - EXPECT_EQ(defaultField, reader.inDefaultField()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, parser.parse()); + EXPECT_EQ(defaultField, parser.inDefaultField()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertFieldEnd(OsmlStreamParser &reader, +static void assertFieldEnd(OsmlStreamParser &parser, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_END, skipEmptyData(reader)); + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertAnnotationStart(OsmlStreamParser &reader, +static void assertAnnotationStart(OsmlStreamParser &parser, const std::string &name, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, skipEmptyData(reader)); - EXPECT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, parser.parse()); + EXPECT_EQ(name, parser.getCommandName().asString()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertAnnotationStart(OsmlStreamParser &reader, +static void assertAnnotationStart(OsmlStreamParser &parser, const std::string &name, const Variant::mapType &args, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - assertAnnotationStart(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); + assertAnnotationStart(parser, name, start, end); + EXPECT_EQ(args, parser.getCommandArguments()); } -static void assertAnnotationEnd(OsmlStreamParser &reader, +static void assertAnnotationEnd(OsmlStreamParser &parser, const std::string &name, const std::string &elementName, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, skipEmptyData(reader)); - ASSERT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, parser.parse()); + ASSERT_EQ(name, parser.getCommandName().asString()); if (!elementName.empty()) { - ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); - ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name")); + ASSERT_EQ(1U, parser.getCommandArguments().asMap().size()); + ASSERT_EQ(1U, parser.getCommandArguments().asMap().count("name")); - auto it = reader.getCommandArguments().asMap().find("name"); + auto it = parser.getCommandArguments().asMap().find("name"); ASSERT_EQ(elementName, it->second.asString()); } if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertEnd(OsmlStreamParser &reader, +static void assertEnd(OsmlStreamParser &parser, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::END, skipEmptyData(reader)); + ASSERT_EQ(OsmlStreamParser::State::END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } @@ -179,9 +230,9 @@ TEST(OsmlStreamParser, empty) const char *testString = ""; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser, 0, 0); } TEST(OsmlStreamParser, oneCharacter) @@ -189,45 +240,102 @@ TEST(OsmlStreamParser, oneCharacter) const char *testString = "a"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::COLLAPSE); + assertEnd(parser, 1, 1); +} + +TEST(OsmlStreamParser, whitespacePreserve) +{ + const char *testString = " hello \t world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, " hello \t world ", 0, 15, 0, 15, + WhitespaceMode::PRESERVE); + assertEnd(parser, 15, 15); +} + +TEST(OsmlStreamParser, whitespaceTrim) +{ + const char *testString = " hello \t world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); - assertData(reader, "a", 0, 1); + assertTextData(parser, "hello \t world", 0, 15, 1, 14, + WhitespaceMode::TRIM); + assertEnd(parser, 15, 15); } -TEST(OsmlStreamParser, whitespaceElimination) +TEST(OsmlStreamParser, whitespaceCollapse) { const char *testString = " hello \t world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(reader, "hello world", 1, 14); + assertTextData(parser, "hello world", 0, 15, 1, 14, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 15, 15); } -TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) +TEST(OsmlStreamParser, whitespaceCollapseLinebreak) { const char *testString = " hello \n world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "hello world", 0, 15, 1, 14, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 15, 15); +} + +TEST(OsmlStreamParser, whitespaceCollapseProtected) +{ + const char *testString = " hello\\ \\ world "; + // 012345 67 89012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "hello world", 0, 16, 1, 15, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 16, 16); +} + +TEST(OsmlStreamParser, whitespaceCollapseProtected2) +{ + const char *testString = " hello \\ \\ world "; + // 012345 67 89012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); - assertData(reader, "hello world", 1, 14); + assertTextData(parser, "hello world", 0, 17, 1, 16, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 17, 17); } static void testEscapeSpecialCharacter(const std::string &c) { CharReader charReader(std::string("\\") + c); - OsmlStreamParser reader(charReader, logger); - EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(c, reader.getText().asString()); - - SourceLocation loc = reader.getText().getLocation(); - EXPECT_EQ(0U, loc.getStart()); - EXPECT_EQ(1U + c.size(), loc.getEnd()); + OsmlStreamParser parser(charReader, logger); + assertTextData(parser, c, 0, 2, 0, 2, WhitespaceMode::PRESERVE); + assertEnd(parser, 2, 2); } TEST(OsmlStreamParser, escapeSpecialCharacters) @@ -240,9 +348,11 @@ TEST(OsmlStreamParser, escapeSpecialCharacters) TEST(OsmlStreamParser, simpleSingleLineComment) { const char *testString = "% This is a single line comment"; + // 0123456789012345678901234567890 + // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + OsmlStreamParser parser(charReader, logger); + assertEnd(parser, 31, 31); } TEST(OsmlStreamParser, singleLineComment) @@ -251,24 +361,11 @@ TEST(OsmlStreamParser, singleLineComment) // 01234567890123456789012345678901 23 // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } - - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(33U, loc.getStart()); - ASSERT_EQ(34U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + assertTextData(parser, "b", 33, 34, 33, 34, WhitespaceMode::PRESERVE); + assertEnd(parser, 34, 34); } TEST(OsmlStreamParser, multilineComment) @@ -277,24 +374,27 @@ TEST(OsmlStreamParser, multilineComment) // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(40U, loc.getStart()); - ASSERT_EQ(41U, loc.getEnd()); - } + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + assertTextData(parser, "b", 40, 41, 40, 41, WhitespaceMode::PRESERVE); + assertEnd(parser, 41, 41); +} - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +TEST(OsmlStreamParser, unfinishedMultilineComment) +{ + const char *testString = "a%{ This is a\n\n multiline line comment"; + // 0123456789012 3 456789012345678901234567 + // 0 1 2 3 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + logger.reset(); + + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 38, 38); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, nestedMultilineComment) @@ -303,24 +403,11 @@ TEST(OsmlStreamParser, nestedMultilineComment) // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } - - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(40U, loc.getStart()); - ASSERT_EQ(41U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + assertTextData(parser, "b", 40, 41, 40, 41, WhitespaceMode::PRESERVE); + assertEnd(parser, 41, 41); } TEST(OsmlStreamParser, simpleCommand) @@ -328,45 +415,27 @@ TEST(OsmlStreamParser, simpleCommand) const char *testString = "\\test"; // 0 12345 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser parser(charReader, logger); - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - - ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertCommand(parser, "test", 0, 5); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithName) { - const char *testString = "\\test#bla"; - // 0 12345678 + const char *testString = "\\test#foo"; + // 012345678 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + OsmlStreamParser parser(charReader, logger); - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(1U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("name")); - ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); + assertCommandStart(parser, "test", false, Variant::mapType{{"name", "foo"}}, + 0, 5); - loc = commandArguments.asMap()["name"].getLocation(); - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(5U, args["name"].getLocation().getStart()); + ASSERT_EQ(9U, args["name"].getLocation().getEnd()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithArguments) @@ -375,38 +444,21 @@ TEST(OsmlStreamParser, simpleCommandWithArguments) // 0 123456789012345 678901 2 // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser parser(charReader, logger); - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + assertCommandStart(parser, "test", false, + Variant::mapType{{"a", 1}, {"b", 2}, {"c", "test"}}, 0, + 5); - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(3U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("a")); - ASSERT_EQ(1U, commandArguments.asMap().count("b")); - ASSERT_EQ(1U, commandArguments.asMap().count("c")); - ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); - ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); - ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(8U, args["a"].getLocation().getStart()); + ASSERT_EQ(9U, args["a"].getLocation().getEnd()); + ASSERT_EQ(12U, args["b"].getLocation().getStart()); + ASSERT_EQ(13U, args["b"].getLocation().getEnd()); + ASSERT_EQ(16U, args["c"].getLocation().getStart()); + ASSERT_EQ(22U, args["c"].getLocation().getEnd()); - loc = commandArguments.asMap()["a"].getLocation(); - ASSERT_EQ(8U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); - - loc = commandArguments.asMap()["b"].getLocation(); - ASSERT_EQ(12U, loc.getStart()); - ASSERT_EQ(13U, loc.getEnd()); - - loc = commandArguments.asMap()["c"].getLocation(); - ASSERT_EQ(16U, loc.getStart()); - ASSERT_EQ(22U, loc.getEnd()); - - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) @@ -415,44 +467,24 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) // 0 1234567890123456789 01234 56 // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(4U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("a")); - ASSERT_EQ(1U, commandArguments.asMap().count("b")); - ASSERT_EQ(1U, commandArguments.asMap().count("c")); - ASSERT_EQ(1U, commandArguments.asMap().count("name")); - ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); - ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); - ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); - ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); - - loc = commandArguments.asMap()["a"].getLocation(); - ASSERT_EQ(12U, loc.getStart()); - ASSERT_EQ(13U, loc.getEnd()); - - loc = commandArguments.asMap()["b"].getLocation(); - ASSERT_EQ(16U, loc.getStart()); - ASSERT_EQ(17U, loc.getEnd()); - - loc = commandArguments.asMap()["c"].getLocation(); - ASSERT_EQ(20U, loc.getStart()); - ASSERT_EQ(26U, loc.getEnd()); - - loc = commandArguments.asMap()["name"].getLocation(); - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); - - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + OsmlStreamParser parser(charReader, logger); + + assertCommandStart( + parser, "test", false, + Variant::mapType{{"name", "bla"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 0, + 5); + + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(5U, args["name"].getLocation().getStart()); + ASSERT_EQ(9U, args["name"].getLocation().getEnd()); + ASSERT_EQ(12U, args["a"].getLocation().getStart()); + ASSERT_EQ(13U, args["a"].getLocation().getEnd()); + ASSERT_EQ(16U, args["b"].getLocation().getStart()); + ASSERT_EQ(17U, args["b"].getLocation().getEnd()); + ASSERT_EQ(20U, args["c"].getLocation().getStart()); + ASSERT_EQ(26U, args["c"].getLocation().getEnd()); + + assertEnd(parser); } TEST(OsmlStreamParser, fields) @@ -461,21 +493,21 @@ TEST(OsmlStreamParser, fields) // 01234567890123 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 7, 8); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "b", 9, 10); - assertFieldEnd(reader, 10, 11); + assertFieldStart(parser, false, 8, 9); + assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 10, 11); - assertFieldStart(reader, false, 11, 12); - assertData(reader, "c", 12, 13); - assertFieldEnd(reader, 13, 14); - assertEnd(reader, 14, 14); + assertFieldStart(parser, false, 11, 12); + assertTextData(parser, "c", 12, 13, 12, 13, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 13, 14); + assertEnd(parser, 14, 14); } TEST(OsmlStreamParser, dataOutsideField) @@ -484,785 +516,781 @@ TEST(OsmlStreamParser, dataOutsideField) // 0123456789012 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::COLLAPSE); + assertFieldEnd(parser, 7, 8); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "b", 9, 10); - assertFieldEnd(reader, 10, 11); + assertFieldStart(parser, false, 8, 9); + assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::COLLAPSE); + assertFieldEnd(parser, 10, 11); - assertData(reader, "c", 12, 13); - assertEnd(reader, 13, 13); + assertTextData(parser, "c", 11, 13, 12, 13, WhitespaceMode::COLLAPSE); + assertEnd(parser, 13, 13); } TEST(OsmlStreamParser, nestedCommand) { - const char *testString = "\\test{a}{\\test2{b} c} d"; - // 012345678 90123456789012 - // 0 1 2 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + const char *testString = "\\test{a}{\\test2{b} c} d"; + // 012345678 90123456789012 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertData(parser, "a", 6, 7); + assertFieldEnd(parser, 7, 8); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); - - assertFieldStart(reader, false, 8, 9); - { - assertCommand(reader, "test2", 9, 15); - assertFieldStart(reader, false, 15, 16); - assertData(reader, "b", 16, 17); - assertFieldEnd(reader, 17, 18); - } - assertData(reader, "c", 19, 20); - assertFieldEnd(reader, 20, 21); - assertData(reader, "d", 22, 23); - assertEnd(reader, 23, 23); + assertFieldStart(parser, false, 8, 9); + assertCommand(parser, "test2", 9, 15); + assertFieldStart(parser, false, 15, 16); + assertData(parser, "b", 16, 17); + assertFieldEnd(parser, 17, 18); + assertData(parser, "c", 19, 20); + assertFieldEnd(parser, 20, 21); + assertData(parser, "d", 22, 23); + assertEnd(parser, 23, 23); } + TEST(OsmlStreamParser, nestedCommandImmediateEnd) { - const char *testString = "\\test{\\test2{b}} d"; - // 012345 678901234567 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - { - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertData(reader, "b", 13, 14); - assertFieldEnd(reader, 14, 15); - } - assertFieldEnd(reader, 15, 16); - assertData(reader, "d", 17, 18); - assertEnd(reader, 18, 18); + const char *testString = "\\test{\\test2{b}} d"; + // 012345 678901234567 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + { + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertData(parser, "b", 13, 14); + assertFieldEnd(parser, 14, 15); + } + assertFieldEnd(parser, 15, 16); + assertData(parser, "d", 17, 18); + assertEnd(parser, 18, 18); } TEST(OsmlStreamParser, nestedCommandNoData) { - const char *testString = "\\test{\\test2}"; - // 012345 6789012 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + const char *testString = "\\test{\\test2}"; + // 012345 6789012 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldEnd(reader, 12, 13); - assertEnd(reader, 13, 13); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldEnd(parser, 12, 13); + assertEnd(parser, 13, 13); } TEST(OsmlStreamParser, multipleCommands) { - const char *testString = "\\a \\b \\c \\d"; - // 012 345 678 90 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + const char *testString = "\\a \\b \\c \\d"; + // 012 345 678 90 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); - assertCommand(reader, "b", 3, 5); - assertData(reader, " ", 5, 6, WhitespaceMode::PRESERVE); - assertCommand(reader, "c", 6, 8); - assertData(reader, " ", 8, 9, WhitespaceMode::PRESERVE); - assertCommand(reader, "d", 9, 11); - assertEnd(reader, 11, 11); + assertCommand(parser, "a", 0, 2); + assertEmptyData(parser); + assertCommand(parser, "b", 3, 5); + assertEmptyData(parser); + assertCommand(parser, "c", 6, 8); + assertEmptyData(parser); + assertCommand(parser, "d", 9, 11); + assertEnd(parser, 11, 11); } TEST(OsmlStreamParser, fieldsWithSpaces) { - const char *testString = "\\a {\\b \\c} \n\n {\\d}"; - // 0123 456 789012 3 456 789 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "a", 0, 2); - assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); - assertFieldStart(reader, false, 3, 4); - assertCommand(reader, "b", 4, 6); - assertData(reader, " ", 6, 7, WhitespaceMode::PRESERVE); - assertCommand(reader, "c", 7, 9); - assertFieldEnd(reader, 9, 10); - assertData(reader, " \n\n {", 10, 12, WhitespaceMode::PRESERVE); - assertFieldStart(reader, false, 16, 17); - assertCommand(reader, "d", 17, 19); - assertFieldEnd(reader, 19, 20); - assertEnd(reader, 20, 20); -} - -TEST(OsmlStreamParser, errorNoFieldToStart) -{ - const char *testString = "\\a b {"; - // 012345 - // 0 - CharReader charReader(testString); + const char *testString = "\\a {\\b \\c} \n\n {\\d}"; + // 0123 456 789012 3 456 789 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - OsmlStreamParser reader(charReader, logger); + assertCommand(parser, "a", 0, 2); + assertEmptyData(parser); + assertFieldStart(parser, false, 3, 4); + assertCommand(parser, "b", 4, 6); + assertEmptyData(parser); + assertCommand(parser, "c", 7, 9); + assertFieldEnd(parser, 9, 10); + assertEmptyData(parser); + assertFieldStart(parser, false, 16, 17); + assertCommand(parser, "d", 17, 19); + assertFieldEnd(parser, 19, 20); + assertEnd(parser, 20, 20); +} - logger.reset(); - assertCommand(reader, "a", 0, 2); - assertData(reader, "b", 3, 4); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 6, 6); - ASSERT_TRUE(logger.hasError()); +TEST(OsmlStreamParser, errorEndButOpenField) +{ + const char *testString = "\\a b {"; + // 012345 + // 0 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + logger.reset(); + assertCommand(parser, "a", 0, 2); + assertData(parser, "b", 3, 4); + assertFieldStart(parser, false, 5, 6); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 6, 6); + ASSERT_TRUE(logger.hasError()); } + TEST(OsmlStreamParser, errorNoFieldToEnd) { - const char *testString = "\\a b }"; - // 012345 - // 0 - CharReader charReader(testString); + const char *testString = "\\a b }"; + // 012345 + // 0 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(reader, "a", 0, 2); - assertData(reader, "b", 3, 4); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 6, 6); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "a", 0, 2); + assertData(parser, "b", 3, 4); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 6, 6); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorNoFieldEndNested) { - const char *testString = "\\test{\\test2{}}}"; - // 012345 6789012345 - // 0 1 - CharReader charReader(testString); + const char *testString = "\\test{\\test2{}}}"; + // 012345 6789012345 + // 0 1 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertFieldEnd(reader, 13, 14); - assertFieldEnd(reader, 14, 15); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 16, 16); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertFieldEnd(parser, 13, 14); + assertFieldEnd(parser, 14, 15); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 16, 16); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorNoFieldEndNestedData) { - const char *testString = "\\test{\\test2{}}a}"; - // 012345 67890123456 - // 0 1 - CharReader charReader(testString); + const char *testString = "\\test{\\test2{}}a}"; + // 012345 67890123456 + // 0 1 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertFieldEnd(reader, 13, 14); - assertFieldEnd(reader, 14, 15); - assertData(reader, "a", 15, 16); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 17, 17); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertFieldEnd(parser, 13, 14); + assertFieldEnd(parser, 14, 15); + assertData(parser, "a", 15, 16); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 17, 17); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, beginEnd) { - const char *testString = "\\begin{book}\\end{book}"; - // 012345678901 2345678901 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}\\end{book}"; + // 012345678901 2345678901 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertFieldEnd(reader, 17, 21); - assertEnd(reader, 22, 22); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertCommandEnd(parser, 17, 21); + assertEnd(parser, 22, 22); } TEST(OsmlStreamParser, beginEndWithName) { - const char *testString = "\\begin{book#a}\\end{book}"; - // 01234567890123 4567890123 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book#a}\\end{book}"; + // 01234567890123 4567890123 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", {{"name", "a"}}, 7, 11); - assertFieldStart(reader, true, 14, 15); - assertFieldEnd(reader, 19, 23); - assertEnd(reader, 24, 24); + assertCommandStart(parser, "book", true, {{"name", "a"}}, 7, 11); + assertCommandEnd(parser, 19, 23); + assertEnd(parser, 24, 24); } TEST(OsmlStreamParser, beginEndWithNameAndArgs) { - const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; - // 0123456789012345678901234 56789 01 2345678901 - // 0 1 2 3 4 - CharReader charReader(testString); + const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; + // 0123456789012345678901234 56789 01 2345678901 + // 0 1 2 3 4 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", - {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, true, 32, 33); - assertFieldEnd(reader, 37, 41); - assertEnd(reader, 42, 42); + assertCommandStart(parser, "book", true, + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); + assertCommandEnd(parser, 37, 41); + assertEnd(parser, 42, 42); } TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) { - const char *testString = - "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; - // 0123456789012345678901234 56789 01234 567890123 45678901 2345678901 - // 0 1 2 3 4 5 6 - CharReader charReader(testString); - - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "book", - {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, false, 32, 33); - assertData(reader, "a", 33, 34); - assertCommand(reader, "test", Variant::mapType{}, 35, 40); - assertFieldEnd(reader, 40, 41); - assertFieldStart(reader, false, 41, 42); - assertData(reader, "b", 42, 43); - assertCommand(reader, "test", Variant::mapType{}, 44, 49); - assertFieldStart(reader, false, 49, 50); - assertFieldEnd(reader, 50, 51); - assertFieldEnd(reader, 51, 52); - assertFieldStart(reader, true, 52, 53); - assertFieldEnd(reader, 57, 61); - assertEnd(reader, 62, 62); + const char *testString = + "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; + // 0123456789012345678901234 56789 01234 567890123 45678901 2345678901 + // 0 1 2 3 4 5 6 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, "book", true, + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); + assertFieldStart(parser, false, 32, 33); + assertData(parser, "a", 33, 34); + assertCommand(parser, "test", 35, 40); + assertFieldEnd(parser, 40, 41); + assertFieldStart(parser, false, 41, 42); + assertData(parser, "b", 42, 43); + assertCommand(parser, "test", 44, 49); + assertFieldStart(parser, false, 49, 50); + assertFieldEnd(parser, 50, 51); + assertFieldEnd(parser, 51, 52); + assertCommandEnd(parser, 57, 61); + assertEnd(parser, 62, 62); } TEST(OsmlStreamParser, beginEndWithData) { - const char *testString = "\\begin{book}a\\end{book}"; - // 0123456789012 3456789012 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}a\\end{book}"; + // 0123456789012 3456789012 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertData(reader, "a", 12, 13); - assertFieldEnd(reader, 18, 22); - assertEnd(reader, 23, 23); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertCommandEnd(parser, 18, 22); + assertEnd(parser, 23, 23); } - +/* TEST(OsmlStreamParser, beginEndNested) { - const char *testString = - "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; - // 012345678901234 5678901234567890 123456 7890123 4567890 - // 0 1 2 3 4 5 - CharReader charReader(testString); - - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, false, 9, 10); - assertData(reader, "b", 10, 11); - assertFieldEnd(reader, 11, 12); - assertFieldStart(reader, true, 13, 14); - assertData(reader, "c", 13, 14); - assertCommand(reader, "d", 22, 23); - assertFieldStart(reader, false, 24, 25); - assertData(reader, "e", 25, 26); - assertFieldEnd(reader, 26, 27); - assertFieldStart(reader, false, 27, 28); - assertData(reader, "f", 28, 29); - assertFieldEnd(reader, 29, 30); - assertFieldStart(reader, true, 31, 32); - assertCommand(reader, "g", 31, 33); - assertFieldStart(reader, false, 33, 34); - assertData(reader, "h", 34, 35); - assertFieldEnd(reader, 35, 36); - assertFieldEnd(reader, 42, 43); - assertFieldEnd(reader, 49, 50); - assertEnd(reader, 51, 51); + const char *testString = + "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; + // 012345678901234 5678901234567890 123456 7890123 4567890 + // 0 1 2 3 4 5 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, false, 9, 10); + assertData(reader, "b", 10, 11); + assertFieldEnd(reader, 11, 12); + assertFieldStart(reader, true, 13, 14); + assertData(reader, "c", 13, 14); + assertCommand(reader, "d", 22, 23); + assertFieldStart(reader, false, 24, 25); + assertData(reader, "e", 25, 26); + assertFieldEnd(reader, 26, 27); + assertFieldStart(reader, false, 27, 28); + assertData(reader, "f", 28, 29); + assertFieldEnd(reader, 29, 30); + assertFieldStart(reader, true, 31, 32); + assertCommand(reader, "g", 31, 33); + assertFieldStart(reader, false, 33, 34); + assertData(reader, "h", 34, 35); + assertFieldEnd(reader, 35, 36); + assertFieldEnd(reader, 42, 43); + assertFieldEnd(reader, 49, 50); + assertEnd(reader, 51, 51); } TEST(OsmlStreamParser, beginEndWithCommand) { - const char *testString = "\\begin{book}\\a{test}\\end{book}"; - // 012345678901 23456789 0123456789 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}\\a{test}\\end{book}"; + // 012345678901 23456789 0123456789 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertCommand(reader, "a", 12, 14); - assertFieldStart(reader, false, 14, 15); - assertData(reader, "test", 15, 19); - assertFieldEnd(reader, 19, 20); - assertFieldEnd(reader, 25, 29); - assertEnd(reader, 30, 30); + assertCommand(reader, "book", 7, 11); + assertFieldStart(reader, true, 12, 13); + assertCommand(reader, "a", 12, 14); + assertFieldStart(reader, false, 14, 15); + assertData(reader, "test", 15, 19); + assertFieldEnd(reader, 19, 20); + assertFieldEnd(reader, 25, 29); + assertEnd(reader, 30, 30); } TEST(OsmlStreamParser, errorBeginNoBraceOpen) { - const char *testString = "\\begin a"; - // 01234567 - CharReader charReader(testString); + const char *testString = "\\begin a"; + // 01234567 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertData(reader, "a", 7, 8); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "a", 7, 8); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoIdentifier) { - const char *testString = "\\begin{!"; - CharReader charReader(testString); + const char *testString = "\\begin{!"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoBraceClose) { - const char *testString = "\\begin{a"; - CharReader charReader(testString); + const char *testString = "\\begin{a"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoName) { - const char *testString = "\\begin{a#}"; - CharReader charReader(testString); + const char *testString = "\\begin{a#}"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "a"); - ASSERT_TRUE(logger.hasError()); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a"); + ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBraceOpen) { - const char *testString = "\\end a"; - // 012345 - CharReader charReader(testString); + const char *testString = "\\end a"; + // 012345 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertData(reader, "a", 5, 6); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "a", 5, 6); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoIdentifier) { - const char *testString = "\\end{!"; - CharReader charReader(testString); + const char *testString = "\\end{!"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBraceClose) { - const char *testString = "\\end{a"; - CharReader charReader(testString); + const char *testString = "\\end{a"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBegin) { - const char *testString = "\\end{a}"; - CharReader charReader(testString); + const char *testString = "\\end{a}"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginEndMismatch) { - const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; - // 0123456789 012345678901234 5678901 - // 0 1 2 3 - CharReader charReader(testString); + const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; + // 0123456789 012345678901234 5678901 + // 0 1 2 3 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, true, 10, 11); - assertCommand(reader, "b", 17, 18); - assertFieldStart(reader, true, 20, 24); - assertData(reader, "test", 20, 24); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, true, 10, 11); + assertCommand(reader, "b", 17, 18); + assertFieldStart(reader, true, 20, 24); + assertData(reader, "test", 20, 24); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, commandWithNSSep) { - const char *testString = "\\test1:test2"; - // 012345678901 - CharReader charReader(testString); + const char *testString = "\\test1:test2"; + // 012345678901 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "test1:test2", 0, 12); - assertEnd(reader, 12, 12); + assertCommand(reader, "test1:test2", 0, 12); + assertEnd(reader, 12, 12); } TEST(OsmlStreamParser, beginEndWithNSSep) { - const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; - // 0123456789012345678 90123456789012345 - // 0 1 2 3 - CharReader charReader(testString); + const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; + // 0123456789012345678 90123456789012345 + // 0 1 2 3 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "test1:test2", 7, 18); - assertFieldStart(reader, true, 19, 20); - assertFieldEnd(reader, 24, 35); - assertEnd(reader, 36, 36); + assertCommand(reader, "test1:test2", 7, 18); + assertFieldStart(reader, true, 19, 20); + assertFieldEnd(reader, 24, 35); + assertEnd(reader, 36, 36); } TEST(OsmlStreamParser, errorBeginNSSep) { - const char *testString = "\\begin:test{blub}\\end{blub}"; - CharReader charReader(testString); + const char *testString = "\\begin:test{blub}\\end{blub}"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "blub"); - ASSERT_TRUE(logger.hasError()); - assertFieldStart(reader, true); - assertFieldEnd(reader); - assertEnd(reader); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "blub"); + ASSERT_TRUE(logger.hasError()); + assertFieldStart(reader, true); + assertFieldEnd(reader); + assertEnd(reader); } TEST(OsmlStreamParser, errorEndNSSep) { - const char *testString = "\\begin{blub}\\end:test{blub}"; - CharReader charReader(testString); + const char *testString = "\\begin{blub}\\end:test{blub}"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - assertCommand(reader, "blub"); - assertFieldStart(reader, true); - ASSERT_FALSE(logger.hasError()); - assertFieldEnd(reader); - ASSERT_TRUE(logger.hasError()); - assertEnd(reader); + logger.reset(); + assertCommand(reader, "blub"); + assertFieldStart(reader, true); + ASSERT_FALSE(logger.hasError()); + assertFieldEnd(reader); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader); } TEST(OsmlStreamParser, errorEmptyNs) { - const char *testString = "\\test:"; - CharReader charReader(testString); + const char *testString = "\\test:"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "test"); - ASSERT_TRUE(logger.hasError()); - assertData(reader, ":"); - assertEnd(reader); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ":"); + assertEnd(reader); } TEST(OsmlStreamParser, errorRepeatedNs) { - const char *testString = "\\test::"; - CharReader charReader(testString); + const char *testString = "\\test::"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "test"); - ASSERT_TRUE(logger.hasError()); - assertData(reader, "::"); - assertEnd(reader); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(reader, "::"); + assertEnd(reader); } TEST(OsmlStreamParser, explicitDefaultField) { - const char *testString = "\\a{!b}c"; - // 01234567 - CharReader charReader(testString); + const char *testString = "\\a{!b}c"; + // 01234567 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertData(reader, "b", 4, 5); - assertFieldEnd(reader, 5, 6); - assertData(reader, "c", 6, 7); - assertEnd(reader, 7, 7); + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertData(reader, "b", 4, 5); + assertFieldEnd(reader, 5, 6); + assertData(reader, "c", 6, 7); + assertEnd(reader, 7, 7); } TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) { - const char *testString = "\\a{!\\b}c"; - // 0123 4567 - CharReader charReader(testString); + const char *testString = "\\a{!\\b}c"; + // 0123 4567 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertCommand(reader, "b", 4, 6); - assertFieldEnd(reader, 6, 7); - assertData(reader, "c", 7, 8); - assertEnd(reader, 8, 8); + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + assertData(reader, "c", 7, 8); + assertEnd(reader, 8, 8); } TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) { - const char *testString = "\\a{!\\b}{c}"; - // 0123 456789 - CharReader charReader(testString); + const char *testString = "\\a{!\\b}{c}"; + // 0123 456789 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertCommand(reader, "b", 4, 6); - assertFieldEnd(reader, 6, 7); - ASSERT_FALSE(logger.hasError()); - assertData(reader, "c", 8, 9); - ASSERT_TRUE(logger.hasError()); - assertEnd(reader, 10, 10); + logger.reset(); + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "c", 8, 9); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader, 10, 10); } TEST(OsmlStreamParser, annotationStart) { - const char *testString = "<\\a"; - // 0 12 + const char *testString = "<\\a"; + // 0 12 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); - assertEnd(reader, 3, 3); + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertEnd(reader, 3, 3); } TEST(OsmlStreamParser, annotationStartWithName) { - const char *testString = "<\\annotationWithName#aName"; - // 0 1234567890123456789012345 - // 0 1 2 + const char *testString = "<\\annotationWithName#aName"; + // 0 1234567890123456789012345 + // 0 1 2 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart(reader, "annotationWithName", - Variant::mapType{{"name", "aName"}}, 0, 20); - assertEnd(reader, 26, 26); + assertAnnotationStart(reader, "annotationWithName", + Variant::mapType{{"name", "aName"}}, 0, 20); + assertEnd(reader, 26, 26); } TEST(OsmlStreamParser, annotationStartWithArguments) { - const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; - // 0 1234567890123456789012345678901234 - // 0 1 2 3 + const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; + // 0 1234567890123456789012345678901234 + // 0 1 2 3 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart( - reader, "annotationWithName", - Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); - assertEnd(reader, 35, 35); + assertAnnotationStart( + reader, "annotationWithName", + Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); + assertEnd(reader, 35, 35); } TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) { - const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; - // 0 123456789012345678901234567 89012345 67 - // 0 1 2 3 + const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; + // 0 123456789012345678901234567 89012345 67 + // 0 1 2 3 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart( - reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, - 10); - assertFieldStart(reader, true, 26, 27); - assertData(reader, "a", 26, 27); - assertFieldEnd(reader, 33, 35); - assertAnnotationEnd(reader, "", "", 36, 38); - assertEnd(reader, 38, 38); + assertAnnotationStart( + reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, + 10); + assertFieldStart(reader, true, 26, 27); + assertData(reader, "a", 26, 27); + assertFieldEnd(reader, 33, 35); + assertAnnotationEnd(reader, "", "", 36, 38); + assertEnd(reader, 38, 38); } TEST(OsmlStreamParser, annotationEnd) { - const char *testString = "\\a>"; - // 012 + const char *testString = "\\a>"; + // 012 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationEnd(reader, "a", "", 0, 2); - assertEnd(reader, 3, 3); + assertAnnotationEnd(reader, "a", "", 0, 2); + assertEnd(reader, 3, 3); } TEST(OsmlStreamParser, annotationEndWithName) { - const char *testString = "\\a#name>"; - // 01234567 + const char *testString = "\\a#name>"; + // 01234567 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationEnd(reader, "a", "name", 0, 2); - assertEnd(reader, 8, 8); + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 8, 8); } TEST(OsmlStreamParser, annotationEndWithNameAsArgs) { - const char *testString = "\\a[name=name]>"; - // 01234567890123 + const char *testString = "\\a[name=name]>"; + // 01234567890123 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationEnd(reader, "a", "name", 0, 2); - assertEnd(reader, 14, 14); + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 14, 14); } TEST(OsmlStreamParser, errorAnnotationEndWithArguments) { - const char *testString = "\\a[foo=bar]>"; - // 012345678901 - // 0 1 + const char *testString = "\\a[foo=bar]>"; + // 012345678901 + // 0 1 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); - ASSERT_TRUE(logger.hasError()); - assertData(reader, ">", 11, 12); - assertEnd(reader, 12, 12); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ">", 11, 12); + assertEnd(reader, 12, 12); } TEST(OsmlStreamParser, closingAnnotation) { - const char *testString = "<\\a>"; - // 0 123 + const char *testString = "<\\a>"; + // 0 123 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); - assertData(reader, ">", 3, 4); - assertEnd(reader, 4, 4); + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertData(reader, ">", 3, 4); + assertEnd(reader, 4, 4); } TEST(OsmlStreamParser, annotationWithFields) { - const char *testString = "a <\\b{c}{d}{!e} f \\> g"; - // 012 345678901234567 8901 - // 0 1 2 + const char *testString = "a <\\b{c}{d}{!e} f \\> g"; + // 012 345678901234567 8901 + // 0 1 2 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertData(reader, "a", 0, 1); - assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "c", 6, 7); - assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "d", 9, 10); - assertFieldEnd(reader, 10, 11); - assertFieldStart(reader, true, 11, 13); - assertData(reader, "e", 13, 14); - assertFieldEnd(reader, 14, 15); - assertData(reader, "f", 16, 17); - assertAnnotationEnd(reader, "", "", 18, 20); - assertData(reader, "g", 21, 22); - assertEnd(reader, 22, 22); + OsmlStreamParser reader(charReader, logger); + + assertData(reader, "a", 0, 1); + assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); + assertFieldStart(reader, false, 5, 6); + assertData(reader, "c", 6, 7); + assertFieldEnd(reader, 7, 8); + assertFieldStart(reader, false, 8, 9); + assertData(reader, "d", 9, 10); + assertFieldEnd(reader, 10, 11); + assertFieldStart(reader, true, 11, 13); + assertData(reader, "e", 13, 14); + assertFieldEnd(reader, 14, 15); + assertData(reader, "f", 16, 17); + assertAnnotationEnd(reader, "", "", 18, 20); + assertData(reader, "g", 21, 22); + assertEnd(reader, 22, 22); } TEST(OsmlStreamParser, annotationStartEscape) { - const char *testString = "<\\%test"; - // 0 123456 - // 0 + const char *testString = "<\\%test"; + // 0 123456 + // 0 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertData(reader, "<%test", 0, 7); - assertEnd(reader, 7, 7); + assertData(reader, "<%test", 0, 7); + assertEnd(reader, 7, 7); } +*/ } |