diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/core/common/UtilsTest.cpp | 33 | ||||
-rw-r--r-- | test/core/model/OntologyTest.cpp | 175 | ||||
-rw-r--r-- | test/core/parser/stack/StackTest.cpp | 646 | ||||
-rw-r--r-- | test/core/parser/stack/TokenRegistryTest.cpp | 82 | ||||
-rw-r--r-- | test/core/parser/utils/SourceOffsetVectorTest.cpp | 2 | ||||
-rw-r--r-- | test/core/parser/utils/TokenizedDataTest.cpp | 567 | ||||
-rw-r--r-- | test/core/parser/utils/TokenizedDataTestUtils.hpp | 64 | ||||
-rw-r--r-- | test/core/parser/utils/TokenizerTest.cpp | 441 | ||||
-rw-r--r-- | test/formats/osml/OsmlStreamParserTest.cpp | 1208 | ||||
-rw-r--r-- | test/formats/osxml/OsxmlEventParserTest.cpp | 99 |
10 files changed, 1975 insertions, 1342 deletions
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 4bf1587..2aaa430 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -131,4 +131,35 @@ TEST(Utils, collapse) ASSERT_EQ("long test", Utils::collapse(" long test ")); } -}
\ No newline at end of file +TEST(Utils, isUserDefinedToken) +{ + EXPECT_FALSE(Utils::isUserDefinedToken("")); + EXPECT_FALSE(Utils::isUserDefinedToken("a")); + EXPECT_TRUE(Utils::isUserDefinedToken(":")); + EXPECT_TRUE(Utils::isUserDefinedToken("::")); + EXPECT_TRUE(Utils::isUserDefinedToken("!?")); + EXPECT_TRUE(Utils::isUserDefinedToken(".")); + EXPECT_TRUE(Utils::isUserDefinedToken("<<")); + EXPECT_TRUE(Utils::isUserDefinedToken(">>")); + EXPECT_TRUE(Utils::isUserDefinedToken("''")); + EXPECT_TRUE(Utils::isUserDefinedToken("``")); + EXPECT_TRUE(Utils::isUserDefinedToken("´´")); + EXPECT_TRUE(Utils::isUserDefinedToken("´")); + EXPECT_TRUE(Utils::isUserDefinedToken("`")); + EXPECT_TRUE(Utils::isUserDefinedToken("<")); + EXPECT_TRUE(Utils::isUserDefinedToken(">")); + EXPECT_TRUE(Utils::isUserDefinedToken("<+>")); + EXPECT_FALSE(Utils::isUserDefinedToken("a:")); + EXPECT_FALSE(Utils::isUserDefinedToken("a:a")); + EXPECT_FALSE(Utils::isUserDefinedToken(":a")); + EXPECT_FALSE(Utils::isUserDefinedToken("{")); + EXPECT_FALSE(Utils::isUserDefinedToken("{{")); + EXPECT_FALSE(Utils::isUserDefinedToken("}}")); + EXPECT_FALSE(Utils::isUserDefinedToken("{{}{}")); + EXPECT_FALSE(Utils::isUserDefinedToken("<\\")); + EXPECT_FALSE(Utils::isUserDefinedToken("\\>")); + EXPECT_FALSE(Utils::isUserDefinedToken("{!")); + EXPECT_FALSE(Utils::isUserDefinedToken("< + >")); +} + +} diff --git a/test/core/model/OntologyTest.cpp b/test/core/model/OntologyTest.cpp index 764dcb4..265e9e2 100644 --- a/test/core/model/OntologyTest.cpp +++ b/test/core/model/OntologyTest.cpp @@ -82,9 +82,7 @@ TEST(Ontology, testOntologyResolving) } // i use this wrapper due to the strange behaviour of GTEST. -static void assertFalse(bool b){ - ASSERT_FALSE(b); -} +static void assertFalse(bool b) { ASSERT_FALSE(b); } static Rooted<FieldDescriptor> createUnsortedPrimitiveField( Handle<StructuredClass> strct, Handle<Type> type, Logger &logger, bool tree, @@ -170,7 +168,6 @@ TEST(StructuredClass, getFieldDescriptors) } } - TEST(StructuredClass, getFieldDescriptorsCycles) { Logger logger; @@ -523,6 +520,91 @@ TEST(Descriptor, getPermittedChildrenCycles) ASSERT_EQ(A, children[0]); } +TEST(Descriptor, getSyntaxDescriptor) +{ + // build an ontology with some custom syntax. + Manager mgr{1}; + Logger logger; + Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)}; + // Construct the ontology + Rooted<Ontology> ontology{new Ontology(mgr, sys, "ontology")}; + Rooted<StructuredClass> A{new StructuredClass( + mgr, "A", ontology, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // check the SyntaxDescriptor + SyntaxDescriptor stx = A->getSyntaxDescriptor(); + ASSERT_EQ(Tokens::Indent, stx.start); + ASSERT_EQ(Tokens::Dedent, stx.end); + ASSERT_EQ(1, stx.shortForm); + ASSERT_EQ(A, stx.descriptor); + ASSERT_TRUE(stx.isStruct()); + ASSERT_FALSE(stx.isAnnotation()); + ASSERT_FALSE(stx.isFieldDescriptor()); +} + +TEST(Descriptor, getPermittedTokens) +{ + // build an ontology with some custom syntax. + Manager mgr{1}; + Logger logger; + Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)}; + // Construct the ontology + Rooted<Ontology> ontology{new Ontology(mgr, sys, "ontology")}; + // add one StructuredClass with all tokens set. + Rooted<StructuredClass> A{new StructuredClass( + mgr, "A", ontology, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // add a field with one token set. + Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first; + A_field->setEndToken(TokenDescriptor(Tokens::Newline)); + A_field->addChild(A); + // add an annotation with start and end set. + Rooted<AnnotationClass> A_anno = ontology->createAnnotationClass("A"); + { + TokenDescriptor start{"<"}; + start.id = 7; + A_anno->setStartToken(start); + } + { + TokenDescriptor end{">"}; + end.id = 8; + A_anno->setEndToken(end); + } + // add a trivial annotation, which should not be returned. + Rooted<AnnotationClass> B_anno = ontology->createAnnotationClass("B"); + ASSERT_TRUE(ontology->validate(logger)); + + // check result. + std::vector<SyntaxDescriptor> stxs = A->getPermittedTokens(); + ASSERT_EQ(3, stxs.size()); + // the field should be first, because A itself should not be collected + // directly. + ASSERT_EQ(A_field, stxs[0].descriptor); + ASSERT_EQ(Tokens::Empty, stxs[0].start); + ASSERT_EQ(Tokens::Newline, stxs[0].end); + ASSERT_EQ(Tokens::Empty, stxs[0].shortForm); + ASSERT_EQ(A, stxs[1].descriptor); + ASSERT_EQ(Tokens::Indent, stxs[1].start); + ASSERT_EQ(Tokens::Dedent, stxs[1].end); + ASSERT_EQ(1, stxs[1].shortForm); + ASSERT_EQ(A_anno, stxs[2].descriptor); + ASSERT_EQ(7, stxs[2].start); + ASSERT_EQ(8, stxs[2].end); + ASSERT_EQ(Tokens::Empty, stxs[2].shortForm); +} + TEST(StructuredClass, isSubclassOf) { // create an inheritance hierarchy. @@ -619,6 +701,14 @@ TEST(Ontology, validate) base->setName("myClass"); ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); ASSERT_TRUE(ontology->validate(logger)); + // add an invalid short token. + base->setShortToken(TokenDescriptor("bla")); + ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); + ASSERT_FALSE(ontology->validate(logger)); + // make it valid. + base->setShortToken(TokenDescriptor("!bla!")); + ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); + ASSERT_TRUE(ontology->validate(logger)); // Let's add a primitive field (without a primitive type at first) Rooted<FieldDescriptor> base_field = base->createPrimitiveFieldDescriptor(nullptr, logger).first; @@ -629,6 +719,14 @@ TEST(Ontology, validate) base_field->setPrimitiveType(sys->getStringType()); ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); ASSERT_TRUE(ontology->validate(logger)); + // add an invalid start token. + base_field->setStartToken(TokenDescriptor("< + >")); + ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); + ASSERT_FALSE(ontology->validate(logger)); + // make it valid. + base_field->setStartToken(TokenDescriptor("<")); + ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); + ASSERT_TRUE(ontology->validate(logger)); // add a subclass for our base class. Rooted<StructuredClass> sub{new StructuredClass(mgr, "sub", ontology)}; // this should be valid in itself. @@ -686,4 +784,71 @@ TEST(Ontology, validate) ASSERT_TRUE(ontology->validate(logger)); } } -}
\ No newline at end of file + +TEST(Ontology, getAllTokenDescriptors) +{ + // build an ontology with some custom syntax. + Manager mgr{1}; + Logger logger; + Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)}; + // Construct the ontology + Rooted<Ontology> ontology{new Ontology(mgr, sys, "ontology")}; + // add one StructuredClass with all tokens set. + Rooted<StructuredClass> A{new StructuredClass( + mgr, "A", ontology, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // add a field with one token set. + Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first; + A_field->setEndToken(TokenDescriptor(Tokens::Newline)); + A_field->addChild(A); + // add an annotation with start and end set. + Rooted<AnnotationClass> A_anno = ontology->createAnnotationClass("A"); + { + TokenDescriptor start{"<"}; + start.id = 7; + A_anno->setStartToken(start); + } + { + TokenDescriptor end{">"}; + end.id = 8; + A_anno->setEndToken(end); + } + // add a trivial annotation, which should not be returned. + Rooted<AnnotationClass> B_anno = ontology->createAnnotationClass("B"); + ASSERT_TRUE(ontology->validate(logger)); + + // check the result. + std::vector<TokenDescriptor *> tks = ontology->getAllTokenDescriptors(); + + // A short token + ASSERT_EQ("<+>", tks[0]->token); + ASSERT_EQ(1, tks[0]->id); + ASSERT_FALSE(tks[0]->special); + // A start token + ASSERT_EQ("", tks[1]->token); + ASSERT_EQ(Tokens::Indent, tks[1]->id); + ASSERT_TRUE(tks[1]->special); + // A end token + ASSERT_EQ("", tks[2]->token); + ASSERT_EQ(Tokens::Dedent, tks[2]->id); + ASSERT_TRUE(tks[2]->special); + // A field end token + ASSERT_EQ("", tks[3]->token); + ASSERT_EQ(Tokens::Newline, tks[3]->id); + ASSERT_TRUE(tks[3]->special); + // A anno start token + ASSERT_EQ("<", tks[4]->token); + ASSERT_EQ(7, tks[4]->id); + ASSERT_FALSE(tks[4]->special); + // A anno end token + ASSERT_EQ(">", tks[5]->token); + ASSERT_EQ(8, tks[5]->id); + ASSERT_FALSE(tks[5]->special); +} +} diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index a93f14a..a831c32 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -21,9 +21,11 @@ #include <gtest/gtest.h> #include <core/frontend/TerminalLogger.hpp> +#include <core/parser/stack/Callbacks.hpp> #include <core/parser/stack/Handler.hpp> #include <core/parser/stack/Stack.hpp> #include <core/parser/stack/State.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <core/StandaloneEnvironment.hpp> @@ -37,70 +39,88 @@ static StandaloneEnvironment env(logger); namespace { +class Parser : public ParserCallbacks { + TokenId registerToken(const std::string &token) override + { + return Tokens::Empty; + } + + void unregisterToken(TokenId id) override + { + // Do nothing here + } +}; + +static Parser parser; + struct Tracker { - int startCount; + int startCommandCount; + int startAnnotationCount; + int startTokenCount; + int endTokenCount; int endCount; int fieldStartCount; int fieldEndCount; - int annotationStartCount; - int annotationEndCount; int dataCount; - Variant::mapType startArgs; - bool fieldStartIsDefault; - size_t fieldStartIdx; - Variant annotationStartClassName; - Variant::mapType annotationStartArgs; - Variant annotationEndClassName; - Variant annotationEndElementName; - Variant dataData; - - bool startResult; - bool fieldStartSetIsDefault; + bool startCommandResult; + bool startAnnotationResult; + bool startTokenResult; + Handler::EndTokenResult endTokenResult; bool fieldStartResult; - bool annotationStartResult; - bool annotationEndResult; bool dataResult; + Variant::mapType startCommandArgs; + Variant::mapType startAnnotationArgs; + + bool fieldStartReturnValue; + size_t fieldStartIdx; + bool fieldStartIsDefault; + bool fieldStartSetIsDefault; + + Variant dataData; + Tracker() { reset(); } void reset() { - startCount = 0; + startCommandCount = 0; + startAnnotationCount = 0; + startTokenCount = 0; + endTokenCount = 0; endCount = 0; fieldStartCount = 0; fieldEndCount = 0; - annotationStartCount = 0; - annotationEndCount = 0; dataCount = 0; - startArgs = Variant::mapType{}; - fieldStartIsDefault = false; - fieldStartIdx = 0; - annotationStartClassName = Variant::fromString(std::string{}); - annotationStartArgs = Variant::mapType{}; - annotationEndClassName = Variant::fromString(std::string{}); - annotationEndElementName = Variant::fromString(std::string{}); - dataData = Variant::fromString(std::string{}); - - startResult = true; - fieldStartSetIsDefault = false; + startCommandResult = true; + startAnnotationResult = true; + startTokenResult = true; + endTokenResult = Handler::EndTokenResult::ENDED_THIS; fieldStartResult = true; - annotationStartResult = true; - annotationEndResult = true; dataResult = true; + + startCommandArgs = Variant::mapType{}; + startAnnotationArgs = Variant::mapType{}; + + fieldStartIdx = 0; + fieldStartIsDefault = false; + fieldStartSetIsDefault = false; + + dataData = Variant{}; } - void expect(int startCount, int endCount, int fieldStartCount, - int fieldEndCount, int annotationStartCount, - int annotationEndCount, int dataCount) + void expect(int startCommandCount, int endCount, int fieldStartCount, + int fieldEndCount, int dataCount, int startAnnotationCount = 0, + int startTokenCount = 0, int endTokenCount = 0) { - EXPECT_EQ(startCount, this->startCount); + EXPECT_EQ(startCommandCount, this->startCommandCount); + EXPECT_EQ(startAnnotationCount, this->startAnnotationCount); + EXPECT_EQ(startTokenCount, this->startTokenCount); + EXPECT_EQ(endTokenCount, this->endTokenCount); EXPECT_EQ(endCount, this->endCount); EXPECT_EQ(fieldStartCount, this->fieldStartCount); EXPECT_EQ(fieldEndCount, this->fieldEndCount); - EXPECT_EQ(annotationStartCount, this->annotationStartCount); - EXPECT_EQ(annotationEndCount, this->annotationEndCount); EXPECT_EQ(dataCount, this->dataCount); } }; @@ -112,55 +132,57 @@ private: TestHandler(const HandlerData &handlerData) : Handler(handlerData) {} public: - bool start(Variant::mapType &args) override + bool startCommand(Variant::mapType &args) override { - tracker.startCount++; - tracker.startArgs = args; - if (!tracker.startResult) { + tracker.startCommandArgs = args; + tracker.startCommandCount++; + if (!tracker.startCommandResult) { logger().error( - "The TestHandler was told not to allow a field start. So it " - "doesn't. The TestHandler always obeys its master."); + "TestHandler was told not to allow a command start. " + "TestHandler always obeys its master."); } - return tracker.startResult; + return tracker.startCommandResult; + } + + bool startAnnotation(Variant::mapType &args, + AnnotationType annotationType) override + { + tracker.startAnnotationArgs = args; + tracker.startAnnotationCount++; + return tracker.startAnnotationResult; + } + + bool startToken(Handle<Node> node) override + { + tracker.startTokenCount++; + return tracker.startTokenResult; + } + + EndTokenResult endToken(const Token &token, Handle<Node> node) override + { + tracker.endTokenCount++; + return tracker.endTokenResult; } void end() override { tracker.endCount++; } bool fieldStart(bool &isDefault, size_t fieldIdx) override { - tracker.fieldStartCount++; tracker.fieldStartIsDefault = isDefault; tracker.fieldStartIdx = fieldIdx; if (tracker.fieldStartSetIsDefault) { isDefault = true; } + tracker.fieldStartCount++; return tracker.fieldStartResult; } void fieldEnd() override { tracker.fieldEndCount++; } - bool annotationStart(const Variant &className, - Variant::mapType &args) override - { - tracker.annotationStartCount++; - tracker.annotationStartClassName = className; - tracker.annotationStartArgs = args; - return tracker.annotationStartResult; - } - - bool annotationEnd(const Variant &className, - const Variant &elementName) override - { - tracker.annotationEndCount++; - tracker.annotationEndClassName = className; - tracker.annotationEndElementName = elementName; - return tracker.annotationEndResult; - } - - bool data(Variant &data) override + bool data() override { + tracker.dataData = readData(); tracker.dataCount++; - tracker.dataData = data; return tracker.dataResult; } @@ -204,75 +226,137 @@ TEST(Stack, basicTest) tracker.reset(); logger.reset(); { - Stack s{env.context, States::TestHandlers}; + Stack s{parser, env.context, States::TestHandlers}; EXPECT_EQ("", s.currentCommandName()); EXPECT_EQ(&States::None, &s.currentState()); - s.command("document", {}); + s.commandStart("document", {}); s.fieldStart(true); s.data("test1"); EXPECT_EQ("document", s.currentCommandName()); EXPECT_EQ(&States::Document, &s.currentState()); - tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc - s.command("body", {}); + s.commandStart("body", {}); s.fieldStart(true); s.data("test2"); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(2, 0, 2, 0, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 2, 0, 2); // scc, ec, fsc, fec, dc, sac, stc, etc - s.command("inner", {}); + s.commandStart("inner", {}); s.fieldStart(true); EXPECT_EQ("inner", s.currentCommandName()); EXPECT_EQ(&States::BodyChildren, &s.currentState()); s.fieldEnd(); - tracker.expect(3, 0, 3, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(3, 0, 3, 1, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldEnd(); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(3, 1, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(3, 1, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc - s.command("body", {}); + s.commandStart("body", {}); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(4, 2, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(4, 2, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(true); s.data("test3"); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); s.fieldEnd(); - tracker.expect(4, 2, 4, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(4, 2, 4, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); s.fieldEnd(); - tracker.expect(4, 3, 4, 4, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(4, 3, 4, 4, 3); // scc, ec, fsc, fec, dc, sac, stc, etc EXPECT_EQ("document", s.currentCommandName()); EXPECT_EQ(&States::Document, &s.currentState()); } - tracker.expect(4, 4, 4, 4, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(4, 4, 4, 4, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, basicTestRangeCommands) +{ + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::TestHandlers}; + + EXPECT_EQ("", s.currentCommandName()); + EXPECT_EQ(&States::None, &s.currentState()); + + s.commandStart("document", {}, true); + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.data("test1"); + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.commandStart("body", {}, true); + tracker.expect(2, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + s.data("test2"); + tracker.expect(2, 0, 2, 0, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + + s.commandStart("inner", {}, true); + tracker.expect(3, 0, 2, 0, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("inner", s.currentCommandName()); + EXPECT_EQ(&States::BodyChildren, &s.currentState()); + s.rangeEnd(); + tracker.expect(3, 1, 3, 1, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + s.rangeEnd(); + tracker.expect(3, 2, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.commandStart("body", {}, true); + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + tracker.expect(4, 2, 3, 2, 2); // scc, ec, fsc, fse, dc, sac, stc, etc + s.fieldStart(true); + tracker.expect(4, 2, 4, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + s.data("test3"); + tracker.expect(4, 2, 4, 2, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + s.fieldEnd(); + tracker.expect(4, 2, 4, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + s.rangeEnd(); + tracker.expect(4, 3, 4, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + s.rangeEnd(); + tracker.expect(4, 4, 4, 4, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(4, 4, 4, 4, 3); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } TEST(Stack, errorInvalidCommands) { - Stack s{env.context, States::TestHandlers}; + Stack s{parser, env.context, States::TestHandlers}; tracker.reset(); - EXPECT_THROW(s.command("body", {}), LoggableException); - s.command("document", {}); + EXPECT_THROW(s.commandStart("body", {}), LoggableException); + s.commandStart("document", {}); s.fieldStart(true); - EXPECT_THROW(s.command("document", {}), LoggableException); - s.command("empty", {}); + EXPECT_THROW(s.commandStart("document", {}), LoggableException); + s.commandStart("empty", {}); s.fieldStart(true); - EXPECT_THROW(s.command("body", {}), LoggableException); - s.command("special", {}); + EXPECT_THROW(s.commandStart("body", {}), LoggableException); + s.commandStart("special", {}); s.fieldStart(true); s.fieldEnd(); s.fieldEnd(); @@ -288,23 +372,23 @@ TEST(Stack, errorInvalidCommands) TEST(Stack, validation) { - Stack s{env.context, States::TestHandlers}; + Stack s{parser, env.context, States::TestHandlers}; tracker.reset(); logger.reset(); - s.command("arguments", {}); + s.commandStart("arguments", {}); EXPECT_TRUE(logger.hasError()); s.fieldStart(true); s.fieldEnd(); logger.reset(); - s.command("arguments", {{"a", 5}}); + s.commandStart("arguments", {{"a", 5}}, false); EXPECT_TRUE(logger.hasError()); s.fieldStart(true); s.fieldEnd(); logger.reset(); - s.command("arguments", {{"a", 5}, {"b", "test"}}); + s.commandStart("arguments", {{"a", 5}, {"b", "test"}}, false); EXPECT_FALSE(logger.hasError()); s.fieldStart(true); s.fieldEnd(); @@ -315,33 +399,33 @@ TEST(Stack, invalidCommandName) tracker.reset(); logger.reset(); - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(true); s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - s.command("a_", {}); - tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a_", {}); + tracker.expect(2, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(true); s.fieldEnd(); - tracker.expect(2, 1, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 1, 2, 2, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - s.command("a_:b", {}); - tracker.expect(3, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a_:b", {}); + tracker.expect(3, 2, 2, 2, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(true); s.fieldEnd(); - tracker.expect(3, 2, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(3, 2, 3, 3, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - ASSERT_THROW(s.command("_a", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_THROW(s.commandStart("_a", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - ASSERT_THROW(s.command("a:", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_THROW(s.commandStart("a:", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - ASSERT_THROW(s.command("a:_b", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_THROW(s.commandStart("a:_b", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, multipleFields) @@ -349,50 +433,50 @@ TEST(Stack, multipleFields) tracker.reset(); logger.reset(); { - Stack s{env.context, States::AnyHandlers}; + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {{"a", false}}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {{"a", false}}, false); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc EXPECT_EQ("a", s.currentCommandName()); - EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs); + EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startCommandArgs); s.fieldStart(false); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc EXPECT_FALSE(tracker.fieldStartIsDefault); EXPECT_EQ(0U, tracker.fieldStartIdx); s.data("test"); - tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test", tracker.dataData); + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("test", tracker.dataData.asString()); s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(false); - tracker.expect(1, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 2, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc EXPECT_FALSE(tracker.fieldStartIsDefault); EXPECT_EQ(1U, tracker.fieldStartIdx); s.data("test2"); - tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test2", tracker.dataData); + tracker.expect(1, 0, 2, 1, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("test2", tracker.dataData.asString()); s.fieldEnd(); - tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 2, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(true); - tracker.expect(1, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc EXPECT_TRUE(tracker.fieldStartIsDefault); EXPECT_EQ(2U, tracker.fieldStartIdx); s.data("test3"); - tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test3", tracker.dataData); + tracker.expect(1, 0, 3, 2, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("test3", tracker.dataData.asString()); s.fieldEnd(); - tracker.expect(1, 0, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 3, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(1, 1, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 3, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -401,15 +485,15 @@ TEST(Stack, implicitDefaultFieldOnNewCommand) tracker.reset(); logger.reset(); { - Stack s{env.context, States::AnyHandlers}; + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("b", {}); + tracker.expect(2, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 2, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -418,21 +502,21 @@ TEST(Stack, implicitDefaultFieldOnNewCommandWithExplicitDefaultField) tracker.reset(); logger.reset(); { - Stack s{env.context, States::AnyHandlers}; + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("a", s.currentCommandName()); - s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("b", {}); + tracker.expect(2, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("b", s.currentCommandName()); s.fieldStart(true); s.fieldEnd(); - tracker.expect(2, 0, 2, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 2, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("b", s.currentCommandName()); } - tracker.expect(2, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 2, 2, 2, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -441,18 +525,18 @@ TEST(Stack, noImplicitDefaultFieldOnIncompatibleCommand) tracker.reset(); logger.reset(); { - Stack s{env.context, States::AnyHandlers}; + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("a", s.currentCommandName()); tracker.fieldStartResult = false; - s.command("b", {}); - tracker.expect(2, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("b", {}); + tracker.expect(2, 1, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("b", s.currentCommandName()); } - tracker.expect(2, 2, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 2, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -461,23 +545,23 @@ TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven) tracker.reset(); logger.reset(); { - Stack s{env.context, States::AnyHandlers}; + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("a", s.currentCommandName()); s.fieldStart(true); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("a", s.currentCommandName()); s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("a", s.currentCommandName()); - s.command("b", {}); - tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("b", {}); + tracker.expect(2, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("b", s.currentCommandName()); } - tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 2, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -486,18 +570,18 @@ TEST(Stack, noEndIfStartFails) tracker.reset(); logger.reset(); { - Stack s{env.context, States::AnyHandlers}; + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_EQ("a", s.currentCommandName()); - tracker.startResult = false; - s.command("b", {}); - tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); + tracker.startCommandResult = false; + s.commandStart("b", {}); + tracker.expect(3, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ(&States::None, &s.currentState()); } - tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(3, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_TRUE(logger.hasError()); } @@ -506,15 +590,15 @@ TEST(Stack, implicitDefaultFieldOnData) tracker.reset(); logger.reset(); { - Stack s{env.context, States::AnyHandlers}; + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.data("test"); - tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(1, 1, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 1, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -524,11 +608,11 @@ TEST(Stack, autoFieldEnd) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(1, 1, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -538,17 +622,17 @@ TEST(Stack, autoImplicitFieldEnd) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - s.command("b", {}); - s.command("c", {}); - s.command("d", {}); - s.command("e", {}); + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + s.commandStart("b", {}); + s.commandStart("c", {}); + s.commandStart("d", {}); + s.commandStart("e", {}); s.fieldStart(true); s.fieldEnd(); - tracker.expect(5, 0, 5, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(5, 0, 5, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(5, 5, 5, 5, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(5, 5, 5, 5, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -558,14 +642,14 @@ TEST(Stack, invalidDefaultField) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); tracker.fieldStartResult = false; s.fieldStart(true); s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -575,17 +659,17 @@ TEST(Stack, errorInvalidDefaultFieldData) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); tracker.fieldStartResult = false; s.fieldStart(true); ASSERT_FALSE(logger.hasError()); s.data("test"); ASSERT_TRUE(logger.hasError()); s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorInvalidFieldData) @@ -594,17 +678,17 @@ TEST(Stack, errorInvalidFieldData) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); tracker.fieldStartResult = false; ASSERT_FALSE(logger.hasError()); s.fieldStart(false); ASSERT_TRUE(logger.hasError()); s.data("test"); s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorFieldStartNoCommand) @@ -612,10 +696,10 @@ TEST(Stack, errorFieldStartNoCommand) tracker.reset(); logger.reset(); - Stack s{env.context, States::AnyHandlers}; + Stack s{parser, env.context, States::AnyHandlers}; ASSERT_THROW(s.fieldStart(false), LoggableException); ASSERT_THROW(s.fieldStart(true), LoggableException); - tracker.expect(0, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(0, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorMultipleFieldStarts) @@ -624,20 +708,20 @@ TEST(Stack, errorMultipleFieldStarts) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(false); ASSERT_FALSE(logger.hasError()); s.fieldStart(false); ASSERT_TRUE(logger.hasError()); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorMultipleFieldEnds) @@ -646,102 +730,122 @@ TEST(Stack, errorMultipleFieldEnds) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(false); s.fieldEnd(); ASSERT_FALSE(logger.hasError()); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldEnd(); ASSERT_TRUE(logger.hasError()); - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorOpenField) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldStart(false); - ASSERT_FALSE(logger.hasError()); - } - ASSERT_TRUE(logger.hasError()); - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.reset(); + logger.reset(); + + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.fieldStart(false); + ASSERT_FALSE(logger.hasError()); + } + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, fieldEndWhenImplicitDefaultFieldOpen) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - s.fieldStart(true); - s.command("b", {}); - s.data("test"); - s.fieldEnd(); - tracker.expect(2, 1, 2, 2, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(2, 2, 2, 2, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + s.fieldStart(true); + s.commandStart("b", {}); + s.data("test"); + s.fieldEnd(); + tracker.expect(2, 1, 2, 2, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(2, 2, 2, 2, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, fieldAfterDefaultField) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldStart(true); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldStart(false); - tracker.expect(2, 0, 2, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.data("f1"); - tracker.expect(2, 0, 2, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - s.fieldEnd(); - tracker.expect(2, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - tracker.fieldStartSetIsDefault = true; - - s.fieldStart(false); - tracker.fieldStartSetIsDefault = false; - tracker.expect(2, 0, 3, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - s.data("f2"); - tracker.expect(2, 0, 3, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - s.fieldEnd(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.reset(); + logger.reset(); + + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.fieldStart(true); + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.commandStart("b", {}); + tracker.expect(2, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.fieldStart(false); + tracker.expect(2, 0, 2, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.data("f1"); + tracker.expect(2, 0, 2, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + s.fieldEnd(); + tracker.expect(2, 0, 2, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + tracker.fieldStartSetIsDefault = true; + + s.fieldStart(false); + tracker.fieldStartSetIsDefault = false; + tracker.expect(2, 0, 3, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + s.data("f2"); + tracker.expect(2, 0, 3, 1, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + s.fieldEnd(); + tracker.expect(2, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + logger.reset(); + tracker.expect(2, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + s.data("f3"); + tracker.expect(2, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + s.fieldEnd(); + tracker.expect(2, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.fieldEnd(); + tracker.expect(2, 1, 3, 3, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(2, 2, 3, 3, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); +} - ASSERT_FALSE(logger.hasError()); - s.fieldStart(false); - ASSERT_TRUE(logger.hasError()); - logger.reset(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - s.data("f3"); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - s.fieldEnd(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc +TEST(Stack, rangeCommandUnranged) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{parser, env.context, States::AnyHandlers}; + tracker.expect(0, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.commandStart("a", {}, true); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.commandStart("b", {}); + tracker.expect(2, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.rangeEnd(); + tracker.expect(2, 2, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(2, 2, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); +} - s.fieldEnd(); - tracker.expect(2, 1, 3, 3, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(2, 2, 3, 3, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); } } -}
\ No newline at end of file diff --git a/test/core/parser/stack/TokenRegistryTest.cpp b/test/core/parser/stack/TokenRegistryTest.cpp new file mode 100644 index 0000000..20d6cd0 --- /dev/null +++ b/test/core/parser/stack/TokenRegistryTest.cpp @@ -0,0 +1,82 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <core/parser/stack/Callbacks.hpp> +#include <core/parser/stack/TokenRegistry.hpp> + +namespace ousia { +namespace parser_stack { + +class ParserCallbacksProxy : public ParserCallbacks { +public: + size_t registerTokenCount = 0; + size_t unregisterTokenCount = 0; + + TokenId registerToken(const std::string &token) override + { + registerTokenCount++; + return registerTokenCount; + } + + void unregisterToken(TokenId id) override { unregisterTokenCount++; } +}; + +TEST(TokenRegistry, simple) +{ + ParserCallbacksProxy parser; + { + TokenRegistry registry(parser); + + ASSERT_EQ(0U, parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + ASSERT_EQ(1U, registry.registerToken("test")); + ASSERT_EQ(1U, registry.registerToken("test")); + ASSERT_EQ(2U, registry.registerToken("test2")); + ASSERT_EQ(2U, registry.registerToken("test2")); + ASSERT_EQ(3U, registry.registerToken("test3")); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(2); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(2); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(2U, parser.unregisterTokenCount); + } + ASSERT_EQ(3U, parser.unregisterTokenCount); +} +} +} + diff --git a/test/core/parser/utils/SourceOffsetVectorTest.cpp b/test/core/parser/utils/SourceOffsetVectorTest.cpp index 25a4163..26254f9 100644 --- a/test/core/parser/utils/SourceOffsetVectorTest.cpp +++ b/test/core/parser/utils/SourceOffsetVectorTest.cpp @@ -51,7 +51,7 @@ TEST(SourceOffsetVector, gaps) for (size_t i = 0; i < 999; i++) { auto elem = vec.loadOffset(i); EXPECT_EQ(i * 3 + 5, elem.first); - EXPECT_EQ((i + 1) * 3 + 5, elem.second); + EXPECT_EQ(i * 3 + 7, elem.second); } auto elem = vec.loadOffset(999); EXPECT_EQ(999U * 3 + 5, elem.first); diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp index 231bad9..8488459 100644 --- a/test/core/parser/utils/TokenizedDataTest.cpp +++ b/test/core/parser/utils/TokenizedDataTest.cpp @@ -20,6 +20,8 @@ #include <core/parser/utils/TokenizedData.hpp> +#include "TokenizedDataTestUtils.hpp" + namespace ousia { TEST(TokenizedData, dataWhitespacePreserve) @@ -29,15 +31,10 @@ TEST(TokenizedData, dataWhitespacePreserve) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" test1 test2 ", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(16U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, " test1 test2 ", TokenSet{}, WhitespaceMode::PRESERVE, + 0, 16); + assertEnd(reader); } TEST(TokenizedData, dataWhitespaceTrim) @@ -47,15 +44,10 @@ TEST(TokenizedData, dataWhitespaceTrim) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test1 test2", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(14U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::TRIM, 1, + 14); + assertEnd(reader); } TEST(TokenizedData, dataWhitespaceCollapse) @@ -65,15 +57,10 @@ TEST(TokenizedData, dataWhitespaceCollapse) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test1 test2", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(14U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::COLLAPSE, 1, + 14); + assertEnd(reader); } TEST(TokenizedData, singleToken) @@ -82,17 +69,9 @@ TEST(TokenizedData, singleToken) ASSERT_EQ(2U, data.append("$$")); data.mark(5, 0, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, singleDisabledToken) @@ -101,15 +80,9 @@ TEST(TokenizedData, singleDisabledToken) ASSERT_EQ(2U, data.append("$$")); data.mark(5, 0, 2); - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "$$", TokenSet{}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, dualToken) @@ -120,18 +93,10 @@ TEST(TokenizedData, dualToken) data.mark(5, 0, 2); data.mark(6, 1, 1); - data.enableToken(5); - data.enableToken(6); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5, 6}, WhitespaceMode::COLLAPSE, 0, + 2); + assertEnd(reader); } TEST(TokenizedData, dualTokenShorterEnabled) @@ -142,385 +107,281 @@ TEST(TokenizedData, dualTokenShorterEnabled) data.mark(5, 0, 2); data.mark(6, 1, 1); - data.enableToken(6); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(1U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 1, 2); + assertEnd(reader); } TEST(TokenizedData, dualTokenLongerEnabled) { TokenizedData data; ASSERT_EQ(2U, data.append("$$")); + data.mark(6, 0, 1); data.mark(5, 0, 2); + data.mark(6, 1, 1); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataPreserveWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" test ", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(8U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2); + assertText(reader, " test text ", TokenSet{5}, WhitespaceMode::PRESERVE, + 2, 16); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataTrimWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2); + assertText(reader, "test text", TokenSet{5}, WhitespaceMode::TRIM, 3, + 15); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataCollapseWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertText(reader, "test text", TokenSet{5}, WhitespaceMode::COLLAPSE, 3, + 15); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespacePreserveWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(8U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2); + assertText(reader, " ", TokenSet{5}, WhitespaceMode::PRESERVE, 2, 6); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 6, 8); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespaceTrimWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 6, 8); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespaceCollapseWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 6, 8); + assertEnd(reader); +} - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); +TEST(TokenizedData, appendChars) +{ + TokenizedData data; + ASSERT_EQ(1U, data.append('t', 5, 7)); + ASSERT_EQ(2U, data.append('e', 7, 8)); + ASSERT_EQ(3U, data.append('s', 8, 10)); + ASSERT_EQ(4U, data.append('t', 10, 12)); - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test", TokenSet{5}, WhitespaceMode::COLLAPSE, 5, 12); + assertEnd(reader); } -TEST(TokenizedData, textPreserveWhitespace) +TEST(TokenizedData, protectedWhitespace) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); - - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(4U, token.getLocation().getStart()); - EXPECT_EQ(6U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::PRESERVE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + ASSERT_EQ(4U, data.append("test", 10)); + ASSERT_EQ(11U, data.append(" test", 14, true)); + + TokenizedDataReader reader = data.reader(); + assertText(reader, "test test", TokenSet{5}, WhitespaceMode::COLLAPSE, 10, + 21); + assertEnd(reader); } -TEST(TokenizedData, textTrimWhitespace) +TEST(TokenizedData, specialNewlineToken) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); - - data.enableToken(5); + data.append("a\nb\n \nc\n"); + // 0 12 3456 78 9 + + const TokenSet tokens{Tokens::Newline}; + + TokenizedDataReader reader = data.reader(); + assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 1, 2); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 3, 4); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 7, 8); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 9, 10); + assertEnd(reader); +} - Token token; - ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); +TEST(TokenizedData, specialParagraphToken) +{ + TokenizedData data; + data.append("a\nb\n \nc\n"); + // 0 12 3456 78 9 - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); + const TokenSet tokens{Tokens::Paragraph}; - ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3); + assertToken(reader, Tokens::Paragraph, "\n \n", tokens, + WhitespaceMode::COLLAPSE, 3, 8); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9); + assertEnd(reader); } -TEST(TokenizedData, textCollapseWhitespace) +TEST(TokenizedData, specialSectionToken) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); + data.append("a\nb\n \n \t \n"); + // 0 12 3456 789 01 2 + // 0 1 - data.enableToken(5); + const TokenSet tokens{Tokens::Section}; - Token token; - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3); + assertToken(reader, Tokens::Section, "\n \n \t \n", tokens, + WhitespaceMode::COLLAPSE, 3, 13); + assertEnd(reader); } -TEST(TokenizedData, appendChars) +TEST(TokenizedData, specialTokenPrecedence) { TokenizedData data; - ASSERT_EQ(1U, data.append('t', 5, 7)); - ASSERT_EQ(2U, data.append('e', 7, 8)); - ASSERT_EQ(3U, data.append('s', 8, 10)); - ASSERT_EQ(4U, data.append('t', 10, 12)); + data.append("a\nb\n\nc\n\n\nd"); + // 0 12 3 45 6 7 89 + + const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section}; + + TokenizedDataReader reader = data.reader(); + assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 1, 2); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3); + assertToken(reader, Tokens::Paragraph, "\n\n", tokens, + WhitespaceMode::COLLAPSE, 3, 5); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 5, 6); + assertToken(reader, Tokens::Section, "\n\n\n", tokens, + WhitespaceMode::COLLAPSE, 6, 9); + assertText(reader, "d", tokens, WhitespaceMode::COLLAPSE, 9, 10); + assertEnd(reader); +} - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(5U, token.getLocation().getStart()); - EXPECT_EQ(12U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); +TEST(TokenizedData, specialTokenPrecedence2) +{ + TokenizedData data; + data.append("\nb\n\nc\n\n\n"); + // 0 12 3 45 6 7 + + const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section}; + + TokenizedDataReader reader = data.reader(); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 0, 1); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 1, 2); + assertToken(reader, Tokens::Paragraph, "\n\n", tokens, + WhitespaceMode::COLLAPSE, 2, 4); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 4, 5); + assertToken(reader, Tokens::Section, "\n\n\n", tokens, + WhitespaceMode::COLLAPSE, 5, 8); + assertEnd(reader); +} - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); +TEST(TokenizedData, specialTokenIndent) +{ + TokenizedData data; + data.append(" test\n\ttest2\n test3 \ttest4\ntest5"); + // 01234567 8 901234 5678901234567890 123456 789012 + // 0 1 2 3 4 + const TokenSet tokens{Tokens::Indent, Tokens::Dedent}; + + TokenizedDataReader reader = data.reader(); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 4, 4); + assertText(reader, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 10, 10); + assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37); + assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE, + 38, 38); + assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43); + assertEnd(reader); } -TEST(TokenizedData, copy) +TEST(TokenizedData, specialTokenIndentOverlap) { TokenizedData data; - ASSERT_EQ(7U, data.append(" a $ b ")); - // 0123456 - data.mark(6, 3, 1); - data.enableToken(6); - - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("a", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - - TokenizedData dataCopy = data; - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(dataCopy.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" b ", token.content); - EXPECT_EQ(4U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(data.next(token)); - - ASSERT_TRUE(dataCopy.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("b", token.content); - EXPECT_EQ(5U, token.getLocation().getStart()); - EXPECT_EQ(6U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(dataCopy.next(token)); + data.append(" test\n\ttest2\n test3 \ttest4\ntest5"); + // 01234567 8 901234 5678901234567890 123456 789012 + // 0 1 2 3 4 + const TokenSet tokens{Tokens::Indent, Tokens::Dedent, 5}; + + data.mark(5, 4, 4); + + TokenizedDataReader reader = data.reader(); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 4, 4); + assertToken(reader, 5, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 10, 10); + assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37); + assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE, + 38, 38); + assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43); + assertEnd(reader); } + } diff --git a/test/core/parser/utils/TokenizedDataTestUtils.hpp b/test/core/parser/utils/TokenizedDataTestUtils.hpp new file mode 100644 index 0000000..c384f9d --- /dev/null +++ b/test/core/parser/utils/TokenizedDataTestUtils.hpp @@ -0,0 +1,64 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ +#define _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ + +namespace ousia { + +static void assertToken(TokenizedDataReader &reader, TokenId id, + const std::string &text, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId sourceId = InvalidSourceId) +{ + Token token; + ASSERT_TRUE(reader.read(token, tokens, mode)); + EXPECT_EQ(id, token.id); + EXPECT_EQ(text, token.content); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, token.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, token.getLocation().getEnd()); + } + EXPECT_EQ(sourceId, token.getLocation().getSourceId()); +} + +static void assertText(TokenizedDataReader &reader, const std::string &text, + const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId id = InvalidSourceId) +{ + assertToken(reader, Tokens::Data, text, tokens, mode, start, end, id); +} + +static void assertEnd(TokenizedDataReader &reader) +{ + Token token; + ASSERT_TRUE(reader.atEnd()); + ASSERT_FALSE(reader.read(token)); +} + +} + +#endif /* _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ */ + diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 3809a12..45fc77a 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -20,9 +20,66 @@ #include <core/common/CharReader.hpp> #include <core/parser/utils/Tokenizer.hpp> +#include <core/parser/utils/TokenizedData.hpp> + +#include "TokenizedDataTestUtils.hpp" namespace ousia { +static void assertPrimaryToken(CharReader &reader, Tokenizer &tokenizer, + TokenId id, const std::string &text, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId sourceId = InvalidSourceId) +{ + Token token; + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + EXPECT_EQ(id, token.id); + EXPECT_EQ(text, token.content); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, token.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, token.getLocation().getEnd()); + } + EXPECT_EQ(sourceId, token.getLocation().getSourceId()); +} + +static void expectData(const std::string &expected, SourceOffset tokenStart, + SourceOffset tokenEnd, SourceOffset textStart, + SourceOffset textEnd, const Token &token, + TokenizedData &data, + WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ + ASSERT_EQ(Tokens::Data, token.id); + + Token textToken; + TokenizedDataReader reader = data.reader(); + ASSERT_TRUE(reader.read(textToken, TokenSet{}, mode)); + + EXPECT_EQ(expected, textToken.content); + EXPECT_EQ(tokenStart, token.location.getStart()); + EXPECT_EQ(tokenEnd, token.location.getEnd()); + EXPECT_EQ(textStart, textToken.getLocation().getStart()); + EXPECT_EQ(textEnd, textToken.getLocation().getEnd()); + EXPECT_TRUE(reader.atEnd()); +} + +static void assertDataToken(CharReader &reader, Tokenizer &tokenizer, + const std::string &expected, + SourceOffset tokenStart, SourceOffset tokenEnd, + SourceOffset textStart, SourceOffset textEnd, + WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ + Token token; + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + + expectData(expected, tokenStart, tokenEnd, textStart, textEnd, token, data, + mode); +} + TEST(Tokenizer, tokenRegistration) { Tokenizer tokenizer; @@ -31,23 +88,23 @@ TEST(Tokenizer, tokenRegistration) ASSERT_EQ(0U, tokenizer.registerToken("a")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("a")); - ASSERT_EQ("a", tokenizer.getTokenString(0U)); + ASSERT_EQ("a", tokenizer.lookupToken(0U).string); ASSERT_EQ(1U, tokenizer.registerToken("b")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("b")); - ASSERT_EQ("b", tokenizer.getTokenString(1U)); + ASSERT_EQ("b", tokenizer.lookupToken(1U).string); ASSERT_EQ(2U, tokenizer.registerToken("c")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("c")); - ASSERT_EQ("c", tokenizer.getTokenString(2U)); + ASSERT_EQ("c", tokenizer.lookupToken(2U).string); ASSERT_TRUE(tokenizer.unregisterToken(1U)); ASSERT_FALSE(tokenizer.unregisterToken(1U)); - ASSERT_EQ("", tokenizer.getTokenString(1U)); + ASSERT_EQ("", tokenizer.lookupToken(1U).string); ASSERT_EQ(1U, tokenizer.registerToken("d")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("d")); - ASSERT_EQ("d", tokenizer.getTokenString(1U)); + ASSERT_EQ("d", tokenizer.lookupToken(1U).string); } TEST(Tokenizer, textTokenPreserveWhitespace) @@ -56,36 +113,34 @@ TEST(Tokenizer, textTokenPreserveWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ(" this \t is only a \n\n test text ", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(36U, loc.getEnd()); + expectData(" this \t is only a \n\n test text ", 0, 36, 0, 36, + token, data, WhitespaceMode::PRESERVE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, token, + data, WhitespaceMode::PRESERVE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -95,36 +150,34 @@ TEST(Tokenizer, textTokenTrimWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 36, 1, 33, token, + data, WhitespaceMode::TRIM); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, token, + data, WhitespaceMode::TRIM); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -134,36 +187,34 @@ TEST(Tokenizer, textTokenCollapseWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this is only a test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); + expectData("this is only a test text", 0, 36, 1, 33, token, data, + WhitespaceMode::COLLAPSE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this is only a test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this is only a test text", 0, 32, 0, 32, token, data, + WhitespaceMode::COLLAPSE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -177,14 +228,12 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + expectData("test1", 0, 5, 0, 5, token, data); char c; ASSERT_TRUE(reader.peek(c)); @@ -193,7 +242,8 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -209,14 +259,10 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + expectData("test2", 6, 11, 6, 11, token, data); char c; ASSERT_FALSE(reader.peek(c)); @@ -233,21 +279,17 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); + expectData("test1", 0, 5, 0, 5, token, data); ASSERT_EQ(0U, reader.getOffset()); ASSERT_EQ(5U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -261,35 +303,26 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); + expectData("test2", 6, 11, 6, 11, token, data); ASSERT_EQ(0U, reader.getOffset()); ASSERT_EQ(11U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + expectData("test1", 0, 5, 0, 5, token, data); ASSERT_EQ(5U, reader.getOffset()); ASSERT_EQ(5U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -303,14 +336,9 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + expectData("test2", 6, 11, 6, 11, token, data); ASSERT_EQ(11U, reader.getOffset()); ASSERT_EQ(11U, reader.getPeekOffset()); } @@ -320,6 +348,7 @@ TEST(Tokenizer, ambiguousTokens) { CharReader reader{"abc"}; Tokenizer tokenizer; + TokenizedData data; TokenId t1 = tokenizer.registerToken("abd"); TokenId t2 = tokenizer.registerToken("bc"); @@ -328,16 +357,17 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_EQ(1U, t2); Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_TRUE(tokenizer.read(reader, token, data)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("a", token.content); + expectData("a", 0, 1, 0, 1, token, data); SourceLocation loc = token.location; ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); - ASSERT_TRUE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(t2, token.id); ASSERT_EQ("bc", token.content); @@ -346,7 +376,8 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_EQ(1U, loc.getStart()); ASSERT_EQ(3U, loc.getEnd()); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } TEST(Tokenizer, commentTestWhitespacePreserve) @@ -354,7 +385,7 @@ TEST(Tokenizer, commentTestWhitespacePreserve) CharReader reader{"Test/Test /* Block Comment */", 0}; // 012345678901234567890123456789 // 0 1 2 - Tokenizer tokenizer(WhitespaceMode::PRESERVE); + Tokenizer tokenizer; const TokenId t1 = tokenizer.registerToken("/"); const TokenId t2 = tokenizer.registerToken("/*"); @@ -370,45 +401,189 @@ TEST(Tokenizer, commentTestWhitespacePreserve) Token t; for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); + TokenizedData data(0); + EXPECT_TRUE(tokenizer.read(reader, t, data)); EXPECT_EQ(te.id, t.id); - EXPECT_EQ(te.content, t.content); + if (te.id != Tokens::Data) { + EXPECT_EQ(te.content, t.content); + } else { + TokenizedDataReader dataReader = data.reader(); + Token textToken; + ASSERT_TRUE(dataReader.read(textToken, TokenSet{}, + WhitespaceMode::PRESERVE)); + EXPECT_TRUE(dataReader.atEnd()); + EXPECT_EQ(te.content, textToken.content); + } EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); EXPECT_EQ(te.location.getStart(), t.location.getStart()); EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); } - ASSERT_FALSE(tokenizer.read(reader, t)); + + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, t, data)); } -TEST(Tokenizer, commentTestWhitespaceCollapse) +TEST(Tokenizer, nonPrimaryTokens) { - CharReader reader{"Test/Test /* Block Comment */", 0}; - // 012345678901234567890123456789 - // 0 1 2 - Tokenizer tokenizer(WhitespaceMode::COLLAPSE); + CharReader reader{ + "<<switch to $inline \\math mode$ they said, see the world they " + "said>>"}; + // 012345678901234567890 12345678901234567890123456789012345678901234567 + // 0 1 2 3 4 5 6 - const TokenId t1 = tokenizer.registerToken("/"); - const TokenId t2 = tokenizer.registerToken("/*"); - const TokenId t3 = tokenizer.registerToken("*/"); + Tokenizer tokenizer; - std::vector<Token> expected = { - {Tokens::Data, "Test", SourceLocation{0, 0, 4}}, - {t1, "/", SourceLocation{0, 4, 5}}, - {Tokens::Data, "Test", SourceLocation{0, 5, 9}}, - {t2, "/*", SourceLocation{0, 10, 12}}, - {Tokens::Data, "Block Comment", SourceLocation{0, 13, 26}}, - {t3, "*/", SourceLocation{0, 27, 29}}}; + TokenId tBackslash = tokenizer.registerToken("\\"); + TokenId tDollar = tokenizer.registerToken("$", false); + TokenId tSpeechStart = tokenizer.registerToken("<<", false); + TokenId tSpeechEnd = tokenizer.registerToken(">>", false); - Token t; - for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.id, t.id); - EXPECT_EQ(te.content, t.content); - EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); - EXPECT_EQ(te.location.getStart(), t.location.getStart()); - EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); + TokenSet tokens = TokenSet{tDollar, tSpeechStart, tSpeechEnd}; + + Token token, textToken; + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tSpeechStart, "<<", tokens, + WhitespaceMode::TRIM, 0, 2); + assertText(dataReader, "switch to", tokens, WhitespaceMode::TRIM, 2, + 11); + assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 12, + 13); + assertText(dataReader, "inline", tokens, WhitespaceMode::TRIM, 13, 19); + assertEnd(dataReader); + } + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(tBackslash, token.id); + ASSERT_EQ(20U, token.location.getStart()); + ASSERT_EQ(21U, token.location.getEnd()); + } + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertText(dataReader, "math mode", tokens, WhitespaceMode::TRIM, 21, + 30); + assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 30, + 31); + assertText(dataReader, "they said, see the world they said", tokens, + WhitespaceMode::TRIM, 32, 66); + assertToken(dataReader, tSpeechEnd, ">>", tokens, WhitespaceMode::TRIM, + 66, 68); + assertEnd(dataReader); + } + + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, token, data)); +} + +TEST(Tokenizer, primaryNonPrimaryTokenInteraction) +{ + CharReader reader{"<<test1>><test2><<test3\\><<<test4>>>"}; + // 01234567890123456789012 3456789012345 + // 0 1 2 3 + + Tokenizer tokenizer; + + TokenId tP1 = tokenizer.registerToken("<", true); + TokenId tP2 = tokenizer.registerToken(">", true); + TokenId tP3 = tokenizer.registerToken("\\>", true); + TokenId tN1 = tokenizer.registerToken("<<", false); + TokenId tN2 = tokenizer.registerToken(">>", false); + + TokenSet tokens = TokenSet{tN1, tN2}; + + Token token, textToken; + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 0, 2); + assertText(dataReader, "test1", tokens, WhitespaceMode::TRIM, 2, 7); + assertToken(dataReader, tN2, ">>", tokens, WhitespaceMode::TRIM, 7, 9); + assertEnd(dataReader); + } + + assertPrimaryToken(reader, tokenizer, tP1, "<", 9, 10); + assertDataToken(reader, tokenizer, "test2", 10, 15, 10, 15); + assertPrimaryToken(reader, tokenizer, tP2, ">", 15, 16); + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 16, 18); + assertText(dataReader, "test3", tokens, WhitespaceMode::TRIM, 18, 23); + assertEnd(dataReader); + } + + assertPrimaryToken(reader, tokenizer, tP3, "\\>", 23, 25); + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 25, 27); + assertEnd(dataReader); + } + + assertPrimaryToken(reader, tokenizer, tP1, "<", 27, 28); + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertText(dataReader, "test4", tokens, WhitespaceMode::TRIM, 28, 33); + assertToken(dataReader, tN2, ">>", tokens, WhitespaceMode::TRIM, 33, 35); + assertEnd(dataReader); + } + + assertPrimaryToken(reader, tokenizer, tP2, ">", 35, 36); + + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, token, data)); +} + +TEST(Tokenizer, ambiguousTokens2) +{ + CharReader reader{"<\\"}; + + Tokenizer tokenizer; + + TokenId tBackslash = tokenizer.registerToken("\\"); + TokenId tAnnotationStart = tokenizer.registerToken("<\\"); + + TokenSet tokens = TokenSet{tBackslash, tAnnotationStart}; + Token token; + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ("<\\", token.content); + ASSERT_EQ(tAnnotationStart, token.id); + ASSERT_TRUE(data.empty()); + } + + { + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, token, data)); } - ASSERT_FALSE(tokenizer.read(reader, t)); } } diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index d52fa5b..d47f529 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -21,143 +21,205 @@ #include <iostream> #include <core/common/CharReader.hpp> +#include <core/common/Variant.hpp> #include <core/frontend/TerminalLogger.hpp> - +#include <core/parser/utils/TokenizedData.hpp> #include <formats/osml/OsmlStreamParser.hpp> +#include <core/parser/utils/TokenizedDataTestUtils.hpp> + namespace ousia { static TerminalLogger logger(std::cerr, true); // static ConcreteLogger logger; -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) +static void assertCommandStart(OsmlStreamParser &parser, + const std::string &name, bool rangeCommand, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - EXPECT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::COMMAND_START, parser.parse()); + EXPECT_EQ(name, parser.getCommandName().asString()); + EXPECT_EQ(rangeCommand, parser.inRangeCommand()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - const Variant::mapType &args, +static void assertCommandStart(OsmlStreamParser &parser, + const std::string &name, bool rangeCommand, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertCommandStart(parser, name, rangeCommand, start, end); + EXPECT_EQ(args, parser.getCommandArguments()); +} + +static void assertCommand(OsmlStreamParser &parser, const std::string &name, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - assertCommand(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); + assertCommandStart(parser, name, false, Variant::mapType{}, start, end); } -static void assertData(OsmlStreamParser &reader, const std::string &data, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) +static void assertRangeEnd(OsmlStreamParser &parser, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(data, reader.getData().asString()); + ASSERT_EQ(OsmlStreamParser::State::RANGE_END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getData().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getData().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); + } +} + +static void assertTextData(OsmlStreamParser &parser, const std::string &text, + SourceOffset dataStart = InvalidSourceOffset, + SourceOffset dataEnd = InvalidSourceOffset, + SourceOffset textStart = InvalidSourceOffset, + SourceOffset textEnd = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + + const TokenizedData &data = parser.getData(); + TokenizedDataReader dataReader = data.reader(); + + Token token; + ASSERT_TRUE(dataReader.read(token, TokenSet{}, mode)); + EXPECT_EQ(Tokens::Data, token.id); + EXPECT_EQ(text, token.content); + if (dataStart != InvalidSourceOffset) { + EXPECT_EQ(dataStart, data.getLocation().getStart()); + EXPECT_EQ(dataStart, parser.getLocation().getStart()); + } + if (dataEnd != InvalidSourceOffset) { + EXPECT_EQ(dataEnd, data.getLocation().getEnd()); + EXPECT_EQ(dataEnd, parser.getLocation().getEnd()); + } + if (textStart != InvalidSourceOffset) { + EXPECT_EQ(textStart, token.getLocation().getStart()); + } + if (textEnd != InvalidSourceOffset) { + EXPECT_EQ(textEnd, token.getLocation().getEnd()); } } -static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, +static void assertData(OsmlStreamParser &parser, const std::string &text, + SourceOffset textStart = InvalidSourceOffset, + SourceOffset textEnd = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) +{ + assertTextData(parser, text, InvalidSourceOffset, InvalidSourceOffset, + textStart, textEnd, mode); +} + +static void assertEmptyData(OsmlStreamParser &parser) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + + const TokenizedData &data = parser.getData(); + TokenizedDataReader dataReader = data.reader(); + + Token token; + EXPECT_FALSE(dataReader.read(token, TokenSet{}, WhitespaceMode::TRIM)); +} + +static void assertFieldStart(OsmlStreamParser &parser, bool defaultField, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); - EXPECT_EQ(defaultField, reader.inDefaultField()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, parser.parse()); + EXPECT_EQ(defaultField, parser.inDefaultField()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertFieldEnd(OsmlStreamParser &reader, +static void assertFieldEnd(OsmlStreamParser &parser, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertAnnotationStart(OsmlStreamParser &reader, +static void assertAnnotationStart(OsmlStreamParser &parser, const std::string &name, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); - EXPECT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, parser.parse()); + EXPECT_EQ(name, parser.getCommandName().asString()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertAnnotationStart(OsmlStreamParser &reader, +static void assertAnnotationStart(OsmlStreamParser &parser, const std::string &name, const Variant::mapType &args, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - assertAnnotationStart(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); + assertAnnotationStart(parser, name, start, end); + EXPECT_EQ(args, parser.getCommandArguments()); } -static void assertAnnotationEnd(OsmlStreamParser &reader, +static void assertAnnotationEnd(OsmlStreamParser &parser, const std::string &name, const std::string &elementName, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); - ASSERT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, parser.parse()); + ASSERT_EQ(name, parser.getCommandName().asString()); if (!elementName.empty()) { - ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); - ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name")); + ASSERT_EQ(1U, parser.getCommandArguments().asMap().size()); + ASSERT_EQ(1U, parser.getCommandArguments().asMap().count("name")); - auto it = reader.getCommandArguments().asMap().find("name"); + auto it = parser.getCommandArguments().asMap().find("name"); ASSERT_EQ(elementName, it->second.asString()); } if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertEnd(OsmlStreamParser &reader, +static void assertEnd(OsmlStreamParser &parser, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } @@ -166,9 +228,9 @@ TEST(OsmlStreamParser, empty) const char *testString = ""; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser, 0, 0); } TEST(OsmlStreamParser, oneCharacter) @@ -176,57 +238,102 @@ TEST(OsmlStreamParser, oneCharacter) const char *testString = "a"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(reader, "a", 0, 1); + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::COLLAPSE); + assertEnd(parser, 1, 1); } -TEST(OsmlStreamParser, whitespaceElimination) +TEST(OsmlStreamParser, whitespacePreserve) { const char *testString = " hello \t world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(reader, "hello world", 1, 14); + assertTextData(parser, " hello \t world ", 0, 15, 0, 15, + WhitespaceMode::PRESERVE); + assertEnd(parser, 15, 15); } -TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) +TEST(OsmlStreamParser, whitespaceTrim) +{ + const char *testString = " hello \t world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "hello \t world", 0, 15, 1, 14, + WhitespaceMode::TRIM); + assertEnd(parser, 15, 15); +} + +TEST(OsmlStreamParser, whitespaceCollapse) +{ + const char *testString = " hello \t world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "hello world", 0, 15, 1, 14, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 15, 15); +} + +TEST(OsmlStreamParser, whitespaceCollapseLinebreak) { const char *testString = " hello \n world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(reader, "hello world", 1, 14); + assertTextData(parser, "hello world", 0, 15, 1, 14, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 15, 15); } -TEST(OsmlStreamParser, escapeWhitespace) +TEST(OsmlStreamParser, whitespaceCollapseProtected) { const char *testString = " hello\\ \\ world "; // 012345 67 89012345 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(reader, "hello world", 1, 15); + assertTextData(parser, "hello world", 0, 16, 1, 15, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 16, 16); +} + +TEST(OsmlStreamParser, whitespaceCollapseProtected2) +{ + const char *testString = " hello \\ \\ world "; + // 012345 67 89012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "hello world", 0, 17, 1, 16, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 17, 17); } static void testEscapeSpecialCharacter(const std::string &c) { CharReader charReader(std::string("\\") + c); - OsmlStreamParser reader(charReader, logger); - EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(c, reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - EXPECT_EQ(0U, loc.getStart()); - EXPECT_EQ(1U + c.size(), loc.getEnd()); + OsmlStreamParser parser(charReader, logger); + assertTextData(parser, c, 0, 2, 0, 2, WhitespaceMode::PRESERVE); + assertEnd(parser, 2, 2); } TEST(OsmlStreamParser, escapeSpecialCharacters) @@ -239,9 +346,11 @@ TEST(OsmlStreamParser, escapeSpecialCharacters) TEST(OsmlStreamParser, simpleSingleLineComment) { const char *testString = "% This is a single line comment"; + // 0123456789012345678901234567890 + // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + OsmlStreamParser parser(charReader, logger); + assertEnd(parser, 31, 31); } TEST(OsmlStreamParser, singleLineComment) @@ -250,24 +359,10 @@ TEST(OsmlStreamParser, singleLineComment) // 01234567890123456789012345678901 23 // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } - - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(33U, loc.getStart()); - ASSERT_EQ(34U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertTextData(parser, "ab", 0, 34, 0, 34, WhitespaceMode::PRESERVE); + assertEnd(parser, 34, 34); } TEST(OsmlStreamParser, multilineComment) @@ -276,24 +371,26 @@ TEST(OsmlStreamParser, multilineComment) // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(40U, loc.getStart()); - ASSERT_EQ(41U, loc.getEnd()); - } + assertTextData(parser, "ab", 0, 41, 0, 41, WhitespaceMode::PRESERVE); + assertEnd(parser, 41, 41); +} - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +TEST(OsmlStreamParser, unfinishedMultilineComment) +{ + const char *testString = "a%{ This is a\n\n multiline line comment"; + // 0123456789012 3 456789012345678901234567 + // 0 1 2 3 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + logger.reset(); + + ASSERT_FALSE(logger.hasError()); + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + ASSERT_TRUE(logger.hasError()); + assertEnd(parser, 38, 38); } TEST(OsmlStreamParser, nestedMultilineComment) @@ -302,24 +399,10 @@ TEST(OsmlStreamParser, nestedMultilineComment) // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(40U, loc.getStart()); - ASSERT_EQ(41U, loc.getEnd()); - } - - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertTextData(parser, "ab", 0, 41, 0, 41, WhitespaceMode::PRESERVE); + assertEnd(parser, 41, 41); } TEST(OsmlStreamParser, simpleCommand) @@ -327,45 +410,27 @@ TEST(OsmlStreamParser, simpleCommand) const char *testString = "\\test"; // 0 12345 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); + OsmlStreamParser parser(charReader, logger); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - - ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertCommand(parser, "test", 0, 5); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithName) { - const char *testString = "\\test#bla"; - // 0 12345678 + const char *testString = "\\test#foo"; + // 012345678 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + OsmlStreamParser parser(charReader, logger); - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(1U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("name")); - ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); + assertCommandStart(parser, "test", false, Variant::mapType{{"name", "foo"}}, + 0, 5); - loc = commandArguments.asMap()["name"].getLocation(); - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(5U, args["name"].getLocation().getStart()); + ASSERT_EQ(9U, args["name"].getLocation().getEnd()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithArguments) @@ -374,38 +439,21 @@ TEST(OsmlStreamParser, simpleCommandWithArguments) // 0 123456789012345 678901 2 // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(3U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("a")); - ASSERT_EQ(1U, commandArguments.asMap().count("b")); - ASSERT_EQ(1U, commandArguments.asMap().count("c")); - ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); - ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); - ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); - - loc = commandArguments.asMap()["a"].getLocation(); - ASSERT_EQ(8U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); + OsmlStreamParser parser(charReader, logger); - loc = commandArguments.asMap()["b"].getLocation(); - ASSERT_EQ(12U, loc.getStart()); - ASSERT_EQ(13U, loc.getEnd()); + assertCommandStart(parser, "test", false, + Variant::mapType{{"a", 1}, {"b", 2}, {"c", "test"}}, 0, + 5); - loc = commandArguments.asMap()["c"].getLocation(); - ASSERT_EQ(16U, loc.getStart()); - ASSERT_EQ(22U, loc.getEnd()); + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(8U, args["a"].getLocation().getStart()); + ASSERT_EQ(9U, args["a"].getLocation().getEnd()); + ASSERT_EQ(12U, args["b"].getLocation().getStart()); + ASSERT_EQ(13U, args["b"].getLocation().getEnd()); + ASSERT_EQ(16U, args["c"].getLocation().getStart()); + ASSERT_EQ(22U, args["c"].getLocation().getEnd()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) @@ -414,44 +462,24 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) // 0 1234567890123456789 01234 56 // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser parser(charReader, logger); - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + assertCommandStart( + parser, "test", false, + Variant::mapType{{"name", "bla"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 0, + 5); - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(4U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("a")); - ASSERT_EQ(1U, commandArguments.asMap().count("b")); - ASSERT_EQ(1U, commandArguments.asMap().count("c")); - ASSERT_EQ(1U, commandArguments.asMap().count("name")); - ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); - ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); - ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); - ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(5U, args["name"].getLocation().getStart()); + ASSERT_EQ(9U, args["name"].getLocation().getEnd()); + ASSERT_EQ(12U, args["a"].getLocation().getStart()); + ASSERT_EQ(13U, args["a"].getLocation().getEnd()); + ASSERT_EQ(16U, args["b"].getLocation().getStart()); + ASSERT_EQ(17U, args["b"].getLocation().getEnd()); + ASSERT_EQ(20U, args["c"].getLocation().getStart()); + ASSERT_EQ(26U, args["c"].getLocation().getEnd()); - loc = commandArguments.asMap()["a"].getLocation(); - ASSERT_EQ(12U, loc.getStart()); - ASSERT_EQ(13U, loc.getEnd()); - - loc = commandArguments.asMap()["b"].getLocation(); - ASSERT_EQ(16U, loc.getStart()); - ASSERT_EQ(17U, loc.getEnd()); - - loc = commandArguments.asMap()["c"].getLocation(); - ASSERT_EQ(20U, loc.getStart()); - ASSERT_EQ(26U, loc.getEnd()); - - loc = commandArguments.asMap()["name"].getLocation(); - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); - - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser); } TEST(OsmlStreamParser, fields) @@ -460,21 +488,76 @@ TEST(OsmlStreamParser, fields) // 01234567890123 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); + + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 7, 8); + + assertFieldStart(parser, false, 8, 9); + assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 10, 11); + + assertFieldStart(parser, false, 11, 12); + assertTextData(parser, "c", 12, 13, 12, 13, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 13, 14); + assertEnd(parser, 14, 14); +} + +TEST(OsmlStreamParser, fieldsWithoutCommand) +{ + const char *testString = "{a}{b}{c}"; + // 012345678 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); + assertFieldStart(parser, false, 0, 1); + assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 2, 3); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "b", 9, 10); - assertFieldEnd(reader, 10, 11); + assertFieldStart(parser, false, 3, 4); + assertTextData(parser, "b", 4, 5, 4, 5, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 5, 6); - assertFieldStart(reader, false, 11, 12); - assertData(reader, "c", 12, 13); - assertFieldEnd(reader, 13, 14); - assertEnd(reader, 14, 14); + assertFieldStart(parser, false, 6, 7); + assertTextData(parser, "c", 7, 8, 7, 8, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 8, 9); + assertEnd(parser, 9, 9); +} + +TEST(OsmlStreamParser, nestedField) +{ + const char *testString = "{{a{b}}}"; + // 01234567 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + assertFieldStart(parser, false, 0, 1); + assertFieldStart(parser, false, 1, 2); + assertTextData(parser, "a", 2, 3, 2, 3, WhitespaceMode::PRESERVE); + assertFieldStart(parser, false, 3, 4); + assertTextData(parser, "b", 4, 5, 4, 5, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 5, 6); + assertFieldEnd(parser, 6, 7); + assertFieldEnd(parser, 7, 8); + assertEnd(parser, 8, 8); +} + +TEST(OsmlStreamParser, errorUnbalancedField) +{ + const char *testString = "{a"; + // 01 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + logger.reset(); + + assertFieldStart(parser, false, 0, 1); + assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 2, 2); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, dataOutsideField) @@ -483,19 +566,19 @@ TEST(OsmlStreamParser, dataOutsideField) // 0123456789012 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::COLLAPSE); + assertFieldEnd(parser, 7, 8); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "b", 9, 10); - assertFieldEnd(reader, 10, 11); + assertFieldStart(parser, false, 8, 9); + assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::COLLAPSE); + assertFieldEnd(parser, 10, 11); - assertData(reader, "c", 12, 13); - assertEnd(reader, 13, 13); + assertTextData(parser, "c", 11, 13, 12, 13, WhitespaceMode::COLLAPSE); + assertEnd(parser, 13, 13); } TEST(OsmlStreamParser, nestedCommand) @@ -504,25 +587,22 @@ TEST(OsmlStreamParser, nestedCommand) // 012345678 90123456789012 // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertData(parser, "a", 6, 7); + assertFieldEnd(parser, 7, 8); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); - - assertFieldStart(reader, false, 8, 9); - { - assertCommand(reader, "test2", 9, 15); - assertFieldStart(reader, false, 15, 16); - assertData(reader, "b", 16, 17); - assertFieldEnd(reader, 17, 18); - } - assertData(reader, "c", 19, 20); - assertFieldEnd(reader, 20, 21); - assertData(reader, "d", 22, 23); - assertEnd(reader, 23, 23); + assertFieldStart(parser, false, 8, 9); + assertCommand(parser, "test2", 9, 15); + assertFieldStart(parser, false, 15, 16); + assertData(parser, "b", 16, 17); + assertFieldEnd(parser, 17, 18); + assertData(parser, "c", 19, 20); + assertFieldEnd(parser, 20, 21); + assertData(parser, "d", 22, 23); + assertEnd(parser, 23, 23); } TEST(OsmlStreamParser, nestedCommandImmediateEnd) @@ -531,19 +611,19 @@ TEST(OsmlStreamParser, nestedCommandImmediateEnd) // 012345 678901234567 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); { - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertData(reader, "b", 13, 14); - assertFieldEnd(reader, 14, 15); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertData(parser, "b", 13, 14); + assertFieldEnd(parser, 14, 15); } - assertFieldEnd(reader, 15, 16); - assertData(reader, "d", 17, 18); - assertEnd(reader, 18, 18); + assertFieldEnd(parser, 15, 16); + assertData(parser, "d", 17, 18); + assertEnd(parser, 18, 18); } TEST(OsmlStreamParser, nestedCommandNoData) @@ -551,13 +631,13 @@ TEST(OsmlStreamParser, nestedCommandNoData) const char *testString = "\\test{\\test2}"; // 012345 6789012 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldEnd(reader, 12, 13); - assertEnd(reader, 13, 13); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldEnd(parser, 12, 13); + assertEnd(parser, 13, 13); } TEST(OsmlStreamParser, multipleCommands) @@ -566,13 +646,16 @@ TEST(OsmlStreamParser, multipleCommands) // 012 345 678 90 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertCommand(reader, "b", 3, 5); - assertCommand(reader, "c", 6, 8); - assertCommand(reader, "d", 9, 11); - assertEnd(reader, 11, 11); + assertCommand(parser, "a", 0, 2); + assertEmptyData(parser); + assertCommand(parser, "b", 3, 5); + assertEmptyData(parser); + assertCommand(parser, "c", 6, 8); + assertEmptyData(parser); + assertCommand(parser, "d", 9, 11); + assertEnd(parser, 11, 11); } TEST(OsmlStreamParser, fieldsWithSpaces) @@ -581,33 +664,37 @@ TEST(OsmlStreamParser, fieldsWithSpaces) // 0123 456 789012 3 456 789 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, false, 3, 4); - assertCommand(reader, "b", 4, 6); - assertCommand(reader, "c", 7, 9); - assertFieldEnd(reader, 9, 10); - assertFieldStart(reader, false, 16, 17); - assertCommand(reader, "d", 17, 19); - assertFieldEnd(reader, 19, 20); - assertEnd(reader, 20, 20); + assertCommand(parser, "a", 0, 2); + assertEmptyData(parser); + assertFieldStart(parser, false, 3, 4); + assertCommand(parser, "b", 4, 6); + assertEmptyData(parser); + assertCommand(parser, "c", 7, 9); + assertFieldEnd(parser, 9, 10); + assertEmptyData(parser); + assertFieldStart(parser, false, 16, 17); + assertCommand(parser, "d", 17, 19); + assertFieldEnd(parser, 19, 20); + assertEnd(parser, 20, 20); } -TEST(OsmlStreamParser, errorNoFieldToStart) +TEST(OsmlStreamParser, errorEndButOpenField) { const char *testString = "\\a b {"; // 012345 // 0 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "a", 0, 2); - assertData(reader, "b", 3, 4); + assertCommand(parser, "a", 0, 2); + assertData(parser, "b", 3, 4); + assertFieldStart(parser, false, 5, 6); ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 6, 6); + assertEnd(parser, 6, 6); ASSERT_TRUE(logger.hasError()); } @@ -618,13 +705,13 @@ TEST(OsmlStreamParser, errorNoFieldToEnd) // 0 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "a", 0, 2); - assertData(reader, "b", 3, 4); + assertCommand(parser, "a", 0, 2); + assertData(parser, "b", 3, 4); ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 6, 6); + assertEnd(parser, 6, 6); ASSERT_TRUE(logger.hasError()); } @@ -635,17 +722,17 @@ TEST(OsmlStreamParser, errorNoFieldEndNested) // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertFieldEnd(reader, 13, 14); - assertFieldEnd(reader, 14, 15); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertFieldEnd(parser, 13, 14); + assertFieldEnd(parser, 14, 15); ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 16, 16); + assertEnd(parser, 16, 16); ASSERT_TRUE(logger.hasError()); } @@ -656,18 +743,18 @@ TEST(OsmlStreamParser, errorNoFieldEndNestedData) // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertFieldEnd(reader, 13, 14); - assertFieldEnd(reader, 14, 15); - assertData(reader, "a", 15, 16); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertFieldEnd(parser, 13, 14); + assertFieldEnd(parser, 14, 15); + assertData(parser, "a", 15, 16); ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 17, 17); + assertEnd(parser, 17, 17); ASSERT_TRUE(logger.hasError()); } @@ -678,12 +765,11 @@ TEST(OsmlStreamParser, beginEnd) // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertFieldEnd(reader, 17, 21); - assertEnd(reader, 22, 22); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertRangeEnd(parser, 17, 21); + assertEnd(parser, 22, 22); } TEST(OsmlStreamParser, beginEndWithName) @@ -693,12 +779,11 @@ TEST(OsmlStreamParser, beginEndWithName) // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", {{"name", "a"}}, 7, 11); - assertFieldStart(reader, true, 14, 15); - assertFieldEnd(reader, 19, 23); - assertEnd(reader, 24, 24); + assertCommandStart(parser, "book", true, {{"name", "a"}}, 7, 11); + assertRangeEnd(parser, 19, 23); + assertEnd(parser, 24, 24); } TEST(OsmlStreamParser, beginEndWithNameAndArgs) @@ -708,13 +793,13 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgs) // 0 1 2 3 4 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", - {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, true, 32, 33); - assertFieldEnd(reader, 37, 41); - assertEnd(reader, 42, 42); + assertCommandStart(parser, "book", true, + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, + 11); + assertRangeEnd(parser, 37, 41); + assertEnd(parser, 42, 42); } TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) @@ -725,23 +810,23 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) // 0 1 2 3 4 5 6 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "book", - {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, false, 32, 33); - assertData(reader, "a", 33, 34); - assertCommand(reader, "test", Variant::mapType{}, 35, 40); - assertFieldEnd(reader, 40, 41); - assertFieldStart(reader, false, 41, 42); - assertData(reader, "b", 42, 43); - assertCommand(reader, "test", Variant::mapType{}, 44, 49); - assertFieldStart(reader, false, 49, 50); - assertFieldEnd(reader, 50, 51); - assertFieldEnd(reader, 51, 52); - assertFieldStart(reader, true, 52, 53); - assertFieldEnd(reader, 57, 61); - assertEnd(reader, 62, 62); + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, "book", true, + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, + 11); + assertFieldStart(parser, false, 32, 33); + assertData(parser, "a", 33, 34); + assertCommand(parser, "test", 35, 40); + assertFieldEnd(parser, 40, 41); + assertFieldStart(parser, false, 41, 42); + assertData(parser, "b", 42, 43); + assertCommand(parser, "test", 44, 49); + assertFieldStart(parser, false, 49, 50); + assertFieldEnd(parser, 50, 51); + assertFieldEnd(parser, 51, 52); + assertRangeEnd(parser, 57, 61); + assertEnd(parser, 62, 62); } TEST(OsmlStreamParser, beginEndWithData) @@ -751,13 +836,12 @@ TEST(OsmlStreamParser, beginEndWithData) // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertData(reader, "a", 12, 13); - assertFieldEnd(reader, 18, 22); - assertEnd(reader, 23, 23); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertRangeEnd(parser, 18, 22); + assertEnd(parser, 23, 23); } TEST(OsmlStreamParser, beginEndNested) @@ -768,29 +852,32 @@ TEST(OsmlStreamParser, beginEndNested) // 0 1 2 3 4 5 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, false, 9, 10); - assertData(reader, "b", 10, 11); - assertFieldEnd(reader, 11, 12); - assertFieldStart(reader, true, 13, 14); - assertData(reader, "c", 13, 14); - assertCommand(reader, "d", 22, 23); - assertFieldStart(reader, false, 24, 25); - assertData(reader, "e", 25, 26); - assertFieldEnd(reader, 26, 27); - assertFieldStart(reader, false, 27, 28); - assertData(reader, "f", 28, 29); - assertFieldEnd(reader, 29, 30); - assertFieldStart(reader, true, 31, 32); - assertCommand(reader, "g", 31, 33); - assertFieldStart(reader, false, 33, 34); - assertData(reader, "h", 34, 35); - assertFieldEnd(reader, 35, 36); - assertFieldEnd(reader, 42, 43); - assertFieldEnd(reader, 49, 50); - assertEnd(reader, 51, 51); + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); + assertFieldStart(parser, false, 9, 10); + assertData(parser, "b", 10, 11); + assertFieldEnd(parser, 11, 12); + + assertData(parser, "c", 13, 14); + + assertCommandStart(parser, "d", true, Variant::mapType{}, 22, 23); + assertFieldStart(parser, false, 24, 25); + assertData(parser, "e", 25, 26); + assertFieldEnd(parser, 26, 27); + assertFieldStart(parser, false, 27, 28); + assertData(parser, "f", 28, 29); + assertFieldEnd(parser, 29, 30); + + assertEmptyData(parser); + assertCommand(parser, "g", 31, 33); + assertFieldStart(parser, false, 33, 34); + assertData(parser, "h", 34, 35); + assertFieldEnd(parser, 35, 36); + assertEmptyData(parser); + assertRangeEnd(parser, 42, 43); + assertRangeEnd(parser, 49, 50); + assertEnd(parser, 51, 51); } TEST(OsmlStreamParser, beginEndWithCommand) @@ -800,16 +887,75 @@ TEST(OsmlStreamParser, beginEndWithCommand) // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertCommand(reader, "a", 12, 14); - assertFieldStart(reader, false, 14, 15); - assertData(reader, "test", 15, 19); - assertFieldEnd(reader, 19, 20); - assertFieldEnd(reader, 25, 29); - assertEnd(reader, 30, 30); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertCommand(parser, "a", 12, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "test", 15, 19); + assertFieldEnd(parser, 19, 20); + assertRangeEnd(parser, 25, 29); + assertEnd(parser, 30, 30); +} + +TEST(OsmlStreamParser, beginEndNestedFields) +{ + const char *testString = "\\begin{book}a{{b{c}}}\\end{book}"; + // 012345678901234567890 1234567890 + // 0 1 2 3 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + logger.reset(); + + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertFieldStart(parser, false, 13, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "b", 15, 16); + assertFieldStart(parser, false, 16, 17); + assertData(parser, "c", 17, 18); + assertFieldEnd(parser, 18, 19); + assertFieldEnd(parser, 19, 20); + assertFieldEnd(parser, 20, 21); + assertRangeEnd(parser, 26, 30); + assertEnd(parser, 31, 31); +} + +TEST(OsmlStreamParser, errorBeginEndUnbalancedNestedFields) +{ + const char *testString = "\\begin{book}a{{b{c}}\\end{book}"; + // 012345678901234567890 123456789 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + logger.reset(); + + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertFieldStart(parser, false, 13, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "b", 15, 16); + assertFieldStart(parser, false, 16, 17); + assertData(parser, "c", 17, 18); + assertFieldEnd(parser, 18, 19); + assertFieldEnd(parser, 19, 20); + ASSERT_THROW(assertRangeEnd(parser, 25, 29), LoggableException); +} + +TEST(OsmlStreamParser, errorBeginEndUnbalancedFields) +{ + const char *testString = "{a"; + // 01 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + logger.reset(); + + assertFieldStart(parser, false, 0, 1); + assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 2, 2); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoBraceOpen) @@ -818,12 +964,13 @@ TEST(OsmlStreamParser, errorBeginNoBraceOpen) // 01234567 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertData(reader, "a", 7, 8); + assertData(parser, "a", 7, 8); ASSERT_TRUE(logger.hasError()); + assertEnd(parser, 8, 8); } TEST(OsmlStreamParser, errorBeginNoIdentifier) @@ -831,11 +978,11 @@ TEST(OsmlStreamParser, errorBeginNoIdentifier) const char *testString = "\\begin{!"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_THROW(parser.parse(), LoggableException); ASSERT_TRUE(logger.hasError()); } @@ -844,11 +991,11 @@ TEST(OsmlStreamParser, errorBeginNoBraceClose) const char *testString = "\\begin{a"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_THROW(parser.parse(), LoggableException); ASSERT_TRUE(logger.hasError()); } @@ -857,15 +1004,15 @@ TEST(OsmlStreamParser, errorBeginNoName) const char *testString = "\\begin{a#}"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "a"); + assertCommandStart(parser, "a", true); ASSERT_TRUE(logger.hasError()); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertEnd(reader); + assertEnd(parser); ASSERT_TRUE(logger.hasError()); } @@ -875,11 +1022,11 @@ TEST(OsmlStreamParser, errorEndNoBraceOpen) // 012345 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertData(reader, "a", 5, 6); + assertData(parser, "a", 5, 6); ASSERT_TRUE(logger.hasError()); } @@ -888,11 +1035,11 @@ TEST(OsmlStreamParser, errorEndNoIdentifier) const char *testString = "\\end{!"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_THROW(parser.parse(), LoggableException); ASSERT_TRUE(logger.hasError()); } @@ -901,11 +1048,11 @@ TEST(OsmlStreamParser, errorEndNoBraceClose) const char *testString = "\\end{a"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_THROW(parser.parse(), LoggableException); ASSERT_TRUE(logger.hasError()); } @@ -914,11 +1061,11 @@ TEST(OsmlStreamParser, errorEndNoBegin) const char *testString = "\\end{a}"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_THROW(parser.parse(), LoggableException); ASSERT_TRUE(logger.hasError()); } @@ -929,16 +1076,15 @@ TEST(OsmlStreamParser, errorBeginEndMismatch) // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, true, 10, 11); - assertCommand(reader, "b", 17, 18); - assertFieldStart(reader, true, 20, 24); - assertData(reader, "test", 20, 24); + assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); + assertEmptyData(parser); + assertCommandStart(parser, "b", true, Variant::mapType{}, 17, 18); + assertData(parser, "test", 20, 24); ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_THROW(parser.parse(), LoggableException); ASSERT_TRUE(logger.hasError()); } @@ -948,10 +1094,10 @@ TEST(OsmlStreamParser, commandWithNSSep) // 012345678901 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test1:test2", 0, 12); - assertEnd(reader, 12, 12); + assertCommand(parser, "test1:test2", 0, 12); + assertEnd(parser, 12, 12); } TEST(OsmlStreamParser, beginEndWithNSSep) @@ -961,12 +1107,11 @@ TEST(OsmlStreamParser, beginEndWithNSSep) // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test1:test2", 7, 18); - assertFieldStart(reader, true, 19, 20); - assertFieldEnd(reader, 24, 35); - assertEnd(reader, 36, 36); + assertCommandStart(parser, "test1:test2", true, Variant::mapType{}, 7, 18); + assertRangeEnd(parser, 24, 35); + assertEnd(parser, 36, 36); } TEST(OsmlStreamParser, errorBeginNSSep) @@ -974,15 +1119,14 @@ TEST(OsmlStreamParser, errorBeginNSSep) const char *testString = "\\begin:test{blub}\\end{blub}"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "blub"); + assertCommandStart(parser, "blub", true, Variant::mapType{}); ASSERT_TRUE(logger.hasError()); - assertFieldStart(reader, true); - assertFieldEnd(reader); - assertEnd(reader); + assertRangeEnd(parser); + assertEnd(parser); } TEST(OsmlStreamParser, errorEndNSSep) @@ -990,15 +1134,14 @@ TEST(OsmlStreamParser, errorEndNSSep) const char *testString = "\\begin{blub}\\end:test{blub}"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "blub"); - assertFieldStart(reader, true); + assertCommandStart(parser, "blub", true, Variant::mapType{}); ASSERT_FALSE(logger.hasError()); - assertFieldEnd(reader); + assertRangeEnd(parser); ASSERT_TRUE(logger.hasError()); - assertEnd(reader); + assertEnd(parser); } TEST(OsmlStreamParser, errorEmptyNs) @@ -1006,14 +1149,14 @@ TEST(OsmlStreamParser, errorEmptyNs) const char *testString = "\\test:"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "test"); + assertCommand(parser, "test"); ASSERT_TRUE(logger.hasError()); - assertData(reader, ":"); - assertEnd(reader); + assertData(parser, ":"); + assertEnd(parser); } TEST(OsmlStreamParser, errorRepeatedNs) @@ -1021,14 +1164,14 @@ TEST(OsmlStreamParser, errorRepeatedNs) const char *testString = "\\test::"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "test"); + assertCommand(parser, "test"); ASSERT_TRUE(logger.hasError()); - assertData(reader, "::"); - assertEnd(reader); + assertData(parser, "::"); + assertEnd(parser); } TEST(OsmlStreamParser, explicitDefaultField) @@ -1037,14 +1180,14 @@ TEST(OsmlStreamParser, explicitDefaultField) // 01234567 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertData(reader, "b", 4, 5); - assertFieldEnd(reader, 5, 6); - assertData(reader, "c", 6, 7); - assertEnd(reader, 7, 7); + assertCommand(parser, "a", 0, 2); + assertFieldStart(parser, true, 2, 4); + assertData(parser, "b", 4, 5); + assertFieldEnd(parser, 5, 6); + assertData(parser, "c", 6, 7); + assertEnd(parser, 7, 7); } TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) @@ -1053,33 +1196,33 @@ TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) // 0123 4567 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertCommand(reader, "b", 4, 6); - assertFieldEnd(reader, 6, 7); - assertData(reader, "c", 7, 8); - assertEnd(reader, 8, 8); + assertCommand(parser, "a", 0, 2); + assertFieldStart(parser, true, 2, 4); + assertCommand(parser, "b", 4, 6); + assertFieldEnd(parser, 6, 7); + assertData(parser, "c", 7, 8); + assertEnd(parser, 8, 8); } -TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) +TEST(OsmlStreamParser, fieldAfterExplicitDefaultField) { const char *testString = "\\a{!\\b}{c}"; // 0123 456789 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertCommand(reader, "b", 4, 6); - assertFieldEnd(reader, 6, 7); - ASSERT_FALSE(logger.hasError()); - assertData(reader, "c", 8, 9); - ASSERT_TRUE(logger.hasError()); - assertEnd(reader, 10, 10); + assertCommand(parser, "a", 0, 2); + assertFieldStart(parser, true, 2, 4); + assertCommand(parser, "b", 4, 6); + assertFieldEnd(parser, 6, 7); + assertFieldStart(parser, false, 7, 8); + assertData(parser, "c", 8, 9); + assertFieldEnd(parser, 9, 10); + assertEnd(parser, 10, 10); } TEST(OsmlStreamParser, annotationStart) @@ -1089,10 +1232,10 @@ TEST(OsmlStreamParser, annotationStart) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); - assertEnd(reader, 3, 3); + assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); + assertEnd(parser, 3, 3); } TEST(OsmlStreamParser, annotationStartWithName) @@ -1103,11 +1246,11 @@ TEST(OsmlStreamParser, annotationStartWithName) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(reader, "annotationWithName", + assertAnnotationStart(parser, "annotationWithName", Variant::mapType{{"name", "aName"}}, 0, 20); - assertEnd(reader, 26, 26); + assertEnd(parser, 26, 26); } TEST(OsmlStreamParser, annotationStartWithArguments) @@ -1118,12 +1261,12 @@ TEST(OsmlStreamParser, annotationStartWithArguments) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); assertAnnotationStart( - reader, "annotationWithName", + parser, "annotationWithName", Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); - assertEnd(reader, 35, 35); + assertEnd(parser, 35, 35); } TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) @@ -1134,16 +1277,16 @@ TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); assertAnnotationStart( - reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, + parser, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, 10); - assertFieldStart(reader, true, 26, 27); - assertData(reader, "a", 26, 27); - assertFieldEnd(reader, 33, 35); - assertAnnotationEnd(reader, "", "", 36, 38); - assertEnd(reader, 38, 38); + ASSERT_TRUE(parser.inRangeCommand()); + assertData(parser, "a", 26, 27); + assertRangeEnd(parser, 33, 35); + assertAnnotationEnd(parser, "", "", 36, 38); + assertEnd(parser, 38, 38); } TEST(OsmlStreamParser, annotationEnd) @@ -1153,10 +1296,10 @@ TEST(OsmlStreamParser, annotationEnd) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(reader, "a", "", 0, 2); - assertEnd(reader, 3, 3); + assertAnnotationEnd(parser, "a", "", 0, 2); + assertEnd(parser, 3, 3); } TEST(OsmlStreamParser, annotationEndWithName) @@ -1166,10 +1309,10 @@ TEST(OsmlStreamParser, annotationEndWithName) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(reader, "a", "name", 0, 2); - assertEnd(reader, 8, 8); + assertAnnotationEnd(parser, "a", "name", 0, 2); + assertEnd(parser, 8, 8); } TEST(OsmlStreamParser, annotationEndWithNameAsArgs) @@ -1179,10 +1322,10 @@ TEST(OsmlStreamParser, annotationEndWithNameAsArgs) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(reader, "a", "name", 0, 2); - assertEnd(reader, 14, 14); + assertAnnotationEnd(parser, "a", "name", 0, 2); + assertEnd(parser, 14, 14); } TEST(OsmlStreamParser, errorAnnotationEndWithArguments) @@ -1193,14 +1336,15 @@ TEST(OsmlStreamParser, errorAnnotationEndWithArguments) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); + assertCommandStart(parser, "a", false, Variant::mapType{{"foo", "bar"}}, 0, + 2); ASSERT_TRUE(logger.hasError()); - assertData(reader, ">", 11, 12); - assertEnd(reader, 12, 12); + assertData(parser, ">", 11, 12); + assertEnd(parser, 12, 12); } TEST(OsmlStreamParser, closingAnnotation) @@ -1210,11 +1354,11 @@ TEST(OsmlStreamParser, closingAnnotation) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); - assertData(reader, ">", 3, 4); - assertEnd(reader, 4, 4); + assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); + assertData(parser, ">", 3, 4); + assertEnd(parser, 4, 4); } TEST(OsmlStreamParser, annotationWithFields) @@ -1225,23 +1369,23 @@ TEST(OsmlStreamParser, annotationWithFields) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertData(reader, "a", 0, 1); - assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "c", 6, 7); - assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "d", 9, 10); - assertFieldEnd(reader, 10, 11); - assertFieldStart(reader, true, 11, 13); - assertData(reader, "e", 13, 14); - assertFieldEnd(reader, 14, 15); - assertData(reader, "f", 16, 17); - assertAnnotationEnd(reader, "", "", 18, 20); - assertData(reader, "g", 21, 22); - assertEnd(reader, 22, 22); + OsmlStreamParser parser(charReader, logger); + + assertData(parser, "a", 0, 1); + assertAnnotationStart(parser, "b", Variant::mapType{}, 2, 5); + assertFieldStart(parser, false, 5, 6); + assertData(parser, "c", 6, 7); + assertFieldEnd(parser, 7, 8); + assertFieldStart(parser, false, 8, 9); + assertData(parser, "d", 9, 10); + assertFieldEnd(parser, 10, 11); + assertFieldStart(parser, true, 11, 13); + assertData(parser, "e", 13, 14); + assertFieldEnd(parser, 14, 15); + assertData(parser, "f", 16, 17); + assertAnnotationEnd(parser, "", "", 18, 20); + assertData(parser, "g", 21, 22); + assertEnd(parser, 22, 22); } TEST(OsmlStreamParser, annotationStartEscape) @@ -1252,10 +1396,44 @@ TEST(OsmlStreamParser, annotationStartEscape) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); + + assertData(parser, "<%test", 0, 7); + assertEnd(parser, 7, 7); +} - assertData(reader, "<%test", 0, 7); - assertEnd(reader, 7, 7); +TEST(OsmlStreamParser, userDefinedTokens) +{ + const char *testString = "<<My dear fellows>>, the *old man* said."; + // 0123456789012345678901234567890123456789 + // 0 1 2 3 + + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + TokenId tSpeechStart = parser.registerToken("<<"); + TokenId tSpeechEnd = parser.registerToken(">>"); + TokenId tStar = parser.registerToken("*"); + + ASSERT_TRUE(tSpeechStart != Tokens::Empty); + ASSERT_TRUE(tSpeechEnd != Tokens::Empty); + ASSERT_TRUE(tStar != Tokens::Empty); + + TokenSet tokens{tSpeechStart, tSpeechEnd, tStar}; + + ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + TokenizedDataReader reader = parser.getData().reader(); + + assertToken(reader, tSpeechStart, "<<", tokens, WhitespaceMode::PRESERVE, 0, 2); + assertText(reader, "My dear fellows", tokens, WhitespaceMode::PRESERVE, 2, 17); + assertToken(reader, tSpeechEnd, ">>", tokens, WhitespaceMode::PRESERVE, 17, 19); + assertText(reader, ", the ", tokens, WhitespaceMode::PRESERVE, 19, 25); + assertToken(reader, tStar, "*", tokens, WhitespaceMode::PRESERVE, 25, 26); + assertText(reader, "old man", tokens, WhitespaceMode::PRESERVE, 26, 33); + assertToken(reader, tStar, "*", tokens, WhitespaceMode::PRESERVE, 33, 34); + assertText(reader, " said.", tokens, WhitespaceMode::PRESERVE, 34, 40); + assertEnd(reader); } } diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index 3293370..d4e9443 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -21,6 +21,7 @@ #include <core/frontend/TerminalLogger.hpp> #include <core/common/CharReader.hpp> #include <core/common/Variant.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <formats/osxml/OsxmlEventParser.hpp> @@ -31,10 +32,10 @@ static TerminalLogger logger(std::cerr, true); namespace { enum class OsxmlEvent { - COMMAND, + COMMAND_START, ANNOTATION_START, ANNOTATION_END, - FIELD_END, + RANGE_END, DATA }; @@ -42,9 +43,10 @@ class TestOsxmlEventListener : public OsxmlEvents { public: std::vector<std::pair<OsxmlEvent, Variant>> events; - void command(const Variant &name, const Variant::mapType &args) override + void commandStart(const Variant &name, + const Variant::mapType &args) override { - events.emplace_back(OsxmlEvent::COMMAND, + events.emplace_back(OsxmlEvent::COMMAND_START, Variant::arrayType{name, args}); } @@ -62,25 +64,30 @@ public: Variant::arrayType{className, elementName}); } - void fieldEnd() override + void rangeEnd() override { - events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); + events.emplace_back(OsxmlEvent::RANGE_END, Variant::arrayType{}); } - void data(const Variant &data) override + void data(const TokenizedData &data) override { - events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); + Token token; + Variant text; + TokenizedDataReader reader = data.reader(); + reader.read(token, TokenSet{}, WhitespaceMode::PRESERVE); + EXPECT_EQ(Tokens::Data, token.id); + text = Variant::fromString(token.content); + text.setLocation(token.getLocation()); + events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{text}); } }; static std::vector<std::pair<OsxmlEvent, Variant>> parseXml( - const char *testString, - WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) + const char *testString) { TestOsxmlEventListener listener; CharReader reader(testString); OsxmlEventParser parser(reader, listener, logger); - parser.setWhitespaceMode(whitespaceMode); parser.parse(); return listener.events; } @@ -93,11 +100,11 @@ TEST(OsxmlEventParser, simpleCommandWithArgs) // 0 1 2 3 std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ - {OsxmlEvent::COMMAND, + {OsxmlEvent::COMMAND_START, Variant::arrayType{ "a", Variant::mapType{ {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); @@ -133,10 +140,12 @@ TEST(OsxmlEventParser, magicTopLevelTag) const char *testString = "<ousia><a/><b/></ousia>"; std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}, - {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"b", Variant::mapType{}}}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); @@ -147,71 +156,35 @@ TEST(OsxmlEventParser, magicTopLevelTagInside) const char *testString = "<a><ousia/></a>"; std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, - {OsxmlEvent::COMMAND, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::COMMAND_START, Variant::arrayType{{"ousia", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); } -TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +TEST(OsxmlEventParser, commandWithData) { const char *testString = "<a> hello \n world </a>"; // 012345678901 234567890123 // 0 1 2 std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; - auto events = parseXml(testString, WhitespaceMode::PRESERVE); + auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); // Check the location of the text ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); } - -TEST(OsxmlEventParser, commandWithDataTrimWhitespace) -{ - const char *testString = "<a> hello \n world </a>"; - // 012345678901 234567890123 - // 0 1 2 - - std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::TRIM); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} - -TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) -{ - const char *testString = "<a> hello \n world </a>"; - // 012345678901 234567890123 - // 0 1 2 - - std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::COLLAPSE); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} } |