diff options
| author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-03-03 15:08:18 +0100 | 
|---|---|---|
| committer | Andreas Stöckel <andreas@somweyr.de> | 2015-03-03 15:08:18 +0100 | 
| commit | 466ff991bcfad76d78100193aacbfaf74d542b26 (patch) | |
| tree | dafdb41ec766e83c6e37a8b9865e6ef454ff4def /test | |
| parent | b5cdca0331117ad3834b61eadd94ab3fcb6d2fba (diff) | |
| parent | fb8d4cdf01909b61e4e5d0806ec6de178ff0058c (diff) | |
Storing type and name in the HandlerData once again, using a Token
Conflicts:
	application/src/core/parser/stack/Callbacks.hpp
Diffstat (limited to 'test')
| -rw-r--r-- | test/core/common/UtilsTest.cpp | 33 | ||||
| -rw-r--r-- | test/core/model/OntologyTest.cpp | 175 | ||||
| -rw-r--r-- | test/core/parser/stack/StackTest.cpp | 646 | ||||
| -rw-r--r-- | test/core/parser/stack/TokenRegistryTest.cpp | 82 | ||||
| -rw-r--r-- | test/core/parser/utils/SourceOffsetVectorTest.cpp | 2 | ||||
| -rw-r--r-- | test/core/parser/utils/TokenizedDataTest.cpp | 567 | ||||
| -rw-r--r-- | test/core/parser/utils/TokenizedDataTestUtils.hpp | 64 | ||||
| -rw-r--r-- | test/core/parser/utils/TokenizerTest.cpp | 441 | ||||
| -rw-r--r-- | test/formats/osml/OsmlStreamParserTest.cpp | 1208 | ||||
| -rw-r--r-- | test/formats/osxml/OsxmlEventParserTest.cpp | 99 | 
10 files changed, 1975 insertions, 1342 deletions
| diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 4bf1587..2aaa430 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -131,4 +131,35 @@ TEST(Utils, collapse)  	ASSERT_EQ("long test", Utils::collapse("     long    test   "));  } -}
\ No newline at end of file +TEST(Utils, isUserDefinedToken) +{ +	EXPECT_FALSE(Utils::isUserDefinedToken("")); +	EXPECT_FALSE(Utils::isUserDefinedToken("a")); +	EXPECT_TRUE(Utils::isUserDefinedToken(":")); +	EXPECT_TRUE(Utils::isUserDefinedToken("::")); +	EXPECT_TRUE(Utils::isUserDefinedToken("!?")); +	EXPECT_TRUE(Utils::isUserDefinedToken(".")); +	EXPECT_TRUE(Utils::isUserDefinedToken("<<")); +	EXPECT_TRUE(Utils::isUserDefinedToken(">>")); +	EXPECT_TRUE(Utils::isUserDefinedToken("''")); +	EXPECT_TRUE(Utils::isUserDefinedToken("``")); +	EXPECT_TRUE(Utils::isUserDefinedToken("´´")); +	EXPECT_TRUE(Utils::isUserDefinedToken("´")); +	EXPECT_TRUE(Utils::isUserDefinedToken("`")); +	EXPECT_TRUE(Utils::isUserDefinedToken("<")); +	EXPECT_TRUE(Utils::isUserDefinedToken(">")); +	EXPECT_TRUE(Utils::isUserDefinedToken("<+>")); +	EXPECT_FALSE(Utils::isUserDefinedToken("a:")); +	EXPECT_FALSE(Utils::isUserDefinedToken("a:a")); +	EXPECT_FALSE(Utils::isUserDefinedToken(":a")); +	EXPECT_FALSE(Utils::isUserDefinedToken("{")); +	EXPECT_FALSE(Utils::isUserDefinedToken("{{")); +	EXPECT_FALSE(Utils::isUserDefinedToken("}}")); +	EXPECT_FALSE(Utils::isUserDefinedToken("{{}{}")); +	EXPECT_FALSE(Utils::isUserDefinedToken("<\\")); +	EXPECT_FALSE(Utils::isUserDefinedToken("\\>")); +	EXPECT_FALSE(Utils::isUserDefinedToken("{!")); +	EXPECT_FALSE(Utils::isUserDefinedToken("< + >")); +} + +} diff --git a/test/core/model/OntologyTest.cpp b/test/core/model/OntologyTest.cpp index 764dcb4..265e9e2 100644 --- a/test/core/model/OntologyTest.cpp +++ b/test/core/model/OntologyTest.cpp @@ -82,9 +82,7 @@ TEST(Ontology, testOntologyResolving)  }  // i use this wrapper due to the strange behaviour of GTEST. -static void assertFalse(bool b){ -	ASSERT_FALSE(b); -} +static void assertFalse(bool b) { ASSERT_FALSE(b); }  static Rooted<FieldDescriptor> createUnsortedPrimitiveField(      Handle<StructuredClass> strct, Handle<Type> type, Logger &logger, bool tree, @@ -170,7 +168,6 @@ TEST(StructuredClass, getFieldDescriptors)  	}  } -  TEST(StructuredClass, getFieldDescriptorsCycles)  {  	Logger logger; @@ -523,6 +520,91 @@ TEST(Descriptor, getPermittedChildrenCycles)  	ASSERT_EQ(A, children[0]);  } +TEST(Descriptor, getSyntaxDescriptor) +{ +	// build an ontology with some custom syntax. +	Manager mgr{1}; +	Logger logger; +	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)}; +	// Construct the ontology +	Rooted<Ontology> ontology{new Ontology(mgr, sys, "ontology")}; +	Rooted<StructuredClass> A{new StructuredClass( +	    mgr, "A", ontology, Cardinality::any(), {nullptr}, true, true)}; +	A->setStartToken(TokenDescriptor(Tokens::Indent)); +	A->setEndToken(TokenDescriptor(Tokens::Dedent)); +	{ +		TokenDescriptor sh{"<+>"}; +		sh.id = 1; +		A->setShortToken(sh); +	} +	// check the SyntaxDescriptor +	SyntaxDescriptor stx = A->getSyntaxDescriptor(); +	ASSERT_EQ(Tokens::Indent, stx.start); +	ASSERT_EQ(Tokens::Dedent, stx.end); +	ASSERT_EQ(1, stx.shortForm); +	ASSERT_EQ(A, stx.descriptor); +	ASSERT_TRUE(stx.isStruct()); +	ASSERT_FALSE(stx.isAnnotation()); +	ASSERT_FALSE(stx.isFieldDescriptor()); +} + +TEST(Descriptor, getPermittedTokens) +{ +	// build an ontology with some custom syntax. +	Manager mgr{1}; +	Logger logger; +	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)}; +	// Construct the ontology +	Rooted<Ontology> ontology{new Ontology(mgr, sys, "ontology")}; +	// add one StructuredClass with all tokens set. +	Rooted<StructuredClass> A{new StructuredClass( +	    mgr, "A", ontology, Cardinality::any(), {nullptr}, true, true)}; +	A->setStartToken(TokenDescriptor(Tokens::Indent)); +	A->setEndToken(TokenDescriptor(Tokens::Dedent)); +	{ +		TokenDescriptor sh{"<+>"}; +		sh.id = 1; +		A->setShortToken(sh); +	} +	// add a field with one token set. +	Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first; +	A_field->setEndToken(TokenDescriptor(Tokens::Newline)); +	A_field->addChild(A); +	// add an annotation with start and end set. +	Rooted<AnnotationClass> A_anno = ontology->createAnnotationClass("A"); +	{ +		TokenDescriptor start{"<"}; +		start.id = 7; +		A_anno->setStartToken(start); +	} +	{ +		TokenDescriptor end{">"}; +		end.id = 8; +		A_anno->setEndToken(end); +	} +	// add a trivial annotation, which should not be returned. +	Rooted<AnnotationClass> B_anno = ontology->createAnnotationClass("B"); +	ASSERT_TRUE(ontology->validate(logger)); + +	// check result. +	std::vector<SyntaxDescriptor> stxs = A->getPermittedTokens(); +	ASSERT_EQ(3, stxs.size()); +	// the field should be first, because A itself should not be collected +	// directly. +	ASSERT_EQ(A_field, stxs[0].descriptor); +	ASSERT_EQ(Tokens::Empty, stxs[0].start); +	ASSERT_EQ(Tokens::Newline, stxs[0].end); +	ASSERT_EQ(Tokens::Empty, stxs[0].shortForm); +	ASSERT_EQ(A, stxs[1].descriptor); +	ASSERT_EQ(Tokens::Indent, stxs[1].start); +	ASSERT_EQ(Tokens::Dedent, stxs[1].end); +	ASSERT_EQ(1, stxs[1].shortForm); +	ASSERT_EQ(A_anno, stxs[2].descriptor); +	ASSERT_EQ(7, stxs[2].start); +	ASSERT_EQ(8, stxs[2].end); +	ASSERT_EQ(Tokens::Empty, stxs[2].shortForm); +} +  TEST(StructuredClass, isSubclassOf)  {  	// create an inheritance hierarchy. @@ -619,6 +701,14 @@ TEST(Ontology, validate)  		base->setName("myClass");  		ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState());  		ASSERT_TRUE(ontology->validate(logger)); +		// add an invalid short token. +		base->setShortToken(TokenDescriptor("bla")); +		ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); +		ASSERT_FALSE(ontology->validate(logger)); +		// make it valid. +		base->setShortToken(TokenDescriptor("!bla!")); +		ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); +		ASSERT_TRUE(ontology->validate(logger));  		// Let's add a primitive field (without a primitive type at first)  		Rooted<FieldDescriptor> base_field =  		    base->createPrimitiveFieldDescriptor(nullptr, logger).first; @@ -629,6 +719,14 @@ TEST(Ontology, validate)  		base_field->setPrimitiveType(sys->getStringType());  		ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState());  		ASSERT_TRUE(ontology->validate(logger)); +		// add an invalid start token. +		base_field->setStartToken(TokenDescriptor("< + >")); +		ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); +		ASSERT_FALSE(ontology->validate(logger)); +		// make it valid. +		base_field->setStartToken(TokenDescriptor("<")); +		ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); +		ASSERT_TRUE(ontology->validate(logger));  		// add a subclass for our base class.  		Rooted<StructuredClass> sub{new StructuredClass(mgr, "sub", ontology)};  		// this should be valid in itself. @@ -686,4 +784,71 @@ TEST(Ontology, validate)  		ASSERT_TRUE(ontology->validate(logger));  	}  } -}
\ No newline at end of file + +TEST(Ontology, getAllTokenDescriptors) +{ +	// build an ontology with some custom syntax. +	Manager mgr{1}; +	Logger logger; +	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)}; +	// Construct the ontology +	Rooted<Ontology> ontology{new Ontology(mgr, sys, "ontology")}; +	// add one StructuredClass with all tokens set. +	Rooted<StructuredClass> A{new StructuredClass( +	    mgr, "A", ontology, Cardinality::any(), {nullptr}, true, true)}; +	A->setStartToken(TokenDescriptor(Tokens::Indent)); +	A->setEndToken(TokenDescriptor(Tokens::Dedent)); +	{ +		TokenDescriptor sh{"<+>"}; +		sh.id = 1; +		A->setShortToken(sh); +	} +	// add a field with one token set. +	Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first; +	A_field->setEndToken(TokenDescriptor(Tokens::Newline)); +	A_field->addChild(A); +	// add an annotation with start and end set. +	Rooted<AnnotationClass> A_anno = ontology->createAnnotationClass("A"); +	{ +		TokenDescriptor start{"<"}; +		start.id = 7; +		A_anno->setStartToken(start); +	} +	{ +		TokenDescriptor end{">"}; +		end.id = 8; +		A_anno->setEndToken(end); +	} +	// add a trivial annotation, which should not be returned. +	Rooted<AnnotationClass> B_anno = ontology->createAnnotationClass("B"); +	ASSERT_TRUE(ontology->validate(logger)); + +	// check the result. +	std::vector<TokenDescriptor *> tks = ontology->getAllTokenDescriptors(); + +	// A short token +	ASSERT_EQ("<+>", tks[0]->token); +	ASSERT_EQ(1, tks[0]->id); +	ASSERT_FALSE(tks[0]->special); +	// A start token +	ASSERT_EQ("", tks[1]->token); +	ASSERT_EQ(Tokens::Indent, tks[1]->id); +	ASSERT_TRUE(tks[1]->special); +	// A end token +	ASSERT_EQ("", tks[2]->token); +	ASSERT_EQ(Tokens::Dedent, tks[2]->id); +	ASSERT_TRUE(tks[2]->special); +	// A field end token +	ASSERT_EQ("", tks[3]->token); +	ASSERT_EQ(Tokens::Newline, tks[3]->id); +	ASSERT_TRUE(tks[3]->special); +	// A anno start token +	ASSERT_EQ("<", tks[4]->token); +	ASSERT_EQ(7, tks[4]->id); +	ASSERT_FALSE(tks[4]->special); +	// A anno end token +	ASSERT_EQ(">", tks[5]->token); +	ASSERT_EQ(8, tks[5]->id); +	ASSERT_FALSE(tks[5]->special); +} +} diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index a93f14a..a831c32 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -21,9 +21,11 @@  #include <gtest/gtest.h>  #include <core/frontend/TerminalLogger.hpp> +#include <core/parser/stack/Callbacks.hpp>  #include <core/parser/stack/Handler.hpp>  #include <core/parser/stack/Stack.hpp>  #include <core/parser/stack/State.hpp> +#include <core/parser/utils/TokenizedData.hpp>  #include <core/StandaloneEnvironment.hpp> @@ -37,70 +39,88 @@ static StandaloneEnvironment env(logger);  namespace { +class Parser : public ParserCallbacks { +	TokenId registerToken(const std::string &token) override +	{ +		return Tokens::Empty; +	} + +	void unregisterToken(TokenId id) override +	{ +		// Do nothing here +	} +}; + +static Parser parser; +  struct Tracker { -	int startCount; +	int startCommandCount; +	int startAnnotationCount; +	int startTokenCount; +	int endTokenCount;  	int endCount;  	int fieldStartCount;  	int fieldEndCount; -	int annotationStartCount; -	int annotationEndCount;  	int dataCount; -	Variant::mapType startArgs; -	bool fieldStartIsDefault; -	size_t fieldStartIdx; -	Variant annotationStartClassName; -	Variant::mapType annotationStartArgs; -	Variant annotationEndClassName; -	Variant annotationEndElementName; -	Variant dataData; - -	bool startResult; -	bool fieldStartSetIsDefault; +	bool startCommandResult; +	bool startAnnotationResult; +	bool startTokenResult; +	Handler::EndTokenResult endTokenResult;  	bool fieldStartResult; -	bool annotationStartResult; -	bool annotationEndResult;  	bool dataResult; +	Variant::mapType startCommandArgs; +	Variant::mapType startAnnotationArgs; + +	bool fieldStartReturnValue; +	size_t fieldStartIdx; +	bool fieldStartIsDefault; +	bool fieldStartSetIsDefault; + +	Variant dataData; +  	Tracker() { reset(); }  	void reset()  	{ -		startCount = 0; +		startCommandCount = 0; +		startAnnotationCount = 0; +		startTokenCount = 0; +		endTokenCount = 0;  		endCount = 0;  		fieldStartCount = 0;  		fieldEndCount = 0; -		annotationStartCount = 0; -		annotationEndCount = 0;  		dataCount = 0; -		startArgs = Variant::mapType{}; -		fieldStartIsDefault = false; -		fieldStartIdx = 0; -		annotationStartClassName = Variant::fromString(std::string{}); -		annotationStartArgs = Variant::mapType{}; -		annotationEndClassName = Variant::fromString(std::string{}); -		annotationEndElementName = Variant::fromString(std::string{}); -		dataData = Variant::fromString(std::string{}); - -		startResult = true; -		fieldStartSetIsDefault = false; +		startCommandResult = true; +		startAnnotationResult = true; +		startTokenResult = true; +		endTokenResult = Handler::EndTokenResult::ENDED_THIS;  		fieldStartResult = true; -		annotationStartResult = true; -		annotationEndResult = true;  		dataResult = true; + +		startCommandArgs = Variant::mapType{}; +		startAnnotationArgs = Variant::mapType{}; + +		fieldStartIdx = 0; +		fieldStartIsDefault = false; +		fieldStartSetIsDefault = false; + +		dataData = Variant{};  	} -	void expect(int startCount, int endCount, int fieldStartCount, -	            int fieldEndCount, int annotationStartCount, -	            int annotationEndCount, int dataCount) +	void expect(int startCommandCount, int endCount, int fieldStartCount, +	            int fieldEndCount, int dataCount, int startAnnotationCount = 0, +	            int startTokenCount = 0, int endTokenCount = 0)  	{ -		EXPECT_EQ(startCount, this->startCount); +		EXPECT_EQ(startCommandCount, this->startCommandCount); +		EXPECT_EQ(startAnnotationCount, this->startAnnotationCount); +		EXPECT_EQ(startTokenCount, this->startTokenCount); +		EXPECT_EQ(endTokenCount, this->endTokenCount);  		EXPECT_EQ(endCount, this->endCount);  		EXPECT_EQ(fieldStartCount, this->fieldStartCount);  		EXPECT_EQ(fieldEndCount, this->fieldEndCount); -		EXPECT_EQ(annotationStartCount, this->annotationStartCount); -		EXPECT_EQ(annotationEndCount, this->annotationEndCount);  		EXPECT_EQ(dataCount, this->dataCount);  	}  }; @@ -112,55 +132,57 @@ private:  	TestHandler(const HandlerData &handlerData) : Handler(handlerData) {}  public: -	bool start(Variant::mapType &args) override +	bool startCommand(Variant::mapType &args) override  	{ -		tracker.startCount++; -		tracker.startArgs = args; -		if (!tracker.startResult) { +		tracker.startCommandArgs = args; +		tracker.startCommandCount++; +		if (!tracker.startCommandResult) {  			logger().error( -			    "The TestHandler was told not to allow a field start. So it " -			    "doesn't. The TestHandler always obeys its master."); +			    "TestHandler was told not to allow a command start. " +			    "TestHandler always obeys its master.");  		} -		return tracker.startResult; +		return tracker.startCommandResult; +	} + +	bool startAnnotation(Variant::mapType &args, +	                     AnnotationType annotationType) override +	{ +		tracker.startAnnotationArgs = args; +		tracker.startAnnotationCount++; +		return tracker.startAnnotationResult; +	} + +	bool startToken(Handle<Node> node) override +	{ +		tracker.startTokenCount++; +		return tracker.startTokenResult; +	} + +	EndTokenResult endToken(const Token &token, Handle<Node> node) override +	{ +		tracker.endTokenCount++; +		return tracker.endTokenResult;  	}  	void end() override { tracker.endCount++; }  	bool fieldStart(bool &isDefault, size_t fieldIdx) override  	{ -		tracker.fieldStartCount++;  		tracker.fieldStartIsDefault = isDefault;  		tracker.fieldStartIdx = fieldIdx;  		if (tracker.fieldStartSetIsDefault) {  			isDefault = true;  		} +		tracker.fieldStartCount++;  		return tracker.fieldStartResult;  	}  	void fieldEnd() override { tracker.fieldEndCount++; } -	bool annotationStart(const Variant &className, -	                     Variant::mapType &args) override -	{ -		tracker.annotationStartCount++; -		tracker.annotationStartClassName = className; -		tracker.annotationStartArgs = args; -		return tracker.annotationStartResult; -	} - -	bool annotationEnd(const Variant &className, -	                   const Variant &elementName) override -	{ -		tracker.annotationEndCount++; -		tracker.annotationEndClassName = className; -		tracker.annotationEndElementName = elementName; -		return tracker.annotationEndResult; -	} - -	bool data(Variant &data) override +	bool data() override  	{ +		tracker.dataData = readData();  		tracker.dataCount++; -		tracker.dataData = data;  		return tracker.dataResult;  	} @@ -204,75 +226,137 @@ TEST(Stack, basicTest)  	tracker.reset();  	logger.reset();  	{ -		Stack s{env.context, States::TestHandlers}; +		Stack s{parser, env.context, States::TestHandlers};  		EXPECT_EQ("", s.currentCommandName());  		EXPECT_EQ(&States::None, &s.currentState()); -		s.command("document", {}); +		s.commandStart("document", {});  		s.fieldStart(true);  		s.data("test1");  		EXPECT_EQ("document", s.currentCommandName());  		EXPECT_EQ(&States::Document, &s.currentState()); -		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 0, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc -		s.command("body", {}); +		s.commandStart("body", {});  		s.fieldStart(true);  		s.data("test2");  		EXPECT_EQ("body", s.currentCommandName());  		EXPECT_EQ(&States::Body, &s.currentState()); -		tracker.expect(2, 0, 2, 0, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(2, 0, 2, 0, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc -		s.command("inner", {}); +		s.commandStart("inner", {});  		s.fieldStart(true);  		EXPECT_EQ("inner", s.currentCommandName());  		EXPECT_EQ(&States::BodyChildren, &s.currentState());  		s.fieldEnd(); -		tracker.expect(3, 0, 3, 1, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(3, 0, 3, 1, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.fieldEnd();  		EXPECT_EQ("body", s.currentCommandName());  		EXPECT_EQ(&States::Body, &s.currentState()); -		tracker.expect(3, 1, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(3, 1, 3, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc -		s.command("body", {}); +		s.commandStart("body", {});  		EXPECT_EQ("body", s.currentCommandName());  		EXPECT_EQ(&States::Body, &s.currentState()); -		tracker.expect(4, 2, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(4, 2, 3, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.fieldStart(true);  		s.data("test3");  		EXPECT_EQ("body", s.currentCommandName());  		EXPECT_EQ(&States::Body, &s.currentState());  		s.fieldEnd(); -		tracker.expect(4, 2, 4, 3, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(4, 2, 4, 3, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc  		EXPECT_EQ("body", s.currentCommandName());  		EXPECT_EQ(&States::Body, &s.currentState());  		s.fieldEnd(); -		tracker.expect(4, 3, 4, 4, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(4, 3, 4, 4, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc  		EXPECT_EQ("document", s.currentCommandName());  		EXPECT_EQ(&States::Document, &s.currentState());  	} -	tracker.expect(4, 4, 4, 4, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(4, 4, 4, 4, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, basicTestRangeCommands) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{parser, env.context, States::TestHandlers}; + +		EXPECT_EQ("", s.currentCommandName()); +		EXPECT_EQ(&States::None, &s.currentState()); + +		s.commandStart("document", {}, true); +		EXPECT_EQ("document", s.currentCommandName()); +		EXPECT_EQ(&States::Document, &s.currentState()); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc + +		s.data("test1"); +		tracker.expect(1, 0, 1, 0, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc + +		s.commandStart("body", {}, true); +		tracker.expect(2, 0, 1, 0, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc +		s.data("test2"); +		tracker.expect(2, 0, 2, 0, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ("body", s.currentCommandName()); +		EXPECT_EQ(&States::Body, &s.currentState()); + +		s.commandStart("inner", {}, true); +		tracker.expect(3, 0, 2, 0, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ("inner", s.currentCommandName()); +		EXPECT_EQ(&States::BodyChildren, &s.currentState()); +		s.rangeEnd(); +		tracker.expect(3, 1, 3, 1, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ("body", s.currentCommandName()); +		EXPECT_EQ(&States::Body, &s.currentState()); +		s.rangeEnd(); +		tracker.expect(3, 2, 3, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc + +		s.commandStart("body", {}, true); +		EXPECT_EQ("body", s.currentCommandName()); +		EXPECT_EQ(&States::Body, &s.currentState()); +		tracker.expect(4, 2, 3, 2, 2);  // scc, ec, fsc, fse, dc, sac, stc, etc +		s.fieldStart(true); +		tracker.expect(4, 2, 4, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +		s.data("test3"); +		tracker.expect(4, 2, 4, 2, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ("body", s.currentCommandName()); +		EXPECT_EQ(&States::Body, &s.currentState()); +		s.fieldEnd(); +		tracker.expect(4, 2, 4, 3, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ("body", s.currentCommandName()); +		EXPECT_EQ(&States::Body, &s.currentState()); +		s.rangeEnd(); +		tracker.expect(4, 3, 4, 3, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc + +		EXPECT_EQ("document", s.currentCommandName()); +		EXPECT_EQ(&States::Document, &s.currentState()); +		s.rangeEnd(); +		tracker.expect(4, 4, 4, 4, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc +	} +	tracker.expect(4, 4, 4, 4, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  }  TEST(Stack, errorInvalidCommands)  { -	Stack s{env.context, States::TestHandlers}; +	Stack s{parser, env.context, States::TestHandlers};  	tracker.reset(); -	EXPECT_THROW(s.command("body", {}), LoggableException); -	s.command("document", {}); +	EXPECT_THROW(s.commandStart("body", {}), LoggableException); +	s.commandStart("document", {});  	s.fieldStart(true); -	EXPECT_THROW(s.command("document", {}), LoggableException); -	s.command("empty", {}); +	EXPECT_THROW(s.commandStart("document", {}), LoggableException); +	s.commandStart("empty", {});  	s.fieldStart(true); -	EXPECT_THROW(s.command("body", {}), LoggableException); -	s.command("special", {}); +	EXPECT_THROW(s.commandStart("body", {}), LoggableException); +	s.commandStart("special", {});  	s.fieldStart(true);  	s.fieldEnd();  	s.fieldEnd(); @@ -288,23 +372,23 @@ TEST(Stack, errorInvalidCommands)  TEST(Stack, validation)  { -	Stack s{env.context, States::TestHandlers}; +	Stack s{parser, env.context, States::TestHandlers};  	tracker.reset();  	logger.reset(); -	s.command("arguments", {}); +	s.commandStart("arguments", {});  	EXPECT_TRUE(logger.hasError());  	s.fieldStart(true);  	s.fieldEnd();  	logger.reset(); -	s.command("arguments", {{"a", 5}}); +	s.commandStart("arguments", {{"a", 5}}, false);  	EXPECT_TRUE(logger.hasError());  	s.fieldStart(true);  	s.fieldEnd();  	logger.reset(); -	s.command("arguments", {{"a", 5}, {"b", "test"}}); +	s.commandStart("arguments", {{"a", 5}, {"b", "test"}}, false);  	EXPECT_FALSE(logger.hasError());  	s.fieldStart(true);  	s.fieldEnd(); @@ -315,33 +399,33 @@ TEST(Stack, invalidCommandName)  	tracker.reset();  	logger.reset(); -	Stack s{env.context, States::AnyHandlers}; -	s.command("a", {}); -	tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	Stack s{parser, env.context, States::AnyHandlers}; +	s.commandStart("a", {}); +	tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	s.fieldStart(true);  	s.fieldEnd(); -	tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 0, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc -	s.command("a_", {}); -	tracker.expect(2, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	s.commandStart("a_", {}); +	tracker.expect(2, 1, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	s.fieldStart(true);  	s.fieldEnd(); -	tracker.expect(2, 1, 2, 2, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(2, 1, 2, 2, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc -	s.command("a_:b", {}); -	tracker.expect(3, 2, 2, 2, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	s.commandStart("a_:b", {}); +	tracker.expect(3, 2, 2, 2, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	s.fieldStart(true);  	s.fieldEnd(); -	tracker.expect(3, 2, 3, 3, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(3, 2, 3, 3, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc -	ASSERT_THROW(s.command("_a", {}), LoggableException); -	tracker.expect(3, 3, 3, 3, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_THROW(s.commandStart("_a", {}), LoggableException); +	tracker.expect(3, 3, 3, 3, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc -	ASSERT_THROW(s.command("a:", {}), LoggableException); -	tracker.expect(3, 3, 3, 3, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_THROW(s.commandStart("a:", {}), LoggableException); +	tracker.expect(3, 3, 3, 3, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc -	ASSERT_THROW(s.command("a:_b", {}), LoggableException); -	tracker.expect(3, 3, 3, 3, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_THROW(s.commandStart("a:_b", {}), LoggableException); +	tracker.expect(3, 3, 3, 3, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  }  TEST(Stack, multipleFields) @@ -349,50 +433,50 @@ TEST(Stack, multipleFields)  	tracker.reset();  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; +		Stack s{parser, env.context, States::AnyHandlers}; -		s.command("a", {{"a", false}}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("a", {{"a", false}}, false); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		EXPECT_EQ("a", s.currentCommandName()); -		EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs); +		EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startCommandArgs);  		s.fieldStart(false); -		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		EXPECT_FALSE(tracker.fieldStartIsDefault);  		EXPECT_EQ(0U, tracker.fieldStartIdx);  		s.data("test"); -		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc -		EXPECT_EQ("test", tracker.dataData); +		tracker.expect(1, 0, 1, 0, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ("test", tracker.dataData.asString());  		s.fieldEnd(); -		tracker.expect(1, 0, 1, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 1, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.fieldStart(false); -		tracker.expect(1, 0, 2, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 2, 1, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc  		EXPECT_FALSE(tracker.fieldStartIsDefault);  		EXPECT_EQ(1U, tracker.fieldStartIdx);  		s.data("test2"); -		tracker.expect(1, 0, 2, 1, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc -		EXPECT_EQ("test2", tracker.dataData); +		tracker.expect(1, 0, 2, 1, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ("test2", tracker.dataData.asString());  		s.fieldEnd(); -		tracker.expect(1, 0, 2, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 2, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.fieldStart(true); -		tracker.expect(1, 0, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 3, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc  		EXPECT_TRUE(tracker.fieldStartIsDefault);  		EXPECT_EQ(2U, tracker.fieldStartIdx);  		s.data("test3"); -		tracker.expect(1, 0, 3, 2, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc -		EXPECT_EQ("test3", tracker.dataData); +		tracker.expect(1, 0, 3, 2, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ("test3", tracker.dataData.asString());  		s.fieldEnd(); -		tracker.expect(1, 0, 3, 3, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 3, 3, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(1, 1, 3, 3, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 1, 3, 3, 3);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -401,15 +485,15 @@ TEST(Stack, implicitDefaultFieldOnNewCommand)  	tracker.reset();  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; +		Stack s{parser, env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc -		s.command("b", {}); -		tracker.expect(2, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("b", {}); +		tracker.expect(2, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(2, 2, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(2, 2, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -418,21 +502,21 @@ TEST(Stack, implicitDefaultFieldOnNewCommandWithExplicitDefaultField)  	tracker.reset();  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; +		Stack s{parser, env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("a", s.currentCommandName()); -		s.command("b", {}); -		tracker.expect(2, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("b", {}); +		tracker.expect(2, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("b", s.currentCommandName());  		s.fieldStart(true);  		s.fieldEnd(); -		tracker.expect(2, 0, 2, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(2, 0, 2, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("b", s.currentCommandName());  	} -	tracker.expect(2, 2, 2, 2, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(2, 2, 2, 2, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -441,18 +525,18 @@ TEST(Stack, noImplicitDefaultFieldOnIncompatibleCommand)  	tracker.reset();  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; +		Stack s{parser, env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("a", s.currentCommandName());  		tracker.fieldStartResult = false; -		s.command("b", {}); -		tracker.expect(2, 1, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("b", {}); +		tracker.expect(2, 1, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("b", s.currentCommandName());  	} -	tracker.expect(2, 2, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(2, 2, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -461,23 +545,23 @@ TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven)  	tracker.reset();  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; +		Stack s{parser, env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("a", s.currentCommandName());  		s.fieldStart(true); -		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("a", s.currentCommandName());  		s.fieldEnd(); -		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("a", s.currentCommandName()); -		s.command("b", {}); -		tracker.expect(2, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("b", {}); +		tracker.expect(2, 1, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("b", s.currentCommandName());  	} -	tracker.expect(2, 2, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(2, 2, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -486,18 +570,18 @@ TEST(Stack, noEndIfStartFails)  	tracker.reset();  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; +		Stack s{parser, env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		ASSERT_EQ("a", s.currentCommandName()); -		tracker.startResult = false; -		s.command("b", {}); -		tracker.expect(3, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc -		ASSERT_EQ("b", s.currentCommandName()); +		tracker.startCommandResult = false; +		s.commandStart("b", {}); +		tracker.expect(3, 1, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc +		EXPECT_EQ(&States::None, &s.currentState());  	} -	tracker.expect(3, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(3, 1, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_TRUE(logger.hasError());  } @@ -506,15 +590,15 @@ TEST(Stack, implicitDefaultFieldOnData)  	tracker.reset();  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; +		Stack s{parser, env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.data("test"); -		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 0, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(1, 1, 1, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 1, 1, 1, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -524,11 +608,11 @@ TEST(Stack, autoFieldEnd)  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		Stack s{parser, env.context, States::AnyHandlers}; +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(1, 1, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 1, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -538,17 +622,17 @@ TEST(Stack, autoImplicitFieldEnd)  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); -		s.command("b", {}); -		s.command("c", {}); -		s.command("d", {}); -		s.command("e", {}); +		Stack s{parser, env.context, States::AnyHandlers}; +		s.commandStart("a", {}); +		s.commandStart("b", {}); +		s.commandStart("c", {}); +		s.commandStart("d", {}); +		s.commandStart("e", {});  		s.fieldStart(true);  		s.fieldEnd(); -		tracker.expect(5, 0, 5, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(5, 0, 5, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(5, 5, 5, 5, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(5, 5, 5, 5, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -558,14 +642,14 @@ TEST(Stack, invalidDefaultField)  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); +		Stack s{parser, env.context, States::AnyHandlers}; +		s.commandStart("a", {});  		tracker.fieldStartResult = false;  		s.fieldStart(true);  		s.fieldEnd(); -		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(1, 1, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 1, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	ASSERT_FALSE(logger.hasError());  } @@ -575,17 +659,17 @@ TEST(Stack, errorInvalidDefaultFieldData)  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); +		Stack s{parser, env.context, States::AnyHandlers}; +		s.commandStart("a", {});  		tracker.fieldStartResult = false;  		s.fieldStart(true);  		ASSERT_FALSE(logger.hasError());  		s.data("test");  		ASSERT_TRUE(logger.hasError());  		s.fieldEnd(); -		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(1, 1, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 1, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  }  TEST(Stack, errorInvalidFieldData) @@ -594,17 +678,17 @@ TEST(Stack, errorInvalidFieldData)  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); +		Stack s{parser, env.context, States::AnyHandlers}; +		s.commandStart("a", {});  		tracker.fieldStartResult = false;  		ASSERT_FALSE(logger.hasError());  		s.fieldStart(false);  		ASSERT_TRUE(logger.hasError());  		s.data("test");  		s.fieldEnd(); -		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(1, 1, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 1, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  }  TEST(Stack, errorFieldStartNoCommand) @@ -612,10 +696,10 @@ TEST(Stack, errorFieldStartNoCommand)  	tracker.reset();  	logger.reset(); -	Stack s{env.context, States::AnyHandlers}; +	Stack s{parser, env.context, States::AnyHandlers};  	ASSERT_THROW(s.fieldStart(false), LoggableException);  	ASSERT_THROW(s.fieldStart(true), LoggableException); -	tracker.expect(0, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(0, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  }  TEST(Stack, errorMultipleFieldStarts) @@ -624,20 +708,20 @@ TEST(Stack, errorMultipleFieldStarts)  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		Stack s{parser, env.context, States::AnyHandlers}; +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.fieldStart(false);  		ASSERT_FALSE(logger.hasError());  		s.fieldStart(false);  		ASSERT_TRUE(logger.hasError()); -		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.fieldEnd(); -		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 1, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  }  TEST(Stack, errorMultipleFieldEnds) @@ -646,102 +730,122 @@ TEST(Stack, errorMultipleFieldEnds)  	logger.reset();  	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		Stack s{parser, env.context, States::AnyHandlers}; +		s.commandStart("a", {}); +		tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.fieldStart(false);  		s.fieldEnd();  		ASSERT_FALSE(logger.hasError()); -		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 0, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  		s.fieldEnd();  		ASSERT_TRUE(logger.hasError()); -		tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		tracker.expect(1, 1, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  	} -	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	tracker.expect(1, 1, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc  }  TEST(Stack, errorOpenField)  { -	tracker.reset(); -	logger.reset(); - -	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc - -		s.fieldStart(false); -		ASSERT_FALSE(logger.hasError()); -	} -	ASSERT_TRUE(logger.hasError()); -	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +    tracker.reset(); +    logger.reset(); + +    { +        Stack s{parser, env.context, States::AnyHandlers}; +        s.commandStart("a", {}); +        tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + +        s.fieldStart(false); +        ASSERT_FALSE(logger.hasError()); +    } +    ASSERT_TRUE(logger.hasError()); +    tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc  }  TEST(Stack, fieldEndWhenImplicitDefaultFieldOpen)  { -	tracker.reset(); -	logger.reset(); - -	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); -		s.fieldStart(true); -		s.command("b", {}); -		s.data("test"); -		s.fieldEnd(); -		tracker.expect(2, 1, 2, 2, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc -	} -	tracker.expect(2, 2, 2, 2, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc -	ASSERT_FALSE(logger.hasError()); +    tracker.reset(); +    logger.reset(); + +    { +        Stack s{parser, env.context, States::AnyHandlers}; +        s.commandStart("a", {}); +        s.fieldStart(true); +        s.commandStart("b", {}); +        s.data("test"); +        s.fieldEnd(); +        tracker.expect(2, 1, 2, 2, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc +    } +    tracker.expect(2, 2, 2, 2, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc +    ASSERT_FALSE(logger.hasError());  }  TEST(Stack, fieldAfterDefaultField)  { -	tracker.reset(); -	logger.reset(); - -	{ -		Stack s{env.context, States::AnyHandlers}; -		s.command("a", {}); -		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc -		s.fieldStart(true); -		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc - -		s.command("b", {}); -		tracker.expect(2, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc - -		s.fieldStart(false); -		tracker.expect(2, 0, 2, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc -		s.data("f1"); -		tracker.expect(2, 0, 2, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc -		s.fieldEnd(); -		tracker.expect(2, 0, 2, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc -		tracker.fieldStartSetIsDefault = true; - -		s.fieldStart(false); -		tracker.fieldStartSetIsDefault = false; -		tracker.expect(2, 0, 3, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc -		s.data("f2"); -		tracker.expect(2, 0, 3, 1, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc -		s.fieldEnd(); -		tracker.expect(2, 0, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +    tracker.reset(); +    logger.reset(); + +    { +        Stack s{parser, env.context, States::AnyHandlers}; +        s.commandStart("a", {}); +        tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.fieldStart(true); +        tracker.expect(1, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc + +        s.commandStart("b", {}); +        tracker.expect(2, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc + +        s.fieldStart(false); +        tracker.expect(2, 0, 2, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.data("f1"); +        tracker.expect(2, 0, 2, 0, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.fieldEnd(); +        tracker.expect(2, 0, 2, 1, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc +        tracker.fieldStartSetIsDefault = true; + +        s.fieldStart(false); +        tracker.fieldStartSetIsDefault = false; +        tracker.expect(2, 0, 3, 1, 1);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.data("f2"); +        tracker.expect(2, 0, 3, 1, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.fieldEnd(); +        tracker.expect(2, 0, 3, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc + +        ASSERT_FALSE(logger.hasError()); +        s.fieldStart(false); +        ASSERT_TRUE(logger.hasError()); +        logger.reset(); +        tracker.expect(2, 0, 3, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.data("f3"); +        tracker.expect(2, 0, 3, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.fieldEnd(); +        tracker.expect(2, 0, 3, 2, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc + +        s.fieldEnd(); +        tracker.expect(2, 1, 3, 3, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +    } +    tracker.expect(2, 2, 3, 3, 2);  // scc, ec, fsc, fec, dc, sac, stc, etc +    ASSERT_FALSE(logger.hasError()); +} -		ASSERT_FALSE(logger.hasError()); -		s.fieldStart(false); -		ASSERT_TRUE(logger.hasError()); -		logger.reset(); -		tracker.expect(2, 0, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc -		s.data("f3"); -		tracker.expect(2, 0, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc -		s.fieldEnd(); -		tracker.expect(2, 0, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +TEST(Stack, rangeCommandUnranged) +{ +    tracker.reset(); +    logger.reset(); + +    { +        Stack s{parser, env.context, States::AnyHandlers}; +        tracker.expect(0, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.commandStart("a", {}, true); +        tracker.expect(1, 0, 0, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.commandStart("b", {}); +        tracker.expect(2, 0, 1, 0, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc +        s.rangeEnd(); +        tracker.expect(2, 2, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc +    } +    tracker.expect(2, 2, 1, 1, 0);  // scc, ec, fsc, fec, dc, sac, stc, etc +    ASSERT_FALSE(logger.hasError()); +} -		s.fieldEnd(); -		tracker.expect(2, 1, 3, 3, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc -	} -	tracker.expect(2, 2, 3, 3, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc -	ASSERT_FALSE(logger.hasError());  }  } -}
\ No newline at end of file diff --git a/test/core/parser/stack/TokenRegistryTest.cpp b/test/core/parser/stack/TokenRegistryTest.cpp new file mode 100644 index 0000000..20d6cd0 --- /dev/null +++ b/test/core/parser/stack/TokenRegistryTest.cpp @@ -0,0 +1,82 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <core/parser/stack/Callbacks.hpp> +#include <core/parser/stack/TokenRegistry.hpp> + +namespace ousia { +namespace parser_stack { + +class ParserCallbacksProxy : public ParserCallbacks { +public: +	size_t registerTokenCount = 0; +	size_t unregisterTokenCount = 0; + +	TokenId registerToken(const std::string &token) override +	{ +		registerTokenCount++; +		return registerTokenCount; +	} + +	void unregisterToken(TokenId id) override { unregisterTokenCount++; } +}; + +TEST(TokenRegistry, simple) +{ +	ParserCallbacksProxy parser; +	{ +		TokenRegistry registry(parser); + +		ASSERT_EQ(0U, parser.registerTokenCount); +		ASSERT_EQ(0U, parser.unregisterTokenCount); + +		ASSERT_EQ(1U, registry.registerToken("test")); +		ASSERT_EQ(1U, registry.registerToken("test")); +		ASSERT_EQ(2U, registry.registerToken("test2")); +		ASSERT_EQ(2U, registry.registerToken("test2")); +		ASSERT_EQ(3U, registry.registerToken("test3")); +		ASSERT_EQ(3U, parser.registerTokenCount); +		ASSERT_EQ(0U, parser.unregisterTokenCount); + +		registry.unregisterToken(1); +		ASSERT_EQ(3U, parser.registerTokenCount); +		ASSERT_EQ(0U, parser.unregisterTokenCount); + +		registry.unregisterToken(1); +		ASSERT_EQ(3U, parser.registerTokenCount); +		ASSERT_EQ(1U, parser.unregisterTokenCount); + +		registry.unregisterToken(1); +		ASSERT_EQ(3U, parser.registerTokenCount); +		ASSERT_EQ(1U, parser.unregisterTokenCount); + +		registry.unregisterToken(2); +		ASSERT_EQ(3U, parser.registerTokenCount); +		ASSERT_EQ(1U, parser.unregisterTokenCount); + +		registry.unregisterToken(2); +		ASSERT_EQ(3U, parser.registerTokenCount); +		ASSERT_EQ(2U, parser.unregisterTokenCount); +	} +	ASSERT_EQ(3U, parser.unregisterTokenCount); +} +} +} + diff --git a/test/core/parser/utils/SourceOffsetVectorTest.cpp b/test/core/parser/utils/SourceOffsetVectorTest.cpp index 25a4163..26254f9 100644 --- a/test/core/parser/utils/SourceOffsetVectorTest.cpp +++ b/test/core/parser/utils/SourceOffsetVectorTest.cpp @@ -51,7 +51,7 @@ TEST(SourceOffsetVector, gaps)  	for (size_t i = 0; i < 999; i++) {  		auto elem = vec.loadOffset(i);  		EXPECT_EQ(i * 3 + 5, elem.first); -		EXPECT_EQ((i + 1) * 3 + 5, elem.second); +		EXPECT_EQ(i * 3 + 7, elem.second);  	}  	auto elem = vec.loadOffset(999);  	EXPECT_EQ(999U * 3 + 5, elem.first); diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp index 231bad9..8488459 100644 --- a/test/core/parser/utils/TokenizedDataTest.cpp +++ b/test/core/parser/utils/TokenizedDataTest.cpp @@ -20,6 +20,8 @@  #include <core/parser/utils/TokenizedData.hpp> +#include "TokenizedDataTestUtils.hpp" +  namespace ousia {  TEST(TokenizedData, dataWhitespacePreserve) @@ -29,15 +31,10 @@ TEST(TokenizedData, dataWhitespacePreserve)  	//                          0123456789012345  	//                          0         1 -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ(" test1   test2  ", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(16U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	TokenizedDataReader reader = data.reader(); +	assertText(reader, " test1   test2  ", TokenSet{}, WhitespaceMode::PRESERVE, +	           0, 16); +	assertEnd(reader);  }  TEST(TokenizedData, dataWhitespaceTrim) @@ -47,15 +44,10 @@ TEST(TokenizedData, dataWhitespaceTrim)  	//                          0123456789012345  	//                          0         1 -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("test1   test2", token.content); -	EXPECT_EQ(1U, token.getLocation().getStart()); -	EXPECT_EQ(14U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "test1   test2", TokenSet{}, WhitespaceMode::TRIM, 1, +	           14); +	assertEnd(reader);  }  TEST(TokenizedData, dataWhitespaceCollapse) @@ -65,15 +57,10 @@ TEST(TokenizedData, dataWhitespaceCollapse)  	//                          0123456789012345  	//                          0         1 -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("test1 test2", token.content); -	EXPECT_EQ(1U, token.getLocation().getStart()); -	EXPECT_EQ(14U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::COLLAPSE, 1, +	           14); +	assertEnd(reader);  }  TEST(TokenizedData, singleToken) @@ -82,17 +69,9 @@ TEST(TokenizedData, singleToken)  	ASSERT_EQ(2U, data.append("$$"));  	data.mark(5, 0, 2); -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); +	assertEnd(reader);  }  TEST(TokenizedData, singleDisabledToken) @@ -101,15 +80,9 @@ TEST(TokenizedData, singleDisabledToken)  	ASSERT_EQ(2U, data.append("$$"));  	data.mark(5, 0, 2); -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "$$", TokenSet{}, WhitespaceMode::COLLAPSE, 0, 2); +	assertEnd(reader);  }  TEST(TokenizedData, dualToken) @@ -120,18 +93,10 @@ TEST(TokenizedData, dualToken)  	data.mark(5, 0, 2);  	data.mark(6, 1, 1); -	data.enableToken(5); -	data.enableToken(6); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5, 6}, WhitespaceMode::COLLAPSE, 0, +	            2); +	assertEnd(reader);  }  TEST(TokenizedData, dualTokenShorterEnabled) @@ -142,385 +107,281 @@ TEST(TokenizedData, dualTokenShorterEnabled)  	data.mark(5, 0, 2);  	data.mark(6, 1, 1); -	data.enableToken(6); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(6U, token.id); -	EXPECT_EQ("$", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(1U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(6U, token.id); -	EXPECT_EQ("$", token.content); -	EXPECT_EQ(1U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 0, 1); +	assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 1, 2); +	assertEnd(reader);  }  TEST(TokenizedData, dualTokenLongerEnabled)  {  	TokenizedData data;  	ASSERT_EQ(2U, data.append("$$")); +	data.mark(6, 0, 1);  	data.mark(5, 0, 2); +	data.mark(6, 1, 1); -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); +	assertEnd(reader);  }  TEST(TokenizedData, tokensAndDataPreserveWhitespace)  {  	TokenizedData data; -	ASSERT_EQ(10U, data.append("$$ test $$")); -	//                          0123456789 +	ASSERT_EQ(18U, data.append("$$ test    text $$")); +	//                          012345678901234567  	data.mark(5, 0, 2);  	data.mark(5, 2); -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ(" test ", token.content); -	EXPECT_EQ(2U, token.getLocation().getStart()); -	EXPECT_EQ(8U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(8U, token.getLocation().getStart()); -	EXPECT_EQ(10U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2); +	assertText(reader, " test    text ", TokenSet{5}, WhitespaceMode::PRESERVE, +	           2, 16); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 16, 18); +	assertEnd(reader);  }  TEST(TokenizedData, tokensAndDataTrimWhitespace)  {  	TokenizedData data; -	ASSERT_EQ(10U, data.append("$$ test $$")); -	//                          0123456789 +	ASSERT_EQ(18U, data.append("$$ test    text $$")); +	//                          012345678901234567  	data.mark(5, 0, 2);  	data.mark(5, 2); -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("test", token.content); -	EXPECT_EQ(3U, token.getLocation().getStart()); -	EXPECT_EQ(7U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(8U, token.getLocation().getStart()); -	EXPECT_EQ(10U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2); +	assertText(reader, "test    text", TokenSet{5}, WhitespaceMode::TRIM, 3, +	           15); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 16, 18); +	assertEnd(reader);  }  TEST(TokenizedData, tokensAndDataCollapseWhitespace)  {  	TokenizedData data; -	ASSERT_EQ(10U, data.append("$$ test $$")); -	//                          0123456789 +	ASSERT_EQ(18U, data.append("$$ test    text $$")); +	//                          012345678901234567  	data.mark(5, 0, 2);  	data.mark(5, 2); -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("test", token.content); -	EXPECT_EQ(3U, token.getLocation().getStart()); -	EXPECT_EQ(7U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(8U, token.getLocation().getStart()); -	EXPECT_EQ(10U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); +	assertText(reader, "test text", TokenSet{5}, WhitespaceMode::COLLAPSE, 3, +	           15); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 16, 18); +	assertEnd(reader);  }  TEST(TokenizedData, tokensAndWhitespacePreserveWhitespace)  {  	TokenizedData data; -	ASSERT_EQ(10U, data.append("$$      $$")); -	//                          0123456789 +	ASSERT_EQ(8U, data.append("$$    $$")); +	//                         01234567  	data.mark(5, 0, 2);  	data.mark(5, 2); -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("      ", token.content); -	EXPECT_EQ(2U, token.getLocation().getStart()); -	EXPECT_EQ(8U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(8U, token.getLocation().getStart()); -	EXPECT_EQ(10U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2); +	assertText(reader, "    ", TokenSet{5}, WhitespaceMode::PRESERVE, 2, 6); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 6, 8); +	assertEnd(reader);  }  TEST(TokenizedData, tokensAndWhitespaceTrimWhitespace)  {  	TokenizedData data; -	ASSERT_EQ(10U, data.append("$$      $$")); -	//                          0123456789 +	ASSERT_EQ(8U, data.append("$$    $$")); +	//                         01234567  	data.mark(5, 0, 2);  	data.mark(5, 2); -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(8U, token.getLocation().getStart()); -	EXPECT_EQ(10U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 6, 8); +	assertEnd(reader);  }  TEST(TokenizedData, tokensAndWhitespaceCollapseWhitespace)  {  	TokenizedData data; -	ASSERT_EQ(10U, data.append("$$      $$")); -	//                          0123456789 +	ASSERT_EQ(8U, data.append("$$    $$")); +	//                         01234567  	data.mark(5, 0, 2);  	data.mark(5, 2); -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); +	assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 6, 8); +	assertEnd(reader); +} -	ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(8U, token.getLocation().getStart()); -	EXPECT_EQ(10U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); +TEST(TokenizedData, appendChars) +{ +	TokenizedData data; +	ASSERT_EQ(1U, data.append('t', 5, 7)); +	ASSERT_EQ(2U, data.append('e', 7, 8)); +	ASSERT_EQ(3U, data.append('s', 8, 10)); +	ASSERT_EQ(4U, data.append('t', 10, 12)); -	ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "test", TokenSet{5}, WhitespaceMode::COLLAPSE, 5, 12); +	assertEnd(reader);  } -TEST(TokenizedData, textPreserveWhitespace) +TEST(TokenizedData, protectedWhitespace)  {  	TokenizedData data; -	ASSERT_EQ(6U, data.append("  $$  ")); -	//                         012345 -	data.mark(5, 2, 2); - -	data.enableToken(5); - -	Token token; -	ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("  ", token.content); -	EXPECT_EQ(0U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(2U, token.getLocation().getStart()); -	EXPECT_EQ(4U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("  ", token.content); -	EXPECT_EQ(4U, token.getLocation().getStart()); -	EXPECT_EQ(6U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.text(token, WhitespaceMode::PRESERVE)); -	ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); +	ASSERT_EQ(4U, data.append("test", 10)); +	ASSERT_EQ(11U, data.append("   test", 14, true)); + +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "test   test", TokenSet{5}, WhitespaceMode::COLLAPSE, 10, +	           21); +	assertEnd(reader);  } -TEST(TokenizedData, textTrimWhitespace) +TEST(TokenizedData, specialNewlineToken)  {  	TokenizedData data; -	ASSERT_EQ(6U, data.append("  $$  ")); -	//                         012345 -	data.mark(5, 2, 2); - -	data.enableToken(5); +	data.append("a\nb\n   \nc\n"); +	//           0 12 3456 78 9 + +	const TokenSet tokens{Tokens::Newline}; + +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1); +	assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, +	            1, 2); +	assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3); +	assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, +	            3, 4); +	assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, +	            7, 8); +	assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9); +	assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, +	            9, 10); +	assertEnd(reader); +} -	Token token; -	ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); +TEST(TokenizedData, specialParagraphToken) +{ +	TokenizedData data; +	data.append("a\nb\n   \nc\n"); +	//           0 12 3456 78 9 -	ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(2U, token.getLocation().getStart()); -	EXPECT_EQ(4U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); +	const TokenSet tokens{Tokens::Paragraph}; -	ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); -	ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3); +	assertToken(reader, Tokens::Paragraph, "\n   \n", tokens, +	            WhitespaceMode::COLLAPSE, 3, 8); +	assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9); +	assertEnd(reader);  } -TEST(TokenizedData, textCollapseWhitespace) +TEST(TokenizedData, specialSectionToken)  {  	TokenizedData data; -	ASSERT_EQ(6U, data.append("  $$  ")); -	//                         012345 -	data.mark(5, 2, 2); +	data.append("a\nb\n   \n  \t \n"); +	//           0 12 3456 789 01 2 +	//           0             1 -	data.enableToken(5); +	const TokenSet tokens{Tokens::Section}; -	Token token; -	ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - -	ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(5U, token.id); -	EXPECT_EQ("$$", token.content); -	EXPECT_EQ(2U, token.getLocation().getStart()); -	EXPECT_EQ(4U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); -	ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3); +	assertToken(reader, Tokens::Section, "\n   \n  \t \n", tokens, +	            WhitespaceMode::COLLAPSE, 3, 13); +	assertEnd(reader);  } -TEST(TokenizedData, appendChars) +TEST(TokenizedData, specialTokenPrecedence)  {  	TokenizedData data; -	ASSERT_EQ(1U, data.append('t', 5, 7)); -	ASSERT_EQ(2U, data.append('e', 7, 8)); -	ASSERT_EQ(3U, data.append('s', 8, 10)); -	ASSERT_EQ(4U, data.append('t', 10, 12)); +	data.append("a\nb\n\nc\n\n\nd"); +	//           0 12 3 45 6 7 89 + +	const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section}; + +	TokenizedDataReader reader = data.reader(); +	assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1); +	assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, +	            1, 2); +	assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3); +	assertToken(reader, Tokens::Paragraph, "\n\n", tokens, +	            WhitespaceMode::COLLAPSE, 3, 5); +	assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 5, 6); +	assertToken(reader, Tokens::Section, "\n\n\n", tokens, +	            WhitespaceMode::COLLAPSE, 6, 9); +	assertText(reader, "d", tokens, WhitespaceMode::COLLAPSE, 9, 10); +	assertEnd(reader); +} -	Token token; -	ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("test", token.content); -	EXPECT_EQ(5U, token.getLocation().getStart()); -	EXPECT_EQ(12U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); +TEST(TokenizedData, specialTokenPrecedence2) +{ +	TokenizedData data; +	data.append("\nb\n\nc\n\n\n"); +	//            0 12 3 45 6 7 + +	const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section}; + +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, +	            0, 1); +	assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 1, 2); +	assertToken(reader, Tokens::Paragraph, "\n\n", tokens, +	            WhitespaceMode::COLLAPSE, 2, 4); +	assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 4, 5); +	assertToken(reader, Tokens::Section, "\n\n\n", tokens, +	            WhitespaceMode::COLLAPSE, 5, 8); +	assertEnd(reader); +} -	ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); -	ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); +TEST(TokenizedData, specialTokenIndent) +{ +	TokenizedData data; +	data.append("    test\n\ttest2\n        test3  \ttest4\ntest5"); +	//           01234567 8 901234 5678901234567890 123456 789012 +	//           0           1          2         3           4 +	const TokenSet tokens{Tokens::Indent, Tokens::Dedent}; + +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, +	            4, 4); +	assertText(reader, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8); +	assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, +	            10, 10); +	assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37); +	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE, +	            38, 38); +	assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43); +	assertEnd(reader);  } -TEST(TokenizedData, copy) +TEST(TokenizedData, specialTokenIndentOverlap)  {  	TokenizedData data; -	ASSERT_EQ(7U, data.append(" a $ b ")); -	//                         0123456 -	data.mark(6, 3, 1); -	data.enableToken(6); - -	Token token; -	ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("a", token.content); -	EXPECT_EQ(1U, token.getLocation().getStart()); -	EXPECT_EQ(2U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - -	TokenizedData dataCopy = data; - -	ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(6U, token.id); -	EXPECT_EQ("$", token.content); -	EXPECT_EQ(3U, token.getLocation().getStart()); -	EXPECT_EQ(4U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(dataCopy.next(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(6U, token.id); -	EXPECT_EQ("$", token.content); -	EXPECT_EQ(3U, token.getLocation().getStart()); -	EXPECT_EQ(4U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - -	ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ(" b ", token.content); -	EXPECT_EQ(4U, token.getLocation().getStart()); -	EXPECT_EQ(7U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); -	ASSERT_FALSE(data.next(token)); - -	ASSERT_TRUE(dataCopy.text(token, WhitespaceMode::COLLAPSE)); -	EXPECT_EQ(Tokens::Data, token.id); -	EXPECT_EQ("b", token.content); -	EXPECT_EQ(5U, token.getLocation().getStart()); -	EXPECT_EQ(6U, token.getLocation().getEnd()); -	EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); -	ASSERT_FALSE(dataCopy.next(token)); +	data.append("    test\n\ttest2\n        test3  \ttest4\ntest5"); +	//           01234567 8 901234 5678901234567890 123456 789012 +	//           0           1          2         3           4 +	const TokenSet tokens{Tokens::Indent, Tokens::Dedent, 5}; + +	data.mark(5, 4, 4); + +	TokenizedDataReader reader = data.reader(); +	assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, +	            4, 4); +	assertToken(reader, 5, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8); +	assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, +	            10, 10); +	assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37); +	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE, +	            38, 38); +	assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43); +	assertEnd(reader);  } +  } diff --git a/test/core/parser/utils/TokenizedDataTestUtils.hpp b/test/core/parser/utils/TokenizedDataTestUtils.hpp new file mode 100644 index 0000000..c384f9d --- /dev/null +++ b/test/core/parser/utils/TokenizedDataTestUtils.hpp @@ -0,0 +1,64 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ +#define _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ + +namespace ousia { + +static void assertToken(TokenizedDataReader &reader, TokenId id, +                 const std::string &text, const TokenSet &tokens = TokenSet{}, +                 WhitespaceMode mode = WhitespaceMode::TRIM, +                 SourceOffset start = InvalidSourceOffset, +                 SourceOffset end = InvalidSourceOffset, +                 SourceId sourceId = InvalidSourceId) +{ +	Token token; +	ASSERT_TRUE(reader.read(token, tokens, mode)); +	EXPECT_EQ(id, token.id); +	EXPECT_EQ(text, token.content); +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, token.getLocation().getStart()); +	} +	if (end != InvalidSourceOffset) { +		EXPECT_EQ(end, token.getLocation().getEnd()); +	} +	EXPECT_EQ(sourceId, token.getLocation().getSourceId()); +} + +static void assertText(TokenizedDataReader &reader, const std::string &text, +                const TokenSet &tokens = TokenSet{}, +                WhitespaceMode mode = WhitespaceMode::TRIM, +                SourceOffset start = InvalidSourceOffset, +                SourceOffset end = InvalidSourceOffset, +                SourceId id = InvalidSourceId) +{ +	assertToken(reader, Tokens::Data, text, tokens, mode, start, end, id); +} + +static void assertEnd(TokenizedDataReader &reader) +{ +	Token token; +	ASSERT_TRUE(reader.atEnd()); +	ASSERT_FALSE(reader.read(token)); +} + +} + +#endif /* _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ */ + diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 3809a12..45fc77a 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -20,9 +20,66 @@  #include <core/common/CharReader.hpp>  #include <core/parser/utils/Tokenizer.hpp> +#include <core/parser/utils/TokenizedData.hpp> + +#include "TokenizedDataTestUtils.hpp"  namespace ousia { +static void assertPrimaryToken(CharReader &reader, Tokenizer &tokenizer, +                               TokenId id, const std::string &text, +                               SourceOffset start = InvalidSourceOffset, +                               SourceOffset end = InvalidSourceOffset, +                               SourceId sourceId = InvalidSourceId) +{ +	Token token; +	TokenizedData data; +	ASSERT_TRUE(tokenizer.read(reader, token, data)); +	EXPECT_EQ(id, token.id); +	EXPECT_EQ(text, token.content); +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, token.getLocation().getStart()); +	} +	if (end != InvalidSourceOffset) { +		EXPECT_EQ(end, token.getLocation().getEnd()); +	} +	EXPECT_EQ(sourceId, token.getLocation().getSourceId()); +} + +static void expectData(const std::string &expected, SourceOffset tokenStart, +                       SourceOffset tokenEnd, SourceOffset textStart, +                       SourceOffset textEnd, const Token &token, +                       TokenizedData &data, +                       WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ +	ASSERT_EQ(Tokens::Data, token.id); + +	Token textToken; +	TokenizedDataReader reader = data.reader(); +	ASSERT_TRUE(reader.read(textToken, TokenSet{}, mode)); + +	EXPECT_EQ(expected, textToken.content); +	EXPECT_EQ(tokenStart, token.location.getStart()); +	EXPECT_EQ(tokenEnd, token.location.getEnd()); +	EXPECT_EQ(textStart, textToken.getLocation().getStart()); +	EXPECT_EQ(textEnd, textToken.getLocation().getEnd()); +	EXPECT_TRUE(reader.atEnd()); +} + +static void assertDataToken(CharReader &reader, Tokenizer &tokenizer, +                            const std::string &expected, +                            SourceOffset tokenStart, SourceOffset tokenEnd, +                            SourceOffset textStart, SourceOffset textEnd, +                            WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ +	Token token; +	TokenizedData data; +	ASSERT_TRUE(tokenizer.read(reader, token, data)); + +	expectData(expected, tokenStart, tokenEnd, textStart, textEnd, token, data, +	           mode); +} +  TEST(Tokenizer, tokenRegistration)  {  	Tokenizer tokenizer; @@ -31,23 +88,23 @@ TEST(Tokenizer, tokenRegistration)  	ASSERT_EQ(0U, tokenizer.registerToken("a"));  	ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("a")); -	ASSERT_EQ("a", tokenizer.getTokenString(0U)); +	ASSERT_EQ("a", tokenizer.lookupToken(0U).string);  	ASSERT_EQ(1U, tokenizer.registerToken("b"));  	ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("b")); -	ASSERT_EQ("b", tokenizer.getTokenString(1U)); +	ASSERT_EQ("b", tokenizer.lookupToken(1U).string);  	ASSERT_EQ(2U, tokenizer.registerToken("c"));  	ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("c")); -	ASSERT_EQ("c", tokenizer.getTokenString(2U)); +	ASSERT_EQ("c", tokenizer.lookupToken(2U).string);  	ASSERT_TRUE(tokenizer.unregisterToken(1U));  	ASSERT_FALSE(tokenizer.unregisterToken(1U)); -	ASSERT_EQ("", tokenizer.getTokenString(1U)); +	ASSERT_EQ("", tokenizer.lookupToken(1U).string);  	ASSERT_EQ(1U, tokenizer.registerToken("d"));  	ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("d")); -	ASSERT_EQ("d", tokenizer.getTokenString(1U)); +	ASSERT_EQ("d", tokenizer.lookupToken(1U).string);  }  TEST(Tokenizer, textTokenPreserveWhitespace) @@ -56,36 +113,34 @@ TEST(Tokenizer, textTokenPreserveWhitespace)  		CharReader reader{" this \t is only a  \n\n test   text   "};  		//                 012345 6789012345678 9 0123456789012345  		//                 0          1           2         3 -		Tokenizer tokenizer{WhitespaceMode::PRESERVE}; +		Tokenizer tokenizer;  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); -		SourceLocation loc = token.location; -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(36U, loc.getEnd()); +		expectData(" this \t is only a  \n\n test   text   ", 0, 36, 0, 36, +		           token, data, WhitespaceMode::PRESERVE); -		ASSERT_FALSE(tokenizer.read(reader, token)); +		data.clear(); +		ASSERT_FALSE(tokenizer.read(reader, token, data));  	}  	{  		CharReader reader{"this \t is only a  \n\n test   text"};  		//                 01234 5678901234567 8 9012345678901  		//                 0          1           2         3 -		Tokenizer tokenizer{WhitespaceMode::PRESERVE}; +		Tokenizer tokenizer;  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("this \t is only a  \n\n test   text", token.content); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); -		SourceLocation loc = token.location; -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(32U, loc.getEnd()); +		expectData("this \t is only a  \n\n test   text", 0, 32, 0, 32, token, +		           data, WhitespaceMode::PRESERVE); -		ASSERT_FALSE(tokenizer.read(reader, token)); +		data.clear(); +		ASSERT_FALSE(tokenizer.read(reader, token, data));  	}  } @@ -95,36 +150,34 @@ TEST(Tokenizer, textTokenTrimWhitespace)  		CharReader reader{" this \t is only a  \n\n test   text   "};  		//                 012345 6789012345678 9 0123456789012345  		//                 0          1           2         3 -		Tokenizer tokenizer{WhitespaceMode::TRIM}; +		Tokenizer tokenizer;  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("this \t is only a  \n\n test   text", token.content); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); -		SourceLocation loc = token.location; -		ASSERT_EQ(1U, loc.getStart()); -		ASSERT_EQ(33U, loc.getEnd()); +		expectData("this \t is only a  \n\n test   text", 0, 36, 1, 33, token, +		           data, WhitespaceMode::TRIM); -		ASSERT_FALSE(tokenizer.read(reader, token)); +		data.clear(); +		ASSERT_FALSE(tokenizer.read(reader, token, data));  	}  	{  		CharReader reader{"this \t is only a  \n\n test   text"};  		//                 01234 5678901234567 8 9012345678901  		//                 0          1           2         3 -		Tokenizer tokenizer{WhitespaceMode::TRIM}; +		Tokenizer tokenizer;  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("this \t is only a  \n\n test   text", token.content); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); -		SourceLocation loc = token.location; -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(32U, loc.getEnd()); +		expectData("this \t is only a  \n\n test   text", 0, 32, 0, 32, token, +		           data, WhitespaceMode::TRIM); -		ASSERT_FALSE(tokenizer.read(reader, token)); +		data.clear(); +		ASSERT_FALSE(tokenizer.read(reader, token, data));  	}  } @@ -134,36 +187,34 @@ TEST(Tokenizer, textTokenCollapseWhitespace)  		CharReader reader{" this \t is only a  \n\n test   text   "};  		//                 012345 6789012345678 9 0123456789012345  		//                 0          1           2         3 -		Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; +		Tokenizer tokenizer;  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("this is only a test text", token.content); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); -		SourceLocation loc = token.location; -		ASSERT_EQ(1U, loc.getStart()); -		ASSERT_EQ(33U, loc.getEnd()); +		expectData("this is only a test text", 0, 36, 1, 33, token, data, +		           WhitespaceMode::COLLAPSE); -		ASSERT_FALSE(tokenizer.read(reader, token)); +		data.clear(); +		ASSERT_FALSE(tokenizer.read(reader, token, data));  	}  	{  		CharReader reader{"this \t is only a  \n\n test   text"};  		//                 01234 5678901234567 8 9012345678901  		//                 0          1           2         3 -		Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; +		Tokenizer tokenizer;  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("this is only a test text", token.content); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); -		SourceLocation loc = token.location; -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(32U, loc.getEnd()); +		expectData("this is only a test text", 0, 32, 0, 32, token, data, +		           WhitespaceMode::COLLAPSE); -		ASSERT_FALSE(tokenizer.read(reader, token)); +		data.clear(); +		ASSERT_FALSE(tokenizer.read(reader, token, data));  	}  } @@ -177,14 +228,12 @@ TEST(Tokenizer, simpleReadToken)  	{  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data));  		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("test1", token.content); -		SourceLocation loc = token.location; -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(5U, loc.getEnd()); +		expectData("test1", 0, 5, 0, 5, token, data);  		char c;  		ASSERT_TRUE(reader.peek(c)); @@ -193,7 +242,8 @@ TEST(Tokenizer, simpleReadToken)  	{  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data));  		ASSERT_EQ(tid, token.id);  		ASSERT_EQ(":", token.content); @@ -209,14 +259,10 @@ TEST(Tokenizer, simpleReadToken)  	{  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("test2", token.content); - -		SourceLocation loc = token.location; -		ASSERT_EQ(6U, loc.getStart()); -		ASSERT_EQ(11U, loc.getEnd()); +		expectData("test2", 6, 11, 6, 11, token, data);  		char c;  		ASSERT_FALSE(reader.peek(c)); @@ -233,21 +279,17 @@ TEST(Tokenizer, simplePeekToken)  	{  		Token token; -		ASSERT_TRUE(tokenizer.peek(reader, token)); - -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("test1", token.content); - -		SourceLocation loc = token.location; -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(5U, loc.getEnd()); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.peek(reader, token, data)); +		expectData("test1", 0, 5, 0, 5, token, data);  		ASSERT_EQ(0U, reader.getOffset());  		ASSERT_EQ(5U, reader.getPeekOffset());  	}  	{  		Token token; -		ASSERT_TRUE(tokenizer.peek(reader, token)); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.peek(reader, token, data));  		ASSERT_EQ(tid, token.id);  		ASSERT_EQ(":", token.content); @@ -261,35 +303,26 @@ TEST(Tokenizer, simplePeekToken)  	{  		Token token; -		ASSERT_TRUE(tokenizer.peek(reader, token)); - -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("test2", token.content); - -		SourceLocation loc = token.location; -		ASSERT_EQ(6U, loc.getStart()); -		ASSERT_EQ(11U, loc.getEnd()); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.peek(reader, token, data)); +		expectData("test2", 6, 11, 6, 11, token, data);  		ASSERT_EQ(0U, reader.getOffset());  		ASSERT_EQ(11U, reader.getPeekOffset());  	}  	{  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); - -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("test1", token.content); - -		SourceLocation loc = token.location; -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(5U, loc.getEnd()); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		expectData("test1", 0, 5, 0, 5, token, data);  		ASSERT_EQ(5U, reader.getOffset());  		ASSERT_EQ(5U, reader.getPeekOffset());  	}  	{  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data));  		ASSERT_EQ(tid, token.id);  		ASSERT_EQ(":", token.content); @@ -303,14 +336,9 @@ TEST(Tokenizer, simplePeekToken)  	{  		Token token; -		ASSERT_TRUE(tokenizer.read(reader, token)); - -		ASSERT_EQ(Tokens::Data, token.id); -		ASSERT_EQ("test2", token.content); - -		SourceLocation loc = token.location; -		ASSERT_EQ(6U, loc.getStart()); -		ASSERT_EQ(11U, loc.getEnd()); +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		expectData("test2", 6, 11, 6, 11, token, data);  		ASSERT_EQ(11U, reader.getOffset());  		ASSERT_EQ(11U, reader.getPeekOffset());  	} @@ -320,6 +348,7 @@ TEST(Tokenizer, ambiguousTokens)  {  	CharReader reader{"abc"};  	Tokenizer tokenizer; +	TokenizedData data;  	TokenId t1 = tokenizer.registerToken("abd");  	TokenId t2 = tokenizer.registerToken("bc"); @@ -328,16 +357,17 @@ TEST(Tokenizer, ambiguousTokens)  	ASSERT_EQ(1U, t2);  	Token token; -	ASSERT_TRUE(tokenizer.read(reader, token)); +	data.clear(); +	ASSERT_TRUE(tokenizer.read(reader, token, data)); -	ASSERT_EQ(Tokens::Data, token.id); -	ASSERT_EQ("a", token.content); +	expectData("a", 0, 1, 0, 1, token, data);  	SourceLocation loc = token.location;  	ASSERT_EQ(0U, loc.getStart());  	ASSERT_EQ(1U, loc.getEnd()); -	ASSERT_TRUE(tokenizer.read(reader, token)); +	data.clear(); +	ASSERT_TRUE(tokenizer.read(reader, token, data));  	ASSERT_EQ(t2, token.id);  	ASSERT_EQ("bc", token.content); @@ -346,7 +376,8 @@ TEST(Tokenizer, ambiguousTokens)  	ASSERT_EQ(1U, loc.getStart());  	ASSERT_EQ(3U, loc.getEnd()); -	ASSERT_FALSE(tokenizer.read(reader, token)); +	data.clear(); +	ASSERT_FALSE(tokenizer.read(reader, token, data));  }  TEST(Tokenizer, commentTestWhitespacePreserve) @@ -354,7 +385,7 @@ TEST(Tokenizer, commentTestWhitespacePreserve)  	CharReader reader{"Test/Test /* Block Comment */", 0};  	//                 012345678901234567890123456789  	//                 0        1         2 -	Tokenizer tokenizer(WhitespaceMode::PRESERVE); +	Tokenizer tokenizer;  	const TokenId t1 = tokenizer.registerToken("/");  	const TokenId t2 = tokenizer.registerToken("/*"); @@ -370,45 +401,189 @@ TEST(Tokenizer, commentTestWhitespacePreserve)  	Token t;  	for (auto &te : expected) { -		EXPECT_TRUE(tokenizer.read(reader, t)); +		TokenizedData data(0); +		EXPECT_TRUE(tokenizer.read(reader, t, data));  		EXPECT_EQ(te.id, t.id); -		EXPECT_EQ(te.content, t.content); +		if (te.id != Tokens::Data) { +			EXPECT_EQ(te.content, t.content); +		} else { +			TokenizedDataReader dataReader = data.reader(); +			Token textToken; +			ASSERT_TRUE(dataReader.read(textToken, TokenSet{}, +			                            WhitespaceMode::PRESERVE)); +			EXPECT_TRUE(dataReader.atEnd()); +			EXPECT_EQ(te.content, textToken.content); +		}  		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());  		EXPECT_EQ(te.location.getStart(), t.location.getStart());  		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());  	} -	ASSERT_FALSE(tokenizer.read(reader, t)); + +	TokenizedData data; +	ASSERT_FALSE(tokenizer.read(reader, t, data));  } -TEST(Tokenizer, commentTestWhitespaceCollapse) +TEST(Tokenizer, nonPrimaryTokens)  { -	CharReader reader{"Test/Test /* Block Comment */", 0}; -	//                 012345678901234567890123456789 -	//                 0        1         2 -	Tokenizer tokenizer(WhitespaceMode::COLLAPSE); +	CharReader reader{ +	    "<<switch to $inline \\math mode$ they said, see the world they " +	    "said>>"}; +	//   012345678901234567890 12345678901234567890123456789012345678901234567 +	//   0         1         2          3         4         5         6 -	const TokenId t1 = tokenizer.registerToken("/"); -	const TokenId t2 = tokenizer.registerToken("/*"); -	const TokenId t3 = tokenizer.registerToken("*/"); +	Tokenizer tokenizer; -	std::vector<Token> expected = { -	    {Tokens::Data, "Test", SourceLocation{0, 0, 4}}, -	    {t1, "/", SourceLocation{0, 4, 5}}, -	    {Tokens::Data, "Test", SourceLocation{0, 5, 9}}, -	    {t2, "/*", SourceLocation{0, 10, 12}}, -	    {Tokens::Data, "Block Comment", SourceLocation{0, 13, 26}}, -	    {t3, "*/", SourceLocation{0, 27, 29}}}; +	TokenId tBackslash = tokenizer.registerToken("\\"); +	TokenId tDollar = tokenizer.registerToken("$", false); +	TokenId tSpeechStart = tokenizer.registerToken("<<", false); +	TokenId tSpeechEnd = tokenizer.registerToken(">>", false); -	Token t; -	for (auto &te : expected) { -		EXPECT_TRUE(tokenizer.read(reader, t)); -		EXPECT_EQ(te.id, t.id); -		EXPECT_EQ(te.content, t.content); -		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); -		EXPECT_EQ(te.location.getStart(), t.location.getStart()); -		EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); +	TokenSet tokens = TokenSet{tDollar, tSpeechStart, tSpeechEnd}; + +	Token token, textToken; +	{ +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		ASSERT_EQ(Tokens::Data, token.id); + +		TokenizedDataReader dataReader = data.reader(); +		assertToken(dataReader, tSpeechStart, "<<", tokens, +		            WhitespaceMode::TRIM, 0, 2); +		assertText(dataReader, "switch to", tokens, WhitespaceMode::TRIM, 2, +		           11); +		assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 12, +		            13); +		assertText(dataReader, "inline", tokens, WhitespaceMode::TRIM, 13, 19); +		assertEnd(dataReader); +	} + +	{ +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		ASSERT_EQ(tBackslash, token.id); +		ASSERT_EQ(20U, token.location.getStart()); +		ASSERT_EQ(21U, token.location.getEnd()); +	} + +	{ +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		ASSERT_EQ(Tokens::Data, token.id); + +		TokenizedDataReader dataReader = data.reader(); +		assertText(dataReader, "math mode", tokens, WhitespaceMode::TRIM, 21, +		           30); +		assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 30, +		            31); +		assertText(dataReader, "they said, see the world they said", tokens, +		           WhitespaceMode::TRIM, 32, 66); +		assertToken(dataReader, tSpeechEnd, ">>", tokens, WhitespaceMode::TRIM, +		            66, 68); +		assertEnd(dataReader); +	} + +	TokenizedData data; +	ASSERT_FALSE(tokenizer.read(reader, token, data)); +} + +TEST(Tokenizer, primaryNonPrimaryTokenInteraction) +{ +	CharReader reader{"<<test1>><test2><<test3\\><<<test4>>>"}; +	//                 01234567890123456789012 3456789012345 +	//                 0         1         2          3 + +	Tokenizer tokenizer; + +	TokenId tP1 = tokenizer.registerToken("<", true); +	TokenId tP2 = tokenizer.registerToken(">", true); +	TokenId tP3 = tokenizer.registerToken("\\>", true); +	TokenId tN1 = tokenizer.registerToken("<<", false); +	TokenId tN2 = tokenizer.registerToken(">>", false); + +	TokenSet tokens = TokenSet{tN1, tN2}; + +	Token token, textToken; +	{ +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		ASSERT_EQ(Tokens::Data, token.id); + +		TokenizedDataReader dataReader = data.reader(); +		assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 0, 2); +		assertText(dataReader, "test1", tokens, WhitespaceMode::TRIM, 2, 7); +		assertToken(dataReader, tN2, ">>", tokens, WhitespaceMode::TRIM, 7, 9); +		assertEnd(dataReader); +	} + +	assertPrimaryToken(reader, tokenizer, tP1, "<", 9, 10); +	assertDataToken(reader, tokenizer, "test2", 10, 15, 10, 15); +	assertPrimaryToken(reader, tokenizer, tP2, ">", 15, 16); + +	{ +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		ASSERT_EQ(Tokens::Data, token.id); + +		TokenizedDataReader dataReader = data.reader(); +		assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 16, 18); +		assertText(dataReader, "test3", tokens, WhitespaceMode::TRIM, 18, 23); +		assertEnd(dataReader); +	} + +	assertPrimaryToken(reader, tokenizer, tP3, "\\>", 23, 25); + +	{ +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		ASSERT_EQ(Tokens::Data, token.id); + +		TokenizedDataReader dataReader = data.reader(); +		assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 25, 27); +		assertEnd(dataReader); +	} + +	assertPrimaryToken(reader, tokenizer, tP1, "<", 27, 28); + +	{ +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		ASSERT_EQ(Tokens::Data, token.id); + +		TokenizedDataReader dataReader = data.reader(); +		assertText(dataReader, "test4", tokens, WhitespaceMode::TRIM, 28, 33); +		assertToken(dataReader, tN2, ">>", tokens, WhitespaceMode::TRIM, 33, 35); +		assertEnd(dataReader); +	} + +	assertPrimaryToken(reader, tokenizer, tP2, ">", 35, 36); + +	TokenizedData data; +	ASSERT_FALSE(tokenizer.read(reader, token, data)); +} + +TEST(Tokenizer, ambiguousTokens2) +{ +	CharReader reader{"<\\"}; + +	Tokenizer tokenizer; + +	TokenId tBackslash = tokenizer.registerToken("\\"); +	TokenId tAnnotationStart = tokenizer.registerToken("<\\"); + +	TokenSet tokens = TokenSet{tBackslash, tAnnotationStart}; +	Token token; +	{ +		TokenizedData data; +		ASSERT_TRUE(tokenizer.read(reader, token, data)); +		ASSERT_EQ("<\\", token.content); +		ASSERT_EQ(tAnnotationStart, token.id); +		ASSERT_TRUE(data.empty()); +	} + +	{ +		TokenizedData data; +		ASSERT_FALSE(tokenizer.read(reader, token, data));  	} -	ASSERT_FALSE(tokenizer.read(reader, t));  }  } diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index d52fa5b..d47f529 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -21,143 +21,205 @@  #include <iostream>  #include <core/common/CharReader.hpp> +#include <core/common/Variant.hpp>  #include <core/frontend/TerminalLogger.hpp> - +#include <core/parser/utils/TokenizedData.hpp>  #include <formats/osml/OsmlStreamParser.hpp> +#include <core/parser/utils/TokenizedDataTestUtils.hpp> +  namespace ousia {  static TerminalLogger logger(std::cerr, true);  // static ConcreteLogger logger; -static void assertCommand(OsmlStreamParser &reader, const std::string &name, -                          SourceOffset start = InvalidSourceOffset, -                          SourceOffset end = InvalidSourceOffset) +static void assertCommandStart(OsmlStreamParser &parser, +                               const std::string &name, bool rangeCommand, +                               SourceOffset start = InvalidSourceOffset, +                               SourceOffset end = InvalidSourceOffset)  { -	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); -	EXPECT_EQ(name, reader.getCommandName().asString()); +	ASSERT_EQ(OsmlStreamParser::State::COMMAND_START, parser.parse()); +	EXPECT_EQ(name, parser.getCommandName().asString()); +	EXPECT_EQ(rangeCommand, parser.inRangeCommand());  	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); -		EXPECT_EQ(start, reader.getLocation().getStart()); +		EXPECT_EQ(start, parser.getCommandName().getLocation().getStart()); +		EXPECT_EQ(start, parser.getLocation().getStart());  	}  	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); -		EXPECT_EQ(end, reader.getLocation().getEnd()); +		EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd()); +		EXPECT_EQ(end, parser.getLocation().getEnd());  	}  } -static void assertCommand(OsmlStreamParser &reader, const std::string &name, -                          const Variant::mapType &args, +static void assertCommandStart(OsmlStreamParser &parser, +                               const std::string &name, bool rangeCommand, +                               const Variant::mapType &args, +                               SourceOffset start = InvalidSourceOffset, +                               SourceOffset end = InvalidSourceOffset) +{ +	assertCommandStart(parser, name, rangeCommand, start, end); +	EXPECT_EQ(args, parser.getCommandArguments()); +} + +static void assertCommand(OsmlStreamParser &parser, const std::string &name,                            SourceOffset start = InvalidSourceOffset,                            SourceOffset end = InvalidSourceOffset)  { -	assertCommand(reader, name, start, end); -	EXPECT_EQ(args, reader.getCommandArguments()); +	assertCommandStart(parser, name, false, Variant::mapType{}, start, end);  } -static void assertData(OsmlStreamParser &reader, const std::string &data, -                       SourceOffset start = InvalidSourceOffset, -                       SourceOffset end = InvalidSourceOffset) +static void assertRangeEnd(OsmlStreamParser &parser, +                           SourceOffset start = InvalidSourceOffset, +                           SourceOffset end = InvalidSourceOffset)  { -	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); -	EXPECT_EQ(data, reader.getData().asString()); +	ASSERT_EQ(OsmlStreamParser::State::RANGE_END, parser.parse());  	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getData().getLocation().getStart()); -		EXPECT_EQ(start, reader.getLocation().getStart()); +		EXPECT_EQ(start, parser.getLocation().getStart());  	}  	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getData().getLocation().getEnd()); -		EXPECT_EQ(end, reader.getLocation().getEnd()); +		EXPECT_EQ(end, parser.getLocation().getEnd()); +	} +} + +static void assertTextData(OsmlStreamParser &parser, const std::string &text, +                           SourceOffset dataStart = InvalidSourceOffset, +                           SourceOffset dataEnd = InvalidSourceOffset, +                           SourceOffset textStart = InvalidSourceOffset, +                           SourceOffset textEnd = InvalidSourceOffset, +                           WhitespaceMode mode = WhitespaceMode::COLLAPSE) +{ +	ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + +	const TokenizedData &data = parser.getData(); +	TokenizedDataReader dataReader = data.reader(); + +	Token token; +	ASSERT_TRUE(dataReader.read(token, TokenSet{}, mode)); +	EXPECT_EQ(Tokens::Data, token.id); +	EXPECT_EQ(text, token.content); +	if (dataStart != InvalidSourceOffset) { +		EXPECT_EQ(dataStart, data.getLocation().getStart()); +		EXPECT_EQ(dataStart, parser.getLocation().getStart()); +	} +	if (dataEnd != InvalidSourceOffset) { +		EXPECT_EQ(dataEnd, data.getLocation().getEnd()); +		EXPECT_EQ(dataEnd, parser.getLocation().getEnd()); +	} +	if (textStart != InvalidSourceOffset) { +		EXPECT_EQ(textStart, token.getLocation().getStart()); +	} +	if (textEnd != InvalidSourceOffset) { +		EXPECT_EQ(textEnd, token.getLocation().getEnd());  	}  } -static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, +static void assertData(OsmlStreamParser &parser, const std::string &text, +                       SourceOffset textStart = InvalidSourceOffset, +                       SourceOffset textEnd = InvalidSourceOffset, +                       WhitespaceMode mode = WhitespaceMode::COLLAPSE) +{ +	assertTextData(parser, text, InvalidSourceOffset, InvalidSourceOffset, +	               textStart, textEnd, mode); +} + +static void assertEmptyData(OsmlStreamParser &parser) +{ +	ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + +	const TokenizedData &data = parser.getData(); +	TokenizedDataReader dataReader = data.reader(); + +	Token token; +	EXPECT_FALSE(dataReader.read(token, TokenSet{}, WhitespaceMode::TRIM)); +} + +static void assertFieldStart(OsmlStreamParser &parser, bool defaultField,                               SourceOffset start = InvalidSourceOffset,                               SourceOffset end = InvalidSourceOffset)  { -	ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); -	EXPECT_EQ(defaultField, reader.inDefaultField()); +	ASSERT_EQ(OsmlStreamParser::State::FIELD_START, parser.parse()); +	EXPECT_EQ(defaultField, parser.inDefaultField());  	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getLocation().getStart()); +		EXPECT_EQ(start, parser.getLocation().getStart());  	}  	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getLocation().getEnd()); +		EXPECT_EQ(end, parser.getLocation().getEnd());  	}  } -static void assertFieldEnd(OsmlStreamParser &reader, +static void assertFieldEnd(OsmlStreamParser &parser,                             SourceOffset start = InvalidSourceOffset,                             SourceOffset end = InvalidSourceOffset)  { -	ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::FIELD_END, parser.parse());  	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getLocation().getStart()); +		EXPECT_EQ(start, parser.getLocation().getStart());  	}  	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getLocation().getEnd()); +		EXPECT_EQ(end, parser.getLocation().getEnd());  	}  } -static void assertAnnotationStart(OsmlStreamParser &reader, +static void assertAnnotationStart(OsmlStreamParser &parser,                                    const std::string &name,                                    SourceOffset start = InvalidSourceOffset,                                    SourceOffset end = InvalidSourceOffset)  { -	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); -	EXPECT_EQ(name, reader.getCommandName().asString()); +	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, parser.parse()); +	EXPECT_EQ(name, parser.getCommandName().asString());  	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); -		EXPECT_EQ(start, reader.getLocation().getStart()); +		EXPECT_EQ(start, parser.getCommandName().getLocation().getStart()); +		EXPECT_EQ(start, parser.getLocation().getStart());  	}  	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); -		EXPECT_EQ(end, reader.getLocation().getEnd()); +		EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd()); +		EXPECT_EQ(end, parser.getLocation().getEnd());  	}  } -static void assertAnnotationStart(OsmlStreamParser &reader, +static void assertAnnotationStart(OsmlStreamParser &parser,                                    const std::string &name,                                    const Variant::mapType &args,                                    SourceOffset start = InvalidSourceOffset,                                    SourceOffset end = InvalidSourceOffset)  { -	assertAnnotationStart(reader, name, start, end); -	EXPECT_EQ(args, reader.getCommandArguments()); +	assertAnnotationStart(parser, name, start, end); +	EXPECT_EQ(args, parser.getCommandArguments());  } -static void assertAnnotationEnd(OsmlStreamParser &reader, +static void assertAnnotationEnd(OsmlStreamParser &parser,                                  const std::string &name,                                  const std::string &elementName,                                  SourceOffset start = InvalidSourceOffset,                                  SourceOffset end = InvalidSourceOffset)  { -	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); -	ASSERT_EQ(name, reader.getCommandName().asString()); +	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, parser.parse()); +	ASSERT_EQ(name, parser.getCommandName().asString());  	if (!elementName.empty()) { -		ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); -		ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name")); +		ASSERT_EQ(1U, parser.getCommandArguments().asMap().size()); +		ASSERT_EQ(1U, parser.getCommandArguments().asMap().count("name")); -		auto it = reader.getCommandArguments().asMap().find("name"); +		auto it = parser.getCommandArguments().asMap().find("name");  		ASSERT_EQ(elementName, it->second.asString());  	}  	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getLocation().getStart()); +		EXPECT_EQ(start, parser.getLocation().getStart());  	}  	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getLocation().getEnd()); +		EXPECT_EQ(end, parser.getLocation().getEnd());  	}  } -static void assertEnd(OsmlStreamParser &reader, +static void assertEnd(OsmlStreamParser &parser,                        SourceOffset start = InvalidSourceOffset,                        SourceOffset end = InvalidSourceOffset)  { -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, parser.parse());  	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getLocation().getStart()); +		EXPECT_EQ(start, parser.getLocation().getStart());  	}  	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getLocation().getEnd()); +		EXPECT_EQ(end, parser.getLocation().getEnd());  	}  } @@ -166,9 +228,9 @@ TEST(OsmlStreamParser, empty)  	const char *testString = "";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	assertEnd(parser, 0, 0);  }  TEST(OsmlStreamParser, oneCharacter) @@ -176,57 +238,102 @@ TEST(OsmlStreamParser, oneCharacter)  	const char *testString = "a";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertData(reader, "a", 0, 1); +	assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::COLLAPSE); +	assertEnd(parser, 1, 1);  } -TEST(OsmlStreamParser, whitespaceElimination) +TEST(OsmlStreamParser, whitespacePreserve)  {  	const char *testString = " hello \t world ";  	//                        0123456 78901234  	//                        0          1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertData(reader, "hello world", 1, 14); +	assertTextData(parser, " hello \t world ", 0, 15, 0, 15, +	               WhitespaceMode::PRESERVE); +	assertEnd(parser, 15, 15);  } -TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) +TEST(OsmlStreamParser, whitespaceTrim) +{ +	const char *testString = " hello \t world "; +	//                        0123456 78901234 +	//                        0          1 +	CharReader charReader(testString); + +	OsmlStreamParser parser(charReader, logger); + +	assertTextData(parser, "hello \t world", 0, 15, 1, 14, +	               WhitespaceMode::TRIM); +	assertEnd(parser, 15, 15); +} + +TEST(OsmlStreamParser, whitespaceCollapse) +{ +	const char *testString = " hello \t world "; +	//                        0123456 78901234 +	//                        0          1 +	CharReader charReader(testString); + +	OsmlStreamParser parser(charReader, logger); + +	assertTextData(parser, "hello world", 0, 15, 1, 14, +	               WhitespaceMode::COLLAPSE); +	assertEnd(parser, 15, 15); +} + +TEST(OsmlStreamParser, whitespaceCollapseLinebreak)  {  	const char *testString = " hello \n world ";  	//                        0123456 78901234  	//                        0          1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertData(reader, "hello world", 1, 14); +	assertTextData(parser, "hello world", 0, 15, 1, 14, +	               WhitespaceMode::COLLAPSE); +	assertEnd(parser, 15, 15);  } -TEST(OsmlStreamParser, escapeWhitespace) +TEST(OsmlStreamParser, whitespaceCollapseProtected)  {  	const char *testString = " hello\\ \\ world ";  	//                        012345 67 89012345  	//                        0           1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertData(reader, "hello  world", 1, 15); +	assertTextData(parser, "hello  world", 0, 16, 1, 15, +	               WhitespaceMode::COLLAPSE); +	assertEnd(parser, 16, 16); +} + +TEST(OsmlStreamParser, whitespaceCollapseProtected2) +{ +	const char *testString = " hello \\ \\ world "; +	//                        012345 67 89012345 +	//                        0           1 +	CharReader charReader(testString); + +	OsmlStreamParser parser(charReader, logger); + +	assertTextData(parser, "hello   world", 0, 17, 1, 16, +	               WhitespaceMode::COLLAPSE); +	assertEnd(parser, 17, 17);  }  static void testEscapeSpecialCharacter(const std::string &c)  {  	CharReader charReader(std::string("\\") + c); -	OsmlStreamParser reader(charReader, logger); -	EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); -	EXPECT_EQ(c, reader.getData().asString()); - -	SourceLocation loc = reader.getData().getLocation(); -	EXPECT_EQ(0U, loc.getStart()); -	EXPECT_EQ(1U + c.size(), loc.getEnd()); +	OsmlStreamParser parser(charReader, logger); +	assertTextData(parser, c, 0, 2, 0, 2, WhitespaceMode::PRESERVE); +	assertEnd(parser, 2, 2);  }  TEST(OsmlStreamParser, escapeSpecialCharacters) @@ -239,9 +346,11 @@ TEST(OsmlStreamParser, escapeSpecialCharacters)  TEST(OsmlStreamParser, simpleSingleLineComment)  {  	const char *testString = "% This is a single line comment"; +	//                        0123456789012345678901234567890 +	//                        0         1         2         3  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	OsmlStreamParser parser(charReader, logger); +	assertEnd(parser, 31, 31);  }  TEST(OsmlStreamParser, singleLineComment) @@ -250,24 +359,10 @@ TEST(OsmlStreamParser, singleLineComment)  	//                        01234567890123456789012345678901 23  	//                        0         1         2         3  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); -	{ -		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); -		ASSERT_EQ("a", reader.getData().asString()); -		SourceLocation loc = reader.getData().getLocation(); -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(1U, loc.getEnd()); -	} - -	{ -		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); -		ASSERT_EQ("b", reader.getData().asString()); -		SourceLocation loc = reader.getData().getLocation(); -		ASSERT_EQ(33U, loc.getStart()); -		ASSERT_EQ(34U, loc.getEnd()); -	} +	OsmlStreamParser parser(charReader, logger); -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	assertTextData(parser, "ab", 0, 34, 0, 34, WhitespaceMode::PRESERVE); +	assertEnd(parser, 34, 34);  }  TEST(OsmlStreamParser, multilineComment) @@ -276,24 +371,26 @@ TEST(OsmlStreamParser, multilineComment)  	//                        0123456789012 3 456789012345678901234567890  	//                        0         1           2         3         4  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); -	{ -		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); -		ASSERT_EQ("a", reader.getData().asString()); -		SourceLocation loc = reader.getData().getLocation(); -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(1U, loc.getEnd()); -	} +	OsmlStreamParser parser(charReader, logger); -	{ -		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); -		ASSERT_EQ("b", reader.getData().asString()); -		SourceLocation loc = reader.getData().getLocation(); -		ASSERT_EQ(40U, loc.getStart()); -		ASSERT_EQ(41U, loc.getEnd()); -	} +	assertTextData(parser, "ab", 0, 41, 0, 41, WhitespaceMode::PRESERVE); +	assertEnd(parser, 41, 41); +} -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +TEST(OsmlStreamParser, unfinishedMultilineComment) +{ +	const char *testString = "a%{ This is a\n\n multiline line comment"; +	//                        0123456789012 3 456789012345678901234567 +	//                        0         1           2         3 +	CharReader charReader(testString); +	OsmlStreamParser parser(charReader, logger); + +	logger.reset(); + +	ASSERT_FALSE(logger.hasError()); +	assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); +	ASSERT_TRUE(logger.hasError()); +	assertEnd(parser, 38, 38);  }  TEST(OsmlStreamParser, nestedMultilineComment) @@ -302,24 +399,10 @@ TEST(OsmlStreamParser, nestedMultilineComment)  	//                        0123456789012 3 456789012345678901234567890  	//                        0         1           2         3         4  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); -	{ -		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); -		ASSERT_EQ("a", reader.getData().asString()); -		SourceLocation loc = reader.getData().getLocation(); -		ASSERT_EQ(0U, loc.getStart()); -		ASSERT_EQ(1U, loc.getEnd()); -	} +	OsmlStreamParser parser(charReader, logger); -	{ -		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); -		ASSERT_EQ("b", reader.getData().asString()); -		SourceLocation loc = reader.getData().getLocation(); -		ASSERT_EQ(40U, loc.getStart()); -		ASSERT_EQ(41U, loc.getEnd()); -	} - -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	assertTextData(parser, "ab", 0, 41, 0, 41, WhitespaceMode::PRESERVE); +	assertEnd(parser, 41, 41);  }  TEST(OsmlStreamParser, simpleCommand) @@ -327,45 +410,27 @@ TEST(OsmlStreamParser, simpleCommand)  	const char *testString = "\\test";  	//                        0 12345  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); -	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - -	Variant commandName = reader.getCommandName(); -	ASSERT_EQ("test", commandName.asString()); +	OsmlStreamParser parser(charReader, logger); -	SourceLocation loc = commandName.getLocation(); -	ASSERT_EQ(0U, loc.getStart()); -	ASSERT_EQ(5U, loc.getEnd()); - -	ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	assertCommand(parser, "test", 0, 5); +	assertEnd(parser);  }  TEST(OsmlStreamParser, simpleCommandWithName)  { -	const char *testString = "\\test#bla"; -	//                        0 12345678 +	const char *testString = "\\test#foo"; +	//                         012345678  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); -	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - -	Variant commandName = reader.getCommandName(); -	ASSERT_EQ("test", commandName.asString()); -	SourceLocation loc = commandName.getLocation(); -	ASSERT_EQ(0U, loc.getStart()); -	ASSERT_EQ(5U, loc.getEnd()); +	OsmlStreamParser parser(charReader, logger); -	Variant commandArguments = reader.getCommandArguments(); -	ASSERT_TRUE(commandArguments.isMap()); -	ASSERT_EQ(1U, commandArguments.asMap().size()); -	ASSERT_EQ(1U, commandArguments.asMap().count("name")); -	ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); +	assertCommandStart(parser, "test", false, Variant::mapType{{"name", "foo"}}, +	                   0, 5); -	loc = commandArguments.asMap()["name"].getLocation(); -	ASSERT_EQ(5U, loc.getStart()); -	ASSERT_EQ(9U, loc.getEnd()); +	Variant::mapType args = parser.getCommandArguments().asMap(); +	ASSERT_EQ(5U, args["name"].getLocation().getStart()); +	ASSERT_EQ(9U, args["name"].getLocation().getEnd()); -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	assertEnd(parser);  }  TEST(OsmlStreamParser, simpleCommandWithArguments) @@ -374,38 +439,21 @@ TEST(OsmlStreamParser, simpleCommandWithArguments)  	//                        0 123456789012345 678901 2  	//                        0          1          2  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); -	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - -	Variant commandName = reader.getCommandName(); -	ASSERT_EQ("test", commandName.asString()); -	SourceLocation loc = commandName.getLocation(); -	ASSERT_EQ(0U, loc.getStart()); -	ASSERT_EQ(5U, loc.getEnd()); - -	Variant commandArguments = reader.getCommandArguments(); -	ASSERT_TRUE(commandArguments.isMap()); -	ASSERT_EQ(3U, commandArguments.asMap().size()); -	ASSERT_EQ(1U, commandArguments.asMap().count("a")); -	ASSERT_EQ(1U, commandArguments.asMap().count("b")); -	ASSERT_EQ(1U, commandArguments.asMap().count("c")); -	ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); -	ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); -	ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); - -	loc = commandArguments.asMap()["a"].getLocation(); -	ASSERT_EQ(8U, loc.getStart()); -	ASSERT_EQ(9U, loc.getEnd()); +	OsmlStreamParser parser(charReader, logger); -	loc = commandArguments.asMap()["b"].getLocation(); -	ASSERT_EQ(12U, loc.getStart()); -	ASSERT_EQ(13U, loc.getEnd()); +	assertCommandStart(parser, "test", false, +	                   Variant::mapType{{"a", 1}, {"b", 2}, {"c", "test"}}, 0, +	                   5); -	loc = commandArguments.asMap()["c"].getLocation(); -	ASSERT_EQ(16U, loc.getStart()); -	ASSERT_EQ(22U, loc.getEnd()); +	Variant::mapType args = parser.getCommandArguments().asMap(); +	ASSERT_EQ(8U, args["a"].getLocation().getStart()); +	ASSERT_EQ(9U, args["a"].getLocation().getEnd()); +	ASSERT_EQ(12U, args["b"].getLocation().getStart()); +	ASSERT_EQ(13U, args["b"].getLocation().getEnd()); +	ASSERT_EQ(16U, args["c"].getLocation().getStart()); +	ASSERT_EQ(22U, args["c"].getLocation().getEnd()); -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	assertEnd(parser);  }  TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) @@ -414,44 +462,24 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName)  	//                        0 1234567890123456789 01234 56  	//                        0          1          2  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); -	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); +	OsmlStreamParser parser(charReader, logger); -	Variant commandName = reader.getCommandName(); -	ASSERT_EQ("test", commandName.asString()); -	SourceLocation loc = commandName.getLocation(); -	ASSERT_EQ(0U, loc.getStart()); -	ASSERT_EQ(5U, loc.getEnd()); +	assertCommandStart( +	    parser, "test", false, +	    Variant::mapType{{"name", "bla"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 0, +	    5); -	Variant commandArguments = reader.getCommandArguments(); -	ASSERT_TRUE(commandArguments.isMap()); -	ASSERT_EQ(4U, commandArguments.asMap().size()); -	ASSERT_EQ(1U, commandArguments.asMap().count("a")); -	ASSERT_EQ(1U, commandArguments.asMap().count("b")); -	ASSERT_EQ(1U, commandArguments.asMap().count("c")); -	ASSERT_EQ(1U, commandArguments.asMap().count("name")); -	ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); -	ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); -	ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); -	ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); +	Variant::mapType args = parser.getCommandArguments().asMap(); +	ASSERT_EQ(5U, args["name"].getLocation().getStart()); +	ASSERT_EQ(9U, args["name"].getLocation().getEnd()); +	ASSERT_EQ(12U, args["a"].getLocation().getStart()); +	ASSERT_EQ(13U, args["a"].getLocation().getEnd()); +	ASSERT_EQ(16U, args["b"].getLocation().getStart()); +	ASSERT_EQ(17U, args["b"].getLocation().getEnd()); +	ASSERT_EQ(20U, args["c"].getLocation().getStart()); +	ASSERT_EQ(26U, args["c"].getLocation().getEnd()); -	loc = commandArguments.asMap()["a"].getLocation(); -	ASSERT_EQ(12U, loc.getStart()); -	ASSERT_EQ(13U, loc.getEnd()); - -	loc = commandArguments.asMap()["b"].getLocation(); -	ASSERT_EQ(16U, loc.getStart()); -	ASSERT_EQ(17U, loc.getEnd()); - -	loc = commandArguments.asMap()["c"].getLocation(); -	ASSERT_EQ(20U, loc.getStart()); -	ASSERT_EQ(26U, loc.getEnd()); - -	loc = commandArguments.asMap()["name"].getLocation(); -	ASSERT_EQ(5U, loc.getStart()); -	ASSERT_EQ(9U, loc.getEnd()); - -	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	assertEnd(parser);  }  TEST(OsmlStreamParser, fields) @@ -460,21 +488,76 @@ TEST(OsmlStreamParser, fields)  	//                         01234567890123  	//                         0         1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); + +	assertCommand(parser, "test", 0, 5); +	assertFieldStart(parser, false, 5, 6); +	assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::PRESERVE); +	assertFieldEnd(parser, 7, 8); + +	assertFieldStart(parser, false, 8, 9); +	assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::PRESERVE); +	assertFieldEnd(parser, 10, 11); + +	assertFieldStart(parser, false, 11, 12); +	assertTextData(parser, "c", 12, 13, 12, 13, WhitespaceMode::PRESERVE); +	assertFieldEnd(parser, 13, 14); +	assertEnd(parser, 14, 14); +} + +TEST(OsmlStreamParser, fieldsWithoutCommand) +{ +	const char *testString = "{a}{b}{c}"; +	//                        012345678 +	CharReader charReader(testString); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, false, 5, 6); -	assertData(reader, "a", 6, 7); -	assertFieldEnd(reader, 7, 8); +	assertFieldStart(parser, false, 0, 1); +	assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); +	assertFieldEnd(parser, 2, 3); -	assertFieldStart(reader, false, 8, 9); -	assertData(reader, "b", 9, 10); -	assertFieldEnd(reader, 10, 11); +	assertFieldStart(parser, false, 3, 4); +	assertTextData(parser, "b", 4, 5, 4, 5, WhitespaceMode::PRESERVE); +	assertFieldEnd(parser, 5, 6); -	assertFieldStart(reader, false, 11, 12); -	assertData(reader, "c", 12, 13); -	assertFieldEnd(reader, 13, 14); -	assertEnd(reader, 14, 14); +	assertFieldStart(parser, false, 6, 7); +	assertTextData(parser, "c", 7, 8, 7, 8, WhitespaceMode::PRESERVE); +	assertFieldEnd(parser, 8, 9); +	assertEnd(parser, 9, 9); +} + +TEST(OsmlStreamParser, nestedField) +{ +	const char *testString = "{{a{b}}}"; +	//                        01234567 +	CharReader charReader(testString); +	OsmlStreamParser parser(charReader, logger); + +	assertFieldStart(parser, false, 0, 1); +	assertFieldStart(parser, false, 1, 2); +	assertTextData(parser, "a", 2, 3, 2, 3, WhitespaceMode::PRESERVE); +	assertFieldStart(parser, false, 3, 4); +	assertTextData(parser, "b", 4, 5, 4, 5, WhitespaceMode::PRESERVE); +	assertFieldEnd(parser, 5, 6); +	assertFieldEnd(parser, 6, 7); +	assertFieldEnd(parser, 7, 8); +	assertEnd(parser, 8, 8); +} + +TEST(OsmlStreamParser, errorUnbalancedField) +{ +	const char *testString = "{a"; +	//                        01 +	CharReader charReader(testString); +	OsmlStreamParser parser(charReader, logger); + +	logger.reset(); + +	assertFieldStart(parser, false, 0, 1); +	assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); +	ASSERT_FALSE(logger.hasError()); +	assertEnd(parser, 2, 2); +	ASSERT_TRUE(logger.hasError());  }  TEST(OsmlStreamParser, dataOutsideField) @@ -483,19 +566,19 @@ TEST(OsmlStreamParser, dataOutsideField)  	//                         0123456789012  	//                         0         1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, false, 5, 6); -	assertData(reader, "a", 6, 7); -	assertFieldEnd(reader, 7, 8); +	assertCommand(parser, "test", 0, 5); +	assertFieldStart(parser, false, 5, 6); +	assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::COLLAPSE); +	assertFieldEnd(parser, 7, 8); -	assertFieldStart(reader, false, 8, 9); -	assertData(reader, "b", 9, 10); -	assertFieldEnd(reader, 10, 11); +	assertFieldStart(parser, false, 8, 9); +	assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::COLLAPSE); +	assertFieldEnd(parser, 10, 11); -	assertData(reader, "c", 12, 13); -	assertEnd(reader, 13, 13); +	assertTextData(parser, "c", 11, 13, 12, 13, WhitespaceMode::COLLAPSE); +	assertEnd(parser, 13, 13);  }  TEST(OsmlStreamParser, nestedCommand) @@ -504,25 +587,22 @@ TEST(OsmlStreamParser, nestedCommand)  	//                         012345678 90123456789012  	//                         0          1         2  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "test", 0, 5); +	assertCommand(parser, "test", 0, 5); +	assertFieldStart(parser, false, 5, 6); +	assertData(parser, "a", 6, 7); +	assertFieldEnd(parser, 7, 8); -	assertFieldStart(reader, false, 5, 6); -	assertData(reader, "a", 6, 7); -	assertFieldEnd(reader, 7, 8); - -	assertFieldStart(reader, false, 8, 9); -	{ -		assertCommand(reader, "test2", 9, 15); -		assertFieldStart(reader, false, 15, 16); -		assertData(reader, "b", 16, 17); -		assertFieldEnd(reader, 17, 18); -	} -	assertData(reader, "c", 19, 20); -	assertFieldEnd(reader, 20, 21); -	assertData(reader, "d", 22, 23); -	assertEnd(reader, 23, 23); +	assertFieldStart(parser, false, 8, 9); +	assertCommand(parser, "test2", 9, 15); +	assertFieldStart(parser, false, 15, 16); +	assertData(parser, "b", 16, 17); +	assertFieldEnd(parser, 17, 18); +	assertData(parser, "c", 19, 20); +	assertFieldEnd(parser, 20, 21); +	assertData(parser, "d", 22, 23); +	assertEnd(parser, 23, 23);  }  TEST(OsmlStreamParser, nestedCommandImmediateEnd) @@ -531,19 +611,19 @@ TEST(OsmlStreamParser, nestedCommandImmediateEnd)  	//                         012345 678901234567  	//                         0          1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, false, 5, 6); +	assertCommand(parser, "test", 0, 5); +	assertFieldStart(parser, false, 5, 6);  	{ -		assertCommand(reader, "test2", 6, 12); -		assertFieldStart(reader, false, 12, 13); -		assertData(reader, "b", 13, 14); -		assertFieldEnd(reader, 14, 15); +		assertCommand(parser, "test2", 6, 12); +		assertFieldStart(parser, false, 12, 13); +		assertData(parser, "b", 13, 14); +		assertFieldEnd(parser, 14, 15);  	} -	assertFieldEnd(reader, 15, 16); -	assertData(reader, "d", 17, 18); -	assertEnd(reader, 18, 18); +	assertFieldEnd(parser, 15, 16); +	assertData(parser, "d", 17, 18); +	assertEnd(parser, 18, 18);  }  TEST(OsmlStreamParser, nestedCommandNoData) @@ -551,13 +631,13 @@ TEST(OsmlStreamParser, nestedCommandNoData)  	const char *testString = "\\test{\\test2}";  	//                         012345 6789012  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, false, 5, 6); -	assertCommand(reader, "test2", 6, 12); -	assertFieldEnd(reader, 12, 13); -	assertEnd(reader, 13, 13); +	assertCommand(parser, "test", 0, 5); +	assertFieldStart(parser, false, 5, 6); +	assertCommand(parser, "test2", 6, 12); +	assertFieldEnd(parser, 12, 13); +	assertEnd(parser, 13, 13);  }  TEST(OsmlStreamParser, multipleCommands) @@ -566,13 +646,16 @@ TEST(OsmlStreamParser, multipleCommands)  	//                         012 345 678 90  	//                         0            1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "a", 0, 2); -	assertCommand(reader, "b", 3, 5); -	assertCommand(reader, "c", 6, 8); -	assertCommand(reader, "d", 9, 11); -	assertEnd(reader, 11, 11); +	assertCommand(parser, "a", 0, 2); +	assertEmptyData(parser); +	assertCommand(parser, "b", 3, 5); +	assertEmptyData(parser); +	assertCommand(parser, "c", 6, 8); +	assertEmptyData(parser); +	assertCommand(parser, "d", 9, 11); +	assertEnd(parser, 11, 11);  }  TEST(OsmlStreamParser, fieldsWithSpaces) @@ -581,33 +664,37 @@ TEST(OsmlStreamParser, fieldsWithSpaces)  	//                         0123 456 789012 3 456 789  	//                         0           1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "a", 0, 2); -	assertFieldStart(reader, false, 3, 4); -	assertCommand(reader, "b", 4, 6); -	assertCommand(reader, "c", 7, 9); -	assertFieldEnd(reader, 9, 10); -	assertFieldStart(reader, false, 16, 17); -	assertCommand(reader, "d", 17, 19); -	assertFieldEnd(reader, 19, 20); -	assertEnd(reader, 20, 20); +	assertCommand(parser, "a", 0, 2); +	assertEmptyData(parser); +	assertFieldStart(parser, false, 3, 4); +	assertCommand(parser, "b", 4, 6); +	assertEmptyData(parser); +	assertCommand(parser, "c", 7, 9); +	assertFieldEnd(parser, 9, 10); +	assertEmptyData(parser); +	assertFieldStart(parser, false, 16, 17); +	assertCommand(parser, "d", 17, 19); +	assertFieldEnd(parser, 19, 20); +	assertEnd(parser, 20, 20);  } -TEST(OsmlStreamParser, errorNoFieldToStart) +TEST(OsmlStreamParser, errorEndButOpenField)  {  	const char *testString = "\\a b {";  	//                         012345  	//                         0  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset(); -	assertCommand(reader, "a", 0, 2); -	assertData(reader, "b", 3, 4); +	assertCommand(parser, "a", 0, 2); +	assertData(parser, "b", 3, 4); +	assertFieldStart(parser, false, 5, 6);  	ASSERT_FALSE(logger.hasError()); -	assertEnd(reader, 6, 6); +	assertEnd(parser, 6, 6);  	ASSERT_TRUE(logger.hasError());  } @@ -618,13 +705,13 @@ TEST(OsmlStreamParser, errorNoFieldToEnd)  	//                         0  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset(); -	assertCommand(reader, "a", 0, 2); -	assertData(reader, "b", 3, 4); +	assertCommand(parser, "a", 0, 2); +	assertData(parser, "b", 3, 4);  	ASSERT_FALSE(logger.hasError()); -	assertEnd(reader, 6, 6); +	assertEnd(parser, 6, 6);  	ASSERT_TRUE(logger.hasError());  } @@ -635,17 +722,17 @@ TEST(OsmlStreamParser, errorNoFieldEndNested)  	//                         0          1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset(); -	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, false, 5, 6); -	assertCommand(reader, "test2", 6, 12); -	assertFieldStart(reader, false, 12, 13); -	assertFieldEnd(reader, 13, 14); -	assertFieldEnd(reader, 14, 15); +	assertCommand(parser, "test", 0, 5); +	assertFieldStart(parser, false, 5, 6); +	assertCommand(parser, "test2", 6, 12); +	assertFieldStart(parser, false, 12, 13); +	assertFieldEnd(parser, 13, 14); +	assertFieldEnd(parser, 14, 15);  	ASSERT_FALSE(logger.hasError()); -	assertEnd(reader, 16, 16); +	assertEnd(parser, 16, 16);  	ASSERT_TRUE(logger.hasError());  } @@ -656,18 +743,18 @@ TEST(OsmlStreamParser, errorNoFieldEndNestedData)  	//                         0          1  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset(); -	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, false, 5, 6); -	assertCommand(reader, "test2", 6, 12); -	assertFieldStart(reader, false, 12, 13); -	assertFieldEnd(reader, 13, 14); -	assertFieldEnd(reader, 14, 15); -	assertData(reader, "a", 15, 16); +	assertCommand(parser, "test", 0, 5); +	assertFieldStart(parser, false, 5, 6); +	assertCommand(parser, "test2", 6, 12); +	assertFieldStart(parser, false, 12, 13); +	assertFieldEnd(parser, 13, 14); +	assertFieldEnd(parser, 14, 15); +	assertData(parser, "a", 15, 16);  	ASSERT_FALSE(logger.hasError()); -	assertEnd(reader, 17, 17); +	assertEnd(parser, 17, 17);  	ASSERT_TRUE(logger.hasError());  } @@ -678,12 +765,11 @@ TEST(OsmlStreamParser, beginEnd)  	//                         0         1          2  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "book", 7, 11); -	assertFieldStart(reader, true, 12, 13); -	assertFieldEnd(reader, 17, 21); -	assertEnd(reader, 22, 22); +	assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); +	assertRangeEnd(parser, 17, 21); +	assertEnd(parser, 22, 22);  }  TEST(OsmlStreamParser, beginEndWithName) @@ -693,12 +779,11 @@ TEST(OsmlStreamParser, beginEndWithName)  	//                         0         1          2  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "book", {{"name", "a"}}, 7, 11); -	assertFieldStart(reader, true, 14, 15); -	assertFieldEnd(reader, 19, 23); -	assertEnd(reader, 24, 24); +	assertCommandStart(parser, "book", true, {{"name", "a"}}, 7, 11); +	assertRangeEnd(parser, 19, 23); +	assertEnd(parser, 24, 24);  }  TEST(OsmlStreamParser, beginEndWithNameAndArgs) @@ -708,13 +793,13 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgs)  	//                         0         1         2           3          4  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "book", -	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); -	assertFieldStart(reader, true, 32, 33); -	assertFieldEnd(reader, 37, 41); -	assertEnd(reader, 42, 42); +	assertCommandStart(parser, "book", true, +	                   {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, +	                   11); +	assertRangeEnd(parser, 37, 41); +	assertEnd(parser, 42, 42);  }  TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) @@ -725,23 +810,23 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields)  	//    0         1         2           3          4          5          6  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); - -	assertCommand(reader, "book", -	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); -	assertFieldStart(reader, false, 32, 33); -	assertData(reader, "a", 33, 34); -	assertCommand(reader, "test", Variant::mapType{}, 35, 40); -	assertFieldEnd(reader, 40, 41); -	assertFieldStart(reader, false, 41, 42); -	assertData(reader, "b", 42, 43); -	assertCommand(reader, "test", Variant::mapType{}, 44, 49); -	assertFieldStart(reader, false, 49, 50); -	assertFieldEnd(reader, 50, 51); -	assertFieldEnd(reader, 51, 52); -	assertFieldStart(reader, true, 52, 53); -	assertFieldEnd(reader, 57, 61); -	assertEnd(reader, 62, 62); +	OsmlStreamParser parser(charReader, logger); + +	assertCommandStart(parser, "book", true, +	                   {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, +	                   11); +	assertFieldStart(parser, false, 32, 33); +	assertData(parser, "a", 33, 34); +	assertCommand(parser, "test", 35, 40); +	assertFieldEnd(parser, 40, 41); +	assertFieldStart(parser, false, 41, 42); +	assertData(parser, "b", 42, 43); +	assertCommand(parser, "test", 44, 49); +	assertFieldStart(parser, false, 49, 50); +	assertFieldEnd(parser, 50, 51); +	assertFieldEnd(parser, 51, 52); +	assertRangeEnd(parser, 57, 61); +	assertEnd(parser, 62, 62);  }  TEST(OsmlStreamParser, beginEndWithData) @@ -751,13 +836,12 @@ TEST(OsmlStreamParser, beginEndWithData)  	//                         0         1          2  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "book", 7, 11); -	assertFieldStart(reader, true, 12, 13); -	assertData(reader, "a", 12, 13); -	assertFieldEnd(reader, 18, 22); -	assertEnd(reader, 23, 23); +	assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); +	assertData(parser, "a", 12, 13); +	assertRangeEnd(parser, 18, 22); +	assertEnd(parser, 23, 23);  }  TEST(OsmlStreamParser, beginEndNested) @@ -768,29 +852,32 @@ TEST(OsmlStreamParser, beginEndNested)  	//    0         1          2         3           4          5  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); - -	assertCommand(reader, "a", 7, 8); -	assertFieldStart(reader, false, 9, 10); -	assertData(reader, "b", 10, 11); -	assertFieldEnd(reader, 11, 12); -	assertFieldStart(reader, true, 13, 14); -	assertData(reader, "c", 13, 14); -	assertCommand(reader, "d", 22, 23); -	assertFieldStart(reader, false, 24, 25); -	assertData(reader, "e", 25, 26); -	assertFieldEnd(reader, 26, 27); -	assertFieldStart(reader, false, 27, 28); -	assertData(reader, "f", 28, 29); -	assertFieldEnd(reader, 29, 30); -	assertFieldStart(reader, true, 31, 32); -	assertCommand(reader, "g", 31, 33); -	assertFieldStart(reader, false, 33, 34); -	assertData(reader, "h", 34, 35); -	assertFieldEnd(reader, 35, 36); -	assertFieldEnd(reader, 42, 43); -	assertFieldEnd(reader, 49, 50); -	assertEnd(reader, 51, 51); +	OsmlStreamParser parser(charReader, logger); + +	assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); +	assertFieldStart(parser, false, 9, 10); +	assertData(parser, "b", 10, 11); +	assertFieldEnd(parser, 11, 12); + +	assertData(parser, "c", 13, 14); + +	assertCommandStart(parser, "d", true, Variant::mapType{}, 22, 23); +	assertFieldStart(parser, false, 24, 25); +	assertData(parser, "e", 25, 26); +	assertFieldEnd(parser, 26, 27); +	assertFieldStart(parser, false, 27, 28); +	assertData(parser, "f", 28, 29); +	assertFieldEnd(parser, 29, 30); + +	assertEmptyData(parser); +	assertCommand(parser, "g", 31, 33); +	assertFieldStart(parser, false, 33, 34); +	assertData(parser, "h", 34, 35); +	assertFieldEnd(parser, 35, 36); +	assertEmptyData(parser); +	assertRangeEnd(parser, 42, 43); +	assertRangeEnd(parser, 49, 50); +	assertEnd(parser, 51, 51);  }  TEST(OsmlStreamParser, beginEndWithCommand) @@ -800,16 +887,75 @@ TEST(OsmlStreamParser, beginEndWithCommand)  	//                         0         1           2  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "book", 7, 11); -	assertFieldStart(reader, true, 12, 13); -	assertCommand(reader, "a", 12, 14); -	assertFieldStart(reader, false, 14, 15); -	assertData(reader, "test", 15, 19); -	assertFieldEnd(reader, 19, 20); -	assertFieldEnd(reader, 25, 29); -	assertEnd(reader, 30, 30); +	assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); +	assertCommand(parser, "a", 12, 14); +	assertFieldStart(parser, false, 14, 15); +	assertData(parser, "test", 15, 19); +	assertFieldEnd(parser, 19, 20); +	assertRangeEnd(parser, 25, 29); +	assertEnd(parser, 30, 30); +} + +TEST(OsmlStreamParser, beginEndNestedFields) +{ +	const char *testString = "\\begin{book}a{{b{c}}}\\end{book}"; +	//                         012345678901234567890 1234567890 +	//                         0         1         2          3 +	CharReader charReader(testString); +	OsmlStreamParser parser(charReader, logger); +	logger.reset(); + +	assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); +	assertData(parser, "a", 12, 13); +	assertFieldStart(parser, false, 13, 14); +	assertFieldStart(parser, false, 14, 15); +	assertData(parser, "b", 15, 16); +	assertFieldStart(parser, false, 16, 17); +	assertData(parser, "c", 17, 18); +	assertFieldEnd(parser, 18, 19); +	assertFieldEnd(parser, 19, 20); +	assertFieldEnd(parser, 20, 21); +	assertRangeEnd(parser, 26, 30); +	assertEnd(parser, 31, 31); +} + +TEST(OsmlStreamParser, errorBeginEndUnbalancedNestedFields) +{ +	const char *testString = "\\begin{book}a{{b{c}}\\end{book}"; +	//                         012345678901234567890 123456789 +	//                         0         1         2 +	CharReader charReader(testString); +	OsmlStreamParser parser(charReader, logger); +	logger.reset(); + +	assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); +	assertData(parser, "a", 12, 13); +	assertFieldStart(parser, false, 13, 14); +	assertFieldStart(parser, false, 14, 15); +	assertData(parser, "b", 15, 16); +	assertFieldStart(parser, false, 16, 17); +	assertData(parser, "c", 17, 18); +	assertFieldEnd(parser, 18, 19); +	assertFieldEnd(parser, 19, 20); +	ASSERT_THROW(assertRangeEnd(parser, 25, 29), LoggableException); +} + +TEST(OsmlStreamParser, errorBeginEndUnbalancedFields) +{ +	const char *testString = "{a"; +	//                        01 +	CharReader charReader(testString); +	OsmlStreamParser parser(charReader, logger); + +	logger.reset(); + +	assertFieldStart(parser, false, 0, 1); +	assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); +	ASSERT_FALSE(logger.hasError()); +	assertEnd(parser, 2, 2); +	ASSERT_TRUE(logger.hasError());  }  TEST(OsmlStreamParser, errorBeginNoBraceOpen) @@ -818,12 +964,13 @@ TEST(OsmlStreamParser, errorBeginNoBraceOpen)  	//                         01234567  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	assertData(reader, "a", 7, 8); +	assertData(parser, "a", 7, 8);  	ASSERT_TRUE(logger.hasError()); +	assertEnd(parser, 8, 8);  }  TEST(OsmlStreamParser, errorBeginNoIdentifier) @@ -831,11 +978,11 @@ TEST(OsmlStreamParser, errorBeginNoIdentifier)  	const char *testString = "\\begin{!";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	ASSERT_THROW(reader.parse(), LoggableException); +	ASSERT_THROW(parser.parse(), LoggableException);  	ASSERT_TRUE(logger.hasError());  } @@ -844,11 +991,11 @@ TEST(OsmlStreamParser, errorBeginNoBraceClose)  	const char *testString = "\\begin{a";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	ASSERT_THROW(reader.parse(), LoggableException); +	ASSERT_THROW(parser.parse(), LoggableException);  	ASSERT_TRUE(logger.hasError());  } @@ -857,15 +1004,15 @@ TEST(OsmlStreamParser, errorBeginNoName)  	const char *testString = "\\begin{a#}";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	assertCommand(reader, "a"); +	assertCommandStart(parser, "a", true);  	ASSERT_TRUE(logger.hasError());  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	assertEnd(reader); +	assertEnd(parser);  	ASSERT_TRUE(logger.hasError());  } @@ -875,11 +1022,11 @@ TEST(OsmlStreamParser, errorEndNoBraceOpen)  	//                         012345  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	assertData(reader, "a", 5, 6); +	assertData(parser, "a", 5, 6);  	ASSERT_TRUE(logger.hasError());  } @@ -888,11 +1035,11 @@ TEST(OsmlStreamParser, errorEndNoIdentifier)  	const char *testString = "\\end{!";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	ASSERT_THROW(reader.parse(), LoggableException); +	ASSERT_THROW(parser.parse(), LoggableException);  	ASSERT_TRUE(logger.hasError());  } @@ -901,11 +1048,11 @@ TEST(OsmlStreamParser, errorEndNoBraceClose)  	const char *testString = "\\end{a";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	ASSERT_THROW(reader.parse(), LoggableException); +	ASSERT_THROW(parser.parse(), LoggableException);  	ASSERT_TRUE(logger.hasError());  } @@ -914,11 +1061,11 @@ TEST(OsmlStreamParser, errorEndNoBegin)  	const char *testString = "\\end{a}";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	ASSERT_THROW(reader.parse(), LoggableException); +	ASSERT_THROW(parser.parse(), LoggableException);  	ASSERT_TRUE(logger.hasError());  } @@ -929,16 +1076,15 @@ TEST(OsmlStreamParser, errorBeginEndMismatch)  	//                         0          1         2          3  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset(); -	assertCommand(reader, "a", 7, 8); -	assertFieldStart(reader, true, 10, 11); -	assertCommand(reader, "b", 17, 18); -	assertFieldStart(reader, true, 20, 24); -	assertData(reader, "test", 20, 24); +	assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); +	assertEmptyData(parser); +	assertCommandStart(parser, "b", true, Variant::mapType{}, 17, 18); +	assertData(parser, "test", 20, 24);  	ASSERT_FALSE(logger.hasError()); -	ASSERT_THROW(reader.parse(), LoggableException); +	ASSERT_THROW(parser.parse(), LoggableException);  	ASSERT_TRUE(logger.hasError());  } @@ -948,10 +1094,10 @@ TEST(OsmlStreamParser, commandWithNSSep)  	//                         012345678901  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "test1:test2", 0, 12); -	assertEnd(reader, 12, 12); +	assertCommand(parser, "test1:test2", 0, 12); +	assertEnd(parser, 12, 12);  }  TEST(OsmlStreamParser, beginEndWithNSSep) @@ -961,12 +1107,11 @@ TEST(OsmlStreamParser, beginEndWithNSSep)  	//                         0         1          2         3  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "test1:test2", 7, 18); -	assertFieldStart(reader, true, 19, 20); -	assertFieldEnd(reader, 24, 35); -	assertEnd(reader, 36, 36); +	assertCommandStart(parser, "test1:test2", true, Variant::mapType{}, 7, 18); +	assertRangeEnd(parser, 24, 35); +	assertEnd(parser, 36, 36);  }  TEST(OsmlStreamParser, errorBeginNSSep) @@ -974,15 +1119,14 @@ TEST(OsmlStreamParser, errorBeginNSSep)  	const char *testString = "\\begin:test{blub}\\end{blub}";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	assertCommand(reader, "blub"); +	assertCommandStart(parser, "blub", true, Variant::mapType{});  	ASSERT_TRUE(logger.hasError()); -	assertFieldStart(reader, true); -	assertFieldEnd(reader); -	assertEnd(reader); +	assertRangeEnd(parser); +	assertEnd(parser);  }  TEST(OsmlStreamParser, errorEndNSSep) @@ -990,15 +1134,14 @@ TEST(OsmlStreamParser, errorEndNSSep)  	const char *testString = "\\begin{blub}\\end:test{blub}";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset(); -	assertCommand(reader, "blub"); -	assertFieldStart(reader, true); +	assertCommandStart(parser, "blub", true, Variant::mapType{});  	ASSERT_FALSE(logger.hasError()); -	assertFieldEnd(reader); +	assertRangeEnd(parser);  	ASSERT_TRUE(logger.hasError()); -	assertEnd(reader); +	assertEnd(parser);  }  TEST(OsmlStreamParser, errorEmptyNs) @@ -1006,14 +1149,14 @@ TEST(OsmlStreamParser, errorEmptyNs)  	const char *testString = "\\test:";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	assertCommand(reader, "test"); +	assertCommand(parser, "test");  	ASSERT_TRUE(logger.hasError()); -	assertData(reader, ":"); -	assertEnd(reader); +	assertData(parser, ":"); +	assertEnd(parser);  }  TEST(OsmlStreamParser, errorRepeatedNs) @@ -1021,14 +1164,14 @@ TEST(OsmlStreamParser, errorRepeatedNs)  	const char *testString = "\\test::";  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	assertCommand(reader, "test"); +	assertCommand(parser, "test");  	ASSERT_TRUE(logger.hasError()); -	assertData(reader, "::"); -	assertEnd(reader); +	assertData(parser, "::"); +	assertEnd(parser);  }  TEST(OsmlStreamParser, explicitDefaultField) @@ -1037,14 +1180,14 @@ TEST(OsmlStreamParser, explicitDefaultField)  	//                         01234567  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "a", 0, 2); -	assertFieldStart(reader, true, 2, 4); -	assertData(reader, "b", 4, 5); -	assertFieldEnd(reader, 5, 6); -	assertData(reader, "c", 6, 7); -	assertEnd(reader, 7, 7); +	assertCommand(parser, "a", 0, 2); +	assertFieldStart(parser, true, 2, 4); +	assertData(parser, "b", 4, 5); +	assertFieldEnd(parser, 5, 6); +	assertData(parser, "c", 6, 7); +	assertEnd(parser, 7, 7);  }  TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) @@ -1053,33 +1196,33 @@ TEST(OsmlStreamParser, explicitDefaultFieldWithCommand)  	//                         0123 4567  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertCommand(reader, "a", 0, 2); -	assertFieldStart(reader, true, 2, 4); -	assertCommand(reader, "b", 4, 6); -	assertFieldEnd(reader, 6, 7); -	assertData(reader, "c", 7, 8); -	assertEnd(reader, 8, 8); +	assertCommand(parser, "a", 0, 2); +	assertFieldStart(parser, true, 2, 4); +	assertCommand(parser, "b", 4, 6); +	assertFieldEnd(parser, 6, 7); +	assertData(parser, "c", 7, 8); +	assertEnd(parser, 8, 8);  } -TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) +TEST(OsmlStreamParser, fieldAfterExplicitDefaultField)  {  	const char *testString = "\\a{!\\b}{c}";  	//                         0123 456789  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset(); -	assertCommand(reader, "a", 0, 2); -	assertFieldStart(reader, true, 2, 4); -	assertCommand(reader, "b", 4, 6); -	assertFieldEnd(reader, 6, 7); -	ASSERT_FALSE(logger.hasError()); -	assertData(reader, "c", 8, 9); -	ASSERT_TRUE(logger.hasError()); -	assertEnd(reader, 10, 10); +	assertCommand(parser, "a", 0, 2); +	assertFieldStart(parser, true, 2, 4); +	assertCommand(parser, "b", 4, 6); +	assertFieldEnd(parser, 6, 7); +	assertFieldStart(parser, false, 7, 8); +	assertData(parser, "c", 8, 9); +	assertFieldEnd(parser, 9, 10); +	assertEnd(parser, 10, 10);  }  TEST(OsmlStreamParser, annotationStart) @@ -1089,10 +1232,10 @@ TEST(OsmlStreamParser, annotationStart)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); -	assertEnd(reader, 3, 3); +	assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); +	assertEnd(parser, 3, 3);  }  TEST(OsmlStreamParser, annotationStartWithName) @@ -1103,11 +1246,11 @@ TEST(OsmlStreamParser, annotationStartWithName)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertAnnotationStart(reader, "annotationWithName", +	assertAnnotationStart(parser, "annotationWithName",  	                      Variant::mapType{{"name", "aName"}}, 0, 20); -	assertEnd(reader, 26, 26); +	assertEnd(parser, 26, 26);  }  TEST(OsmlStreamParser, annotationStartWithArguments) @@ -1118,12 +1261,12 @@ TEST(OsmlStreamParser, annotationStartWithArguments)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	assertAnnotationStart( -	    reader, "annotationWithName", +	    parser, "annotationWithName",  	    Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); -	assertEnd(reader, 35, 35); +	assertEnd(parser, 35, 35);  }  TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) @@ -1134,16 +1277,16 @@ TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	assertAnnotationStart( -	    reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, +	    parser, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8,  	    10); -	assertFieldStart(reader, true, 26, 27); -	assertData(reader, "a", 26, 27); -	assertFieldEnd(reader, 33, 35); -	assertAnnotationEnd(reader, "", "", 36, 38); -	assertEnd(reader, 38, 38); +	ASSERT_TRUE(parser.inRangeCommand()); +	assertData(parser, "a", 26, 27); +	assertRangeEnd(parser, 33, 35); +	assertAnnotationEnd(parser, "", "", 36, 38); +	assertEnd(parser, 38, 38);  }  TEST(OsmlStreamParser, annotationEnd) @@ -1153,10 +1296,10 @@ TEST(OsmlStreamParser, annotationEnd)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertAnnotationEnd(reader, "a", "", 0, 2); -	assertEnd(reader, 3, 3); +	assertAnnotationEnd(parser, "a", "", 0, 2); +	assertEnd(parser, 3, 3);  }  TEST(OsmlStreamParser, annotationEndWithName) @@ -1166,10 +1309,10 @@ TEST(OsmlStreamParser, annotationEndWithName)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertAnnotationEnd(reader, "a", "name", 0, 2); -	assertEnd(reader, 8, 8); +	assertAnnotationEnd(parser, "a", "name", 0, 2); +	assertEnd(parser, 8, 8);  }  TEST(OsmlStreamParser, annotationEndWithNameAsArgs) @@ -1179,10 +1322,10 @@ TEST(OsmlStreamParser, annotationEndWithNameAsArgs)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertAnnotationEnd(reader, "a", "name", 0, 2); -	assertEnd(reader, 14, 14); +	assertAnnotationEnd(parser, "a", "name", 0, 2); +	assertEnd(parser, 14, 14);  }  TEST(OsmlStreamParser, errorAnnotationEndWithArguments) @@ -1193,14 +1336,15 @@ TEST(OsmlStreamParser, errorAnnotationEndWithArguments)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); -	assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); +	assertCommandStart(parser, "a", false, Variant::mapType{{"foo", "bar"}}, 0, +	                   2);  	ASSERT_TRUE(logger.hasError()); -	assertData(reader, ">", 11, 12); -	assertEnd(reader, 12, 12); +	assertData(parser, ">", 11, 12); +	assertEnd(parser, 12, 12);  }  TEST(OsmlStreamParser, closingAnnotation) @@ -1210,11 +1354,11 @@ TEST(OsmlStreamParser, closingAnnotation)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); -	assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); -	assertData(reader, ">", 3, 4); -	assertEnd(reader, 4, 4); +	assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); +	assertData(parser, ">", 3, 4); +	assertEnd(parser, 4, 4);  }  TEST(OsmlStreamParser, annotationWithFields) @@ -1225,23 +1369,23 @@ TEST(OsmlStreamParser, annotationWithFields)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); - -	assertData(reader, "a", 0, 1); -	assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); -	assertFieldStart(reader, false, 5, 6); -	assertData(reader, "c", 6, 7); -	assertFieldEnd(reader, 7, 8); -	assertFieldStart(reader, false, 8, 9); -	assertData(reader, "d", 9, 10); -	assertFieldEnd(reader, 10, 11); -	assertFieldStart(reader, true, 11, 13); -	assertData(reader, "e", 13, 14); -	assertFieldEnd(reader, 14, 15); -	assertData(reader, "f", 16, 17); -	assertAnnotationEnd(reader, "", "", 18, 20); -	assertData(reader, "g", 21, 22); -	assertEnd(reader, 22, 22); +	OsmlStreamParser parser(charReader, logger); + +	assertData(parser, "a", 0, 1); +	assertAnnotationStart(parser, "b", Variant::mapType{}, 2, 5); +	assertFieldStart(parser, false, 5, 6); +	assertData(parser, "c", 6, 7); +	assertFieldEnd(parser, 7, 8); +	assertFieldStart(parser, false, 8, 9); +	assertData(parser, "d", 9, 10); +	assertFieldEnd(parser, 10, 11); +	assertFieldStart(parser, true, 11, 13); +	assertData(parser, "e", 13, 14); +	assertFieldEnd(parser, 14, 15); +	assertData(parser, "f", 16, 17); +	assertAnnotationEnd(parser, "", "", 18, 20); +	assertData(parser, "g", 21, 22); +	assertEnd(parser, 22, 22);  }  TEST(OsmlStreamParser, annotationStartEscape) @@ -1252,10 +1396,44 @@ TEST(OsmlStreamParser, annotationStartEscape)  	CharReader charReader(testString); -	OsmlStreamParser reader(charReader, logger); +	OsmlStreamParser parser(charReader, logger); + +	assertData(parser, "<%test", 0, 7); +	assertEnd(parser, 7, 7); +} -	assertData(reader, "<%test", 0, 7); -	assertEnd(reader, 7, 7); +TEST(OsmlStreamParser, userDefinedTokens) +{ +	const char *testString = "<<My dear fellows>>, the *old man* said."; +	//                        0123456789012345678901234567890123456789 +	//                        0         1         2         3 + +	CharReader charReader(testString); + +	OsmlStreamParser parser(charReader, logger); + +	TokenId tSpeechStart = parser.registerToken("<<"); +	TokenId tSpeechEnd = parser.registerToken(">>"); +	TokenId tStar = parser.registerToken("*"); + +	ASSERT_TRUE(tSpeechStart != Tokens::Empty); +	ASSERT_TRUE(tSpeechEnd != Tokens::Empty); +	ASSERT_TRUE(tStar != Tokens::Empty); + +	TokenSet tokens{tSpeechStart, tSpeechEnd, tStar}; + +	ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); +	TokenizedDataReader reader = parser.getData().reader(); + +	assertToken(reader, tSpeechStart, "<<", tokens, WhitespaceMode::PRESERVE, 0, 2); +	assertText(reader, "My dear fellows", tokens, WhitespaceMode::PRESERVE, 2, 17); +	assertToken(reader, tSpeechEnd, ">>", tokens, WhitespaceMode::PRESERVE, 17, 19); +	assertText(reader, ", the ", tokens, WhitespaceMode::PRESERVE, 19, 25); +	assertToken(reader, tStar, "*", tokens, WhitespaceMode::PRESERVE, 25, 26); +	assertText(reader, "old man", tokens, WhitespaceMode::PRESERVE, 26, 33); +	assertToken(reader, tStar, "*", tokens, WhitespaceMode::PRESERVE, 33, 34); +	assertText(reader, " said.", tokens, WhitespaceMode::PRESERVE, 34, 40); +	assertEnd(reader);  }  } diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index 3293370..d4e9443 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -21,6 +21,7 @@  #include <core/frontend/TerminalLogger.hpp>  #include <core/common/CharReader.hpp>  #include <core/common/Variant.hpp> +#include <core/parser/utils/TokenizedData.hpp>  #include <formats/osxml/OsxmlEventParser.hpp> @@ -31,10 +32,10 @@ static TerminalLogger logger(std::cerr, true);  namespace {  enum class OsxmlEvent { -	COMMAND, +	COMMAND_START,  	ANNOTATION_START,  	ANNOTATION_END, -	FIELD_END, +	RANGE_END,  	DATA  }; @@ -42,9 +43,10 @@ class TestOsxmlEventListener : public OsxmlEvents {  public:  	std::vector<std::pair<OsxmlEvent, Variant>> events; -	void command(const Variant &name, const Variant::mapType &args) override +	void commandStart(const Variant &name, +	                  const Variant::mapType &args) override  	{ -		events.emplace_back(OsxmlEvent::COMMAND, +		events.emplace_back(OsxmlEvent::COMMAND_START,  		                    Variant::arrayType{name, args});  	} @@ -62,25 +64,30 @@ public:  		                    Variant::arrayType{className, elementName});  	} -	void fieldEnd() override +	void rangeEnd() override  	{ -		events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); +		events.emplace_back(OsxmlEvent::RANGE_END, Variant::arrayType{});  	} -	void data(const Variant &data) override +	void data(const TokenizedData &data) override  	{ -		events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); +		Token token; +		Variant text; +		TokenizedDataReader reader = data.reader(); +		reader.read(token, TokenSet{}, WhitespaceMode::PRESERVE); +		EXPECT_EQ(Tokens::Data, token.id); +		text = Variant::fromString(token.content); +		text.setLocation(token.getLocation()); +		events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{text});  	}  };  static std::vector<std::pair<OsxmlEvent, Variant>> parseXml( -    const char *testString, -    WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) +    const char *testString)  {  	TestOsxmlEventListener listener;  	CharReader reader(testString);  	OsxmlEventParser parser(reader, listener, logger); -	parser.setWhitespaceMode(whitespaceMode);  	parser.parse();  	return listener.events;  } @@ -93,11 +100,11 @@ TEST(OsxmlEventParser, simpleCommandWithArgs)  	//                        0          1            2            3  	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ -	    {OsxmlEvent::COMMAND, +	    {OsxmlEvent::COMMAND_START,  	     Variant::arrayType{  	         "a", Variant::mapType{  	                  {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, -	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; +	    {OsxmlEvent::RANGE_END, Variant::arrayType{}}};  	auto events = parseXml(testString);  	ASSERT_EQ(expectedEvents, events); @@ -133,10 +140,12 @@ TEST(OsxmlEventParser, magicTopLevelTag)  	const char *testString = "<ousia><a/><b/></ousia>";  	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ -	    {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, -	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}, -	    {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}}, -	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; +	    {OsxmlEvent::COMMAND_START, +	     Variant::arrayType{{"a", Variant::mapType{}}}}, +	    {OsxmlEvent::RANGE_END, Variant::arrayType{}}, +	    {OsxmlEvent::COMMAND_START, +	     Variant::arrayType{{"b", Variant::mapType{}}}}, +	    {OsxmlEvent::RANGE_END, Variant::arrayType{}}};  	auto events = parseXml(testString);  	ASSERT_EQ(expectedEvents, events); @@ -147,71 +156,35 @@ TEST(OsxmlEventParser, magicTopLevelTagInside)  	const char *testString = "<a><ousia/></a>";  	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ -	    {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, -	    {OsxmlEvent::COMMAND, +	    {OsxmlEvent::COMMAND_START, +	     Variant::arrayType{{"a", Variant::mapType{}}}}, +	    {OsxmlEvent::COMMAND_START,  	     Variant::arrayType{{"ousia", Variant::mapType{}}}}, -	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}, -	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; +	    {OsxmlEvent::RANGE_END, Variant::arrayType{}}, +	    {OsxmlEvent::RANGE_END, Variant::arrayType{}}};  	auto events = parseXml(testString);  	ASSERT_EQ(expectedEvents, events);  } -TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +TEST(OsxmlEventParser, commandWithData)  {  	const char *testString = "<a>  hello  \n world </a>";  	//                        012345678901 234567890123  	//                        0         1          2  	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ -	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, +	    {OsxmlEvent::COMMAND_START, +	     Variant::arrayType{"a", Variant::mapType{}}},  	    {OsxmlEvent::DATA, Variant::arrayType{"  hello  \n world "}}, -	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; +	    {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; -	auto events = parseXml(testString, WhitespaceMode::PRESERVE); +	auto events = parseXml(testString);  	ASSERT_EQ(expectedEvents, events);  	// Check the location of the text  	ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart());  	ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd());  } - -TEST(OsxmlEventParser, commandWithDataTrimWhitespace) -{ -	const char *testString = "<a>  hello  \n world </a>"; -	//                        012345678901 234567890123 -	//                        0         1          2 - -	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ -	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, -	    {OsxmlEvent::DATA, Variant::arrayType{"hello  \n world"}}, -	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - -	auto events = parseXml(testString, WhitespaceMode::TRIM); -	ASSERT_EQ(expectedEvents, events); - -	// Check the location of the text -	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); -	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} - -TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) -{ -	const char *testString = "<a>  hello  \n world </a>"; -	//                        012345678901 234567890123 -	//                        0         1          2 - -	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ -	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, -	    {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, -	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - -	auto events = parseXml(testString, WhitespaceMode::COLLAPSE); -	ASSERT_EQ(expectedEvents, events); - -	// Check the location of the text -	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); -	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -}  } | 
