diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-25 23:09:26 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-25 23:09:26 +0100 |
commit | 84c9abc3e9762c4486ddc5ca0352a5d697a51987 (patch) | |
tree | b95db6ab2c2c6c2fba430218411a4ddf1d31b19f /test/core | |
parent | 8891dea26a1653a003b4171155e155d3aa6689ae (diff) |
start of branch, commit log will be rewritten
Diffstat (limited to 'test/core')
-rw-r--r-- | test/core/parser/stack/StackTest.cpp | 15 | ||||
-rw-r--r-- | test/core/parser/utils/TokenizedDataTest.cpp | 602 | ||||
-rw-r--r-- | test/core/parser/utils/TokenizerTest.cpp | 248 |
3 files changed, 366 insertions, 499 deletions
diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index a93f14a..83966d5 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -24,6 +24,7 @@ #include <core/parser/stack/Handler.hpp> #include <core/parser/stack/Stack.hpp> #include <core/parser/stack/State.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <core/StandaloneEnvironment.hpp> @@ -53,7 +54,7 @@ struct Tracker { Variant::mapType annotationStartArgs; Variant annotationEndClassName; Variant annotationEndElementName; - Variant dataData; + TokenizedData dataData; bool startResult; bool fieldStartSetIsDefault; @@ -81,7 +82,7 @@ struct Tracker { annotationStartArgs = Variant::mapType{}; annotationEndClassName = Variant::fromString(std::string{}); annotationEndElementName = Variant::fromString(std::string{}); - dataData = Variant::fromString(std::string{}); + dataData = TokenizedData(); startResult = true; fieldStartSetIsDefault = false; @@ -157,7 +158,7 @@ public: return tracker.annotationEndResult; } - bool data(Variant &data) override + bool data(TokenizedData &data) override { tracker.dataCount++; tracker.dataData = data; @@ -363,7 +364,7 @@ TEST(Stack, multipleFields) s.data("test"); tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test", tracker.dataData); + EXPECT_EQ("test", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc @@ -375,7 +376,7 @@ TEST(Stack, multipleFields) s.data("test2"); tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test2", tracker.dataData); + EXPECT_EQ("test2", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc @@ -387,7 +388,7 @@ TEST(Stack, multipleFields) s.data("test3"); tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test3", tracker.dataData); + EXPECT_EQ("test3", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc @@ -744,4 +745,4 @@ TEST(Stack, fieldAfterDefaultField) ASSERT_FALSE(logger.hasError()); } } -}
\ No newline at end of file +} diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp index 231bad9..dfe2526 100644 --- a/test/core/parser/utils/TokenizedDataTest.cpp +++ b/test/core/parser/utils/TokenizedDataTest.cpp @@ -22,6 +22,43 @@ namespace ousia { +void assertToken(TokenizedDataReader &reader, TokenId id, + const std::string &text, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId sourceId = InvalidSourceId) +{ + Token token; + ASSERT_TRUE(reader.read(token, tokens, mode)); + EXPECT_EQ(id, token.id); + EXPECT_EQ(text, token.content); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, token.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, token.getLocation().getEnd()); + } + EXPECT_EQ(sourceId, token.getLocation().getSourceId()); +} + +void assertText(TokenizedDataReader &reader, const std::string &text, + const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId id = InvalidSourceId) +{ + assertToken(reader, Tokens::Data, text, tokens, mode, start, end, id); +} + +void assertEnd(TokenizedDataReader &reader) +{ + Token token; + ASSERT_TRUE(reader.atEnd()); + ASSERT_FALSE(reader.read(token)); +} + TEST(TokenizedData, dataWhitespacePreserve) { TokenizedData data; @@ -29,15 +66,10 @@ TEST(TokenizedData, dataWhitespacePreserve) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" test1 test2 ", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(16U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, " test1 test2 ", TokenSet{}, WhitespaceMode::PRESERVE, + 0, 16); + assertEnd(reader); } TEST(TokenizedData, dataWhitespaceTrim) @@ -47,15 +79,10 @@ TEST(TokenizedData, dataWhitespaceTrim) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test1 test2", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(14U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::TRIM, 1, + 14); + assertEnd(reader); } TEST(TokenizedData, dataWhitespaceCollapse) @@ -65,15 +92,10 @@ TEST(TokenizedData, dataWhitespaceCollapse) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test1 test2", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(14U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::COLLAPSE, 1, + 14); + assertEnd(reader); } TEST(TokenizedData, singleToken) @@ -82,17 +104,9 @@ TEST(TokenizedData, singleToken) ASSERT_EQ(2U, data.append("$$")); data.mark(5, 0, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, singleDisabledToken) @@ -101,15 +115,9 @@ TEST(TokenizedData, singleDisabledToken) ASSERT_EQ(2U, data.append("$$")); data.mark(5, 0, 2); - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "$$", TokenSet{}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, dualToken) @@ -120,18 +128,10 @@ TEST(TokenizedData, dualToken) data.mark(5, 0, 2); data.mark(6, 1, 1); - data.enableToken(5); - data.enableToken(6); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5, 6}, WhitespaceMode::COLLAPSE, 0, + 2); + assertEnd(reader); } TEST(TokenizedData, dualTokenShorterEnabled) @@ -142,385 +142,281 @@ TEST(TokenizedData, dualTokenShorterEnabled) data.mark(5, 0, 2); data.mark(6, 1, 1); - data.enableToken(6); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(1U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 1, 2); + assertEnd(reader); } TEST(TokenizedData, dualTokenLongerEnabled) { TokenizedData data; ASSERT_EQ(2U, data.append("$$")); + data.mark(6, 0, 1); data.mark(5, 0, 2); + data.mark(6, 1, 1); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataPreserveWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" test ", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(8U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2); + assertText(reader, " test text ", TokenSet{5}, WhitespaceMode::PRESERVE, + 2, 16); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataTrimWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2); + assertText(reader, "test text", TokenSet{5}, WhitespaceMode::TRIM, 3, + 15); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataCollapseWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertText(reader, "test text", TokenSet{5}, WhitespaceMode::COLLAPSE, 3, + 15); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespacePreserveWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(8U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2); + assertText(reader, " ", TokenSet{5}, WhitespaceMode::PRESERVE, 2, 6); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 6, 8); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespaceTrimWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 6, 8); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespaceCollapseWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 6, 8); + assertEnd(reader); } -TEST(TokenizedData, textPreserveWhitespace) +TEST(TokenizedData, appendChars) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); - - data.enableToken(5); + ASSERT_EQ(1U, data.append('t', 5, 7)); + ASSERT_EQ(2U, data.append('e', 7, 8)); + ASSERT_EQ(3U, data.append('s', 8, 10)); + ASSERT_EQ(4U, data.append('t', 10, 12)); - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(4U, token.getLocation().getStart()); - EXPECT_EQ(6U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::PRESERVE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test", TokenSet{5}, WhitespaceMode::COLLAPSE, 5, 12); + assertEnd(reader); } -TEST(TokenizedData, textTrimWhitespace) +TEST(TokenizedData, protectedWhitespace) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); + ASSERT_EQ(4U, data.append("test", 10)); + ASSERT_EQ(11U, data.append(" test", 14, true)); - data.enableToken(5); - - Token token; - ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test test", TokenSet{5}, WhitespaceMode::COLLAPSE, 10, + 21); + assertEnd(reader); +} - ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); +TEST(TokenizedData, specialNewlineToken) +{ + TokenizedData data; + data.append("a\nb\n \nc\n"); + // 0 12 3456 78 9 + + const TokenSet tokens{Tokens::Newline}; + + TokenizedDataReader reader = data.reader(); + assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 1, 2); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 3, 4); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 7, 8); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 9, 10); + assertEnd(reader); } -TEST(TokenizedData, textCollapseWhitespace) +TEST(TokenizedData, specialParagraphToken) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); + data.append("a\nb\n \nc\n"); + // 0 12 3456 78 9 - data.enableToken(5); + const TokenSet tokens{Tokens::Paragraph}; - Token token; - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3); + assertToken(reader, Tokens::Paragraph, "\n \n", tokens, + WhitespaceMode::COLLAPSE, 3, 8); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9); + assertEnd(reader); +} - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); +TEST(TokenizedData, specialSectionToken) +{ + TokenizedData data; + data.append("a\nb\n \n \t \n"); + // 0 12 3456 789 01 2 + // 0 1 + + const TokenSet tokens{Tokens::Section}; - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3); + assertToken(reader, Tokens::Section, "\n \n \t \n", tokens, + WhitespaceMode::COLLAPSE, 3, 13); + assertEnd(reader); } -TEST(TokenizedData, appendChars) +TEST(TokenizedData, specialTokenPrecedence) { TokenizedData data; - ASSERT_EQ(1U, data.append('t', 5, 7)); - ASSERT_EQ(2U, data.append('e', 7, 8)); - ASSERT_EQ(3U, data.append('s', 8, 10)); - ASSERT_EQ(4U, data.append('t', 10, 12)); + data.append("a\nb\n\nc\n\n\nd"); + // 0 12 3 45 6 7 89 + + const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section}; + + TokenizedDataReader reader = data.reader(); + assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 1, 2); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3); + assertToken(reader, Tokens::Paragraph, "\n\n", tokens, + WhitespaceMode::COLLAPSE, 3, 5); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 5, 6); + assertToken(reader, Tokens::Section, "\n\n\n", tokens, + WhitespaceMode::COLLAPSE, 6, 9); + assertText(reader, "d", tokens, WhitespaceMode::COLLAPSE, 9, 10); + assertEnd(reader); +} - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(5U, token.getLocation().getStart()); - EXPECT_EQ(12U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); +TEST(TokenizedData, specialTokenPrecedence2) +{ + TokenizedData data; + data.append("\nb\n\nc\n\n\n"); + // 0 12 3 45 6 7 + + const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section}; + + TokenizedDataReader reader = data.reader(); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 0, 1); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 1, 2); + assertToken(reader, Tokens::Paragraph, "\n\n", tokens, + WhitespaceMode::COLLAPSE, 2, 4); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 4, 5); + assertToken(reader, Tokens::Section, "\n\n\n", tokens, + WhitespaceMode::COLLAPSE, 5, 8); + assertEnd(reader); } -TEST(TokenizedData, copy) +TEST(TokenizedData, specialTokenIndent) { TokenizedData data; - ASSERT_EQ(7U, data.append(" a $ b ")); - // 0123456 - data.mark(6, 3, 1); - data.enableToken(6); + data.append(" test\n\ttest2\n test3 \ttest4\ntest5"); + // 01234567 8 901234 5678901234567890 123456 789012 + // 0 1 2 3 4 + const TokenSet tokens{Tokens::Indent, Tokens::Dedent}; + + TokenizedDataReader reader = data.reader(); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 4, 4); + assertText(reader, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 10, 10); + assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37); + assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE, + 38, 38); + assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43); + assertEnd(reader); +} - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("a", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - - TokenizedData dataCopy = data; - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(dataCopy.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" b ", token.content); - EXPECT_EQ(4U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(data.next(token)); - - ASSERT_TRUE(dataCopy.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("b", token.content); - EXPECT_EQ(5U, token.getLocation().getStart()); - EXPECT_EQ(6U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(dataCopy.next(token)); +TEST(TokenizedData, specialTokenIndentOverlap) +{ + TokenizedData data; + data.append(" test\n\ttest2\n test3 \ttest4\ntest5"); + // 01234567 8 901234 5678901234567890 123456 789012 + // 0 1 2 3 4 + const TokenSet tokens{Tokens::Indent, Tokens::Dedent, 5}; + + data.mark(5, 4, 4); + + TokenizedDataReader reader = data.reader(); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 4, 4); + assertToken(reader, 5, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 10, 10); + assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37); + assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE, + 38, 38); + assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43); + assertEnd(reader); } + } diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 3809a12..0f2bfb7 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -20,6 +20,7 @@ #include <core/common/CharReader.hpp> #include <core/parser/utils/Tokenizer.hpp> +#include <core/parser/utils/TokenizedData.hpp> namespace ousia { @@ -31,23 +32,40 @@ TEST(Tokenizer, tokenRegistration) ASSERT_EQ(0U, tokenizer.registerToken("a")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("a")); - ASSERT_EQ("a", tokenizer.getTokenString(0U)); + ASSERT_EQ("a", tokenizer.lookupToken(0U).string); ASSERT_EQ(1U, tokenizer.registerToken("b")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("b")); - ASSERT_EQ("b", tokenizer.getTokenString(1U)); + ASSERT_EQ("b", tokenizer.lookupToken(1U).string); ASSERT_EQ(2U, tokenizer.registerToken("c")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("c")); - ASSERT_EQ("c", tokenizer.getTokenString(2U)); + ASSERT_EQ("c", tokenizer.lookupToken(2U).string); ASSERT_TRUE(tokenizer.unregisterToken(1U)); ASSERT_FALSE(tokenizer.unregisterToken(1U)); - ASSERT_EQ("", tokenizer.getTokenString(1U)); + ASSERT_EQ("", tokenizer.lookupToken(1U).string); ASSERT_EQ(1U, tokenizer.registerToken("d")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("d")); - ASSERT_EQ("d", tokenizer.getTokenString(1U)); + ASSERT_EQ("d", tokenizer.lookupToken(1U).string); +} + +void expectData(const std::string &expected, SourceOffset tokenStart, + SourceOffset tokenEnd, SourceOffset textStart, + SourceOffset textEnd, const Token &token, TokenizedData &data, + WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ + ASSERT_EQ(Tokens::Data, token.id); + + Variant text = data.text(mode); + ASSERT_TRUE(text.isString()); + + EXPECT_EQ(expected, text.asString()); + EXPECT_EQ(tokenStart, token.location.getStart()); + EXPECT_EQ(tokenEnd, token.location.getEnd()); + EXPECT_EQ(textStart, text.getLocation().getStart()); + EXPECT_EQ(textEnd, text.getLocation().getEnd()); } TEST(Tokenizer, textTokenPreserveWhitespace) @@ -56,36 +74,34 @@ TEST(Tokenizer, textTokenPreserveWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ(" this \t is only a \n\n test text ", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(36U, loc.getEnd()); + expectData(" this \t is only a \n\n test text ", 0, 36, 0, 36, + token, data, WhitespaceMode::PRESERVE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, + token, data, WhitespaceMode::PRESERVE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -95,36 +111,34 @@ TEST(Tokenizer, textTokenTrimWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 36, 1, 33, token, + data, WhitespaceMode::TRIM); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, + token, data, WhitespaceMode::TRIM); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -134,36 +148,34 @@ TEST(Tokenizer, textTokenCollapseWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this is only a test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); + expectData("this is only a test text", 0, 36, 1, 33, token, data, + WhitespaceMode::COLLAPSE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this is only a test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this is only a test text", 0, 32, 0, 32, token, data, + WhitespaceMode::COLLAPSE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -177,14 +189,12 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + expectData("test1", 0, 5, 0, 5, token, data); char c; ASSERT_TRUE(reader.peek(c)); @@ -193,7 +203,8 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -209,14 +220,10 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + expectData("test2", 6, 11, 6, 11, token, data); char c; ASSERT_FALSE(reader.peek(c)); @@ -233,21 +240,17 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); + expectData("test1", 0, 5, 0, 5, token, data); ASSERT_EQ(0U, reader.getOffset()); ASSERT_EQ(5U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -261,35 +264,26 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); + expectData("test2", 6, 11, 6, 11, token, data); ASSERT_EQ(0U, reader.getOffset()); ASSERT_EQ(11U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + expectData("test1", 0, 5, 0, 5, token, data); ASSERT_EQ(5U, reader.getOffset()); ASSERT_EQ(5U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -303,14 +297,9 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + expectData("test2", 6, 11, 6, 11, token, data); ASSERT_EQ(11U, reader.getOffset()); ASSERT_EQ(11U, reader.getPeekOffset()); } @@ -320,6 +309,7 @@ TEST(Tokenizer, ambiguousTokens) { CharReader reader{"abc"}; Tokenizer tokenizer; + TokenizedData data; TokenId t1 = tokenizer.registerToken("abd"); TokenId t2 = tokenizer.registerToken("bc"); @@ -328,16 +318,17 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_EQ(1U, t2); Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_TRUE(tokenizer.read(reader, token, data)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("a", token.content); + expectData("a", 0, 1, 0, 1, token, data); SourceLocation loc = token.location; ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); - ASSERT_TRUE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(t2, token.id); ASSERT_EQ("bc", token.content); @@ -346,7 +337,8 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_EQ(1U, loc.getStart()); ASSERT_EQ(3U, loc.getEnd()); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } TEST(Tokenizer, commentTestWhitespacePreserve) @@ -354,7 +346,7 @@ TEST(Tokenizer, commentTestWhitespacePreserve) CharReader reader{"Test/Test /* Block Comment */", 0}; // 012345678901234567890123456789 // 0 1 2 - Tokenizer tokenizer(WhitespaceMode::PRESERVE); + Tokenizer tokenizer; const TokenId t1 = tokenizer.registerToken("/"); const TokenId t2 = tokenizer.registerToken("/*"); @@ -370,45 +362,23 @@ TEST(Tokenizer, commentTestWhitespacePreserve) Token t; for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); + TokenizedData data(0); + EXPECT_TRUE(tokenizer.read(reader, t, data)); EXPECT_EQ(te.id, t.id); - EXPECT_EQ(te.content, t.content); + if (te.id != Tokens::Data) { + EXPECT_EQ(te.content, t.content); + } else { + Variant text = data.text(WhitespaceMode::PRESERVE); + ASSERT_TRUE(text.isString()); + EXPECT_EQ(te.content, text.asString()); + } EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); EXPECT_EQ(te.location.getStart(), t.location.getStart()); EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); } - ASSERT_FALSE(tokenizer.read(reader, t)); -} - -TEST(Tokenizer, commentTestWhitespaceCollapse) -{ - CharReader reader{"Test/Test /* Block Comment */", 0}; - // 012345678901234567890123456789 - // 0 1 2 - Tokenizer tokenizer(WhitespaceMode::COLLAPSE); - const TokenId t1 = tokenizer.registerToken("/"); - const TokenId t2 = tokenizer.registerToken("/*"); - const TokenId t3 = tokenizer.registerToken("*/"); - - std::vector<Token> expected = { - {Tokens::Data, "Test", SourceLocation{0, 0, 4}}, - {t1, "/", SourceLocation{0, 4, 5}}, - {Tokens::Data, "Test", SourceLocation{0, 5, 9}}, - {t2, "/*", SourceLocation{0, 10, 12}}, - {Tokens::Data, "Block Comment", SourceLocation{0, 13, 26}}, - {t3, "*/", SourceLocation{0, 27, 29}}}; - - Token t; - for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.id, t.id); - EXPECT_EQ(te.content, t.content); - EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); - EXPECT_EQ(te.location.getStart(), t.location.getStart()); - EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); - } - ASSERT_FALSE(tokenizer.read(reader, t)); + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, t, data)); } } |