From 4854509f8add1e2ff167623fb0e8d4216d9d6023 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel
Date: Sun, 8 Feb 2015 17:54:27 +0100
Subject: Implemented DynamicTokenizer and unit tests

---
 test/plugins/plain/DynamicTokenizerTest.cpp | 416 ++++++++++++++++++++++++++++
 1 file changed, 416 insertions(+)

(limited to 'test/plugins/plain/DynamicTokenizerTest.cpp')

diff --git a/test/plugins/plain/DynamicTokenizerTest.cpp b/test/plugins/plain/DynamicTokenizerTest.cpp
index e69de29..63fa466 100644
--- a/test/plugins/plain/DynamicTokenizerTest.cpp
+++ b/test/plugins/plain/DynamicTokenizerTest.cpp
@@ -0,0 +1,416 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/common/CharReader.hpp>
+#include <plugins/plain/DynamicTokenizer.hpp>
+
+namespace ousia {
+
+TEST(DynamicTokenizer, tokenRegistration)
+{
+	CharReader reader{"test"};
+	DynamicTokenizer tokenizer{reader};
+
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
+
+	ASSERT_EQ(0U, tokenizer.registerToken("a"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("a"));
+	ASSERT_EQ("a", tokenizer.getTokenString(0U));
+
+	ASSERT_EQ(1U, tokenizer.registerToken("b"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("b"));
+	ASSERT_EQ("b", tokenizer.getTokenString(1U));
+
+	ASSERT_EQ(2U, tokenizer.registerToken("c"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("c"));
+	ASSERT_EQ("c", tokenizer.getTokenString(2U));
+
+	ASSERT_TRUE(tokenizer.unregisterToken(1U));
+	ASSERT_FALSE(tokenizer.unregisterToken(1U));
+	ASSERT_EQ("", tokenizer.getTokenString(1U));
+
+	ASSERT_EQ(1U, tokenizer.registerToken("d"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("d"));
+	ASSERT_EQ("d", tokenizer.getTokenString(1U));
+}
+
+TEST(DynamicTokenizer, textTokenPreserveWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1          2          3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(36U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+}
+
+TEST(DynamicTokenizer, textTokenTrimWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1          2          3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(1U, loc.getStart());
+		ASSERT_EQ(33U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+}
+
+TEST(DynamicTokenizer, textTokenCollapseWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1          2          3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this is only a test text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(1U, loc.getStart());
+		ASSERT_EQ(33U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this is only a test text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+}
+
+TEST(DynamicTokenizer, simpleReadToken)
+{
+	CharReader reader{"test1:test2"};
+	DynamicTokenizer tokenizer{reader};
+
+	const TokenTypeId tid = tokenizer.registerToken(":");
+	ASSERT_EQ(0U, tid);
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+
+		char c;
+		ASSERT_TRUE(reader.peek(c));
+		ASSERT_EQ(':', c);
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+
+		char c;
+		ASSERT_TRUE(reader.peek(c));
+		ASSERT_EQ('t', c);
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+
+		char c;
+		ASSERT_FALSE(reader.peek(c));
+	}
+}
+
+TEST(DynamicTokenizer, simplePeekToken)
+{
+	CharReader reader{"test1:test2"};
+	DynamicTokenizer tokenizer{reader};
+
+	const TokenTypeId tid = tokenizer.registerToken(":");
+	ASSERT_EQ(0U, tid);
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(5U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(6U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(11U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+		ASSERT_EQ(5U, reader.getOffset());
+		ASSERT_EQ(5U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+		ASSERT_EQ(6U, reader.getOffset());
+		ASSERT_EQ(6U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+		ASSERT_EQ(11U, reader.getOffset());
+		ASSERT_EQ(11U, reader.getPeekOffset());
+	}
+}
+
+TEST(DynamicTokenizer, ambiguousTokens)
+{
+	CharReader reader{"abc"};
+	DynamicTokenizer tokenizer(reader);
+
+	TokenTypeId t1 = tokenizer.registerToken("abd");
+	TokenTypeId t2 = tokenizer.registerToken("bc");
+
+	ASSERT_EQ(0U, t1);
+	ASSERT_EQ(1U, t2);
+
+	DynamicToken token;
+	ASSERT_TRUE(tokenizer.read(token));
+
+	ASSERT_EQ(TextToken, token.type);
+	ASSERT_EQ("a", token.content);
+
+	SourceLocation loc = token.location;
+	ASSERT_EQ(0U, loc.getStart());
+	ASSERT_EQ(1U, loc.getEnd());
+
+	ASSERT_TRUE(tokenizer.read(token));
+
+	ASSERT_EQ(t2, token.type);
+	ASSERT_EQ("bc", token.content);
+
+	loc = token.location;
+	ASSERT_EQ(1U, loc.getStart());
+	ASSERT_EQ(3U, loc.getEnd());
+
+	ASSERT_FALSE(tokenizer.read(token));
+}
+
+TEST(DynamicTokenizer, commentTestWhitespacePreserve)
+{
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 012345678901234567890123456789
+	//                 0         1         2
+	DynamicTokenizer tokenizer(reader, WhitespaceMode::PRESERVE);
+
+	const TokenTypeId t1 = tokenizer.registerToken("/");
+	const TokenTypeId t2 = tokenizer.registerToken("/*");
+	const TokenTypeId t3 = tokenizer.registerToken("*/");
+
+	std::vector<DynamicToken> expected = {
+	    {TextToken, "Test", SourceLocation{0, 0, 4}},
+	    {t1, "/", SourceLocation{0, 4, 5}},
+	    {TextToken, "Test ", SourceLocation{0, 5, 10}},
+	    {t2, "/*", SourceLocation{0, 10, 12}},
+	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
+	    {t3, "*/", SourceLocation{0, 27, 29}}};
+
+	DynamicToken t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.read(t));
+		EXPECT_EQ(te.type, t.type);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.read(t));
+}
+
+TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
+{
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 012345678901234567890123456789
+	//                 0         1         2
+	DynamicTokenizer tokenizer(reader, WhitespaceMode::COLLAPSE);
+
+	const TokenTypeId t1 = tokenizer.registerToken("/");
+	const TokenTypeId t2 = tokenizer.registerToken("/*");
+	const TokenTypeId t3 = tokenizer.registerToken("*/");
+
+	std::vector<DynamicToken> expected = {
+	    {TextToken, "Test", SourceLocation{0, 0, 4}},
+	    {t1, "/", SourceLocation{0, 4, 5}},
+	    {TextToken, "Test", SourceLocation{0, 5, 9}},
+	    {t2, "/*", SourceLocation{0, 10, 12}},
+	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
+	    {t3, "*/", SourceLocation{0, 27, 29}}};
+
+	DynamicToken t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.read(t));
+		EXPECT_EQ(te.type, t.type);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.read(t));
+}
+
+}
--
cgit v1.2.3

From f713b1d393230e7083727d457623fdac878eb248 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel
Date: Sun, 8 Feb 2015 18:48:07 +0100
Subject: DynamicTokenizer now gets the reader as a parameter to read and peek
 -- the beauty of this tokenizer is that it has no internal state depending
 on the reader, so it doesn't need to hold a reference to it

---
 src/plugins/plain/DynamicTokenizer.cpp      | 35 ++++++------
 src/plugins/plain/DynamicTokenizer.hpp      | 22 ++++----
 test/plugins/plain/DynamicTokenizerTest.cpp | 81 ++++++++++++++---------------
 3 files changed, 67 insertions(+), 71 deletions(-)

(limited to 'test/plugins/plain/DynamicTokenizerTest.cpp')

diff --git a/src/plugins/plain/DynamicTokenizer.cpp b/src/plugins/plain/DynamicTokenizer.cpp
index a8f2317..f2cfcd1 100644
--- a/src/plugins/plain/DynamicTokenizer.cpp
+++ b/src/plugins/plain/DynamicTokenizer.cpp
@@ -345,14 +345,13 @@ public:
 
 /* Class DynamicTokenizer */
 
-DynamicTokenizer::DynamicTokenizer(CharReader &reader,
-                                   WhitespaceMode whitespaceMode)
-    : reader(reader), whitespaceMode(whitespaceMode), nextTokenTypeId(0)
+DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
+    : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
 {
 }
 
 template <typename TextHandler, bool read>
-bool DynamicTokenizer::next(DynamicToken &token)
+bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
 {
 	// If we're in the read mode, reset the char reader peek position to the
 	// current read position
@@ -437,28 +436,28 @@ bool DynamicTokenizer::next(DynamicToken &token)
 	return match.hasMatch();
 }
 
-bool DynamicTokenizer::read(DynamicToken &token)
+bool DynamicTokenizer::read(CharReader &reader,DynamicToken &token)
 {
 	switch (whitespaceMode) {
 		case WhitespaceMode::PRESERVE:
-			return next(token);
+			return next(reader, token);
 		case WhitespaceMode::TRIM:
-			return next(token);
+			return next(reader, token);
 		case WhitespaceMode::COLLAPSE:
-			return next(token);
+			return next(reader, token);
 	}
 	return false;
 }
 
-bool DynamicTokenizer::peek(DynamicToken &token)
+bool DynamicTokenizer::peek(CharReader &reader,DynamicToken &token)
 {
 	switch (whitespaceMode) {
 		case WhitespaceMode::PRESERVE:
-			return next(token);
+			return next(reader, token);
 		case WhitespaceMode::TRIM:
-			return next(token);
+			return next(reader, token);
 		case WhitespaceMode::COLLAPSE:
-			return next(token);
+			return next(reader, token);
 	}
 	return false;
 }
@@ -530,16 +529,16 @@ WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
 
 /* Explicitly instantiate all possible instantiations of the "next" member
    function */
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader, DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader, DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader,DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader,DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader,DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader,DynamicToken &token);
 }
diff --git a/src/plugins/plain/DynamicTokenizer.hpp b/src/plugins/plain/DynamicTokenizer.hpp
index 760bebf..0b4dd39 100644
--- a/src/plugins/plain/DynamicTokenizer.hpp
+++ b/src/plugins/plain/DynamicTokenizer.hpp
@@ -118,11 +118,6 @@ enum class WhitespaceMode {
  */
 class DynamicTokenizer {
 private:
-	/**
-	 * CharReader instance from which the tokens should be read.
-	 */
-	CharReader &reader;
-
 	/**
 	 * Internally used token trie. This object holds all registered tokens.
 	 */
@@ -151,23 +146,22 @@ private:
 	 * @tparam TextHandler is the type to be used for the textHandler instance.
 	 * @tparam read specifies whether the function should start from and advance
 	 * the read pointer of the char reader.
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
 	 * @param token is the token structure into which the token information
 	 * should be written.
	 * @return false if the end of the stream has been reached, true otherwise.
 	 */
 	template <typename TextHandler, bool read>
-	bool next(DynamicToken &token);
+	bool next(CharReader &reader, DynamicToken &token);
 
 public:
 	/**
 	 * Constructor of the DynamicTokenizer class.
 	 *
-	 * @param reader is the CharReader that should be used for reading the
-	 * tokens.
 	 * @param whitespaceMode specifies how whitespace should be handled.
 	 */
-	DynamicTokenizer(CharReader &reader,
-	                 WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
+	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
 
 	/**
 	 * Registers the given string as a token. Returns a const pointer at a
@@ -222,23 +216,27 @@
 	 * Reads a new token from the CharReader and stores it in the given
 	 * DynamicToken instance.
 	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
 	 * @param token is a reference at the token instance into which the Token
 	 * information should be written.
 	 * @return true if a token could be read, false if the end of the stream
 	 * has been reached.
 	 */
-	bool read(DynamicToken &token);
+	bool read(CharReader &reader, DynamicToken &token);
 
 	/**
 	 * The peek method does not advance the read position of the char reader,
 	 * but reads the next token from the current char reader peek position.
 	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
 	 * @param token is a reference at the token instance into which the Token
 	 * information should be written.
 	 * @return true if a token could be read, false if the end of the stream
 	 * has been reached.
 	 */
-	bool peek(DynamicToken &token);
+	bool peek(CharReader &reader, DynamicToken &token);
 };
 }
diff --git a/test/plugins/plain/DynamicTokenizerTest.cpp b/test/plugins/plain/DynamicTokenizerTest.cpp
index 63fa466..5183fdd 100644
--- a/test/plugins/plain/DynamicTokenizerTest.cpp
+++ b/test/plugins/plain/DynamicTokenizerTest.cpp
@@ -25,8 +25,7 @@ namespace ousia {
 
 TEST(DynamicTokenizer, tokenRegistration)
 {
-	CharReader reader{"test"};
-	DynamicTokenizer tokenizer{reader};
+	DynamicTokenizer tokenizer;
 
 	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
 
@@ -57,10 +56,10 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1          2          3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
 
@@ -68,17 +67,17 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
 		ASSERT_EQ(0U, loc.getStart());
 		ASSERT_EQ(36U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 
 	{
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
 
@@ -86,7 +85,7 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
 		ASSERT_EQ(0U, loc.getStart());
 		ASSERT_EQ(32U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 }
 
@@ -96,10 +95,10 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1          2          3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
 
@@ -107,17 +106,17 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
 		ASSERT_EQ(1U, loc.getStart());
 		ASSERT_EQ(33U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 
 	{
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
 
@@ -125,7 +124,7 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
 		ASSERT_EQ(0U, loc.getStart());
 		ASSERT_EQ(32U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 }
 
@@ -135,10 +134,10 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1          2          3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this is only a test text", token.content);
 
@@ -146,17 +145,17 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
 		ASSERT_EQ(1U, loc.getStart());
 		ASSERT_EQ(33U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 
 	{
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this is only a test text", token.content);
 
@@ -164,21 +163,21 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
 		ASSERT_EQ(0U, loc.getStart());
 		ASSERT_EQ(32U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 }
 
 TEST(DynamicTokenizer, simpleReadToken)
 {
 	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer{reader};
+	DynamicTokenizer tokenizer;
 
 	const TokenTypeId tid = tokenizer.registerToken(":");
 	ASSERT_EQ(0U, tid);
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test1", token.content);
 
@@ -194,7 +193,7 @@ TEST(DynamicTokenizer, simpleReadToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(tid, token.type);
 		ASSERT_EQ(":", token.content);
 
@@ -210,7 +209,7 @@ TEST(DynamicTokenizer, simpleReadToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test2", token.content);
 
@@ -227,14 +226,14 @@ TEST(DynamicTokenizer, simplePeekToken)
 {
 	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer{reader};
+	DynamicTokenizer tokenizer;
 
 	const TokenTypeId tid = tokenizer.registerToken(":");
 	ASSERT_EQ(0U, tid);
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(token));
+		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test1", token.content);
 
@@ -248,7 +247,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(token));
+		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(tid, token.type);
 		ASSERT_EQ(":", token.content);
 
@@ -262,7 +261,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(token));
+		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test2", token.content);
 
@@ -276,7 +275,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test1", token.content);
 
@@ -290,7 +289,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(tid, token.type);
 		ASSERT_EQ(":", token.content);
 
@@ -304,7 +303,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test2", token.content);
@@ -320,7 +319,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 {
 	CharReader reader{"abc"};
-	DynamicTokenizer tokenizer(reader);
+	DynamicTokenizer tokenizer;
 
 	TokenTypeId t1 = tokenizer.registerToken("abd");
 	TokenTypeId t2 = tokenizer.registerToken("bc");
@@ -329,7 +328,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 	ASSERT_EQ(1U, t2);
 
 	DynamicToken token;
-	ASSERT_TRUE(tokenizer.read(token));
+	ASSERT_TRUE(tokenizer.read(reader, token));
 
 	ASSERT_EQ(TextToken, token.type);
 	ASSERT_EQ("a", token.content);
@@ -338,7 +337,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 	ASSERT_EQ(0U, loc.getStart());
 	ASSERT_EQ(1U, loc.getEnd());
 
-	ASSERT_TRUE(tokenizer.read(token));
+	ASSERT_TRUE(tokenizer.read(reader, token));
 
 	ASSERT_EQ(t2, token.type);
 	ASSERT_EQ("bc", token.content);
@@ -347,7 +346,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 	ASSERT_EQ(1U, loc.getStart());
 	ASSERT_EQ(3U, loc.getEnd());
 
-	ASSERT_FALSE(tokenizer.read(token));
+	ASSERT_FALSE(tokenizer.read(reader, token));
 }
 
 TEST(DynamicTokenizer, commentTestWhitespacePreserve)
@@ -355,7 +354,7 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)
 	CharReader reader{"Test/Test /* Block Comment */", 0};
 	//                 012345678901234567890123456789
 	//                 0         1         2
-	DynamicTokenizer tokenizer(reader, WhitespaceMode::PRESERVE);
+	DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE);
 
 	const TokenTypeId t1 = tokenizer.registerToken("/");
 	const TokenTypeId t2 = tokenizer.registerToken("/*");
@@ -371,14 +370,14 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)
 
 	DynamicToken t;
 	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(t));
+		EXPECT_TRUE(tokenizer.read(reader, t));
 		EXPECT_EQ(te.type, t.type);
 		EXPECT_EQ(te.content, t.content);
 		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
 		EXPECT_EQ(te.location.getStart(), t.location.getStart());
 		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
 	}
-	ASSERT_FALSE(tokenizer.read(t));
+	ASSERT_FALSE(tokenizer.read(reader, t));
 }
 
 TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
@@ -386,7 +385,7 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
 	CharReader reader{"Test/Test /* Block Comment */", 0};
 	//                 012345678901234567890123456789
 	//                 0         1         2
-	DynamicTokenizer tokenizer(reader, WhitespaceMode::COLLAPSE);
+	DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE);
 
 	const TokenTypeId t1 = tokenizer.registerToken("/");
 	const TokenTypeId t2 = tokenizer.registerToken("/*");
@@ -402,14 +401,14 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
 
 	DynamicToken t;
 	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(t));
+		EXPECT_TRUE(tokenizer.read(reader, t));
 		EXPECT_EQ(te.type, t.type);
 		EXPECT_EQ(te.content, t.content);
 		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
 		EXPECT_EQ(te.location.getStart(), t.location.getStart());
 		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
 	}
-	ASSERT_FALSE(tokenizer.read(t));
+	ASSERT_FALSE(tokenizer.read(reader, t));
 }
 
 }
--
cgit v1.2.3
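
For orientation, the following is a minimal usage sketch of the DynamicTokenizer API as it stands after the second commit. It is not part of the patches above: the include paths and the main() scaffolding are assumptions based on the repository layout shown in the diffs, and only calls that actually appear in the patches (registerToken, read, the DynamicToken fields and SourceLocation accessors) are used.

// Hypothetical driver, not from the patches: tokenizes "key:value" with a
// single registered ":" token and prints each token with its source range.
#include <iostream>

#include <core/common/CharReader.hpp>          // path assumed from the diffs
#include <plugins/plain/DynamicTokenizer.hpp>  // path assumed from the diffs

using namespace ousia;

int main()
{
	CharReader reader{"key:value"};

	// After the refactor the tokenizer holds no reader reference, so a
	// single instance can serve any number of readers.
	DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
	const TokenTypeId colon = tokenizer.registerToken(":");

	DynamicToken token;
	while (tokenizer.read(reader, token)) {
		if (token.type == colon) {
			std::cout << "separator";
		} else {
			std::cout << "text \"" << token.content << "\"";
		}
		std::cout << " at [" << token.location.getStart() << ", "
		          << token.location.getEnd() << ")" << std::endl;
	}
	return 0;
}

The design rationale stated in the commit message is visible here: because read() and peek() receive the CharReader as a parameter, the tokenizer carries no per-reader state and the same registered token set can be reused across inputs.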