From 4854509f8add1e2ff167623fb0e8d4216d9d6023 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel
Date: Sun, 8 Feb 2015 17:54:27 +0100
Subject: Implemented DynamicTokenizer and unit tests

---
 test/plugins/plain/DynamicTokenizerTest.cpp | 416 ++++++++++++++++++++++++++++
 1 file changed, 416 insertions(+)

(limited to 'test/plugins/plain/DynamicTokenizerTest.cpp')

diff --git a/test/plugins/plain/DynamicTokenizerTest.cpp b/test/plugins/plain/DynamicTokenizerTest.cpp
index e69de29..63fa466 100644
--- a/test/plugins/plain/DynamicTokenizerTest.cpp
+++ b/test/plugins/plain/DynamicTokenizerTest.cpp
@@ -0,0 +1,416 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/common/CharReader.hpp>
+#include <plugins/plain/DynamicTokenizer.hpp>
+
+namespace ousia {
+
+TEST(DynamicTokenizer, tokenRegistration)
+{
+	CharReader reader{"test"};
+	DynamicTokenizer tokenizer{reader};
+
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
+
+	ASSERT_EQ(0U, tokenizer.registerToken("a"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("a"));
+	ASSERT_EQ("a", tokenizer.getTokenString(0U));
+
+	ASSERT_EQ(1U, tokenizer.registerToken("b"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("b"));
+	ASSERT_EQ("b", tokenizer.getTokenString(1U));
+
+	ASSERT_EQ(2U, tokenizer.registerToken("c"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("c"));
+	ASSERT_EQ("c", tokenizer.getTokenString(2U));
+
+	ASSERT_TRUE(tokenizer.unregisterToken(1U));
+	ASSERT_FALSE(tokenizer.unregisterToken(1U));
+	ASSERT_EQ("", tokenizer.getTokenString(1U));
+
+	ASSERT_EQ(1U, tokenizer.registerToken("d"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("d"));
+	ASSERT_EQ("d", tokenizer.getTokenString(1U));
+}
+
+TEST(DynamicTokenizer, textTokenPreserveWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1          2          3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(36U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+}
+
+TEST(DynamicTokenizer, textTokenTrimWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1          2          3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(1U, loc.getStart());
+		ASSERT_EQ(33U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+}
+
+TEST(DynamicTokenizer, textTokenCollapseWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1          2          3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this is only a test text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(1U, loc.getStart());
+		ASSERT_EQ(33U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this is only a test text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(token));
+	}
+}
+
+TEST(DynamicTokenizer, simpleReadToken)
+{
+	CharReader reader{"test1:test2"};
+	DynamicTokenizer tokenizer{reader};
+
+	const TokenTypeId tid = tokenizer.registerToken(":");
+	ASSERT_EQ(0U, tid);
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+
+		char c;
+		ASSERT_TRUE(reader.peek(c));
+		ASSERT_EQ(':', c);
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+
+		char c;
+		ASSERT_TRUE(reader.peek(c));
+		ASSERT_EQ('t', c);
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+
+		char c;
+		ASSERT_FALSE(reader.peek(c));
+	}
+}
+
+TEST(DynamicTokenizer, simplePeekToken)
+{
+	CharReader reader{"test1:test2"};
+	DynamicTokenizer tokenizer{reader};
+
+	const TokenTypeId tid = tokenizer.registerToken(":");
+	ASSERT_EQ(0U, tid);
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(5U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(6U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(11U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+		ASSERT_EQ(5U, reader.getOffset());
+		ASSERT_EQ(5U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+		ASSERT_EQ(6U, reader.getOffset());
+		ASSERT_EQ(6U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+		ASSERT_EQ(11U, reader.getOffset());
+		ASSERT_EQ(11U, reader.getPeekOffset());
+	}
+}
+
+TEST(DynamicTokenizer, ambiguousTokens)
+{
+	CharReader reader{"abc"};
+	DynamicTokenizer tokenizer(reader);
+
+	TokenTypeId t1 = tokenizer.registerToken("abd");
+	TokenTypeId t2 = tokenizer.registerToken("bc");
+
+	ASSERT_EQ(0U, t1);
+	ASSERT_EQ(1U, t2);
+
+	DynamicToken token;
+	ASSERT_TRUE(tokenizer.read(token));
+
+	ASSERT_EQ(TextToken, token.type);
+	ASSERT_EQ("a", token.content);
+
+	SourceLocation loc = token.location;
+	ASSERT_EQ(0U, loc.getStart());
+	ASSERT_EQ(1U, loc.getEnd());
+
+	ASSERT_TRUE(tokenizer.read(token));
+
+	ASSERT_EQ(t2, token.type);
+	ASSERT_EQ("bc", token.content);
+
+	loc = token.location;
+	ASSERT_EQ(1U, loc.getStart());
+	ASSERT_EQ(3U, loc.getEnd());
+
+	ASSERT_FALSE(tokenizer.read(token));
+}
+
+TEST(DynamicTokenizer, commentTestWhitespacePreserve)
+{
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 012345678901234567890123456789
+	//                 0         1         2
+	DynamicTokenizer tokenizer(reader, WhitespaceMode::PRESERVE);
+
+	const TokenTypeId t1 = tokenizer.registerToken("/");
+	const TokenTypeId t2 = tokenizer.registerToken("/*");
+	const TokenTypeId t3 = tokenizer.registerToken("*/");
+
+	std::vector<DynamicToken> expected = {
+	    {TextToken, "Test", SourceLocation{0, 0, 4}},
+	    {t1, "/", SourceLocation{0, 4, 5}},
+	    {TextToken, "Test ", SourceLocation{0, 5, 10}},
+	    {t2, "/*", SourceLocation{0, 10, 12}},
+	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
+	    {t3, "*/", SourceLocation{0, 27, 29}}};
+
+	DynamicToken t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.read(t));
+		EXPECT_EQ(te.type, t.type);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.read(t));
+}
+
+TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
+{
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 012345678901234567890123456789
+	//                 0         1         2
+	DynamicTokenizer tokenizer(reader, WhitespaceMode::COLLAPSE);
+
+	const TokenTypeId t1 = tokenizer.registerToken("/");
+	const TokenTypeId t2 = tokenizer.registerToken("/*");
+	const TokenTypeId t3 = tokenizer.registerToken("*/");
+
+	std::vector<DynamicToken> expected = {
+	    {TextToken, "Test", SourceLocation{0, 0, 4}},
+	    {t1, "/", SourceLocation{0, 4, 5}},
+	    {TextToken, "Test", SourceLocation{0, 5, 9}},
+	    {t2, "/*", SourceLocation{0, 10, 12}},
+	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
+	    {t3, "*/", SourceLocation{0, 27, 29}}};
+
+	DynamicToken t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.read(t));
+		EXPECT_EQ(te.type, t.type);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.read(t));
+}
+
+}
--
cgit v1.2.3

From f713b1d393230e7083727d457623fdac878eb248 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel
Date: Sun, 8 Feb 2015 18:48:07 +0100
Subject: DynamicTokenizer now gets the reader as a parameter to read and peek
 -- the beauty of this tokenizer is that it has no internal state depending
 on the reader, so it doesn't need to hold a reference to it

---
 src/plugins/plain/DynamicTokenizer.cpp      | 35 ++++++------
 src/plugins/plain/DynamicTokenizer.hpp      | 22 ++++----
 test/plugins/plain/DynamicTokenizerTest.cpp | 81 ++++++++++++++---------------
 3 files changed, 67 insertions(+), 71 deletions(-)

(limited to 'test/plugins/plain/DynamicTokenizerTest.cpp')

diff --git a/src/plugins/plain/DynamicTokenizer.cpp b/src/plugins/plain/DynamicTokenizer.cpp
index a8f2317..f2cfcd1 100644
--- a/src/plugins/plain/DynamicTokenizer.cpp
+++ b/src/plugins/plain/DynamicTokenizer.cpp
@@ -345,14 +345,13 @@ public:
 
 /* Class DynamicTokenizer */
 
-DynamicTokenizer::DynamicTokenizer(CharReader &reader,
-                                   WhitespaceMode whitespaceMode)
-    : reader(reader), whitespaceMode(whitespaceMode), nextTokenTypeId(0)
+DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
+    : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
 {
 }
 
 template <typename TextHandler, bool read>
-bool DynamicTokenizer::next(DynamicToken &token)
+bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
 {
 	// If we're in the read mode, reset the char reader peek position to the
 	// current read position
@@ -437,28 +436,28 @@ bool DynamicTokenizer::next(DynamicToken &token)
 	return match.hasMatch();
 }
 
-bool DynamicTokenizer::read(DynamicToken &token)
+bool DynamicTokenizer::read(CharReader &reader,DynamicToken &token)
 {
 	switch (whitespaceMode) {
 		case WhitespaceMode::PRESERVE:
-			return next(token);
+			return next(reader, token);
 		case WhitespaceMode::TRIM:
-			return next(token);
+			return next(reader, token);
 		case WhitespaceMode::COLLAPSE:
-			return next(token);
+			return next(reader, token);
 	}
 	return false;
 }
 
-bool DynamicTokenizer::peek(DynamicToken &token)
+bool DynamicTokenizer::peek(CharReader &reader,DynamicToken &token)
 {
 	switch (whitespaceMode) {
 		case WhitespaceMode::PRESERVE:
-			return next(token);
+			return next(reader, token);
 		case WhitespaceMode::TRIM:
-			return next(token);
+			return next(reader, token);
 		case WhitespaceMode::COLLAPSE:
-			return next(token);
+			return next(reader, token);
 	}
 	return false;
 }
@@ -530,16 +529,16 @@ WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
 
 /* Explicitly instantiate all possible instantiations of the "next" member
    function */
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader, DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader, DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader,DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader,DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader,DynamicToken &token);
 template bool DynamicTokenizer::next(
-    DynamicToken &token);
+    CharReader &reader,DynamicToken &token);
 }
diff --git a/src/plugins/plain/DynamicTokenizer.hpp b/src/plugins/plain/DynamicTokenizer.hpp
index 760bebf..0b4dd39 100644
--- a/src/plugins/plain/DynamicTokenizer.hpp
+++ b/src/plugins/plain/DynamicTokenizer.hpp
@@ -118,11 +118,6 @@ enum class WhitespaceMode {
  */
 class DynamicTokenizer {
 private:
-	/**
-	 * CharReader instance from which the tokens should be read.
-	 */
-	CharReader &reader;
-
 	/**
 	 * Internally used token trie. This object holds all registered tokens.
 	 */
@@ -151,23 +146,22 @@ private:
 	 * @tparam TextHandler is the type to be used for the textHandler instance.
 	 * @tparam read specifies whether the function should start from and advance
 	 * the read pointer of the char reader.
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
 	 * @param token is the token structure into which the token information
 	 * should be written.
	 * @return false if the end of the stream has been reached, true otherwise.
 	 */
 	template <typename TextHandler, bool read>
-	bool next(DynamicToken &token);
+	bool next(CharReader &reader, DynamicToken &token);
 
 public:
 	/**
 	 * Constructor of the DynamicTokenizer class.
 	 *
-	 * @param reader is the CharReader that should be used for reading the
-	 * tokens.
 	 * @param whitespaceMode specifies how whitespace should be handled.
 	 */
-	DynamicTokenizer(CharReader &reader,
-	                 WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
+	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
 
 	/**
 	 * Registers the given string as a token. Returns a const pointer at a
@@ -222,23 +216,27 @@
 	 * Reads a new token from the CharReader and stores it in the given
 	 * DynamicToken instance.
 	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
 	 * @param token is a reference at the token instance into which the Token
 	 * information should be written.
 	 * @return true if a token could be read, false if the end of the stream
 	 * has been reached.
 	 */
-	bool read(DynamicToken &token);
+	bool read(CharReader &reader, DynamicToken &token);
 
 	/**
 	 * The peek method does not advance the read position of the char reader,
 	 * but reads the next token from the current char reader peek position.
 	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
 	 * @param token is a reference at the token instance into which the Token
 	 * information should be written.
 	 * @return true if a token could be read, false if the end of the stream
 	 * has been reached.
 	 */
-	bool peek(DynamicToken &token);
+	bool peek(CharReader &reader, DynamicToken &token);
 };
 }
diff --git a/test/plugins/plain/DynamicTokenizerTest.cpp b/test/plugins/plain/DynamicTokenizerTest.cpp
index 63fa466..5183fdd 100644
--- a/test/plugins/plain/DynamicTokenizerTest.cpp
+++ b/test/plugins/plain/DynamicTokenizerTest.cpp
@@ -25,8 +25,7 @@ namespace ousia {
 
 TEST(DynamicTokenizer, tokenRegistration)
 {
-	CharReader reader{"test"};
-	DynamicTokenizer tokenizer{reader};
+	DynamicTokenizer tokenizer;
 
 	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
 
@@ -57,10 +56,10 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1          2          3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
 
@@ -68,17 +67,17 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
 		ASSERT_EQ(0U, loc.getStart());
 		ASSERT_EQ(36U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 
 	{
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
 
@@ -86,7 +85,7 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
 		ASSERT_EQ(0U, loc.getStart());
 		ASSERT_EQ(32U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 }
 
@@ -96,10 +95,10 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1          2          3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
 
@@ -107,17 +106,17 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
 		ASSERT_EQ(1U, loc.getStart());
 		ASSERT_EQ(33U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 
 	{
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
 
@@ -125,7 +124,7 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
 		ASSERT_EQ(0U, loc.getStart());
 		ASSERT_EQ(32U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 }
 
@@ -135,10 +134,10 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1          2          3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this is only a test text", token.content);
 
@@ -146,17 +145,17 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
 		ASSERT_EQ(1U, loc.getStart());
 		ASSERT_EQ(33U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 
 	{
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
 
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this is only a test text", token.content);
 
@@ -164,21 +163,21 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
 		ASSERT_EQ(0U, loc.getStart());
 		ASSERT_EQ(32U, loc.getEnd());
 
-		ASSERT_FALSE(tokenizer.read(token));
+		ASSERT_FALSE(tokenizer.read(reader, token));
 	}
 }
 
 TEST(DynamicTokenizer, simpleReadToken)
 {
 	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer{reader};
+	DynamicTokenizer tokenizer;
 
 	const TokenTypeId tid = tokenizer.registerToken(":");
 	ASSERT_EQ(0U, tid);
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test1", token.content);
 
@@ -194,7 +193,7 @@ TEST(DynamicTokenizer, simpleReadToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(tid, token.type);
 		ASSERT_EQ(":", token.content);
 
@@ -210,7 +209,7 @@ TEST(DynamicTokenizer, simpleReadToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test2", token.content);
 
@@ -227,14 +226,14 @@ TEST(DynamicTokenizer, simplePeekToken)
 {
 	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer{reader};
+	DynamicTokenizer tokenizer;
 
 	const TokenTypeId tid = tokenizer.registerToken(":");
 	ASSERT_EQ(0U, tid);
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(token));
+		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test1", token.content);
 
@@ -248,7 +247,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(token));
+		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(tid, token.type);
 		ASSERT_EQ(":", token.content);
 
@@ -262,7 +261,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(token));
+		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test2", token.content);
 
@@ -276,7 +275,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test1", token.content);
 
@@ -290,7 +289,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(tid, token.type);
 		ASSERT_EQ(":", token.content);
 
@@ -304,7 +303,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 
 	{
 		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(token));
+		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("test2", token.content);
@@ -320,7 +319,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 {
 	CharReader reader{"abc"};
-	DynamicTokenizer tokenizer(reader);
+	DynamicTokenizer tokenizer;
 
 	TokenTypeId t1 = tokenizer.registerToken("abd");
 	TokenTypeId t2 = tokenizer.registerToken("bc");
@@ -329,7 +328,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 	ASSERT_EQ(1U, t2);
 
 	DynamicToken token;
-	ASSERT_TRUE(tokenizer.read(token));
+	ASSERT_TRUE(tokenizer.read(reader, token));
 
 	ASSERT_EQ(TextToken, token.type);
 	ASSERT_EQ("a", token.content);
@@ -338,7 +337,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 	ASSERT_EQ(0U, loc.getStart());
 	ASSERT_EQ(1U, loc.getEnd());
 
-	ASSERT_TRUE(tokenizer.read(token));
+	ASSERT_TRUE(tokenizer.read(reader, token));
 
 	ASSERT_EQ(t2, token.type);
 	ASSERT_EQ("bc", token.content);
@@ -347,7 +346,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 	ASSERT_EQ(1U, loc.getStart());
 	ASSERT_EQ(3U, loc.getEnd());
 
-	ASSERT_FALSE(tokenizer.read(token));
+	ASSERT_FALSE(tokenizer.read(reader, token));
 }
 
 TEST(DynamicTokenizer, commentTestWhitespacePreserve)
@@ -355,7 +354,7 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)
 	CharReader reader{"Test/Test /* Block Comment */", 0};
 	//                 012345678901234567890123456789
 	//                 0         1         2
-	DynamicTokenizer tokenizer(reader, WhitespaceMode::PRESERVE);
+	DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE);
 
 	const TokenTypeId t1 = tokenizer.registerToken("/");
 	const TokenTypeId t2 = tokenizer.registerToken("/*");
@@ -371,14 +370,14 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)
 
 	DynamicToken t;
 	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(t));
+		EXPECT_TRUE(tokenizer.read(reader, t));
 		EXPECT_EQ(te.type, t.type);
 		EXPECT_EQ(te.content, t.content);
 		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
 		EXPECT_EQ(te.location.getStart(), t.location.getStart());
 		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
 	}
-	ASSERT_FALSE(tokenizer.read(t));
+	ASSERT_FALSE(tokenizer.read(reader, t));
 }
 
 TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
@@ -386,7 +385,7 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
 	CharReader reader{"Test/Test /* Block Comment */", 0};
 	//                 012345678901234567890123456789
 	//                 0         1         2
-	DynamicTokenizer tokenizer(reader, WhitespaceMode::COLLAPSE);
+	DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE);
 
 	const TokenTypeId t1 = tokenizer.registerToken("/");
 	const TokenTypeId t2 = tokenizer.registerToken("/*");
@@ -402,14 +401,14 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
 
 	DynamicToken t;
 	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(t));
+		EXPECT_TRUE(tokenizer.read(reader, t));
 		EXPECT_EQ(te.type, t.type);
 		EXPECT_EQ(te.content, t.content);
 		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
 		EXPECT_EQ(te.location.getStart(), t.location.getStart());
 		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
 	}
-	ASSERT_FALSE(tokenizer.read(t));
+	ASSERT_FALSE(tokenizer.read(reader, t));
 }
 
 }
--
cgit v1.2.3
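
For orientation, the following is a minimal usage sketch of the DynamicTokenizer API as it stands after the second commit. It is not part of the patches above: the include paths and the main() scaffolding are assumptions based on the repository layout shown in the diffs, and only calls that actually appear in the patches (registerToken, read, the DynamicToken fields and SourceLocation accessors) are used.

// Hypothetical driver, not from the patches: tokenizes "key:value" with a
// single registered ":" token and prints each token with its source range.
#include <iostream>

#include <core/common/CharReader.hpp>          // path assumed from the diffs
#include <plugins/plain/DynamicTokenizer.hpp>  // path assumed from the diffs

using namespace ousia;

int main()
{
	CharReader reader{"key:value"};

	// After the refactor the tokenizer holds no reader reference, so a
	// single instance can serve any number of readers.
	DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
	const TokenTypeId colon = tokenizer.registerToken(":");

	DynamicToken token;
	while (tokenizer.read(reader, token)) {
		if (token.type == colon) {
			std::cout << "separator";
		} else {
			std::cout << "text \"" << token.content << "\"";
		}
		std::cout << " at [" << token.location.getStart() << ", "
		          << token.location.getEnd() << ")" << std::endl;
	}
	return 0;
}

The design rationale stated in the commit message is visible here: because read() and peek() receive the CharReader as a parameter, the tokenizer carries no per-reader state and the same registered token set can be reused across inputs.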