2 files changed, 101 insertions, 1 deletions
diff --git a/test/core/utils/CodeTokenizerTest.cpp b/test/core/utils/CodeTokenizerTest.cpp
index d0f9a17..0b9d7b3 100644
--- a/test/core/utils/CodeTokenizerTest.cpp
+++ b/test/core/utils/CodeTokenizerTest.cpp
@@ -22,9 +22,81 @@
 
 namespace ousia {
 namespace utils {
+
+static const int BLOCK_COMMENT = 30;
+static const int LINE_COMMENT = 31;
+static const int STRING = 20;
+static const int ESCAPE = 21;
+static const int LINEBREAK = 21;
+static const int CURLY_OPEN = 40;
+static const int CURLY_CLOSE = 41;
+
 TEST(CodeTokenizer, testTokenizer)
 {
-	
+	BufferedCharReader reader;
+	reader.feed("/**\n");                                 // 1
+	reader.feed(" * Some Block Comment\n");               // 2
+	reader.feed(" */\n");                                 // 3
+	reader.feed("var my_string = 'My \\'String\\'';\n");  // 4
+	reader.feed("// and a line comment\n");               // 5
+	reader.feed("var my_obj = { a = 4;}");                // 6
+	//           123456789012345678901234567890123456789012345678901234567890123456789
+	//           0        1         2         3         4         5         6
+	TokenTreeNode root{{{"/*", 1},
+	                    {"*/", 2},
+	                    {"//", 3},
+	                    {"'", 4},
+	                    {"\\", 5},
+	                    {"{", CURLY_OPEN},
+	                    {"}", CURLY_CLOSE},
+	                    {"\n", 6}}};
+	std::map<int, CodeTokenDescriptor> descriptors{
+	    // the block comment start Token has the id 1 and if the Tokenizer
+	    // returns a Block Comment Token that should have the id 10.
+	    {1, {CodeTokenMode::BLOCK_COMMENT_START, BLOCK_COMMENT}},
+	    {2, {CodeTokenMode::BLOCK_COMMENT_END, BLOCK_COMMENT}},
+	    {3, {CodeTokenMode::LINE_COMMENT, LINE_COMMENT}},
+	    {4, {CodeTokenMode::STRING_START_END, STRING}},
+	    {5, {CodeTokenMode::ESCAPE, ESCAPE}},
+	    {6, {CodeTokenMode::LINEBREAK, LINEBREAK}}};
+
+	std::vector<Token> expected = {
+	    {BLOCK_COMMENT, "*\n * Some Block Comment\n ", 1, 1, 4, 3},
+	    {LINEBREAK, "\n", 4, 3, 1, 4},
+	    {TOKEN_TEXT, "var", 1, 4, 4, 4},
+	    {TOKEN_TEXT, "my_string", 5, 4, 14, 4},
+	    {TOKEN_TEXT, "=", 15, 4, 16, 4},
+	    {STRING, "My 'String'", 17, 4, 32, 4},
+	    {TOKEN_TEXT, ";", 32, 4, 33, 4},
+	    {LINEBREAK, "\n", 33, 4, 1, 5},
+		//this is slightly counter-intuitive but makes sense if you think about
+		//it: As a line comment is ended by a line break the line break is
+		//technically still a part of the line comment and thus the ending
+		//is in the next line.
+	    {LINE_COMMENT, " and a line comment", 1, 5, 1, 6},
+	    {TOKEN_TEXT, "var", 1, 6, 4, 6},
+	    {TOKEN_TEXT, "my_obj", 5, 6, 11, 6},
+	    {TOKEN_TEXT, "=", 12, 6, 13, 6},
+	    {CURLY_OPEN, "{", 14, 6, 15, 6},
+	    {TOKEN_TEXT, "a", 16, 6, 17, 6},
+	    {TOKEN_TEXT, "=", 18, 6, 19, 6},
+	    {TOKEN_TEXT, "4;", 20, 6, 22, 6},
+	    {CURLY_CLOSE, "}", 22, 6, 23, 6},
+	};
+
+	CodeTokenizer tokenizer{reader, root, descriptors};
+
+	Token t;
+	for (auto &te : expected) {
+		ASSERT_TRUE(tokenizer.next(t));
+		ASSERT_EQ(te.tokenId, t.tokenId);
+		ASSERT_EQ(te.content, t.content);
+		ASSERT_EQ(te.startColumn, t.startColumn);
+		ASSERT_EQ(te.startLine, t.startLine);
+		ASSERT_EQ(te.endColumn, t.endColumn);
+		ASSERT_EQ(te.endLine, t.endLine);
+	}
+	ASSERT_FALSE(tokenizer.next(t));
 }
 }
 }
diff --git a/test/core/utils/TokenizerTest.cpp b/test/core/utils/TokenizerTest.cpp
index ba06c33..79cc01d 100644
--- a/test/core/utils/TokenizerTest.cpp
+++ b/test/core/utils/TokenizerTest.cpp
@@ -93,5 +93,33 @@ TEST(Tokenizer, testTokenization)
 	}
 	ASSERT_FALSE(tokenizer.next(t));
 }
+
+TEST(Tokenizer, testIncompleteTokens)
+{
+	TokenTreeNode root{{{"ab", 1}, {"c", 2}}};
+
+	BufferedCharReader reader;
+	reader.feed("ac");
+	//           1234567890
+	//           0        1
+
+	std::vector<Token> expected = {
+	    {TOKEN_TEXT, "a", 1, 1, 2, 1},
+	    {2, "c", 2, 1, 3, 1}};
+
+	Tokenizer tokenizer{reader, root};
+
+	Token t;
+	for (auto &te : expected) {
+		ASSERT_TRUE(tokenizer.next(t));
+		ASSERT_EQ(te.tokenId, t.tokenId);
+		ASSERT_EQ(te.content, t.content);
+		ASSERT_EQ(te.startColumn, t.startColumn);
+		ASSERT_EQ(te.startLine, t.startLine);
+		ASSERT_EQ(te.endColumn, t.endColumn);
+		ASSERT_EQ(te.endLine, t.endLine);
+	}
+	ASSERT_FALSE(tokenizer.next(t));
+}
 }
 }