From efe60ac3c3a8725ac71329c0bb19fa9d9c58f399 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:42:05 +0100
Subject: Moved specific file format parsers to formats/ folder, moved old
 tokenizer to css code (this is the only place where it is actually used)

---
 test/plugins/css/CodeTokenizerTest.cpp | 100 ++++++++++++++++++++++++++++
 test/plugins/css/TokenizerTest.cpp     | 118 +++++++++++++++++++++++++++++++++
 2 files changed, 218 insertions(+)
 create mode 100644 test/plugins/css/CodeTokenizerTest.cpp
 create mode 100644 test/plugins/css/TokenizerTest.cpp

(limited to 'test/plugins/css')
diff --git a/test/plugins/css/CodeTokenizerTest.cpp b/test/plugins/css/CodeTokenizerTest.cpp
new file mode 100644
index 0000000..2d4d5a7
--- /dev/null
+++ b/test/plugins/css/CodeTokenizerTest.cpp
@@ -0,0 +1,100 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/CodeTokenizer.hpp>
+
+namespace ousia {
+
+static const int BLOCK_COMMENT = 30;  // token id expected for block comments
+static const int LINE_COMMENT = 31;   // token id expected for line comments
+static const int STRING = 20;         // token id expected for string literals
+static const int ESCAPE = 21;         // id attached to the escape descriptor
+static const int LINEBREAK = 22;      // distinct from ESCAPE (both were 21)
+static const int CURLY_OPEN = 40;     // id of the "{" token
+static const int CURLY_CLOSE = 41;    // id of the "}" token
+
+TEST(CodeTokenizer, testTokenizer)
+{
+	// Raw token strings and the intermediate ids the descriptors below refer to.
+	TokenTreeNode tokenTree{{{"/*", 1},
+	                         {"*/", 2},
+	                         {"//", 3},
+	                         {"'", 4},
+	                         {"\\", 5},
+	                         {"{", CURLY_OPEN},
+	                         {"}", CURLY_CLOSE},
+	                         {"\n", 6}}};
+	// Intermediate id -> (mode, id of the resulting token). For example, id 1
+	// starts a block comment, which is expected to be reported as BLOCK_COMMENT.
+	std::map<int, CodeTokenDescriptor> modes{
+	    {1, {CodeTokenMode::BLOCK_COMMENT_START, BLOCK_COMMENT}},
+	    {2, {CodeTokenMode::BLOCK_COMMENT_END, BLOCK_COMMENT}},
+	    {3, {CodeTokenMode::LINE_COMMENT, LINE_COMMENT}},
+	    {4, {CodeTokenMode::STRING_START_END, STRING}},
+	    {5, {CodeTokenMode::ESCAPE, ESCAPE}},
+	    {6, {CodeTokenMode::LINEBREAK, LINEBREAK}}};
+
+	CharReader source{
+	    "/**\n"                                 // 1
+	    " * Some Block Comment\n"               // 2
+	    " */\n"                                 // 3
+	    "var my_string = 'My \\'String\\'';\n"  // 4
+	    "// and a line comment\n"               // 5
+	    "var my_obj = { a = 4;}", 0};              // 6
+	//   123456789012345678901234567890123456789
+	//   0        1         2         3
+
+	std::vector<Token> wanted{
+	    {BLOCK_COMMENT, "*\n * Some Block Comment\n ", SourceLocation{0, 0, 29}},
+	    {LINEBREAK, "\n", SourceLocation{0, 29, 30}},
+	    {TOKEN_TEXT, "var", SourceLocation{0, 30, 33}},
+	    {TOKEN_TEXT, "my_string", SourceLocation{0, 34, 43}},
+	    {TOKEN_TEXT, "=", SourceLocation{0, 44, 45}},
+	    {STRING, "My 'String'", SourceLocation{0, 46, 61}},
+	    {TOKEN_TEXT, ";", SourceLocation{0, 61, 62}},
+	    {LINEBREAK, "\n", SourceLocation{0, 62, 63}},
+	    // The line comment location (63 to 85) includes the terminating line
+	    // break, as that line break is what ends the comment. Hence no separate
+	    // LINEBREAK token is expected after it.
+	    {LINE_COMMENT, " and a line comment", SourceLocation{0, 63, 85}},
+	    {TOKEN_TEXT, "var", SourceLocation{0, 85, 88}},
+	    {TOKEN_TEXT, "my_obj", SourceLocation{0, 89, 95}},
+	    {TOKEN_TEXT, "=", SourceLocation{0, 96, 97}},
+	    {CURLY_OPEN, "{", SourceLocation{0, 98, 99}},
+	    {TOKEN_TEXT, "a", SourceLocation{0, 100, 101}},
+	    {TOKEN_TEXT, "=", SourceLocation{0, 102, 103}},
+	    {TOKEN_TEXT, "4;", SourceLocation{0, 104, 106}},
+	    {CURLY_CLOSE, "}", SourceLocation{0, 106, 107}},
+	};
+
+	CodeTokenizer tokenizer{source, tokenTree, modes};
+	Token token;
+	for (auto want = wanted.begin(); want != wanted.end(); ++want) {
+		EXPECT_TRUE(tokenizer.next(token));
+		EXPECT_EQ(want->tokenId, token.tokenId);
+		EXPECT_EQ(want->content, token.content);
+		EXPECT_EQ(want->location.getSourceId(), token.location.getSourceId());
+		EXPECT_EQ(want->location.getStart(), token.location.getStart());
+		EXPECT_EQ(want->location.getEnd(), token.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.next(token));
+}
+}
+
diff --git a/test/plugins/css/TokenizerTest.cpp b/test/plugins/css/TokenizerTest.cpp
new file mode 100644
index 0000000..c53f93d
--- /dev/null
+++ b/test/plugins/css/TokenizerTest.cpp
@@ -0,0 +1,118 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/common/CharReader.hpp>
+
+#include <core/Tokenizer.hpp>
+
+namespace ousia {
+TEST(TokenTreeNode, testConstructor)
+{
+	TokenTreeNode tree{{{"a", 1}, {"aab", 2}, {"aac", 3}, {"abd", 4}}};
+	// Root: all four strings start with 'a', no token ends here (id -1).
+	ASSERT_EQ(-1, tree.tokenId);
+	ASSERT_EQ(1U, tree.children.size());
+	ASSERT_FALSE(tree.children.find('a') == tree.children.end());
+	// "a": a complete token (id 1) which continues with either 'a' or 'b'.
+	const TokenTreeNode &nodeA = tree.children.at('a');
+	ASSERT_EQ(1, nodeA.tokenId);
+	ASSERT_EQ(2U, nodeA.children.size());
+	ASSERT_FALSE(nodeA.children.find('a') == nodeA.children.end());
+	ASSERT_FALSE(nodeA.children.find('b') == nodeA.children.end());
+	// "aa": only a prefix of "aab" and "aac", so the id is expected to be -1.
+	const TokenTreeNode &nodeAA = nodeA.children.at('a');
+	ASSERT_EQ(-1, nodeAA.tokenId);
+	ASSERT_EQ(2U, nodeAA.children.size());
+	ASSERT_FALSE(nodeAA.children.find('b') == nodeAA.children.end());
+	ASSERT_FALSE(nodeAA.children.find('c') == nodeAA.children.end());
+	// "aab": complete token (id 2) without any continuation.
+	const TokenTreeNode &nodeAAB = nodeAA.children.at('b');
+	ASSERT_EQ(2, nodeAAB.tokenId);
+	ASSERT_EQ(0U, nodeAAB.children.size());
+	// "aac": complete token (id 3) without any continuation.
+	const TokenTreeNode &nodeAAC = nodeAA.children.at('c');
+	ASSERT_EQ(3, nodeAAC.tokenId);
+	ASSERT_EQ(0U, nodeAAC.children.size());
+	// "ab": only a prefix of "abd", so the id is expected to be -1.
+	const TokenTreeNode &nodeAB = nodeA.children.at('b');
+	ASSERT_EQ(-1, nodeAB.tokenId);
+	ASSERT_EQ(1U, nodeAB.children.size());
+	ASSERT_FALSE(nodeAB.children.find('d') == nodeAB.children.end());
+	// "abd": complete token (id 4) without any continuation.
+	const TokenTreeNode &nodeABD = nodeAB.children.at('d');
+	ASSERT_EQ(4, nodeABD.tokenId);
+	ASSERT_EQ(0U, nodeABD.children.size());
+}
+
+TEST(Tokenizer, testTokenization)
+{
+	// "/" is a token of its own and at the same time a prefix of "/*".
+	TokenTreeNode tokenTree{{{"/", 1}, {"/*", 2}, {"*/", 3}}};
+
+	CharReader source{"Test/Test /* Block Comment */", 0};
+	//                 012345678901234567890123456789
+	//                 0         1         2
+
+	std::vector<Token> wanted{
+	    {TOKEN_TEXT, "Test", SourceLocation{0, 0, 4}},
+	    {1, "/", SourceLocation{0, 4, 5}},
+	    {TOKEN_TEXT, "Test ", SourceLocation{0, 5, 10}},
+	    {2, "/*", SourceLocation{0, 10, 12}},
+	    {TOKEN_TEXT, " Block Comment ", SourceLocation{0, 12, 27}},
+	    {3, "*/", SourceLocation{0, 27, 29}}};
+
+	Tokenizer tokenizer{source, tokenTree};
+	Token token;
+	for (Token &want : wanted) {
+		EXPECT_TRUE(tokenizer.next(token));
+		EXPECT_EQ(want.tokenId, token.tokenId);
+		EXPECT_EQ(want.content, token.content);
+		EXPECT_EQ(want.location.getSourceId(), token.location.getSourceId());
+		EXPECT_EQ(want.location.getStart(), token.location.getStart());
+		EXPECT_EQ(want.location.getEnd(), token.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.next(token));
+}
+
+TEST(Tokenizer, testIncompleteTokens)
+{
+	// "ab" is a token, but the input stops matching after the "a"; that "a" is
+	// expected to come back as plain text, followed by the regular "c" token.
+	TokenTreeNode tokenTree{{{"ab", 1}, {"c", 2}}};
+	CharReader source{"ac", 0};
+
+	std::vector<Token> wanted{
+	    {TOKEN_TEXT, "a", SourceLocation{0, 0, 1}},
+	    {2, "c", SourceLocation{0, 1, 2}}};
+
+	Tokenizer tokenizer{source, tokenTree};
+	Token token;
+	for (auto want = wanted.begin(); want != wanted.end(); ++want) {
+		EXPECT_TRUE(tokenizer.next(token));
+		EXPECT_EQ(want->tokenId, token.tokenId);
+		EXPECT_EQ(want->content, token.content);
+		EXPECT_EQ(want->location.getSourceId(), token.location.getSourceId());
+		EXPECT_EQ(want->location.getStart(), token.location.getStart());
+		EXPECT_EQ(want->location.getEnd(), token.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.next(token));
+}
+}
+
-- 
cgit v1.2.3