From efe60ac3c3a8725ac71329c0bb19fa9d9c58f399 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:42:05 +0100 Subject: Moved specific file format parsers to formats/ folder, moved old tokenizer to css code (this is the only place where it is actually used) --- test/plugins/css/CodeTokenizerTest.cpp | 100 ++++++++++++++++++++++++++++ test/plugins/css/TokenizerTest.cpp | 118 +++++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 test/plugins/css/CodeTokenizerTest.cpp create mode 100644 test/plugins/css/TokenizerTest.cpp (limited to 'test/plugins/css') diff --git a/test/plugins/css/CodeTokenizerTest.cpp b/test/plugins/css/CodeTokenizerTest.cpp new file mode 100644 index 0000000..2d4d5a7 --- /dev/null +++ b/test/plugins/css/CodeTokenizerTest.cpp @@ -0,0 +1,100 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include + +namespace ousia { + +static const int BLOCK_COMMENT = 30; +static const int LINE_COMMENT = 31; +static const int STRING = 20; +static const int ESCAPE = 21; +static const int LINEBREAK = 21; +static const int CURLY_OPEN = 40; +static const int CURLY_CLOSE = 41; + +TEST(CodeTokenizer, testTokenizer) +{ + CharReader reader{ + "/**\n" // 1 + " * Some Block Comment\n" // 2 + " */\n" // 3 + "var my_string = 'My \\'String\\'';\n" // 4 + "// and a line comment\n" // 5 + "var my_obj = { a = 4;}", 0}; // 6 + // 123456789012345678901234567890123456789 + // 0 1 2 3 + TokenTreeNode root{{{"/*", 1}, + {"*/", 2}, + {"//", 3}, + {"'", 4}, + {"\\", 5}, + {"{", CURLY_OPEN}, + {"}", CURLY_CLOSE}, + {"\n", 6}}}; + std::map descriptors{ + // the block comment start Token has the id 1 and if the Tokenizer + // returns a Block Comment Token that should have the id 10. + {1, {CodeTokenMode::BLOCK_COMMENT_START, BLOCK_COMMENT}}, + {2, {CodeTokenMode::BLOCK_COMMENT_END, BLOCK_COMMENT}}, + {3, {CodeTokenMode::LINE_COMMENT, LINE_COMMENT}}, + {4, {CodeTokenMode::STRING_START_END, STRING}}, + {5, {CodeTokenMode::ESCAPE, ESCAPE}}, + {6, {CodeTokenMode::LINEBREAK, LINEBREAK}}}; + + std::vector expected = { + {BLOCK_COMMENT, "*\n * Some Block Comment\n ", SourceLocation{0, 0, 29}}, + {LINEBREAK, "\n", SourceLocation{0, 29, 30}}, + {TOKEN_TEXT, "var", SourceLocation{0, 30, 33}}, + {TOKEN_TEXT, "my_string", SourceLocation{0, 34, 43}}, + {TOKEN_TEXT, "=", SourceLocation{0, 44, 45}}, + {STRING, "My 'String'", SourceLocation{0, 46, 61}}, + {TOKEN_TEXT, ";", SourceLocation{0, 61, 62}}, + {LINEBREAK, "\n", SourceLocation{0, 62, 63}}, + // this is slightly counter-intuitive but makes sense if you think about + // it: As a line comment is ended by a line break the line break is + // technically still a part of the line comment and thus the ending + // is in the next line. + {LINE_COMMENT, " and a line comment", SourceLocation{0, 63, 85}}, + {TOKEN_TEXT, "var", SourceLocation{0, 85, 88}}, + {TOKEN_TEXT, "my_obj", SourceLocation{0, 89, 95}}, + {TOKEN_TEXT, "=", SourceLocation{0, 96, 97}}, + {CURLY_OPEN, "{", SourceLocation{0, 98, 99}}, + {TOKEN_TEXT, "a", SourceLocation{0, 100, 101}}, + {TOKEN_TEXT, "=", SourceLocation{0, 102, 103}}, + {TOKEN_TEXT, "4;", SourceLocation{0, 104, 106}}, + {CURLY_CLOSE, "}", SourceLocation{0, 106, 107}}, + }; + + CodeTokenizer tokenizer{reader, root, descriptors}; + + Token t; + for (auto &te : expected) { + EXPECT_TRUE(tokenizer.next(t)); + EXPECT_EQ(te.tokenId, t.tokenId); + EXPECT_EQ(te.content, t.content); + EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); + EXPECT_EQ(te.location.getStart(), t.location.getStart()); + EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); + } + ASSERT_FALSE(tokenizer.next(t)); +} +} + diff --git a/test/plugins/css/TokenizerTest.cpp b/test/plugins/css/TokenizerTest.cpp new file mode 100644 index 0000000..c53f93d --- /dev/null +++ b/test/plugins/css/TokenizerTest.cpp @@ -0,0 +1,118 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include + +#include + +namespace ousia { +TEST(TokenTreeNode, testConstructor) +{ + TokenTreeNode root{{{"a", 1}, {"aab", 2}, {"aac", 3}, {"abd", 4}}}; + + ASSERT_EQ(-1, root.tokenId); + ASSERT_EQ(1U, root.children.size()); + ASSERT_TRUE(root.children.find('a') != root.children.end()); + + const TokenTreeNode &a = root.children.at('a'); + ASSERT_EQ(1, a.tokenId); + ASSERT_EQ(2U, a.children.size()); + ASSERT_TRUE(a.children.find('a') != a.children.end()); + ASSERT_TRUE(a.children.find('b') != a.children.end()); + + const TokenTreeNode &aa = a.children.at('a'); + ASSERT_EQ(-1, aa.tokenId); + ASSERT_EQ(2U, aa.children.size()); + ASSERT_TRUE(aa.children.find('b') != aa.children.end()); + ASSERT_TRUE(aa.children.find('c') != aa.children.end()); + + const TokenTreeNode &aab = aa.children.at('b'); + ASSERT_EQ(2, aab.tokenId); + ASSERT_EQ(0U, aab.children.size()); + + const TokenTreeNode &aac = aa.children.at('c'); + ASSERT_EQ(3, aac.tokenId); + ASSERT_EQ(0U, aac.children.size()); + + const TokenTreeNode &ab = a.children.at('b'); + ASSERT_EQ(-1, ab.tokenId); + ASSERT_EQ(1U, ab.children.size()); + ASSERT_TRUE(ab.children.find('d') != ab.children.end()); + + const TokenTreeNode &abd = ab.children.at('d'); + ASSERT_EQ(4, abd.tokenId); + ASSERT_EQ(0U, abd.children.size()); +} + +TEST(Tokenizer, testTokenization) +{ + TokenTreeNode root{{{"/", 1}, {"/*", 2}, {"*/", 3}}}; + + CharReader reader{"Test/Test /* Block Comment */", 0}; + // 012345678901234567890123456789 + // 0 1 2 + + std::vector expected = { + {TOKEN_TEXT, "Test", SourceLocation{0, 0, 4}}, + {1, "/", SourceLocation{0, 4, 5}}, + {TOKEN_TEXT, "Test ", SourceLocation{0, 5, 10}}, + {2, "/*", SourceLocation{0, 10, 12}}, + {TOKEN_TEXT, " Block Comment ", SourceLocation{0, 12, 27}}, + {3, "*/", SourceLocation{0, 27, 29}}}; + + Tokenizer tokenizer{reader, root}; + + Token t; + for (auto &te : expected) { + EXPECT_TRUE(tokenizer.next(t)); + EXPECT_EQ(te.tokenId, t.tokenId); + EXPECT_EQ(te.content, t.content); + EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); + EXPECT_EQ(te.location.getStart(), t.location.getStart()); + EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); + } + ASSERT_FALSE(tokenizer.next(t)); +} + +TEST(Tokenizer, testIncompleteTokens) +{ + TokenTreeNode root{{{"ab", 1}, {"c", 2}}}; + + CharReader reader{"ac", 0}; + + std::vector expected = { + {TOKEN_TEXT, "a", SourceLocation{0, 0, 1}}, + {2, "c", SourceLocation{0, 1, 2}}}; + + Tokenizer tokenizer{reader, root}; + + Token t; + for (auto &te : expected) { + EXPECT_TRUE(tokenizer.next(t)); + EXPECT_EQ(te.tokenId, t.tokenId); + EXPECT_EQ(te.content, t.content); + EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); + EXPECT_EQ(te.location.getStart(), t.location.getStart()); + EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); + } + ASSERT_FALSE(tokenizer.next(t)); +} +} + -- cgit v1.2.3