Moved DynamicTokenizer and TokenTrie to parser/utils

author: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-14 23:47:11 +0100
committer: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-14 23:47:11 +0100
commit: 65bbbd778f6e0a3668c859b0e22cced7075a726d (patch)
tree: c7e12a5b3330198deb5ab51eecdd12dd9e03286e /test/formats
parent: fec6ac1d65aee3e4e5c948b0f7cbdec7ceb6cb45 (diff)
2 files changed, 0 insertions, 507 deletions
diff --git a/test/formats/osdm/DynamicTokenizerTest.cpp b/test/formats/osdm/DynamicTokenizerTest.cpp
deleted file mode 100644
index c1f8785..0000000
--- a/test/formats/osdm/DynamicTokenizerTest.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <core/common/CharReader.hpp>
-#include <formats/osdm/DynamicTokenizer.hpp>
-
-namespace ousia {
-
-TEST(DynamicTokenizer, tokenRegistration)
-{
-	DynamicTokenizer tokenizer;
-
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
-
-	ASSERT_EQ(0U, tokenizer.registerToken("a"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("a"));
-	ASSERT_EQ("a", tokenizer.getTokenString(0U));
-
-	ASSERT_EQ(1U, tokenizer.registerToken("b"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("b"));
-	ASSERT_EQ("b", tokenizer.getTokenString(1U));
-
-	ASSERT_EQ(2U, tokenizer.registerToken("c"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("c"));
-	ASSERT_EQ("c", tokenizer.getTokenString(2U));
-
-	ASSERT_TRUE(tokenizer.unregisterToken(1U));
-	ASSERT_FALSE(tokenizer.unregisterToken(1U));
-	ASSERT_EQ("", tokenizer.getTokenString(1U));
-
-	ASSERT_EQ(1U, tokenizer.registerToken("d"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("d"));
-	ASSERT_EQ("d", tokenizer.getTokenString(1U));
-}
-
-TEST(DynamicTokenizer, textTokenPreserveWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(36U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, textTokenTrimWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(1U, loc.getStart());
-		ASSERT_EQ(33U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, textTokenCollapseWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this is only a test text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(1U, loc.getStart());
-		ASSERT_EQ(33U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this is only a test text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, simpleReadToken)
-{
-	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer;
-
-	const TokenTypeId tid = tokenizer.registerToken(":");
-	ASSERT_EQ(0U, tid);
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-
-		char c;
-		ASSERT_TRUE(reader.peek(c));
-		ASSERT_EQ(':', c);
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-
-		char c;
-		ASSERT_TRUE(reader.peek(c));
-		ASSERT_EQ('t', c);
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-
-		char c;
-		ASSERT_FALSE(reader.peek(c));
-	}
-}
-
-TEST(DynamicTokenizer, simplePeekToken)
-{
-	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer;
-
-	const TokenTypeId tid = tokenizer.registerToken(":");
-	ASSERT_EQ(0U, tid);
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(5U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(6U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(11U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-		ASSERT_EQ(5U, reader.getOffset());
-		ASSERT_EQ(5U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-		ASSERT_EQ(6U, reader.getOffset());
-		ASSERT_EQ(6U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-		ASSERT_EQ(11U, reader.getOffset());
-		ASSERT_EQ(11U, reader.getPeekOffset());
-	}
-}
-
-TEST(DynamicTokenizer, ambiguousTokens)
-{
-	CharReader reader{"abc"};
-	DynamicTokenizer tokenizer;
-
-	TokenTypeId t1 = tokenizer.registerToken("abd");
-	TokenTypeId t2 = tokenizer.registerToken("bc");
-
-	ASSERT_EQ(0U, t1);
-	ASSERT_EQ(1U, t2);
-
-	DynamicToken token;
-	ASSERT_TRUE(tokenizer.read(reader, token));
-
-	ASSERT_EQ(TextToken, token.type);
-	ASSERT_EQ("a", token.content);
-
-	SourceLocation loc = token.location;
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(1U, loc.getEnd());
-
-	ASSERT_TRUE(tokenizer.read(reader, token));
-
-	ASSERT_EQ(t2, token.type);
-	ASSERT_EQ("bc", token.content);
-
-	loc = token.location;
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(3U, loc.getEnd());
-
-	ASSERT_FALSE(tokenizer.read(reader, token));
-}
-
-TEST(DynamicTokenizer, commentTestWhitespacePreserve)
-{
-	CharReader reader{"Test/Test /* Block Comment */", 0};
-	//                 012345678901234567890123456789
-	//                 0        1         2
-	DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE);
-
-	const TokenTypeId t1 = tokenizer.registerToken("/");
-	const TokenTypeId t2 = tokenizer.registerToken("/*");
-	const TokenTypeId t3 = tokenizer.registerToken("*/");
-
-	std::vector<DynamicToken> expected = {
-	    {TextToken, "Test", SourceLocation{0, 0, 4}},
-	    {t1, "/", SourceLocation{0, 4, 5}},
-	    {TextToken, "Test ", SourceLocation{0, 5, 10}},
-	    {t2, "/*", SourceLocation{0, 10, 12}},
-	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
-	    {t3, "*/", SourceLocation{0, 27, 29}}};
-
-	DynamicToken t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(reader, t));
-		EXPECT_EQ(te.type, t.type);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.read(reader, t));
-}
-
-TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
-{
-	CharReader reader{"Test/Test /* Block Comment */", 0};
-	//                 012345678901234567890123456789
-	//                 0        1         2
-	DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE);
-
-	const TokenTypeId t1 = tokenizer.registerToken("/");
-	const TokenTypeId t2 = tokenizer.registerToken("/*");
-	const TokenTypeId t3 = tokenizer.registerToken("*/");
-
-	std::vector<DynamicToken> expected = {
-	    {TextToken, "Test", SourceLocation{0, 0, 4}},
-	    {t1, "/", SourceLocation{0, 4, 5}},
-	    {TextToken, "Test", SourceLocation{0, 5, 9}},
-	    {t2, "/*", SourceLocation{0, 10, 12}},
-	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
-	    {t3, "*/", SourceLocation{0, 27, 29}}};
-
-	DynamicToken t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(reader, t));
-		EXPECT_EQ(te.type, t.type);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.read(reader, t));
-}
-
-}
-
diff --git a/test/formats/osdm/TokenTrieTest.cpp b/test/formats/osdm/TokenTrieTest.cpp
deleted file mode 100644
index aacd6c0..0000000
--- a/test/formats/osdm/TokenTrieTest.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <formats/osdm/TokenTrie.hpp>
-
-namespace ousia {
-
-static const TokenTypeId t1 = 0;
-static const TokenTypeId t2 = 1;
-static const TokenTypeId t3 = 2;
-static const TokenTypeId t4 = 3;
-
-TEST(TokenTrie, registerToken)
-{
-	TokenTrie tree;
-
-	ASSERT_TRUE(tree.registerToken("a", t1));
-	ASSERT_TRUE(tree.registerToken("ab", t2));
-	ASSERT_TRUE(tree.registerToken("b", t3));
-	ASSERT_TRUE(tree.registerToken("hello", t4));
-
-	ASSERT_FALSE(tree.registerToken("", t1));
-	ASSERT_FALSE(tree.registerToken("a", t4));
-	ASSERT_FALSE(tree.registerToken("ab", t4));
-	ASSERT_FALSE(tree.registerToken("b", t4));
-	ASSERT_FALSE(tree.registerToken("hello", t4));
-
-	ASSERT_EQ(t1, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-	ASSERT_EQ(t4, tree.hasToken("hello"));
-	ASSERT_EQ(EmptyToken, tree.hasToken(""));
-	ASSERT_EQ(EmptyToken, tree.hasToken("abc"));
-}
-
-TEST(TokenTrie, unregisterToken)
-{
-	TokenTrie tree;
-
-	ASSERT_TRUE(tree.registerToken("a", t1));
-	ASSERT_FALSE(tree.registerToken("a", t4));
-
-	ASSERT_TRUE(tree.registerToken("ab", t2));
-	ASSERT_FALSE(tree.registerToken("ab", t4));
-
-	ASSERT_TRUE(tree.registerToken("b", t3));
-	ASSERT_FALSE(tree.registerToken("b", t4));
-
-	ASSERT_EQ(t1, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("a"));
-	ASSERT_FALSE(tree.unregisterToken("a"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("b"));
-	ASSERT_FALSE(tree.unregisterToken("b"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("ab"));
-	ASSERT_FALSE(tree.unregisterToken("ab"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("ab"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
-}
-}
-
author	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2015-02-14 23:47:11 +0100
committer	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2015-02-14 23:47:11 +0100
commit	65bbbd778f6e0a3668c859b0e22cced7075a726d (patch)
tree	c7e12a5b3330198deb5ab51eecdd12dd9e03286e /test/formats
parent	fec6ac1d65aee3e4e5c948b0f7cbdec7ceb6cb45 (diff)