summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> 2014-10-31 13:20:09 +0000
committer: benjamin <benjamin@daaaf23c-2e50-4459-9457-1e69db5a47bf> 2014-10-31 13:20:09 +0000
commit: c54065160a03f266c1406edf74d97ab74ee75d51 (patch)
tree: 68b3d83aee2b592b94130892461a368f7756a210
parent: 73cc54cbf494d9da61b640035f25ad9c5eb86d84 (diff)
finished work on TokenTree and tested it.
git-svn-id: file:///var/local/svn/basicwriter@88 daaaf23c-2e50-4459-9457-1e69db5a47bf
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- src/core/utils/Tokenizer.cpp 13
-rw-r--r-- src/core/utils/Tokenizer.hpp 8
-rw-r--r-- test/core/utils/TokenizerTest.cpp 63
4 files changed, 75 insertions, 11 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eff2fcb..726e1a3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -100,6 +100,7 @@ ADD_LIBRARY(ousia_core
src/core/script/Object.cpp
src/core/script/ScriptEngine.cpp
src/core/script/Variant.cpp
+ src/core/utils/Tokenizer.cpp
src/core/utils/Utils.cpp
)
@@ -130,6 +131,7 @@ IF(test)
# Add all unit test files
ADD_EXECUTABLE(ousia_test_core
test/core/utils/RangeSetTest
+ test/core/utils/TokenizerTest
test/core/utils/UtilsTest
test/core/script/FunctionTest
test/core/script/ObjectTest
diff --git a/src/core/utils/Tokenizer.cpp b/src/core/utils/Tokenizer.cpp
index 1a84f0c..38f7585 100644
--- a/src/core/utils/Tokenizer.cpp
+++ b/src/core/utils/Tokenizer.cpp
@@ -21,11 +21,11 @@
namespace ousia {
namespace utils {
-static std::unordered_map<char, TokenTreeNode> buildChildren(
+static std::map<char, TokenTreeNode> buildChildren(
const std::map<std::string, int> &inputs)
{
- std::std::unordered_map<char, TokenTreeNode> children;
- std::unordered_map<char, std::map<std::string, int>> nexts;
+ std::map<char, TokenTreeNode> children;
+ std::map<char, std::map<std::string, int>> nexts;
for (auto &e : inputs) {
const std::string &s = e.first;
@@ -57,7 +57,7 @@ static int buildId(const std::map<std::string, int> &inputs)
if (e.first.empty()) {
if (tokenId != -1) {
throw TokenizerException{std::string{"Ambigous token found: "} +
- e.second};
+ std::to_string(e.second)};
} else {
tokenId = e.second;
}
@@ -67,10 +67,9 @@ static int buildId(const std::map<std::string, int> &inputs)
}
TokenTreeNode::TokenTreeNode(const std::map<std::string, int> &inputs)
- : children(buildChildren(inputs), tokenId(buildId(inputs)))
+ : children(buildChildren(inputs)), tokenId(buildId(inputs))
+
{
}
-
}
}
-
diff --git a/src/core/utils/Tokenizer.hpp b/src/core/utils/Tokenizer.hpp
index 1d0db43..24c4f30 100644
--- a/src/core/utils/Tokenizer.hpp
+++ b/src/core/utils/Tokenizer.hpp
@@ -20,7 +20,7 @@
#define _OUSIA_UTILS_TOKENIZER_HPP_
#include <istream>
-#include <unordered_map>
+#include <map>
#include <queue>
namespace ousia {
@@ -30,14 +30,14 @@ class TokenizerException : public std::exception {
public:
const std::string msg;
- ArgumentValidatorError(const std::string &msg) : msg(msg){};
+ TokenizerException(const std::string &msg) : msg(msg){};
virtual const char *what() const noexcept override { return msg.c_str(); }
};
class TokenTreeNode {
public:
- const std::unordered_map<char, TokenTreeNode> children;
+ const std::map<char, TokenTreeNode> children;
const int tokenId;
TokenTreeNode(const std::map<std::string, int> &inputs);
@@ -59,7 +59,7 @@ class Tokenizer {
private:
const std::istream &input;
const TokenTreeNode root;
- const std::queue<Token> peek;
+ const std::queue<Token> peekQueue;
public:
Tokenizer(const TokenTreeNode &root, std::istream &input);
diff --git a/test/core/utils/TokenizerTest.cpp b/test/core/utils/TokenizerTest.cpp
new file mode 100644
index 0000000..f441fd8
--- /dev/null
+++ b/test/core/utils/TokenizerTest.cpp
@@ -0,0 +1,63 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/utils/Tokenizer.hpp>
+
+namespace ousia {
+namespace utils {
+TEST(TokenTreeNode, testConstructor)
+{
+ TokenTreeNode root{{{"a", 1}, {"aab", 2}, {"aac", 3}, {"abd", 4}}};
+
+ ASSERT_EQ(-1, root.tokenId);
+ ASSERT_EQ(1, root.children.size());
+ ASSERT_TRUE(root.children.find('a') != root.children.end());
+
+ const TokenTreeNode &a = root.children.at('a');
+ ASSERT_EQ(1, a.tokenId);
+ ASSERT_EQ(2, a.children.size());
+ ASSERT_TRUE(a.children.find('a') != a.children.end());
+ ASSERT_TRUE(a.children.find('b') != a.children.end());
+
+ const TokenTreeNode &aa = a.children.at('a');
+ ASSERT_EQ(-1, aa.tokenId);
+ ASSERT_EQ(2, aa.children.size());
+ ASSERT_TRUE(aa.children.find('b') != aa.children.end());
+ ASSERT_TRUE(aa.children.find('c') != aa.children.end());
+
+ const TokenTreeNode &aab = aa.children.at('b');
+ ASSERT_EQ(2, aab.tokenId);
+ ASSERT_EQ(0, aab.children.size());
+
+ const TokenTreeNode &aac = aa.children.at('c');
+ ASSERT_EQ(3, aac.tokenId);
+ ASSERT_EQ(0, aac.children.size());
+
+ const TokenTreeNode &ab = a.children.at('b');
+ ASSERT_EQ(-1, ab.tokenId);
+ ASSERT_EQ(1, ab.children.size());
+ ASSERT_TRUE(ab.children.find('d') != ab.children.end());
+
+ const TokenTreeNode &abd = ab.children.at('d');
+ ASSERT_EQ(4, abd.tokenId);
+ ASSERT_EQ(0, abd.children.size());
+}
+}
+}