summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Paassen <bpaassen@techfak.uni-bielefeld.de>2014-11-19 19:42:07 +0100
committerBenjamin Paassen <bpaassen@techfak.uni-bielefeld.de>2014-11-19 19:42:07 +0100
commitec31aae293f88e36190aa32169a97a776873567a (patch)
tree04081a475d65c4fe574296b0492595f99bccdeba
parent7b44735f31b8b5d236d66ea1a681abb99ac83bf9 (diff)
Fixed a bug that prevented the Tokenizer from finding the right token if a parse was incomplete beforehand. Also, the buffers are now cleared if a subclass returns false from doPrepare; failing to clear the buffers led to subsequent problems.
-rw-r--r--src/core/utils/Tokenizer.cpp26
-rw-r--r--test/core/utils/TokenizerTest.cpp28
2 files changed, 49 insertions, 5 deletions
diff --git a/src/core/utils/Tokenizer.cpp b/src/core/utils/Tokenizer.cpp
index 164a30f..a0ca3aa 100644
--- a/src/core/utils/Tokenizer.cpp
+++ b/src/core/utils/Tokenizer.cpp
@@ -82,8 +82,8 @@ bool Tokenizer::prepare()
{
std::stringstream buffer;
char c;
- const int startColumn = input.getColumn();
- const int startLine = input.getLine();
+ int startColumn = input.getColumn();
+ int startLine = input.getLine();
bool bufEmpty = true;
while (input.peek(&c)) {
if (root.children.find(c) != root.children.end()) {
@@ -118,9 +118,10 @@ bool Tokenizer::prepare()
break;
}
}
+ //reset the peek pointer to the last valid position.
+ input.resetPeek();
// check if we did indeed find a special token.
if (match != TOKEN_NONE) {
- input.resetPeek();
if (bufEmpty) {
// if we did not have text before, construct that token.
if (doPrepare(
@@ -128,8 +129,11 @@ bool Tokenizer::prepare()
input.getColumn(), input.getLine()},
peeked)) {
return true;
+ } else {
+ startColumn = input.getColumn();
+ startLine = input.getLine();
+ continue;
}
-
} else {
// otherwise we return the text before the token.
if (doPrepare(Token{TOKEN_TEXT, buffer.str(), startColumn,
@@ -137,8 +141,20 @@ bool Tokenizer::prepare()
input.getLine()},
peeked)) {
return true;
- }
+ } else{
+ //we need to clear the buffer here. After all the token
+ //corresponding to this buffer segment is already
+ //constructed.
+ buffer.str(std::string());
+ bufEmpty = true;
+ startColumn = input.getColumn();
+ startLine = input.getLine();
+ continue;
+ }
}
+ } else{
+ //if we found nothing, read at least one character.
+ input.peek(&c);
}
}
buffer << c;
diff --git a/test/core/utils/TokenizerTest.cpp b/test/core/utils/TokenizerTest.cpp
index ba06c33..79cc01d 100644
--- a/test/core/utils/TokenizerTest.cpp
+++ b/test/core/utils/TokenizerTest.cpp
@@ -93,5 +93,33 @@ TEST(Tokenizer, testTokenization)
}
ASSERT_FALSE(tokenizer.next(t));
}
+
+TEST(Tokenizer, testIncompleteTokens)
+{
+ TokenTreeNode root{{{"ab", 1}, {"c", 2}}};
+
+ BufferedCharReader reader;
+ reader.feed("ac");
+ // 1234567890
+ // 0 1
+
+ std::vector<Token> expected = {
+ {TOKEN_TEXT, "a", 1, 1, 2, 1},
+ {2, "c", 2, 1, 3, 1}};
+
+ Tokenizer tokenizer{reader, root};
+
+ Token t;
+ for (auto &te : expected) {
+ ASSERT_TRUE(tokenizer.next(t));
+ ASSERT_EQ(te.tokenId, t.tokenId);
+ ASSERT_EQ(te.content, t.content);
+ ASSERT_EQ(te.startColumn, t.startColumn);
+ ASSERT_EQ(te.startLine, t.startLine);
+ ASSERT_EQ(te.endColumn, t.endColumn);
+ ASSERT_EQ(te.endLine, t.endLine);
+ }
+ ASSERT_FALSE(tokenizer.next(t));
+}
}
}