diff options
author | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2014-11-19 19:42:07 +0100 |
---|---|---|
committer | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2014-11-19 19:42:07 +0100 |
commit | ec31aae293f88e36190aa32169a97a776873567a (patch) | |
tree | 04081a475d65c4fe574296b0492595f99bccdeba | |
parent | 7b44735f31b8b5d236d66ea1a681abb99ac83bf9 (diff) |
Fixed a bug that prevented the Tokenizer from finding the right token if a parse was incomplete beforehand. Also clear the buffers if a subclass returns false from doPrepare; failing to clear the buffers led to subsequent problems.
-rw-r--r-- | src/core/utils/Tokenizer.cpp | 26 | ||||
-rw-r--r-- | test/core/utils/TokenizerTest.cpp | 28 |
2 files changed, 49 insertions, 5 deletions
diff --git a/src/core/utils/Tokenizer.cpp b/src/core/utils/Tokenizer.cpp index 164a30f..a0ca3aa 100644 --- a/src/core/utils/Tokenizer.cpp +++ b/src/core/utils/Tokenizer.cpp @@ -82,8 +82,8 @@ bool Tokenizer::prepare() { std::stringstream buffer; char c; - const int startColumn = input.getColumn(); - const int startLine = input.getLine(); + int startColumn = input.getColumn(); + int startLine = input.getLine(); bool bufEmpty = true; while (input.peek(&c)) { if (root.children.find(c) != root.children.end()) { @@ -118,9 +118,10 @@ bool Tokenizer::prepare() break; } } + //reset the peek pointer to the last valid position. + input.resetPeek(); // check if we did indeed find a special token. if (match != TOKEN_NONE) { - input.resetPeek(); if (bufEmpty) { // if we did not have text before, construct that token. if (doPrepare( @@ -128,8 +129,11 @@ bool Tokenizer::prepare() input.getColumn(), input.getLine()}, peeked)) { return true; + } else { + startColumn = input.getColumn(); + startLine = input.getLine(); + continue; } - } else { // otherwise we return the text before the token. if (doPrepare(Token{TOKEN_TEXT, buffer.str(), startColumn, @@ -137,8 +141,20 @@ bool Tokenizer::prepare() input.getLine()}, peeked)) { return true; - } + } else{ + //we need to clear the buffer here. After all the token + //corresponding to this buffer segment is already + //constructed. + buffer.str(std::string()); + bufEmpty = true; + startColumn = input.getColumn(); + startLine = input.getLine(); + continue; + } } + } else{ + //if we found nothing, read at least one character. 
+ input.peek(&c); } } buffer << c; diff --git a/test/core/utils/TokenizerTest.cpp b/test/core/utils/TokenizerTest.cpp index ba06c33..79cc01d 100644 --- a/test/core/utils/TokenizerTest.cpp +++ b/test/core/utils/TokenizerTest.cpp @@ -93,5 +93,33 @@ TEST(Tokenizer, testTokenization) } ASSERT_FALSE(tokenizer.next(t)); } + +TEST(Tokenizer, testIncompleteTokens) +{ + TokenTreeNode root{{{"ab", 1}, {"c", 2}}}; + + BufferedCharReader reader; + reader.feed("ac"); + // 1234567890 + // 0 1 + + std::vector<Token> expected = { + {TOKEN_TEXT, "a", 1, 1, 2, 1}, + {2, "c", 2, 1, 3, 1}}; + + Tokenizer tokenizer{reader, root}; + + Token t; + for (auto &te : expected) { + ASSERT_TRUE(tokenizer.next(t)); + ASSERT_EQ(te.tokenId, t.tokenId); + ASSERT_EQ(te.content, t.content); + ASSERT_EQ(te.startColumn, t.startColumn); + ASSERT_EQ(te.startLine, t.startLine); + ASSERT_EQ(te.endColumn, t.endColumn); + ASSERT_EQ(te.endLine, t.endLine); + } + ASSERT_FALSE(tokenizer.next(t)); +} } } |