summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  src/plugins/plain/DynamicTokenizer.cpp        35
-rw-r--r--  src/plugins/plain/DynamicTokenizer.hpp        22
-rw-r--r--  test/plugins/plain/DynamicTokenizerTest.cpp   81
3 files changed, 67 insertions, 71 deletions
diff --git a/src/plugins/plain/DynamicTokenizer.cpp b/src/plugins/plain/DynamicTokenizer.cpp
index a8f2317..f2cfcd1 100644
--- a/src/plugins/plain/DynamicTokenizer.cpp
+++ b/src/plugins/plain/DynamicTokenizer.cpp
@@ -345,14 +345,13 @@ public:
/* Class DynamicTokenizer */
-DynamicTokenizer::DynamicTokenizer(CharReader &reader,
- WhitespaceMode whitespaceMode)
- : reader(reader), whitespaceMode(whitespaceMode), nextTokenTypeId(0)
+DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
+ : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
{
}
template <typename TextHandler, bool read>
-bool DynamicTokenizer::next(DynamicToken &token)
+bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
{
// If we're in the read mode, reset the char reader peek position to the
// current read position
@@ -437,28 +436,28 @@ bool DynamicTokenizer::next(DynamicToken &token)
return match.hasMatch();
}
-bool DynamicTokenizer::read(DynamicToken &token)
+bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token)
{
switch (whitespaceMode) {
case WhitespaceMode::PRESERVE:
- return next<PreservingTextHandler, true>(token);
+ return next<PreservingTextHandler, true>(reader, token);
case WhitespaceMode::TRIM:
- return next<TrimmingTextHandler, true>(token);
+ return next<TrimmingTextHandler, true>(reader, token);
case WhitespaceMode::COLLAPSE:
- return next<CollapsingTextHandler, true>(token);
+ return next<CollapsingTextHandler, true>(reader, token);
}
return false;
}
-bool DynamicTokenizer::peek(DynamicToken &token)
+bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token)
{
switch (whitespaceMode) {
case WhitespaceMode::PRESERVE:
- return next<PreservingTextHandler, false>(token);
+ return next<PreservingTextHandler, false>(reader, token);
case WhitespaceMode::TRIM:
- return next<TrimmingTextHandler, false>(token);
+ return next<TrimmingTextHandler, false>(reader, token);
case WhitespaceMode::COLLAPSE:
- return next<CollapsingTextHandler, false>(token);
+ return next<CollapsingTextHandler, false>(reader, token);
}
return false;
}
@@ -530,16 +529,16 @@ WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
/* Explicitly instantiate all possible instantiations of the "next" member
function */
template bool DynamicTokenizer::next<PreservingTextHandler, false>(
- DynamicToken &token);
+ CharReader &reader, DynamicToken &token);
template bool DynamicTokenizer::next<TrimmingTextHandler, false>(
- DynamicToken &token);
+ CharReader &reader, DynamicToken &token);
template bool DynamicTokenizer::next<CollapsingTextHandler, false>(
- DynamicToken &token);
+ CharReader &reader, DynamicToken &token);
template bool DynamicTokenizer::next<PreservingTextHandler, true>(
- DynamicToken &token);
+ CharReader &reader, DynamicToken &token);
template bool DynamicTokenizer::next<TrimmingTextHandler, true>(
- DynamicToken &token);
+ CharReader &reader, DynamicToken &token);
template bool DynamicTokenizer::next<CollapsingTextHandler, true>(
- DynamicToken &token);
+ CharReader &reader, DynamicToken &token);
}
diff --git a/src/plugins/plain/DynamicTokenizer.hpp b/src/plugins/plain/DynamicTokenizer.hpp
index 760bebf..0b4dd39 100644
--- a/src/plugins/plain/DynamicTokenizer.hpp
+++ b/src/plugins/plain/DynamicTokenizer.hpp
@@ -119,11 +119,6 @@ enum class WhitespaceMode {
class DynamicTokenizer {
private:
/**
- * CharReader instance from which the tokens should be read.
- */
- CharReader &reader;
-
- /**
* Internally used token trie. This object holds all registered tokens.
*/
TokenTrie trie;
@@ -151,23 +146,22 @@ private:
* @tparam TextHandler is the type to be used for the textHandler instance.
* @tparam read specifies whether the function should start from and advance
* the read pointer of the char reader.
+ * @param reader is the CharReader instance from which the data should be
+ * read.
* @param token is the token structure into which the token information
* should be written.
* @return false if the end of the stream has been reached, true otherwise.
*/
template <typename TextHandler, bool read>
- bool next(DynamicToken &token);
+ bool next(CharReader &reader, DynamicToken &token);
public:
/**
* Constructor of the DynamicTokenizer class.
*
- * @param reader is the CharReader that should be used for reading the
- * tokens.
* @param whitespaceMode specifies how whitespace should be handled.
*/
- DynamicTokenizer(CharReader &reader,
- WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
+ DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
/**
* Registers the given string as a token. Returns a const pointer at a
@@ -222,23 +216,27 @@ public:
* Reads a new token from the CharReader and stores it in the given
* DynamicToken instance.
*
+ * @param reader is the CharReader instance from which the data should be
+ * read.
* @param token is a reference at the token instance into which the Token
* information should be written.
* @return true if a token could be read, false if the end of the stream
* has been reached.
*/
- bool read(DynamicToken &token);
+ bool read(CharReader &reader, DynamicToken &token);
/**
* The peek method does not advance the read position of the char reader,
* but reads the next token from the current char reader peek position.
*
+ * @param reader is the CharReader instance from which the data should be
+ * read.
* @param token is a reference at the token instance into which the Token
* information should be written.
* @return true if a token could be read, false if the end of the stream
* has been reached.
*/
- bool peek(DynamicToken &token);
+ bool peek(CharReader &reader, DynamicToken &token);
};
}
diff --git a/test/plugins/plain/DynamicTokenizerTest.cpp b/test/plugins/plain/DynamicTokenizerTest.cpp
index 63fa466..5183fdd 100644
--- a/test/plugins/plain/DynamicTokenizerTest.cpp
+++ b/test/plugins/plain/DynamicTokenizerTest.cpp
@@ -25,8 +25,7 @@ namespace ousia {
TEST(DynamicTokenizer, tokenRegistration)
{
- CharReader reader{"test"};
- DynamicTokenizer tokenizer{reader};
+ DynamicTokenizer tokenizer;
ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
@@ -57,10 +56,10 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
CharReader reader{" this \t is only a \n\n test text "};
// 012345 6789012345678 9 0123456789012345
// 0 1 2 3
- DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+ DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ(" this \t is only a \n\n test text ", token.content);
@@ -68,17 +67,17 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
ASSERT_EQ(0U, loc.getStart());
ASSERT_EQ(36U, loc.getEnd());
- ASSERT_FALSE(tokenizer.read(token));
+ ASSERT_FALSE(tokenizer.read(reader, token));
}
{
CharReader reader{"this \t is only a \n\n test text"};
// 01234 5678901234567 8 9012345678901
// 0 1 2 3
- DynamicTokenizer tokenizer{reader, WhitespaceMode::PRESERVE};
+ DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("this \t is only a \n\n test text", token.content);
@@ -86,7 +85,7 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
ASSERT_EQ(0U, loc.getStart());
ASSERT_EQ(32U, loc.getEnd());
- ASSERT_FALSE(tokenizer.read(token));
+ ASSERT_FALSE(tokenizer.read(reader, token));
}
}
@@ -96,10 +95,10 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
CharReader reader{" this \t is only a \n\n test text "};
// 012345 6789012345678 9 0123456789012345
// 0 1 2 3
- DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+ DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("this \t is only a \n\n test text", token.content);
@@ -107,17 +106,17 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
ASSERT_EQ(1U, loc.getStart());
ASSERT_EQ(33U, loc.getEnd());
- ASSERT_FALSE(tokenizer.read(token));
+ ASSERT_FALSE(tokenizer.read(reader, token));
}
{
CharReader reader{"this \t is only a \n\n test text"};
// 01234 5678901234567 8 9012345678901
// 0 1 2 3
- DynamicTokenizer tokenizer{reader, WhitespaceMode::TRIM};
+ DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("this \t is only a \n\n test text", token.content);
@@ -125,7 +124,7 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
ASSERT_EQ(0U, loc.getStart());
ASSERT_EQ(32U, loc.getEnd());
- ASSERT_FALSE(tokenizer.read(token));
+ ASSERT_FALSE(tokenizer.read(reader, token));
}
}
@@ -135,10 +134,10 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
CharReader reader{" this \t is only a \n\n test text "};
// 012345 6789012345678 9 0123456789012345
// 0 1 2 3
- DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+ DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("this is only a test text", token.content);
@@ -146,17 +145,17 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
ASSERT_EQ(1U, loc.getStart());
ASSERT_EQ(33U, loc.getEnd());
- ASSERT_FALSE(tokenizer.read(token));
+ ASSERT_FALSE(tokenizer.read(reader, token));
}
{
CharReader reader{"this \t is only a \n\n test text"};
// 01234 5678901234567 8 9012345678901
// 0 1 2 3
- DynamicTokenizer tokenizer{reader, WhitespaceMode::COLLAPSE};
+ DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("this is only a test text", token.content);
@@ -164,21 +163,21 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
ASSERT_EQ(0U, loc.getStart());
ASSERT_EQ(32U, loc.getEnd());
- ASSERT_FALSE(tokenizer.read(token));
+ ASSERT_FALSE(tokenizer.read(reader, token));
}
}
TEST(DynamicTokenizer, simpleReadToken)
{
CharReader reader{"test1:test2"};
- DynamicTokenizer tokenizer{reader};
+ DynamicTokenizer tokenizer;
const TokenTypeId tid = tokenizer.registerToken(":");
ASSERT_EQ(0U, tid);
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("test1", token.content);
@@ -194,7 +193,7 @@ TEST(DynamicTokenizer, simpleReadToken)
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(tid, token.type);
ASSERT_EQ(":", token.content);
@@ -210,7 +209,7 @@ TEST(DynamicTokenizer, simpleReadToken)
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("test2", token.content);
@@ -227,14 +226,14 @@ TEST(DynamicTokenizer, simpleReadToken)
TEST(DynamicTokenizer, simplePeekToken)
{
CharReader reader{"test1:test2"};
- DynamicTokenizer tokenizer{reader};
+ DynamicTokenizer tokenizer;
const TokenTypeId tid = tokenizer.registerToken(":");
ASSERT_EQ(0U, tid);
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.peek(token));
+ ASSERT_TRUE(tokenizer.peek(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("test1", token.content);
@@ -248,7 +247,7 @@ TEST(DynamicTokenizer, simplePeekToken)
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.peek(token));
+ ASSERT_TRUE(tokenizer.peek(reader, token));
ASSERT_EQ(tid, token.type);
ASSERT_EQ(":", token.content);
@@ -262,7 +261,7 @@ TEST(DynamicTokenizer, simplePeekToken)
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.peek(token));
+ ASSERT_TRUE(tokenizer.peek(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("test2", token.content);
@@ -276,7 +275,7 @@ TEST(DynamicTokenizer, simplePeekToken)
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("test1", token.content);
@@ -290,7 +289,7 @@ TEST(DynamicTokenizer, simplePeekToken)
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(tid, token.type);
ASSERT_EQ(":", token.content);
@@ -304,7 +303,7 @@ TEST(DynamicTokenizer, simplePeekToken)
{
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("test2", token.content);
@@ -320,7 +319,7 @@ TEST(DynamicTokenizer, simplePeekToken)
TEST(DynamicTokenizer, ambiguousTokens)
{
CharReader reader{"abc"};
- DynamicTokenizer tokenizer(reader);
+ DynamicTokenizer tokenizer;
TokenTypeId t1 = tokenizer.registerToken("abd");
TokenTypeId t2 = tokenizer.registerToken("bc");
@@ -329,7 +328,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
ASSERT_EQ(1U, t2);
DynamicToken token;
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(TextToken, token.type);
ASSERT_EQ("a", token.content);
@@ -338,7 +337,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
ASSERT_EQ(0U, loc.getStart());
ASSERT_EQ(1U, loc.getEnd());
- ASSERT_TRUE(tokenizer.read(token));
+ ASSERT_TRUE(tokenizer.read(reader, token));
ASSERT_EQ(t2, token.type);
ASSERT_EQ("bc", token.content);
@@ -347,7 +346,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
ASSERT_EQ(1U, loc.getStart());
ASSERT_EQ(3U, loc.getEnd());
- ASSERT_FALSE(tokenizer.read(token));
+ ASSERT_FALSE(tokenizer.read(reader, token));
}
TEST(DynamicTokenizer, commentTestWhitespacePreserve)
@@ -355,7 +354,7 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)
CharReader reader{"Test/Test /* Block Comment */", 0};
// 012345678901234567890123456789
// 0 1 2
- DynamicTokenizer tokenizer(reader, WhitespaceMode::PRESERVE);
+ DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE);
const TokenTypeId t1 = tokenizer.registerToken("/");
const TokenTypeId t2 = tokenizer.registerToken("/*");
@@ -371,14 +370,14 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)
DynamicToken t;
for (auto &te : expected) {
- EXPECT_TRUE(tokenizer.read(t));
+ EXPECT_TRUE(tokenizer.read(reader, t));
EXPECT_EQ(te.type, t.type);
EXPECT_EQ(te.content, t.content);
EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
EXPECT_EQ(te.location.getStart(), t.location.getStart());
EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
}
- ASSERT_FALSE(tokenizer.read(t));
+ ASSERT_FALSE(tokenizer.read(reader, t));
}
TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
@@ -386,7 +385,7 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
CharReader reader{"Test/Test /* Block Comment */", 0};
// 012345678901234567890123456789
// 0 1 2
- DynamicTokenizer tokenizer(reader, WhitespaceMode::COLLAPSE);
+ DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE);
const TokenTypeId t1 = tokenizer.registerToken("/");
const TokenTypeId t2 = tokenizer.registerToken("/*");
@@ -402,14 +401,14 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
DynamicToken t;
for (auto &te : expected) {
- EXPECT_TRUE(tokenizer.read(t));
+ EXPECT_TRUE(tokenizer.read(reader, t));
EXPECT_EQ(te.type, t.type);
EXPECT_EQ(te.content, t.content);
EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
EXPECT_EQ(te.location.getStart(), t.location.getStart());
EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
}
- ASSERT_FALSE(tokenizer.read(t));
+ ASSERT_FALSE(tokenizer.read(reader, t));
}
}