diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-26 00:22:23 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-26 00:22:23 +0100 |
commit | 19dd5946125e90dcbd61966896c9f6cfc4451d80 (patch) | |
tree | da552f4d2c881a53e62b5af59060b3e8cc6ead1b /test | |
parent | 041a2dd18050e9e26ca1ee00851461dff1e1f90c (diff) |
Reactivated TokenizerTest
Diffstat (limited to 'test')
-rw-r--r-- | test/core/parser/utils/TokenizerTest.cpp | 94 |
1 file changed, 82 insertions, 12 deletions
diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 0f2bfb7..785bd81 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -22,6 +22,8 @@ #include <core/parser/utils/Tokenizer.hpp> #include <core/parser/utils/TokenizedData.hpp> +#include "TokenizedDataTestUtils.hpp" + namespace ousia { TEST(Tokenizer, tokenRegistration) @@ -58,14 +60,16 @@ void expectData(const std::string &expected, SourceOffset tokenStart, { ASSERT_EQ(Tokens::Data, token.id); - Variant text = data.text(mode); - ASSERT_TRUE(text.isString()); + Token textToken; + TokenizedDataReader reader = data.reader(); + ASSERT_TRUE(reader.read(textToken, TokenSet{}, mode)); - EXPECT_EQ(expected, text.asString()); + EXPECT_EQ(expected, textToken.content); EXPECT_EQ(tokenStart, token.location.getStart()); EXPECT_EQ(tokenEnd, token.location.getEnd()); - EXPECT_EQ(textStart, text.getLocation().getStart()); - EXPECT_EQ(textEnd, text.getLocation().getEnd()); + EXPECT_EQ(textStart, textToken.getLocation().getStart()); + EXPECT_EQ(textEnd, textToken.getLocation().getEnd()); + EXPECT_TRUE(reader.atEnd()); } TEST(Tokenizer, textTokenPreserveWhitespace) @@ -97,8 +101,8 @@ TEST(Tokenizer, textTokenPreserveWhitespace) TokenizedData data; ASSERT_TRUE(tokenizer.read(reader, token, data)); - expectData("this \t is only a \n\n test text", 0, 32, 0, 32, - token, data, WhitespaceMode::PRESERVE); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, token, + data, WhitespaceMode::PRESERVE); data.clear(); ASSERT_FALSE(tokenizer.read(reader, token, data)); @@ -134,8 +138,8 @@ TEST(Tokenizer, textTokenTrimWhitespace) TokenizedData data; ASSERT_TRUE(tokenizer.read(reader, token, data)); - expectData("this \t is only a \n\n test text", 0, 32, 0, 32, - token, data, WhitespaceMode::TRIM); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, token, + data, WhitespaceMode::TRIM); data.clear(); 
ASSERT_FALSE(tokenizer.read(reader, token, data)); @@ -368,9 +372,12 @@ TEST(Tokenizer, commentTestWhitespacePreserve) if (te.id != Tokens::Data) { EXPECT_EQ(te.content, t.content); } else { - Variant text = data.text(WhitespaceMode::PRESERVE); - ASSERT_TRUE(text.isString()); - EXPECT_EQ(te.content, text.asString()); + TokenizedDataReader dataReader = data.reader(); + Token textToken; + ASSERT_TRUE(dataReader.read(textToken, TokenSet{}, + WhitespaceMode::PRESERVE)); + EXPECT_TRUE(dataReader.atEnd()); + EXPECT_EQ(te.content, textToken.content); } EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); EXPECT_EQ(te.location.getStart(), t.location.getStart()); @@ -380,5 +387,68 @@ TEST(Tokenizer, commentTestWhitespacePreserve) TokenizedData data; ASSERT_FALSE(tokenizer.read(reader, t, data)); } + +TEST(Tokenizer, nonPrimaryTokens) +{ + CharReader reader{ + "<<switch to $inline \\math mode$ they said, see the world they " + "said>>"}; + // 012345678901234567890 12345678901234567890123456789012345678901234567 + // 0 1 2 3 4 5 6 + + Tokenizer tokenizer; + + TokenId tBackslash = tokenizer.registerToken("\\"); + TokenId tDollar = tokenizer.registerToken("$", false); + TokenId tSpeechStart = tokenizer.registerToken("<<", false); + TokenId tSpeechEnd = tokenizer.registerToken(">>", false); + + TokenSet tokens = TokenSet{tDollar, tSpeechStart, tSpeechEnd}; + + Token token, textToken; + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tSpeechStart, "<<", tokens, + WhitespaceMode::TRIM, 0, 2); + assertText(dataReader, "switch to", tokens, WhitespaceMode::TRIM, 2, + 11); + assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 12, + 13); + assertText(dataReader, "inline", tokens, WhitespaceMode::TRIM, 13, 19); + assertEnd(dataReader); + } + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + 
ASSERT_EQ(tBackslash, token.id); + ASSERT_EQ(20U, token.location.getStart()); + ASSERT_EQ(21U, token.location.getEnd()); + } + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertText(dataReader, "math mode", tokens, WhitespaceMode::TRIM, 21, + 30); + assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 30, + 31); + assertText(dataReader, "they said, see the world they said", tokens, + WhitespaceMode::TRIM, 32, 66); + assertToken(dataReader, tSpeechEnd, ">>", tokens, WhitespaceMode::TRIM, + 66, 68); + assertEnd(dataReader); + } + + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, token, data)); +} } |