Diffstat (limited to 'test/core/parser/utils')
-rw-r--r--  test/core/parser/utils/SourceOffsetVectorTest.cpp |   2
-rw-r--r--  test/core/parser/utils/TokenizedDataTest.cpp      | 567
-rw-r--r--  test/core/parser/utils/TokenizedDataTestUtils.hpp |  64
-rw-r--r--  test/core/parser/utils/TokenizerTest.cpp          | 441
4 files changed, 587 insertions, 487 deletions
diff --git a/test/core/parser/utils/SourceOffsetVectorTest.cpp b/test/core/parser/utils/SourceOffsetVectorTest.cpp
index 25a4163..26254f9 100644
--- a/test/core/parser/utils/SourceOffsetVectorTest.cpp
+++ b/test/core/parser/utils/SourceOffsetVectorTest.cpp
@@ -51,7 +51,7 @@ TEST(SourceOffsetVector, gaps)
for (size_t i = 0; i < 999; i++) {
auto elem = vec.loadOffset(i);
EXPECT_EQ(i * 3 + 5, elem.first);
- EXPECT_EQ((i + 1) * 3 + 5, elem.second);
+ EXPECT_EQ(i * 3 + 7, elem.second);
}
auto elem = vec.loadOffset(999);
EXPECT_EQ(999U * 3 + 5, elem.first);
diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp
index 231bad9..8488459 100644
--- a/test/core/parser/utils/TokenizedDataTest.cpp
+++ b/test/core/parser/utils/TokenizedDataTest.cpp
@@ -20,6 +20,8 @@
#include <core/parser/utils/TokenizedData.hpp>
+#include "TokenizedDataTestUtils.hpp"
+
namespace ousia {
TEST(TokenizedData, dataWhitespacePreserve)
@@ -29,15 +31,10 @@ TEST(TokenizedData, dataWhitespacePreserve)
// 0123456789012345
// 0 1
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ(" test1 test2 ", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(16U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, " test1 test2 ", TokenSet{}, WhitespaceMode::PRESERVE,
+ 0, 16);
+ assertEnd(reader);
}
TEST(TokenizedData, dataWhitespaceTrim)
@@ -47,15 +44,10 @@ TEST(TokenizedData, dataWhitespaceTrim)
// 0123456789012345
// 0 1
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ("test1 test2", token.content);
- EXPECT_EQ(1U, token.getLocation().getStart());
- EXPECT_EQ(14U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM));
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::TRIM, 1,
+ 14);
+ assertEnd(reader);
}
TEST(TokenizedData, dataWhitespaceCollapse)
@@ -65,15 +57,10 @@ TEST(TokenizedData, dataWhitespaceCollapse)
// 0123456789012345
// 0 1
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ("test1 test2", token.content);
- EXPECT_EQ(1U, token.getLocation().getStart());
- EXPECT_EQ(14U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::COLLAPSE, 1,
+ 14);
+ assertEnd(reader);
}
TEST(TokenizedData, singleToken)
@@ -82,17 +69,9 @@ TEST(TokenizedData, singleToken)
ASSERT_EQ(2U, data.append("$$"));
data.mark(5, 0, 2);
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2);
+ assertEnd(reader);
}
TEST(TokenizedData, singleDisabledToken)
@@ -101,15 +80,9 @@ TEST(TokenizedData, singleDisabledToken)
ASSERT_EQ(2U, data.append("$$"));
data.mark(5, 0, 2);
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "$$", TokenSet{}, WhitespaceMode::COLLAPSE, 0, 2);
+ assertEnd(reader);
}
TEST(TokenizedData, dualToken)
@@ -120,18 +93,10 @@ TEST(TokenizedData, dualToken)
data.mark(5, 0, 2);
data.mark(6, 1, 1);
- data.enableToken(5);
- data.enableToken(6);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5, 6}, WhitespaceMode::COLLAPSE, 0,
+ 2);
+ assertEnd(reader);
}
TEST(TokenizedData, dualTokenShorterEnabled)
@@ -142,385 +107,281 @@ TEST(TokenizedData, dualTokenShorterEnabled)
data.mark(5, 0, 2);
data.mark(6, 1, 1);
- data.enableToken(6);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(6U, token.id);
- EXPECT_EQ("$", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(1U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(6U, token.id);
- EXPECT_EQ("$", token.content);
- EXPECT_EQ(1U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 0, 1);
+ assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 1, 2);
+ assertEnd(reader);
}
TEST(TokenizedData, dualTokenLongerEnabled)
{
TokenizedData data;
ASSERT_EQ(2U, data.append("$$"));
+ data.mark(6, 0, 1);
data.mark(5, 0, 2);
+ data.mark(6, 1, 1);
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2);
+ assertEnd(reader);
}
TEST(TokenizedData, tokensAndDataPreserveWhitespace)
{
TokenizedData data;
- ASSERT_EQ(10U, data.append("$$ test $$"));
- // 0123456789
+ ASSERT_EQ(18U, data.append("$$ test    text $$"));
+ // 012345678901234567
data.mark(5, 0, 2);
data.mark(5, 2);
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ(" test ", token.content);
- EXPECT_EQ(2U, token.getLocation().getStart());
- EXPECT_EQ(8U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(8U, token.getLocation().getStart());
- EXPECT_EQ(10U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2);
+ assertText(reader, " test text ", TokenSet{5}, WhitespaceMode::PRESERVE,
+ 2, 16);
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 16, 18);
+ assertEnd(reader);
}
TEST(TokenizedData, tokensAndDataTrimWhitespace)
{
TokenizedData data;
- ASSERT_EQ(10U, data.append("$$ test $$"));
- // 0123456789
+ ASSERT_EQ(18U, data.append("$$ test    text $$"));
+ // 012345678901234567
data.mark(5, 0, 2);
data.mark(5, 2);
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ("test", token.content);
- EXPECT_EQ(3U, token.getLocation().getStart());
- EXPECT_EQ(7U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(8U, token.getLocation().getStart());
- EXPECT_EQ(10U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2);
+ assertText(reader, "test text", TokenSet{5}, WhitespaceMode::TRIM, 3,
+ 15);
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 16, 18);
+ assertEnd(reader);
}
TEST(TokenizedData, tokensAndDataCollapseWhitespace)
{
TokenizedData data;
- ASSERT_EQ(10U, data.append("$$ test $$"));
- // 0123456789
+ ASSERT_EQ(18U, data.append("$$ test    text $$"));
+ // 012345678901234567
data.mark(5, 0, 2);
data.mark(5, 2);
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ("test", token.content);
- EXPECT_EQ(3U, token.getLocation().getStart());
- EXPECT_EQ(7U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(8U, token.getLocation().getStart());
- EXPECT_EQ(10U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2);
+ assertText(reader, "test text", TokenSet{5}, WhitespaceMode::COLLAPSE, 3,
+ 15);
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 16, 18);
+ assertEnd(reader);
}
TEST(TokenizedData, tokensAndWhitespacePreserveWhitespace)
{
TokenizedData data;
- ASSERT_EQ(10U, data.append("$$      $$"));
- // 0123456789
+ ASSERT_EQ(8U, data.append("$$    $$"));
+ // 01234567
data.mark(5, 0, 2);
data.mark(5, 2);
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ(" ", token.content);
- EXPECT_EQ(2U, token.getLocation().getStart());
- EXPECT_EQ(8U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(8U, token.getLocation().getStart());
- EXPECT_EQ(10U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2);
+ assertText(reader, " ", TokenSet{5}, WhitespaceMode::PRESERVE, 2, 6);
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 6, 8);
+ assertEnd(reader);
}
TEST(TokenizedData, tokensAndWhitespaceTrimWhitespace)
{
TokenizedData data;
- ASSERT_EQ(10U, data.append("$$      $$"));
- // 0123456789
+ ASSERT_EQ(8U, data.append("$$    $$"));
+ // 01234567
data.mark(5, 0, 2);
data.mark(5, 2);
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(8U, token.getLocation().getStart());
- EXPECT_EQ(10U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM));
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2);
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 6, 8);
+ assertEnd(reader);
}
TEST(TokenizedData, tokensAndWhitespaceCollapseWhitespace)
{
TokenizedData data;
- ASSERT_EQ(10U, data.append("$$      $$"));
- // 0123456789
+ ASSERT_EQ(8U, data.append("$$    $$"));
+ // 01234567
data.mark(5, 0, 2);
data.mark(5, 2);
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2);
+ assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 6, 8);
+ assertEnd(reader);
+}
- ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(8U, token.getLocation().getStart());
- EXPECT_EQ(10U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+TEST(TokenizedData, appendChars)
+{
+ TokenizedData data;
+ ASSERT_EQ(1U, data.append('t', 5, 7));
+ ASSERT_EQ(2U, data.append('e', 7, 8));
+ ASSERT_EQ(3U, data.append('s', 8, 10));
+ ASSERT_EQ(4U, data.append('t', 10, 12));
- ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "test", TokenSet{5}, WhitespaceMode::COLLAPSE, 5, 12);
+ assertEnd(reader);
}
-TEST(TokenizedData, textPreserveWhitespace)
+TEST(TokenizedData, protectedWhitespace)
{
TokenizedData data;
- ASSERT_EQ(6U, data.append("  $$  "));
- // 012345
- data.mark(5, 2, 2);
-
- data.enableToken(5);
-
- Token token;
- ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ(" ", token.content);
- EXPECT_EQ(0U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(2U, token.getLocation().getStart());
- EXPECT_EQ(4U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ(" ", token.content);
- EXPECT_EQ(4U, token.getLocation().getStart());
- EXPECT_EQ(6U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.text(token, WhitespaceMode::PRESERVE));
- ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+ ASSERT_EQ(4U, data.append("test", 10));
+ ASSERT_EQ(11U, data.append("   test", 14, true));
+
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "test test", TokenSet{5}, WhitespaceMode::COLLAPSE, 10,
+ 21);
+ assertEnd(reader);
}
-TEST(TokenizedData, textTrimWhitespace)
+TEST(TokenizedData, specialNewlineToken)
{
TokenizedData data;
- ASSERT_EQ(6U, data.append("  $$  "));
- // 012345
- data.mark(5, 2, 2);
-
- data.enableToken(5);
+ data.append("a\nb\n \nc\n");
+ // 0 12 3456 78 9
+
+ const TokenSet tokens{Tokens::Newline};
+
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1);
+ assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE,
+ 1, 2);
+ assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3);
+ assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE,
+ 3, 4);
+ assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE,
+ 7, 8);
+ assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9);
+ assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE,
+ 9, 10);
+ assertEnd(reader);
+}
- Token token;
- ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM));
+TEST(TokenizedData, specialParagraphToken)
+{
+ TokenizedData data;
+ data.append("a\nb\n \nc\n");
+ // 0 12 3456 78 9
- ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(2U, token.getLocation().getStart());
- EXPECT_EQ(4U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+ const TokenSet tokens{Tokens::Paragraph};
- ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM));
- ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM));
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3);
+ assertToken(reader, Tokens::Paragraph, "\n   \n", tokens,
+ WhitespaceMode::COLLAPSE, 3, 8);
+ assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9);
+ assertEnd(reader);
}
-TEST(TokenizedData, textCollapseWhitespace)
+TEST(TokenizedData, specialSectionToken)
{
TokenizedData data;
- ASSERT_EQ(6U, data.append("  $$  "));
- // 012345
- data.mark(5, 2, 2);
+ data.append("a\nb\n \n \t \n");
+ // 0 12 3456 789 01 2
+ // 0 1
- data.enableToken(5);
+ const TokenSet tokens{Tokens::Section};
- Token token;
- ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE));
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(5U, token.id);
- EXPECT_EQ("$$", token.content);
- EXPECT_EQ(2U, token.getLocation().getStart());
- EXPECT_EQ(4U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE));
- ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3);
+ assertToken(reader, Tokens::Section, "\n   \n  \t \n", tokens,
+ WhitespaceMode::COLLAPSE, 3, 13);
+ assertEnd(reader);
}
-TEST(TokenizedData, appendChars)
+TEST(TokenizedData, specialTokenPrecedence)
{
TokenizedData data;
- ASSERT_EQ(1U, data.append('t', 5, 7));
- ASSERT_EQ(2U, data.append('e', 7, 8));
- ASSERT_EQ(3U, data.append('s', 8, 10));
- ASSERT_EQ(4U, data.append('t', 10, 12));
+ data.append("a\nb\n\nc\n\n\nd");
+ // 0 12 3 45 6 7 89
+
+ const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section};
+
+ TokenizedDataReader reader = data.reader();
+ assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1);
+ assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE,
+ 1, 2);
+ assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3);
+ assertToken(reader, Tokens::Paragraph, "\n\n", tokens,
+ WhitespaceMode::COLLAPSE, 3, 5);
+ assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 5, 6);
+ assertToken(reader, Tokens::Section, "\n\n\n", tokens,
+ WhitespaceMode::COLLAPSE, 6, 9);
+ assertText(reader, "d", tokens, WhitespaceMode::COLLAPSE, 9, 10);
+ assertEnd(reader);
+}
- Token token;
- ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ("test", token.content);
- EXPECT_EQ(5U, token.getLocation().getStart());
- EXPECT_EQ(12U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+TEST(TokenizedData, specialTokenPrecedence2)
+{
+ TokenizedData data;
+ data.append("\nb\n\nc\n\n\n");
+ // 0 12 3 45 6 7
+
+ const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section};
+
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE,
+ 0, 1);
+ assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 1, 2);
+ assertToken(reader, Tokens::Paragraph, "\n\n", tokens,
+ WhitespaceMode::COLLAPSE, 2, 4);
+ assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 4, 5);
+ assertToken(reader, Tokens::Section, "\n\n\n", tokens,
+ WhitespaceMode::COLLAPSE, 5, 8);
+ assertEnd(reader);
+}
- ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE));
- ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+TEST(TokenizedData, specialTokenIndent)
+{
+ TokenizedData data;
+ data.append(" test\n\ttest2\n test3 \ttest4\ntest5");
+ // 01234567 8 901234 5678901234567890 123456 789012
+ // 0 1 2 3 4
+ const TokenSet tokens{Tokens::Indent, Tokens::Dedent};
+
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE,
+ 4, 4);
+ assertText(reader, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8);
+ assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE,
+ 10, 10);
+ assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37);
+ assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
+ 38, 38);
+ assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43);
+ assertEnd(reader);
}
-TEST(TokenizedData, copy)
+TEST(TokenizedData, specialTokenIndentOverlap)
{
TokenizedData data;
- ASSERT_EQ(7U, data.append(" a $ b "));
- // 0123456
- data.mark(6, 3, 1);
- data.enableToken(6);
-
- Token token;
- ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ("a", token.content);
- EXPECT_EQ(1U, token.getLocation().getStart());
- EXPECT_EQ(2U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE));
-
- TokenizedData dataCopy = data;
-
- ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(6U, token.id);
- EXPECT_EQ("$", token.content);
- EXPECT_EQ(3U, token.getLocation().getStart());
- EXPECT_EQ(4U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(dataCopy.next(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(6U, token.id);
- EXPECT_EQ("$", token.content);
- EXPECT_EQ(3U, token.getLocation().getStart());
- EXPECT_EQ(4U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
-
- ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ(" b ", token.content);
- EXPECT_EQ(4U, token.getLocation().getStart());
- EXPECT_EQ(7U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
- ASSERT_FALSE(data.next(token));
-
- ASSERT_TRUE(dataCopy.text(token, WhitespaceMode::COLLAPSE));
- EXPECT_EQ(Tokens::Data, token.id);
- EXPECT_EQ("b", token.content);
- EXPECT_EQ(5U, token.getLocation().getStart());
- EXPECT_EQ(6U, token.getLocation().getEnd());
- EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
- ASSERT_FALSE(dataCopy.next(token));
+ data.append(" test\n\ttest2\n test3 \ttest4\ntest5");
+ // 01234567 8 901234 5678901234567890 123456 789012
+ // 0 1 2 3 4
+ const TokenSet tokens{Tokens::Indent, Tokens::Dedent, 5};
+
+ data.mark(5, 4, 4);
+
+ TokenizedDataReader reader = data.reader();
+ assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE,
+ 4, 4);
+ assertToken(reader, 5, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8);
+ assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE,
+ 10, 10);
+ assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37);
+ assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
+ 38, 38);
+ assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43);
+ assertEnd(reader);
}
+
}
diff --git a/test/core/parser/utils/TokenizedDataTestUtils.hpp b/test/core/parser/utils/TokenizedDataTestUtils.hpp
new file mode 100644
index 0000000..c384f9d
--- /dev/null
+++ b/test/core/parser/utils/TokenizedDataTestUtils.hpp
@@ -0,0 +1,64 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_
+#define _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_
+
+namespace ousia {
+
+static void assertToken(TokenizedDataReader &reader, TokenId id,
+ const std::string &text, const TokenSet &tokens = TokenSet{},
+ WhitespaceMode mode = WhitespaceMode::TRIM,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset,
+ SourceId sourceId = InvalidSourceId)
+{
+ Token token;
+ ASSERT_TRUE(reader.read(token, tokens, mode));
+ EXPECT_EQ(id, token.id);
+ EXPECT_EQ(text, token.content);
+ if (start != InvalidSourceOffset) {
+ EXPECT_EQ(start, token.getLocation().getStart());
+ }
+ if (end != InvalidSourceOffset) {
+ EXPECT_EQ(end, token.getLocation().getEnd());
+ }
+ EXPECT_EQ(sourceId, token.getLocation().getSourceId());
+}
+
+static void assertText(TokenizedDataReader &reader, const std::string &text,
+ const TokenSet &tokens = TokenSet{},
+ WhitespaceMode mode = WhitespaceMode::TRIM,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset,
+ SourceId id = InvalidSourceId)
+{
+ assertToken(reader, Tokens::Data, text, tokens, mode, start, end, id);
+}
+
+static void assertEnd(TokenizedDataReader &reader)
+{
+ Token token;
+ ASSERT_TRUE(reader.atEnd());
+ ASSERT_FALSE(reader.read(token));
+}
+
+}
+
+#endif /* _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ */
+
diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp
index 3809a12..45fc77a 100644
--- a/test/core/parser/utils/TokenizerTest.cpp
+++ b/test/core/parser/utils/TokenizerTest.cpp
@@ -20,9 +20,66 @@
#include <core/common/CharReader.hpp>
#include <core/parser/utils/Tokenizer.hpp>
+#include <core/parser/utils/TokenizedData.hpp>
+
+#include "TokenizedDataTestUtils.hpp"
namespace ousia {
+static void assertPrimaryToken(CharReader &reader, Tokenizer &tokenizer,
+ TokenId id, const std::string &text,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset,
+ SourceId sourceId = InvalidSourceId)
+{
+ Token token;
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ EXPECT_EQ(id, token.id);
+ EXPECT_EQ(text, token.content);
+ if (start != InvalidSourceOffset) {
+ EXPECT_EQ(start, token.getLocation().getStart());
+ }
+ if (end != InvalidSourceOffset) {
+ EXPECT_EQ(end, token.getLocation().getEnd());
+ }
+ EXPECT_EQ(sourceId, token.getLocation().getSourceId());
+}
+
+static void expectData(const std::string &expected, SourceOffset tokenStart,
+ SourceOffset tokenEnd, SourceOffset textStart,
+ SourceOffset textEnd, const Token &token,
+ TokenizedData &data,
+ WhitespaceMode mode = WhitespaceMode::PRESERVE)
+{
+ ASSERT_EQ(Tokens::Data, token.id);
+
+ Token textToken;
+ TokenizedDataReader reader = data.reader();
+ ASSERT_TRUE(reader.read(textToken, TokenSet{}, mode));
+
+ EXPECT_EQ(expected, textToken.content);
+ EXPECT_EQ(tokenStart, token.location.getStart());
+ EXPECT_EQ(tokenEnd, token.location.getEnd());
+ EXPECT_EQ(textStart, textToken.getLocation().getStart());
+ EXPECT_EQ(textEnd, textToken.getLocation().getEnd());
+ EXPECT_TRUE(reader.atEnd());
+}
+
+static void assertDataToken(CharReader &reader, Tokenizer &tokenizer,
+ const std::string &expected,
+ SourceOffset tokenStart, SourceOffset tokenEnd,
+ SourceOffset textStart, SourceOffset textEnd,
+ WhitespaceMode mode = WhitespaceMode::PRESERVE)
+{
+ Token token;
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+
+ expectData(expected, tokenStart, tokenEnd, textStart, textEnd, token, data,
+ mode);
+}
+
TEST(Tokenizer, tokenRegistration)
{
Tokenizer tokenizer;
@@ -31,23 +88,23 @@ TEST(Tokenizer, tokenRegistration)
ASSERT_EQ(0U, tokenizer.registerToken("a"));
ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("a"));
- ASSERT_EQ("a", tokenizer.getTokenString(0U));
+ ASSERT_EQ("a", tokenizer.lookupToken(0U).string);
ASSERT_EQ(1U, tokenizer.registerToken("b"));
ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("b"));
- ASSERT_EQ("b", tokenizer.getTokenString(1U));
+ ASSERT_EQ("b", tokenizer.lookupToken(1U).string);
ASSERT_EQ(2U, tokenizer.registerToken("c"));
ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("c"));
- ASSERT_EQ("c", tokenizer.getTokenString(2U));
+ ASSERT_EQ("c", tokenizer.lookupToken(2U).string);
ASSERT_TRUE(tokenizer.unregisterToken(1U));
ASSERT_FALSE(tokenizer.unregisterToken(1U));
- ASSERT_EQ("", tokenizer.getTokenString(1U));
+ ASSERT_EQ("", tokenizer.lookupToken(1U).string);
ASSERT_EQ(1U, tokenizer.registerToken("d"));
ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("d"));
- ASSERT_EQ("d", tokenizer.getTokenString(1U));
+ ASSERT_EQ("d", tokenizer.lookupToken(1U).string);
}
TEST(Tokenizer, textTokenPreserveWhitespace)
@@ -56,36 +113,34 @@ TEST(Tokenizer, textTokenPreserveWhitespace)
CharReader reader{" this \t is only a \n\n test text "};
// 012345 6789012345678 9 0123456789012345
// 0 1 2 3
- Tokenizer tokenizer{WhitespaceMode::PRESERVE};
+ Tokenizer tokenizer;
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ(" this \t is only a \n\n test text ", token.content);
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
- SourceLocation loc = token.location;
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(36U, loc.getEnd());
+ expectData(" this \t is only a \n\n test text ", 0, 36, 0, 36,
+ token, data, WhitespaceMode::PRESERVE);
- ASSERT_FALSE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
}
{
CharReader reader{"this \t is only a \n\n test text"};
// 01234 5678901234567 8 9012345678901
// 0 1 2 3
- Tokenizer tokenizer{WhitespaceMode::PRESERVE};
+ Tokenizer tokenizer;
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("this \t is only a \n\n test text", token.content);
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
- SourceLocation loc = token.location;
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(32U, loc.getEnd());
+ expectData("this \t is only a \n\n test text", 0, 32, 0, 32, token,
+ data, WhitespaceMode::PRESERVE);
- ASSERT_FALSE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
}
}
@@ -95,36 +150,34 @@ TEST(Tokenizer, textTokenTrimWhitespace)
CharReader reader{" this \t is only a \n\n test text "};
// 012345 6789012345678 9 0123456789012345
// 0 1 2 3
- Tokenizer tokenizer{WhitespaceMode::TRIM};
+ Tokenizer tokenizer;
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("this \t is only a \n\n test text", token.content);
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
- SourceLocation loc = token.location;
- ASSERT_EQ(1U, loc.getStart());
- ASSERT_EQ(33U, loc.getEnd());
+ expectData("this \t is only a \n\n test text", 0, 36, 1, 33, token,
+ data, WhitespaceMode::TRIM);
- ASSERT_FALSE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
}
{
CharReader reader{"this \t is only a \n\n test text"};
// 01234 5678901234567 8 9012345678901
// 0 1 2 3
- Tokenizer tokenizer{WhitespaceMode::TRIM};
+ Tokenizer tokenizer;
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("this \t is only a \n\n test text", token.content);
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
- SourceLocation loc = token.location;
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(32U, loc.getEnd());
+ expectData("this \t is only a \n\n test text", 0, 32, 0, 32, token,
+ data, WhitespaceMode::TRIM);
- ASSERT_FALSE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
}
}
@@ -134,36 +187,34 @@ TEST(Tokenizer, textTokenCollapseWhitespace)
CharReader reader{" this \t is only a \n\n test text "};
// 012345 6789012345678 9 0123456789012345
// 0 1 2 3
- Tokenizer tokenizer{WhitespaceMode::COLLAPSE};
+ Tokenizer tokenizer;
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("this is only a test text", token.content);
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
- SourceLocation loc = token.location;
- ASSERT_EQ(1U, loc.getStart());
- ASSERT_EQ(33U, loc.getEnd());
+ expectData("this is only a test text", 0, 36, 1, 33, token, data,
+ WhitespaceMode::COLLAPSE);
- ASSERT_FALSE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
}
{
CharReader reader{"this \t is only a \n\n test text"};
// 01234 5678901234567 8 9012345678901
// 0 1 2 3
- Tokenizer tokenizer{WhitespaceMode::COLLAPSE};
+ Tokenizer tokenizer;
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("this is only a test text", token.content);
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
- SourceLocation loc = token.location;
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(32U, loc.getEnd());
+ expectData("this is only a test text", 0, 32, 0, 32, token, data,
+ WhitespaceMode::COLLAPSE);
- ASSERT_FALSE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
}
}
@@ -177,14 +228,12 @@ TEST(Tokenizer, simpleReadToken)
{
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("test1", token.content);
- SourceLocation loc = token.location;
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(5U, loc.getEnd());
+ expectData("test1", 0, 5, 0, 5, token, data);
char c;
ASSERT_TRUE(reader.peek(c));
@@ -193,7 +242,8 @@ TEST(Tokenizer, simpleReadToken)
{
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
ASSERT_EQ(tid, token.id);
ASSERT_EQ(":", token.content);
@@ -209,14 +259,10 @@ TEST(Tokenizer, simpleReadToken)
{
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("test2", token.content);
-
- SourceLocation loc = token.location;
- ASSERT_EQ(6U, loc.getStart());
- ASSERT_EQ(11U, loc.getEnd());
+ expectData("test2", 6, 11, 6, 11, token, data);
char c;
ASSERT_FALSE(reader.peek(c));
@@ -233,21 +279,17 @@ TEST(Tokenizer, simplePeekToken)
{
Token token;
- ASSERT_TRUE(tokenizer.peek(reader, token));
-
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("test1", token.content);
-
- SourceLocation loc = token.location;
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(5U, loc.getEnd());
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.peek(reader, token, data));
+ expectData("test1", 0, 5, 0, 5, token, data);
ASSERT_EQ(0U, reader.getOffset());
ASSERT_EQ(5U, reader.getPeekOffset());
}
{
Token token;
- ASSERT_TRUE(tokenizer.peek(reader, token));
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.peek(reader, token, data));
ASSERT_EQ(tid, token.id);
ASSERT_EQ(":", token.content);
@@ -261,35 +303,26 @@ TEST(Tokenizer, simplePeekToken)
{
Token token;
- ASSERT_TRUE(tokenizer.peek(reader, token));
-
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("test2", token.content);
-
- SourceLocation loc = token.location;
- ASSERT_EQ(6U, loc.getStart());
- ASSERT_EQ(11U, loc.getEnd());
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.peek(reader, token, data));
+ expectData("test2", 6, 11, 6, 11, token, data);
ASSERT_EQ(0U, reader.getOffset());
ASSERT_EQ(11U, reader.getPeekOffset());
}
{
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
-
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("test1", token.content);
-
- SourceLocation loc = token.location;
- ASSERT_EQ(0U, loc.getStart());
- ASSERT_EQ(5U, loc.getEnd());
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ expectData("test1", 0, 5, 0, 5, token, data);
ASSERT_EQ(5U, reader.getOffset());
ASSERT_EQ(5U, reader.getPeekOffset());
}
{
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
ASSERT_EQ(tid, token.id);
ASSERT_EQ(":", token.content);
@@ -303,14 +336,9 @@ TEST(Tokenizer, simplePeekToken)
{
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
-
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("test2", token.content);
-
- SourceLocation loc = token.location;
- ASSERT_EQ(6U, loc.getStart());
- ASSERT_EQ(11U, loc.getEnd());
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ expectData("test2", 6, 11, 6, 11, token, data);
ASSERT_EQ(11U, reader.getOffset());
ASSERT_EQ(11U, reader.getPeekOffset());
}
@@ -320,6 +348,7 @@ TEST(Tokenizer, ambiguousTokens)
{
CharReader reader{"abc"};
Tokenizer tokenizer;
+ TokenizedData data;
TokenId t1 = tokenizer.registerToken("abd");
TokenId t2 = tokenizer.registerToken("bc");
@@ -328,16 +357,17 @@ TEST(Tokenizer, ambiguousTokens)
ASSERT_EQ(1U, t2);
Token token;
- ASSERT_TRUE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
- ASSERT_EQ(Tokens::Data, token.id);
- ASSERT_EQ("a", token.content);
+ expectData("a", 0, 1, 0, 1, token, data);
SourceLocation loc = token.location;
ASSERT_EQ(0U, loc.getStart());
ASSERT_EQ(1U, loc.getEnd());
- ASSERT_TRUE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
ASSERT_EQ(t2, token.id);
ASSERT_EQ("bc", token.content);
@@ -346,7 +376,8 @@ TEST(Tokenizer, ambiguousTokens)
ASSERT_EQ(1U, loc.getStart());
ASSERT_EQ(3U, loc.getEnd());
- ASSERT_FALSE(tokenizer.read(reader, token));
+ data.clear();
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
}
TEST(Tokenizer, commentTestWhitespacePreserve)
@@ -354,7 +385,7 @@ TEST(Tokenizer, commentTestWhitespacePreserve)
CharReader reader{"Test/Test /* Block Comment */", 0};
// 012345678901234567890123456789
// 0 1 2
- Tokenizer tokenizer(WhitespaceMode::PRESERVE);
+ Tokenizer tokenizer;
const TokenId t1 = tokenizer.registerToken("/");
const TokenId t2 = tokenizer.registerToken("/*");
@@ -370,45 +401,189 @@ TEST(Tokenizer, commentTestWhitespacePreserve)
Token t;
for (auto &te : expected) {
- EXPECT_TRUE(tokenizer.read(reader, t));
+ TokenizedData data(0);
+ EXPECT_TRUE(tokenizer.read(reader, t, data));
EXPECT_EQ(te.id, t.id);
- EXPECT_EQ(te.content, t.content);
+ if (te.id != Tokens::Data) {
+ EXPECT_EQ(te.content, t.content);
+ } else {
+ TokenizedDataReader dataReader = data.reader();
+ Token textToken;
+ ASSERT_TRUE(dataReader.read(textToken, TokenSet{},
+ WhitespaceMode::PRESERVE));
+ EXPECT_TRUE(dataReader.atEnd());
+ EXPECT_EQ(te.content, textToken.content);
+ }
EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
EXPECT_EQ(te.location.getStart(), t.location.getStart());
EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
}
- ASSERT_FALSE(tokenizer.read(reader, t));
+
+ TokenizedData data;
+ ASSERT_FALSE(tokenizer.read(reader, t, data));
}
-TEST(Tokenizer, commentTestWhitespaceCollapse)
+TEST(Tokenizer, nonPrimaryTokens)
{
- CharReader reader{"Test/Test /* Block Comment */", 0};
- // 012345678901234567890123456789
- // 0 1 2
- Tokenizer tokenizer(WhitespaceMode::COLLAPSE);
+ CharReader reader{
+ "<<switch to $inline \\math mode$ they said, see the world they "
+ "said>>"};
+ // 012345678901234567890 12345678901234567890123456789012345678901234567
+ // 0 1 2 3 4 5 6
- const TokenId t1 = tokenizer.registerToken("/");
- const TokenId t2 = tokenizer.registerToken("/*");
- const TokenId t3 = tokenizer.registerToken("*/");
+ Tokenizer tokenizer;
- std::vector<Token> expected = {
- {Tokens::Data, "Test", SourceLocation{0, 0, 4}},
- {t1, "/", SourceLocation{0, 4, 5}},
- {Tokens::Data, "Test", SourceLocation{0, 5, 9}},
- {t2, "/*", SourceLocation{0, 10, 12}},
- {Tokens::Data, "Block Comment", SourceLocation{0, 13, 26}},
- {t3, "*/", SourceLocation{0, 27, 29}}};
+ TokenId tBackslash = tokenizer.registerToken("\\");
+ TokenId tDollar = tokenizer.registerToken("$", false);
+ TokenId tSpeechStart = tokenizer.registerToken("<<", false);
+ TokenId tSpeechEnd = tokenizer.registerToken(">>", false);
- Token t;
- for (auto &te : expected) {
- EXPECT_TRUE(tokenizer.read(reader, t));
- EXPECT_EQ(te.id, t.id);
- EXPECT_EQ(te.content, t.content);
- EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
- EXPECT_EQ(te.location.getStart(), t.location.getStart());
- EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+ TokenSet tokens = TokenSet{tDollar, tSpeechStart, tSpeechEnd};
+
+ Token token, textToken;
+ {
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ ASSERT_EQ(Tokens::Data, token.id);
+
+ TokenizedDataReader dataReader = data.reader();
+ assertToken(dataReader, tSpeechStart, "<<", tokens,
+ WhitespaceMode::TRIM, 0, 2);
+ assertText(dataReader, "switch to", tokens, WhitespaceMode::TRIM, 2,
+ 11);
+ assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 12,
+ 13);
+ assertText(dataReader, "inline", tokens, WhitespaceMode::TRIM, 13, 19);
+ assertEnd(dataReader);
+ }
+
+ {
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ ASSERT_EQ(tBackslash, token.id);
+ ASSERT_EQ(20U, token.location.getStart());
+ ASSERT_EQ(21U, token.location.getEnd());
+ }
+
+ {
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ ASSERT_EQ(Tokens::Data, token.id);
+
+ TokenizedDataReader dataReader = data.reader();
+ assertText(dataReader, "math mode", tokens, WhitespaceMode::TRIM, 21,
+ 30);
+ assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 30,
+ 31);
+ assertText(dataReader, "they said, see the world they said", tokens,
+ WhitespaceMode::TRIM, 32, 66);
+ assertToken(dataReader, tSpeechEnd, ">>", tokens, WhitespaceMode::TRIM,
+ 66, 68);
+ assertEnd(dataReader);
+ }
+
+ TokenizedData data;
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
+}
+
+TEST(Tokenizer, primaryNonPrimaryTokenInteraction)
+{
+ CharReader reader{"<<test1>><test2><<test3\\><<<test4>>>"};
+ // 01234567890123456789012 3456789012345
+ // 0 1 2 3
+
+ Tokenizer tokenizer;
+
+ TokenId tP1 = tokenizer.registerToken("<", true);
+ TokenId tP2 = tokenizer.registerToken(">", true);
+ TokenId tP3 = tokenizer.registerToken("\\>", true);
+ TokenId tN1 = tokenizer.registerToken("<<", false);
+ TokenId tN2 = tokenizer.registerToken(">>", false);
+
+ TokenSet tokens = TokenSet{tN1, tN2};
+
+ Token token, textToken;
+ {
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ ASSERT_EQ(Tokens::Data, token.id);
+
+ TokenizedDataReader dataReader = data.reader();
+ assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 0, 2);
+ assertText(dataReader, "test1", tokens, WhitespaceMode::TRIM, 2, 7);
+ assertToken(dataReader, tN2, ">>", tokens, WhitespaceMode::TRIM, 7, 9);
+ assertEnd(dataReader);
+ }
+
+ assertPrimaryToken(reader, tokenizer, tP1, "<", 9, 10);
+ assertDataToken(reader, tokenizer, "test2", 10, 15, 10, 15);
+ assertPrimaryToken(reader, tokenizer, tP2, ">", 15, 16);
+
+ {
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ ASSERT_EQ(Tokens::Data, token.id);
+
+ TokenizedDataReader dataReader = data.reader();
+ assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 16, 18);
+ assertText(dataReader, "test3", tokens, WhitespaceMode::TRIM, 18, 23);
+ assertEnd(dataReader);
+ }
+
+ assertPrimaryToken(reader, tokenizer, tP3, "\\>", 23, 25);
+
+ {
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ ASSERT_EQ(Tokens::Data, token.id);
+
+ TokenizedDataReader dataReader = data.reader();
+ assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 25, 27);
+ assertEnd(dataReader);
+ }
+
+ assertPrimaryToken(reader, tokenizer, tP1, "<", 27, 28);
+
+ {
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ ASSERT_EQ(Tokens::Data, token.id);
+
+ TokenizedDataReader dataReader = data.reader();
+ assertText(dataReader, "test4", tokens, WhitespaceMode::TRIM, 28, 33);
+ assertToken(dataReader, tN2, ">>", tokens, WhitespaceMode::TRIM, 33, 35);
+ assertEnd(dataReader);
+ }
+
+ assertPrimaryToken(reader, tokenizer, tP2, ">", 35, 36);
+
+ TokenizedData data;
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
+}
+
+TEST(Tokenizer, ambiguousTokens2)
+{
+ CharReader reader{"<\\"};
+
+ Tokenizer tokenizer;
+
+ TokenId tBackslash = tokenizer.registerToken("\\");
+ TokenId tAnnotationStart = tokenizer.registerToken("<\\");
+
+ TokenSet tokens = TokenSet{tBackslash, tAnnotationStart};
+ Token token;
+ {
+ TokenizedData data;
+ ASSERT_TRUE(tokenizer.read(reader, token, data));
+ ASSERT_EQ("<\\", token.content);
+ ASSERT_EQ(tAnnotationStart, token.id);
+ ASSERT_TRUE(data.empty());
+ }
+
+ {
+ TokenizedData data;
+ ASSERT_FALSE(tokenizer.read(reader, token, data));
}
- ASSERT_FALSE(tokenizer.read(reader, t));
}
}