summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/core/parser/utils/TokenizedDataTest.cpp526
1 files changed, 526 insertions, 0 deletions
diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp
new file mode 100644
index 0000000..231bad9
--- /dev/null
+++ b/test/core/parser/utils/TokenizedDataTest.cpp
@@ -0,0 +1,526 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/parser/utils/TokenizedData.hpp>
+
+namespace ousia {
+
+TEST(TokenizedData, dataWhitespacePreserve)
+{
+ TokenizedData data;
+ ASSERT_EQ(16U, data.append(" test1 test2 "));
+ // 0123456789012345
+ // 0 1
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ(" test1 test2 ", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(16U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, dataWhitespaceTrim)
+{
+ TokenizedData data;
+ ASSERT_EQ(16U, data.append(" test1 test2 "));
+ // 0123456789012345
+ // 0 1
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ("test1 test2", token.content);
+ EXPECT_EQ(1U, token.getLocation().getStart());
+ EXPECT_EQ(14U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM));
+}
+
+TEST(TokenizedData, dataWhitespaceCollapse)
+{
+ TokenizedData data;
+ ASSERT_EQ(16U, data.append(" test1 test2 "));
+ // 0123456789012345
+ // 0 1
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ("test1 test2", token.content);
+ EXPECT_EQ(1U, token.getLocation().getStart());
+ EXPECT_EQ(14U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+}
+
+TEST(TokenizedData, singleToken)
+{
+ TokenizedData data;
+ ASSERT_EQ(2U, data.append("$$"));
+ data.mark(5, 0, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, singleDisabledToken)
+{
+ TokenizedData data;
+ ASSERT_EQ(2U, data.append("$$"));
+ data.mark(5, 0, 2);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, dualToken)
+{
+ TokenizedData data;
+ ASSERT_EQ(2U, data.append("$$"));
+ data.mark(6, 0, 1);
+ data.mark(5, 0, 2);
+ data.mark(6, 1, 1);
+
+ data.enableToken(5);
+ data.enableToken(6);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, dualTokenShorterEnabled)
+{
+ TokenizedData data;
+ ASSERT_EQ(2U, data.append("$$"));
+ data.mark(6, 0, 1);
+ data.mark(5, 0, 2);
+ data.mark(6, 1, 1);
+
+ data.enableToken(6);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(6U, token.id);
+ EXPECT_EQ("$", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(1U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(6U, token.id);
+ EXPECT_EQ("$", token.content);
+ EXPECT_EQ(1U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, dualTokenLongerEnabled)
+{
+ TokenizedData data;
+ ASSERT_EQ(2U, data.append("$$"));
+ data.mark(5, 0, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, tokensAndDataPreserveWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(10U, data.append("$$ test $$"));
+ // 0123456789
+ data.mark(5, 0, 2);
+ data.mark(5, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ(" test ", token.content);
+ EXPECT_EQ(2U, token.getLocation().getStart());
+ EXPECT_EQ(8U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(8U, token.getLocation().getStart());
+ EXPECT_EQ(10U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, tokensAndDataTrimWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(10U, data.append("$$ test $$"));
+ // 0123456789
+ data.mark(5, 0, 2);
+ data.mark(5, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ("test", token.content);
+ EXPECT_EQ(3U, token.getLocation().getStart());
+ EXPECT_EQ(7U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(8U, token.getLocation().getStart());
+ EXPECT_EQ(10U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM));
+}
+
+TEST(TokenizedData, tokensAndDataCollapseWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(10U, data.append("$$ test $$"));
+ // 0123456789
+ data.mark(5, 0, 2);
+ data.mark(5, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ("test", token.content);
+ EXPECT_EQ(3U, token.getLocation().getStart());
+ EXPECT_EQ(7U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(8U, token.getLocation().getStart());
+ EXPECT_EQ(10U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+}
+
+TEST(TokenizedData, tokensAndWhitespacePreserveWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(10U, data.append("$$ $$"));
+ // 0123456789
+ data.mark(5, 0, 2);
+ data.mark(5, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ(" ", token.content);
+ EXPECT_EQ(2U, token.getLocation().getStart());
+ EXPECT_EQ(8U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(8U, token.getLocation().getStart());
+ EXPECT_EQ(10U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, tokensAndWhitespaceTrimWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(10U, data.append("$$ $$"));
+ // 0123456789
+ data.mark(5, 0, 2);
+ data.mark(5, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(8U, token.getLocation().getStart());
+ EXPECT_EQ(10U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM));
+}
+
+TEST(TokenizedData, tokensAndWhitespaceCollapseWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(10U, data.append("$$ $$"));
+ // 0123456789
+ data.mark(5, 0, 2);
+ data.mark(5, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(8U, token.getLocation().getStart());
+ EXPECT_EQ(10U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+}
+
+TEST(TokenizedData, textPreserveWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(6U, data.append(" $$ "));
+ // 012345
+ data.mark(5, 2, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ(" ", token.content);
+ EXPECT_EQ(0U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(2U, token.getLocation().getStart());
+ EXPECT_EQ(4U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ(" ", token.content);
+ EXPECT_EQ(4U, token.getLocation().getStart());
+ EXPECT_EQ(6U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.text(token, WhitespaceMode::PRESERVE));
+ ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE));
+}
+
+TEST(TokenizedData, textTrimWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(6U, data.append(" $$ "));
+ // 012345
+ data.mark(5, 2, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM));
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(2U, token.getLocation().getStart());
+ EXPECT_EQ(4U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM));
+ ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM));
+}
+
+TEST(TokenizedData, textCollapseWhitespace)
+{
+ TokenizedData data;
+ ASSERT_EQ(6U, data.append(" $$ "));
+ // 012345
+ data.mark(5, 2, 2);
+
+ data.enableToken(5);
+
+ Token token;
+ ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE));
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(5U, token.id);
+ EXPECT_EQ("$$", token.content);
+ EXPECT_EQ(2U, token.getLocation().getStart());
+ EXPECT_EQ(4U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE));
+ ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+}
+
+TEST(TokenizedData, appendChars)
+{
+ TokenizedData data;
+ ASSERT_EQ(1U, data.append('t', 5, 7));
+ ASSERT_EQ(2U, data.append('e', 7, 8));
+ ASSERT_EQ(3U, data.append('s', 8, 10));
+ ASSERT_EQ(4U, data.append('t', 10, 12));
+
+ Token token;
+ ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ("test", token.content);
+ EXPECT_EQ(5U, token.getLocation().getStart());
+ EXPECT_EQ(12U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE));
+ ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE));
+}
+
+TEST(TokenizedData, copy)
+{
+ TokenizedData data;
+ ASSERT_EQ(7U, data.append(" a $ b "));
+ // 0123456
+ data.mark(6, 3, 1);
+ data.enableToken(6);
+
+ Token token;
+ ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ("a", token.content);
+ EXPECT_EQ(1U, token.getLocation().getStart());
+ EXPECT_EQ(2U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE));
+
+ TokenizedData dataCopy = data;
+
+ ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(6U, token.id);
+ EXPECT_EQ("$", token.content);
+ EXPECT_EQ(3U, token.getLocation().getStart());
+ EXPECT_EQ(4U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(dataCopy.next(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(6U, token.id);
+ EXPECT_EQ("$", token.content);
+ EXPECT_EQ(3U, token.getLocation().getStart());
+ EXPECT_EQ(4U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+
+ ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ(" b ", token.content);
+ EXPECT_EQ(4U, token.getLocation().getStart());
+ EXPECT_EQ(7U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+ ASSERT_FALSE(data.next(token));
+
+ ASSERT_TRUE(dataCopy.text(token, WhitespaceMode::COLLAPSE));
+ EXPECT_EQ(Tokens::Data, token.id);
+ EXPECT_EQ("b", token.content);
+ EXPECT_EQ(5U, token.getLocation().getStart());
+ EXPECT_EQ(6U, token.getLocation().getEnd());
+ EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId());
+ ASSERT_FALSE(dataCopy.next(token));
+}
+}
+