diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 21:32:54 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 21:32:54 +0100 |
commit | 8e5e08c4f293434585d2a88f7f331f8ce49b67b9 (patch) | |
tree | fa82a937b1ea80f45d7955938c333f68f8a0f3f6 /test/formats | |
parent | 2544749215bc2465bfeca431e271110ca86d8a83 (diff) | |
parent | 40f4666c43211d9071a827ad8a2524688e7f678f (diff) |
Merge branch 'astoecke_parser_stack_new'
Conflicts:
application/src/core/parser/stack/DocumentHandler.cpp
application/src/core/parser/stack/DocumentHandler.hpp
Diffstat (limited to 'test/formats')
-rw-r--r-- | test/formats/osdm/DynamicTokenizerTest.cpp | 415 | ||||
-rw-r--r-- | test/formats/osdm/TokenTrieTest.cpp | 92 | ||||
-rw-r--r-- | test/formats/osml/OsmlStreamParserTest.cpp (renamed from test/formats/osdm/OsdmStreamParserTest.cpp) | 794 | ||||
-rw-r--r-- | test/formats/osxml/OsxmlEventParserTest.cpp | 217 | ||||
-rw-r--r-- | test/formats/osxml/OsxmlParserTest.cpp | 395 |
5 files changed, 1153 insertions, 760 deletions
diff --git a/test/formats/osdm/DynamicTokenizerTest.cpp b/test/formats/osdm/DynamicTokenizerTest.cpp deleted file mode 100644 index c1f8785..0000000 --- a/test/formats/osdm/DynamicTokenizerTest.cpp +++ /dev/null @@ -1,415 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <gtest/gtest.h> - -#include <core/common/CharReader.hpp> -#include <formats/osdm/DynamicTokenizer.hpp> - -namespace ousia { - -TEST(DynamicTokenizer, tokenRegistration) -{ - DynamicTokenizer tokenizer; - - ASSERT_EQ(EmptyToken, tokenizer.registerToken("")); - - ASSERT_EQ(0U, tokenizer.registerToken("a")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("a")); - ASSERT_EQ("a", tokenizer.getTokenString(0U)); - - ASSERT_EQ(1U, tokenizer.registerToken("b")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("b")); - ASSERT_EQ("b", tokenizer.getTokenString(1U)); - - ASSERT_EQ(2U, tokenizer.registerToken("c")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("c")); - ASSERT_EQ("c", tokenizer.getTokenString(2U)); - - ASSERT_TRUE(tokenizer.unregisterToken(1U)); - ASSERT_FALSE(tokenizer.unregisterToken(1U)); - ASSERT_EQ("", tokenizer.getTokenString(1U)); - - ASSERT_EQ(1U, tokenizer.registerToken("d")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("d")); - ASSERT_EQ("d", tokenizer.getTokenString(1U)); -} - -TEST(DynamicTokenizer, textTokenPreserveWhitespace) -{ - { - CharReader reader{" this \t is only a \n\n test text "}; - // 012345 6789012345678 9 0123456789012345 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ(" this \t is only a \n\n test text ", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(36U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } - - { - CharReader reader{"this \t is only a \n\n test text"}; - // 01234 5678901234567 8 9012345678901 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this \t is only a \n\n test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } -} - -TEST(DynamicTokenizer, textTokenTrimWhitespace) -{ - { - CharReader reader{" this \t is only a \n\n test text "}; - // 012345 6789012345678 9 0123456789012345 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this \t is only a \n\n test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } - - { - CharReader reader{"this \t is only a \n\n test text"}; - // 01234 5678901234567 8 9012345678901 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this \t is only a \n\n test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } -} - -TEST(DynamicTokenizer, textTokenCollapseWhitespace) -{ - { - CharReader reader{" this \t is only a \n\n test text "}; - // 012345 6789012345678 9 0123456789012345 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this is only a test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } - - { - CharReader reader{"this \t is only a \n\n test text"}; - // 01234 5678901234567 8 9012345678901 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this is only a test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } -} - -TEST(DynamicTokenizer, simpleReadToken) -{ - CharReader reader{"test1:test2"}; - DynamicTokenizer tokenizer; - - const TokenTypeId tid = tokenizer.registerToken(":"); - ASSERT_EQ(0U, tid); - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - - char c; - ASSERT_TRUE(reader.peek(c)); - ASSERT_EQ(':', c); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(tid, token.type); - ASSERT_EQ(":", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(6U, loc.getEnd()); - - char c; - ASSERT_TRUE(reader.peek(c)); - ASSERT_EQ('t', c); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); - - char c; - ASSERT_FALSE(reader.peek(c)); - } -} - -TEST(DynamicTokenizer, simplePeekToken) -{ - CharReader reader{"test1:test2"}; - DynamicTokenizer tokenizer; - - const TokenTypeId tid = tokenizer.registerToken(":"); - ASSERT_EQ(0U, tid); - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - ASSERT_EQ(0U, reader.getOffset()); - ASSERT_EQ(5U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(tid, token.type); - ASSERT_EQ(":", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(6U, loc.getEnd()); - ASSERT_EQ(0U, reader.getOffset()); - ASSERT_EQ(6U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); - ASSERT_EQ(0U, reader.getOffset()); - ASSERT_EQ(11U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - ASSERT_EQ(5U, reader.getOffset()); - ASSERT_EQ(5U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(tid, token.type); - ASSERT_EQ(":", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(6U, loc.getEnd()); - ASSERT_EQ(6U, reader.getOffset()); - ASSERT_EQ(6U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); - ASSERT_EQ(11U, reader.getOffset()); - ASSERT_EQ(11U, reader.getPeekOffset()); - } -} - -TEST(DynamicTokenizer, ambiguousTokens) -{ - CharReader reader{"abc"}; - DynamicTokenizer tokenizer; - - TokenTypeId t1 = tokenizer.registerToken("abd"); - TokenTypeId t2 = tokenizer.registerToken("bc"); - - ASSERT_EQ(0U, t1); - ASSERT_EQ(1U, t2); - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("a", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(t2, token.type); - ASSERT_EQ("bc", token.content); - - loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(3U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); -} - -TEST(DynamicTokenizer, commentTestWhitespacePreserve) -{ - CharReader reader{"Test/Test /* Block Comment */", 0}; - // 012345678901234567890123456789 - // 0 1 2 - DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE); - - const TokenTypeId t1 = tokenizer.registerToken("/"); - const TokenTypeId t2 = tokenizer.registerToken("/*"); - const TokenTypeId t3 = tokenizer.registerToken("*/"); - - std::vector<DynamicToken> expected = { - {TextToken, "Test", SourceLocation{0, 0, 4}}, - {t1, "/", SourceLocation{0, 4, 5}}, - {TextToken, "Test ", SourceLocation{0, 5, 10}}, - {t2, "/*", SourceLocation{0, 10, 12}}, - {TextToken, " Block Comment ", SourceLocation{0, 12, 27}}, - {t3, "*/", SourceLocation{0, 27, 29}}}; - - DynamicToken t; - for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.type, t.type); - EXPECT_EQ(te.content, t.content); - EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); - EXPECT_EQ(te.location.getStart(), t.location.getStart()); - EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); - } - ASSERT_FALSE(tokenizer.read(reader, t)); -} - -TEST(DynamicTokenizer, commentTestWhitespaceCollapse) -{ - CharReader reader{"Test/Test /* Block Comment */", 0}; - // 012345678901234567890123456789 - // 0 1 2 - DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE); - - const TokenTypeId t1 = tokenizer.registerToken("/"); - const TokenTypeId t2 = tokenizer.registerToken("/*"); - const TokenTypeId t3 = tokenizer.registerToken("*/"); - - std::vector<DynamicToken> expected = { - {TextToken, "Test", SourceLocation{0, 0, 4}}, - {t1, "/", SourceLocation{0, 4, 5}}, - {TextToken, "Test", SourceLocation{0, 5, 9}}, - {t2, "/*", SourceLocation{0, 10, 12}}, - {TextToken, "Block Comment", SourceLocation{0, 13, 26}}, - {t3, "*/", SourceLocation{0, 27, 29}}}; - - DynamicToken t; - for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.type, t.type); - EXPECT_EQ(te.content, t.content); - EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); - EXPECT_EQ(te.location.getStart(), t.location.getStart()); - EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); - } - ASSERT_FALSE(tokenizer.read(reader, t)); -} - -} - diff --git a/test/formats/osdm/TokenTrieTest.cpp b/test/formats/osdm/TokenTrieTest.cpp deleted file mode 100644 index aacd6c0..0000000 --- a/test/formats/osdm/TokenTrieTest.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <gtest/gtest.h> - -#include <formats/osdm/TokenTrie.hpp> - -namespace ousia { - -static const TokenTypeId t1 = 0; -static const TokenTypeId t2 = 1; -static const TokenTypeId t3 = 2; -static const TokenTypeId t4 = 3; - -TEST(TokenTrie, registerToken) -{ - TokenTrie tree; - - ASSERT_TRUE(tree.registerToken("a", t1)); - ASSERT_TRUE(tree.registerToken("ab", t2)); - ASSERT_TRUE(tree.registerToken("b", t3)); - ASSERT_TRUE(tree.registerToken("hello", t4)); - - ASSERT_FALSE(tree.registerToken("", t1)); - ASSERT_FALSE(tree.registerToken("a", t4)); - ASSERT_FALSE(tree.registerToken("ab", t4)); - ASSERT_FALSE(tree.registerToken("b", t4)); - ASSERT_FALSE(tree.registerToken("hello", t4)); - - ASSERT_EQ(t1, tree.hasToken("a")); - ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(t3, tree.hasToken("b")); - ASSERT_EQ(t4, tree.hasToken("hello")); - ASSERT_EQ(EmptyToken, tree.hasToken("")); - ASSERT_EQ(EmptyToken, tree.hasToken("abc")); -} - -TEST(TokenTrie, unregisterToken) -{ - TokenTrie tree; - - ASSERT_TRUE(tree.registerToken("a", t1)); - ASSERT_FALSE(tree.registerToken("a", t4)); - - ASSERT_TRUE(tree.registerToken("ab", t2)); - ASSERT_FALSE(tree.registerToken("ab", t4)); - - ASSERT_TRUE(tree.registerToken("b", t3)); - ASSERT_FALSE(tree.registerToken("b", t4)); - - ASSERT_EQ(t1, tree.hasToken("a")); - ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(t3, tree.hasToken("b")); - - ASSERT_TRUE(tree.unregisterToken("a")); - ASSERT_FALSE(tree.unregisterToken("a")); - - ASSERT_EQ(EmptyToken, tree.hasToken("a")); - ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(t3, tree.hasToken("b")); - - ASSERT_TRUE(tree.unregisterToken("b")); - ASSERT_FALSE(tree.unregisterToken("b")); - - ASSERT_EQ(EmptyToken, tree.hasToken("a")); - ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(EmptyToken, tree.hasToken("b")); - - ASSERT_TRUE(tree.unregisterToken("ab")); - ASSERT_FALSE(tree.unregisterToken("ab")); - - ASSERT_EQ(EmptyToken, tree.hasToken("a")); - ASSERT_EQ(EmptyToken, tree.hasToken("ab")); - ASSERT_EQ(EmptyToken, tree.hasToken("b")); -} -} - diff --git a/test/formats/osdm/OsdmStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 46f4cf6..d52fa5b 100644 --- a/test/formats/osdm/OsdmStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -23,95 +23,205 @@ #include <core/common/CharReader.hpp> #include <core/frontend/TerminalLogger.hpp> -#include <formats/osdm/OsdmStreamParser.hpp> +#include <formats/osml/OsmlStreamParser.hpp> namespace ousia { static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; -TEST(OsdmStreamParser, empty) +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertCommand(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertData(OsmlStreamParser &reader, const std::string &data, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + EXPECT_EQ(data, reader.getData().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + EXPECT_EQ(defaultField, reader.inDefaultField()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertAnnotationStart(OsmlStreamParser &reader, + const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertAnnotationStart(OsmlStreamParser &reader, + const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertAnnotationStart(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertAnnotationEnd(OsmlStreamParser &reader, + const std::string &name, + const std::string &elementName, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); + ASSERT_EQ(name, reader.getCommandName().asString()); + if (!elementName.empty()) { + ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); + ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name")); + + auto it = reader.getCommandArguments().asMap().find("name"); + ASSERT_EQ(elementName, it->second.asString()); + } + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +TEST(OsmlStreamParser, empty) { const char *testString = ""; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, oneCharacter) +TEST(OsmlStreamParser, oneCharacter) { const char *testString = "a"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); + OsmlStreamParser reader(charReader, logger); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); + assertData(reader, "a", 0, 1); } -TEST(OsdmStreamParser, whitespaceElimination) +TEST(OsmlStreamParser, whitespaceElimination) { const char *testString = " hello \t world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); + OsmlStreamParser reader(charReader, logger); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(14U, loc.getEnd()); + assertData(reader, "hello world", 1, 14); } -TEST(OsdmStreamParser, whitespaceEliminationWithLinebreak) +TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) { const char *testString = " hello \n world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); + OsmlStreamParser reader(charReader, logger); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(14U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + assertData(reader, "hello world", 1, 14); } -TEST(OsdmStreamParser, escapeWhitespace) +TEST(OsmlStreamParser, escapeWhitespace) { const char *testString = " hello\\ \\ world "; // 012345 67 89012345 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(15U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + assertData(reader, "hello world", 1, 15); } static void testEscapeSpecialCharacter(const std::string &c) { CharReader charReader(std::string("\\") + c); - OsdmStreamParser reader(charReader, logger); - EXPECT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + OsmlStreamParser reader(charReader, logger); + EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); EXPECT_EQ(c, reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); @@ -119,32 +229,30 @@ static void testEscapeSpecialCharacter(const std::string &c) EXPECT_EQ(1U + c.size(), loc.getEnd()); } -TEST(OsdmStreamParser, escapeSpecialCharacters) +TEST(OsmlStreamParser, escapeSpecialCharacters) { testEscapeSpecialCharacter("\\"); testEscapeSpecialCharacter("{"); testEscapeSpecialCharacter("}"); - testEscapeSpecialCharacter("<"); - testEscapeSpecialCharacter(">"); } -TEST(OsdmStreamParser, simpleSingleLineComment) +TEST(OsmlStreamParser, simpleSingleLineComment) { const char *testString = "% This is a single line comment"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, singleLineComment) +TEST(OsmlStreamParser, singleLineComment) { const char *testString = "a% This is a single line comment\nb"; // 01234567890123456789012345678901 23 // 0 1 2 3 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("a", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(0U, loc.getStart()); @@ -152,25 +260,25 @@ TEST(OsdmStreamParser, singleLineComment) } { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("b", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(33U, loc.getStart()); ASSERT_EQ(34U, loc.getEnd()); } - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, multilineComment) +TEST(OsmlStreamParser, multilineComment) { const char *testString = "a%{ This is a\n\n multiline line comment}%b"; // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("a", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(0U, loc.getStart()); @@ -178,25 +286,25 @@ TEST(OsdmStreamParser, multilineComment) } { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("b", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, nestedMultilineComment) +TEST(OsmlStreamParser, nestedMultilineComment) { const char *testString = "a%{%{Another\n\n}%multiline line comment}%b"; // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("a", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(0U, loc.getStart()); @@ -204,23 +312,23 @@ TEST(OsdmStreamParser, nestedMultilineComment) } { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("b", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, simpleCommand) +TEST(OsmlStreamParser, simpleCommand) { const char *testString = "\\test"; // 0 12345 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); Variant commandName = reader.getCommandName(); ASSERT_EQ("test", commandName.asString()); @@ -230,16 +338,16 @@ TEST(OsdmStreamParser, simpleCommand) ASSERT_EQ(5U, loc.getEnd()); ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, simpleCommandWithName) +TEST(OsmlStreamParser, simpleCommandWithName) { const char *testString = "\\test#bla"; // 0 12345678 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); Variant commandName = reader.getCommandName(); ASSERT_EQ("test", commandName.asString()); @@ -257,17 +365,17 @@ TEST(OsdmStreamParser, simpleCommandWithName) ASSERT_EQ(5U, loc.getStart()); ASSERT_EQ(9U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, simpleCommandWithArguments) +TEST(OsmlStreamParser, simpleCommandWithArguments) { const char *testString = "\\test[a=1,b=2,c=\"test\"]"; // 0 123456789012345 678901 2 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); Variant commandName = reader.getCommandName(); ASSERT_EQ("test", commandName.asString()); @@ -297,17 +405,17 @@ TEST(OsdmStreamParser, simpleCommandWithArguments) ASSERT_EQ(16U, loc.getStart()); ASSERT_EQ(22U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, simpleCommandWithArgumentsAndName) +TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) { const char *testString = "\\test#bla[a=1,b=2,c=\"test\"]"; // 0 1234567890123456789 01234 56 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); Variant commandName = reader.getCommandName(); ASSERT_EQ("test", commandName.asString()); @@ -343,126 +451,46 @@ TEST(OsdmStreamParser, simpleCommandWithArgumentsAndName) ASSERT_EQ(5U, loc.getStart()); ASSERT_EQ(9U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -static void assertCommand(OsdmStreamParser &reader, const std::string &name, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); - EXPECT_EQ(name, reader.getCommandName().asString()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertCommand(OsdmStreamParser &reader, const std::string &name, - const Variant::mapType &args, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - assertCommand(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); -} - -static void assertData(OsdmStreamParser &reader, const std::string &data, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(data, reader.getData().asString()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getData().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getData().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertFieldStart(OsdmStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::FIELD_START, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertFieldEnd(OsdmStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::FIELD_END, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertEnd(OsdmStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -TEST(OsdmStreamParser, fields) +TEST(OsmlStreamParser, fields) { const char *testString = "\\test{a}{b}{c}"; // 01234567890123 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); assertData(reader, "b", 9, 10); assertFieldEnd(reader, 10, 11); - assertFieldStart(reader, 11, 12); + assertFieldStart(reader, false, 11, 12); assertData(reader, "c", 12, 13); assertFieldEnd(reader, 13, 14); assertEnd(reader, 14, 14); } -TEST(OsdmStreamParser, dataOutsideField) +TEST(OsmlStreamParser, dataOutsideField) { const char *testString = "\\test{a}{b} c"; // 0123456789012 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); assertData(reader, "b", 9, 10); assertFieldEnd(reader, 10, 11); @@ -470,24 +498,24 @@ TEST(OsdmStreamParser, dataOutsideField) assertEnd(reader, 13, 13); } -TEST(OsdmStreamParser, nestedCommand) +TEST(OsmlStreamParser, nestedCommand) { const char *testString = "\\test{a}{\\test2{b} c} d"; // 012345678 90123456789012 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); { assertCommand(reader, "test2", 9, 15); - assertFieldStart(reader, 15, 16); + assertFieldStart(reader, false, 15, 16); assertData(reader, "b", 16, 17); assertFieldEnd(reader, 17, 18); } @@ -497,19 +525,19 @@ TEST(OsdmStreamParser, nestedCommand) assertEnd(reader, 23, 23); } -TEST(OsdmStreamParser, nestedCommandImmediateEnd) +TEST(OsmlStreamParser, nestedCommandImmediateEnd) { const char *testString = "\\test{\\test2{b}} d"; // 012345 678901234567 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); { assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertData(reader, "b", 13, 14); assertFieldEnd(reader, 14, 15); } @@ -518,27 +546,27 @@ TEST(OsdmStreamParser, nestedCommandImmediateEnd) assertEnd(reader, 18, 18); } -TEST(OsdmStreamParser, nestedCommandNoData) +TEST(OsmlStreamParser, nestedCommandNoData) { const char *testString = "\\test{\\test2}"; // 012345 6789012 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); assertFieldEnd(reader, 12, 13); assertEnd(reader, 13, 13); } -TEST(OsdmStreamParser, multipleCommands) +TEST(OsmlStreamParser, multipleCommands) { const char *testString = "\\a \\b \\c \\d"; // 012 345 678 90 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); assertCommand(reader, "b", 3, 5); @@ -547,33 +575,33 @@ TEST(OsdmStreamParser, multipleCommands) assertEnd(reader, 11, 11); } -TEST(OsdmStreamParser, fieldsWithSpaces) +TEST(OsmlStreamParser, fieldsWithSpaces) { const char *testString = "\\a {\\b \\c} \n\n {\\d}"; // 0123 456 789012 3 456 789 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, 3, 4); + assertFieldStart(reader, false, 3, 4); assertCommand(reader, "b", 4, 6); assertCommand(reader, "c", 7, 9); assertFieldEnd(reader, 9, 10); - assertFieldStart(reader, 16, 17); + assertFieldStart(reader, false, 16, 17); assertCommand(reader, "d", 17, 19); assertFieldEnd(reader, 19, 20); assertEnd(reader, 20, 20); } -TEST(OsdmStreamParser, errorNoFieldToStart) +TEST(OsmlStreamParser, errorNoFieldToStart) { const char *testString = "\\a b {"; // 012345 // 0 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "a", 0, 2); @@ -583,14 +611,14 @@ TEST(OsdmStreamParser, errorNoFieldToStart) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorNoFieldToEnd) +TEST(OsmlStreamParser, errorNoFieldToEnd) { const char *testString = "\\a b }"; // 012345 // 0 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "a", 0, 2); @@ -600,20 +628,20 @@ TEST(OsdmStreamParser, errorNoFieldToEnd) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorNoFieldEndNested) +TEST(OsmlStreamParser, errorNoFieldEndNested) { const char *testString = "\\test{\\test2{}}}"; // 012345 6789012345 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertFieldEnd(reader, 13, 14); assertFieldEnd(reader, 14, 15); ASSERT_FALSE(logger.hasError()); @@ -621,20 +649,20 @@ TEST(OsdmStreamParser, errorNoFieldEndNested) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorNoFieldEndNestedData) +TEST(OsmlStreamParser, errorNoFieldEndNestedData) { const char *testString = "\\test{\\test2{}}a}"; // 012345 67890123456 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertFieldEnd(reader, 13, 14); assertFieldEnd(reader, 14, 15); assertData(reader, "a", 15, 16); @@ -643,53 +671,53 @@ TEST(OsdmStreamParser, errorNoFieldEndNestedData) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, beginEnd) +TEST(OsmlStreamParser, beginEnd) { const char *testString = "\\begin{book}\\end{book}"; // 012345678901 2345678901 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertFieldEnd(reader, 17, 21); assertEnd(reader, 22, 22); } -TEST(OsdmStreamParser, beginEndWithName) +TEST(OsmlStreamParser, beginEndWithName) { const char *testString = "\\begin{book#a}\\end{book}"; // 01234567890123 4567890123 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", {{"name", "a"}}, 7, 11); - assertFieldStart(reader, 14, 15); + assertFieldStart(reader, true, 14, 15); assertFieldEnd(reader, 19, 23); assertEnd(reader, 24, 24); } -TEST(OsdmStreamParser, beginEndWithNameAndArgs) +TEST(OsmlStreamParser, beginEndWithNameAndArgs) { const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; // 0123456789012345678901234 56789 01 2345678901 // 0 1 2 3 4 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, 32, 33); + assertFieldStart(reader, true, 32, 33); assertFieldEnd(reader, 37, 41); assertEnd(reader, 42, 42); } -TEST(OsdmStreamParser, beginEndWithNameAndArgsMultipleFields) +TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) { const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; @@ -697,67 +725,100 @@ TEST(OsdmStreamParser, beginEndWithNameAndArgsMultipleFields) // 0 1 2 3 4 5 6 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, 32, 33); + assertFieldStart(reader, false, 32, 33); assertData(reader, "a", 33, 34); assertCommand(reader, "test", Variant::mapType{}, 35, 40); assertFieldEnd(reader, 40, 41); - assertFieldStart(reader, 41, 42); + assertFieldStart(reader, false, 41, 42); assertData(reader, "b", 42, 43); assertCommand(reader, "test", Variant::mapType{}, 44, 49); - assertFieldStart(reader, 49, 50); + assertFieldStart(reader, false, 49, 50); assertFieldEnd(reader, 50, 51); assertFieldEnd(reader, 51, 52); - assertFieldStart(reader, 52, 53); + assertFieldStart(reader, true, 52, 53); assertFieldEnd(reader, 57, 61); assertEnd(reader, 62, 62); } -TEST(OsdmStreamParser, beginEndWithData) +TEST(OsmlStreamParser, beginEndWithData) { const char *testString = "\\begin{book}a\\end{book}"; // 0123456789012 3456789012 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertData(reader, "a", 12, 13); assertFieldEnd(reader, 18, 22); assertEnd(reader, 23, 23); } -TEST(OsdmStreamParser, beginEndWithCommand) +TEST(OsmlStreamParser, beginEndNested) +{ + const char *testString = + "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; + // 012345678901234 5678901234567890 123456 7890123 4567890 + // 0 1 2 3 4 5 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, false, 9, 10); + assertData(reader, "b", 10, 11); + assertFieldEnd(reader, 11, 12); + assertFieldStart(reader, true, 13, 14); + assertData(reader, "c", 13, 14); + assertCommand(reader, "d", 22, 23); + assertFieldStart(reader, false, 24, 25); + assertData(reader, "e", 25, 26); + assertFieldEnd(reader, 26, 27); + assertFieldStart(reader, false, 27, 28); + assertData(reader, "f", 28, 29); + assertFieldEnd(reader, 29, 30); + assertFieldStart(reader, true, 31, 32); + assertCommand(reader, "g", 31, 33); + assertFieldStart(reader, false, 33, 34); + assertData(reader, "h", 34, 35); + assertFieldEnd(reader, 35, 36); + assertFieldEnd(reader, 42, 43); + assertFieldEnd(reader, 49, 50); + assertEnd(reader, 51, 51); +} + +TEST(OsmlStreamParser, beginEndWithCommand) { const char *testString = "\\begin{book}\\a{test}\\end{book}"; // 012345678901 23456789 0123456789 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertCommand(reader, "a", 12, 14); - assertFieldStart(reader, 14, 15); + assertFieldStart(reader, false, 14, 15); assertData(reader, "test", 15, 19); assertFieldEnd(reader, 19, 20); assertFieldEnd(reader, 25, 29); assertEnd(reader, 30, 30); } -TEST(OsdmStreamParser, errorBeginNoBraceOpen) +TEST(OsmlStreamParser, errorBeginNoBraceOpen) { const char *testString = "\\begin a"; // 01234567 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -765,12 +826,12 @@ TEST(OsdmStreamParser, errorBeginNoBraceOpen) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorBeginNoIdentifier) +TEST(OsmlStreamParser, errorBeginNoIdentifier) { const char *testString = "\\begin{!"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -778,12 +839,12 @@ TEST(OsdmStreamParser, errorBeginNoIdentifier) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorBeginNoBraceClose) +TEST(OsmlStreamParser, errorBeginNoBraceClose) { const char *testString = "\\begin{a"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -791,12 +852,12 @@ TEST(OsdmStreamParser, errorBeginNoBraceClose) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorBeginNoName) +TEST(OsmlStreamParser, errorBeginNoName) { const char *testString = "\\begin{a#}"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -808,13 +869,13 @@ TEST(OsdmStreamParser, errorBeginNoName) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorEndNoBraceOpen) +TEST(OsmlStreamParser, errorEndNoBraceOpen) { const char *testString = "\\end a"; // 012345 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -822,12 +883,12 @@ TEST(OsdmStreamParser, errorEndNoBraceOpen) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorEndNoIdentifier) +TEST(OsmlStreamParser, errorEndNoIdentifier) { const char *testString = "\\end{!"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -835,12 +896,12 @@ TEST(OsdmStreamParser, errorEndNoIdentifier) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorEndNoBraceClose) +TEST(OsmlStreamParser, errorEndNoBraceClose) { const char *testString = "\\end{a"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -848,12 +909,12 @@ TEST(OsdmStreamParser, errorEndNoBraceClose) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorEndNoBegin) +TEST(OsmlStreamParser, errorEndNoBegin) { const char *testString = "\\end{a}"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -861,91 +922,91 @@ TEST(OsdmStreamParser, errorEndNoBegin) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorBeginEndMismatch) +TEST(OsmlStreamParser, errorBeginEndMismatch) { const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; // 0123456789 012345678901234 5678901 // 0 1 2 3 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, 10, 11); + assertFieldStart(reader, true, 10, 11); assertCommand(reader, "b", 17, 18); - assertFieldStart(reader, 20, 24); + assertFieldStart(reader, true, 20, 24); assertData(reader, "test", 20, 24); ASSERT_FALSE(logger.hasError()); ASSERT_THROW(reader.parse(), LoggableException); ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, commandWithNSSep) +TEST(OsmlStreamParser, commandWithNSSep) { const char *testString = "\\test1:test2"; // 012345678901 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test1:test2", 0, 12); assertEnd(reader, 12, 12); } -TEST(OsdmStreamParser, beginEndWithNSSep) +TEST(OsmlStreamParser, beginEndWithNSSep) { const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; // 0123456789012345678 90123456789012345 // 0 1 2 3 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test1:test2", 7, 18); - assertFieldStart(reader, 19, 20); + assertFieldStart(reader, true, 19, 20); assertFieldEnd(reader, 24, 35); assertEnd(reader, 36, 36); } -TEST(OsdmStreamParser, errorBeginNSSep) +TEST(OsmlStreamParser, errorBeginNSSep) { const char *testString = "\\begin:test{blub}\\end{blub}"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); assertCommand(reader, "blub"); ASSERT_TRUE(logger.hasError()); - assertFieldStart(reader); + assertFieldStart(reader, true); assertFieldEnd(reader); assertEnd(reader); } -TEST(OsdmStreamParser, errorEndNSSep) +TEST(OsmlStreamParser, errorEndNSSep) { const char *testString = "\\begin{blub}\\end:test{blub}"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "blub"); - assertFieldStart(reader); + assertFieldStart(reader, true); ASSERT_FALSE(logger.hasError()); assertFieldEnd(reader); ASSERT_TRUE(logger.hasError()); assertEnd(reader); } -TEST(OsdmStreamParser, errorEmptyNs) +TEST(OsmlStreamParser, errorEmptyNs) { const char *testString = "\\test:"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -955,12 +1016,12 @@ TEST(OsdmStreamParser, errorEmptyNs) assertEnd(reader); } -TEST(OsdmStreamParser, errorRepeatedNs) +TEST(OsmlStreamParser, errorRepeatedNs) { const char *testString = "\\test::"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -969,5 +1030,232 @@ TEST(OsdmStreamParser, errorRepeatedNs) assertData(reader, "::"); assertEnd(reader); } + +TEST(OsmlStreamParser, explicitDefaultField) +{ + const char *testString = "\\a{!b}c"; + // 01234567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertData(reader, "b", 4, 5); + assertFieldEnd(reader, 5, 6); + assertData(reader, "c", 6, 7); + assertEnd(reader, 7, 7); +} + +TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) +{ + const char *testString = "\\a{!\\b}c"; + // 0123 4567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + assertData(reader, "c", 7, 8); + assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) +{ + const char *testString = "\\a{!\\b}{c}"; + // 0123 456789 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "c", 8, 9); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader, 10, 10); +} + +TEST(OsmlStreamParser, annotationStart) +{ + const char *testString = "<\\a"; + // 0 12 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertEnd(reader, 3, 3); +} + +TEST(OsmlStreamParser, annotationStartWithName) +{ + const char *testString = "<\\annotationWithName#aName"; + // 0 1234567890123456789012345 + // 0 1 2 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart(reader, "annotationWithName", + Variant::mapType{{"name", "aName"}}, 0, 20); + assertEnd(reader, 26, 26); +} + +TEST(OsmlStreamParser, annotationStartWithArguments) +{ + const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; + // 0 1234567890123456789012345678901234 + // 0 1 2 3 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart( + reader, "annotationWithName", + Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); + assertEnd(reader, 35, 35); +} + +TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) +{ + const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; + // 0 123456789012345678901234567 89012345 67 + // 0 1 2 3 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart( + reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, + 10); + assertFieldStart(reader, true, 26, 27); + assertData(reader, "a", 26, 27); + assertFieldEnd(reader, 33, 35); + assertAnnotationEnd(reader, "", "", 36, 38); + assertEnd(reader, 38, 38); +} + +TEST(OsmlStreamParser, annotationEnd) +{ + const char *testString = "\\a>"; + // 012 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "", 0, 2); + assertEnd(reader, 3, 3); +} + +TEST(OsmlStreamParser, annotationEndWithName) +{ + const char *testString = "\\a#name>"; + // 01234567 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, annotationEndWithNameAsArgs) +{ + const char *testString = "\\a[name=name]>"; + // 01234567890123 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 14, 14); +} + +TEST(OsmlStreamParser, errorAnnotationEndWithArguments) +{ + const char *testString = "\\a[foo=bar]>"; + // 012345678901 + // 0 1 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ">", 11, 12); + assertEnd(reader, 12, 12); +} + +TEST(OsmlStreamParser, closingAnnotation) +{ + const char *testString = "<\\a>"; + // 0 123 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertData(reader, ">", 3, 4); + assertEnd(reader, 4, 4); +} + +TEST(OsmlStreamParser, annotationWithFields) +{ + const char *testString = "a <\\b{c}{d}{!e} f \\> g"; + // 012 345678901234567 8901 + // 0 1 2 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertData(reader, "a", 0, 1); + assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); + assertFieldStart(reader, false, 5, 6); + assertData(reader, "c", 6, 7); + assertFieldEnd(reader, 7, 8); + assertFieldStart(reader, false, 8, 9); + assertData(reader, "d", 9, 10); + assertFieldEnd(reader, 10, 11); + assertFieldStart(reader, true, 11, 13); + assertData(reader, "e", 13, 14); + assertFieldEnd(reader, 14, 15); + assertData(reader, "f", 16, 17); + assertAnnotationEnd(reader, "", "", 18, 20); + assertData(reader, "g", 21, 22); + assertEnd(reader, 22, 22); +} + +TEST(OsmlStreamParser, annotationStartEscape) +{ + const char *testString = "<\\%test"; + // 0 123456 + // 0 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertData(reader, "<%test", 0, 7); + assertEnd(reader, 7, 7); +} } diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp new file mode 100644 index 0000000..3293370 --- /dev/null +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -0,0 +1,217 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <core/frontend/TerminalLogger.hpp> +#include <core/common/CharReader.hpp> +#include <core/common/Variant.hpp> + +#include <formats/osxml/OsxmlEventParser.hpp> + +namespace ousia { + +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; + +namespace { +enum class OsxmlEvent { + COMMAND, + ANNOTATION_START, + ANNOTATION_END, + FIELD_END, + DATA +}; + +class TestOsxmlEventListener : public OsxmlEvents { +public: + std::vector<std::pair<OsxmlEvent, Variant>> events; + + void command(const Variant &name, const Variant::mapType &args) override + { + events.emplace_back(OsxmlEvent::COMMAND, + Variant::arrayType{name, args}); + } + + void annotationStart(const Variant &className, + const Variant::mapType &args) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_START, + Variant::arrayType{className, args}); + } + + void annotationEnd(const Variant &className, + const Variant &elementName) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_END, + Variant::arrayType{className, elementName}); + } + + void fieldEnd() override + { + events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); + } + + void data(const Variant &data) override + { + events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); + } +}; + +static std::vector<std::pair<OsxmlEvent, Variant>> parseXml( + const char *testString, + WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) +{ + TestOsxmlEventListener listener; + CharReader reader(testString); + OsxmlEventParser parser(reader, listener, logger); + parser.setWhitespaceMode(whitespaceMode); + parser.parse(); + return listener.events; +} +} + +TEST(OsxmlEventParser, simpleCommandWithArgs) +{ + const char *testString = "<a name=\"test\" a=\"1\" b=\"2\" c=\"blub\"/>"; + // 01234567 89012 3456 78 9012 34 5678 90123 456 + // 0 1 2 3 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, + Variant::arrayType{ + "a", Variant::mapType{ + {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); + + // Check the locations (I'll do this one time and then just assume it works) + ASSERT_EQ(1U, events[0].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(2U, events[0].second.asArray()[0].getLocation().getEnd()); + ASSERT_EQ( + 9U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getStart()); + ASSERT_EQ( + 13U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getEnd()); + ASSERT_EQ( + 18U, + events[0].second.asArray()[1].asMap()["a"].getLocation().getStart()); + ASSERT_EQ( + 19U, events[0].second.asArray()[1].asMap()["a"].getLocation().getEnd()); + ASSERT_EQ( + 24U, + events[0].second.asArray()[1].asMap()["b"].getLocation().getStart()); + ASSERT_EQ( + 25U, events[0].second.asArray()[1].asMap()["b"].getLocation().getEnd()); + ASSERT_EQ( + 30U, + events[0].second.asArray()[1].asMap()["c"].getLocation().getStart()); + ASSERT_EQ( + 34U, events[0].second.asArray()[1].asMap()["c"].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, magicTopLevelTag) +{ + const char *testString = "<ousia><a/><b/></ousia>"; + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, magicTopLevelTagInside) +{ + const char *testString = "<a><ousia/></a>"; + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::COMMAND, + Variant::arrayType{{"ousia", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +{ + const char *testString = "<a> hello \n world </a>"; + // 012345678901 234567890123 + // 0 1 2 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::PRESERVE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataTrimWhitespace) +{ + const char *testString = "<a> hello \n world </a>"; + // 012345678901 234567890123 + // 0 1 2 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::TRIM); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) +{ + const char *testString = "<a> hello \n world </a>"; + // 012345678901 234567890123 + // 0 1 2 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::COLLAPSE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} +} + diff --git a/test/formats/osxml/OsxmlParserTest.cpp b/test/formats/osxml/OsxmlParserTest.cpp new file mode 100644 index 0000000..fe8ed34 --- /dev/null +++ b/test/formats/osxml/OsxmlParserTest.cpp @@ -0,0 +1,395 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <iostream> + +#include <gtest/gtest.h> + +#include <core/common/CharReader.hpp> +#include <core/common/SourceContextReader.hpp> +#include <core/model/Document.hpp> +#include <core/model/Domain.hpp> +#include <core/model/Node.hpp> +#include <core/model/Project.hpp> +#include <core/frontend/TerminalLogger.hpp> +#include <core/StandaloneEnvironment.hpp> + +#include <plugins/filesystem/FileLocator.hpp> +#include <formats/osxml/OsxmlParser.hpp> + +namespace ousia { + +namespace RttiTypes { +extern const Rtti Document; +extern const Rtti Domain; +extern const Rtti Typesystem; +} + +struct XmlStandaloneEnvironment : public StandaloneEnvironment { + OsxmlParser parser; + FileLocator fileLocator; + + XmlStandaloneEnvironment(ConcreteLogger &logger) + : StandaloneEnvironment(logger) + { + fileLocator.addDefaultSearchPaths(); + fileLocator.addUnittestSearchPath("xmlparser"); + + registry.registerDefaultExtensions(); + registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}, + {&RttiTypes::Node}, &parser); + registry.registerResourceLocator(&fileLocator); + } +}; + +static TerminalLogger logger(std::cerr, true); + +TEST(OsxmlParser, mismatchedTag) +{ + XmlStandaloneEnvironment env(logger); + env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document}); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsxmlParser, generic) +{ + XmlStandaloneEnvironment env(logger); + env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node}); +#ifdef MANAGER_GRAPHVIZ_EXPORT + env.manager.exportGraphviz("xmlDocument.dot"); +#endif +} + +static void checkAttributes(Handle<StructType> expected, + Handle<Descriptor> desc) +{ + if (expected == nullptr) { + ASSERT_TRUE(desc->getAttributesDescriptor()->getAttributes().empty()); + } else { + ASSERT_EQ(expected->getName(), + desc->getAttributesDescriptor()->getName()); + auto &attrs_exp = expected->getAttributes(); + auto &attrs = desc->getAttributesDescriptor()->getAttributes(); + ASSERT_EQ(attrs_exp.size(), attrs.size()); + for (size_t i = 0; i < attrs_exp.size(); i++) { + ASSERT_EQ(attrs_exp[i]->getName(), attrs[i]->getName()); + ASSERT_EQ(attrs_exp[i]->getType(), attrs[i]->getType()); + ASSERT_EQ(attrs_exp[i]->isOptional(), attrs[i]->isOptional()); + ASSERT_EQ(attrs_exp[i]->getDefaultValue(), + attrs[i]->getDefaultValue()); + } + } +} + +static void checkStructuredClass( + Handle<Node> n, const std::string &name, Handle<Domain> domain, + Variant cardinality = Cardinality::any(), + Handle<StructType> attributesDescriptor = nullptr, + Handle<StructuredClass> superclass = nullptr, bool transparent = false, + bool root = false) +{ + ASSERT_FALSE(n == nullptr); + Handle<StructuredClass> sc = n.cast<StructuredClass>(); + ASSERT_FALSE(sc == nullptr); + ASSERT_EQ(name, sc->getName()); + ASSERT_EQ(domain, sc->getParent()); + ASSERT_EQ(cardinality, sc->getCardinality()); + ASSERT_EQ(transparent, sc->isTransparent()); + ASSERT_EQ(root, sc->hasRootPermission()); + checkAttributes(attributesDescriptor, sc); +} + +static Rooted<StructuredClass> checkStructuredClass( + const std::string &resolve, const std::string &name, Handle<Domain> domain, + Variant cardinality = Cardinality::any(), + Handle<StructType> attributesDescriptor = nullptr, + Handle<StructuredClass> superclass = nullptr, bool transparent = false, + bool root = false) +{ + auto res = domain->resolve(&RttiTypes::StructuredClass, resolve); + if (res.size() != 1) { + throw OusiaException("resolution error!"); + } + Handle<StructuredClass> sc = res[0].node.cast<StructuredClass>(); + checkStructuredClass(sc, name, domain, cardinality, attributesDescriptor, + superclass, transparent, root); + return sc; +} + +static void checkAnnotationClass( + Handle<Node> n, const std::string &name, Handle<Domain> domain, + Handle<StructType> attributesDescriptor = nullptr) +{ + ASSERT_FALSE(n == nullptr); + Handle<AnnotationClass> ac = n.cast<AnnotationClass>(); + ASSERT_FALSE(ac == nullptr); + ASSERT_EQ(name, ac->getName()); + ASSERT_EQ(domain, ac->getParent()); + checkAttributes(attributesDescriptor, ac); +} + +static Rooted<AnnotationClass> checkAnnotationClass( + const std::string &resolve, const std::string &name, Handle<Domain> domain, + Handle<StructType> attributesDescriptor = nullptr) +{ + auto res = domain->resolve(&RttiTypes::AnnotationClass, resolve); + if (res.size() != 1) { + throw OusiaException("resolution error!"); + } + Handle<AnnotationClass> ac = res[0].node.cast<AnnotationClass>(); + checkAnnotationClass(ac, name, domain, attributesDescriptor); + return ac; +} + +static void checkFieldDescriptor( + Handle<Node> n, const std::string &name, Handle<Descriptor> parent, + NodeVector<StructuredClass> children, + FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE, + Handle<Type> primitiveType = nullptr, bool optional = false) +{ + ASSERT_FALSE(n == nullptr); + Handle<FieldDescriptor> field = n.cast<FieldDescriptor>(); + ASSERT_FALSE(field.isNull()); + ASSERT_EQ(name, field->getName()); + ASSERT_EQ(parent, field->getParent()); + ASSERT_EQ(type, field->getFieldType()); + ASSERT_EQ(primitiveType, field->getPrimitiveType()); + ASSERT_EQ(primitiveType != nullptr, field->isPrimitive()); + ASSERT_EQ(optional, field->isOptional()); + // check the children. + ASSERT_EQ(children.size(), field->getChildren().size()); + for (unsigned int c = 0; c < children.size(); c++) { + ASSERT_EQ(children[c], field->getChildren()[c]); + } +} + +static void checkFieldDescriptor( + Handle<Descriptor> desc, Handle<Descriptor> parent, + NodeVector<StructuredClass> children, + const std::string &name = "", + FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE, + Handle<Type> primitiveType = nullptr, bool optional = false) +{ + auto res = desc->resolve(&RttiTypes::FieldDescriptor, name); + ASSERT_EQ(1U, res.size()); + checkFieldDescriptor(res[0].node, name, parent, children, type, + primitiveType, optional); +} + +static void checkFieldDescriptor( + Handle<Descriptor> desc, NodeVector<StructuredClass> children, + const std::string &name = "", + FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE, + Handle<Type> primitiveType = nullptr, bool optional = false) +{ + checkFieldDescriptor(desc, desc, children, name, type, primitiveType, + optional); +} + +TEST(OsxmlParser, domainParsing) +{ + XmlStandaloneEnvironment env(logger); + Rooted<Node> book_domain_node = + env.parse("book_domain.oxm", "", "", RttiSet{&RttiTypes::Domain}); + ASSERT_FALSE(book_domain_node == nullptr); + ASSERT_FALSE(logger.hasError()); + // check the domain node. + Rooted<Domain> book_domain = book_domain_node.cast<Domain>(); + ASSERT_EQ("book", book_domain->getName()); + // get the book struct node. + Cardinality single; + single.merge({1}); + Rooted<StructuredClass> book = checkStructuredClass( + "book", "book", book_domain, single, nullptr, nullptr, false, true); + // get the chapter struct node. + Rooted<StructuredClass> chapter = + checkStructuredClass("chapter", "chapter", book_domain); + Rooted<StructuredClass> section = + checkStructuredClass("section", "section", book_domain); + Rooted<StructuredClass> subsection = + checkStructuredClass("subsection", "subsection", book_domain); + Rooted<StructuredClass> paragraph = + checkStructuredClass("paragraph", "paragraph", book_domain, + Cardinality::any(), nullptr, nullptr, true, false); + Rooted<StructuredClass> text = + checkStructuredClass("text", "text", book_domain, Cardinality::any(), + nullptr, nullptr, true, false); + + // check the FieldDescriptors. + checkFieldDescriptor(book, {chapter, paragraph}); + checkFieldDescriptor(chapter, {section, paragraph}); + checkFieldDescriptor(section, {subsection, paragraph}); + checkFieldDescriptor(subsection, {paragraph}); + checkFieldDescriptor(paragraph, {text}); + checkFieldDescriptor( + text, {}, "", FieldDescriptor::FieldType::TREE, + env.project->getSystemTypesystem()->getStringType(), false); + + // check parent handling using the headings domain. + Rooted<Node> headings_domain_node = + env.parse("headings_domain.oxm", "", "", RttiSet{&RttiTypes::Domain}); + ASSERT_FALSE(headings_domain_node == nullptr); + ASSERT_FALSE(logger.hasError()); + Rooted<Domain> headings_domain = headings_domain_node.cast<Domain>(); + // now there should be a heading struct. + Rooted<StructuredClass> heading = + checkStructuredClass("heading", "heading", headings_domain, single, + nullptr, nullptr, true, false); + // which should be a reference to the paragraph descriptor. + checkFieldDescriptor(heading, paragraph, {text}); + // and each struct in the book domain (except for text) should have a + // heading field now. + checkFieldDescriptor(book, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + checkFieldDescriptor(chapter, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + checkFieldDescriptor(section, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + checkFieldDescriptor(subsection, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + checkFieldDescriptor(paragraph, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + + // check annotation handling using the comments domain. + Rooted<Node> comments_domain_node = + env.parse("comments_domain.oxm", "", "", RttiSet{&RttiTypes::Domain}); + ASSERT_FALSE(comments_domain_node == nullptr); + ASSERT_FALSE(logger.hasError()); + Rooted<Domain> comments_domain = comments_domain_node.cast<Domain>(); + // now we should be able to find a comment annotation. + Rooted<AnnotationClass> comment_anno = + checkAnnotationClass("comment", "comment", comments_domain); + // as well as a comment struct + Rooted<StructuredClass> comment = + checkStructuredClass("comment", "comment", comments_domain); + // and a reply struct + Rooted<StructuredClass> reply = + checkStructuredClass("reply", "reply", comments_domain); + // check the fields for each of them. + { + std::vector<Rooted<Descriptor>> descs{comment_anno, comment, reply}; + for (auto &d : descs) { + checkFieldDescriptor(d, {paragraph}, "content", + FieldDescriptor::FieldType::TREE, nullptr, + false); + checkFieldDescriptor(d, {reply}, "replies", + FieldDescriptor::FieldType::SUBTREE, nullptr, + false); + } + } + // paragraph should have comment as child now as well. + checkFieldDescriptor(paragraph, {text, comment}); + // as should heading, because it references the paragraph default field. + checkFieldDescriptor(heading, paragraph, {text, comment}); +} + +static void checkStructuredEntity( + Handle<Node> s, Handle<Node> expectedParent, Handle<StructuredClass> strct, + const Variant::mapType &expectedAttributes = Variant::mapType{}, + const std::string &expectedName = "") +{ + ASSERT_FALSE(s == nullptr); + ASSERT_TRUE(s->isa(&RttiTypes::StructuredEntity)); + Rooted<StructuredEntity> entity = s.cast<StructuredEntity>(); + ASSERT_EQ(expectedParent, entity->getParent()); + ASSERT_EQ(strct, entity->getDescriptor()); + ASSERT_EQ(expectedAttributes, entity->getAttributes()); + ASSERT_EQ(expectedName, entity->getName()); +} + +static void checkStructuredEntity( + Handle<Node> s, Handle<Node> expectedParent, Handle<Document> doc, + const std::string &className, + const Variant::mapType &expectedAttributes = Variant::mapType{}, + const std::string &expectedName = "") +{ + auto res = doc->resolve(&RttiTypes::StructuredClass, className); + if (res.size() != 1) { + throw OusiaException("resolution error!"); + } + Handle<StructuredClass> sc = res[0].node.cast<StructuredClass>(); + checkStructuredEntity(s, expectedParent, sc, expectedAttributes, + expectedName); +} + +static void checkText(Handle<Node> p, Handle<Node> expectedParent, + Handle<Document> doc, Variant expected) +{ + checkStructuredEntity(p, expectedParent, doc, "paragraph"); + Rooted<StructuredEntity> par = p.cast<StructuredEntity>(); + ASSERT_EQ(1U, par->getField().size()); + checkStructuredEntity(par->getField()[0], par, doc, "text"); + Rooted<StructuredEntity> text = par->getField()[0].cast<StructuredEntity>(); + ASSERT_EQ(1U, text->getField().size()); + + Handle<StructureNode> d = text->getField()[0]; + ASSERT_FALSE(d == nullptr); + ASSERT_TRUE(d->isa(&RttiTypes::DocumentPrimitive)); + Rooted<DocumentPrimitive> prim = d.cast<DocumentPrimitive>(); + ASSERT_EQ(text, prim->getParent()); + ASSERT_EQ(expected, prim->getContent()); +} + +TEST(OsxmlParser, documentParsing) +{ + XmlStandaloneEnvironment env(logger); + Rooted<Node> book_document_node = + env.parse("simple_book.oxd", "", "", RttiSet{&RttiTypes::Document}); + ASSERT_FALSE(book_document_node == nullptr); + ASSERT_TRUE(book_document_node->isa(&RttiTypes::Document)); + Rooted<Document> doc = book_document_node.cast<Document>(); + ASSERT_TRUE(doc->validate(logger)); + checkStructuredEntity(doc->getRoot(), doc, doc, "book"); + { + Rooted<StructuredEntity> book = doc->getRoot(); + ASSERT_EQ(2U, book->getField().size()); + checkText(book->getField()[0], book, doc, + "This might be some introductory text or a dedication."); + checkStructuredEntity(book->getField()[1], book, doc, "chapter", + Variant::mapType{}, "myFirstChapter"); + { + Rooted<StructuredEntity> chapter = + book->getField()[1].cast<StructuredEntity>(); + ASSERT_EQ(3U, chapter->getField().size()); + checkText(chapter->getField()[0], chapter, doc, + "Here we might have an introduction to the chapter."); + checkStructuredEntity(chapter->getField()[1], chapter, doc, + "section", Variant::mapType{}, + "myFirstSection"); + { + Rooted<StructuredEntity> section = + chapter->getField()[1].cast<StructuredEntity>(); + ASSERT_EQ(1U, section->getField().size()); + checkText(section->getField()[0], section, doc, + "Here we might find the actual section content."); + } + checkStructuredEntity(chapter->getField()[2], chapter, doc, + "section", Variant::mapType{}, + "mySndSection"); + { + Rooted<StructuredEntity> section = + chapter->getField()[2].cast<StructuredEntity>(); + ASSERT_EQ(1U, section->getField().size()); + checkText(section->getField()[0], section, doc, + "Here we might find the actual section content."); + } + } + } +} +} + |