diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-25 23:09:26 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-25 23:09:26 +0100 |
commit | 84c9abc3e9762c4486ddc5ca0352a5d697a51987 (patch) | |
tree | b95db6ab2c2c6c2fba430218411a4ddf1d31b19f /test/formats | |
parent | 8891dea26a1653a003b4171155e155d3aa6689ae (diff) |
start of branch, commit log will be rewritten
Diffstat (limited to 'test/formats')
-rw-r--r-- | test/formats/osml/OsmlStreamParserTest.cpp | 79 | ||||
-rw-r--r-- | test/formats/osxml/OsxmlEventParserTest.cpp | 47 |
2 files changed, 47 insertions, 79 deletions
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index d52fa5b..3d01007 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -30,11 +30,21 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); // static ConcreteLogger logger; +static OsmlStreamParser::State skipEmptyData(OsmlStreamParser &reader) +{ + OsmlStreamParser::State res = reader.parse(); + if (res == OsmlStreamParser::State::DATA) { + EXPECT_FALSE(reader.getData().hasNonWhitespaceText()); + res = reader.parse(); + } + return res; +} + static void assertCommand(OsmlStreamParser &reader, const std::string &name, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, skipEmptyData(reader)); EXPECT_EQ(name, reader.getCommandName().asString()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); @@ -57,16 +67,19 @@ static void assertCommand(OsmlStreamParser &reader, const std::string &name, static void assertData(OsmlStreamParser &reader, const std::string &data, SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) + SourceOffset end = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(data, reader.getData().asString()); + Variant text = reader.getText(mode); + ASSERT_TRUE(text.isString()); + EXPECT_EQ(data, text.asString()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, text.getLocation().getStart()); EXPECT_EQ(start, reader.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, text.getLocation().getEnd()); EXPECT_EQ(end, reader.getLocation().getEnd()); } } @@ -75,7 +88,7 @@ static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, skipEmptyData(reader)); EXPECT_EQ(defaultField, reader.inDefaultField()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getLocation().getStart()); @@ -89,7 +102,7 @@ static void assertFieldEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, skipEmptyData(reader)); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getLocation().getStart()); } @@ -103,7 +116,7 @@ static void assertAnnotationStart(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, skipEmptyData(reader)); EXPECT_EQ(name, reader.getCommandName().asString()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); @@ -131,7 +144,7 @@ static void assertAnnotationEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, skipEmptyData(reader)); ASSERT_EQ(name, reader.getCommandName().asString()); if (!elementName.empty()) { ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); @@ -152,7 +165,7 @@ static void assertEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, skipEmptyData(reader)); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getLocation().getStart()); } @@ -205,26 +218,14 @@ TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) assertData(reader, "hello world", 1, 14); } -TEST(OsmlStreamParser, escapeWhitespace) -{ - const char *testString = " hello\\ \\ world "; - // 012345 67 89012345 - // 0 1 - CharReader charReader(testString); - - OsmlStreamParser reader(charReader, logger); - - assertData(reader, "hello world", 1, 15); -} - static void testEscapeSpecialCharacter(const std::string &c) { CharReader charReader(std::string("\\") + c); OsmlStreamParser reader(charReader, logger); EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(c, reader.getData().asString()); + EXPECT_EQ(c, reader.getText().asString()); - SourceLocation loc = reader.getData().getLocation(); + SourceLocation loc = reader.getText().getLocation(); EXPECT_EQ(0U, loc.getStart()); EXPECT_EQ(1U + c.size(), loc.getEnd()); } @@ -253,16 +254,16 @@ TEST(OsmlStreamParser, singleLineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(33U, loc.getStart()); ASSERT_EQ(34U, loc.getEnd()); } @@ -279,16 +280,16 @@ TEST(OsmlStreamParser, multilineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } @@ -305,16 +306,16 @@ TEST(OsmlStreamParser, nestedMultilineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } @@ -569,8 +570,11 @@ TEST(OsmlStreamParser, multipleCommands) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); + assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); assertCommand(reader, "b", 3, 5); + assertData(reader, " ", 5, 6, WhitespaceMode::PRESERVE); assertCommand(reader, "c", 6, 8); + assertData(reader, " ", 8, 9, WhitespaceMode::PRESERVE); assertCommand(reader, "d", 9, 11); assertEnd(reader, 11, 11); } @@ -584,10 +588,13 @@ TEST(OsmlStreamParser, fieldsWithSpaces) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); + assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); assertFieldStart(reader, false, 3, 4); assertCommand(reader, "b", 4, 6); + assertData(reader, " ", 6, 7, WhitespaceMode::PRESERVE); assertCommand(reader, "c", 7, 9); assertFieldEnd(reader, 9, 10); + assertData(reader, " \n\n {", 10, 12, WhitespaceMode::PRESERVE); assertFieldStart(reader, false, 16, 17); assertCommand(reader, "d", 17, 19); assertFieldEnd(reader, 19, 20); diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index 3293370..6942166 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -21,6 +21,7 @@ #include <core/frontend/TerminalLogger.hpp> #include <core/common/CharReader.hpp> #include <core/common/Variant.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <formats/osxml/OsxmlEventParser.hpp> @@ -74,13 +75,11 @@ public: }; static std::vector<std::pair<OsxmlEvent, Variant>> parseXml( - const char *testString, - WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) + const char *testString) { TestOsxmlEventListener listener; CharReader reader(testString); OsxmlEventParser parser(reader, listener, logger); - parser.setWhitespaceMode(whitespaceMode); parser.parse(); return listener.events; } @@ -157,7 +156,7 @@ TEST(OsxmlEventParser, magicTopLevelTagInside) ASSERT_EQ(expectedEvents, events); } -TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +TEST(OsxmlEventParser, commandWithData) { const char *testString = "<a> hello \n world </a>"; // 012345678901 234567890123 @@ -168,50 +167,12 @@ TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - auto events = parseXml(testString, WhitespaceMode::PRESERVE); + auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); // Check the location of the text ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); } - -TEST(OsxmlEventParser, commandWithDataTrimWhitespace) -{ - const char *testString = "<a> hello \n world </a>"; - // 012345678901 234567890123 - // 0 1 2 - - std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::TRIM); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} - -TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) -{ - const char *testString = "<a> hello \n world </a>"; - // 012345678901 234567890123 - // 0 1 2 - - std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::COLLAPSE); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} } |