From 974afd3fdc54380a43445a180263fb162e1ff2c0 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:00:23 +0100 Subject: Applied renaming to unit tests and added forgotten CMakeLists.txt --- test/formats/osml/OsmlStreamParserTest.cpp | 973 +++++++++++++++++++++++++++++ 1 file changed, 973 insertions(+) create mode 100644 test/formats/osml/OsmlStreamParserTest.cpp (limited to 'test/formats/osml/OsmlStreamParserTest.cpp') diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp new file mode 100644 index 0000000..e5eff05 --- /dev/null +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -0,0 +1,973 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include + +#include + +#include +#include + +#include + +namespace ousia { + +static TerminalLogger logger(std::cerr, true); + +TEST(OsmlStreamParser, empty) +{ + const char *testString = ""; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, oneCharacter) +{ + const char *testString = "a"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("a", reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); +} + +TEST(OsmlStreamParser, whitespaceElimination) +{ + const char *testString = " hello \t world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("hello world", reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(14U, loc.getEnd()); +} + +TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) +{ + const char *testString = " hello \n world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("hello world", reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(14U, loc.getEnd()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, escapeWhitespace) +{ + const char *testString = " hello\\ \\ world "; + // 012345 67 89012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::DATA, 
reader.parse()); + ASSERT_EQ("hello world", reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(15U, loc.getEnd()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +static void testEscapeSpecialCharacter(const std::string &c) +{ + CharReader charReader(std::string("\\") + c); + OsmlStreamParser reader(charReader, logger); + EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + EXPECT_EQ(c, reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + EXPECT_EQ(0U, loc.getStart()); + EXPECT_EQ(1U + c.size(), loc.getEnd()); +} + +TEST(OsmlStreamParser, escapeSpecialCharacters) +{ + testEscapeSpecialCharacter("\\"); + testEscapeSpecialCharacter("{"); + testEscapeSpecialCharacter("}"); + testEscapeSpecialCharacter("<"); + testEscapeSpecialCharacter(">"); +} + +TEST(OsmlStreamParser, simpleSingleLineComment) +{ + const char *testString = "% This is a single line comment"; + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, singleLineComment) +{ + const char *testString = "a% This is a single line comment\nb"; + // 01234567890123456789012345678901 23 + // 0 1 2 3 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("a", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); + } + + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("b", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(33U, loc.getStart()); + ASSERT_EQ(34U, loc.getEnd()); + } + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, multilineComment) +{ + const 
char *testString = "a%{ This is a\n\n multiline line comment}%b"; + // 0123456789012 3 456789012345678901234567890 + // 0 1 2 3 4 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("a", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); + } + + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("b", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(40U, loc.getStart()); + ASSERT_EQ(41U, loc.getEnd()); + } + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, nestedMultilineComment) +{ + const char *testString = "a%{%{Another\n\n}%multiline line comment}%b"; + // 0123456789012 3 456789012345678901234567890 + // 0 1 2 3 4 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("a", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); + } + + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("b", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(40U, loc.getStart()); + ASSERT_EQ(41U, loc.getEnd()); + } + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, simpleCommand) +{ + const char *testString = "\\test"; + // 0 12345 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + + Variant commandName = reader.getCommandName(); + ASSERT_EQ("test", commandName.asString()); + + SourceLocation loc = commandName.getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, 
loc.getEnd()); + + ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, simpleCommandWithName) +{ + const char *testString = "\\test#bla"; + // 0 12345678 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + + Variant commandName = reader.getCommandName(); + ASSERT_EQ("test", commandName.asString()); + SourceLocation loc = commandName.getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + + Variant commandArguments = reader.getCommandArguments(); + ASSERT_TRUE(commandArguments.isMap()); + ASSERT_EQ(1U, commandArguments.asMap().size()); + ASSERT_EQ(1U, commandArguments.asMap().count("name")); + ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); + + loc = commandArguments.asMap()["name"].getLocation(); + ASSERT_EQ(5U, loc.getStart()); + ASSERT_EQ(9U, loc.getEnd()); + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, simpleCommandWithArguments) +{ + const char *testString = "\\test[a=1,b=2,c=\"test\"]"; + // 0 123456789012345 678901 2 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + + Variant commandName = reader.getCommandName(); + ASSERT_EQ("test", commandName.asString()); + SourceLocation loc = commandName.getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + + Variant commandArguments = reader.getCommandArguments(); + ASSERT_TRUE(commandArguments.isMap()); + ASSERT_EQ(3U, commandArguments.asMap().size()); + ASSERT_EQ(1U, commandArguments.asMap().count("a")); + ASSERT_EQ(1U, commandArguments.asMap().count("b")); + ASSERT_EQ(1U, commandArguments.asMap().count("c")); + ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); + ASSERT_EQ(2, 
commandArguments.asMap()["b"].asInt()); + ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); + + loc = commandArguments.asMap()["a"].getLocation(); + ASSERT_EQ(8U, loc.getStart()); + ASSERT_EQ(9U, loc.getEnd()); + + loc = commandArguments.asMap()["b"].getLocation(); + ASSERT_EQ(12U, loc.getStart()); + ASSERT_EQ(13U, loc.getEnd()); + + loc = commandArguments.asMap()["c"].getLocation(); + ASSERT_EQ(16U, loc.getStart()); + ASSERT_EQ(22U, loc.getEnd()); + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) +{ + const char *testString = "\\test#bla[a=1,b=2,c=\"test\"]"; + // 0 1234567890123456789 01234 56 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + + Variant commandName = reader.getCommandName(); + ASSERT_EQ("test", commandName.asString()); + SourceLocation loc = commandName.getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + + Variant commandArguments = reader.getCommandArguments(); + ASSERT_TRUE(commandArguments.isMap()); + ASSERT_EQ(4U, commandArguments.asMap().size()); + ASSERT_EQ(1U, commandArguments.asMap().count("a")); + ASSERT_EQ(1U, commandArguments.asMap().count("b")); + ASSERT_EQ(1U, commandArguments.asMap().count("c")); + ASSERT_EQ(1U, commandArguments.asMap().count("name")); + ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); + ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); + ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); + ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); + + loc = commandArguments.asMap()["a"].getLocation(); + ASSERT_EQ(12U, loc.getStart()); + ASSERT_EQ(13U, loc.getEnd()); + + loc = commandArguments.asMap()["b"].getLocation(); + ASSERT_EQ(16U, loc.getStart()); + ASSERT_EQ(17U, loc.getEnd()); + + loc = commandArguments.asMap()["c"].getLocation(); + ASSERT_EQ(20U, loc.getStart()); + 
ASSERT_EQ(26U, loc.getEnd()); + + loc = commandArguments.asMap()["name"].getLocation(); + ASSERT_EQ(5U, loc.getStart()); + ASSERT_EQ(9U, loc.getEnd()); + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertCommand(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertData(OsmlStreamParser &reader, const std::string &data, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + EXPECT_EQ(data, reader.getData().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldStart(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, 
reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +TEST(OsmlStreamParser, fields) +{ + const char *testString = "\\test{a}{b}{c}"; + // 01234567890123 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertData(reader, "a", 6, 7); + assertFieldEnd(reader, 7, 8); + + assertFieldStart(reader, 8, 9); + assertData(reader, "b", 9, 10); + assertFieldEnd(reader, 10, 11); + + assertFieldStart(reader, 11, 12); + assertData(reader, "c", 12, 13); + assertFieldEnd(reader, 13, 14); + assertEnd(reader, 14, 14); +} + +TEST(OsmlStreamParser, dataOutsideField) +{ + const char *testString = "\\test{a}{b} c"; + // 0123456789012 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertData(reader, "a", 6, 7); + assertFieldEnd(reader, 7, 8); + + assertFieldStart(reader, 8, 9); + assertData(reader, "b", 9, 10); + assertFieldEnd(reader, 10, 11); + + assertData(reader, "c", 12, 13); + assertEnd(reader, 
13, 13); +} + +TEST(OsmlStreamParser, nestedCommand) +{ + const char *testString = "\\test{a}{\\test2{b} c} d"; + // 012345678 90123456789012 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + + assertFieldStart(reader, 5, 6); + assertData(reader, "a", 6, 7); + assertFieldEnd(reader, 7, 8); + + assertFieldStart(reader, 8, 9); + { + assertCommand(reader, "test2", 9, 15); + assertFieldStart(reader, 15, 16); + assertData(reader, "b", 16, 17); + assertFieldEnd(reader, 17, 18); + } + assertData(reader, "c", 19, 20); + assertFieldEnd(reader, 20, 21); + assertData(reader, "d", 22, 23); + assertEnd(reader, 23, 23); +} + +TEST(OsmlStreamParser, nestedCommandImmediateEnd) +{ + const char *testString = "\\test{\\test2{b}} d"; + // 012345 678901234567 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + { + assertCommand(reader, "test2", 6, 12); + assertFieldStart(reader, 12, 13); + assertData(reader, "b", 13, 14); + assertFieldEnd(reader, 14, 15); + } + assertFieldEnd(reader, 15, 16); + assertData(reader, "d", 17, 18); + assertEnd(reader, 18, 18); +} + +TEST(OsmlStreamParser, nestedCommandNoData) +{ + const char *testString = "\\test{\\test2}"; + // 012345 6789012 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertCommand(reader, "test2", 6, 12); + assertFieldEnd(reader, 12, 13); + assertEnd(reader, 13, 13); +} + +TEST(OsmlStreamParser, multipleCommands) +{ + const char *testString = "\\a \\b \\c \\d"; + // 012 345 678 90 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertCommand(reader, "b", 3, 5); + assertCommand(reader, "c", 6, 8); + assertCommand(reader, "d", 9, 11); + 
assertEnd(reader, 11, 11); +} + +TEST(OsmlStreamParser, fieldsWithSpaces) +{ + const char *testString = "\\a {\\b \\c} \n\n {\\d}"; + // 0123 456 789012 3 456 789 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, 3, 4); + assertCommand(reader, "b", 4, 6); + assertCommand(reader, "c", 7, 9); + assertFieldEnd(reader, 9, 10); + assertFieldStart(reader, 16, 17); + assertCommand(reader, "d", 17, 19); + assertFieldEnd(reader, 19, 20); + assertEnd(reader, 20, 20); +} + +TEST(OsmlStreamParser, errorNoFieldToStart) +{ + const char *testString = "\\a b {"; + // 012345 + // 0 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "a", 0, 2); + assertData(reader, "b", 3, 4); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader, 6, 6); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorNoFieldToEnd) +{ + const char *testString = "\\a b }"; + // 012345 + // 0 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "a", 0, 2); + assertData(reader, "b", 3, 4); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader, 6, 6); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorNoFieldEndNested) +{ + const char *testString = "\\test{\\test2{}}}"; + // 012345 6789012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertCommand(reader, "test2", 6, 12); + assertFieldStart(reader, 12, 13); + assertFieldEnd(reader, 13, 14); + assertFieldEnd(reader, 14, 15); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader, 16, 16); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorNoFieldEndNestedData) +{ + const char *testString = "\\test{\\test2{}}a}"; + 
// 012345 67890123456 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertCommand(reader, "test2", 6, 12); + assertFieldStart(reader, 12, 13); + assertFieldEnd(reader, 13, 14); + assertFieldEnd(reader, 14, 15); + assertData(reader, "a", 15, 16); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader, 17, 17); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, beginEnd) +{ + const char *testString = "\\begin{book}\\end{book}"; + // 012345678901 2345678901 + // 0 1 2 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", 7, 11); + assertFieldStart(reader, 12, 13); + assertFieldEnd(reader, 17, 21); + assertEnd(reader, 22, 22); +} + +TEST(OsmlStreamParser, beginEndWithName) +{ + const char *testString = "\\begin{book#a}\\end{book}"; + // 01234567890123 4567890123 + // 0 1 2 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", {{"name", "a"}}, 7, 11); + assertFieldStart(reader, 14, 15); + assertFieldEnd(reader, 19, 23); + assertEnd(reader, 24, 24); +} + +TEST(OsmlStreamParser, beginEndWithNameAndArgs) +{ + const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; + // 0123456789012345678901234 56789 01 2345678901 + // 0 1 2 3 4 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); + assertFieldStart(reader, 32, 33); + assertFieldEnd(reader, 37, 41); + assertEnd(reader, 42, 42); +} + +TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) +{ + const char *testString = + "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; + // 0123456789012345678901234 56789 01234 567890123 45678901 2345678901 + // 0 1 2 3 4 5 6 + 
CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); + assertFieldStart(reader, 32, 33); + assertData(reader, "a", 33, 34); + assertCommand(reader, "test", Variant::mapType{}, 35, 40); + assertFieldEnd(reader, 40, 41); + assertFieldStart(reader, 41, 42); + assertData(reader, "b", 42, 43); + assertCommand(reader, "test", Variant::mapType{}, 44, 49); + assertFieldStart(reader, 49, 50); + assertFieldEnd(reader, 50, 51); + assertFieldEnd(reader, 51, 52); + assertFieldStart(reader, 52, 53); + assertFieldEnd(reader, 57, 61); + assertEnd(reader, 62, 62); +} + +TEST(OsmlStreamParser, beginEndWithData) +{ + const char *testString = "\\begin{book}a\\end{book}"; + // 0123456789012 3456789012 + // 0 1 2 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", 7, 11); + assertFieldStart(reader, 12, 13); + assertData(reader, "a", 12, 13); + assertFieldEnd(reader, 18, 22); + assertEnd(reader, 23, 23); +} + +TEST(OsmlStreamParser, beginEndWithCommand) +{ + const char *testString = "\\begin{book}\\a{test}\\end{book}"; + // 012345678901 23456789 0123456789 + // 0 1 2 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", 7, 11); + assertFieldStart(reader, 12, 13); + assertCommand(reader, "a", 12, 14); + assertFieldStart(reader, 14, 15); + assertData(reader, "test", 15, 19); + assertFieldEnd(reader, 19, 20); + assertFieldEnd(reader, 25, 29); + assertEnd(reader, 30, 30); +} + +TEST(OsmlStreamParser, errorBeginNoBraceOpen) +{ + const char *testString = "\\begin a"; + // 01234567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "a", 7, 8); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorBeginNoIdentifier) 
+{ + const char *testString = "\\begin{!"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorBeginNoBraceClose) +{ + const char *testString = "\\begin{a"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorBeginNoName) +{ + const char *testString = "\\begin{a#}"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a"); + ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorEndNoBraceOpen) +{ + const char *testString = "\\end a"; + // 012345 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "a", 5, 6); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorEndNoIdentifier) +{ + const char *testString = "\\end{!"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorEndNoBraceClose) +{ + const char *testString = "\\end{a"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorEndNoBegin) +{ + const 
char *testString = "\\end{a}"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorBeginEndMismatch) +{ + const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; + // 0123456789 012345678901234 5678901 + // 0 1 2 3 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, 10, 11); + assertCommand(reader, "b", 17, 18); + assertFieldStart(reader, 20, 24); + assertData(reader, "test", 20, 24); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, commandWithNSSep) +{ + const char *testString = "\\test1:test2"; + // 012345678901 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test1:test2", 0, 12); + assertEnd(reader, 12, 12); +} + +TEST(OsmlStreamParser, beginEndWithNSSep) +{ + const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; + // 0123456789012345678 90123456789012345 + // 0 1 2 3 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test1:test2", 7, 18); + assertFieldStart(reader, 19, 20); + assertFieldEnd(reader, 24, 35); + assertEnd(reader, 36, 36); +} + +TEST(OsmlStreamParser, errorBeginNSSep) +{ + const char *testString = "\\begin:test{blub}\\end{blub}"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "blub"); + ASSERT_TRUE(logger.hasError()); + assertFieldStart(reader); + assertFieldEnd(reader); + assertEnd(reader); +} + +TEST(OsmlStreamParser, errorEndNSSep) +{ + const char *testString = 
"\\begin{blub}\\end:test{blub}"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "blub"); + assertFieldStart(reader); + ASSERT_FALSE(logger.hasError()); + assertFieldEnd(reader); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader); +} + +TEST(OsmlStreamParser, errorEmptyNs) +{ + const char *testString = "\\test:"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ":"); + assertEnd(reader); +} + +TEST(OsmlStreamParser, errorRepeatedNs) +{ + const char *testString = "\\test::"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(reader, "::"); + assertEnd(reader); +} +} + -- cgit v1.2.3 From 2659b4595d809cbd69a77e5ff7e2fc08d225f065 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:02:54 +0100 Subject: Tidied OsxmlEventParser up, implemented correct whitespace handling, started to write unit tests for the osxml parser --- CMakeLists.txt | 93 +++--- src/core/common/Utils.hpp | 21 +- src/core/common/WhitespaceHandler.hpp | 60 ++++ src/formats/osxml/OsxmlAttributeLocator.cpp | 144 ++++++++++ src/formats/osxml/OsxmlAttributeLocator.hpp | 67 +++++ src/formats/osxml/OsxmlEventParser.cpp | 425 +++++++++++++++------------- src/formats/osxml/OsxmlEventParser.hpp | 44 +-- test/formats/osml/OsmlStreamParserTest.cpp | 1 + test/formats/osxml/OsxmlEventParserTest.cpp | 222 +++++++++++++++ 9 files changed, 811 insertions(+), 266 deletions(-) create mode 100644 src/formats/osxml/OsxmlAttributeLocator.cpp create mode 100644 src/formats/osxml/OsxmlAttributeLocator.hpp create mode 100644 test/formats/osxml/OsxmlEventParserTest.cpp (limited 
to 'test/formats/osml/OsmlStreamParserTest.cpp') diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e3b90f..bdc9541 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,9 +147,9 @@ ADD_LIBRARY(ousia_core src/core/model/RootNode src/core/model/Style src/core/model/Typesystem -# src/core/parser/Parser -# src/core/parser/ParserContext -# src/core/parser/ParserScope + src/core/parser/Parser + src/core/parser/ParserContext + src/core/parser/ParserScope # src/core/parser/generic/ParserState # src/core/parser/generic/ParserStateCallbacks # src/core/parser/generic/ParserStateHandler @@ -183,36 +183,37 @@ TARGET_LINK_LIBRARIES(ousia_osml ousia_core ) -#ADD_LIBRARY(ousia_osxml -# src/formats/osxml/osxmlParser -#) +ADD_LIBRARY(ousia_osxml + src/formats/osxml/OsxmlAttributeLocator + src/formats/osxml/OsxmlEventParser +) -#TARGET_LINK_LIBRARIES(ousia_osxml -# ousia_core -# ${EXPAT_LIBRARIES} -#) +TARGET_LINK_LIBRARIES(ousia_osxml + ousia_core + ${EXPAT_LIBRARIES} +) # Resource locators -#ADD_LIBRARY(ousia_filesystem -# src/plugins/filesystem/FileLocator -# src/plugins/filesystem/SpecialPaths -#) +ADD_LIBRARY(ousia_filesystem + src/plugins/filesystem/FileLocator + src/plugins/filesystem/SpecialPaths +) -#TARGET_LINK_LIBRARIES(ousia_filesystem -# ousia_core -# ${Boost_LIBRARIES} -#) +TARGET_LINK_LIBRARIES(ousia_filesystem + ousia_core + ${Boost_LIBRARIES} +) # Output libraries -#ADD_LIBRARY(ousia_html -# src/plugins/html/DemoOutput -#) +ADD_LIBRARY(ousia_html + src/plugins/html/DemoOutput +) -#TARGET_LINK_LIBRARIES(ousia_html -# ousia_core -#) +TARGET_LINK_LIBRARIES(ousia_html + ousia_core +) #ADD_LIBRARY(ousia_mozjs # src/plugins/mozjs/MozJsScriptEngine @@ -247,7 +248,7 @@ IF(TEST) ADD_EXECUTABLE(ousia_test_core test/core/RangeSetTest -# test/core/RegistryTest + test/core/RegistryTest test/core/XMLTest test/core/common/ArgumentTest test/core/common/CharReaderTest @@ -272,7 +273,7 @@ IF(TEST) test/core/model/NodeTest test/core/model/StyleTest test/core/model/TypesystemTest 
-# test/core/parser/ParserScopeTest + test/core/parser/ParserScopeTest # test/core/parser/ParserStackTest # test/core/parser/ParserStateTest test/core/parser/utils/TokenizerTest @@ -311,15 +312,15 @@ IF(TEST) # ousia_css # ) -# ADD_EXECUTABLE(ousia_test_html -# test/plugins/html/DemoOutputTest -# ) + ADD_EXECUTABLE(ousia_test_html + test/plugins/html/DemoOutputTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_html -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_html -# ) + TARGET_LINK_LIBRARIES(ousia_test_html + ${GTEST_LIBRARIES} + ousia_core + ousia_html + ) ADD_EXECUTABLE(ousia_test_osml test/formats/osml/OsmlStreamParserTest @@ -331,16 +332,16 @@ IF(TEST) ousia_osml ) -# ADD_EXECUTABLE(ousia_test_osxml -# test/plugins/xml/XmlParserTest -# ) + ADD_EXECUTABLE(ousia_test_osxml + test/formats/osxml/OsxmlEventParserTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_osxml -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_osml -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_osxml + ${GTEST_LIBRARIES} + ousia_core + ousia_osxml + ousia_filesystem + ) # ADD_EXECUTABLE(ousia_test_mozjs # test/plugins/mozjs/MozJsScriptEngineTest @@ -354,11 +355,11 @@ IF(TEST) # Register the unit tests ADD_TEST(ousia_test_core ousia_test_core) -# ADD_TEST(ousia_test_filesystem ousia_test_filesystem) + ADD_TEST(ousia_test_filesystem ousia_test_filesystem) # ADD_TEST(ousia_test_css ousia_test_css) -# ADD_TEST(ousia_test_html ousia_test_html) + ADD_TEST(ousia_test_html ousia_test_html) ADD_TEST(ousia_test_osml ousia_test_osml) -# ADD_TEST(ousia_test_osxml ousia_test_osxml) + ADD_TEST(ousia_test_osxml ousia_test_osxml) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) ENDIF() diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 16a9136..8361973 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -119,9 +119,26 @@ public: */ template static std::pair trim(const T &s, Filter f) + { + return trim(s, s.size(), f); + } + + /** + * Trims the given string or vector of 
chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param len is the number of elements in the container. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template + static std::pair trim(const T &s, size_t len, Filter f) { size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { + for (size_t i = 0; i < len; i++) { if (!f(s[i])) { start = i; break; @@ -129,7 +146,7 @@ public: } size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { + for (ssize_t i = len - 1; i >= static_cast(start); i--) { if (!f(s[i])) { end = i + 1; break; diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp index 79e0518..ed52ea3 100644 --- a/src/core/common/WhitespaceHandler.hpp +++ b/src/core/common/WhitespaceHandler.hpp @@ -97,6 +97,25 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd); + } + + /** + * Static version of PreservingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd) { if (textBuf.empty()) { textStart = start; @@ -129,6 +148,27 @@ public: * @param end is the end byte offset of the given character. 
*/ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); + } + + /** + * Static version of TrimmingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param whitespaceBuf is a reference at the buffer for storing whitespace + * characters. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd, std::vector &whitespaceBuf) { // Handle whitespace characters if (Utils::isWhitespace(c)) { @@ -174,6 +214,26 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); + } + + /** + * Static version of CollapsingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param hasWhitespace is a reference at the "hasWhitespace" flag. 
+ */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd, bool &hasWhitespace) { // Handle whitespace characters if (Utils::isWhitespace(c)) { diff --git a/src/formats/osxml/OsxmlAttributeLocator.cpp b/src/formats/osxml/OsxmlAttributeLocator.cpp new file mode 100644 index 0000000..e37446a --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.cpp @@ -0,0 +1,144 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include + +#include "OsxmlAttributeLocator.hpp" + +namespace ousia { + +/** + * Enum used internally in the statemachine of the xml argument parser. + */ +enum class XmlAttributeState { + IN_TAG_NAME, + SEARCH_ATTR, + IN_ATTR_NAME, + HAS_ATTR_NAME, + HAS_ATTR_EQUALS, + IN_ATTR_DATA +}; + +std::map OsxmlAttributeLocator::locate( + CharReader &reader, size_t offs) +{ + std::map res; + + // Fork the reader, we don't want to mess up the XML parsing process, do we? + CharReaderFork readerFork = reader.fork(); + + // Move the read cursor to the start location, abort if this does not work + if (offs != readerFork.seek(offs)) { + return res; + } + + // Now all we need to do is to implement one half of an XML parser. As this + // is inherently complicated we'll totaly fail at it. Don't care. 
All we + // want to get is those darn offsets for pretty error messages... (and we + // can assume the XML is valid as it was already read by expat) + XmlAttributeState state = XmlAttributeState::IN_TAG_NAME; + char c; + std::stringstream attrName; + while (readerFork.read(c)) { + // Abort at the end of the tag + if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) { + return res; + } + + // One state machine to rule them all, one state machine to find them, + // One state machine to bring them all and in the darkness bind them + // (the byte offsets) + switch (state) { + case XmlAttributeState::IN_TAG_NAME: + if (Utils::isWhitespace(c)) { + res.emplace("$tag", + SourceLocation{reader.getSourceId(), offs + 1, + readerFork.getOffset() - 1}); + state = XmlAttributeState::SEARCH_ATTR; + } + break; + case XmlAttributeState::SEARCH_ATTR: + if (!Utils::isWhitespace(c)) { + state = XmlAttributeState::IN_ATTR_NAME; + attrName << c; + } + break; + case XmlAttributeState::IN_ATTR_NAME: + if (Utils::isWhitespace(c)) { + state = XmlAttributeState::HAS_ATTR_NAME; + } else if (c == '=') { + state = XmlAttributeState::HAS_ATTR_EQUALS; + } else { + attrName << c; + } + break; + case XmlAttributeState::HAS_ATTR_NAME: + if (!Utils::isWhitespace(c)) { + if (c == '=') { + state = XmlAttributeState::HAS_ATTR_EQUALS; + break; + } + // Well, this is a strange XML file... We expected to + // see a '=' here! Try to continue with the + // "HAS_ATTR_EQUALS" state as this state will hopefully + // inlcude some error recovery + } else { + // Skip whitespace here + break; + } + // Fallthrough + case XmlAttributeState::HAS_ATTR_EQUALS: + if (!Utils::isWhitespace(c)) { + if (c == '"') { + // Here we are! We have found the beginning of an + // attribute. 
Let's quickly lock the current offset away + // in the result map + res.emplace(attrName.str(), + SourceLocation{reader.getSourceId(), + readerFork.getOffset()}); + state = XmlAttributeState::IN_ATTR_DATA; + } else { + // No, this XML file is not well formed. Assume we're in + // an attribute name once again + attrName.str(std::string{&c, 1}); + state = XmlAttributeState::IN_ATTR_NAME; + } + } + break; + case XmlAttributeState::IN_ATTR_DATA: + if (c == '"') { + // We're at the end of the attribute data, set the end + // location + auto it = res.find(attrName.str()); + if (it != res.end()) { + it->second.setEnd(readerFork.getOffset() - 1); + } + + // Reset the attribute name and restart the search + attrName.str(std::string{}); + state = XmlAttributeState::SEARCH_ATTR; + } + break; + } + } + return res; +} +} + diff --git a/src/formats/osxml/OsxmlAttributeLocator.hpp b/src/formats/osxml/OsxmlAttributeLocator.hpp new file mode 100644 index 0000000..f9a3437 --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.hpp @@ -0,0 +1,67 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file OsxmlAttributeLocator.hpp + * + * Contains a class used for locating the byte offsets of the attributes given + * in a XML tag. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ +#define _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ + +#include + +namespace ousia { + +// Forward declarations +class CharReader; +class SourceLocation; + +/** + * Class containing one static function for locating the byte offsets of the + * attributes in a XML tag. These are not retrieved by our xml parser, so we have + * to do this manually. + */ +class OsxmlAttributeLocator { +public: + /** + * Function used to reconstruct the location of the attributes of a XML tag + * in the source code. This is necessary, as the xml parser only returns an + * offset to the beginning of a tag and not to the position of the individual + * arguments. + * + * @param reader is the char reader from which the character data should be + * read. + * @param offs is a byte offset in the xml file pointing at the "<" + * character of the tag. + * @return a map from attribute keys to the corresponding location + * (including range) of the attribute. Also contains the location of the + * tagname in the form of the virtual attribute "$tag". + */ + static std::map locate(CharReader &reader, + size_t offs); +}; + +} + +#endif /* _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ */ + diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index 2ef170e..b4aff77 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -18,14 +18,22 @@ #include +#include + +#include #include #include +#include #include +#include +#include "OsxmlAttributeLocator.hpp" #include "OsxmlEventParser.hpp" namespace ousia { +/* Class OsxmlEventParser */ + /** * Class containing data used by the internal functions. */ @@ -43,41 +51,75 @@ public: */ ssize_t annotationEndTagDepth; + /** + * Current character data buffer. 
+ */ + std::vector textBuf; + + /** + * Current whitespace buffer (for the trimming whitspace mode) + */ + std::vector whitespaceBuf; + + /** + * Flag indicating whether a whitespace character was present (for the + * collapsing whitespace mode). + */ + bool hasWhitespace; + + /** + * Current character data start. + */ + size_t textStart; + + /** + * Current character data end. + */ + size_t textEnd; + /** * Default constructor. */ - OsxmlEventParserData() : depth(0), annotationEndTagDepth(-1) {} + OsxmlEventParserData(); /** * Increments the depth. */ - void incrDepth() { depth++; } + void incrDepth(); /** * Decrement the depth and reset the annotationEndTagDepth flag. */ - void decrDepth() - { - if (depth > 0) { - depth--; - } - if (depth < annotationEndTagDepth) { - annotationEndTagDepth = -1; - } - } + void decrDepth(); /** * Returns true if we're currently inside an end tag. */ - bool inAnnotationEndTag() { depth >= annotationEndTagDepth; } + bool inAnnotationEndTag(); + + /** + * Returns true if character data is available. + * + * @return true if character data is available. + */ + bool hasText(); + + /** + * Returns a Variant containing the character data and its location. + * + * @return a string variant containing the text data and the character + * location. + */ + Variant getText(SourceId sourceId); }; -namespace { +/* Class GuardedExpatXmlParser */ + /** * Wrapper class around the XML_Parser pointer which safely frees it whenever * the scope is left (e.g. because an exception was thrown). */ -class ScopedExpatXmlParser { +class GuardedExpatXmlParser { private: /** * Internal pointer to the XML_Parser instance. @@ -86,14 +128,14 @@ private: public: /** - * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS + * Constructor of the GuardedExpatXmlParser class. Calls XML_ParserCreateNS * from the expat library. Throws a parser exception if the XML parser * cannot be initialized. 
* * @param encoding is the protocol-defined encoding passed to expat (or * nullptr if expat should determine the encoding by itself). */ - ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) + GuardedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) { parser = XML_ParserCreate(encoding); if (!parser) { @@ -103,9 +145,9 @@ public: } /** - * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance. + * Destuctor of the GuardedExpatXmlParser, frees the XML parser instance. */ - ~ScopedExpatXmlParser() + ~GuardedExpatXmlParser() { if (parser) { XML_ParserFree(parser); @@ -120,134 +162,20 @@ public: }; /** - * Enum used internally in the statemachine of the micro-xml argument parser. + * Name of the special outer tag used for allowing multiple top-level elements + * in an xml file. */ -enum class XmlAttributeState { - IN_TAG_NAME, - SEARCH_ATTR, - IN_ATTR_NAME, - HAS_ATTR_NAME, - HAS_ATTR_EQUALS, - IN_ATTR_DATA -}; +static const std::string TOP_LEVEL_TAG{"ousia"}; /** - * Function used to reconstruct the location of the attributes of a XML tag in - * the source code. This is necessary, as the xml parser only returns an offset - * to the begining of a tag and not to the position of the individual arguments. - * - * @param reader is the char reader from which the character data should be - * read. - * @param offs is a byte offset in the xml file pointing at the "<" character of - * the tag. - * @return a map from attribute keys to the corresponding location (including - * range) of the atribute. Also contains the location of the tagname in the - * form of the virtual attribute "$tag". + * Prefix used to indicate the start of an annoation (note the trailing colon) */ -static std::map xmlReconstructAttributeOffsets( - CharReader &reader, size_t offs) -{ - std::map res; - - // Fork the reader, we don't want to mess up the XML parsing process, do we? 
- CharReaderFork readerFork = reader.fork(); - - // Move the read cursor to the start location, abort if this does not work - if (!location.isValid() || offs != readerFork.seek(offs)) { - return res; - } - - // Now all we need to do is to implement one half of an XML parser. As this - // is inherently complicated we'll totaly fail at it. Don't care. All we - // want to get is those darn offsets for pretty error messages... (and we - // can assume the XML is valid as it was already read by expat) - XmlAttributeState state = XmlAttributeState::IN_TAG_NAME; - char c; - std::stringstream attrName; - while (readerFork.read(c)) { - // Abort at the end of the tag - if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) { - return res; - } +static const std::string ANNOTATION_START_PREFIX{"a:start:"}; - // One state machine to rule them all, one state machine to find them, - // One state machine to bring them all and in the darkness bind them - // (the byte offsets) - switch (state) { - case XmlAttributeState::IN_TAG_NAME: - if (Utils::isWhitespace(c)) { - res.emplace("$tag", - SourceLocation{reader.getSourceId(), offs + 1, - readerFork.getOffset() - 1}); - state = XmlAttributeState::SEARCH_ATTR; - } - break; - case XmlAttributeState::SEARCH_ATTR: - if (!Utils::isWhitespace(c)) { - state = XmlAttributeState::IN_ATTR_NAME; - attrName << c; - } - break; - case XmlAttributeState::IN_ATTR_NAME: - if (Utils::isWhitespace(c)) { - state = XmlAttributeState::HAS_ATTR_NAME; - } else if (c == '=') { - state = XmlAttributeState::HAS_ATTR_EQUALS; - } else { - attrName << c; - } - break; - case XmlAttributeState::HAS_ATTR_NAME: - if (!Utils::isWhitespace(c)) { - if (c == '=') { - state = XmlAttributeState::HAS_ATTR_EQUALS; - break; - } - // Well, this is a strange XML file... We expected to - // see a '=' here! 
Try to continue with the - // "HAS_ATTR_EQUALS" state as this state will hopefully - // inlcude some error recovery - } else { - // Skip whitespace here - break; - } - // Fallthrough - case XmlAttributeState::HAS_ATTR_EQUALS: - if (!Utils::isWhitespace(c)) { - if (c == '"') { - // Here we are! We have found the beginning of an - // attribute. Let's quickly lock the current offset away - // in the result map - res.emplace(attrName.str(), - SourceLocation{reader.getSourceId(), - readerFork.getOffset()}); - state = XmlAttributeState::IN_ATTR_DATA; - } else { - // No, this XML file is not well formed. Assume we're in - // an attribute name once again - attrName.str(std::string{&c, 1}); - state = XmlAttributeState::IN_ATTR_NAME; - } - } - break; - case XmlAttributeState::IN_ATTR_DATA: - if (c == '"') { - // We're at the end of the attribute data, set the end - // location - auto it = res.find(attrName.str()); - if (it != res.end()) { - it->second.setEnd(readerFork.getOffset() - 1); - } - - // Reset the attribute name and restart the search - attrName.str(std::string{}); - state = XmlAttributeState::SEARCH_ATTR; - } - break; - } - } - return res; -} +/** + * Prefix used to indicate the end of an annotation. + */ +static const std::string ANNOTATION_END_PREFIX{"a:end"}; /** * Synchronizes the position of the xml parser with the default location of the @@ -268,22 +196,12 @@ static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0) size_t offs = XML_GetCurrentByteIndex(p); SourceLocation loc = SourceLocation{parser->getReader().getSourceId(), offs, offs + len}; - parser->getLogger().setDefaultLocation(location); + parser->getLogger().setDefaultLocation(loc); // Return the fetched location return loc; } -/** - * Prefix used to indicate the start of an annoation, - */ -static const std::string ANNOTATION_START_PREFIX{"a:start:"}; - -/** - * Prefix used to indicate the end of an annotation. 
- */ -static const std::string ANNOTATION_END_PREFIX{"a:end"}; - /** * Callback called by eXpat whenever a start handler is reached. */ @@ -292,14 +210,21 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); + + // If there is any text data in the buffer, issue that first + if (parser->getData().hasText()) { + parser->getEvents().data( + parser->getData().getText(parser->getReader().getSourceId())); + } // Read the argument locations -- this is only a stupid and slow hack, // but it is necessary, as expat doesn't give use the byte offset of the // arguments. std::map attributeOffsets = - xmlReconstructXMLAttributeOffsets(*userData->reader, - XML_GetCurrentByteIndex(p)); + OsxmlAttributeLocator::locate(parser->getReader(), + XML_GetCurrentByteIndex(p)); // Update the logger position SourceLocation loc = xmlSyncLoggerPosition(p); @@ -316,7 +241,8 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // Make sure we're currently not inside an annotation end tag -- this would // be highly illegal! 
if (parser->getData().inAnnotationEndTag()) { - logger.error("No tags allowed inside an annotation end tag", nameLoc); + parser->getLogger().error( + "No tags allowed inside an annotation end tag", nameLoc); return; } @@ -336,36 +262,33 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // Parse the string, pass the location of the key std::pair value = VariantReader::parseGenericString( - *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(), + *(attr++), parser->getLogger(), keyLoc.getSourceId(), keyLoc.getStart()); // Set the overall location of the parsed element to the attribute // location - value.second->setLocation(keyLoc); - - // Store the - if (!args.emplace(key, value.second).second) { - parser->getLogger().warning( - std::string("Attribute \"") + key + - "\" defined multiple times, only using first definition", - keyLoc); - } + value.second.setLocation(keyLoc); + + // Store the keys in the map + args.emplace(key, value.second).second; } // Fetch the name of the tag, check for special tags std::string nameStr(name); - if (nameStr == "ousia" && parser->getData().depth == 1) { - // We're in the top-level and the magic "ousia" tag is reached -- just + if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 1) { + // We're in the top-level and the magic tag is reached -- just // ignore it and issue a warning for each argument that has been given for (const auto &arg : args) { - parser->getLogger().warning( - std::string("Ignoring attribute \"") + arg.first + - std::string("\" for magic tag \"ousia\""), - arg.second); + parser->getLogger().warning(std::string("Ignoring attribute \"") + + arg.first + + std::string("\" for magic tag \"") + + TOP_LEVEL_TAG + std::string("\""), + arg.second); } } else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) { // Assemble a name variant containing the name minus the prefix - Variant nameVar = nameStr.substr(ANNOTATION_START_PREFIX.size()); + Variant nameVar = + 
Variant::fromString(nameStr.substr(ANNOTATION_START_PREFIX.size())); nameVar.setLocation(nameLoc); // Issue the "annotationStart" event @@ -410,25 +333,34 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, } } -static void xmlEndElementHandler(void *p, const XML_Char *name) +static void xmlEndElementHandler(void *ref, const XML_Char *name) { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); // Synchronize the position of the logger with teh position - xmlSyncLoggerPosition(parser); - - // Decrement the current depth - parser->getData().decrDepth(); + xmlSyncLoggerPosition(p); // Abort as long as we're in an annotation end tag if (parser->getData().inAnnotationEndTag()) { + parser->getData().decrDepth(); return; } + // Decrement the current depth + parser->getData().decrDepth(); + + // If there is any text data in the buffer, issue that first + if (parser->getData().hasText()) { + parser->getEvents().data( + parser->getData().getText(parser->getReader().getSourceId())); + } + // Abort if the special ousia tag ends here - if (nameStr == "ousia" && parser->getData().depth == 0) { + std::string nameStr{name}; + if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 0) { return; } @@ -436,20 +368,105 @@ static void xmlEndElementHandler(void *p, const XML_Char *name) parser->getEvents().fieldEnd(); } -static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len) +static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); - - // TODO -/* size_t ulen = len > 0 ? 
static_cast(len) : 0; - syncLoggerPosition(parser, ulen); - const std::string data = Utils::trim(std::string{s, ulen}); - if (!data.empty()) { - stack->data(data); - }*/ + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); + + // Abort as long as we're in an annotation end tag + if (parser->getData().inAnnotationEndTag()) { + return; + } + + // Convert the signed (smell the 90's C library here?) length to an usigned + // value + size_t ulen = len > 0 ? static_cast(len) : 0; + + // Synchronize the logger position + SourceLocation loc = xmlSyncLoggerPosition(p, ulen); + + // Fetch some variables for convenience + const WhitespaceMode mode = parser->getWhitespaceMode(); + OsxmlEventParserData &data = parser->getData(); + std::vector &textBuf = data.textBuf; + std::vector &whitespaceBuf = data.whitespaceBuf; + bool &hasWhitespace = data.hasWhitespace; + size_t &textStart = data.textStart; + size_t &textEnd = data.textEnd; + + size_t pos = loc.getStart(); + for (size_t i = 0; i < ulen; i++, pos++) { + switch (mode) { + case WhitespaceMode::PRESERVE: + PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd); + break; + case WhitespaceMode::TRIM: + TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd, + whitespaceBuf); + break; + case WhitespaceMode::COLLAPSE: + CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd, + hasWhitespace); + break; + } + } +} + +/* Class OsxmlEvents */ + +OsxmlEvents::~OsxmlEvents() {} + +/* Class OsxmlEventParser */ + +OsxmlEventParserData::OsxmlEventParserData() + : depth(0), + annotationEndTagDepth(-1), + hasWhitespace(false), + textStart(0), + textEnd(0) +{ +} + +void OsxmlEventParserData::incrDepth() { depth++; } + +void OsxmlEventParserData::decrDepth() +{ + if (depth > 0) { + depth--; + } + if (depth < annotationEndTagDepth) { + annotationEndTagDepth = -1; + } +} + +bool OsxmlEventParserData::inAnnotationEndTag() +{ + return 
(annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth); } + +bool OsxmlEventParserData::hasText() { return !textBuf.empty(); } + +Variant OsxmlEventParserData::getText(SourceId sourceId) +{ + // Create a variant containing the string data and the location + Variant var = + Variant::fromString(std::string{textBuf.data(), textBuf.size()}); + var.setLocation({sourceId, textStart, textEnd}); + + // Reset the text buffers + textBuf.clear(); + whitespaceBuf.clear(); + hasWhitespace = false; + textStart = 0; + textEnd = 0; + + // Return the variant + return var; } /* Class OsxmlEventParser */ @@ -459,21 +476,22 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, : reader(reader), events(events), logger(logger), - whitespaceMode(WhitespaceMode::COLLAPSE), + whitespaceMode(WhitespaceMode::TRIM), data(new OsxmlEventParserData()) { } -void OsxmlEventParser::parse(CharReader &reader) +OsxmlEventParser::~OsxmlEventParser() {} + +void OsxmlEventParser::parse() { // Create the parser object - ScopedExpatXmlParser p{"UTF-8"}; + GuardedExpatXmlParser p{"UTF-8"}; // Reset the depth - depth = 0; + data->depth = 0; - // Pass the reference to the ParserStack to the XML handler - XMLUserData data(&stack, &reader); + // Pass the reference to this parser instance to the XML handler XML_SetUserData(&p, this); XML_UseParserAsHandlerArg(&p); @@ -498,7 +516,7 @@ void OsxmlEventParser::parse(CharReader &reader) if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { throw LoggableException{ "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))}, - xmlSyncLoggerPosition(p)}; + xmlSyncLoggerPosition(&p)}; } // Abort once there are no more bytes in the stream @@ -513,12 +531,17 @@ void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode) this->whitespaceMode = whitespaceMode; } -CharReader &OsxmlEventParser::getCharReader() { return charReader; } +WhitespaceMode OsxmlEventParser::getWhitespaceMode() const +{ + return whitespaceMode; +} + 
+CharReader &OsxmlEventParser::getReader() const { return reader; } -Logger &OsxmlEventParser::getLogger() { return logger; } +Logger &OsxmlEventParser::getLogger() const { return logger; } -OsxmlEvents &OsxmlEventParser::getEvents() { return events; } +OsxmlEvents &OsxmlEventParser::getEvents() const { return events; } -OsxmlEventParserData &OsxmlEventParser::getData() { return *data; } +OsxmlEventParserData &OsxmlEventParser::getData() const { return *data; } } diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index 5319ca6..aa20ea9 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -42,7 +42,7 @@ class Variant; class OsxmlEventParserData; /** - * Interface which defines the callback functions which are called by the + * Interface which defines the callback functions which are called by the * OsxmlEventParser whenever an event occurs. */ class OsxmlEvents { @@ -50,13 +50,13 @@ public: /** * Virtual destructor. */ - virtual ~OsxmlEvents() {} + virtual ~OsxmlEvents(); /** * Called whenever a command starts. Note that this implicitly always starts * the default field of the command. * - * @param name is a string variant containing name and location of the + * @param name is a string variant containing name and location of the * command. * @param args is a map variant containing the arguments that were given * to the command. @@ -67,12 +67,12 @@ public: * Called whenever an annotation starts. Note that this implicitly always * starts the default field of the annotation. * - * @param name is a string variant containing the name of the annotation + * @param name is a string variant containing the name of the annotation * class and the location of the annotation definition. * @param args is a map variant containing the arguments that were given * to the annotation definition. 
*/ - virtual void annotationStart(Variant name, Variant args); + virtual void annotationStart(Variant name, Variant args) = 0; /** * Called whenever the range of an annotation ends. The callee must @@ -85,12 +85,12 @@ public: * ended here. May be empty (or nullptr), if no elementName has been * specified at the end of the annotation. */ - virtual void annotationEnd(Variant name, Variant elementName); + virtual void annotationEnd(Variant name, Variant elementName) = 0; /** - * Called whenever the default field which was implicitly started by + * Called whenever the default field which was implicitly started by * commandStart or annotationStart ends. Note that this does not end the - * range of an annotation, but the default field of the annotation. To + * range of an annotation, but the default field of the annotation. To * signal the end of the annotation this, the annotationEnd method will be * invoked. */ @@ -102,11 +102,10 @@ public: * is not called if the parsing failed, the parser prints an error message * instead. * - * @param data is the already parsed data that should be passed to the + * @param data is the already parsed data that should be passed to the * handler. */ virtual void data(Variant data) = 0; - }; /** @@ -148,7 +147,7 @@ public: * Constructor fo the OsxmlEventParser. Takes a reference at the OsxmlEvents * of which the callback functions are called. * - * @param reader is a reference to the CharReader instance from which the + * @param reader is a reference to the CharReader instance from which the * XML should be read. * @param events is a refence at an instance of the OsxmlEvents class. All * events are forwarded to this class. @@ -157,6 +156,11 @@ public: */ OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger); + /** + * Destructor of OsxmlEventParser (needed for unique_ptr to incomplete type) + */ + ~OsxmlEventParser(); + /** * Performs the actual parsing. 
Reads the XML using eXpat and calles the * callbacks in the event listener instance whenever something interesting @@ -167,38 +171,44 @@ public: /** * Sets the whitespace handling mode. * - * @param whitespaceMode defines how whitespace in the data should be + * @param whitespaceMode defines how whitespace in the data should be * handled. */ void setWhitespaceMode(WhitespaceMode whitespaceMode); + /** + * Returns the current whitespace handling mode. + * + * @return the currently set whitespace handling mode. + */ + WhitespaceMode getWhitespaceMode() const; + /** * Returns the internal CharReader reference. * * @return the CharReader reference. */ - CharReader &getCharReader(); + CharReader &getReader() const; /** * Returns the internal Logger reference. * * @return the internal Logger reference. */ - Logger &getLogger(); + Logger &getLogger() const; /** * Returns the internal OsxmlEvents reference. * * @return the internal OsxmlEvents reference. */ - OsxmlEvents &getEvents(); + OsxmlEvents &getEvents() const; /** * Returns a reference at the internal data. 
*/ - OsxmlEventParserData &getData(); + OsxmlEventParserData &getData() const; }; - } #endif /* _OSXML_EVENT_PARSER_HPP_ */ diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index e5eff05..b944af8 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -28,6 +28,7 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); +//static ConcreteLogger logger; TEST(OsmlStreamParser, empty) { diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp new file mode 100644 index 0000000..06c800f --- /dev/null +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -0,0 +1,222 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include + +#include +#include +#include + +#include + +namespace ousia { + +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; + +namespace { +enum class OsxmlEvent { + COMMAND_START, + ANNOTATION_START, + ANNOTATION_END, + FIELD_END, + DATA +}; + +class TestOsxmlEventListener : public OsxmlEvents { +public: + std::vector<std::pair<OsxmlEvent, Variant>> events; + + void commandStart(Variant name, Variant args) override + { + events.emplace_back(OsxmlEvent::COMMAND_START, + Variant::arrayType{name, args}); + } + + void annotationStart(Variant name, Variant args) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_START, + Variant::arrayType{name, args}); + } + + void annotationEnd(Variant name, Variant elementName) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_END, + Variant::arrayType{name, elementName}); + } + + void fieldEnd() override + { + events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); + } + + void data(Variant data) override + { + events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); + } +}; + +static std::vector<std::pair<OsxmlEvent, Variant>> parseXml( + const char *testString, + WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) +{ + TestOsxmlEventListener listener; + CharReader reader(testString); + OsxmlEventParser parser(reader, listener, logger); + parser.setWhitespaceMode(whitespaceMode); + parser.parse(); + return listener.events; +} +} + +TEST(OsxmlEventParser, simpleCommandWithArgs) +{ + const char *testString = "<a name=\"test\" a=\"1\" b=\"2\" c=\"blub\"/>"; + // 01234567 89012 3456 78 9012 34 5678 90123 456 + // 0 1 2 3 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{ + "a", Variant::mapType{ + {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); + + // Check the locations (I'll do this one time and then just assume it works) + ASSERT_EQ(1U, events[0].second.asArray()[0].getLocation().getStart()); + 
ASSERT_EQ(2U, events[0].second.asArray()[0].getLocation().getEnd()); + ASSERT_EQ( + 9U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getStart()); + ASSERT_EQ( + 13U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getEnd()); + ASSERT_EQ( + 18U, + events[0].second.asArray()[1].asMap()["a"].getLocation().getStart()); + ASSERT_EQ( + 19U, events[0].second.asArray()[1].asMap()["a"].getLocation().getEnd()); + ASSERT_EQ( + 24U, + events[0].second.asArray()[1].asMap()["b"].getLocation().getStart()); + ASSERT_EQ( + 25U, events[0].second.asArray()[1].asMap()["b"].getLocation().getEnd()); + ASSERT_EQ( + 30U, + events[0].second.asArray()[1].asMap()["c"].getLocation().getStart()); + ASSERT_EQ( + 34U, events[0].second.asArray()[1].asMap()["c"].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, magicTopLevelTag) +{ + const char *testString = ""; + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"b", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, magicTopLevelTagInside) +{ + const char *testString = ""; + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"ousia", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, 
Variant::arrayType{" hello \n world "}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::PRESERVE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataTrimWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::TRIM); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::COLLAPSE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +} + -- cgit v1.2.3 From 9b4cdfabf6527440d6ffa499cc6b57a44daaeadb Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:05:42 +0100 Subject: Added code for the handling of explicit default fields and improved unit tests --- CMakeLists.txt | 16 +- 
src/formats/osml/OsmlStreamParser.cpp | 78 +++++-- src/formats/osml/OsmlStreamParser.hpp | 45 +++- test/formats/osml/OsmlStreamParserTest.cpp | 340 +++++++++++++++++------------ 4 files changed, 302 insertions(+), 177 deletions(-) (limited to 'test/formats/osml/OsmlStreamParserTest.cpp') diff --git a/CMakeLists.txt b/CMakeLists.txt index bdc9541..d311f7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -290,15 +290,15 @@ IF(TEST) ousia_core ) -# ADD_EXECUTABLE(ousia_test_filesystem -# test/plugins/filesystem/FileLocatorTest -# ) + ADD_EXECUTABLE(ousia_test_filesystem + test/plugins/filesystem/FileLocatorTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_filesystem -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_filesystem + ${GTEST_LIBRARIES} + ousia_core + ousia_filesystem + ) # ADD_EXECUTABLE(ousia_test_css # test/plugins/css/Tokenizer diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index 6b00eef..6606120 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -60,6 +60,11 @@ public: */ TokenTypeId FieldEnd; + /** + * Id of the default field start token. + */ + TokenTypeId DefaultFieldStart; + /** * Registers the plain format tokens in the internal tokenizer. 
*/ @@ -71,6 +76,7 @@ public: BlockCommentEnd = registerToken("}%"); FieldStart = registerToken("{"); FieldEnd = registerToken("}"); + DefaultFieldStart = registerToken("{!"); } }; @@ -164,7 +170,7 @@ OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) : reader(reader), logger(logger), tokenizer(Tokens) { // Place an intial command representing the complete file on the stack - commands.push(Command{"", Variant::mapType{}, true, true, true}); + commands.push(Command{"", Variant::mapType{}, true, true, true, false}); } Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) @@ -365,7 +371,7 @@ void OsmlStreamParser::pushCommand(Variant commandName, commands.pop(); } commands.push(Command{std::move(commandName), std::move(commandArguments), - hasRange, false, false}); + hasRange, false, false, false}); } OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start) @@ -482,6 +488,29 @@ bool OsmlStreamParser::checkIssueFieldStart() return false; } +bool OsmlStreamParser::closeField() +{ + // Try to end an open field of the current command -- if the current command + // is not inside an open field, end this command and try to close the next + // one + for (int i = 0; i < 2 && commands.size() > 1; i++) { + Command &cmd = commands.top(); + if (!cmd.inRangeField) { + if (cmd.inField) { + cmd.inField = false; + if (cmd.inDefaultField) { + commands.pop(); + } + return true; + } + commands.pop(); + } else { + return false; + } + } + return false; +} + OsmlStreamParser::State OsmlStreamParser::parse() { // Handler for incomming data @@ -579,27 +608,29 @@ OsmlStreamParser::State OsmlStreamParser::parse() } logger.error( "Got field start token \"{\", but no command for which to " - "start the field. Did you mean \"\\{\"?", + "start the field. 
Write \"\\{\" to insert this sequence as " + "text.", token); } else if (token.type == Tokens.FieldEnd) { - // Try to end an open field of the current command -- if the current - // command is not inside an open field, end this command and try to - // close the next one - for (int i = 0; i < 2 && commands.size() > 1; i++) { - Command &cmd = commands.top(); - if (!cmd.inRangeField) { - if (cmd.inField) { - cmd.inField = false; - return State::FIELD_END; - } - commands.pop(); - } else { - break; - } + if (closeField()) { + return State::FIELD_END; + } + logger.error( + "Got field end token \"}\", but there is no field to end. " + "Write \"\\}\" to insert this sequence as text.", + token); + } else if (token.type == Tokens.DefaultFieldStart) { + // Try to start a default field the first time the token is reached + Command &topCmd = commands.top(); + if (!topCmd.inField) { + topCmd.inField = true; + topCmd.inDefaultField = true; + return State::FIELD_START; } logger.error( - "Got field end token \"}\", but there is no field to end. Did " - "you mean \"\\}\"?", + "Got default field start token \"{!\", but no command for " + "which to start the field. 
Write \"\\{!\" to insert this " + "sequence as text", token); } else { logger.error("Unexpected token \"" + token.content + "\"", token); @@ -627,14 +658,19 @@ OsmlStreamParser::State OsmlStreamParser::parse() return State::END; } -const Variant &OsmlStreamParser::getCommandName() +const Variant &OsmlStreamParser::getCommandName() const { return commands.top().name; } -const Variant &OsmlStreamParser::getCommandArguments() +const Variant &OsmlStreamParser::getCommandArguments() const { return commands.top().arguments; } + +bool OsmlStreamParser::inDefaultField() const +{ + return commands.top().inRangeField || commands.top().inDefaultField; +} } diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index 1508012..bb5db65 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -152,10 +152,16 @@ public: */ bool inRangeField; + /** + * Set to true if we are currently in a field that has been especially + * marked as default field (using the "|") syntax. + */ + bool inDefaultField; + /** * Default constructor. */ - Command() : hasRange(false), inField(false), inRangeField(false) {} + Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {} /** * Constructor of the Command class. @@ -168,16 +174,19 @@ public: * explicit range. * @param inField is set to true if we currently are inside a field * of this command. - * @param inRangeField is set to true if we currently inside the outer - * field of the command. + * @param inRangeField is set to true if we currently are inside the + * outer field of a ranged command. + * @param inDefaultField is set to true if we currently are in a + * specially marked default field. 
*/ Command(Variant name, Variant arguments, bool hasRange, bool inField, - bool inRangeField) + bool inRangeField, bool inDefaultField) : name(std::move(name)), arguments(std::move(arguments)), hasRange(hasRange), inField(inField), - inRangeField(inRangeField) + inRangeField(inRangeField), + inDefaultField(inDefaultField) { } }; @@ -289,6 +298,16 @@ private: */ bool checkIssueFieldStart(); + /** + * Closes a currently open field. Note that the command will be removed from + * the internal command stack if the field that is being closed is a + * field marked as default field. + * + * @return true if the field could be closed, false if there was no field + * to close. + */ + bool closeField(); + public: /** * Constructor of the OsmlStreamParser class. Attaches the new @@ -317,7 +336,7 @@ public: * @return a reference at a variant containing the data parsed by the * "parse" function. */ - const Variant &getData() { return data; } + const Variant &getData() const { return data; } /** * Returns a reference at the internally stored command name. Only valid if @@ -326,7 +345,7 @@ public: * @return a reference at a variant containing name and location of the * parsed command. */ - const Variant &getCommandName(); + const Variant &getCommandName() const; /** * Returns a reference at the internally stored command name. Only valid if @@ -335,14 +354,22 @@ public: * @return a reference at a variant containing arguments given to the * command. */ - const Variant &getCommandArguments(); + const Variant &getCommandArguments() const; + + /** + * Returns true if the current field is the "default" field. This is true if + * the parser either is in the outer range of a range command or inside a + * field that has been especially marked as "default" field (using the "|" + * syntax). + */ + bool inDefaultField() const; /** * Returns a reference at the char reader. * * @return the last internal token location. 
*/ - SourceLocation &getLocation() { return location; } + const SourceLocation &getLocation() const { return location; } }; } diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index b944af8..da9fe8a 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -28,7 +28,88 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); -//static ConcreteLogger logger; +// static ConcreteLogger logger; + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertCommand(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertData(OsmlStreamParser &reader, const std::string &data, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + EXPECT_EQ(data, reader.getData().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + 
+static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + EXPECT_EQ(defaultField, reader.inDefaultField()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} TEST(OsmlStreamParser, empty) { @@ -47,12 +128,7 @@ TEST(OsmlStreamParser, oneCharacter) OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); + assertData(reader, "a", 0, 1); } TEST(OsmlStreamParser, whitespaceElimination) @@ -64,12 +140,7 @@ TEST(OsmlStreamParser, whitespaceElimination) OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, 
loc.getStart()); - ASSERT_EQ(14U, loc.getEnd()); + assertData(reader, "hello world", 1, 14); } TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) @@ -81,13 +152,7 @@ TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(14U, loc.getEnd()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertData(reader, "hello world", 1, 14); } TEST(OsmlStreamParser, escapeWhitespace) @@ -99,13 +164,7 @@ TEST(OsmlStreamParser, escapeWhitespace) OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(15U, loc.getEnd()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertData(reader, "hello world", 1, 15); } static void testEscapeSpecialCharacter(const std::string &c) @@ -127,6 +186,7 @@ TEST(OsmlStreamParser, escapeSpecialCharacters) testEscapeSpecialCharacter("}"); testEscapeSpecialCharacter("<"); testEscapeSpecialCharacter(">"); + testEscapeSpecialCharacter("|"); } TEST(OsmlStreamParser, simpleSingleLineComment) @@ -347,86 +407,6 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - EXPECT_EQ(name, reader.getCommandName().asString()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, 
reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - const Variant::mapType &args, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - assertCommand(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); -} - -static void assertData(OsmlStreamParser &reader, const std::string &data, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(data, reader.getData().asString()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getData().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getData().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertFieldStart(OsmlStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertFieldEnd(OsmlStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertEnd(OsmlStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = 
InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - TEST(OsmlStreamParser, fields) { const char *testString = "\\test{a}{b}{c}"; @@ -436,15 +416,15 @@ TEST(OsmlStreamParser, fields) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); assertData(reader, "b", 9, 10); assertFieldEnd(reader, 10, 11); - assertFieldStart(reader, 11, 12); + assertFieldStart(reader, false, 11, 12); assertData(reader, "c", 12, 13); assertFieldEnd(reader, 13, 14); assertEnd(reader, 14, 14); @@ -459,11 +439,11 @@ TEST(OsmlStreamParser, dataOutsideField) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); assertData(reader, "b", 9, 10); assertFieldEnd(reader, 10, 11); @@ -481,14 +461,14 @@ TEST(OsmlStreamParser, nestedCommand) assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); { assertCommand(reader, "test2", 9, 15); - assertFieldStart(reader, 15, 16); + assertFieldStart(reader, false, 15, 16); assertData(reader, "b", 16, 17); assertFieldEnd(reader, 17, 18); } @@ -507,10 +487,10 @@ TEST(OsmlStreamParser, nestedCommandImmediateEnd) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); 
- assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); { assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertData(reader, "b", 13, 14); assertFieldEnd(reader, 14, 15); } @@ -527,7 +507,7 @@ TEST(OsmlStreamParser, nestedCommandNoData) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); assertFieldEnd(reader, 12, 13); assertEnd(reader, 13, 13); @@ -557,11 +537,11 @@ TEST(OsmlStreamParser, fieldsWithSpaces) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, 3, 4); + assertFieldStart(reader, false, 3, 4); assertCommand(reader, "b", 4, 6); assertCommand(reader, "c", 7, 9); assertFieldEnd(reader, 9, 10); - assertFieldStart(reader, 16, 17); + assertFieldStart(reader, false, 16, 17); assertCommand(reader, "d", 17, 19); assertFieldEnd(reader, 19, 20); assertEnd(reader, 20, 20); @@ -612,9 +592,9 @@ TEST(OsmlStreamParser, errorNoFieldEndNested) logger.reset(); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertFieldEnd(reader, 13, 14); assertFieldEnd(reader, 14, 15); ASSERT_FALSE(logger.hasError()); @@ -633,9 +613,9 @@ TEST(OsmlStreamParser, errorNoFieldEndNestedData) logger.reset(); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertFieldEnd(reader, 13, 14); assertFieldEnd(reader, 14, 15); assertData(reader, "a", 15, 16); @@ -654,7 +634,7 @@ TEST(OsmlStreamParser, beginEnd) OsmlStreamParser reader(charReader, logger); 
assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertFieldEnd(reader, 17, 21); assertEnd(reader, 22, 22); } @@ -669,7 +649,7 @@ TEST(OsmlStreamParser, beginEndWithName) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", {{"name", "a"}}, 7, 11); - assertFieldStart(reader, 14, 15); + assertFieldStart(reader, true, 14, 15); assertFieldEnd(reader, 19, 23); assertEnd(reader, 24, 24); } @@ -685,7 +665,7 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgs) assertCommand(reader, "book", {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, 32, 33); + assertFieldStart(reader, true, 32, 33); assertFieldEnd(reader, 37, 41); assertEnd(reader, 42, 42); } @@ -702,17 +682,17 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) assertCommand(reader, "book", {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, 32, 33); + assertFieldStart(reader, false, 32, 33); assertData(reader, "a", 33, 34); assertCommand(reader, "test", Variant::mapType{}, 35, 40); assertFieldEnd(reader, 40, 41); - assertFieldStart(reader, 41, 42); + assertFieldStart(reader, false, 41, 42); assertData(reader, "b", 42, 43); assertCommand(reader, "test", Variant::mapType{}, 44, 49); - assertFieldStart(reader, 49, 50); + assertFieldStart(reader, false, 49, 50); assertFieldEnd(reader, 50, 51); assertFieldEnd(reader, 51, 52); - assertFieldStart(reader, 52, 53); + assertFieldStart(reader, true, 52, 53); assertFieldEnd(reader, 57, 61); assertEnd(reader, 62, 62); } @@ -727,12 +707,45 @@ TEST(OsmlStreamParser, beginEndWithData) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertData(reader, "a", 12, 13); assertFieldEnd(reader, 18, 22); assertEnd(reader, 23, 23); } +TEST(OsmlStreamParser, beginEndNested) +{ + const char *testString = + 
"\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; + // 012345678901234 5678901234567890 123456 7890123 4567890 + // 0 1 2 3 4 5 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, false, 9, 10); + assertData(reader, "b", 10, 11); + assertFieldEnd(reader, 11, 12); + assertFieldStart(reader, true, 13, 14); + assertData(reader, "c", 13, 14); + assertCommand(reader, "d", 22, 23); + assertFieldStart(reader, false, 24, 25); + assertData(reader, "e", 25, 26); + assertFieldEnd(reader, 26, 27); + assertFieldStart(reader, false, 27, 28); + assertData(reader, "f", 28, 29); + assertFieldEnd(reader, 29, 30); + assertFieldStart(reader, true, 31, 32); + assertCommand(reader, "g", 31, 33); + assertFieldStart(reader, false, 33, 34); + assertData(reader, "h", 34, 35); + assertFieldEnd(reader, 35, 36); + assertFieldEnd(reader, 42, 43); + assertFieldEnd(reader, 49, 50); + assertEnd(reader, 51, 51); +} + TEST(OsmlStreamParser, beginEndWithCommand) { const char *testString = "\\begin{book}\\a{test}\\end{book}"; @@ -743,9 +756,9 @@ TEST(OsmlStreamParser, beginEndWithCommand) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertCommand(reader, "a", 12, 14); - assertFieldStart(reader, 14, 15); + assertFieldStart(reader, false, 14, 15); assertData(reader, "test", 15, 19); assertFieldEnd(reader, 19, 20); assertFieldEnd(reader, 25, 29); @@ -873,9 +886,9 @@ TEST(OsmlStreamParser, errorBeginEndMismatch) logger.reset(); assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, 10, 11); + assertFieldStart(reader, true, 10, 11); assertCommand(reader, "b", 17, 18); - assertFieldStart(reader, 20, 24); + assertFieldStart(reader, true, 20, 24); assertData(reader, "test", 20, 24); ASSERT_FALSE(logger.hasError()); ASSERT_THROW(reader.parse(), LoggableException); @@ -904,7 +917,7 @@ 
TEST(OsmlStreamParser, beginEndWithNSSep) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test1:test2", 7, 18); - assertFieldStart(reader, 19, 20); + assertFieldStart(reader, true, 19, 20); assertFieldEnd(reader, 24, 35); assertEnd(reader, 36, 36); } @@ -920,7 +933,7 @@ TEST(OsmlStreamParser, errorBeginNSSep) ASSERT_FALSE(logger.hasError()); assertCommand(reader, "blub"); ASSERT_TRUE(logger.hasError()); - assertFieldStart(reader); + assertFieldStart(reader, true); assertFieldEnd(reader); assertEnd(reader); } @@ -934,7 +947,7 @@ TEST(OsmlStreamParser, errorEndNSSep) logger.reset(); assertCommand(reader, "blub"); - assertFieldStart(reader); + assertFieldStart(reader, true); ASSERT_FALSE(logger.hasError()); assertFieldEnd(reader); ASSERT_TRUE(logger.hasError()); @@ -970,5 +983,54 @@ TEST(OsmlStreamParser, errorRepeatedNs) assertData(reader, "::"); assertEnd(reader); } + +TEST(OsmlStreamParser, explicitDefaultField) +{ + const char *testString = "\\a{!b}c"; + // 01234567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertData(reader, "b", 4, 5); + assertFieldEnd(reader, 5, 6); + assertData(reader, "c", 6, 7); + assertEnd(reader, 7, 7); +} + +TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) +{ + const char *testString = "\\a{!\\b}c"; + // 0123 4567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + assertData(reader, "c", 7, 8); + assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) +{ + const char *testString = "\\a{!\\b}{c}"; + // 0123 4567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + 
assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + assertData(reader, "c", 7, 8); + assertEnd(reader, 8, 8); +} + } -- cgit v1.2.3 From 856fa8298d55c07313d9638d9f8b8c0913202b2c Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:06:05 +0100 Subject: Fixed forgotten unit test --- test/formats/osml/OsmlStreamParserTest.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'test/formats/osml/OsmlStreamParserTest.cpp') diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index da9fe8a..5f23822 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -1019,17 +1019,20 @@ TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) { const char *testString = "\\a{!\\b}{c}"; - // 0123 4567 + // 0123 456789 CharReader charReader(testString); OsmlStreamParser reader(charReader, logger); + logger.reset(); assertCommand(reader, "a", 0, 2); assertFieldStart(reader, true, 2, 4); assertCommand(reader, "b", 4, 6); assertFieldEnd(reader, 6, 7); - assertData(reader, "c", 7, 8); - assertEnd(reader, 8, 8); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "c", 8, 9); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader, 10, 10); } } -- cgit v1.2.3 From 205810b44c980998958dcd857c2cb34a914dc760 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Thu, 12 Feb 2015 16:21:36 +0100 Subject: Implemented annotation start and end field --- contrib/test.osdm | 29 ---- contrib/test.osml | 29 ++++ src/formats/osml/OsmlStreamParser.cpp | 116 ++++++++++++--- src/formats/osml/OsmlStreamParser.hpp | 16 +- test/formats/osml/OsmlStreamParserTest.cpp | 228 ++++++++++++++++++++++++++++- 5 files changed, 363 insertions(+), 55 deletions(-) delete mode 100644 contrib/test.osdm create mode 100644 contrib/test.osml (limited to 'test/formats/osml/OsmlStreamParserTest.cpp') diff --git 
a/contrib/test.osdm b/contrib/test.osdm deleted file mode 100644 index 100bc77..0000000 --- a/contrib/test.osdm +++ /dev/null @@ -1,29 +0,0 @@ -%{ - We're currently inside a block comment. - %{ - Note that block comments can be nested, easily allowing you to comment - out blocks which already contain comments. - }% -}% - -% Well, line comments, as we know them from TeX also work - -\import{meta} -\import{book} - -\domain#special_words{ - \struct#latex - \struct#ousia -} - -\book{ - \include{chapters/chapter1} - \include{chapters/chapter2} - - \begin{note}{Behaviour of "Include"} - Analogous to the `include` command in \latex, \ousia forces the included - file to be *complete* in a sense, that it must not have dangling open - commands. - \end{note} -} - diff --git a/contrib/test.osml b/contrib/test.osml new file mode 100644 index 0000000..100bc77 --- /dev/null +++ b/contrib/test.osml @@ -0,0 +1,29 @@ +%{ + We're currently inside a block comment. + %{ + Note that block comments can be nested, easily allowing you to comment + out blocks which already contain comments. + }% +}% + +% Well, line comments, as we know them from TeX also work + +\import{meta} +\import{book} + +\domain#special_words{ + \struct#latex + \struct#ousia +} + +\book{ + \include{chapters/chapter1} + \include{chapters/chapter2} + + \begin{note}{Behaviour of "Include"} + Analogous to the `include` command in \latex, \ousia forces the included + file to be *complete* in a sense, that it must not have dangling open + commands. + \end{note} +} + diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index 6606120..0174fa4 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -65,6 +65,16 @@ public: */ TokenTypeId DefaultFieldStart; + /** + * Id of the annotation start token. + */ + TokenTypeId AnnotationStart; + + /** + * Id of the annotation end token. 
+ */ + TokenTypeId AnnotationEnd; + /** * Registers the plain format tokens in the internal tokenizer. */ @@ -77,6 +87,8 @@ public: FieldStart = registerToken("{"); FieldEnd = registerToken("}"); DefaultFieldStart = registerToken("{!"); + AnnotationStart = registerToken("<\\"); + AnnotationEnd = registerToken("\\>"); } }; @@ -374,7 +386,8 @@ void OsmlStreamParser::pushCommand(Variant commandName, hasRange, false, false, false}); } -OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start) +OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, + bool isAnnotation) { // Parse the commandName as a first identifier Variant commandName = parseIdentifier(start, true); @@ -388,6 +401,9 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start) Utils::split(commandName.asString(), ':'); const bool isBegin = commandNameComponents[0] == "begin"; const bool isEnd = commandNameComponents[0] == "end"; + + // Parse the begin or end command + State res = State::COMMAND; if (isBegin || isEnd) { if (commandNameComponents.size() > 1) { logger.error( @@ -396,30 +412,76 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start) commandName); } if (isBegin) { - return parseBeginCommand(); + res = parseBeginCommand(); } else if (isEnd) { - return parseEndCommand(); + res = parseEndCommand(); + } + } else { + // Check whether the next character is a '#', indicating the start of + // the command name + Variant commandArgName; + start = reader.getOffset(); + if (reader.expect('#')) { + commandArgName = parseIdentifier(start); + if (commandArgName.asString().empty()) { + logger.error("Expected identifier after \"#\"", commandArgName); + } } + + // Parse the arugments + Variant commandArguments = + parseCommandArguments(std::move(commandArgName)); + + // Push the command onto the command stack + pushCommand(std::move(commandName), std::move(commandArguments), false); } - // Check whether the next character is a '#', indicating the start of 
the - // command name - Variant commandArgName; - start = reader.getOffset(); - if (reader.expect('#')) { - commandArgName = parseIdentifier(start); - if (commandArgName.asString().empty()) { - logger.error("Expected identifier after \"#\"", commandArgName); + // Check whether a ">" character is the next character that is to be read. + // In that case the current command could be an annotation end command! + char c; + if (reader.fetch(c) && c == '>') { + // Ignore the character after a begin or end command + if (isBegin || isEnd) { + logger.warning( + "Ignoring annotation end character \">\" after special " + "commands \"begin\" or \"end\". Write \"\\>\" to end a " + "\"begin\"/\"end\" enclosed annotation.", + reader); + return res; } - } - // Parse the arugments - Variant commandArguments = parseCommandArguments(std::move(commandArgName)); + // If this should be an annoation, ignore the character + if (isAnnotation) { + logger.warning( + "Ignoring annotation end character \">\" after annotation " + "start command. 
Write \"\\>\" to end the annotation.", + reader); + } else { + // Make sure no arguments apart from the "name" argument are given + // to an annotation end + Variant::mapType &map = commands.top().arguments.asMap(); + if (!map.empty()) { + if (map.count("name") == 0 || map.size() > 1U) { + logger.error( + "An annotation end command may not have any arguments " + "other than \"name\""); + return res; + } + } - // Push the command onto the command stack - pushCommand(std::move(commandName), std::move(commandArguments), false); + // If we got here, this is a valid ANNOTATION_END command, issue it + reader.peek(c); + reader.consumePeek(); + return State::ANNOTATION_END; + } + } - return State::COMMAND; + // If we're starting an annotation, return the command as annotation start + // instead of command + if (isAnnotation && res == State::COMMAND) { + return State::ANNOTATION_START; + } + return res; } void OsmlStreamParser::parseBlockComment() @@ -522,7 +584,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() const TokenTypeId type = token.type; // Special handling for Backslash and Text - if (type == Tokens.Backslash) { + if (type == Tokens.Backslash || type == Tokens.AnnotationStart) { // Before appending anything to the output data or starting a new // command, check whether FIELD_START has to be issued, as the // current command is a command with range @@ -548,7 +610,8 @@ OsmlStreamParser::State OsmlStreamParser::parse() } // Parse the actual command - State res = parseCommand(token.location.getStart()); + State res = parseCommand(token.location.getStart(), + type == Tokens.AnnotationStart); switch (res) { case State::ERROR: throw LoggableException( @@ -565,6 +628,14 @@ OsmlStreamParser::State OsmlStreamParser::parse() // to the data buffer, use the escape character start as start // location and the peek offset as end location reader.peek(c); // Peek the previously fetched character + + // If this was an annotation start token, add the parsed < to the + // 
output + if (type == Tokens.AnnotationStart) { + handler.append('<', token.location.getStart(), + token.location.getStart() + 1); + } + handler.append(c, token.location.getStart(), reader.getPeekOffset()); reader.consumePeek(); @@ -632,6 +703,13 @@ OsmlStreamParser::State OsmlStreamParser::parse() "which to start the field. Write \"\\{!\" to insert this " "sequence as text", token); + } else if (token.type == Tokens.AnnotationEnd) { + // We got a single annotation end token "\>" -- simply issue the + // ANNOTATION_END event + Variant annotationName = Variant::fromString(""); + annotationName.setLocation(token.location); + pushCommand(annotationName, Variant::mapType{}, false); + return State::ANNOTATION_END; } else { logger.error("Unexpected token \"" + token.content + "\"", token); } diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index bb5db65..3827118 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -161,7 +161,13 @@ public: /** * Default constructor. */ - Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {} + Command() + : hasRange(false), + inField(false), + inRangeField(false), + inDefaultField() + { + } /** * Constructor of the Command class. @@ -179,8 +185,8 @@ public: * @param inDefaultField is set to true if we currently are in a * specially marked default field. */ - Command(Variant name, Variant arguments, bool hasRange, bool inField, - bool inRangeField, bool inDefaultField) + Command(Variant name, Variant arguments, bool hasRange, + bool inField, bool inRangeField, bool inDefaultField) : name(std::move(name)), arguments(std::move(arguments)), hasRange(hasRange), @@ -266,9 +272,11 @@ private: * * @param start is the start byte offset of the command (including the * backslash) + * @param isAnnotation if true, the command is not returned as command, but + * as annotation start. 
* @return true if a command was actuall parsed, false otherwise. */ - State parseCommand(size_t start); + State parseCommand(size_t start, bool isAnnotation); /** * Function used internally to parse a block comment. diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 5f23822..d52fa5b 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -98,6 +98,56 @@ static void assertFieldEnd(OsmlStreamParser &reader, } } +static void assertAnnotationStart(OsmlStreamParser &reader, + const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertAnnotationStart(OsmlStreamParser &reader, + const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertAnnotationStart(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertAnnotationEnd(OsmlStreamParser &reader, + const std::string &name, + const std::string &elementName, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); + ASSERT_EQ(name, reader.getCommandName().asString()); + if (!elementName.empty()) { + ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); + ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name")); + + auto it = 
reader.getCommandArguments().asMap().find("name"); + ASSERT_EQ(elementName, it->second.asString()); + } + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + static void assertEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) @@ -184,9 +234,6 @@ TEST(OsmlStreamParser, escapeSpecialCharacters) testEscapeSpecialCharacter("\\"); testEscapeSpecialCharacter("{"); testEscapeSpecialCharacter("}"); - testEscapeSpecialCharacter("<"); - testEscapeSpecialCharacter(">"); - testEscapeSpecialCharacter("|"); } TEST(OsmlStreamParser, simpleSingleLineComment) @@ -1035,5 +1082,180 @@ TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) assertEnd(reader, 10, 10); } +TEST(OsmlStreamParser, annotationStart) +{ + const char *testString = "<\\a"; + // 0 12 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertEnd(reader, 3, 3); +} + +TEST(OsmlStreamParser, annotationStartWithName) +{ + const char *testString = "<\\annotationWithName#aName"; + // 0 1234567890123456789012345 + // 0 1 2 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart(reader, "annotationWithName", + Variant::mapType{{"name", "aName"}}, 0, 20); + assertEnd(reader, 26, 26); +} + +TEST(OsmlStreamParser, annotationStartWithArguments) +{ + const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; + // 0 1234567890123456789012345678901234 + // 0 1 2 3 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart( + reader, "annotationWithName", + Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); + assertEnd(reader, 35, 35); +} + +TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) +{ + 
const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; + // 0 123456789012345678901234567 89012345 67 + // 0 1 2 3 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart( + reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, + 10); + assertFieldStart(reader, true, 26, 27); + assertData(reader, "a", 26, 27); + assertFieldEnd(reader, 33, 35); + assertAnnotationEnd(reader, "", "", 36, 38); + assertEnd(reader, 38, 38); +} + +TEST(OsmlStreamParser, annotationEnd) +{ + const char *testString = "\\a>"; + // 012 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "", 0, 2); + assertEnd(reader, 3, 3); +} + +TEST(OsmlStreamParser, annotationEndWithName) +{ + const char *testString = "\\a#name>"; + // 01234567 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, annotationEndWithNameAsArgs) +{ + const char *testString = "\\a[name=name]>"; + // 01234567890123 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 14, 14); +} + +TEST(OsmlStreamParser, errorAnnotationEndWithArguments) +{ + const char *testString = "\\a[foo=bar]>"; + // 012345678901 + // 0 1 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ">", 11, 12); + assertEnd(reader, 12, 12); +} + +TEST(OsmlStreamParser, closingAnnotation) +{ + const char *testString = "<\\a>"; + // 0 123 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + 
assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertData(reader, ">", 3, 4); + assertEnd(reader, 4, 4); +} + +TEST(OsmlStreamParser, annotationWithFields) +{ + const char *testString = "a <\\b{c}{d}{!e} f \\> g"; + // 012 345678901234567 8901 + // 0 1 2 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertData(reader, "a", 0, 1); + assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); + assertFieldStart(reader, false, 5, 6); + assertData(reader, "c", 6, 7); + assertFieldEnd(reader, 7, 8); + assertFieldStart(reader, false, 8, 9); + assertData(reader, "d", 9, 10); + assertFieldEnd(reader, 10, 11); + assertFieldStart(reader, true, 11, 13); + assertData(reader, "e", 13, 14); + assertFieldEnd(reader, 14, 15); + assertData(reader, "f", 16, 17); + assertAnnotationEnd(reader, "", "", 18, 20); + assertData(reader, "g", 21, 22); + assertEnd(reader, 22, 22); +} + +TEST(OsmlStreamParser, annotationStartEscape) +{ + const char *testString = "<\\%test"; + // 0 123456 + // 0 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertData(reader, "<%test", 0, 7); + assertEnd(reader, 7, 7); +} } -- cgit v1.2.3