From c771577b9c7c7a3c1b019139ed132101add73cf9 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:42:22 +0100 Subject: Disabled css --- CMakeLists.txt | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index 67cacce..4458d1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,9 +115,7 @@ ADD_DEFINITIONS( ) ADD_LIBRARY(ousia_core - src/core/CodeTokenizer src/core/Registry - src/core/Tokenizer src/core/XML src/core/RangeSet src/core/common/Argument @@ -136,6 +134,7 @@ ADD_LIBRARY(ousia_core src/core/common/VariantConverter src/core/common/VariantReader src/core/common/VariantWriter + src/core/common/Whitespace src/core/frontend/Terminal src/core/frontend/TerminalLogger src/core/managed/Events @@ -152,8 +151,8 @@ ADD_LIBRARY(ousia_core src/core/parser/Parser src/core/parser/ParserContext src/core/parser/ParserScope - src/core/parser/ParserStack - src/core/parser/ParserState + src/core/parser/generic/ParserStateStack + src/core/parser/generic/ParserState src/core/resource/Resource src/core/resource/ResourceLocator src/core/resource/ResourceManager @@ -161,13 +160,15 @@ ADD_LIBRARY(ousia_core # src/core/script/ScriptEngine ) -ADD_LIBRARY(ousia_css - src/plugins/css/CSSParser -) +#ADD_LIBRARY(ousia_css +# src/plugins/css/CodeTokenizer +# src/plugins/css/Tokenizer +# src/plugins/css/CSSParser +#) -TARGET_LINK_LIBRARIES(ousia_css - ousia_core -) +#TARGET_LINK_LIBRARIES(ousia_css +# ousia_core +#) ADD_LIBRARY(ousia_filesystem src/plugins/filesystem/FileLocator @@ -224,7 +225,6 @@ ADD_EXECUTABLE(ousia TARGET_LINK_LIBRARIES(ousia ousia_core - ousia_css ousia_filesystem ousia_html ousia_xml @@ -257,6 +257,7 @@ IF(TEST) test/core/common/VariantWriterTest test/core/common/VariantTest test/core/common/UtilsTest + test/core/common/WhitespaceTest test/core/frontend/TerminalLoggerTest test/core/managed/ManagedContainerTest test/core/managed/ManagedTest @@ -293,15 +294,17 @@ IF(TEST) ousia_filesystem ) - ADD_EXECUTABLE(ousia_test_css - test/plugins/css/CSSParserTest - ) +# ADD_EXECUTABLE(ousia_test_css +# test/plugins/css/Tokenizer +# test/plugins/css/CodeTokenizerTest +# test/plugins/css/CSSParserTest +# ) - TARGET_LINK_LIBRARIES(ousia_test_css - ${GTEST_LIBRARIES} - ousia_core - ousia_css - ) +# TARGET_LINK_LIBRARIES(ousia_test_css +# ${GTEST_LIBRARIES} +# ousia_core +# ousia_css +# ) ADD_EXECUTABLE(ousia_test_html test/plugins/html/DemoOutputTest @@ -349,7 +352,7 @@ IF(TEST) # Register the unit tests ADD_TEST(ousia_test_core ousia_test_core) ADD_TEST(ousia_test_filesystem ousia_test_filesystem) - ADD_TEST(ousia_test_css ousia_test_css) +# ADD_TEST(ousia_test_css ousia_test_css) ADD_TEST(ousia_test_html ousia_test_html) ADD_TEST(ousia_test_xml ousia_test_xml) ADD_TEST(ousia_test_osdm ousia_test_osdm) -- cgit v1.2.3 From 974afd3fdc54380a43445a180263fb162e1ff2c0 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:00:23 +0100 Subject: Applied renaming to unit tests and added forgotten CMakeLists.txt --- CMakeLists.txt | 177 +++--- test/formats/osml/OsmlStreamParserTest.cpp | 973 +++++++++++++++++++++++++++++ test/formats/osxml/OsxmlParserTest.cpp | 314 ++++++++++ 3 files changed, 1377 insertions(+), 87 deletions(-) create mode 100644 test/formats/osml/OsmlStreamParserTest.cpp create mode 100644 test/formats/osxml/OsxmlParserTest.cpp (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index 4458d1b..6e3b90f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,7 +134,6 @@ ADD_LIBRARY(ousia_core src/core/common/VariantConverter src/core/common/VariantReader src/core/common/VariantWriter - src/core/common/Whitespace src/core/frontend/Terminal src/core/frontend/TerminalLogger src/core/managed/Events @@ -148,11 +147,15 @@ ADD_LIBRARY(ousia_core src/core/model/RootNode src/core/model/Style src/core/model/Typesystem - src/core/parser/Parser - src/core/parser/ParserContext - src/core/parser/ParserScope - src/core/parser/generic/ParserStateStack - src/core/parser/generic/ParserState +# src/core/parser/Parser +# src/core/parser/ParserContext +# src/core/parser/ParserScope +# src/core/parser/generic/ParserState +# src/core/parser/generic/ParserStateCallbacks +# src/core/parser/generic/ParserStateHandler +# src/core/parser/generic/ParserStateStack + src/core/parser/utils/Tokenizer + src/core/parser/utils/TokenTrie src/core/resource/Resource src/core/resource/ResourceLocator src/core/resource/ResourceManager @@ -160,6 +163,8 @@ ADD_LIBRARY(ousia_core # src/core/script/ScriptEngine ) +# Format libraries + #ADD_LIBRARY(ousia_css # src/plugins/css/CodeTokenizer # src/plugins/css/Tokenizer @@ -170,43 +175,44 @@ ADD_LIBRARY(ousia_core # ousia_core #) -ADD_LIBRARY(ousia_filesystem - src/plugins/filesystem/FileLocator - src/plugins/filesystem/SpecialPaths +ADD_LIBRARY(ousia_osml + src/formats/osml/OsmlStreamParser ) -TARGET_LINK_LIBRARIES(ousia_filesystem +TARGET_LINK_LIBRARIES(ousia_osml ousia_core - ${Boost_LIBRARIES} ) -ADD_LIBRARY(ousia_html - src/plugins/html/DemoOutput -) +#ADD_LIBRARY(ousia_osxml +# src/formats/osxml/osxmlParser +#) -TARGET_LINK_LIBRARIES(ousia_html - ousia_core -) +#TARGET_LINK_LIBRARIES(ousia_osxml +# ousia_core +# ${EXPAT_LIBRARIES} +#) -ADD_LIBRARY(ousia_xml - src/plugins/xml/XmlParser -) +# Resource locators -TARGET_LINK_LIBRARIES(ousia_xml - ousia_core - ${EXPAT_LIBRARIES} -) +#ADD_LIBRARY(ousia_filesystem +# src/plugins/filesystem/FileLocator +# src/plugins/filesystem/SpecialPaths +#) -ADD_LIBRARY(ousia_osdm - src/formats/osdm/DynamicTokenizer - src/formats/osdm/TokenTrie - src/formats/osdm/OsdmStreamParser -) +#TARGET_LINK_LIBRARIES(ousia_filesystem +# ousia_core +# ${Boost_LIBRARIES} +#) -TARGET_LINK_LIBRARIES(ousia_osdm - ousia_core -) +# Output libraries +#ADD_LIBRARY(ousia_html +# src/plugins/html/DemoOutput +#) + +#TARGET_LINK_LIBRARIES(ousia_html +# ousia_core +#) #ADD_LIBRARY(ousia_mozjs # src/plugins/mozjs/MozJsScriptEngine @@ -219,17 +225,17 @@ TARGET_LINK_LIBRARIES(ousia_osdm # Command line interface -ADD_EXECUTABLE(ousia - src/cli/Main -) +#ADD_EXECUTABLE(ousia +# src/cli/Main +#) -TARGET_LINK_LIBRARIES(ousia - ousia_core - ousia_filesystem - ousia_html - ousia_xml - ${Boost_LIBRARIES} -) +#TARGET_LINK_LIBRARIES(ousia +# ousia_core +# ousia_filesystem +# ousia_html +# ousia_xml +# ${Boost_LIBRARIES} +#) # If testing is enabled, build the unit tests IF(TEST) @@ -240,10 +246,8 @@ IF(TEST) ) ADD_EXECUTABLE(ousia_test_core - test/core/CodeTokenizerTest test/core/RangeSetTest - test/core/RegistryTest - test/core/TokenizerTest +# test/core/RegistryTest test/core/XMLTest test/core/common/ArgumentTest test/core/common/CharReaderTest @@ -257,7 +261,6 @@ IF(TEST) test/core/common/VariantWriterTest test/core/common/VariantTest test/core/common/UtilsTest - test/core/common/WhitespaceTest test/core/frontend/TerminalLoggerTest test/core/managed/ManagedContainerTest test/core/managed/ManagedTest @@ -269,9 +272,11 @@ IF(TEST) test/core/model/NodeTest test/core/model/StyleTest test/core/model/TypesystemTest - test/core/parser/ParserScopeTest - test/core/parser/ParserStackTest - test/core/parser/ParserStateTest +# test/core/parser/ParserScopeTest +# test/core/parser/ParserStackTest +# test/core/parser/ParserStateTest + test/core/parser/utils/TokenizerTest + test/core/parser/utils/TokenTrieTest test/core/resource/ResourceLocatorTest test/core/resource/ResourceRequestTest # test/core/script/FunctionTest @@ -284,15 +289,15 @@ IF(TEST) ousia_core ) - ADD_EXECUTABLE(ousia_test_filesystem - test/plugins/filesystem/FileLocatorTest - ) +# ADD_EXECUTABLE(ousia_test_filesystem +# test/plugins/filesystem/FileLocatorTest +# ) - TARGET_LINK_LIBRARIES(ousia_test_filesystem - ${GTEST_LIBRARIES} - ousia_core - ousia_filesystem - ) +# TARGET_LINK_LIBRARIES(ousia_test_filesystem +# ${GTEST_LIBRARIES} +# ousia_core +# ousia_filesystem +# ) # ADD_EXECUTABLE(ousia_test_css # test/plugins/css/Tokenizer @@ -306,38 +311,36 @@ IF(TEST) # ousia_css # ) - ADD_EXECUTABLE(ousia_test_html - test/plugins/html/DemoOutputTest - ) +# ADD_EXECUTABLE(ousia_test_html +# test/plugins/html/DemoOutputTest +# ) - TARGET_LINK_LIBRARIES(ousia_test_html - ${GTEST_LIBRARIES} - ousia_core - ousia_html - ) +# TARGET_LINK_LIBRARIES(ousia_test_html +# ${GTEST_LIBRARIES} +# ousia_core +# ousia_html +# ) - ADD_EXECUTABLE(ousia_test_xml - test/plugins/xml/XmlParserTest + ADD_EXECUTABLE(ousia_test_osml + test/formats/osml/OsmlStreamParserTest ) - TARGET_LINK_LIBRARIES(ousia_test_xml + TARGET_LINK_LIBRARIES(ousia_test_osml ${GTEST_LIBRARIES} ousia_core - ousia_xml - ousia_filesystem + ousia_osml ) - ADD_EXECUTABLE(ousia_test_osdm - test/formats/osdm/TokenTrieTest - test/formats/osdm/DynamicTokenizerTest - test/formats/osdm/OsdmStreamParserTest - ) +# ADD_EXECUTABLE(ousia_test_osxml +# test/plugins/xml/XmlParserTest +# ) - TARGET_LINK_LIBRARIES(ousia_test_osdm - ${GTEST_LIBRARIES} - ousia_core - ousia_osdm - ) +# TARGET_LINK_LIBRARIES(ousia_test_osxml +# ${GTEST_LIBRARIES} +# ousia_core +# ousia_osml +# ousia_filesystem +# ) # ADD_EXECUTABLE(ousia_test_mozjs # test/plugins/mozjs/MozJsScriptEngineTest @@ -351,11 +354,11 @@ IF(TEST) # Register the unit tests ADD_TEST(ousia_test_core ousia_test_core) - ADD_TEST(ousia_test_filesystem ousia_test_filesystem) +# ADD_TEST(ousia_test_filesystem ousia_test_filesystem) # ADD_TEST(ousia_test_css ousia_test_css) - ADD_TEST(ousia_test_html ousia_test_html) - ADD_TEST(ousia_test_xml ousia_test_xml) - ADD_TEST(ousia_test_osdm ousia_test_osdm) +# ADD_TEST(ousia_test_html ousia_test_html) + ADD_TEST(ousia_test_osml ousia_test_osml) +# ADD_TEST(ousia_test_osxml ousia_test_osxml) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) ENDIF() @@ -373,6 +376,6 @@ INSTALL(DIRECTORY data/ DESTINATION share/ousia OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE ) -INSTALL(TARGETS ousia - RUNTIME DESTINATION bin -) +#INSTALL(TARGETS ousia +# RUNTIME DESTINATION bin +#) diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp new file mode 100644 index 0000000..e5eff05 --- /dev/null +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -0,0 +1,973 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include + +#include +#include + +#include + +namespace ousia { + +static TerminalLogger logger(std::cerr, true); + +TEST(OsmlStreamParser, empty) +{ + const char *testString = ""; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, oneCharacter) +{ + const char *testString = "a"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("a", reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); +} + +TEST(OsmlStreamParser, whitespaceElimination) +{ + const char *testString = " hello \t world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("hello world", reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(14U, loc.getEnd()); +} + +TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) +{ + const char *testString = " hello \n world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("hello world", reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(14U, loc.getEnd()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, escapeWhitespace) +{ + const char *testString = " hello\\ \\ world "; + // 012345 67 89012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("hello world", reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(15U, loc.getEnd()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +static void testEscapeSpecialCharacter(const std::string &c) +{ + CharReader charReader(std::string("\\") + c); + OsmlStreamParser reader(charReader, logger); + EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + EXPECT_EQ(c, reader.getData().asString()); + + SourceLocation loc = reader.getData().getLocation(); + EXPECT_EQ(0U, loc.getStart()); + EXPECT_EQ(1U + c.size(), loc.getEnd()); +} + +TEST(OsmlStreamParser, escapeSpecialCharacters) +{ + testEscapeSpecialCharacter("\\"); + testEscapeSpecialCharacter("{"); + testEscapeSpecialCharacter("}"); + testEscapeSpecialCharacter("<"); + testEscapeSpecialCharacter(">"); +} + +TEST(OsmlStreamParser, simpleSingleLineComment) +{ + const char *testString = "% This is a single line comment"; + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, singleLineComment) +{ + const char *testString = "a% This is a single line comment\nb"; + // 01234567890123456789012345678901 23 + // 0 1 2 3 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("a", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); + } + + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("b", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(33U, loc.getStart()); + ASSERT_EQ(34U, loc.getEnd()); + } + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, multilineComment) +{ + const char *testString = "a%{ This is a\n\n multiline line comment}%b"; + // 0123456789012 3 456789012345678901234567890 + // 0 1 2 3 4 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("a", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); + } + + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("b", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(40U, loc.getStart()); + ASSERT_EQ(41U, loc.getEnd()); + } + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, nestedMultilineComment) +{ + const char *testString = "a%{%{Another\n\n}%multiline line comment}%b"; + // 0123456789012 3 456789012345678901234567890 + // 0 1 2 3 4 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("a", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); + } + + { + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + ASSERT_EQ("b", reader.getData().asString()); + SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ(40U, loc.getStart()); + ASSERT_EQ(41U, loc.getEnd()); + } + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, simpleCommand) +{ + const char *testString = "\\test"; + // 0 12345 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + + Variant commandName = reader.getCommandName(); + ASSERT_EQ("test", commandName.asString()); + + SourceLocation loc = commandName.getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + + ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, simpleCommandWithName) +{ + const char *testString = "\\test#bla"; + // 0 12345678 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + + Variant commandName = reader.getCommandName(); + ASSERT_EQ("test", commandName.asString()); + SourceLocation loc = commandName.getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + + Variant commandArguments = reader.getCommandArguments(); + ASSERT_TRUE(commandArguments.isMap()); + ASSERT_EQ(1U, commandArguments.asMap().size()); + ASSERT_EQ(1U, commandArguments.asMap().count("name")); + ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); + + loc = commandArguments.asMap()["name"].getLocation(); + ASSERT_EQ(5U, loc.getStart()); + ASSERT_EQ(9U, loc.getEnd()); + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, simpleCommandWithArguments) +{ + const char *testString = "\\test[a=1,b=2,c=\"test\"]"; + // 0 123456789012345 678901 2 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + + Variant commandName = reader.getCommandName(); + ASSERT_EQ("test", commandName.asString()); + SourceLocation loc = commandName.getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + + Variant commandArguments = reader.getCommandArguments(); + ASSERT_TRUE(commandArguments.isMap()); + ASSERT_EQ(3U, commandArguments.asMap().size()); + ASSERT_EQ(1U, commandArguments.asMap().count("a")); + ASSERT_EQ(1U, commandArguments.asMap().count("b")); + ASSERT_EQ(1U, commandArguments.asMap().count("c")); + ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); + ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); + ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); + + loc = commandArguments.asMap()["a"].getLocation(); + ASSERT_EQ(8U, loc.getStart()); + ASSERT_EQ(9U, loc.getEnd()); + + loc = commandArguments.asMap()["b"].getLocation(); + ASSERT_EQ(12U, loc.getStart()); + ASSERT_EQ(13U, loc.getEnd()); + + loc = commandArguments.asMap()["c"].getLocation(); + ASSERT_EQ(16U, loc.getStart()); + ASSERT_EQ(22U, loc.getEnd()); + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) +{ + const char *testString = "\\test#bla[a=1,b=2,c=\"test\"]"; + // 0 1234567890123456789 01234 56 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + + Variant commandName = reader.getCommandName(); + ASSERT_EQ("test", commandName.asString()); + SourceLocation loc = commandName.getLocation(); + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + + Variant commandArguments = reader.getCommandArguments(); + ASSERT_TRUE(commandArguments.isMap()); + ASSERT_EQ(4U, commandArguments.asMap().size()); + ASSERT_EQ(1U, commandArguments.asMap().count("a")); + ASSERT_EQ(1U, commandArguments.asMap().count("b")); + ASSERT_EQ(1U, commandArguments.asMap().count("c")); + ASSERT_EQ(1U, commandArguments.asMap().count("name")); + ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); + ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); + ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); + ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); + + loc = commandArguments.asMap()["a"].getLocation(); + ASSERT_EQ(12U, loc.getStart()); + ASSERT_EQ(13U, loc.getEnd()); + + loc = commandArguments.asMap()["b"].getLocation(); + ASSERT_EQ(16U, loc.getStart()); + ASSERT_EQ(17U, loc.getEnd()); + + loc = commandArguments.asMap()["c"].getLocation(); + ASSERT_EQ(20U, loc.getStart()); + ASSERT_EQ(26U, loc.getEnd()); + + loc = commandArguments.asMap()["name"].getLocation(); + ASSERT_EQ(5U, loc.getStart()); + ASSERT_EQ(9U, loc.getEnd()); + + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertCommand(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertData(OsmlStreamParser &reader, const std::string &data, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + EXPECT_EQ(data, reader.getData().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldStart(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +TEST(OsmlStreamParser, fields) +{ + const char *testString = "\\test{a}{b}{c}"; + // 01234567890123 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertData(reader, "a", 6, 7); + assertFieldEnd(reader, 7, 8); + + assertFieldStart(reader, 8, 9); + assertData(reader, "b", 9, 10); + assertFieldEnd(reader, 10, 11); + + assertFieldStart(reader, 11, 12); + assertData(reader, "c", 12, 13); + assertFieldEnd(reader, 13, 14); + assertEnd(reader, 14, 14); +} + +TEST(OsmlStreamParser, dataOutsideField) +{ + const char *testString = "\\test{a}{b} c"; + // 0123456789012 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertData(reader, "a", 6, 7); + assertFieldEnd(reader, 7, 8); + + assertFieldStart(reader, 8, 9); + assertData(reader, "b", 9, 10); + assertFieldEnd(reader, 10, 11); + + assertData(reader, "c", 12, 13); + assertEnd(reader, 13, 13); +} + +TEST(OsmlStreamParser, nestedCommand) +{ + const char *testString = "\\test{a}{\\test2{b} c} d"; + // 012345678 90123456789012 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + + assertFieldStart(reader, 5, 6); + assertData(reader, "a", 6, 7); + assertFieldEnd(reader, 7, 8); + + assertFieldStart(reader, 8, 9); + { + assertCommand(reader, "test2", 9, 15); + assertFieldStart(reader, 15, 16); + assertData(reader, "b", 16, 17); + assertFieldEnd(reader, 17, 18); + } + assertData(reader, "c", 19, 20); + assertFieldEnd(reader, 20, 21); + assertData(reader, "d", 22, 23); + assertEnd(reader, 23, 23); +} + +TEST(OsmlStreamParser, nestedCommandImmediateEnd) +{ + const char *testString = "\\test{\\test2{b}} d"; + // 012345 678901234567 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + { + assertCommand(reader, "test2", 6, 12); + assertFieldStart(reader, 12, 13); + assertData(reader, "b", 13, 14); + assertFieldEnd(reader, 14, 15); + } + assertFieldEnd(reader, 15, 16); + assertData(reader, "d", 17, 18); + assertEnd(reader, 18, 18); +} + +TEST(OsmlStreamParser, nestedCommandNoData) +{ + const char *testString = "\\test{\\test2}"; + // 012345 6789012 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertCommand(reader, "test2", 6, 12); + assertFieldEnd(reader, 12, 13); + assertEnd(reader, 13, 13); +} + +TEST(OsmlStreamParser, multipleCommands) +{ + const char *testString = "\\a \\b \\c \\d"; + // 012 345 678 90 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertCommand(reader, "b", 3, 5); + assertCommand(reader, "c", 6, 8); + assertCommand(reader, "d", 9, 11); + assertEnd(reader, 11, 11); +} + +TEST(OsmlStreamParser, fieldsWithSpaces) +{ + const char *testString = "\\a {\\b \\c} \n\n {\\d}"; + // 0123 456 789012 3 456 789 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, 3, 4); + assertCommand(reader, "b", 4, 6); + assertCommand(reader, "c", 7, 9); + assertFieldEnd(reader, 9, 10); + assertFieldStart(reader, 16, 17); + assertCommand(reader, "d", 17, 19); + assertFieldEnd(reader, 19, 20); + assertEnd(reader, 20, 20); +} + +TEST(OsmlStreamParser, errorNoFieldToStart) +{ + const char *testString = "\\a b {"; + // 012345 + // 0 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "a", 0, 2); + assertData(reader, "b", 3, 4); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader, 6, 6); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorNoFieldToEnd) +{ + const char *testString = "\\a b }"; + // 012345 + // 0 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "a", 0, 2); + assertData(reader, "b", 3, 4); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader, 6, 6); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorNoFieldEndNested) +{ + const char *testString = "\\test{\\test2{}}}"; + // 012345 6789012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertCommand(reader, "test2", 6, 12); + assertFieldStart(reader, 12, 13); + assertFieldEnd(reader, 13, 14); + assertFieldEnd(reader, 14, 15); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader, 16, 16); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorNoFieldEndNestedData) +{ + const char *testString = "\\test{\\test2{}}a}"; + // 012345 67890123456 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "test", 0, 5); + assertFieldStart(reader, 5, 6); + assertCommand(reader, "test2", 6, 12); + assertFieldStart(reader, 12, 13); + assertFieldEnd(reader, 13, 14); + assertFieldEnd(reader, 14, 15); + assertData(reader, "a", 15, 16); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader, 17, 17); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, beginEnd) +{ + const char *testString = "\\begin{book}\\end{book}"; + // 012345678901 2345678901 + // 0 1 2 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", 7, 11); + assertFieldStart(reader, 12, 13); + assertFieldEnd(reader, 17, 21); + assertEnd(reader, 22, 22); +} + +TEST(OsmlStreamParser, beginEndWithName) +{ + const char *testString = "\\begin{book#a}\\end{book}"; + // 01234567890123 4567890123 + // 0 1 2 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", {{"name", "a"}}, 7, 11); + assertFieldStart(reader, 14, 15); + assertFieldEnd(reader, 19, 23); + assertEnd(reader, 24, 24); +} + +TEST(OsmlStreamParser, beginEndWithNameAndArgs) +{ + const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; + // 0123456789012345678901234 56789 01 2345678901 + // 0 1 2 3 4 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); + assertFieldStart(reader, 32, 33); + assertFieldEnd(reader, 37, 41); + assertEnd(reader, 42, 42); +} + +TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) +{ + const char *testString = + "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; + // 0123456789012345678901234 56789 01234 567890123 45678901 2345678901 + // 0 1 2 3 4 5 6 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); + assertFieldStart(reader, 32, 33); + assertData(reader, "a", 33, 34); + assertCommand(reader, "test", Variant::mapType{}, 35, 40); + assertFieldEnd(reader, 40, 41); + assertFieldStart(reader, 41, 42); + assertData(reader, "b", 42, 43); + assertCommand(reader, "test", Variant::mapType{}, 44, 49); + assertFieldStart(reader, 49, 50); + assertFieldEnd(reader, 50, 51); + assertFieldEnd(reader, 51, 52); + assertFieldStart(reader, 52, 53); + assertFieldEnd(reader, 57, 61); + assertEnd(reader, 62, 62); +} + +TEST(OsmlStreamParser, beginEndWithData) +{ + const char *testString = "\\begin{book}a\\end{book}"; + // 0123456789012 3456789012 + // 0 1 2 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", 7, 11); + assertFieldStart(reader, 12, 13); + assertData(reader, "a", 12, 13); + assertFieldEnd(reader, 18, 22); + assertEnd(reader, 23, 23); +} + +TEST(OsmlStreamParser, beginEndWithCommand) +{ + const char *testString = "\\begin{book}\\a{test}\\end{book}"; + // 012345678901 23456789 0123456789 + // 0 1 2 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "book", 7, 11); + assertFieldStart(reader, 12, 13); + assertCommand(reader, "a", 12, 14); + assertFieldStart(reader, 14, 15); + assertData(reader, "test", 15, 19); + assertFieldEnd(reader, 19, 20); + assertFieldEnd(reader, 25, 29); + assertEnd(reader, 30, 30); +} + +TEST(OsmlStreamParser, errorBeginNoBraceOpen) +{ + const char *testString = "\\begin a"; + // 01234567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "a", 7, 8); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorBeginNoIdentifier) +{ + const char *testString = "\\begin{!"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorBeginNoBraceClose) +{ + const char *testString = "\\begin{a"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorBeginNoName) +{ + const char *testString = "\\begin{a#}"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a"); + ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorEndNoBraceOpen) +{ + const char *testString = "\\end a"; + // 012345 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "a", 5, 6); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorEndNoIdentifier) +{ + const char *testString = "\\end{!"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorEndNoBraceClose) +{ + const char *testString = "\\end{a"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorEndNoBegin) +{ + const char *testString = "\\end{a}"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, errorBeginEndMismatch) +{ + const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; + // 0123456789 012345678901234 5678901 + // 0 1 2 3 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, 10, 11); + assertCommand(reader, "b", 17, 18); + assertFieldStart(reader, 20, 24); + assertData(reader, "test", 20, 24); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); +} + +TEST(OsmlStreamParser, commandWithNSSep) +{ + const char *testString = "\\test1:test2"; + // 012345678901 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test1:test2", 0, 12); + assertEnd(reader, 12, 12); +} + +TEST(OsmlStreamParser, beginEndWithNSSep) +{ + const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; + // 0123456789012345678 90123456789012345 + // 0 1 2 3 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "test1:test2", 7, 18); + assertFieldStart(reader, 19, 20); + assertFieldEnd(reader, 24, 35); + assertEnd(reader, 36, 36); +} + +TEST(OsmlStreamParser, errorBeginNSSep) +{ + const char *testString = "\\begin:test{blub}\\end{blub}"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "blub"); + ASSERT_TRUE(logger.hasError()); + assertFieldStart(reader); + assertFieldEnd(reader); + assertEnd(reader); +} + +TEST(OsmlStreamParser, errorEndNSSep) +{ + const char *testString = "\\begin{blub}\\end:test{blub}"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "blub"); + assertFieldStart(reader); + ASSERT_FALSE(logger.hasError()); + assertFieldEnd(reader); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader); +} + +TEST(OsmlStreamParser, errorEmptyNs) +{ + const char *testString = "\\test:"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ":"); + assertEnd(reader); +} + +TEST(OsmlStreamParser, errorRepeatedNs) +{ + const char *testString = "\\test::"; + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(reader, "::"); + assertEnd(reader); +} +} + diff --git a/test/formats/osxml/OsxmlParserTest.cpp b/test/formats/osxml/OsxmlParserTest.cpp new file mode 100644 index 0000000..c0fb50d --- /dev/null +++ b/test/formats/osxml/OsxmlParserTest.cpp @@ -0,0 +1,314 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace ousia { + +namespace RttiTypes { +extern const Rtti Document; +extern const Rtti Domain; +extern const Rtti Typesystem; +} + +struct XmlStandaloneEnvironment : public StandaloneEnvironment { + XmlParser xmlParser; + FileLocator fileLocator; + + XmlStandaloneEnvironment(ConcreteLogger &logger) + : StandaloneEnvironment(logger) + { + fileLocator.addDefaultSearchPaths(); + fileLocator.addUnittestSearchPath("xmlparser"); + + registry.registerDefaultExtensions(); + registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}, + {&RttiTypes::Node}, &xmlParser); + registry.registerResourceLocator(&fileLocator); + } +}; + +static TerminalLogger logger(std::cerr, true); + +TEST(XmlParser, mismatchedTag) +{ + XmlStandaloneEnvironment env(logger); + env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document}); + ASSERT_TRUE(logger.hasError()); +} + +TEST(XmlParser, generic) +{ + XmlStandaloneEnvironment env(logger); + env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node}); +#ifdef MANAGER_GRAPHVIZ_EXPORT + env.manager.exportGraphviz("xmlDocument.dot"); +#endif +} + +static void checkAttributes(Handle expected, + Handle desc) +{ + if (expected == nullptr) { + ASSERT_TRUE(desc->getAttributesDescriptor()->getAttributes().empty()); + } else { + ASSERT_EQ(expected->getName(), + desc->getAttributesDescriptor()->getName()); + auto &attrs_exp = expected->getAttributes(); + auto &attrs = desc->getAttributesDescriptor()->getAttributes(); + ASSERT_EQ(attrs_exp.size(), attrs.size()); + for (size_t i = 0; i < attrs_exp.size(); i++) { + ASSERT_EQ(attrs_exp[i]->getName(), attrs[i]->getName()); + ASSERT_EQ(attrs_exp[i]->getType(), attrs[i]->getType()); + ASSERT_EQ(attrs_exp[i]->isOptional(), attrs[i]->isOptional()); + ASSERT_EQ(attrs_exp[i]->getDefaultValue(), + attrs[i]->getDefaultValue()); + } + } +} + +static void checkStructuredClass( + Handle n, const std::string &name, Handle domain, + Variant cardinality = Cardinality::any(), + Handle attributesDescriptor = nullptr, + Handle superclass = nullptr, bool transparent = false, + bool root = false) +{ + ASSERT_FALSE(n == nullptr); + Handle sc = n.cast(); + ASSERT_FALSE(sc == nullptr); + ASSERT_EQ(name, sc->getName()); + ASSERT_EQ(domain, sc->getParent()); + ASSERT_EQ(cardinality, sc->getCardinality()); + ASSERT_EQ(transparent, sc->isTransparent()); + ASSERT_EQ(root, sc->hasRootPermission()); + checkAttributes(attributesDescriptor, sc); +} + +static Rooted checkStructuredClass( + const std::string &resolve, const std::string &name, Handle domain, + Variant cardinality = Cardinality::any(), + Handle attributesDescriptor = nullptr, + Handle superclass = nullptr, bool transparent = false, + bool root = false) +{ + auto res = domain->resolve(&RttiTypes::StructuredClass, resolve); + if (res.size() != 1) { + throw OusiaException("resolution error!"); + } + Handle sc = res[0].node.cast(); + checkStructuredClass(sc, name, domain, cardinality, attributesDescriptor, + superclass, transparent, root); + return sc; +} + +static void checkAnnotationClass( + Handle n, const std::string &name, Handle domain, + Handle attributesDescriptor = nullptr) +{ + ASSERT_FALSE(n == nullptr); + Handle ac = n.cast(); + ASSERT_FALSE(ac == nullptr); + ASSERT_EQ(name, ac->getName()); + ASSERT_EQ(domain, ac->getParent()); + checkAttributes(attributesDescriptor, ac); +} + +static Rooted checkAnnotationClass( + const std::string &resolve, const std::string &name, Handle domain, + Handle attributesDescriptor = nullptr) +{ + auto res = domain->resolve(&RttiTypes::AnnotationClass, resolve); + if (res.size() != 1) { + throw OusiaException("resolution error!"); + } + Handle ac = res[0].node.cast(); + checkAnnotationClass(ac, name, domain, attributesDescriptor); + return ac; +} + +static void checkFieldDescriptor( + Handle n, const std::string &name, Handle parent, + NodeVector children, + FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE, + Handle primitiveType = nullptr, bool optional = false) +{ + ASSERT_FALSE(n == nullptr); + Handle field = n.cast(); + ASSERT_FALSE(field.isNull()); + ASSERT_EQ(name, field->getName()); + ASSERT_EQ(parent, field->getParent()); + ASSERT_EQ(type, field->getFieldType()); + ASSERT_EQ(primitiveType, field->getPrimitiveType()); + ASSERT_EQ(optional, field->isOptional()); + // check the children. + ASSERT_EQ(children.size(), field->getChildren().size()); + for (unsigned int c = 0; c < children.size(); c++) { + ASSERT_EQ(children[c], field->getChildren()[c]); + } +} + +static void checkFieldDescriptor( + Handle desc, Handle parent, + NodeVector children, + const std::string &name = DEFAULT_FIELD_NAME, + FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE, + Handle primitiveType = nullptr, bool optional = false) +{ + auto res = desc->resolve(&RttiTypes::FieldDescriptor, name); + ASSERT_EQ(1, res.size()); + checkFieldDescriptor(res[0].node, name, parent, children, type, + primitiveType, optional); +} + +static void checkFieldDescriptor( + Handle desc, NodeVector children, + const std::string &name = DEFAULT_FIELD_NAME, + FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE, + Handle primitiveType = nullptr, bool optional = false) +{ + checkFieldDescriptor(desc, desc, children, name, type, primitiveType, + optional); +} + +TEST(XmlParser, domainParsing) +{ + XmlStandaloneEnvironment env(logger); + Rooted book_domain_node = + env.parse("book_domain.oxm", "", "", RttiSet{&RttiTypes::Domain}); + ASSERT_FALSE(book_domain_node == nullptr); + ASSERT_FALSE(logger.hasError()); + // check the domain node. + Rooted book_domain = book_domain_node.cast(); + ASSERT_EQ("book", book_domain->getName()); + // get the book struct node. + Cardinality single; + single.merge({1}); + Rooted bookAuthor{ + new StructType(book_domain->getManager(), "", nullptr)}; + bookAuthor->addAttribute( + {new Attribute(book_domain->getManager(), "author", + env.project->getSystemTypesystem()->getStringType(), + "")}, + logger); + Rooted book = checkStructuredClass( + "book", "book", book_domain, single, bookAuthor, nullptr, false, true); + // get the chapter struct node. + Rooted chapter = + checkStructuredClass("chapter", "chapter", book_domain); + Rooted section = + checkStructuredClass("section", "section", book_domain); + Rooted subsection = + checkStructuredClass("subsection", "subsection", book_domain); + Rooted paragraph = + checkStructuredClass("paragraph", "paragraph", book_domain, + Cardinality::any(), nullptr, nullptr, true, false); + Rooted text = + checkStructuredClass("text", "text", book_domain, Cardinality::any(), + nullptr, nullptr, true, false); + + // check the FieldDescriptors. + checkFieldDescriptor(book, {chapter, paragraph}); + checkFieldDescriptor(chapter, {section, paragraph}); + checkFieldDescriptor(section, {subsection, paragraph}); + checkFieldDescriptor(subsection, {paragraph}); + checkFieldDescriptor(paragraph, {text}); + checkFieldDescriptor( + text, {}, DEFAULT_FIELD_NAME, FieldDescriptor::FieldType::PRIMITIVE, + env.project->getSystemTypesystem()->getStringType(), false); + + // check parent handling using the headings domain. + Rooted headings_domain_node = + env.parse("headings_domain.oxm", "", "", RttiSet{&RttiTypes::Domain}); + ASSERT_FALSE(headings_domain_node == nullptr); + ASSERT_FALSE(logger.hasError()); + Rooted headings_domain = headings_domain_node.cast(); + // now there should be a heading struct. + Rooted heading = + checkStructuredClass("heading", "heading", headings_domain, single, + nullptr, nullptr, true, false); + // which should be a reference to the paragraph descriptor. + checkFieldDescriptor(heading, paragraph, {text}); + // and each struct in the book domain (except for text) should have a + // heading field now. + checkFieldDescriptor(book, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + checkFieldDescriptor(chapter, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + checkFieldDescriptor(section, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + checkFieldDescriptor(subsection, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + checkFieldDescriptor(paragraph, {heading}, "heading", + FieldDescriptor::FieldType::SUBTREE, nullptr, true); + + // check annotation handling using the comments domain. + Rooted comments_domain_node = + env.parse("comments_domain.oxm", "", "", RttiSet{&RttiTypes::Domain}); + ASSERT_FALSE(comments_domain_node == nullptr); + ASSERT_FALSE(logger.hasError()); + Rooted comments_domain = comments_domain_node.cast(); + // now we should be able to find a comment annotation. + Rooted comment_anno = + checkAnnotationClass("comment", "comment", comments_domain); + // as well as a comment struct + Rooted comment = + checkStructuredClass("comment", "comment", comments_domain); + // and a reply struct + Rooted reply = + checkStructuredClass("reply", "reply", comments_domain); + // check the fields for each of them. + { + std::vector> descs{comment_anno, comment, reply}; + for (auto &d : descs) { + checkFieldDescriptor(d, {paragraph}, "content", + FieldDescriptor::FieldType::SUBTREE, nullptr, + false); + checkFieldDescriptor(d, {reply}, "replies", + FieldDescriptor::FieldType::SUBTREE, nullptr, + false); + } + } + // paragraph should have comment as child now as well. + checkFieldDescriptor(paragraph, {text, comment}); + // as should heading, because it references the paragraph default field. + checkFieldDescriptor(heading, paragraph, {text, comment}); +} + +TEST(XmlParser, documentParsing) +{ + XmlStandaloneEnvironment env(logger); + Rooted book_domain_node = + env.parse("simple_book.oxd", "", "", RttiSet{&RttiTypes::Document}); + //TODO: Check result +} +} + -- cgit v1.2.3 From 2659b4595d809cbd69a77e5ff7e2fc08d225f065 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:02:54 +0100 Subject: Tidied OsxmlEventParser up, implemented correct whitespace handling, started to write unit tests for the osxml parser --- CMakeLists.txt | 93 +++--- src/core/common/Utils.hpp | 21 +- src/core/common/WhitespaceHandler.hpp | 60 ++++ src/formats/osxml/OsxmlAttributeLocator.cpp | 144 ++++++++++ src/formats/osxml/OsxmlAttributeLocator.hpp | 67 +++++ src/formats/osxml/OsxmlEventParser.cpp | 425 +++++++++++++++------------- src/formats/osxml/OsxmlEventParser.hpp | 44 +-- test/formats/osml/OsmlStreamParserTest.cpp | 1 + test/formats/osxml/OsxmlEventParserTest.cpp | 222 +++++++++++++++ 9 files changed, 811 insertions(+), 266 deletions(-) create mode 100644 src/formats/osxml/OsxmlAttributeLocator.cpp create mode 100644 src/formats/osxml/OsxmlAttributeLocator.hpp create mode 100644 test/formats/osxml/OsxmlEventParserTest.cpp (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e3b90f..bdc9541 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,9 +147,9 @@ ADD_LIBRARY(ousia_core src/core/model/RootNode src/core/model/Style src/core/model/Typesystem -# src/core/parser/Parser -# src/core/parser/ParserContext -# src/core/parser/ParserScope + src/core/parser/Parser + src/core/parser/ParserContext + src/core/parser/ParserScope # src/core/parser/generic/ParserState # src/core/parser/generic/ParserStateCallbacks # src/core/parser/generic/ParserStateHandler @@ -183,36 +183,37 @@ TARGET_LINK_LIBRARIES(ousia_osml ousia_core ) -#ADD_LIBRARY(ousia_osxml -# src/formats/osxml/osxmlParser -#) +ADD_LIBRARY(ousia_osxml + src/formats/osxml/OsxmlAttributeLocator + src/formats/osxml/OsxmlEventParser +) -#TARGET_LINK_LIBRARIES(ousia_osxml -# ousia_core -# ${EXPAT_LIBRARIES} -#) +TARGET_LINK_LIBRARIES(ousia_osxml + ousia_core + ${EXPAT_LIBRARIES} +) # Resource locators -#ADD_LIBRARY(ousia_filesystem -# src/plugins/filesystem/FileLocator -# src/plugins/filesystem/SpecialPaths -#) +ADD_LIBRARY(ousia_filesystem + src/plugins/filesystem/FileLocator + src/plugins/filesystem/SpecialPaths +) -#TARGET_LINK_LIBRARIES(ousia_filesystem -# ousia_core -# ${Boost_LIBRARIES} -#) +TARGET_LINK_LIBRARIES(ousia_filesystem + ousia_core + ${Boost_LIBRARIES} +) # Output libraries -#ADD_LIBRARY(ousia_html -# src/plugins/html/DemoOutput -#) +ADD_LIBRARY(ousia_html + src/plugins/html/DemoOutput +) -#TARGET_LINK_LIBRARIES(ousia_html -# ousia_core -#) +TARGET_LINK_LIBRARIES(ousia_html + ousia_core +) #ADD_LIBRARY(ousia_mozjs # src/plugins/mozjs/MozJsScriptEngine @@ -247,7 +248,7 @@ IF(TEST) ADD_EXECUTABLE(ousia_test_core test/core/RangeSetTest -# test/core/RegistryTest + test/core/RegistryTest test/core/XMLTest test/core/common/ArgumentTest test/core/common/CharReaderTest @@ -272,7 +273,7 @@ IF(TEST) test/core/model/NodeTest test/core/model/StyleTest test/core/model/TypesystemTest -# test/core/parser/ParserScopeTest + test/core/parser/ParserScopeTest # test/core/parser/ParserStackTest # test/core/parser/ParserStateTest test/core/parser/utils/TokenizerTest @@ -311,15 +312,15 @@ IF(TEST) # ousia_css # ) -# ADD_EXECUTABLE(ousia_test_html -# test/plugins/html/DemoOutputTest -# ) + ADD_EXECUTABLE(ousia_test_html + test/plugins/html/DemoOutputTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_html -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_html -# ) + TARGET_LINK_LIBRARIES(ousia_test_html + ${GTEST_LIBRARIES} + ousia_core + ousia_html + ) ADD_EXECUTABLE(ousia_test_osml test/formats/osml/OsmlStreamParserTest @@ -331,16 +332,16 @@ IF(TEST) ousia_osml ) -# ADD_EXECUTABLE(ousia_test_osxml -# test/plugins/xml/XmlParserTest -# ) + ADD_EXECUTABLE(ousia_test_osxml + test/formats/osxml/OsxmlEventParserTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_osxml -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_osml -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_osxml + ${GTEST_LIBRARIES} + ousia_core + ousia_osxml + ousia_filesystem + ) # ADD_EXECUTABLE(ousia_test_mozjs # test/plugins/mozjs/MozJsScriptEngineTest @@ -354,11 +355,11 @@ IF(TEST) # Register the unit tests ADD_TEST(ousia_test_core ousia_test_core) -# ADD_TEST(ousia_test_filesystem ousia_test_filesystem) + ADD_TEST(ousia_test_filesystem ousia_test_filesystem) # ADD_TEST(ousia_test_css ousia_test_css) -# ADD_TEST(ousia_test_html ousia_test_html) + ADD_TEST(ousia_test_html ousia_test_html) ADD_TEST(ousia_test_osml ousia_test_osml) -# ADD_TEST(ousia_test_osxml ousia_test_osxml) + ADD_TEST(ousia_test_osxml ousia_test_osxml) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) ENDIF() diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 16a9136..8361973 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -119,9 +119,26 @@ public: */ template static std::pair trim(const T &s, Filter f) + { + return trim(s, s.size(), f); + } + + /** + * Trims the given string or vector of chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param len is the number of elements in the container. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template + static std::pair trim(const T &s, size_t len, Filter f) { size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { + for (size_t i = 0; i < len; i++) { if (!f(s[i])) { start = i; break; @@ -129,7 +146,7 @@ public: } size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { + for (ssize_t i = len - 1; i >= static_cast(start); i--) { if (!f(s[i])) { end = i + 1; break; diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp index 79e0518..ed52ea3 100644 --- a/src/core/common/WhitespaceHandler.hpp +++ b/src/core/common/WhitespaceHandler.hpp @@ -97,6 +97,25 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd); + } + + /** + * Static version of PreservingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd) { if (textBuf.empty()) { textStart = start; @@ -129,6 +148,27 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); + } + + /** + * Static version of TrimmingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param whitespaceBuf is a reference at the buffer for storing whitespace + * characters. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd, std::vector &whitespaceBuf) { // Handle whitespace characters if (Utils::isWhitespace(c)) { @@ -174,6 +214,26 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); + } + + /** + * Static version of CollapsingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param hasWhitespace is a reference at the "hasWhitespace" flag. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd, bool &hasWhitespace) { // Handle whitespace characters if (Utils::isWhitespace(c)) { diff --git a/src/formats/osxml/OsxmlAttributeLocator.cpp b/src/formats/osxml/OsxmlAttributeLocator.cpp new file mode 100644 index 0000000..e37446a --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.cpp @@ -0,0 +1,144 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include + +#include "OsxmlAttributeLocator.hpp" + +namespace ousia { + +/** + * Enum used internally in the statemachine of the xml argument parser. + */ +enum class XmlAttributeState { + IN_TAG_NAME, + SEARCH_ATTR, + IN_ATTR_NAME, + HAS_ATTR_NAME, + HAS_ATTR_EQUALS, + IN_ATTR_DATA +}; + +std::map OsxmlAttributeLocator::locate( + CharReader &reader, size_t offs) +{ + std::map res; + + // Fork the reader, we don't want to mess up the XML parsing process, do we? + CharReaderFork readerFork = reader.fork(); + + // Move the read cursor to the start location, abort if this does not work + if (offs != readerFork.seek(offs)) { + return res; + } + + // Now all we need to do is to implement one half of an XML parser. As this + // is inherently complicated we'll totaly fail at it. Don't care. All we + // want to get is those darn offsets for pretty error messages... (and we + // can assume the XML is valid as it was already read by expat) + XmlAttributeState state = XmlAttributeState::IN_TAG_NAME; + char c; + std::stringstream attrName; + while (readerFork.read(c)) { + // Abort at the end of the tag + if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) { + return res; + } + + // One state machine to rule them all, one state machine to find them, + // One state machine to bring them all and in the darkness bind them + // (the byte offsets) + switch (state) { + case XmlAttributeState::IN_TAG_NAME: + if (Utils::isWhitespace(c)) { + res.emplace("$tag", + SourceLocation{reader.getSourceId(), offs + 1, + readerFork.getOffset() - 1}); + state = XmlAttributeState::SEARCH_ATTR; + } + break; + case XmlAttributeState::SEARCH_ATTR: + if (!Utils::isWhitespace(c)) { + state = XmlAttributeState::IN_ATTR_NAME; + attrName << c; + } + break; + case XmlAttributeState::IN_ATTR_NAME: + if (Utils::isWhitespace(c)) { + state = XmlAttributeState::HAS_ATTR_NAME; + } else if (c == '=') { + state = XmlAttributeState::HAS_ATTR_EQUALS; + } else { + attrName << c; + } + break; + case XmlAttributeState::HAS_ATTR_NAME: + if (!Utils::isWhitespace(c)) { + if (c == '=') { + state = XmlAttributeState::HAS_ATTR_EQUALS; + break; + } + // Well, this is a strange XML file... We expected to + // see a '=' here! Try to continue with the + // "HAS_ATTR_EQUALS" state as this state will hopefully + // inlcude some error recovery + } else { + // Skip whitespace here + break; + } + // Fallthrough + case XmlAttributeState::HAS_ATTR_EQUALS: + if (!Utils::isWhitespace(c)) { + if (c == '"') { + // Here we are! We have found the beginning of an + // attribute. Let's quickly lock the current offset away + // in the result map + res.emplace(attrName.str(), + SourceLocation{reader.getSourceId(), + readerFork.getOffset()}); + state = XmlAttributeState::IN_ATTR_DATA; + } else { + // No, this XML file is not well formed. Assume we're in + // an attribute name once again + attrName.str(std::string{&c, 1}); + state = XmlAttributeState::IN_ATTR_NAME; + } + } + break; + case XmlAttributeState::IN_ATTR_DATA: + if (c == '"') { + // We're at the end of the attribute data, set the end + // location + auto it = res.find(attrName.str()); + if (it != res.end()) { + it->second.setEnd(readerFork.getOffset() - 1); + } + + // Reset the attribute name and restart the search + attrName.str(std::string{}); + state = XmlAttributeState::SEARCH_ATTR; + } + break; + } + } + return res; +} +} + diff --git a/src/formats/osxml/OsxmlAttributeLocator.hpp b/src/formats/osxml/OsxmlAttributeLocator.hpp new file mode 100644 index 0000000..f9a3437 --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.hpp @@ -0,0 +1,67 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file OsxmlAttributeLocator.hpp + * + * Contains a class used for locating the byte offsets of the attributes given + * in a XML tag. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ +#define _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ + +#include + +namespace ousia { + +// Forward declarations +class CharReader; +class SourceLocation; + +/** + * Class containing one static function for locating the byte offsets of the + * attributes in a XML tag. This are not retrieved by our xml parser, so we have + * to do this manually. + */ +class OsxmlAttributeLocator { +public: + /** + * Function used to reconstruct the location of the attributes of a XML tag + * in the source code. This is necessary, as the xml parser only returns an + * offset to the begining of a tag and not to the position of the individual + * arguments. + * + * @param reader is the char reader from which the character data should be + * read. + * @param offs is a byte offset in the xml file pointing at the "<" + * character of the tag. + * @return a map from attribute keys to the corresponding location + * (including range) of the atribute. Also contains the location of the + * tagname in the form of the virtual attribute "$tag". + */ + static std::map locate(CharReader &reader, + size_t offs); +}; + +} + +#endif /* _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ */ + diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index 2ef170e..b4aff77 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -18,14 +18,22 @@ #include +#include + +#include #include #include +#include #include +#include +#include "OsxmlAttributeLocator.hpp" #include "OsxmlEventParser.hpp" namespace ousia { +/* Class OsxmlEventParser */ + /** * Class containing data used by the internal functions. */ @@ -43,41 +51,75 @@ public: */ ssize_t annotationEndTagDepth; + /** + * Current character data buffer. + */ + std::vector textBuf; + + /** + * Current whitespace buffer (for the trimming whitspace mode) + */ + std::vector whitespaceBuf; + + /** + * Flag indicating whether a whitespace character was present (for the + * collapsing whitespace mode). + */ + bool hasWhitespace; + + /** + * Current character data start. + */ + size_t textStart; + + /** + * Current character data end. + */ + size_t textEnd; + /** * Default constructor. */ - OsxmlEventParserData() : depth(0), annotationEndTagDepth(-1) {} + OsxmlEventParserData(); /** * Increments the depth. */ - void incrDepth() { depth++; } + void incrDepth(); /** * Decrement the depth and reset the annotationEndTagDepth flag. */ - void decrDepth() - { - if (depth > 0) { - depth--; - } - if (depth < annotationEndTagDepth) { - annotationEndTagDepth = -1; - } - } + void decrDepth(); /** * Returns true if we're currently inside an end tag. */ - bool inAnnotationEndTag() { depth >= annotationEndTagDepth; } + bool inAnnotationEndTag(); + + /** + * Returns true if character data is available. + * + * @return true if character data is available. + */ + bool hasText(); + + /** + * Returns a Variant containing the character data and its location. + * + * @return a string variant containing the text data and the character + * location. + */ + Variant getText(SourceId sourceId); }; -namespace { +/* Class GuardedExpatXmlParser */ + /** * Wrapper class around the XML_Parser pointer which safely frees it whenever * the scope is left (e.g. because an exception was thrown). */ -class ScopedExpatXmlParser { +class GuardedExpatXmlParser { private: /** * Internal pointer to the XML_Parser instance. @@ -86,14 +128,14 @@ private: public: /** - * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS + * Constructor of the GuardedExpatXmlParser class. Calls XML_ParserCreateNS * from the expat library. Throws a parser exception if the XML parser * cannot be initialized. * * @param encoding is the protocol-defined encoding passed to expat (or * nullptr if expat should determine the encoding by itself). */ - ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) + GuardedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) { parser = XML_ParserCreate(encoding); if (!parser) { @@ -103,9 +145,9 @@ public: } /** - * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance. + * Destuctor of the GuardedExpatXmlParser, frees the XML parser instance. */ - ~ScopedExpatXmlParser() + ~GuardedExpatXmlParser() { if (parser) { XML_ParserFree(parser); @@ -120,134 +162,20 @@ public: }; /** - * Enum used internally in the statemachine of the micro-xml argument parser. + * Name of the special outer tag used for allowing multiple top-level elements + * in an xml file. */ -enum class XmlAttributeState { - IN_TAG_NAME, - SEARCH_ATTR, - IN_ATTR_NAME, - HAS_ATTR_NAME, - HAS_ATTR_EQUALS, - IN_ATTR_DATA -}; +static const std::string TOP_LEVEL_TAG{"ousia"}; /** - * Function used to reconstruct the location of the attributes of a XML tag in - * the source code. This is necessary, as the xml parser only returns an offset - * to the begining of a tag and not to the position of the individual arguments. - * - * @param reader is the char reader from which the character data should be - * read. - * @param offs is a byte offset in the xml file pointing at the "<" character of - * the tag. - * @return a map from attribute keys to the corresponding location (including - * range) of the atribute. Also contains the location of the tagname in the - * form of the virtual attribute "$tag". + * Prefix used to indicate the start of an annoation (note the trailing colon) */ -static std::map xmlReconstructAttributeOffsets( - CharReader &reader, size_t offs) -{ - std::map res; - - // Fork the reader, we don't want to mess up the XML parsing process, do we? - CharReaderFork readerFork = reader.fork(); - - // Move the read cursor to the start location, abort if this does not work - if (!location.isValid() || offs != readerFork.seek(offs)) { - return res; - } - - // Now all we need to do is to implement one half of an XML parser. As this - // is inherently complicated we'll totaly fail at it. Don't care. All we - // want to get is those darn offsets for pretty error messages... (and we - // can assume the XML is valid as it was already read by expat) - XmlAttributeState state = XmlAttributeState::IN_TAG_NAME; - char c; - std::stringstream attrName; - while (readerFork.read(c)) { - // Abort at the end of the tag - if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) { - return res; - } +static const std::string ANNOTATION_START_PREFIX{"a:start:"}; - // One state machine to rule them all, one state machine to find them, - // One state machine to bring them all and in the darkness bind them - // (the byte offsets) - switch (state) { - case XmlAttributeState::IN_TAG_NAME: - if (Utils::isWhitespace(c)) { - res.emplace("$tag", - SourceLocation{reader.getSourceId(), offs + 1, - readerFork.getOffset() - 1}); - state = XmlAttributeState::SEARCH_ATTR; - } - break; - case XmlAttributeState::SEARCH_ATTR: - if (!Utils::isWhitespace(c)) { - state = XmlAttributeState::IN_ATTR_NAME; - attrName << c; - } - break; - case XmlAttributeState::IN_ATTR_NAME: - if (Utils::isWhitespace(c)) { - state = XmlAttributeState::HAS_ATTR_NAME; - } else if (c == '=') { - state = XmlAttributeState::HAS_ATTR_EQUALS; - } else { - attrName << c; - } - break; - case XmlAttributeState::HAS_ATTR_NAME: - if (!Utils::isWhitespace(c)) { - if (c == '=') { - state = XmlAttributeState::HAS_ATTR_EQUALS; - break; - } - // Well, this is a strange XML file... We expected to - // see a '=' here! Try to continue with the - // "HAS_ATTR_EQUALS" state as this state will hopefully - // inlcude some error recovery - } else { - // Skip whitespace here - break; - } - // Fallthrough - case XmlAttributeState::HAS_ATTR_EQUALS: - if (!Utils::isWhitespace(c)) { - if (c == '"') { - // Here we are! We have found the beginning of an - // attribute. Let's quickly lock the current offset away - // in the result map - res.emplace(attrName.str(), - SourceLocation{reader.getSourceId(), - readerFork.getOffset()}); - state = XmlAttributeState::IN_ATTR_DATA; - } else { - // No, this XML file is not well formed. Assume we're in - // an attribute name once again - attrName.str(std::string{&c, 1}); - state = XmlAttributeState::IN_ATTR_NAME; - } - } - break; - case XmlAttributeState::IN_ATTR_DATA: - if (c == '"') { - // We're at the end of the attribute data, set the end - // location - auto it = res.find(attrName.str()); - if (it != res.end()) { - it->second.setEnd(readerFork.getOffset() - 1); - } - - // Reset the attribute name and restart the search - attrName.str(std::string{}); - state = XmlAttributeState::SEARCH_ATTR; - } - break; - } - } - return res; -} +/** + * Prefix used to indicate the end of an annotation. + */ +static const std::string ANNOTATION_END_PREFIX{"a:end"}; /** * Synchronizes the position of the xml parser with the default location of the @@ -268,22 +196,12 @@ static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0) size_t offs = XML_GetCurrentByteIndex(p); SourceLocation loc = SourceLocation{parser->getReader().getSourceId(), offs, offs + len}; - parser->getLogger().setDefaultLocation(location); + parser->getLogger().setDefaultLocation(loc); // Return the fetched location return loc; } -/** - * Prefix used to indicate the start of an annoation, - */ -static const std::string ANNOTATION_START_PREFIX{"a:start:"}; - -/** - * Prefix used to indicate the end of an annotation. - */ -static const std::string ANNOTATION_END_PREFIX{"a:end"}; - /** * Callback called by eXpat whenever a start handler is reached. */ @@ -292,14 +210,21 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); + + // If there is any text data in the buffer, issue that first + if (parser->getData().hasText()) { + parser->getEvents().data( + parser->getData().getText(parser->getReader().getSourceId())); + } // Read the argument locations -- this is only a stupid and slow hack, // but it is necessary, as expat doesn't give use the byte offset of the // arguments. std::map attributeOffsets = - xmlReconstructXMLAttributeOffsets(*userData->reader, - XML_GetCurrentByteIndex(p)); + OsxmlAttributeLocator::locate(parser->getReader(), + XML_GetCurrentByteIndex(p)); // Update the logger position SourceLocation loc = xmlSyncLoggerPosition(p); @@ -316,7 +241,8 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // Make sure we're currently not inside an annotation end tag -- this would // be highly illegal! if (parser->getData().inAnnotationEndTag()) { - logger.error("No tags allowed inside an annotation end tag", nameLoc); + parser->getLogger().error( + "No tags allowed inside an annotation end tag", nameLoc); return; } @@ -336,36 +262,33 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // Parse the string, pass the location of the key std::pair value = VariantReader::parseGenericString( - *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(), + *(attr++), parser->getLogger(), keyLoc.getSourceId(), keyLoc.getStart()); // Set the overall location of the parsed element to the attribute // location - value.second->setLocation(keyLoc); - - // Store the - if (!args.emplace(key, value.second).second) { - parser->getLogger().warning( - std::string("Attribute \"") + key + - "\" defined multiple times, only using first definition", - keyLoc); - } + value.second.setLocation(keyLoc); + + // Store the keys in the map + args.emplace(key, value.second).second; } // Fetch the name of the tag, check for special tags std::string nameStr(name); - if (nameStr == "ousia" && parser->getData().depth == 1) { - // We're in the top-level and the magic "ousia" tag is reached -- just + if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 1) { + // We're in the top-level and the magic tag is reached -- just // ignore it and issue a warning for each argument that has been given for (const auto &arg : args) { - parser->getLogger().warning( - std::string("Ignoring attribute \"") + arg.first + - std::string("\" for magic tag \"ousia\""), - arg.second); + parser->getLogger().warning(std::string("Ignoring attribute \"") + + arg.first + + std::string("\" for magic tag \"") + + TOP_LEVEL_TAG + std::string("\""), + arg.second); } } else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) { // Assemble a name variant containing the name minus the prefix - Variant nameVar = nameStr.substr(ANNOTATION_START_PREFIX.size()); + Variant nameVar = + Variant::fromString(nameStr.substr(ANNOTATION_START_PREFIX.size())); nameVar.setLocation(nameLoc); // Issue the "annotationStart" event @@ -410,25 +333,34 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, } } -static void xmlEndElementHandler(void *p, const XML_Char *name) +static void xmlEndElementHandler(void *ref, const XML_Char *name) { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); // Synchronize the position of the logger with teh position - xmlSyncLoggerPosition(parser); - - // Decrement the current depth - parser->getData().decrDepth(); + xmlSyncLoggerPosition(p); // Abort as long as we're in an annotation end tag if (parser->getData().inAnnotationEndTag()) { + parser->getData().decrDepth(); return; } + // Decrement the current depth + parser->getData().decrDepth(); + + // If there is any text data in the buffer, issue that first + if (parser->getData().hasText()) { + parser->getEvents().data( + parser->getData().getText(parser->getReader().getSourceId())); + } + // Abort if the special ousia tag ends here - if (nameStr == "ousia" && parser->getData().depth == 0) { + std::string nameStr{name}; + if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 0) { return; } @@ -436,20 +368,105 @@ static void xmlEndElementHandler(void *p, const XML_Char *name) parser->getEvents().fieldEnd(); } -static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len) +static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); - - // TODO -/* size_t ulen = len > 0 ? static_cast(len) : 0; - syncLoggerPosition(parser, ulen); - const std::string data = Utils::trim(std::string{s, ulen}); - if (!data.empty()) { - stack->data(data); - }*/ + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); + + // Abort as long as we're in an annotation end tag + if (parser->getData().inAnnotationEndTag()) { + return; + } + + // Convert the signed (smell the 90's C library here?) length to an usigned + // value + size_t ulen = len > 0 ? static_cast(len) : 0; + + // Synchronize the logger position + SourceLocation loc = xmlSyncLoggerPosition(p, ulen); + + // Fetch some variables for convenience + const WhitespaceMode mode = parser->getWhitespaceMode(); + OsxmlEventParserData &data = parser->getData(); + std::vector &textBuf = data.textBuf; + std::vector &whitespaceBuf = data.whitespaceBuf; + bool &hasWhitespace = data.hasWhitespace; + size_t &textStart = data.textStart; + size_t &textEnd = data.textEnd; + + size_t pos = loc.getStart(); + for (size_t i = 0; i < ulen; i++, pos++) { + switch (mode) { + case WhitespaceMode::PRESERVE: + PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd); + break; + case WhitespaceMode::TRIM: + TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd, + whitespaceBuf); + break; + case WhitespaceMode::COLLAPSE: + CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd, + hasWhitespace); + break; + } + } +} + +/* Class OsxmlEvents */ + +OsxmlEvents::~OsxmlEvents() {} + +/* Class OsxmlEventParser */ + +OsxmlEventParserData::OsxmlEventParserData() + : depth(0), + annotationEndTagDepth(-1), + hasWhitespace(false), + textStart(0), + textEnd(0) +{ +} + +void OsxmlEventParserData::incrDepth() { depth++; } + +void OsxmlEventParserData::decrDepth() +{ + if (depth > 0) { + depth--; + } + if (depth < annotationEndTagDepth) { + annotationEndTagDepth = -1; + } +} + +bool OsxmlEventParserData::inAnnotationEndTag() +{ + return (annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth); } + +bool OsxmlEventParserData::hasText() { return !textBuf.empty(); } + +Variant OsxmlEventParserData::getText(SourceId sourceId) +{ + // Create a variant containing the string data and the location + Variant var = + Variant::fromString(std::string{textBuf.data(), textBuf.size()}); + var.setLocation({sourceId, textStart, textEnd}); + + // Reset the text buffers + textBuf.clear(); + whitespaceBuf.clear(); + hasWhitespace = false; + textStart = 0; + textEnd = 0; + + // Return the variant + return var; } /* Class OsxmlEventParser */ @@ -459,21 +476,22 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, : reader(reader), events(events), logger(logger), - whitespaceMode(WhitespaceMode::COLLAPSE), + whitespaceMode(WhitespaceMode::TRIM), data(new OsxmlEventParserData()) { } -void OsxmlEventParser::parse(CharReader &reader) +OsxmlEventParser::~OsxmlEventParser() {} + +void OsxmlEventParser::parse() { // Create the parser object - ScopedExpatXmlParser p{"UTF-8"}; + GuardedExpatXmlParser p{"UTF-8"}; // Reset the depth - depth = 0; + data->depth = 0; - // Pass the reference to the ParserStack to the XML handler - XMLUserData data(&stack, &reader); + // Pass the reference to this parser instance to the XML handler XML_SetUserData(&p, this); XML_UseParserAsHandlerArg(&p); @@ -498,7 +516,7 @@ void OsxmlEventParser::parse(CharReader &reader) if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { throw LoggableException{ "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))}, - xmlSyncLoggerPosition(p)}; + xmlSyncLoggerPosition(&p)}; } // Abort once there are no more bytes in the stream @@ -513,12 +531,17 @@ void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode) this->whitespaceMode = whitespaceMode; } -CharReader &OsxmlEventParser::getCharReader() { return charReader; } +WhitespaceMode OsxmlEventParser::getWhitespaceMode() const +{ + return whitespaceMode; +} + +CharReader &OsxmlEventParser::getReader() const { return reader; } -Logger &OsxmlEventParser::getLogger() { return logger; } +Logger &OsxmlEventParser::getLogger() const { return logger; } -OsxmlEvents &OsxmlEventParser::getEvents() { return events; } +OsxmlEvents &OsxmlEventParser::getEvents() const { return events; } -OsxmlEventParserData &OsxmlEventParser::getData() { return *data; } +OsxmlEventParserData &OsxmlEventParser::getData() const { return *data; } } diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index 5319ca6..aa20ea9 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -42,7 +42,7 @@ class Variant; class OsxmlEventParserData; /** - * Interface which defines the callback functions which are called by the + * Interface which defines the callback functions which are called by the * OsxmlEventParser whenever an event occurs. */ class OsxmlEvents { @@ -50,13 +50,13 @@ public: /** * Virtual destructor. */ - virtual ~OsxmlEvents() {} + virtual ~OsxmlEvents(); /** * Called whenever a command starts. Note that this implicitly always starts * the default field of the command. * - * @param name is a string variant containing name and location of the + * @param name is a string variant containing name and location of the * command. * @param args is a map variant containing the arguments that were given * to the command. @@ -67,12 +67,12 @@ public: * Called whenever an annotation starts. Note that this implicitly always * starts the default field of the annotation. * - * @param name is a string variant containing the name of the annotation + * @param name is a string variant containing the name of the annotation * class and the location of the annotation definition. * @param args is a map variant containing the arguments that were given * to the annotation definition. */ - virtual void annotationStart(Variant name, Variant args); + virtual void annotationStart(Variant name, Variant args) = 0; /** * Called whenever the range of an annotation ends. The callee must @@ -85,12 +85,12 @@ public: * ended here. May be empty (or nullptr), if no elementName has been * specified at the end of the annotation. */ - virtual void annotationEnd(Variant name, Variant elementName); + virtual void annotationEnd(Variant name, Variant elementName) = 0; /** - * Called whenever the default field which was implicitly started by + * Called whenever the default field which was implicitly started by * commandStart or annotationStart ends. Note that this does not end the - * range of an annotation, but the default field of the annotation. To + * range of an annotation, but the default field of the annotation. To * signal the end of the annotation this, the annotationEnd method will be * invoked. */ @@ -102,11 +102,10 @@ public: * is not called if the parsing failed, the parser prints an error message * instead. * - * @param data is the already parsed data that should be passed to the + * @param data is the already parsed data that should be passed to the * handler. */ virtual void data(Variant data) = 0; - }; /** @@ -148,7 +147,7 @@ public: * Constructor fo the OsxmlEventParser. Takes a reference at the OsxmlEvents * of which the callback functions are called. * - * @param reader is a reference to the CharReader instance from which the + * @param reader is a reference to the CharReader instance from which the * XML should be read. * @param events is a refence at an instance of the OsxmlEvents class. All * events are forwarded to this class. @@ -157,6 +156,11 @@ public: */ OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger); + /** + * Destructor of OsxmlEventParser (needed for unique_ptr to incomplete type) + */ + ~OsxmlEventParser(); + /** * Performs the actual parsing. Reads the XML using eXpat and calles the * callbacks in the event listener instance whenever something interesting @@ -167,38 +171,44 @@ public: /** * Sets the whitespace handling mode. * - * @param whitespaceMode defines how whitespace in the data should be + * @param whitespaceMode defines how whitespace in the data should be * handled. */ void setWhitespaceMode(WhitespaceMode whitespaceMode); + /** + * Returns the current whitespace handling mode. + * + * @return the currently set whitespace handling mode. + */ + WhitespaceMode getWhitespaceMode() const; + /** * Returns the internal CharReader reference. * * @return the CharReader reference. */ - CharReader &getCharReader(); + CharReader &getReader() const; /** * Returns the internal Logger reference. * * @return the internal Logger reference. */ - Logger &getLogger(); + Logger &getLogger() const; /** * Returns the internal OsxmlEvents reference. * * @return the internal OsxmlEvents reference. */ - OsxmlEvents &getEvents(); + OsxmlEvents &getEvents() const; /** * Returns a reference at the internal data. */ - OsxmlEventParserData &getData(); + OsxmlEventParserData &getData() const; }; - } #endif /* _OSXML_EVENT_PARSER_HPP_ */ diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index e5eff05..b944af8 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -28,6 +28,7 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); +//static ConcreteLogger logger; TEST(OsmlStreamParser, empty) { diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp new file mode 100644 index 0000000..06c800f --- /dev/null +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -0,0 +1,222 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include +#include +#include + +#include + +namespace ousia { + +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; + +namespace { +enum class OsxmlEvent { + COMMAND_START, + ANNOTATION_START, + ANNOTATION_END, + FIELD_END, + DATA +}; + +class TestOsxmlEventListener : public OsxmlEvents { +public: + std::vector> events; + + void commandStart(Variant name, Variant args) override + { + events.emplace_back(OsxmlEvent::COMMAND_START, + Variant::arrayType{name, args}); + } + + void annotationStart(Variant name, Variant args) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_START, + Variant::arrayType{name, args}); + } + + void annotationEnd(Variant name, Variant elementName) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_END, + Variant::arrayType{name, elementName}); + } + + void fieldEnd() override + { + events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); + } + + void data(Variant data) override + { + events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); + } +}; + +static std::vector> parseXml( + const char *testString, + WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) +{ + TestOsxmlEventListener listener; + CharReader reader(testString); + OsxmlEventParser parser(reader, listener, logger); + parser.setWhitespaceMode(whitespaceMode); + parser.parse(); + return listener.events; +} +} + +TEST(OsxmlEventParser, simpleCommandWithArgs) +{ + const char *testString = ""; + // 01234567 89012 3456 78 9012 34 5678 90123 456 + // 0 1 2 3 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{ + "a", Variant::mapType{ + {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); + + // Check the locations (I'll do this one time and then just assume it works) + ASSERT_EQ(1U, events[0].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(2U, events[0].second.asArray()[0].getLocation().getEnd()); + ASSERT_EQ( + 9U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getStart()); + ASSERT_EQ( + 13U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getEnd()); + ASSERT_EQ( + 18U, + events[0].second.asArray()[1].asMap()["a"].getLocation().getStart()); + ASSERT_EQ( + 19U, events[0].second.asArray()[1].asMap()["a"].getLocation().getEnd()); + ASSERT_EQ( + 24U, + events[0].second.asArray()[1].asMap()["b"].getLocation().getStart()); + ASSERT_EQ( + 25U, events[0].second.asArray()[1].asMap()["b"].getLocation().getEnd()); + ASSERT_EQ( + 30U, + events[0].second.asArray()[1].asMap()["c"].getLocation().getStart()); + ASSERT_EQ( + 34U, events[0].second.asArray()[1].asMap()["c"].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, magicTopLevelTag) +{ + const char *testString = ""; + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"b", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, magicTopLevelTagInside) +{ + const char *testString = ""; + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"ousia", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::PRESERVE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataTrimWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::TRIM); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::COLLAPSE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +} + -- cgit v1.2.3 From 9b4cdfabf6527440d6ffa499cc6b57a44daaeadb Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:05:42 +0100 Subject: Added code for the handling of explicit default fields and improved unit tests --- CMakeLists.txt | 16 +- src/formats/osml/OsmlStreamParser.cpp | 78 +++++-- src/formats/osml/OsmlStreamParser.hpp | 45 +++- test/formats/osml/OsmlStreamParserTest.cpp | 340 +++++++++++++++++------------ 4 files changed, 302 insertions(+), 177 deletions(-) (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index bdc9541..d311f7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -290,15 +290,15 @@ IF(TEST) ousia_core ) -# ADD_EXECUTABLE(ousia_test_filesystem -# test/plugins/filesystem/FileLocatorTest -# ) + ADD_EXECUTABLE(ousia_test_filesystem + test/plugins/filesystem/FileLocatorTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_filesystem -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_filesystem + ${GTEST_LIBRARIES} + ousia_core + ousia_filesystem + ) # ADD_EXECUTABLE(ousia_test_css # test/plugins/css/Tokenizer diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index 6b00eef..6606120 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -60,6 +60,11 @@ public: */ TokenTypeId FieldEnd; + /** + * Id of the default field start token. + */ + TokenTypeId DefaultFieldStart; + /** * Registers the plain format tokens in the internal tokenizer. */ @@ -71,6 +76,7 @@ public: BlockCommentEnd = registerToken("}%"); FieldStart = registerToken("{"); FieldEnd = registerToken("}"); + DefaultFieldStart = registerToken("{!"); } }; @@ -164,7 +170,7 @@ OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) : reader(reader), logger(logger), tokenizer(Tokens) { // Place an intial command representing the complete file on the stack - commands.push(Command{"", Variant::mapType{}, true, true, true}); + commands.push(Command{"", Variant::mapType{}, true, true, true, false}); } Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) @@ -365,7 +371,7 @@ void OsmlStreamParser::pushCommand(Variant commandName, commands.pop(); } commands.push(Command{std::move(commandName), std::move(commandArguments), - hasRange, false, false}); + hasRange, false, false, false}); } OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start) @@ -482,6 +488,29 @@ bool OsmlStreamParser::checkIssueFieldStart() return false; } +bool OsmlStreamParser::closeField() +{ + // Try to end an open field of the current command -- if the current command + // is not inside an open field, end this command and try to close the next + // one + for (int i = 0; i < 2 && commands.size() > 1; i++) { + Command &cmd = commands.top(); + if (!cmd.inRangeField) { + if (cmd.inField) { + cmd.inField = false; + if (cmd.inDefaultField) { + commands.pop(); + } + return true; + } + commands.pop(); + } else { + return false; + } + } + return false; +} + OsmlStreamParser::State OsmlStreamParser::parse() { // Handler for incomming data @@ -579,27 +608,29 @@ OsmlStreamParser::State OsmlStreamParser::parse() } logger.error( "Got field start token \"{\", but no command for which to " - "start the field. Did you mean \"\\{\"?", + "start the field. Write \"\\{\" to insert this sequence as " + "text.", token); } else if (token.type == Tokens.FieldEnd) { - // Try to end an open field of the current command -- if the current - // command is not inside an open field, end this command and try to - // close the next one - for (int i = 0; i < 2 && commands.size() > 1; i++) { - Command &cmd = commands.top(); - if (!cmd.inRangeField) { - if (cmd.inField) { - cmd.inField = false; - return State::FIELD_END; - } - commands.pop(); - } else { - break; - } + if (closeField()) { + return State::FIELD_END; + } + logger.error( + "Got field end token \"}\", but there is no field to end. " + "Write \"\\}\" to insert this sequence as text.", + token); + } else if (token.type == Tokens.DefaultFieldStart) { + // Try to start a default field the first time the token is reached + Command &topCmd = commands.top(); + if (!topCmd.inField) { + topCmd.inField = true; + topCmd.inDefaultField = true; + return State::FIELD_START; } logger.error( - "Got field end token \"}\", but there is no field to end. Did " - "you mean \"\\}\"?", + "Got default field start token \"{!\", but no command for " + "which to start the field. Write \"\\{!\" to insert this " + "sequence as text", token); } else { logger.error("Unexpected token \"" + token.content + "\"", token); @@ -627,14 +658,19 @@ OsmlStreamParser::State OsmlStreamParser::parse() return State::END; } -const Variant &OsmlStreamParser::getCommandName() +const Variant &OsmlStreamParser::getCommandName() const { return commands.top().name; } -const Variant &OsmlStreamParser::getCommandArguments() +const Variant &OsmlStreamParser::getCommandArguments() const { return commands.top().arguments; } + +bool OsmlStreamParser::inDefaultField() const +{ + return commands.top().inRangeField || commands.top().inDefaultField; +} } diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index 1508012..bb5db65 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -152,10 +152,16 @@ public: */ bool inRangeField; + /** + * Set to true if we are currently in a field that has been especially + * marked as default field (using the "|") syntax. + */ + bool inDefaultField; + /** * Default constructor. */ - Command() : hasRange(false), inField(false), inRangeField(false) {} + Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {} /** * Constructor of the Command class. @@ -168,16 +174,19 @@ public: * explicit range. * @param inField is set to true if we currently are inside a field * of this command. - * @param inRangeField is set to true if we currently inside the outer - * field of the command. + * @param inRangeField is set to true if we currently are inside the + * outer field of a ranged command. + * @param inDefaultField is set to true if we currently are in a + * specially marked default field. */ Command(Variant name, Variant arguments, bool hasRange, bool inField, - bool inRangeField) + bool inRangeField, bool inDefaultField) : name(std::move(name)), arguments(std::move(arguments)), hasRange(hasRange), inField(inField), - inRangeField(inRangeField) + inRangeField(inRangeField), + inDefaultField(inDefaultField) { } }; @@ -289,6 +298,16 @@ private: */ bool checkIssueFieldStart(); + /** + * Closes a currently open field. Note that the command will be removed from + * the internal command stack if the field that is being closed is a + * field marked as default field. + * + * @return true if the field could be closed, false if there was no field + * to close. + */ + bool closeField(); + public: /** * Constructor of the OsmlStreamParser class. Attaches the new @@ -317,7 +336,7 @@ public: * @return a reference at a variant containing the data parsed by the * "parse" function. */ - const Variant &getData() { return data; } + const Variant &getData() const { return data; } /** * Returns a reference at the internally stored command name. Only valid if @@ -326,7 +345,7 @@ public: * @return a reference at a variant containing name and location of the * parsed command. */ - const Variant &getCommandName(); + const Variant &getCommandName() const; /** * Returns a reference at the internally stored command name. Only valid if @@ -335,14 +354,22 @@ public: * @return a reference at a variant containing arguments given to the * command. */ - const Variant &getCommandArguments(); + const Variant &getCommandArguments() const; + + /** + * Returns true if the current field is the "default" field. This is true if + * the parser either is in the outer range of a range command or inside a + * field that has been especially marked as "default" field (using the "|" + * syntax). + */ + bool inDefaultField() const; /** * Returns a reference at the char reader. * * @return the last internal token location. */ - SourceLocation &getLocation() { return location; } + const SourceLocation &getLocation() const { return location; } }; } diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index b944af8..da9fe8a 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -28,7 +28,88 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); -//static ConcreteLogger logger; +// static ConcreteLogger logger; + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertCommand(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertData(OsmlStreamParser &reader, const std::string &data, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + EXPECT_EQ(data, reader.getData().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + EXPECT_EQ(defaultField, reader.inDefaultField()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} TEST(OsmlStreamParser, empty) { @@ -47,12 +128,7 @@ TEST(OsmlStreamParser, oneCharacter) OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); + assertData(reader, "a", 0, 1); } TEST(OsmlStreamParser, whitespaceElimination) @@ -64,12 +140,7 @@ TEST(OsmlStreamParser, whitespaceElimination) OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(14U, loc.getEnd()); + assertData(reader, "hello world", 1, 14); } TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) @@ -81,13 +152,7 @@ TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(14U, loc.getEnd()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertData(reader, "hello world", 1, 14); } TEST(OsmlStreamParser, escapeWhitespace) @@ -99,13 +164,7 @@ TEST(OsmlStreamParser, escapeWhitespace) OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(15U, loc.getEnd()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertData(reader, "hello world", 1, 15); } static void testEscapeSpecialCharacter(const std::string &c) @@ -127,6 +186,7 @@ TEST(OsmlStreamParser, escapeSpecialCharacters) testEscapeSpecialCharacter("}"); testEscapeSpecialCharacter("<"); testEscapeSpecialCharacter(">"); + testEscapeSpecialCharacter("|"); } TEST(OsmlStreamParser, simpleSingleLineComment) @@ -347,86 +407,6 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - EXPECT_EQ(name, reader.getCommandName().asString()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - const Variant::mapType &args, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - assertCommand(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); -} - -static void assertData(OsmlStreamParser &reader, const std::string &data, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(data, reader.getData().asString()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getData().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getData().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertFieldStart(OsmlStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertFieldEnd(OsmlStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertEnd(OsmlStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - TEST(OsmlStreamParser, fields) { const char *testString = "\\test{a}{b}{c}"; @@ -436,15 +416,15 @@ TEST(OsmlStreamParser, fields) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); assertData(reader, "b", 9, 10); assertFieldEnd(reader, 10, 11); - assertFieldStart(reader, 11, 12); + assertFieldStart(reader, false, 11, 12); assertData(reader, "c", 12, 13); assertFieldEnd(reader, 13, 14); assertEnd(reader, 14, 14); @@ -459,11 +439,11 @@ TEST(OsmlStreamParser, dataOutsideField) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); assertData(reader, "b", 9, 10); assertFieldEnd(reader, 10, 11); @@ -481,14 +461,14 @@ TEST(OsmlStreamParser, nestedCommand) assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); { assertCommand(reader, "test2", 9, 15); - assertFieldStart(reader, 15, 16); + assertFieldStart(reader, false, 15, 16); assertData(reader, "b", 16, 17); assertFieldEnd(reader, 17, 18); } @@ -507,10 +487,10 @@ TEST(OsmlStreamParser, nestedCommandImmediateEnd) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); { assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertData(reader, "b", 13, 14); assertFieldEnd(reader, 14, 15); } @@ -527,7 +507,7 @@ TEST(OsmlStreamParser, nestedCommandNoData) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); assertFieldEnd(reader, 12, 13); assertEnd(reader, 13, 13); @@ -557,11 +537,11 @@ TEST(OsmlStreamParser, fieldsWithSpaces) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, 3, 4); + assertFieldStart(reader, false, 3, 4); assertCommand(reader, "b", 4, 6); assertCommand(reader, "c", 7, 9); assertFieldEnd(reader, 9, 10); - assertFieldStart(reader, 16, 17); + assertFieldStart(reader, false, 16, 17); assertCommand(reader, "d", 17, 19); assertFieldEnd(reader, 19, 20); assertEnd(reader, 20, 20); @@ -612,9 +592,9 @@ TEST(OsmlStreamParser, errorNoFieldEndNested) logger.reset(); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertFieldEnd(reader, 13, 14); assertFieldEnd(reader, 14, 15); ASSERT_FALSE(logger.hasError()); @@ -633,9 +613,9 @@ TEST(OsmlStreamParser, errorNoFieldEndNestedData) logger.reset(); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertFieldEnd(reader, 13, 14); assertFieldEnd(reader, 14, 15); assertData(reader, "a", 15, 16); @@ -654,7 +634,7 @@ TEST(OsmlStreamParser, beginEnd) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertFieldEnd(reader, 17, 21); assertEnd(reader, 22, 22); } @@ -669,7 +649,7 @@ TEST(OsmlStreamParser, beginEndWithName) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", {{"name", "a"}}, 7, 11); - assertFieldStart(reader, 14, 15); + assertFieldStart(reader, true, 14, 15); assertFieldEnd(reader, 19, 23); assertEnd(reader, 24, 24); } @@ -685,7 +665,7 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgs) assertCommand(reader, "book", {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, 32, 33); + assertFieldStart(reader, true, 32, 33); assertFieldEnd(reader, 37, 41); assertEnd(reader, 42, 42); } @@ -702,17 +682,17 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) assertCommand(reader, "book", {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, 32, 33); + assertFieldStart(reader, false, 32, 33); assertData(reader, "a", 33, 34); assertCommand(reader, "test", Variant::mapType{}, 35, 40); assertFieldEnd(reader, 40, 41); - assertFieldStart(reader, 41, 42); + assertFieldStart(reader, false, 41, 42); assertData(reader, "b", 42, 43); assertCommand(reader, "test", Variant::mapType{}, 44, 49); - assertFieldStart(reader, 49, 50); + assertFieldStart(reader, false, 49, 50); assertFieldEnd(reader, 50, 51); assertFieldEnd(reader, 51, 52); - assertFieldStart(reader, 52, 53); + assertFieldStart(reader, true, 52, 53); assertFieldEnd(reader, 57, 61); assertEnd(reader, 62, 62); } @@ -727,12 +707,45 @@ TEST(OsmlStreamParser, beginEndWithData) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertData(reader, "a", 12, 13); assertFieldEnd(reader, 18, 22); assertEnd(reader, 23, 23); } +TEST(OsmlStreamParser, beginEndNested) +{ + const char *testString = + "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; + // 012345678901234 5678901234567890 123456 7890123 4567890 + // 0 1 2 3 4 5 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, false, 9, 10); + assertData(reader, "b", 10, 11); + assertFieldEnd(reader, 11, 12); + assertFieldStart(reader, true, 13, 14); + assertData(reader, "c", 13, 14); + assertCommand(reader, "d", 22, 23); + assertFieldStart(reader, false, 24, 25); + assertData(reader, "e", 25, 26); + assertFieldEnd(reader, 26, 27); + assertFieldStart(reader, false, 27, 28); + assertData(reader, "f", 28, 29); + assertFieldEnd(reader, 29, 30); + assertFieldStart(reader, true, 31, 32); + assertCommand(reader, "g", 31, 33); + assertFieldStart(reader, false, 33, 34); + assertData(reader, "h", 34, 35); + assertFieldEnd(reader, 35, 36); + assertFieldEnd(reader, 42, 43); + assertFieldEnd(reader, 49, 50); + assertEnd(reader, 51, 51); +} + TEST(OsmlStreamParser, beginEndWithCommand) { const char *testString = "\\begin{book}\\a{test}\\end{book}"; @@ -743,9 +756,9 @@ TEST(OsmlStreamParser, beginEndWithCommand) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertCommand(reader, "a", 12, 14); - assertFieldStart(reader, 14, 15); + assertFieldStart(reader, false, 14, 15); assertData(reader, "test", 15, 19); assertFieldEnd(reader, 19, 20); assertFieldEnd(reader, 25, 29); @@ -873,9 +886,9 @@ TEST(OsmlStreamParser, errorBeginEndMismatch) logger.reset(); assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, 10, 11); + assertFieldStart(reader, true, 10, 11); assertCommand(reader, "b", 17, 18); - assertFieldStart(reader, 20, 24); + assertFieldStart(reader, true, 20, 24); assertData(reader, "test", 20, 24); ASSERT_FALSE(logger.hasError()); ASSERT_THROW(reader.parse(), LoggableException); @@ -904,7 +917,7 @@ TEST(OsmlStreamParser, beginEndWithNSSep) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test1:test2", 7, 18); - assertFieldStart(reader, 19, 20); + assertFieldStart(reader, true, 19, 20); assertFieldEnd(reader, 24, 35); assertEnd(reader, 36, 36); } @@ -920,7 +933,7 @@ TEST(OsmlStreamParser, errorBeginNSSep) ASSERT_FALSE(logger.hasError()); assertCommand(reader, "blub"); ASSERT_TRUE(logger.hasError()); - assertFieldStart(reader); + assertFieldStart(reader, true); assertFieldEnd(reader); assertEnd(reader); } @@ -934,7 +947,7 @@ TEST(OsmlStreamParser, errorEndNSSep) logger.reset(); assertCommand(reader, "blub"); - assertFieldStart(reader); + assertFieldStart(reader, true); ASSERT_FALSE(logger.hasError()); assertFieldEnd(reader); ASSERT_TRUE(logger.hasError()); @@ -970,5 +983,54 @@ TEST(OsmlStreamParser, errorRepeatedNs) assertData(reader, "::"); assertEnd(reader); } + +TEST(OsmlStreamParser, explicitDefaultField) +{ + const char *testString = "\\a{!b}c"; + // 01234567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertData(reader, "b", 4, 5); + assertFieldEnd(reader, 5, 6); + assertData(reader, "c", 6, 7); + assertEnd(reader, 7, 7); +} + +TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) +{ + const char *testString = "\\a{!\\b}c"; + // 0123 4567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + assertData(reader, "c", 7, 8); + assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) +{ + const char *testString = "\\a{!\\b}{c}"; + // 0123 4567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + assertData(reader, "c", 7, 8); + assertEnd(reader, 8, 8); +} + } -- cgit v1.2.3 From 93780dde6d52d72961ffc322b16e1fcc9575e85e Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:09:45 +0100 Subject: Updated CMakeLists.txt --- CMakeLists.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index d311f7a..69c3fd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,10 +150,10 @@ ADD_LIBRARY(ousia_core src/core/parser/Parser src/core/parser/ParserContext src/core/parser/ParserScope -# src/core/parser/generic/ParserState -# src/core/parser/generic/ParserStateCallbacks -# src/core/parser/generic/ParserStateHandler -# src/core/parser/generic/ParserStateStack + src/core/parser/stack/Callbacks + src/core/parser/stack/Handler + src/core/parser/stack/State + src/core/parser/stack/Stack src/core/parser/utils/Tokenizer src/core/parser/utils/TokenTrie src/core/resource/Resource @@ -274,8 +274,8 @@ IF(TEST) test/core/model/StyleTest test/core/model/TypesystemTest test/core/parser/ParserScopeTest -# test/core/parser/ParserStackTest -# test/core/parser/ParserStateTest +# test/core/parser/stack/StackTest + test/core/parser/stack/StateTest test/core/parser/utils/TokenizerTest test/core/parser/utils/TokenTrieTest test/core/resource/ResourceLocatorTest -- cgit v1.2.3 From be1ff95016b033c6017cd79b21bae87cc9d885df Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 22:43:20 +0100 Subject: Included the stack test --- CMakeLists.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index 69c3fd3..4f9a8d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,18 @@ FIND_PACKAGE(Boost COMPONENTS system filesystem program_options REQUIRED) # Set utf8cpp include path SET(UTF8CPP_INCLUDE_DIR "lib/utf8cpp") +################################################################################ +# Check the gcc version # +################################################################################ + +IF(CMAKE_COMPILER_IS_GNUCC) + EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} -dumpversion + OUTPUT_VARIABLE GCC_VERSION) + IF(GCC_VERSION VERSION_LESS 4.8) + ERROR("This projects requires at last GCC 4.8 or newer to be built") + ENDIF() +ENDIF() + ################################################################################ # Inclusion of doxygen # ################################################################################ @@ -274,7 +286,7 @@ IF(TEST) test/core/model/StyleTest test/core/model/TypesystemTest test/core/parser/ParserScopeTest -# test/core/parser/stack/StackTest + test/core/parser/stack/StackTest test/core/parser/stack/StateTest test/core/parser/utils/TokenizerTest test/core/parser/utils/TokenTrieTest -- cgit v1.2.3 From 551b7be64f207845cb05b8ec593f9bf2d7f0c940 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:25:38 +0100 Subject: Included handler classes from master --- CMakeLists.txt | 4 + src/core/parser/stack/DocumentHandler.cpp | 252 ++++++++++++++++++++++ src/core/parser/stack/DocumentHandler.hpp | 88 ++++++++ src/core/parser/stack/DomainHandler.cpp | 276 +++++++++++++++++++++++++ src/core/parser/stack/DomainHandler.hpp | 200 ++++++++++++++++++ src/core/parser/stack/Handler.cpp | 2 +- src/core/parser/stack/ImportIncludeHandler.cpp | 96 +++++++++ src/core/parser/stack/ImportIncludeHandler.hpp | 90 ++++++++ src/core/parser/stack/TypesystemHandler.cpp | 175 ++++++++++++++++ src/core/parser/stack/TypesystemHandler.hpp | 121 +++++++++++ 10 files changed, 1303 insertions(+), 1 deletion(-) create mode 100644 src/core/parser/stack/DocumentHandler.cpp create mode 100644 src/core/parser/stack/DocumentHandler.hpp create mode 100644 src/core/parser/stack/DomainHandler.cpp create mode 100644 src/core/parser/stack/DomainHandler.hpp create mode 100644 src/core/parser/stack/ImportIncludeHandler.cpp create mode 100644 src/core/parser/stack/ImportIncludeHandler.hpp create mode 100644 src/core/parser/stack/TypesystemHandler.cpp create mode 100644 src/core/parser/stack/TypesystemHandler.hpp (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f9a8d3..4a3db32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -163,9 +163,13 @@ ADD_LIBRARY(ousia_core src/core/parser/ParserContext src/core/parser/ParserScope src/core/parser/stack/Callbacks + src/core/parser/stack/DocumentHandler + src/core/parser/stack/DomainHandler src/core/parser/stack/Handler + src/core/parser/stack/ImportIncludeHandler src/core/parser/stack/State src/core/parser/stack/Stack + src/core/parser/stack/TypesystemHandler src/core/parser/utils/Tokenizer src/core/parser/utils/TokenTrie src/core/resource/Resource diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp new file mode 100644 index 0000000..ba7430d --- /dev/null +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -0,0 +1,252 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "DocumentHandler.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +namespace ousia { + +/* DocumentHandler */ + +void DocumentHandler::start(Variant::mapType &args) +{ + Rooted document = + project()->createDocument(args["name"].asString()); + document->setLocation(location()); + scope().push(document); + scope().setFlag(ParserFlag::POST_HEAD, false); +} + +void DocumentHandler::end() { scope().pop(); } + +/* DocumentChildHandler */ + +void DocumentChildHandler::preamble(Handle parentNode, + std::string &fieldName, + DocumentEntity *&parent, bool &inField) +{ + // check if the parent in the structure tree was an explicit field + // reference. + inField = parentNode->isa(&RttiTypes::DocumentField); + if (inField) { + fieldName = parentNode->getName(); + parentNode = scope().selectOrThrow( + {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity}); + } else { + // if it wasn't an explicit reference, we use the default field. + fieldName = DEFAULT_FIELD_NAME; + } + // reference the parent entity explicitly. + parent = nullptr; + if (parentNode->isa(&RttiTypes::StructuredEntity)) { + parent = static_cast( + parentNode.cast().get()); + } else if (parentNode->isa(&RttiTypes::AnnotationEntity)) { + parent = static_cast( + parentNode.cast().get()); + } +} + +void DocumentChildHandler::createPath(const NodeVector &path, + DocumentEntity *&parent) +{ + size_t S = path.size(); + for (size_t p = 1; p < S; p = p + 2) { + parent = static_cast( + parent->createChildStructuredEntity( + path[p].cast(), Variant::mapType{}, + path[p - 1]->getName(), "").get()); + } +} + +void DocumentChildHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + Rooted parentNode = scope().selectOrThrow( + {&RttiTypes::Document, &RttiTypes::StructuredEntity, + &RttiTypes::AnnotationEntity, &RttiTypes::DocumentField}); + + std::string fieldName; + DocumentEntity *parent; + bool inField; + + preamble(parentNode, fieldName, parent, inField); + + // try to find a FieldDescriptor for the given tag if we are not in a + // field already. This does _not_ try to construct transparent paths + // in between. + if (!inField && parent != nullptr && + parent->getDescriptor()->hasField(name())) { + Rooted field{ + new DocumentField(parentNode->getManager(), fieldName, parentNode)}; + field->setLocation(location()); + scope().push(field); + return; + } + + // Otherwise create a new StructuredEntity + // TODO: Consider Anchors and AnnotationEntities + Rooted strct = + scope().resolve(Utils::split(name(), ':'), logger()); + if (strct == nullptr) { + // if we could not resolve the name, throw an exception. + throw LoggableException( + std::string("\"") + name() + "\" could not be resolved.", + location()); + } + + std::string name; + auto it = args.find("name"); + if (it != args.end()) { + name = it->second.asString(); + args.erase(it); + } + + Rooted entity; + if (parentNode->isa(&RttiTypes::Document)) { + entity = parentNode.cast()->createRootStructuredEntity( + strct, args, name); + } else { + // calculate a path if transparent entities are needed in between. + auto path = parent->getDescriptor()->pathTo(strct, logger()); + if (path.empty()) { + throw LoggableException( + std::string("An instance of \"") + strct->getName() + + "\" is not allowed as child of an instance of \"" + + parent->getDescriptor()->getName() + "\"", + location()); + } + + // create all transparent entities until the last field. + createPath(path, parent); + entity = + parent->createChildStructuredEntity(strct, args, fieldName, name); + } + entity->setLocation(location()); + scope().push(entity); +} + +void DocumentChildHandler::end() { scope().pop(); } + +std::pair DocumentChildHandler::convertData( + Handle field, Logger &logger, const std::string &data) +{ + // if the content is supposed to be of type string, we can finish + // directly. + auto vts = field->getPrimitiveType()->getVariantTypes(); + if (std::find(vts.begin(), vts.end(), VariantType::STRING) != vts.end()) { + return std::make_pair(true, Variant::fromString(data)); + } + + // then try to parse the content using the type specification. + auto res = field->getPrimitiveType()->read( + data, logger, location().getSourceId(), location().getStart()); + return res; +} + +void DocumentChildHandler::data(const std::string &data, int fieldIdx) +{ + Rooted parentNode = scope().selectOrThrow( + {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity, + &RttiTypes::DocumentField}); + + std::string fieldName; + DocumentEntity *parent; + bool inField; + + preamble(parentNode, fieldName, parent, inField); + + Rooted desc = parent->getDescriptor(); + /* + * We distinguish two cases here: One for fields that are given. + */ + if (fieldName != DEFAULT_FIELD_NAME) { + // retrieve the actual FieldDescriptor + Rooted field = desc->getFieldDescriptor(fieldName); + if (field == nullptr) { + logger().error( + std::string("Can't handle data because no field with name \"") + + fieldName + "\" exists in descriptor\"" + desc->getName() + + "\".", + location()); + return; + } + // if it is not primitive at all, we can't parse the content. + if (!field->isPrimitive()) { + logger().error(std::string("Can't handle data because field \"") + + fieldName + "\" of descriptor \"" + + desc->getName() + "\" is not primitive!", + location()); + return; + } + // then try to parse the content using the type specification. + auto res = convertData(field, logger(), data); + // add it as primitive content. + if (res.first) { + parent->createChildDocumentPrimitive(res.second, fieldName); + } + } else { + /* + * The second case is for primitive fields. Here we search through + * all FieldDescriptors that allow primitive content at this point + * and could be constructed via transparent intermediate entities. + * We then try to parse the data using the type specified by the + * respective field. If that does not work we proceed to the next + * possible field. + */ + // retrieve all fields. + NodeVector fields = desc->getDefaultFields(); + std::vector forks; + for (auto field : fields) { + // then try to parse the content using the type specification. + forks.emplace_back(logger().fork()); + auto res = convertData(field, forks.back(), data); + if (res.first) { + forks.back().commit(); + // if that worked, construct the necessary path. + auto pathRes = desc->pathTo(field, logger()); + assert(pathRes.second); + NodeVector path = pathRes.first; + createPath(path, parent); + // then create the primitive element. + parent->createChildDocumentPrimitive(res.second, fieldName); + return; + } + } + logger().error("Could not read data with any of the possible fields:"); + for (size_t f = 0; f < fields.size(); f++) { + logger().note(Utils::join(fields[f]->path(), ".") + ":", + SourceLocation{}, MessageMode::NO_CONTEXT); + forks[f].commit(); + } + } +} + +namespace RttiTypes { +const Rtti DocumentField = + RttiBuilder("DocumentField").parent(&Node); +} +} diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp new file mode 100644 index 0000000..475fe69 --- /dev/null +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -0,0 +1,88 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file DocumentHandler.hpp + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_DOCUMENT_HANDLER_HPP_ +#define _OUSIA_DOCUMENT_HANDLER_HPP_ + +#include + +#include "Handler.hpp" + +namespace ousia { + +// Forward declarations +class Rtti; +class DocumentEntity; +class FieldDescriptor; + +class DocumentHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DocumentHandler{handlerData}; + } +}; + +class DocumentField : public Node { +public: + using Node::Node; +}; + +class DocumentChildHandler : public StaticHandler { +private: + void preamble(Handle parentNode, std::string &fieldName, + DocumentEntity *&parent, bool &inField); + + void createPath(const NodeVector &path, DocumentEntity *&parent); + + std::pair convertData(Handle field, + Logger &logger, + const std::string &data); + +public: + using Handler::Handler; + + bool start(Variant::mapType &args) override; + + void end() override; + + bool data(const Variant &data) override; + + static Handler *create(const HandlerData &handlerData) + { + return new DocumentChildHandler{handlerData}; + } +}; + +namespace RttiTypes { +extern const Rtti DocumentField; +} +} +#endif diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp new file mode 100644 index 0000000..6571717 --- /dev/null +++ b/src/core/parser/stack/DomainHandler.cpp @@ -0,0 +1,276 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "DomainHandler.hpp" + +#include +#include +#include + +namespace ousia { + +/* DomainHandler */ + +void DomainHandler::start(Variant::mapType &args) +{ + Rooted domain = project()->createDomain(args["name"].asString()); + domain->setLocation(location()); + + scope().push(domain); +} + +void DomainHandler::end() { scope().pop(); } + +/* DomainStructHandler */ + +void DomainStructHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + Rooted domain = scope().selectOrThrow(); + + Rooted structuredClass = domain->createStructuredClass( + args["name"].asString(), args["cardinality"].asCardinality(), nullptr, + args["transparent"].asBool(), args["isRoot"].asBool()); + structuredClass->setLocation(location()); + + const std::string &isa = args["isa"].asString(); + if (!isa.empty()) { + scope().resolve( + isa, structuredClass, logger(), + [](Handle superclass, Handle structuredClass, + Logger &logger) { + if (superclass != nullptr) { + structuredClass.cast()->setSuperclass( + superclass.cast(), logger); + } + }); + } + + scope().push(structuredClass); +} + +void DomainStructHandler::end() { scope().pop(); } + +/* DomainAnnotationHandler */ +void DomainAnnotationHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + Rooted domain = scope().selectOrThrow(); + + Rooted annotationClass = + domain->createAnnotationClass(args["name"].asString()); + annotationClass->setLocation(location()); + + scope().push(annotationClass); +} + +void DomainAnnotationHandler::end() { scope().pop(); } + +/* DomainAttributesHandler */ + +void DomainAttributesHandler::start(Variant::mapType &args) +{ + // Fetch the current typesystem and create the struct node + Rooted parent = scope().selectOrThrow(); + + Rooted attrDesc = parent->getAttributesDescriptor(); + attrDesc->setLocation(location()); + + scope().push(attrDesc); +} + +void DomainAttributesHandler::end() { scope().pop(); } + +/* DomainFieldHandler */ + +void DomainFieldHandler::start(Variant::mapType &args) +{ + FieldDescriptor::FieldType type; + if (args["isSubtree"].asBool()) { + type = FieldDescriptor::FieldType::SUBTREE; + } else { + type = FieldDescriptor::FieldType::TREE; + } + + Rooted parent = scope().selectOrThrow(); + + Rooted field = parent->createFieldDescriptor( + logger(), type, args["name"].asString(), args["optional"].asBool()); + field->setLocation(location()); + + scope().push(field); +} + +void DomainFieldHandler::end() { scope().pop(); } + +/* DomainFieldRefHandler */ + +void DomainFieldRefHandler::start(Variant::mapType &args) +{ + Rooted parent = scope().selectOrThrow(); + + const std::string &name = args["ref"].asString(); + scope().resolveFieldDescriptor( + name, parent, logger(), + [](Handle field, Handle parent, Logger &logger) { + if (field != nullptr) { + parent.cast()->addFieldDescriptor( + field.cast(), logger); + } + }); +} + +void DomainFieldRefHandler::end() {} + +/* DomainPrimitiveHandler */ + +void DomainPrimitiveHandler::start(Variant::mapType &args) +{ + Rooted parent = scope().selectOrThrow(); + + FieldDescriptor::FieldType fieldType; + if (args["isSubtree"].asBool()) { + fieldType = FieldDescriptor::FieldType::SUBTREE; + } else { + fieldType = FieldDescriptor::FieldType::TREE; + } + + Rooted field = parent->createPrimitiveFieldDescriptor( + new UnknownType(manager()), logger(), fieldType, + args["name"].asString(), args["optional"].asBool()); + field->setLocation(location()); + + const std::string &type = args["type"].asString(); + scope().resolve(type, field, logger(), + [](Handle type, Handle field, + Logger &logger) { + if (type != nullptr) { + field.cast()->setPrimitiveType(type.cast()); + } + }); + + scope().push(field); +} + +void DomainPrimitiveHandler::end() { scope().pop(); } + +/* DomainChildHandler */ + +void DomainChildHandler::start(Variant::mapType &args) +{ + Rooted field = scope().selectOrThrow(); + + const std::string &ref = args["ref"].asString(); + scope().resolve( + ref, field, logger(), + [](Handle child, Handle field, Logger &logger) { + if (child != nullptr) { + field.cast()->addChild( + child.cast()); + } + }); +} + +void DomainChildHandler::end() {} + +/* DomainParentHandler */ + +void DomainParentHandler::start(Variant::mapType &args) +{ + Rooted strct = scope().selectOrThrow(); + + Rooted parent{ + new DomainParent(strct->getManager(), args["ref"].asString(), strct)}; + parent->setLocation(location()); + scope().push(parent); +} + +void DomainParentHandler::end() { scope().pop(); } + +/* DomainParentFieldHandler */ +void DomainParentFieldHandler::start(Variant::mapType &args) +{ + Rooted parentNameNode = scope().selectOrThrow(); + FieldDescriptor::FieldType type; + if (args["isSubtree"].asBool()) { + type = FieldDescriptor::FieldType::SUBTREE; + } else { + type = FieldDescriptor::FieldType::TREE; + } + + const std::string &name = args["name"].asString(); + const bool optional = args["optional"].asBool(); + Rooted strct = + parentNameNode->getParent().cast(); + + // resolve the parent, create the declared field and add the declared + // StructuredClass as child to it. + scope().resolve( + parentNameNode->getName(), strct, logger(), + [type, name, optional](Handle parent, Handle strct, + Logger &logger) { + if (parent != nullptr) { + Rooted field = + parent.cast()->createFieldDescriptor( + logger, type, name, optional); + field->addChild(strct.cast()); + } + }); +} + +void DomainParentFieldHandler::end() {} + +/* DomainParentFieldRefHandler */ + +void DomainParentFieldRefHandler::start(Variant::mapType &args) +{ + Rooted parentNameNode = scope().selectOrThrow(); + + const std::string &name = args["ref"].asString(); + Rooted strct = + parentNameNode->getParent().cast(); + auto loc = location(); + + // resolve the parent, get the referenced field and add the declared + // StructuredClass as child to it. + scope().resolve( + parentNameNode->getName(), strct, logger(), + [name, loc](Handle parent, Handle strct, Logger &logger) { + if (parent != nullptr) { + Rooted field = + parent.cast()->getFieldDescriptor(name); + if (field == nullptr) { + logger.error( + std::string("Could not find referenced field ") + name, + loc); + return; + } + field->addChild(strct.cast()); + } + }); +} + +void DomainParentFieldRefHandler::end() {} + +namespace RttiTypes { +const Rtti DomainParent = + RttiBuilder("DomainParent").parent(&Node); +} +} diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp new file mode 100644 index 0000000..5e8ea60 --- /dev/null +++ b/src/core/parser/stack/DomainHandler.hpp @@ -0,0 +1,200 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file DomainHandler.hpp + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_DOMAIN_HANDLER_HPP_ +#define _OUSIA_DOMAIN_HANDLER_HPP_ + +#include + +#include "Handler.hpp" + +namespace ousia { + +// Forward declarations +class Rtti; + +class DomainHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainHandler{handlerData}; + } +}; + +class DomainStructHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainStructHandler{handlerData}; + } +}; + +class DomainAnnotationHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainAnnotationHandler{handlerData}; + } +}; + +class DomainAttributesHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainAttributesHandler{handlerData}; + } +}; + +class DomainFieldHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainFieldHandler{handlerData}; + } +}; + +class DomainFieldRefHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainFieldRefHandler{handlerData}; + } +}; + +class DomainPrimitiveHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainPrimitiveHandler{handlerData}; + } +}; + +class DomainChildHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainChildHandler{handlerData}; + } +}; + +class DomainParent : public Node { +public: + using Node::Node; +}; + +namespace RttiTypes { +extern const Rtti DomainParent; +} + +class DomainParentHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainParentHandler{handlerData}; + } +}; + +class DomainParentFieldHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainParentFieldHandler{handlerData}; + } +}; + +class DomainParentFieldRefHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainParentFieldRefHandler{handlerData}; + } +}; +} +#endif diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 54dfe3e..a608f7f 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -184,7 +184,7 @@ bool StaticHandler::annotationEnd(const Variant &className, bool StaticHandler::data(const Variant &data) { - // No data supported + logger().error("Did not expect any data here", data); return false; } diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp new file mode 100644 index 0000000..94ee82d --- /dev/null +++ b/src/core/parser/stack/ImportIncludeHandler.cpp @@ -0,0 +1,96 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "ImportIncludeHandler.hpp" + +#include + +namespace ousia { + +/* ImportIncludeHandler */ + +void ImportIncludeHandler::start(Variant::mapType &args) +{ + rel = args["rel"].asString(); + type = args["type"].asString(); + src = args["src"].asString(); + srcInArgs = !src.empty(); +} + +void ImportIncludeHandler::data(const std::string &data, int field) +{ + if (srcInArgs) { + logger().error("\"src\" attribute has already been set"); + return; + } + if (field != 0) { + logger().error("Command has only one field."); + return; + } + src.append(data); +} + +/* ImportHandler */ + +void ImportHandler::start(Variant::mapType &args) +{ + ImportIncludeHandler::start(args); + + // Make sure imports are still possible + if (scope().getFlag(ParserFlag::POST_HEAD)) { + logger().error("Imports must be listed before other commands.", + location()); + return; + } +} + +void ImportHandler::end() +{ + // Fetch the last node and check whether an import is valid at this + // position + Rooted leaf = scope().getLeaf(); + if (leaf == nullptr || !leaf->isa(&RttiTypes::RootNode)) { + logger().error( + "Import not supported here, must be inside a document, domain " + "or typesystem command.", + location()); + return; + } + Rooted leafRootNode = leaf.cast(); + + // Perform the actual import, register the imported node within the leaf + // node + Rooted imported = + context().import(src, type, rel, leafRootNode->getReferenceTypes()); + if (imported != nullptr) { + leafRootNode->reference(imported); + } +} + +/* IncludeHandler */ + +void IncludeHandler::start(Variant::mapType &args) +{ + ImportIncludeHandler::start(args); +} + +void IncludeHandler::end() +{ + context().include(src, type, rel, {&RttiTypes::Node}); +} +} diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp new file mode 100644 index 0000000..f9abe55 --- /dev/null +++ b/src/core/parser/stack/ImportIncludeHandler.hpp @@ -0,0 +1,90 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file ImportIncludeHandler.hpp + * + * Contains the conceptually similar handlers for the "include" and "import" + * commands. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_ +#define _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_ + +#include +#include + +namespace ousia { + +/** + * The ImportHandler is responsible for handling the "import" command. An import + * creates a reference to a specified file. The specified file is parsed (if + * this has not already been done) outside of the context of the current file. + * If the specified resource has already been parsed, a reference to the already + * parsed file is inserted. Imports are only possible before no other content + * has been parsed. + */ +class ImportHandler : public StaticFieldHandler { +public: + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + const Variant::mapType &args) override; + + /** + * Creates a new instance of the ImportHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ + static Handler *create(const HandlerData &handlerData) + { + return new ImportHandler{handlerData}; + } +}; + +/** + * The IncludeHandler is responsible for handling the "include" command. The + * included file is parsed in the context of the current file and will change + * the content that is currently being parsed. Includes are possible at (almost) + * any position in the source file. + */ +class IncludeHandler : public StaticFieldHandler { +public: + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + const Variant::mapType &args) override; + + /** + * Creates a new instance of the IncludeHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ + static Handler *create(const HandlerData &handlerData) + { + return new IncludeHandler{handlerData}; + } +}; +} +#endif diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp new file mode 100644 index 0000000..2cc7dfb --- /dev/null +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -0,0 +1,175 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "TypesystemHandler.hpp" + +#include +#include + +namespace ousia { + +/* TypesystemHandler */ + +void TypesystemHandler::start(Variant::mapType &args) +{ + // Create the typesystem instance + Rooted typesystem = + project()->createTypesystem(args["name"].asString()); + typesystem->setLocation(location()); + + // Push the typesystem onto the scope, set the POST_HEAD flag to true + scope().push(typesystem); + scope().setFlag(ParserFlag::POST_HEAD, false); +} + +void TypesystemHandler::end() { scope().pop(); } + +/* TypesystemEnumHandler */ + +void TypesystemEnumHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + // Fetch the current typesystem and create the enum node + Rooted typesystem = scope().selectOrThrow(); + Rooted enumType = + typesystem->createEnumType(args["name"].asString()); + enumType->setLocation(location()); + + scope().push(enumType); +} + +void TypesystemEnumHandler::end() { scope().pop(); } + +/* TypesystemEnumEntryHandler */ + +void TypesystemEnumEntryHandler::start(Variant::mapType &args) {} + +void TypesystemEnumEntryHandler::end() +{ + Rooted enumType = scope().selectOrThrow(); + enumType->addEntry(entry, logger()); +} + +void TypesystemEnumEntryHandler::data(const std::string &data, int field) +{ + if (field != 0) { + // TODO: This should be stored in the HandlerData + logger().error("Enum entry only has one field."); + return; + } + entry.append(data); +} + +/* TypesystemStructHandler */ + +void TypesystemStructHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + // Fetch the arguments used for creating this type + const std::string &name = args["name"].asString(); + const std::string &parent = args["parent"].asString(); + + // Fetch the current typesystem and create the struct node + Rooted typesystem = scope().selectOrThrow(); + Rooted structType = typesystem->createStructType(name); + structType->setLocation(location()); + + // Try to resolve the parent type and set it as parent structure + if (!parent.empty()) { + scope().resolve( + parent, structType, logger(), + [](Handle parent, Handle structType, Logger &logger) { + if (parent != nullptr) { + structType.cast()->setParentStructure( + parent.cast(), logger); + } + }); + } + scope().push(structType); +} + +void TypesystemStructHandler::end() { scope().pop(); } + +/* TypesystemStructFieldHandler */ + +void TypesystemStructFieldHandler::start(Variant::mapType &args) +{ + // Read the argument values + const std::string &name = args["name"].asString(); + const std::string &type = args["type"].asString(); + const Variant &defaultValue = args["default"]; + const bool optional = + !(defaultValue.isObject() && defaultValue.asObject() == nullptr); + + Rooted structType = scope().selectOrThrow(); + Rooted attribute = + structType->createAttribute(name, defaultValue, optional, logger()); + attribute->setLocation(location()); + + // Try to resolve the type and default value + if (optional) { + scope().resolveTypeWithValue( + type, attribute, attribute->getDefaultValue(), logger(), + [](Handle type, Handle attribute, Logger &logger) { + if (type != nullptr) { + attribute.cast()->setType(type.cast(), + logger); + } + }); + } else { + scope().resolveType(type, attribute, logger(), + [](Handle type, Handle attribute, + Logger &logger) { + if (type != nullptr) { + attribute.cast()->setType(type.cast(), logger); + } + }); + } +} + +void TypesystemStructFieldHandler::end() {} + +/* TypesystemConstantHandler */ + +void TypesystemConstantHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + // Read the argument values + const std::string &name = args["name"].asString(); + const std::string &type = args["type"].asString(); + const Variant &value = args["value"]; + + Rooted typesystem = scope().selectOrThrow(); + Rooted constant = typesystem->createConstant(name, value); + constant->setLocation(location()); + + // Try to resolve the type + scope().resolveTypeWithValue( + type, constant, constant->getValue(), logger(), + [](Handle type, Handle constant, Logger &logger) { + if (type != nullptr) { + constant.cast()->setType(type.cast(), logger); + } + }); +} + +void TypesystemConstantHandler::end() {} +} diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp new file mode 100644 index 0000000..76a7bc9 --- /dev/null +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -0,0 +1,121 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file TypesystemHandler.hpp + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_TYPESYSTEM_HANDLER_HPP_ +#define _OUSIA_TYPESYSTEM_HANDLER_HPP_ + +#include +#include + +namespace ousia { + +class TypesystemHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemHandler{handlerData}; + } +}; + +class TypesystemEnumHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemEnumHandler{handlerData}; + } +}; + +class TypesystemEnumEntryHandler : public Handler { +public: + using Handler::Handler; + + std::string entry; + + void start(Variant::mapType &args) override; + + void end() override; + + void data(const std::string &data, int field) override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemEnumEntryHandler{handlerData}; + } +}; + +class TypesystemStructHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemStructHandler{handlerData}; + } +}; + +class TypesystemStructFieldHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemStructFieldHandler{handlerData}; + } +}; + +class TypesystemConstantHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemConstantHandler{handlerData}; + } +}; +} +#endif -- cgit v1.2.3 From e2e32eef55406519c744002a404e7e5ca66b29a1 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 20:57:27 +0100 Subject: Declaring States in the Handler classes --- CMakeLists.txt | 1 + src/core/parser/stack/DocumentHandler.cpp | 21 +++++- src/core/parser/stack/DocumentHandler.hpp | 14 ++++ src/core/parser/stack/DomainHandler.cpp | 100 ++++++++++++++++++++++++- src/core/parser/stack/DomainHandler.hpp | 73 ++++++++++++++++-- src/core/parser/stack/GenericParserStates.cpp | 53 +++++++++++++ src/core/parser/stack/GenericParserStates.hpp | 49 ++++++++++++ src/core/parser/stack/ImportIncludeHandler.cpp | 24 +++++- src/core/parser/stack/ImportIncludeHandler.hpp | 13 ++++ src/core/parser/stack/TypesystemHandler.cpp | 48 +++++++++++- src/core/parser/stack/TypesystemHandler.hpp | 30 +++++++- 11 files changed, 409 insertions(+), 17 deletions(-) create mode 100644 src/core/parser/stack/GenericParserStates.cpp create mode 100644 src/core/parser/stack/GenericParserStates.hpp (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a3db32..2106cf0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,6 +165,7 @@ ADD_LIBRARY(ousia_core src/core/parser/stack/Callbacks src/core/parser/stack/DocumentHandler src/core/parser/stack/DomainHandler + src/core/parser/stack/GenericParserStates src/core/parser/stack/Handler src/core/parser/stack/ImportIncludeHandler src/core/parser/stack/State diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index b28f0fb..9fedabb 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -16,8 +16,6 @@ along with this program. If not, see . */ -#include "DocumentHandler.hpp" - #include #include @@ -30,6 +28,9 @@ #include #include +#include "DocumentHandler.hpp" +#include "State.hpp" + namespace ousia { namespace parser_stack { @@ -274,6 +275,22 @@ bool DocumentChildHandler::data(Variant &data) } return true; } + +namespace States { +const State Document = StateBuilder() + .parent(&None) + .createdNodeType(&RttiTypes::Document) + .elementHandler(DocumentHandler::create) + .arguments({Argument::String("name", "")}); + +const State DocumentChild = + StateBuilder() + .parents({&Document, &DocumentChild}) + .createdNodeTypes({&RttiTypes::StructureNode, + &RttiTypes::AnnotationEntity, + &RttiTypes::DocumentField}) + .elementHandler(DocumentChildHandler::create); +} } namespace RttiTypes { diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 7dc4c86..b339b96 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -144,6 +144,19 @@ public: return new DocumentChildHandler{handlerData}; } }; + +namespace States { +/** + * State constant representing the "document" tag. + */ +extern const State Document; + +/** + * State contstant representing any user-defined element within a document. + */ +extern const State DocumentChild; +} + } namespace RttiTypes { @@ -152,6 +165,7 @@ namespace RttiTypes { */ extern const Rtti DocumentField; } + } #endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index cb12543..24a6f1a 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -16,14 +16,18 @@ along with this program. If not, see . */ -#include "DomainHandler.hpp" - #include +#include #include #include #include #include +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" + namespace ousia { namespace parser_stack { @@ -278,6 +282,98 @@ bool DomainParentFieldRefHandler::start(Variant::mapType &args) }); return true; } + +namespace States { +const State Domain = StateBuilder() + .parents({&None, &Document}) + .createdNodeType(&RttiTypes::Domain) + .elementHandler(DomainHandler::create) + .arguments({Argument::String("name")}); + +const State DomainStruct = + StateBuilder() + .parent(&Domain) + .createdNodeType(&RttiTypes::StructuredClass) + .elementHandler(DomainStructHandler::create) + .arguments({Argument::String("name"), + Argument::Cardinality("cardinality", Cardinality::any()), + Argument::Bool("isRoot", false), + Argument::Bool("transparent", false), + Argument::String("isa", "")}); + +const State DomainAnnotation = + StateBuilder() + .parent(&Domain) + .createdNodeType(&RttiTypes::AnnotationClass) + .elementHandler(DomainAnnotationHandler::create) + .arguments({Argument::String("name")}); + +const State DomainAttributes = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::StructType) + .elementHandler(DomainAttributesHandler::create) + .arguments({}); + +const State DomainAttribute = + StateBuilder() + .parent(&DomainAttributes) + .elementHandler(TypesystemStructFieldHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("default", Variant::fromObject(nullptr))}); + +const State DomainField = StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("isSubtree", false), + Argument::Bool("optional", false)}); + +const State DomainFieldRef = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainFieldRefHandler::create) + .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); + +const State DomainStructPrimitive = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainPrimitiveHandler::create) + .arguments( + {Argument::String("name", ""), Argument::Bool("isSubtree", false), + Argument::Bool("optional", false), Argument::String("type")}); + +const State DomainStructChild = StateBuilder() + .parent(&DomainField) + .elementHandler(DomainChildHandler::create) + .arguments({Argument::String("ref")}); + +const State DomainStructParent = + StateBuilder() + .parent(&DomainStruct) + .createdNodeType(&RttiTypes::DomainParent) + .elementHandler(DomainParentHandler::create) + .arguments({Argument::String("ref")}); + +const State DomainStructParentField = + StateBuilder() + .parent(&DomainStructParent) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainParentFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("isSubtree", false), + Argument::Bool("optional", false)}); + +const State DomainStructParentFieldRef = + StateBuilder() + .parent(&DomainStructParent) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainParentFieldRefHandler::create) + .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); +} } namespace RttiTypes { diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp index 917d65d..76172d6 100644 --- a/src/core/parser/stack/DomainHandler.hpp +++ b/src/core/parser/stack/DomainHandler.hpp @@ -34,13 +34,14 @@ #include "Handler.hpp" namespace ousia { -namespace parser_stack { - -// TODO: Documentation // Forward declarations class Rtti; +namespace parser_stack { + +// TODO: Documentation + class DomainHandler : public StaticHandler { public: using StaticHandler::StaticHandler; @@ -149,10 +150,6 @@ public: using Node::Node; }; -namespace RttiTypes { -extern const Rtti DomainParent; -} - class DomainParentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; @@ -189,6 +186,68 @@ public: return new DomainParentFieldRefHandler{handlerData}; } }; + +namespace States { +/** + * State representing a "domain" struct. + */ +extern const State Domain; + +/** + * State representing a "struct" tag within a domain description. + */ +extern const State DomainStruct; + +/** + * State representing an "annotation" tag within a domain description. + */ +extern const State DomainAnnotation; + +/** + * State representing an "attributes" tag within a structure or annotation. + */ +extern const State DomainAttributes; + +/** + * State representing an "attribute" tag within the "attributes". + */ +extern const State DomainAttribute; + +/** + * State representing a "field" tag within a structure or annotation. + */ +extern const State DomainField; + +/** + * State representing a "fieldref" tag within a structure or annotation. + */ +extern const State DomainFieldRef; + +/** + * State representing a "primitive" tag within a structure or annotation. + */ +extern const State DomainStructPrimitive; + +/** + * State representing a "child" tag within a structure or annotation. + */ +extern const State DomainStructChild; + +/** + * State representing a "parent" tag within a structure or annotation. + */ +extern const State DomainStructParent; + +/** + * State representing a "field" tag within a "parent" tag. + */ +extern const State DomainStructParentField; + +/** + * State representing a "fieldRef" tag within a "parent" tag. + */ +extern const State DomainStructParentFieldRef; +} } namespace RttiTypes { diff --git a/src/core/parser/stack/GenericParserStates.cpp b/src/core/parser/stack/GenericParserStates.cpp new file mode 100644 index 0000000..69a6e0e --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.cpp @@ -0,0 +1,53 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "GenericParserStates.hpp" +#include "ImportIncludeHandler.hpp" +#include "TypesystemHandler.hpp" + +namespace ousia { +namespace parser_stack { + +const std::multimap GenericParserStates{ + {"document", &States::Document}, + {"*", &States::DocumentChild}, + {"domain", &States::Domain}, + {"struct", &States::DomainStruct}, + {"annotation", &States::DomainAnnotation}, + {"attributes", &States::DomainAttributes}, + {"attribute", &States::DomainAttribute}, + {"field", &States::DomainField}, + {"fieldRef", &States::DomainFieldRef}, + {"primitive", &States::DomainStructPrimitive}, + {"childRef", &States::DomainStructChild}, + {"parentRef", &States::DomainStructParent}, + {"field", &States::DomainStructParentField}, + {"fieldRef", &States::DomainStructParentFieldRef}, + {"typesystem", &States::Typesystem}, + {"enum", &States::TypesystemEnum}, + {"entry", &States::TypesystemEnumEntry}, + {"struct", &States::TypesystemStruct}, + {"field", &States::TypesystemStructField}, + {"constant", &States::TypesystemConstant}, + {"import", &States::Import}, + {"include", &States::Include}}; +} +} + diff --git a/src/core/parser/stack/GenericParserStates.hpp b/src/core/parser/stack/GenericParserStates.hpp new file mode 100644 index 0000000..552eee5 --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.hpp @@ -0,0 +1,49 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file GenericParserStates.hpp + * + * Contains a multimap which maps between tag/command names to the corresponding + * state descriptors. This multimap is used to initialize the push down + * automaton residing inside the "Stack" class. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ +#define _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ + +#include +#include + +namespace ousia { +namespace parser_stack { + +// Forward declarations +class State; + +/** + * Map between tagnames and references to the corresponding State instances. + */ +extern const std::multimap GenericParserStates; +} +} + +#endif /* _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ */ + diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp index 797dd8d..d1ea97d 100644 --- a/src/core/parser/stack/ImportIncludeHandler.cpp +++ b/src/core/parser/stack/ImportIncludeHandler.cpp @@ -16,12 +16,16 @@ along with this program. If not, see . */ -#include "ImportIncludeHandler.hpp" - #include #include #include +#include "DomainHandler.hpp" +#include "DocumentHandler.hpp" +#include "ImportIncludeHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" + namespace ousia { namespace parser_stack { @@ -58,5 +62,21 @@ void IncludeHandler::doHandle(const Variant &fieldData, Variant::mapType &args) context().include(fieldData.asString(), args["type"].asString(), args["rel"].asString(), {&RttiTypes::Node}); } + +namespace States { +const State Import = + StateBuilder() + .parents({&Document, &Typesystem, &Domain}) + .elementHandler(ImportHandler::create) + .arguments({Argument::String("rel", ""), Argument::String("type", ""), + Argument::String("src", "")}); + +const State Include = + StateBuilder() + .parent(&All) + .elementHandler(IncludeHandler::create) + .arguments({Argument::String("rel", ""), Argument::String("type", ""), + Argument::String("src", "")}); +} } } diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp index 8f3d3d0..6168639 100644 --- a/src/core/parser/stack/ImportIncludeHandler.hpp +++ b/src/core/parser/stack/ImportIncludeHandler.hpp @@ -88,6 +88,19 @@ public: return new IncludeHandler{handlerData, "src"}; } }; + +namespace States { +/** + * State representing the "import" command. + */ +extern const State Import; + +/** + * State representing the "include" command. + */ +extern const State Include; +} + } } #endif diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index 34f64f9..d053699 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -16,12 +16,14 @@ along with this program. If not, see . */ -#include "TypesystemHandler.hpp" - #include +#include #include #include +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" namespace ousia { namespace parser_stack { @@ -170,6 +172,48 @@ bool TypesystemConstantHandler::start(Variant::mapType &args) return true; } + +namespace States { +const State Typesystem = StateBuilder() + .parents({&None, &Domain}) + .createdNodeType(&RttiTypes::Typesystem) + .elementHandler(TypesystemHandler::create) + .arguments({Argument::String("name", "")}); + +const State TypesystemEnum = StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::EnumType) + .elementHandler(TypesystemEnumHandler::create) + .arguments({Argument::String("name")}); + +const State TypesystemEnumEntry = + StateBuilder() + .parent(&TypesystemEnum) + .elementHandler(TypesystemEnumEntryHandler::create) + .arguments({}); + +const State TypesystemStruct = + StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::StructType) + .elementHandler(TypesystemStructHandler::create) + .arguments({Argument::String("name"), Argument::String("parent", "")}); + +const State TypesystemStructField = + StateBuilder() + .parent(&TypesystemStruct) + .elementHandler(TypesystemStructFieldHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("default", Variant::fromObject(nullptr))}); + +const State TypesystemConstant = + StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::Constant) + .elementHandler(TypesystemConstantHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("value")}); +} } } diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp index 55277a1..85494f1 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -91,8 +91,7 @@ class TypesystemEnumEntryHandler : public StaticFieldHandler { public: using StaticFieldHandler::StaticFieldHandler; - void doHandle(const Variant &fieldData, - Variant::mapType &args) override; + void doHandle(const Variant &fieldData, Variant::mapType &args) override; /** * Creates a new instance of the TypesystemEnumEntryHandler. @@ -177,6 +176,33 @@ public: return new TypesystemConstantHandler{handlerData}; } }; + +namespace States { +/** + * State representing the "typesystem" tag. + */ +extern const State Typesystem; +/** + * State representing the "enum" tag within a typesystem. + */ +extern const State TypesystemEnum; +/** + * State representing the "entry" tag within an enum. + */ +extern const State TypesystemEnumEntry; +/** + * State representing the "struct" tag within a typesystem. + */ +extern const State TypesystemStruct; +/** + * State representing the "field" tag within a typesystem structure. + */ +extern const State TypesystemStructField; +/** + * State representing the "constant" tag within a typesystem. + */ +extern const State TypesystemConstant; +} } } #endif -- cgit v1.2.3 From c298f00ef1633a663775fe9a715a249b9f4d255d Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 20:58:26 +0100 Subject: Implemented OsxmlParser --- CMakeLists.txt | 2 + src/formats/osxml/OsxmlParser.cpp | 288 +++++++++------------------------ src/formats/osxml/OsxmlParser.hpp | 2 +- test/formats/osxml/OsxmlParserTest.cpp | 28 ++-- 4 files changed, 91 insertions(+), 229 deletions(-) (limited to 'CMakeLists.txt') diff --git a/CMakeLists.txt b/CMakeLists.txt index 2106cf0..ec1bb4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,6 +203,7 @@ TARGET_LINK_LIBRARIES(ousia_osml ADD_LIBRARY(ousia_osxml src/formats/osxml/OsxmlAttributeLocator src/formats/osxml/OsxmlEventParser + src/formats/osxml/OsxmlParser ) TARGET_LINK_LIBRARIES(ousia_osxml @@ -351,6 +352,7 @@ IF(TEST) ADD_EXECUTABLE(ousia_test_osxml test/formats/osxml/OsxmlEventParserTest + test/formats/osxml/OsxmlParserTest ) TARGET_LINK_LIBRARIES(ousia_test_osxml diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp index 869c76a..c216855 100644 --- a/src/formats/osxml/OsxmlParser.cpp +++ b/src/formats/osxml/OsxmlParser.cpp @@ -16,223 +16,83 @@ along with this program. If not, see . */ -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "XmlParser.hpp" +#include +#include +#include + +#include "OsxmlEventParser.hpp" +#include "OsxmlParser.hpp" namespace ousia { -namespace ParserStates { -/* Document states */ -static const ParserState Document = - ParserStateBuilder() - .parent(&None) - .createdNodeType(&RttiTypes::Document) - .elementHandler(DocumentHandler::create) - .arguments({Argument::String("name", "")}); - -static const ParserState DocumentChild = - ParserStateBuilder() - .parents({&Document, &DocumentChild}) - .createdNodeTypes({&RttiTypes::StructureNode, - &RttiTypes::AnnotationEntity, - &RttiTypes::DocumentField}) - .elementHandler(DocumentChildHandler::create); - -/* Domain states */ -static const ParserState Domain = ParserStateBuilder() - .parents({&None, &Document}) - .createdNodeType(&RttiTypes::Domain) - .elementHandler(DomainHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState DomainStruct = - ParserStateBuilder() - .parent(&Domain) - .createdNodeType(&RttiTypes::StructuredClass) - .elementHandler(DomainStructHandler::create) - .arguments({Argument::String("name"), - Argument::Cardinality("cardinality", Cardinality::any()), - Argument::Bool("isRoot", false), - Argument::Bool("transparent", false), - Argument::String("isa", "")}); - -static const ParserState DomainAnnotation = - ParserStateBuilder() - .parent(&Domain) - .createdNodeType(&RttiTypes::AnnotationClass) - .elementHandler(DomainAnnotationHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState DomainAttributes = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::StructType) - .elementHandler(DomainAttributesHandler::create) - .arguments({}); - -static const ParserState DomainAttribute = - ParserStateBuilder() - .parent(&DomainAttributes) - .elementHandler(TypesystemStructFieldHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("default", Variant::fromObject(nullptr))}); - -static const ParserState DomainField = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainFieldHandler::create) - .arguments({Argument::String("name", ""), - Argument::Bool("isSubtree", false), - Argument::Bool("optional", false)}); - -static const ParserState DomainFieldRef = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainFieldRefHandler::create) - .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); - -static const ParserState DomainStructPrimitive = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainPrimitiveHandler::create) - .arguments( - {Argument::String("name", ""), Argument::Bool("isSubtree", false), - Argument::Bool("optional", false), Argument::String("type")}); - -static const ParserState DomainStructChild = - ParserStateBuilder() - .parent(&DomainField) - .elementHandler(DomainChildHandler::create) - .arguments({Argument::String("ref")}); - -static const ParserState DomainStructParent = - ParserStateBuilder() - .parent(&DomainStruct) - .createdNodeType(&RttiTypes::DomainParent) - .elementHandler(DomainParentHandler::create) - .arguments({Argument::String("ref")}); - -static const ParserState DomainStructParentField = - ParserStateBuilder() - .parent(&DomainStructParent) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainParentFieldHandler::create) - .arguments({Argument::String("name", ""), - Argument::Bool("isSubtree", false), - Argument::Bool("optional", false)}); - -static const ParserState DomainStructParentFieldRef = - ParserStateBuilder() - .parent(&DomainStructParent) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainParentFieldRefHandler::create) - .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); - -/* Typesystem states */ -static const ParserState Typesystem = - ParserStateBuilder() - .parents({&None, &Domain}) - .createdNodeType(&RttiTypes::Typesystem) - .elementHandler(TypesystemHandler::create) - .arguments({Argument::String("name", "")}); - -static const ParserState TypesystemEnum = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::EnumType) - .elementHandler(TypesystemEnumHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState TypesystemEnumEntry = - ParserStateBuilder() - .parent(&TypesystemEnum) - .elementHandler(TypesystemEnumEntryHandler::create) - .arguments({}); - -static const ParserState TypesystemStruct = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::StructType) - .elementHandler(TypesystemStructHandler::create) - .arguments({Argument::String("name"), Argument::String("parent", "")}); - -static const ParserState TypesystemStructField = - ParserStateBuilder() - .parent(&TypesystemStruct) - .elementHandler(TypesystemStructFieldHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("default", Variant::fromObject(nullptr))}); - -static const ParserState TypesystemConstant = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::Constant) - .elementHandler(TypesystemConstantHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("value")}); - -/* Special states for import and include */ -static const ParserState Import = - ParserStateBuilder() - .parents({&Document, &Typesystem, &Domain}) - .elementHandler(ImportHandler::create) - .arguments({Argument::String("rel", ""), Argument::String("type", ""), - Argument::String("src", "")}); - -static const ParserState Include = - ParserStateBuilder() - .parent(&All) - .elementHandler(IncludeHandler::create) - .arguments({Argument::String("rel", ""), Argument::String("type", ""), - Argument::String("src", "")}); - -static const std::multimap XmlStates{ - {"document", &Document}, - {"*", &DocumentChild}, - {"domain", &Domain}, - {"struct", &DomainStruct}, - {"annotation", &DomainAnnotation}, - {"attributes", &DomainAttributes}, - {"attribute", &DomainAttribute}, - {"field", &DomainField}, - {"fieldRef", &DomainFieldRef}, - {"primitive", &DomainStructPrimitive}, - {"childRef", &DomainStructChild}, - {"parentRef", &DomainStructParent}, - {"field", &DomainStructParentField}, - {"fieldRef", &DomainStructParentFieldRef}, - {"typesystem", &Typesystem}, - {"enum", &TypesystemEnum}, - {"entry", &TypesystemEnumEntry}, - {"struct", &TypesystemStruct}, - {"field", &TypesystemStructField}, - {"constant", &TypesystemConstant}, - {"import", &Import}, - {"include", &Include}}; +using namespace parser_stack; + +/** + * Class containing the actual OsxmlParser implementation. + */ +class OsxmlParserImplementation : public OsxmlEvents { +private: + /** + * Actual xml parser -- converts the xml stream into a set of events. + */ + OsxmlEventParser parser; + + /** + * Pushdown automaton responsible for converting the xml events into an + * actual Node tree. + */ + Stack stack; + +public: + /** + * Constructor of the OsxmlParserImplementation class. + * + * @param reader is a reference to the CharReader instance from which the + * XML should be read. + * @param ctx is a reference to the ParserContext instance that should be + * used. + */ + OsxmlParserImplementation(CharReader &reader, ParserContext &ctx) + : parser(reader, *this, ctx.getLogger()), + stack(ctx, GenericParserStates) + { + } + + /** + * Starts the actual parsing process. + */ + void parse() { parser.parse(); } + + void command(const Variant &name, const Variant::mapType &args) override + { + stack.command(name, args); + stack.fieldStart(true); + } + + void annotationStart(const Variant &name, + const Variant::mapType &args) override + { + stack.annotationStart(name, args); + stack.fieldStart(true); + } + + void annotationEnd(const Variant &className, + const Variant &elementName) override + { + stack.annotationEnd(className, elementName); + } + + void fieldEnd() override { stack.fieldEnd(); } + + void data(const Variant &data) override { stack.data(data); } +}; + +/* Class OsxmlParser */ + +void OsxmlParser::doParse(CharReader &reader, ParserContext &ctx) +{ + OsxmlParserImplementation impl(reader, ctx); + impl.parse(); } - - } diff --git a/src/formats/osxml/OsxmlParser.hpp b/src/formats/osxml/OsxmlParser.hpp index 281a49c..0fbf83c 100644 --- a/src/formats/osxml/OsxmlParser.hpp +++ b/src/formats/osxml/OsxmlParser.hpp @@ -17,7 +17,7 @@ */ /** - * @file XmlParser.hpp + * @file OsxmlParser.hpp * * Contains the parser responsible for reading Ousía XML Documents (extension * oxd) and Ousía XML Modules (extension oxm). diff --git a/test/formats/osxml/OsxmlParserTest.cpp b/test/formats/osxml/OsxmlParserTest.cpp index 269a3f6..a2bd8b1 100644 --- a/test/formats/osxml/OsxmlParserTest.cpp +++ b/test/formats/osxml/OsxmlParserTest.cpp @@ -30,7 +30,7 @@ #include #include -#include +#include namespace ousia { @@ -41,7 +41,7 @@ extern const Rtti Typesystem; } struct XmlStandaloneEnvironment : public StandaloneEnvironment { - XmlParser xmlParser; + OsxmlParser parser; FileLocator fileLocator; XmlStandaloneEnvironment(ConcreteLogger &logger) @@ -52,21 +52,21 @@ struct XmlStandaloneEnvironment : public StandaloneEnvironment { registry.registerDefaultExtensions(); registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}, - {&RttiTypes::Node}, &xmlParser); + {&RttiTypes::Node}, &parser); registry.registerResourceLocator(&fileLocator); } }; static TerminalLogger logger(std::cerr, true); -TEST(XmlParser, mismatchedTag) +TEST(OsxmlParser, mismatchedTag) { XmlStandaloneEnvironment env(logger); env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document}); ASSERT_TRUE(logger.hasError()); } -TEST(XmlParser, generic) +TEST(OsxmlParser, generic) { XmlStandaloneEnvironment env(logger); env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node}); @@ -186,7 +186,7 @@ static void checkFieldDescriptor( Handle primitiveType = nullptr, bool optional = false) { auto res = desc->resolve(&RttiTypes::FieldDescriptor, name); - ASSERT_EQ(1, res.size()); + ASSERT_EQ(1U, res.size()); checkFieldDescriptor(res[0].node, name, parent, children, type, primitiveType, optional); } @@ -201,7 +201,7 @@ static void checkFieldDescriptor( optional); } -TEST(XmlParser, domainParsing) +TEST(OsxmlParser, domainParsing) { XmlStandaloneEnvironment env(logger); Rooted book_domain_node = @@ -339,10 +339,10 @@ static void checkText(Handle p, Handle expectedParent, { checkStructuredEntity(p, expectedParent, doc, "paragraph"); Rooted par = p.cast(); - ASSERT_EQ(1, par->getField().size()); + ASSERT_EQ(1U, par->getField().size()); checkStructuredEntity(par->getField()[0], par, doc, "text"); Rooted text = par->getField()[0].cast(); - ASSERT_EQ(1, text->getField().size()); + ASSERT_EQ(1U, text->getField().size()); Handle d = text->getField()[0]; ASSERT_FALSE(d == nullptr); @@ -352,7 +352,7 @@ static void checkText(Handle p, Handle expectedParent, ASSERT_EQ(expected, prim->getContent()); } -TEST(XmlParser, documentParsing) +TEST(OsxmlParser, documentParsing) { XmlStandaloneEnvironment env(logger); Rooted book_document_node = @@ -364,7 +364,7 @@ TEST(XmlParser, documentParsing) checkStructuredEntity(doc->getRoot(), doc, doc, "book"); { Rooted book = doc->getRoot(); - ASSERT_EQ(2, book->getField().size()); + ASSERT_EQ(2U, book->getField().size()); checkText(book->getField()[0], book, doc, "This might be some introductory text or a dedication."); checkStructuredEntity(book->getField()[1], book, doc, "chapter", @@ -372,7 +372,7 @@ TEST(XmlParser, documentParsing) { Rooted chapter = book->getField()[1].cast(); - ASSERT_EQ(3, chapter->getField().size()); + ASSERT_EQ(3U, chapter->getField().size()); checkText(chapter->getField()[0], chapter, doc, "Here we might have an introduction to the chapter."); checkStructuredEntity(chapter->getField()[1], chapter, doc, @@ -381,7 +381,7 @@ TEST(XmlParser, documentParsing) { Rooted section = chapter->getField()[1].cast(); - ASSERT_EQ(1, section->getField().size()); + ASSERT_EQ(1U, section->getField().size()); checkText(section->getField()[0], section, doc, "Here we might find the actual section content."); } @@ -391,7 +391,7 @@ TEST(XmlParser, documentParsing) { Rooted section = chapter->getField()[2].cast(); - ASSERT_EQ(1, section->getField().size()); + ASSERT_EQ(1U, section->getField().size()); checkText(section->getField()[0], section, doc, "Here we might find the actual section content."); } -- cgit v1.2.3