From 84c9abc3e9762c4486ddc5ca0352a5d697a51987 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Wed, 25 Feb 2015 23:09:26 +0100 Subject: start of branch, commit log will be rewritten --- test/formats/osxml/OsxmlEventParserTest.cpp | 47 +++-------------------------- 1 file changed, 4 insertions(+), 43 deletions(-) (limited to 'test/formats/osxml/OsxmlEventParserTest.cpp') diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index 3293370..6942166 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -74,13 +75,11 @@ public: }; static std::vector> parseXml( - const char *testString, - WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) + const char *testString) { TestOsxmlEventListener listener; CharReader reader(testString); OsxmlEventParser parser(reader, listener, logger); - parser.setWhitespaceMode(whitespaceMode); parser.parse(); return listener.events; } @@ -157,7 +156,7 @@ TEST(OsxmlEventParser, magicTopLevelTagInside) ASSERT_EQ(expectedEvents, events); } -TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +TEST(OsxmlEventParser, commandWithData) { const char *testString = " hello \n world "; // 012345678901 234567890123 @@ -168,50 +167,12 @@ TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - auto events = parseXml(testString, WhitespaceMode::PRESERVE); + auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); // Check the location of the text ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); } - -TEST(OsxmlEventParser, commandWithDataTrimWhitespace) -{ - const char *testString = " hello \n world "; - // 012345678901 234567890123 - // 0 1 2 - - std::vector> 
expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::TRIM); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} - -TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) -{ - const char *testString = " hello \n world "; - // 012345678901 234567890123 - // 0 1 2 - - std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::COLLAPSE); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} } -- cgit v1.2.3 From 3bdc30e0798d6b356782da430e93b72b4303e963 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:32:34 +0100 Subject: Adapted OsxmlParser to new Stack interface, enabled Osxml code in CMakeLists again --- CMakeLists.txt | 22 ++++++++-------- src/formats/osxml/OsxmlEventParser.cpp | 6 ++--- src/formats/osxml/OsxmlEventParser.hpp | 11 +++----- src/formats/osxml/OsxmlParser.cpp | 13 ++++----- test/formats/osxml/OsxmlEventParserTest.cpp | 41 ++++++++++++++++------------- 5 files changed, 48 insertions(+), 45 deletions(-) (limited to 'test/formats/osxml/OsxmlEventParserTest.cpp') diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e021fd..2a09b54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -225,7 +225,7 @@ TARGET_LINK_LIBRARIES(ousia_osml ADD_LIBRARY(ousia_osxml src/formats/osxml/OsxmlAttributeLocator 
src/formats/osxml/OsxmlEventParser -# src/formats/osxml/OsxmlParser + src/formats/osxml/OsxmlParser ) TARGET_LINK_LIBRARIES(ousia_osxml @@ -397,17 +397,17 @@ IF(TEST) ousia_filesystem ) -# ADD_EXECUTABLE(ousia_test_osxml -# test/formats/osxml/OsxmlEventParserTest + ADD_EXECUTABLE(ousia_test_osxml + test/formats/osxml/OsxmlEventParserTest # test/formats/osxml/OsxmlParserTest -# ) + ) -# TARGET_LINK_LIBRARIES(ousia_test_osxml -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_osxml -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_osxml + ${GTEST_LIBRARIES} + ousia_core + ousia_osxml + ousia_filesystem + ) ADD_EXECUTABLE(ousia_test_xml test/plugins/xml/XmlOutputTest @@ -426,7 +426,7 @@ IF(TEST) ADD_TEST(ousia_test_html ousia_test_html) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) ADD_TEST(ousia_test_osml ousia_test_osml) -# ADD_TEST(ousia_test_osxml ousia_test_osxml) + ADD_TEST(ousia_test_osxml ousia_test_osxml) ADD_TEST(ousia_test_xml ousia_test_xml) ENDIF() diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index 855f80d..83c16f0 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -323,7 +323,7 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // Just issue a "commandStart" event in any other case Variant nameVar = Variant::fromString(nameStr); nameVar.setLocation(nameLoc); - parser->getEvents().command(nameVar, args); + parser->getEvents().commandStart(nameVar, args); } } @@ -358,8 +358,8 @@ static void xmlEndElementHandler(void *ref, const XML_Char *name) return; } - // Issue the "fieldEnd" event - parser->getEvents().fieldEnd(); + // Issue the "rangeEnd" event + parser->getEvents().rangeEnd(); } static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index e3fd5d4..7a8c96d 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ 
b/src/formats/osxml/OsxmlEventParser.hpp @@ -59,7 +59,8 @@ public: * @param args is a map containing the arguments that were given to the * command. */ - virtual void command(const Variant &name, const Variant::mapType &args) = 0; + virtual void commandStart(const Variant &name, + const Variant::mapType &args) = 0; /** * Called whenever an annotation starts. Note that this implicitly always @@ -88,13 +89,9 @@ public: const Variant &elementName) = 0; /** - * Called whenever the default field which was implicitly started by - * commandStart or annotationStart ends. Note that this does not end the - * range of an annotation, but the default field of the annotation. To - * signal the end of the annotation this, the annotationEnd method will be - * invoked. + * Called whenever the command or annotation tags end. */ - virtual void fieldEnd() = 0; + virtual void rangeEnd() = 0; /** * Called whenever string data is found. diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp index c216855..924d11b 100644 --- a/src/formats/osxml/OsxmlParser.cpp +++ b/src/formats/osxml/OsxmlParser.cpp @@ -16,6 +16,8 @@ along with this program. If not, see . 
*/ +#include +#include #include #include #include @@ -63,17 +65,16 @@ public: */ void parse() { parser.parse(); } - void command(const Variant &name, const Variant::mapType &args) override + void commandStart(const Variant &name, + const Variant::mapType &args) override { - stack.command(name, args); - stack.fieldStart(true); + stack.commandStart(name, args, true); } void annotationStart(const Variant &name, const Variant::mapType &args) override { - stack.annotationStart(name, args); - stack.fieldStart(true); + stack.annotationStart(name, args, true); } void annotationEnd(const Variant &className, @@ -82,7 +83,7 @@ public: stack.annotationEnd(className, elementName); } - void fieldEnd() override { stack.fieldEnd(); } + void rangeEnd() override { stack.rangeEnd(); } void data(const Variant &data) override { stack.data(data); } }; diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index 6942166..b24a43d 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -32,10 +32,10 @@ static TerminalLogger logger(std::cerr, true); namespace { enum class OsxmlEvent { - COMMAND, + COMMAND_START, ANNOTATION_START, ANNOTATION_END, - FIELD_END, + RANGE_END, DATA }; @@ -43,9 +43,10 @@ class TestOsxmlEventListener : public OsxmlEvents { public: std::vector> events; - void command(const Variant &name, const Variant::mapType &args) override + void commandStart(const Variant &name, + const Variant::mapType &args) override { - events.emplace_back(OsxmlEvent::COMMAND, + events.emplace_back(OsxmlEvent::COMMAND_START, Variant::arrayType{name, args}); } @@ -63,9 +64,9 @@ public: Variant::arrayType{className, elementName}); } - void fieldEnd() override + void rangeEnd() override { - events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); + events.emplace_back(OsxmlEvent::RANGE_END, Variant::arrayType{}); } void data(const Variant &data) override @@ -92,11 +93,11 @@ 
TEST(OsxmlEventParser, simpleCommandWithArgs) // 0 1 2 3 std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, + {OsxmlEvent::COMMAND_START, Variant::arrayType{ "a", Variant::mapType{ {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); @@ -132,10 +133,12 @@ TEST(OsxmlEventParser, magicTopLevelTag) const char *testString = ""; std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}, - {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"b", Variant::mapType{}}}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); @@ -146,11 +149,12 @@ TEST(OsxmlEventParser, magicTopLevelTagInside) const char *testString = ""; std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, - {OsxmlEvent::COMMAND, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::COMMAND_START, Variant::arrayType{{"ousia", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); @@ -163,9 +167,10 @@ TEST(OsxmlEventParser, commandWithData) // 0 1 2 std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", 
Variant::mapType{}}}, {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); -- cgit v1.2.3 From 11ee669f29e426effaf4a1e0d82baa978219e92f Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 10:35:22 +0100 Subject: OsxmlEventParser also fills a TokenizedData instance now --- src/formats/osxml/OsxmlEventParser.cpp | 87 +++++++++-------------------- src/formats/osxml/OsxmlEventParser.hpp | 10 ++-- src/formats/osxml/OsxmlParser.cpp | 2 +- test/formats/osxml/OsxmlEventParserTest.cpp | 11 +++- 4 files changed, 41 insertions(+), 69 deletions(-) (limited to 'test/formats/osxml/OsxmlEventParserTest.cpp') diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index 83c16f0..79a8dbe 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include "OsxmlAttributeLocator.hpp" #include "OsxmlEventParser.hpp" @@ -38,6 +39,11 @@ namespace ousia { */ class OsxmlEventParserData { public: + /** + * Current character data buffer. + */ + TokenizedData data; + /** * Contains the current depth of the parsing process. */ @@ -51,24 +57,13 @@ public: ssize_t annotationEndTagDepth; /** - * Current character data buffer. - */ - std::vector textBuf; - - /** - * Current character data start. - */ - size_t textStart; - - /** - * Current character data end. - */ - size_t textEnd; - - /** - * Default constructor. + * Constructor taking the sourceId of the file from which the XML is being + * parsed. + * + * @param sourceId is the source id of the XML file from which the data is + * currently being parsed. */ - OsxmlEventParserData(); + OsxmlEventParserData(SourceId sourceId); /** * Increments the depth. @@ -91,14 +86,6 @@ public: * @return true if character data is available. 
*/ bool hasText(); - - /** - * Returns a Variant containing the character data and its location. - * - * @return a string variant containing the text data and the character - * location. - */ - Variant getText(SourceId sourceId); }; /* Class GuardedExpatXmlParser */ @@ -156,7 +143,7 @@ public: static const std::string TOP_LEVEL_TAG{"ousia"}; /** - * Prefix used to indicate the start of an annoation (note the trailing colon) + * Prefix used to indicate the start of an annotation (note the trailing colon). */ static const std::string ANNOTATION_START_PREFIX{"a:start:"}; @@ -203,8 +190,9 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // If there is any text data in the buffer, issue that first if (parser->getData().hasText()) { - parser->getEvents().data( - parser->getData().getText(parser->getReader().getSourceId())); + TokenizedData &data = parser->getData().data; + parser->getEvents().data(data); + data.clear(); } // Read the argument locations -- this is only a stupid and slow hack, @@ -348,8 +336,9 @@ static void xmlEndElementHandler(void *ref, const XML_Char *name) // If there is any text data in the buffer, issue that first if (parser->getData().hasText()) { - parser->getEvents().data( - parser->getData().getText(parser->getReader().getSourceId())); + TokenizedData &data = parser->getData().data; + parser->getEvents().data(data); + data.clear(); } // Abort if the special ousia tag ends here @@ -381,18 +370,8 @@ static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) // Synchronize the logger position SourceLocation loc = xmlSyncLoggerPosition(p, ulen); - // Fetch some variables for convenience - OsxmlEventParserData &data = parser->getData(); - std::vector &textBuf = data.textBuf; - - // Update start and end position - if (textBuf.empty()) { - data.textStart = loc.getStart(); - } - data.textEnd = loc.getEnd(); - - // Insert the data into the text buffer - textBuf.insert(textBuf.end(), &s[0], &s[ulen]); + // Append the 
data to the buffer + parser->getData().data.append(std::string(s, ulen), loc.getStart()); } /* Class OsxmlEvents */ @@ -401,8 +380,8 @@ OsxmlEvents::~OsxmlEvents() {} /* Class OsxmlEventParser */ -OsxmlEventParserData::OsxmlEventParserData() - : depth(0), annotationEndTagDepth(-1), textStart(0), textEnd(0) +OsxmlEventParserData::OsxmlEventParserData(SourceId sourceId) + : data(sourceId), depth(0), annotationEndTagDepth(-1) { } @@ -423,23 +402,7 @@ bool OsxmlEventParserData::inAnnotationEndTag() return (annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth); } -bool OsxmlEventParserData::hasText() { return !textBuf.empty(); } - -Variant OsxmlEventParserData::getText(SourceId sourceId) -{ - // Create a variant containing the string data and the location - Variant var = - Variant::fromString(std::string{textBuf.data(), textBuf.size()}); - var.setLocation({sourceId, textStart, textEnd}); - - // Reset the text buffers - textBuf.clear(); - textStart = 0; - textEnd = 0; - - // Return the variant - return var; -} +bool OsxmlEventParserData::hasText() { return !data.empty(); } /* Class OsxmlEventParser */ @@ -448,7 +411,7 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, : reader(reader), events(events), logger(logger), - data(new OsxmlEventParserData()) + data(new OsxmlEventParserData(reader.getSourceId())) { } diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index 7a8c96d..4c5a485 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -96,10 +96,10 @@ public: /** * Called whenever string data is found. * - * @param data is a Variant containing the string data that was found in the - * XML file. + * @param data is a TokenizedData instance containing the string data that + * was found in the XML file. 
*/ - virtual void data(const Variant &data) = 0; + virtual void data(const TokenizedData &data) = 0; }; /** @@ -179,7 +179,9 @@ public: OsxmlEvents &getEvents() const; /** - * Returns a reference at the internal data. + * Used internally to fetch a reference at the internal data. + * + * @return a reference at the internal OsxmlEventParserData structure. */ OsxmlEventParserData &getData() const; }; diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp index 924d11b..afe0dc6 100644 --- a/src/formats/osxml/OsxmlParser.cpp +++ b/src/formats/osxml/OsxmlParser.cpp @@ -85,7 +85,7 @@ public: void rangeEnd() override { stack.rangeEnd(); } - void data(const Variant &data) override { stack.data(data); } + void data(const TokenizedData &data) override { stack.data(data); } }; /* Class OsxmlParser */ diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index b24a43d..d4e9443 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -69,9 +69,16 @@ public: events.emplace_back(OsxmlEvent::RANGE_END, Variant::arrayType{}); } - void data(const Variant &data) override + void data(const TokenizedData &data) override { - events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); + Token token; + Variant text; + TokenizedDataReader reader = data.reader(); + reader.read(token, TokenSet{}, WhitespaceMode::PRESERVE); + EXPECT_EQ(Tokens::Data, token.id); + text = Variant::fromString(token.content); + text.setLocation(token.getLocation()); + events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{text}); } }; -- cgit v1.2.3