diff options
-rw-r--r-- | src/core/parser/ParserStack.cpp | 46 | ||||
-rw-r--r-- | src/core/parser/ParserStack.hpp | 57 | ||||
-rw-r--r-- | src/plugins/xml/XmlParser.cpp | 102 | ||||
-rw-r--r-- | test/core/parser/ParserStackTest.cpp | 14 | ||||
-rw-r--r-- | test/plugins/xml/XmlParserTest.cpp | 36 |
5 files changed, 151 insertions, 104 deletions
diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp index 7bc7af3..dca7f35 100644 --- a/src/core/parser/ParserStack.cpp +++ b/src/core/parser/ParserStack.cpp @@ -20,44 +20,38 @@ #include "ParserStack.hpp" +#include <core/Utils.hpp> #include <core/Exceptions.hpp> namespace ousia { namespace parser { +/* Class Handler */ + +void Handler::data(const std::string &data, int field) +{ + for (auto &c : data) { + if (!Utils::isWhitespace(c)) { + throw LoggableException{"No data allowed here."}; + } + } +} + /* Class HandlerDescriptor */ HandlerInstance HandlerDescriptor::create(const ParserContext &ctx, std::string name, State parentState, - bool isChild, char **attrs) const + bool isChild, + const Variant &args) const { Handler *h = ctor(ctx, name, targetState, parentState, isChild); - h->start(attrs); + h->start(args); return HandlerInstance(h, this); } /* Class ParserStack */ /** - * Function used internally to turn the elements of a collection into a string - * separated by the given delimiter. - */ -template <class T> -static std::string join(T es, const std::string &delim) -{ - std::stringstream res; - bool first = true; - for (auto &e : es) { - if (!first) { - res << delim; - } - res << e; - first = false; - } - return res.str(); -} - -/** * Returns an Exception that should be thrown when a currently invalid command * is thrown. */ @@ -73,7 +67,7 @@ static LoggableException invalidCommand(const std::string &name, std::string{"Expected "} + (expected.size() == 1 ? std::string{"\""} : std::string{"one of \""}) + - join(expected, "\", \"") + std::string{"\", but got \""} + name + + Utils::join(expected, "\", \"") + std::string{"\", but got \""} + name + std::string{"\""}}; } } @@ -89,7 +83,7 @@ std::set<std::string> ParserStack::expectedCommands(State state) return res; } -void ParserStack::start(std::string name, char **attrs) +void ParserStack::start(std::string name, const Variant &args) { // Fetch the current handler and the current state const HandlerInstance *h = stack.empty() ? nullptr : &stack.top(); @@ -117,7 +111,7 @@ void ParserStack::start(std::string name, char **attrs) } // Instantiate the handler and call its start function - stack.emplace(descr->create(ctx, name, curState, isChild, attrs)); + stack.emplace(descr->create(ctx, name, curState, isChild, args)); } void ParserStack::end() @@ -141,7 +135,7 @@ void ParserStack::end() } } -void ParserStack::data(const char *data, int len) +void ParserStack::data(const std::string &data, int field) { // Check whether there is any command the data can be sent to if (stack.empty()) { @@ -149,7 +143,7 @@ void ParserStack::data(const char *data, int len) } // Pass the data to the current Handler instance - stack.top().handler->data(data, len); + stack.top().handler->data(data, field); } } } diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp index 18fc8d9..c5ed4e4 100644 --- a/src/core/parser/ParserStack.hpp +++ b/src/core/parser/ParserStack.hpp @@ -37,6 +37,8 @@ #include <stack> #include <vector> +#include <core/variant/Variant.hpp> + #include "Parser.hpp" namespace ousia { @@ -125,10 +127,9 @@ public: * Called when the command that was specified in the constructor is * instanciated. * - * @param attrs contains the attributes that were specified for the command. - * TODO: Replace with StructInstance! + * @param args is a map from strings to variants (argument name and value). */ - virtual void start(char **attrs) = 0; + virtual void start(const Variant &args) = 0; /** * Called whenever the command for which this handler @@ -137,15 +138,15 @@ public: /** * Called whenever raw data (int the form of a string) is available for the - * Handler instance. - * - * TODO: Replace with std::string? - * TODO: Per default: Allow no data except for whitespace characters! + * Handler instance. In the default handler an exception is raised if the + * received data contains non-whitespace characters. * * @param data is a pointer at the character data that is available for the * Handler instance. + * @param field is the field number (the interpretation of this value + * depends on the format that is being parsed). */ - virtual void data(const char *data, int len){}; + virtual void data(const std::string &data, int field); /** * Called whenever a direct child element was created and has ended. @@ -225,7 +226,8 @@ struct HandlerDescriptor { * HandlerDescriptor and calls its start function. */ HandlerInstance create(const ParserContext &ctx, std::string name, - State parentState, bool isChild, char **attrs) const; + State parentState, bool isChild, + const Variant &args) const; }; /** @@ -240,6 +242,11 @@ private: const ParserContext &ctx; /** + * User specified data that will be passed to all handlers. + */ + void *userData; + + /** * Map containing all registered command names and the corresponding * handler * descriptor. @@ -278,7 +285,8 @@ public: * @return the state of the currently active Handler instance or STATE_NONE * if no handler is on the stack. */ - State currentState() { + State currentState() + { return stack.empty() ? STATE_NONE : stack.top().handler->state; } @@ -288,7 +296,8 @@ public: * @return the name of the command currently being handled by the active * Handler instance or an empty string if no handler is currently active. */ - std::string currentName() { + std::string currentName() + { return stack.empty() ? std::string{} : stack.top().handler->name; } @@ -297,17 +306,33 @@ public: * * @return true if the handler allows arbitrary children, false otherwise. */ - bool currentArbitraryChildren() { + bool currentArbitraryChildren() + { return stack.empty() ? false : stack.top().descr->arbitraryChildren; } - // TODO: Change signature - void start(std::string name, char **attrs); + /** + * Function that should be called whenever a new command starts. + * + * @param name is the name of the command. + * @param args is a map from strings to variants (argument name and value). + */ + void start(std::string name, const Variant &args); + /** + * Function called whenever a command ends. + */ void end(); - // TODO: Change signature - void data(const char *data, int len); + /** + * Function that should be called whenever data is available for the + * command. + * + * @param data is the data that should be passed to the handler. + * @param field is the field number (the interpretation of this value + * depends on the format that is being parsed). + */ + void data(const std::string &data, int field = 0); }; } } diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp index 42e0dd4..afc7f14 100644 --- a/src/plugins/xml/XmlParser.cpp +++ b/src/plugins/xml/XmlParser.cpp @@ -20,6 +20,7 @@ #include <expat.h> +#include <core/Utils.hpp> #include <core/parser/ParserStack.hpp> #include "XmlParser.hpp" @@ -44,30 +45,54 @@ static const State STATE_CONSTANT = 201; static const State STATE_ENUM = 202; static const State STATE_STRUCT = 203; -static Handler* createTestHandler(const ParserContext &ctx, - std::string name, State state, - State parentState, bool isChild) +class TestHandler : public Handler { +public: + using Handler::Handler; + + void start(const Variant &args) override + { + std::cout << this->name << ": start (isChild: " << (this->isChild) + << ", args: " << args << ")" << std::endl; + } + + void end() override + { + // TODO + } + + void data(const std::string &data, int field) override + { + std::cout << this->name << ": data \"" << data << "\"" << std::endl; + } + + void child(std::shared_ptr<Handler> handler) override + { + // TODO + } +}; + +static Handler *createTestHandler(const ParserContext &ctx, std::string name, + State state, State parentState, bool isChild) { - return nullptr; + return new TestHandler{ctx, name, state, parentState, isChild}; } static const std::multimap<std::string, HandlerDescriptor> XML_HANDLERS{ - /* Documents */ - {"document", {{STATE_NONE}, createTestHandler, STATE_DOCUMENT}}, - {"head", {{STATE_DOCUMENT}, createTestHandler, STATE_HEAD}}, - {"body", {{STATE_DOCUMENT}, createTestHandler, STATE_BODY, true}}, - - /* Special commands */ - {"use", {{STATE_HEAD}, createTestHandler, STATE_USE}}, - {"include", {{STATE_ALL}, createTestHandler, STATE_INCLUDE}}, - {"inline", {{STATE_ALL}, createTestHandler, STATE_INLINE}}, - - /* Typesystem definitions */ - {"types", {{STATE_NONE, STATE_HEAD}, createTestHandler, STATE_TYPES}}, - {"enum", {{STATE_TYPES}, createTestHandler, STATE_ENUM}}, - {"struct", {{STATE_TYPES}, createTestHandler, STATE_STRUCT}}, - {"constant", {{STATE_TYPES}, createTestHandler, STATE_CONSTANT}} -}; + /* Documents */ + {"document", {{STATE_NONE}, createTestHandler, STATE_DOCUMENT}}, + {"head", {{STATE_DOCUMENT}, createTestHandler, STATE_HEAD}}, + {"body", {{STATE_DOCUMENT}, createTestHandler, STATE_BODY, true}}, + + /* Special commands */ + {"use", {{STATE_HEAD}, createTestHandler, STATE_USE}}, + {"include", {{STATE_ALL}, createTestHandler, STATE_INCLUDE}}, + {"inline", {{STATE_ALL}, createTestHandler, STATE_INLINE}}, + + /* Typesystem definitions */ + {"typesystem", {{STATE_NONE, STATE_HEAD}, createTestHandler, STATE_TYPES}}, + {"enum", {{STATE_TYPES}, createTestHandler, STATE_ENUM}}, + {"struct", {{STATE_TYPES}, createTestHandler, STATE_STRUCT}}, + {"constant", {{STATE_TYPES}, createTestHandler, STATE_CONSTANT}}}; /** * Wrapper class around the XML_Parser pointer which safely frees it whenever @@ -89,8 +114,7 @@ public: * @param encoding is the protocol-defined encoding passed to expat (or * nullptr if expat should determine the encoding by itself). */ - ScopedExpatXmlParser(const XML_Char *encoding) - : parser(nullptr) + ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) { parser = XML_ParserCreate(encoding); if (!parser) { @@ -116,28 +140,36 @@ public: XML_Parser operator&() { return parser; } }; +/* Adapter Expat -> ParserStack */ + static void xmlStartElementHandler(void *userData, const XML_Char *name, const XML_Char **attrs) { - std::cout << "start tag: " << name << std::endl; + Variant::mapType args; const XML_Char **attr = attrs; while (*attr) { - std::cout << "\t" << *attr; - attr++; - std::cout << " -> " << *attr << std::endl; - attr++; + const std::string key{*(attr++)}; + args.emplace(std::make_pair(key, Variant{*(attr++)})); } + (static_cast<ParserStack *>(userData))->start(std::string(name), args); } -static void xmlEndElementHandler(void *userData, const XML_Char *name) { - std::cout << "end tag: " << name << std::endl; +static void xmlEndElementHandler(void *userData, const XML_Char *name) +{ + (static_cast<ParserStack *>(userData))->end(); } - -static void xmlCharacterDataHandler(void *userData, const XML_Char *s, int len) { - std::cout << "\tdata: " << std::string(s, len) << std::endl; +static void xmlCharacterDataHandler(void *userData, const XML_Char *s, int len) +{ + const std::string data = + Utils::trim(std::string{s, static_cast<size_t>(len)}); + if (!data.empty()) { + (static_cast<ParserStack *>(userData))->data(data); + } } +/* Class XmlParser */ + std::set<std::string> XmlParser::mimetypes() { return std::set<std::string>{{"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}}; @@ -147,7 +179,11 @@ Rooted<Node> XmlParser::parse(std::istream &is, ParserContext &ctx) { // Create the parser object ScopedExpatXmlParser p{"UTF-8"}; - XML_SetUserData(&p, &ctx); + + // Create the parser stack instance and pass the reference to the state + // machine descriptor + ParserStack stack{ctx, XML_HANDLERS}; + XML_SetUserData(&p, &stack); // Set the callback functions XML_SetStartElementHandler(&p, xmlStartElementHandler); diff --git a/test/core/parser/ParserStackTest.cpp b/test/core/parser/ParserStackTest.cpp index 1f4a4e2..5dab979 100644 --- a/test/core/parser/ParserStackTest.cpp +++ b/test/core/parser/ParserStackTest.cpp @@ -39,28 +39,24 @@ class TestHandler : public Handler { public: using Handler::Handler; - void start(char **attrs) override + void start(const Variant &args) override { startCount++; -// std::cout << this->name << ": start (isChild: " << (this->isChild) << ")" << std::endl; } void end() override { endCount++; -// std::cout << this->name << ": end " << std::endl; } - void data(const char *data, int len) override + void data(const std::string &data, int field) override { dataCount++; -// std::cout << this->name << ": data \"" << std::string{data, static_cast<unsigned int>(len)} << "\"" << std::endl; } void child(std::shared_ptr<Handler> handler) override { childCount++; -// std::cout << this->name << ": has child \"" << handler->name << "\"" << std::endl; } }; @@ -93,7 +89,7 @@ TEST(ParserStack, simpleTest) ASSERT_EQ(STATE_NONE, s.currentState()); s.start("document", nullptr); - s.data("test1", 5); + s.data("test1"); ASSERT_EQ("document", s.currentName()); ASSERT_EQ(STATE_DOCUMENT, s.currentState()); @@ -101,7 +97,7 @@ TEST(ParserStack, simpleTest) ASSERT_EQ(1, dataCount); s.start("body", nullptr); - s.data("test2", 5); + s.data("test2"); ASSERT_EQ("body", s.currentName()); ASSERT_EQ(STATE_BODY, s.currentState()); ASSERT_EQ(2, startCount); @@ -123,7 +119,7 @@ TEST(ParserStack, simpleTest) ASSERT_EQ(STATE_DOCUMENT, s.currentState()); s.start("body", nullptr); - s.data("test3", 5); + s.data("test3"); ASSERT_EQ("body", s.currentName()); ASSERT_EQ(STATE_BODY, s.currentState()); s.end(); diff --git a/test/plugins/xml/XmlParserTest.cpp b/test/plugins/xml/XmlParserTest.cpp index 98a5a34..ecc9438 100644 --- a/test/plugins/xml/XmlParserTest.cpp +++ b/test/plugins/xml/XmlParserTest.cpp @@ -26,26 +26,14 @@ namespace ousia { namespace parser { namespace xml { -struct TestParserContext : public ParserContext { - -private: - Logger log; - Registry r; - Scope s; - -public: - TestParserContext() : ParserContext(s, r, log), r(log), s(nullptr) {}; - -}; - TEST(XmlParser, mismatchedTagException) { - TestParserContext ctx; + StandaloneParserContext ctx; XmlParser p; bool hadException = false; try { - p.parse("<test foo=\"bar\">data<![CDATA[bla]]>\n</btest>", ctx); + p.parse("<document>\n</document2>", ctx); } catch (ParserException ex) { ASSERT_EQ(2, ex.line); @@ -55,19 +43,27 @@ TEST(XmlParser, mismatchedTagException) ASSERT_TRUE(hadException); } -const char* TEST_DATA = "<?xml version=\"1.0\" standalone=\"yes\"?>\n" - "<document a:bc=\"b\">\n" - " <bla:test xmlAttr=\"blub\" />\n" - "</document>\n"; +const char *TEST_DATA = + "<?xml version=\"1.0\" standalone=\"yes\"?>\n" + "<document a:bc=\"b\">\n" + " <head>\n" + " <typesystem name=\"color\">\n" + " <struct name=\"color\">\n" + " </struct>\n" + " </typesystem>\n" + " </head>\n" + " <body xmlAttr=\"blub\">\n" + " <book>Dies ist ein Test></book>\n" + " </body>\n" + "</document>\n"; TEST(XmlParser, namespaces) { - TestParserContext ctx; + StandaloneParserContext ctx; XmlParser p; p.parse(TEST_DATA, ctx); } - } } } |