summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/parser/ParserStack.cpp46
-rw-r--r--src/core/parser/ParserStack.hpp57
-rw-r--r--src/plugins/xml/XmlParser.cpp102
-rw-r--r--test/core/parser/ParserStackTest.cpp14
-rw-r--r--test/plugins/xml/XmlParserTest.cpp36
5 files changed, 151 insertions, 104 deletions
diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp
index 7bc7af3..dca7f35 100644
--- a/src/core/parser/ParserStack.cpp
+++ b/src/core/parser/ParserStack.cpp
@@ -20,44 +20,38 @@
#include "ParserStack.hpp"
+#include <core/Utils.hpp>
#include <core/Exceptions.hpp>
namespace ousia {
namespace parser {
+/* Class Handler */
+
+void Handler::data(const std::string &data, int field)
+{
+ for (auto &c : data) {
+ if (!Utils::isWhitespace(c)) {
+ throw LoggableException{"No data allowed here."};
+ }
+ }
+}
+
/* Class HandlerDescriptor */
HandlerInstance HandlerDescriptor::create(const ParserContext &ctx,
std::string name, State parentState,
- bool isChild, char **attrs) const
+ bool isChild,
+ const Variant &args) const
{
Handler *h = ctor(ctx, name, targetState, parentState, isChild);
- h->start(attrs);
+ h->start(args);
return HandlerInstance(h, this);
}
/* Class ParserStack */
/**
- * Function used internally to turn the elements of a collection into a string
- * separated by the given delimiter.
- */
-template <class T>
-static std::string join(T es, const std::string &delim)
-{
- std::stringstream res;
- bool first = true;
- for (auto &e : es) {
- if (!first) {
- res << delim;
- }
- res << e;
- first = false;
- }
- return res.str();
-}
-
-/**
* Returns an Exception that should be thrown when a currently invalid command
* is thrown.
*/
@@ -73,7 +67,7 @@ static LoggableException invalidCommand(const std::string &name,
std::string{"Expected "} +
(expected.size() == 1 ? std::string{"\""}
: std::string{"one of \""}) +
- join(expected, "\", \"") + std::string{"\", but got \""} + name +
+ Utils::join(expected, "\", \"") + std::string{"\", but got \""} + name +
std::string{"\""}};
}
}
@@ -89,7 +83,7 @@ std::set<std::string> ParserStack::expectedCommands(State state)
return res;
}
-void ParserStack::start(std::string name, char **attrs)
+void ParserStack::start(std::string name, const Variant &args)
{
// Fetch the current handler and the current state
const HandlerInstance *h = stack.empty() ? nullptr : &stack.top();
@@ -117,7 +111,7 @@ void ParserStack::start(std::string name, char **attrs)
}
// Instantiate the handler and call its start function
- stack.emplace(descr->create(ctx, name, curState, isChild, attrs));
+ stack.emplace(descr->create(ctx, name, curState, isChild, args));
}
void ParserStack::end()
@@ -141,7 +135,7 @@ void ParserStack::end()
}
}
-void ParserStack::data(const char *data, int len)
+void ParserStack::data(const std::string &data, int field)
{
// Check whether there is any command the data can be sent to
if (stack.empty()) {
@@ -149,7 +143,7 @@ void ParserStack::data(const char *data, int len)
}
// Pass the data to the current Handler instance
- stack.top().handler->data(data, len);
+ stack.top().handler->data(data, field);
}
}
}
diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp
index 18fc8d9..c5ed4e4 100644
--- a/src/core/parser/ParserStack.hpp
+++ b/src/core/parser/ParserStack.hpp
@@ -37,6 +37,8 @@
#include <stack>
#include <vector>
+#include <core/variant/Variant.hpp>
+
#include "Parser.hpp"
namespace ousia {
@@ -125,10 +127,9 @@ public:
* Called when the command that was specified in the constructor is
* instanciated.
*
- * @param attrs contains the attributes that were specified for the command.
- * TODO: Replace with StructInstance!
+ * @param args is a map from strings to variants (argument name and value).
*/
- virtual void start(char **attrs) = 0;
+ virtual void start(const Variant &args) = 0;
/**
* Called whenever the command for which this handler
@@ -137,15 +138,15 @@ public:
/**
* Called whenever raw data (int the form of a string) is available for the
- * Handler instance.
- *
- * TODO: Replace with std::string?
- * TODO: Per default: Allow no data except for whitespace characters!
+ * Handler instance. In the default handler an exception is raised if the
+ * received data contains non-whitespace characters.
*
* @param data is a pointer at the character data that is available for the
* Handler instance.
+ * @param field is the field number (the interpretation of this value
+ * depends on the format that is being parsed).
*/
- virtual void data(const char *data, int len){};
+ virtual void data(const std::string &data, int field);
/**
* Called whenever a direct child element was created and has ended.
@@ -225,7 +226,8 @@ struct HandlerDescriptor {
* HandlerDescriptor and calls its start function.
*/
HandlerInstance create(const ParserContext &ctx, std::string name,
- State parentState, bool isChild, char **attrs) const;
+ State parentState, bool isChild,
+ const Variant &args) const;
};
/**
@@ -240,6 +242,11 @@ private:
const ParserContext &ctx;
/**
+ * User specified data that will be passed to all handlers.
+ */
+ void *userData;
+
+ /**
* Map containing all registered command names and the corresponding
* handler
* descriptor.
@@ -278,7 +285,8 @@ public:
* @return the state of the currently active Handler instance or STATE_NONE
* if no handler is on the stack.
*/
- State currentState() {
+ State currentState()
+ {
return stack.empty() ? STATE_NONE : stack.top().handler->state;
}
@@ -288,7 +296,8 @@ public:
* @return the name of the command currently being handled by the active
* Handler instance or an empty string if no handler is currently active.
*/
- std::string currentName() {
+ std::string currentName()
+ {
return stack.empty() ? std::string{} : stack.top().handler->name;
}
@@ -297,17 +306,33 @@ public:
*
* @return true if the handler allows arbitrary children, false otherwise.
*/
- bool currentArbitraryChildren() {
+ bool currentArbitraryChildren()
+ {
return stack.empty() ? false : stack.top().descr->arbitraryChildren;
}
- // TODO: Change signature
- void start(std::string name, char **attrs);
+ /**
+ * Function that should be called whenever a new command starts.
+ *
+ * @param name is the name of the command.
+ * @param args is a map from strings to variants (argument name and value).
+ */
+ void start(std::string name, const Variant &args);
+ /**
+ * Function called whenever a command ends.
+ */
void end();
- // TODO: Change signature
- void data(const char *data, int len);
+ /**
+ * Function that should be called whenever data is available for the
+ * command.
+ *
+ * @param data is the data that should be passed to the handler.
+ * @param field is the field number (the interpretation of this value
+ * depends on the format that is being parsed).
+ */
+ void data(const std::string &data, int field = 0);
};
}
}
diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp
index 42e0dd4..afc7f14 100644
--- a/src/plugins/xml/XmlParser.cpp
+++ b/src/plugins/xml/XmlParser.cpp
@@ -20,6 +20,7 @@
#include <expat.h>
+#include <core/Utils.hpp>
#include <core/parser/ParserStack.hpp>
#include "XmlParser.hpp"
@@ -44,30 +45,54 @@ static const State STATE_CONSTANT = 201;
static const State STATE_ENUM = 202;
static const State STATE_STRUCT = 203;
-static Handler* createTestHandler(const ParserContext &ctx,
- std::string name, State state,
- State parentState, bool isChild)
+class TestHandler : public Handler {
+public:
+ using Handler::Handler;
+
+ void start(const Variant &args) override
+ {
+ std::cout << this->name << ": start (isChild: " << (this->isChild)
+ << ", args: " << args << ")" << std::endl;
+ }
+
+ void end() override
+ {
+ // TODO
+ }
+
+ void data(const std::string &data, int field) override
+ {
+ std::cout << this->name << ": data \"" << data << "\"" << std::endl;
+ }
+
+ void child(std::shared_ptr<Handler> handler) override
+ {
+ // TODO
+ }
+};
+
+static Handler *createTestHandler(const ParserContext &ctx, std::string name,
+ State state, State parentState, bool isChild)
{
- return nullptr;
+ return new TestHandler{ctx, name, state, parentState, isChild};
}
static const std::multimap<std::string, HandlerDescriptor> XML_HANDLERS{
- /* Documents */
- {"document", {{STATE_NONE}, createTestHandler, STATE_DOCUMENT}},
- {"head", {{STATE_DOCUMENT}, createTestHandler, STATE_HEAD}},
- {"body", {{STATE_DOCUMENT}, createTestHandler, STATE_BODY, true}},
-
- /* Special commands */
- {"use", {{STATE_HEAD}, createTestHandler, STATE_USE}},
- {"include", {{STATE_ALL}, createTestHandler, STATE_INCLUDE}},
- {"inline", {{STATE_ALL}, createTestHandler, STATE_INLINE}},
-
- /* Typesystem definitions */
- {"types", {{STATE_NONE, STATE_HEAD}, createTestHandler, STATE_TYPES}},
- {"enum", {{STATE_TYPES}, createTestHandler, STATE_ENUM}},
- {"struct", {{STATE_TYPES}, createTestHandler, STATE_STRUCT}},
- {"constant", {{STATE_TYPES}, createTestHandler, STATE_CONSTANT}}
-};
+ /* Documents */
+ {"document", {{STATE_NONE}, createTestHandler, STATE_DOCUMENT}},
+ {"head", {{STATE_DOCUMENT}, createTestHandler, STATE_HEAD}},
+ {"body", {{STATE_DOCUMENT}, createTestHandler, STATE_BODY, true}},
+
+ /* Special commands */
+ {"use", {{STATE_HEAD}, createTestHandler, STATE_USE}},
+ {"include", {{STATE_ALL}, createTestHandler, STATE_INCLUDE}},
+ {"inline", {{STATE_ALL}, createTestHandler, STATE_INLINE}},
+
+ /* Typesystem definitions */
+ {"typesystem", {{STATE_NONE, STATE_HEAD}, createTestHandler, STATE_TYPES}},
+ {"enum", {{STATE_TYPES}, createTestHandler, STATE_ENUM}},
+ {"struct", {{STATE_TYPES}, createTestHandler, STATE_STRUCT}},
+ {"constant", {{STATE_TYPES}, createTestHandler, STATE_CONSTANT}}};
/**
* Wrapper class around the XML_Parser pointer which safely frees it whenever
@@ -89,8 +114,7 @@ public:
* @param encoding is the protocol-defined encoding passed to expat (or
* nullptr if expat should determine the encoding by itself).
*/
- ScopedExpatXmlParser(const XML_Char *encoding)
- : parser(nullptr)
+ ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
{
parser = XML_ParserCreate(encoding);
if (!parser) {
@@ -116,28 +140,36 @@ public:
XML_Parser operator&() { return parser; }
};
+/* Adapter Expat -> ParserStack */
+
static void xmlStartElementHandler(void *userData, const XML_Char *name,
const XML_Char **attrs)
{
- std::cout << "start tag: " << name << std::endl;
+ Variant::mapType args;
const XML_Char **attr = attrs;
while (*attr) {
- std::cout << "\t" << *attr;
- attr++;
- std::cout << " -> " << *attr << std::endl;
- attr++;
+ const std::string key{*(attr++)};
+ args.emplace(std::make_pair(key, Variant{*(attr++)}));
}
+ (static_cast<ParserStack *>(userData))->start(std::string(name), args);
}
-static void xmlEndElementHandler(void *userData, const XML_Char *name) {
- std::cout << "end tag: " << name << std::endl;
+static void xmlEndElementHandler(void *userData, const XML_Char *name)
+{
+ (static_cast<ParserStack *>(userData))->end();
}
-
-static void xmlCharacterDataHandler(void *userData, const XML_Char *s, int len) {
- std::cout << "\tdata: " << std::string(s, len) << std::endl;
+static void xmlCharacterDataHandler(void *userData, const XML_Char *s, int len)
+{
+ const std::string data =
+ Utils::trim(std::string{s, static_cast<size_t>(len)});
+ if (!data.empty()) {
+ (static_cast<ParserStack *>(userData))->data(data);
+ }
}
+/* Class XmlParser */
+
std::set<std::string> XmlParser::mimetypes()
{
return std::set<std::string>{{"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}};
@@ -147,7 +179,11 @@ Rooted<Node> XmlParser::parse(std::istream &is, ParserContext &ctx)
{
// Create the parser object
ScopedExpatXmlParser p{"UTF-8"};
- XML_SetUserData(&p, &ctx);
+
+ // Create the parser stack instance and pass the reference to the state
+ // machine descriptor
+ ParserStack stack{ctx, XML_HANDLERS};
+ XML_SetUserData(&p, &stack);
// Set the callback functions
XML_SetStartElementHandler(&p, xmlStartElementHandler);
diff --git a/test/core/parser/ParserStackTest.cpp b/test/core/parser/ParserStackTest.cpp
index 1f4a4e2..5dab979 100644
--- a/test/core/parser/ParserStackTest.cpp
+++ b/test/core/parser/ParserStackTest.cpp
@@ -39,28 +39,24 @@ class TestHandler : public Handler {
public:
using Handler::Handler;
- void start(char **attrs) override
+ void start(const Variant &args) override
{
startCount++;
-// std::cout << this->name << ": start (isChild: " << (this->isChild) << ")" << std::endl;
}
void end() override
{
endCount++;
-// std::cout << this->name << ": end " << std::endl;
}
- void data(const char *data, int len) override
+ void data(const std::string &data, int field) override
{
dataCount++;
-// std::cout << this->name << ": data \"" << std::string{data, static_cast<unsigned int>(len)} << "\"" << std::endl;
}
void child(std::shared_ptr<Handler> handler) override
{
childCount++;
-// std::cout << this->name << ": has child \"" << handler->name << "\"" << std::endl;
}
};
@@ -93,7 +89,7 @@ TEST(ParserStack, simpleTest)
ASSERT_EQ(STATE_NONE, s.currentState());
s.start("document", nullptr);
- s.data("test1", 5);
+ s.data("test1");
ASSERT_EQ("document", s.currentName());
ASSERT_EQ(STATE_DOCUMENT, s.currentState());
@@ -101,7 +97,7 @@ TEST(ParserStack, simpleTest)
ASSERT_EQ(1, dataCount);
s.start("body", nullptr);
- s.data("test2", 5);
+ s.data("test2");
ASSERT_EQ("body", s.currentName());
ASSERT_EQ(STATE_BODY, s.currentState());
ASSERT_EQ(2, startCount);
@@ -123,7 +119,7 @@ TEST(ParserStack, simpleTest)
ASSERT_EQ(STATE_DOCUMENT, s.currentState());
s.start("body", nullptr);
- s.data("test3", 5);
+ s.data("test3");
ASSERT_EQ("body", s.currentName());
ASSERT_EQ(STATE_BODY, s.currentState());
s.end();
diff --git a/test/plugins/xml/XmlParserTest.cpp b/test/plugins/xml/XmlParserTest.cpp
index 98a5a34..ecc9438 100644
--- a/test/plugins/xml/XmlParserTest.cpp
+++ b/test/plugins/xml/XmlParserTest.cpp
@@ -26,26 +26,14 @@ namespace ousia {
namespace parser {
namespace xml {
-struct TestParserContext : public ParserContext {
-
-private:
- Logger log;
- Registry r;
- Scope s;
-
-public:
- TestParserContext() : ParserContext(s, r, log), r(log), s(nullptr) {};
-
-};
-
TEST(XmlParser, mismatchedTagException)
{
- TestParserContext ctx;
+ StandaloneParserContext ctx;
XmlParser p;
bool hadException = false;
try {
- p.parse("<test foo=\"bar\">data<![CDATA[bla]]>\n</btest>", ctx);
+ p.parse("<document>\n</document2>", ctx);
}
catch (ParserException ex) {
ASSERT_EQ(2, ex.line);
@@ -55,19 +43,27 @@ TEST(XmlParser, mismatchedTagException)
ASSERT_TRUE(hadException);
}
-const char* TEST_DATA = "<?xml version=\"1.0\" standalone=\"yes\"?>\n"
- "<document a:bc=\"b\">\n"
- " <bla:test xmlAttr=\"blub\" />\n"
- "</document>\n";
+const char *TEST_DATA =
+ "<?xml version=\"1.0\" standalone=\"yes\"?>\n"
+ "<document a:bc=\"b\">\n"
+ " <head>\n"
+ " <typesystem name=\"color\">\n"
+ " <struct name=\"color\">\n"
+ " </struct>\n"
+ " </typesystem>\n"
+ " </head>\n"
+ " <body xmlAttr=\"blub\">\n"
+ " <book>Dies ist ein Test&gt;</book>\n"
+ " </body>\n"
+ "</document>\n";
TEST(XmlParser, namespaces)
{
- TestParserContext ctx;
+ StandaloneParserContext ctx;
XmlParser p;
p.parse(TEST_DATA, ctx);
}
-
}
}
}