summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2014-12-01 21:27:08 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2014-12-01 21:27:08 +0100
commit5554f3594d00e267af447a24149f655ceff64d17 (patch)
tree7a06022fcd5799a12a55e95d2e45414260cc45cf
parent082165d21269123f2658edc74aa1960669e306c8 (diff)
working version of the ParserStack class plus unit tests
-rw-r--r--CMakeLists.txt3
-rw-r--r--src/core/parser/Parser.hpp13
-rw-r--r--src/core/parser/ParserStack.cpp155
-rw-r--r--src/core/parser/ParserStack.hpp (renamed from src/core/parser/XmlStates.hpp)246
-rw-r--r--src/core/parser/Scope.hpp2
-rw-r--r--src/core/parser/XmlStates.cpp53
-rw-r--r--test/core/parser/ParserStackTest.cpp165
7 files changed, 468 insertions, 169 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index da6479b..327f664 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -110,6 +110,8 @@ ADD_LIBRARY(ousia_core
# src/core/Typesystem
src/core/Utils
src/core/parser/Parser
+ src/core/parser/ParserStack
+ src/core/parser/Scope
src/core/script/Function
src/core/script/Object
src/core/script/ScriptEngine
@@ -156,6 +158,7 @@ IF(TEST)
test/core/RangeSetTest
test/core/TokenizerTest
test/core/UtilsTest
+ test/core/parser/ParserStackTest
test/core/script/FunctionTest
test/core/script/ObjectTest
test/core/script/VariantTest
diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp
index b8faf98..fa5dd49 100644
--- a/src/core/parser/Parser.hpp
+++ b/src/core/parser/Parser.hpp
@@ -86,6 +86,19 @@ struct ParserContext {
: scope(scope), registry(registry), logger(logger){};
};
+struct StandaloneParserContext : public ParserContext {
+private:
+ Logger logger;
+ Scope scope;
+ Registry registry;
+
+public:
+ StandaloneParserContext()
+ : ParserContext(scope, registry, logger),
+ scope(nullptr),
+ registry(logger){};
+};
+
/**
* Abstract parser class. This class builds the basic interface that should be
* used by any parser which reads data from an input stream and transforms it
diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp
new file mode 100644
index 0000000..01fce3f
--- /dev/null
+++ b/src/core/parser/ParserStack.cpp
@@ -0,0 +1,155 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <sstream>
+
+#include "ParserStack.hpp"
+
+#include <core/Exceptions.hpp>
+
+namespace ousia {
+namespace parser {
+
+/* Class HandlerDescriptor */
+
+/**
+ * Creates a new Handler using the constructor function stored in this
+ * descriptor, calls its "start" function and returns it wrapped in a
+ * HandlerInstance.
+ *
+ * @param ctx is the parser context passed to the handler constructor.
+ * @param name is the command name passed to the handler constructor.
+ * @param parentState is the state passed to the handler constructor as parent
+ * state.
+ * @param isChild is passed through to the handler constructor.
+ * @param attrs is passed to the "start" function of the new handler.
+ * @return a HandlerInstance that owns the new handler and points back at this
+ * descriptor.
+ */
+HandlerInstance HandlerDescriptor::create(const ParserContext &ctx,
+ std::string name, State parentState,
+ bool isChild, char **attrs) const
+{
+ // Hand the raw pointer to the HandlerInstance (and thereby to its
+ // shared_ptr) before calling "start". Otherwise the handler would leak if
+ // "start" throws an exception.
+ HandlerInstance inst(ctor(ctx, name, targetState, parentState, isChild),
+ this);
+ inst.handler->start(attrs);
+ return inst;
+}
+
+/* Class ParserStack */
+
+/**
+ * Function used internally to turn the elements of a collection into a string
+ * separated by the given delimiter.
+ */
+template <class T>
+static std::string join(T es, const std::string &delim)
+{
+ std::stringstream res;
+ bool first = true;
+ for (auto &e : es) {
+ if (!first) {
+ res << delim;
+ }
+ res << e;
+ first = false;
+ }
+ return res.str();
+}
+
+/**
+ * Returns an Exception that should be thrown when a currently invalid command
+ * is encountered.
+ */
+static LoggableException invalidCommand(const std::string &name,
+ const std::set<std::string> &expected)
+{
+ if (expected.empty()) {
+ return LoggableException{
+ std::string{"No nested elements allowed, but got \""} + name +
+ std::string{"\""}};
+ } else {
+ return LoggableException{
+ std::string{"Expected "} +
+ (expected.size() == 1 ? std::string{"\""}
+ : std::string{"one of \""}) +
+ join(expected, "\", \"") + std::string{"\", but got \""} + name +
+ std::string{"\""}};
+ }
+}
+
+std::set<std::string> ParserStack::expectedCommands(State state)
+{
+ std::set<std::string> res;
+ for (const auto &v : handlers) {
+ if (v.second.parentStates.count(state)) {
+ res.insert(v.first);
+ }
+ }
+ return res;
+}
+
+void ParserStack::start(std::string name, char **attrs)
+{
+ // Fetch the current handler and the current state
+ const HandlerInstance *h = stack.empty() ? nullptr : &stack.top();
+ const State curState = currentState();
+ bool isChild = false;
+
+ // Fetch the correct Handler descriptor for this command
+ const HandlerDescriptor *descr = nullptr;
+ auto range = handlers.equal_range(name);
+ for (auto it = range.first; it != range.second; it++) {
+ if (it->second.parentStates.count(curState)) {
+ descr = &(it->second);
+ break;
+ }
+ }
+ if (!descr && currentArbitraryChildren()) {
+ isChild = true;
+ descr = h->descr;
+ }
+
+ // No descriptor found, throw an exception.
+ if (!descr) {
+ throw invalidCommand(name, expectedCommands(curState));
+ }
+
+ // Instantiate the handler and call its start function
+ stack.emplace(descr->create(ctx, name, curState, isChild, attrs));
+}
+
+void ParserStack::end()
+{
+ // Check whether the current command could be ended
+ if (stack.empty()) {
+ throw LoggableException{"No command to end."};
+ }
+
+ // Remove the current HandlerInstance from the stack
+ HandlerInstance inst{stack.top()};
+ stack.pop();
+
+ // Call the end function of the last Handler
+ inst.handler->end();
+
+ // Call the "child" function of the parent Handler in the stack
+ // (if one exists).
+ if (!stack.empty()) {
+ stack.top().handler->child(inst.handler);
+ }
+}
+
+void ParserStack::data(const char *data, int len)
+{
+ // Check whether there is any command the data can be sent to
+ if (stack.empty()) {
+ throw LoggableException{"No command to receive data."};
+ }
+
+ // Pass the data to the current Handler instance
+ stack.top().handler->data(data, len);
+}
+}
+}
+
diff --git a/src/core/parser/XmlStates.hpp b/src/core/parser/ParserStack.hpp
index 70e95f4..a777b1e 100644
--- a/src/core/parser/XmlStates.hpp
+++ b/src/core/parser/ParserStack.hpp
@@ -16,53 +16,39 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef _OUSIA_XML_STATES_HPP_
-#define _OUSIA_XML_STATES_HPP_
+/**
+ * @file ParserStack.hpp
+ *
+ * Helper classes for document or description parsers. Contains the ParserStack
+ * class, which is a pushdown automaton responsible for accepting commands in
+ * the correct order and calling specified handlers.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_PARSER_STACK_HPP_
+#define _OUSIA_PARSER_STACK_HPP_
#include <cstdint>
#include <map>
+#include <memory>
#include <set>
#include <stack>
#include <vector>
+#include "Parser.hpp"
+
namespace ousia {
namespace parser {
-class Scope;
-class Registry;
-class Logger;
-
-namespace xml {
-
/**
- * The State class represents all states the XML parser can be in. These states
- * mostly represent single tags.
+ * The State type alias is used to identify the states of the ParserStack.
*/
-enum class State : uint8_t {
- /* Meta states */
- ALL = -1,
-
- /* Start state */
- NONE,
-
- /* Special commands */
- INCLUDE,
- INLINE,
+using State = int8_t;
- /* Document states */
- DOCUMENT,
- HEAD,
- BODY,
-
- /* Domain description states */
- DOMAIN,
-
- /* Type system states */
- TYPESYSTEM,
- TYPE,
- TYPE_ELEM
-};
+static const State STATE_ALL = -2;
+static const State STATE_NONE = -1;
/**
* The handler class provides a context for handling an XML tag. It has to be
@@ -125,7 +111,7 @@ public:
/**
* Virtual destructor.
*/
- virtual ~Handler();
+ virtual ~Handler(){};
/**
* Returns the node instance that was created by the handler.
@@ -158,14 +144,14 @@ public:
* @param data is a pointer at the character data that is available for the
* Handler instance.
*/
- virtual void data(char *data, int len){};
+ virtual void data(const char *data, int len){};
/**
* Called whenever a direct child element was created and has ended.
*
* @param handler is a reference at the child Handler instance.
*/
- virtual void child(Handler *handler){};
+ virtual void child(std::shared_ptr<Handler> handler){};
};
/**
@@ -176,95 +162,90 @@ using HandlerConstructor = Handler *(*)(const ParserContext &ctx,
std::string name, State state,
State parentState, bool isChild);
+struct HandlerDescriptor;
+
/**
- * The StateStack class is a pushdown automaton responsible for turning a
- * command stream into a tree of Node instances.
+ * Used internally by ParserStack to store Handler instances and parameters
+ * from HandlerDescriptor that are not stored in the Handler instance
+ * itself. Instances of the HandlerInstance class can be created using the
+ * HandlerDescriptor "create" method.
*/
-class StateStack {
-public:
+struct HandlerInstance {
/**
- * Used internlly by StateStack to store Handler instances and parameters
- * from HandlerDescriptor that are not stored in the Handler instance
- * itself. Instances of the HandlerInstance class can be created using the
- * HandlerDescriptor "create" method.
+ * Pointer at the actual handler instance.
*/
- struct HandlerInstance {
- /**
- * Pointer at the actual handler instance.
- */
- std::unique_ptr<Handler> handler;
-
- /**
- * Value of the arbitraryChildren flag stored in the HandlerDescriptor
- * class.
- */
- const bool arbitraryChildren;
-
- HandlerInstance(std::unique_ptr<Handler> handler,
- bool arbitraryChildren)
- : handler(handler), arbitraryChildren(arbitraryChildren)
- {
- }
+ std::shared_ptr<Handler> handler;
+
+ const HandlerDescriptor *descr;
+
+ HandlerInstance(Handler *handler, const HandlerDescriptor *descr)
+ : handler(handler), descr(descr)
+ {
}
+};
+/**
+ * Used internally by ParserStack to store the pushdown automaton
+ * description.
+ */
+struct HandlerDescriptor {
/**
- * Used internally by StateStack to store the pushdown automaton
- * description.
+ * The valid parent states.
*/
- struct HandlerDescriptor {
- /**
- * The valid parent states.
- */
- const std::set<State> parentStates;
-
- /**
- * Pointer at a function which creates a new concrete Handler instance.
- */
- const HandlerConstructor ctor;
-
- /**
- * The target state for the registered handler.
- */
- const State targetState;
-
- /**
- * Set to true if this handler instance allows arbitrary children as
- * tags.
- */
- const bool arbitraryChildren;
-
- HandlerDescriptor(std::set<State> parentStates, HandlerConstructor ctor,
- State targetState, bool arbitraryChildren = false)
- : parentStates(std::move(parentStates)),
- ctor(constructor),
- targetState(targetState),
- arbitraryChildren(arbitraryChildren)
- {
- }
-
- HandlerInstance create(const ParserContext &ctx, std::string name,
- State parentState, bool isChild)
- {
- return HandlerInstance{
- ctor(ctx, name, targetState, parentState, isChild),
- arbitraryChildren};
- }
- };
+ const std::set<State> parentStates;
-private:
/**
- * Map containing all registered command names and the corresponding
- * handler
- * descriptor.
+ * Pointer at a function which creates a new concrete Handler instance.
+ */
+ const HandlerConstructor ctor;
+
+ /**
+ * The target state for the registered handler.
+ */
+ const State targetState;
+
+ /**
+ * Set to true if this handler instance allows arbitrary children as
+ * tags.
*/
- const std::multimap<std::string, HandlerDescriptor> handlers;
+ const bool arbitraryChildren;
+
+ HandlerDescriptor(std::set<State> parentStates, HandlerConstructor ctor,
+ State targetState, bool arbitraryChildren = false)
+ : parentStates(std::move(parentStates)),
+ ctor(ctor),
+ targetState(targetState),
+ arbitraryChildren(arbitraryChildren)
+ {
+ }
/**
+ * Creates an instance of the concrete Handler class represented by the
+ * HandlerDescriptor and calls its start function.
+ */
+ HandlerInstance create(const ParserContext &ctx, std::string name,
+ State parentState, bool isChild, char **attrs) const;
+};
+
+/**
+ * The ParserStack class is a pushdown automaton responsible for turning a
+ * command stream into a tree of Node instances.
+ */
+class ParserStack {
+private:
+ /**
* Reference at the parser context.
*/
const ParserContext &ctx;
/**
+ * Map containing all registered command names and the corresponding
+ * handler
+ * descriptor.
+ */
+ const std::multimap<std::string, HandlerDescriptor> &handlers;
+
+ /**
* Internal stack used for managing the currently active Handler instances.
*/
std::stack<HandlerInstance> stack;
@@ -281,21 +262,54 @@ private:
public:
/**
- * Creates a new instance of the StateStack class.
+ * Creates a new instance of the ParserStack class.
*
* @param handlers is a map containing the command names and the
* corresponding HandlerDescriptor instances.
*/
- StateStack(const ParserContext &ctx,
- std::multimap<std::string, HandlerDescriptor> handlers)
- : handlers(std::move(handlers)),
- ctx(ctx),
- currentState(State::NONE),
- arbitraryChildren(false);
+ ParserStack(const ParserContext &ctx,
+ const std::multimap<std::string, HandlerDescriptor> &handlers)
+ : ctx(ctx), handlers(handlers){};
+
+ /**
+ * Returns the state the ParserStack instance currently is in.
+ *
+ * @return the state of the currently active Handler instance or STATE_NONE
+ * if no handler is on the stack.
+ */
+ State currentState() {
+ return stack.empty() ? STATE_NONE : stack.top().handler->state;
+ }
+
+ /**
+ * Returns the command name that is currently being handled.
+ *
+ * @return the name of the command currently being handled by the active
+ * Handler instance or an empty string if no handler is currently active.
+ */
+ std::string currentName() {
+ return stack.empty() ? std::string{} : stack.top().handler->name;
+ }
+
+ /**
+ * Returns whether the current command handler allows arbitrary children.
+ *
+ * @return true if the handler allows arbitrary children, false otherwise.
+ */
+ bool currentArbitraryChildren() {
+ return stack.empty() ? false : stack.top().descr->arbitraryChildren;
+ }
+
+ // TODO: Change signature
+ void start(std::string name, char **attrs);
+
+ void end();
+
+ // TODO: Change signature
+ void data(const char *data, int len);
};
}
}
-}
-#endif /* _OUSIA_XML_STATES_HPP_ */
+#endif /* _OUSIA_PARSER_STACK_HPP_ */
diff --git a/src/core/parser/Scope.hpp b/src/core/parser/Scope.hpp
index 0c37fbd..9c5504f 100644
--- a/src/core/parser/Scope.hpp
+++ b/src/core/parser/Scope.hpp
@@ -19,6 +19,8 @@
#ifndef _OUSIA_PARSER_SCOPE_H_
#define _OUSIA_PARSER_SCOPE_H_
+#include <deque>
+
#include <core/Node.hpp>
/**
diff --git a/src/core/parser/XmlStates.cpp b/src/core/parser/XmlStates.cpp
deleted file mode 100644
index ec0f002..0000000
--- a/src/core/parser/XmlStates.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "XmlStates.hpp"
-
-namespace ousia {
-namespace parser {
-namespace xml {
-
-std::set<std::string> StateStack::expectedCommands(State state)
-{
- std::set<std::string> res;
- for (const auto &v: handlers) {
- if (v.second.parentStates.count(state)) {
- res.insert(v.first);
- }
- }
- return res;
-}
-
-void StateStack::start(std::string tagName, char **attrs) {
- // Fetch the current handler and the current state
- const Handler *h = stack.empty() ? nullptr : stack.top();
- const State currentState = h ? State::NONE : h->state;
-
- // Fetch all handlers for the given tagName
- auto range = handlers.equal_range(tagName);
- if (range->first == handlers.end()) {
- // There are no handlers registered for this tag name -- check whether
- // the current handler supports arbitrary children
- if (h && h->arbitraryChildren)
- }
-}
-
-}
-}
-}
-
diff --git a/test/core/parser/ParserStackTest.cpp b/test/core/parser/ParserStackTest.cpp
new file mode 100644
index 0000000..92249ff
--- /dev/null
+++ b/test/core/parser/ParserStackTest.cpp
@@ -0,0 +1,165 @@
+/*
+ SCAENEA IDL Compiler (scidlc)
+ Copyright (C) 2014 Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+#include <core/parser/ParserStack.hpp>
+
+namespace ousia {
+namespace parser {
+
+static const State STATE_DOCUMENT = 0;
+static const State STATE_BODY = 1;
+static const State STATE_EMPTY = 2;
+
+static int startCount = 0;
+static int endCount = 0;
+static int dataCount = 0;
+static int childCount = 0;
+
+class TestHandler : public Handler {
+
+public:
+ using Handler::Handler;
+
+ void start(char **attrs) override
+ {
+ startCount++;
+// std::cout << this->name << ": start (isChild: " << (this->isChild) << ")" << std::endl;
+ }
+
+ void end() override
+ {
+ endCount++;
+// std::cout << this->name << ": end " << std::endl;
+ }
+
+ void data(const char *data, int len) override
+ {
+ dataCount++;
+// std::cout << this->name << ": data \"" << std::string{data, static_cast<unsigned int>(len)} << "\"" << std::endl;
+ }
+
+ void child(std::shared_ptr<Handler> handler) override
+ {
+ childCount++;
+// std::cout << this->name << ": has child \"" << handler->name << "\"" << std::endl;
+ }
+
+};
+
+static Handler* createTestHandler(const ParserContext &ctx,
+ std::string name, State state,
+ State parentState, bool isChild)
+{
+ return new TestHandler(ctx, name, state, parentState, isChild);
+}
+
+// Two nested elements used for testing
+static const std::multimap<std::string, HandlerDescriptor> TEST_HANDLERS{
+ {"document", {{STATE_NONE}, createTestHandler, STATE_DOCUMENT}},
+ {"body", {{STATE_DOCUMENT}, createTestHandler, STATE_BODY, true}},
+ {"empty", {{STATE_DOCUMENT}, createTestHandler, STATE_EMPTY}},
+};
+
+
+TEST(ParserStack, simpleTest)
+{
+ StandaloneParserContext ctx;
+ ParserStack s{ctx, TEST_HANDLERS};
+
+ startCount = 0;
+ endCount = 0;
+ dataCount = 0;
+ childCount = 0;
+
+ ASSERT_EQ("", s.currentName());
+ ASSERT_EQ(STATE_NONE, s.currentState());
+
+ s.start("document", nullptr);
+ s.data("test1", 5);
+
+ ASSERT_EQ("document", s.currentName());
+ ASSERT_EQ(STATE_DOCUMENT, s.currentState());
+ ASSERT_EQ(1, startCount);
+ ASSERT_EQ(1, dataCount);
+
+ s.start("body", nullptr);
+ s.data("test2", 5);
+ ASSERT_EQ("body", s.currentName());
+ ASSERT_EQ(STATE_BODY, s.currentState());
+ ASSERT_EQ(2, startCount);
+ ASSERT_EQ(2, dataCount);
+
+ s.start("inner", nullptr);
+ ASSERT_EQ("inner", s.currentName());
+ ASSERT_EQ(STATE_BODY, s.currentState());
+ s.end();
+ ASSERT_EQ(3, startCount);
+ ASSERT_EQ(1, childCount);
+ ASSERT_EQ(1, endCount);
+
+ s.end();
+ ASSERT_EQ(2, childCount);
+ ASSERT_EQ(2, endCount);
+
+ ASSERT_EQ("document", s.currentName());
+ ASSERT_EQ(STATE_DOCUMENT, s.currentState());
+
+ s.start("body", nullptr);
+ s.data("test3", 5);
+ ASSERT_EQ("body", s.currentName());
+ ASSERT_EQ(STATE_BODY, s.currentState());
+ s.end();
+ ASSERT_EQ(4, startCount);
+ ASSERT_EQ(3, dataCount);
+ ASSERT_EQ(3, childCount);
+ ASSERT_EQ(3, endCount);
+
+ ASSERT_EQ("document", s.currentName());
+ ASSERT_EQ(STATE_DOCUMENT, s.currentState());
+
+ s.end();
+ ASSERT_EQ(4, endCount);
+
+ ASSERT_EQ("", s.currentName());
+ ASSERT_EQ(STATE_NONE, s.currentState());
+}
+
+TEST(ParserStack, errorHandling)
+{
+ StandaloneParserContext ctx;
+ ParserStack s{ctx, TEST_HANDLERS};
+
+ ASSERT_THROW(s.start("body", nullptr), OusiaException);
+ s.start("document", nullptr);
+ ASSERT_THROW(s.start("document", nullptr), OusiaException);
+ s.start("empty", nullptr);
+ ASSERT_THROW(s.start("body", nullptr), OusiaException);
+ s.end();
+ s.end();
+ ASSERT_EQ(STATE_NONE, s.currentState());
+ ASSERT_THROW(s.end(), OusiaException);
+ ASSERT_THROW(s.data("test", 1), OusiaException);
+}
+
+}
+}
+