diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-01 21:27:08 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-01 21:27:08 +0100 |
commit | 5554f3594d00e267af447a24149f655ceff64d17 (patch) | |
tree | 7a06022fcd5799a12a55e95d2e45414260cc45cf /src/core | |
parent | 082165d21269123f2658edc74aa1960669e306c8 (diff) |
working version of the ParserStack class plus unit tests
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/parser/Parser.hpp | 13 | ||||
-rw-r--r-- | src/core/parser/ParserStack.cpp | 155 | ||||
-rw-r--r-- | src/core/parser/ParserStack.hpp (renamed from src/core/parser/XmlStates.hpp) | 246 | ||||
-rw-r--r-- | src/core/parser/Scope.hpp | 2 | ||||
-rw-r--r-- | src/core/parser/XmlStates.cpp | 53 |
5 files changed, 300 insertions, 169 deletions
diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index b8faf98..fa5dd49 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -86,6 +86,19 @@ struct ParserContext { : scope(scope), registry(registry), logger(logger){}; }; +struct StandaloneParserContext : public ParserContext { +private: + Logger logger; + Scope scope; + Registry registry; + +public: + StandaloneParserContext() + : ParserContext(scope, registry, logger), + scope(nullptr), + registry(logger){}; +}; + /** * Abstract parser class. This class builds the basic interface that should be * used by any parser which reads data from an input stream and transforms it diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp new file mode 100644 index 0000000..01fce3f --- /dev/null +++ b/src/core/parser/ParserStack.cpp @@ -0,0 +1,155 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <sstream> + +#include "ParserStack.hpp" + +#include <core/Exceptions.hpp> + +namespace ousia { +namespace parser { + +/* Class HandlerDescriptor */ + +HandlerInstance HandlerDescriptor::create(const ParserContext &ctx, + std::string name, State parentState, + bool isChild, char **attrs) const +{ + Handler *h = ctor(ctx, name, targetState, parentState, isChild); + h->start(attrs); + return HandlerInstance(h, this); +} + +/* Class ParserStack */ + +/** + * Function used internally to turn the elements of a collection into a string + * separated by the given delimiter. + */ +template <class T> +static std::string join(T es, const std::string &delim) +{ + std::stringstream res; + bool first = true; + for (auto &e : es) { + if (!first) { + res << delim; + } + res << e; + first = false; + } + return res.str(); +} + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + */ +static LoggableException invalidCommand(const std::string &name, + const std::set<std::string> &expected) +{ + if (expected.empty()) { + return LoggableException{ + std::string{"No nested elements allowed, but got \""} + name + + std::string{"\""}}; + } else { + return LoggableException{ + std::string{"Expected "} + + (expected.size() == 1 ? std::string{"\""} + : std::string{"one of \""}) + + join(expected, "\", \"") + std::string{"\", but got \""} + name + + std::string{"\""}}; + } +} + +std::set<std::string> ParserStack::expectedCommands(State state) +{ + std::set<std::string> res; + for (const auto &v : handlers) { + if (v.second.parentStates.count(state)) { + res.insert(v.first); + } + } + return res; +} + +void ParserStack::start(std::string name, char **attrs) +{ + // Fetch the current handler and the current state + const HandlerInstance *h = stack.empty() ? 
nullptr : &stack.top(); + const State curState = currentState(); + bool isChild = false; + + // Fetch the correct Handler descriptor for this + const HandlerDescriptor *descr = nullptr; + auto range = handlers.equal_range(name); + for (auto it = range.first; it != range.second; it++) { + if (it->second.parentStates.count(curState)) { + descr = &(it->second); + break; + } + } + if (!descr && currentArbitraryChildren()) { + isChild = true; + descr = h->descr; + } + + // No descriptor found, throw an exception. + if (!descr) { + throw invalidCommand(name, expectedCommands(curState)); + } + + // Instantiate the handler and call its start function + stack.emplace(descr->create(ctx, name, curState, isChild, attrs)); +} + +void ParserStack::end() +{ + // Check whether the current command could be ended + if (stack.empty()) { + throw LoggableException{"No command to end."}; + } + + // Remove the current HandlerInstance from the stack + HandlerInstance inst{stack.top()}; + stack.pop(); + + // Call the end function of the last Handler + inst.handler->end(); + + // Call the "child" function of the parent Handler in the stack + // (if one exists). + if (!stack.empty()) { + stack.top().handler->child(inst.handler); + } +} + +void ParserStack::data(const char *data, int len) +{ + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException{"No command to receive data."}; + } + + // Pass the data to the current Handler instance + stack.top().handler->data(data, len); +} +} +} + diff --git a/src/core/parser/XmlStates.hpp b/src/core/parser/ParserStack.hpp index 70e95f4..a777b1e 100644 --- a/src/core/parser/XmlStates.hpp +++ b/src/core/parser/ParserStack.hpp @@ -16,53 +16,39 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef _OUSIA_XML_STATES_HPP_ -#define _OUSIA_XML_STATES_HPP_ +/** + * @file ParserStack.hpp + * + * Helper classes for document or description parsers. 
Contains the ParserStack + * class, which is a pushdown automaton responsible for accepting commands in + * the correct order and calling specified handlers. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_HPP_ +#define _OUSIA_PARSER_STACK_HPP_ #include <cstdint> #include <map> +#include <memory> #include <set> #include <stack> #include <vector> +#include "Parser.hpp" + namespace ousia { namespace parser { -class Scope; -class Registry; -class Logger; - -namespace xml { - /** - * The State class represents all states the XML parser can be in. These states - * mostly represent single tags. + * The State type alias is used to identify the states of the parser stack. */ -enum class State : uint8_t { - /* Meta states */ - ALL = -1, - - /* Start state */ - NONE, - - /* Special commands */ - INCLUDE, - INLINE, +using State = int8_t; - /* Document states */ - DOCUMENT, - HEAD, - BODY, - - /* Domain description states */ - DOMAIN, - - /* Type system states */ - TYPESYSTEM, - TYPE, - TYPE_ELEM -}; +static const State STATE_ALL = -2; +static const State STATE_NONE = -1; /** * The handler class provides a context for handling an XML tag. It has to be @@ -125,7 +111,7 @@ public: /** * Virtual destructor. */ - virtual ~Handler(); + virtual ~Handler(){}; /** * Returns the node instance that was created by the handler. @@ -158,14 +144,14 @@ public: * @param data is a pointer at the character data that is available for the * Handler instance. */ - virtual void data(char *data, int len){}; + virtual void data(const char *data, int len){}; /** * Called whenever a direct child element was created and has ended. * * @param handler is a reference at the child Handler instance. 
*/ - virtual void child(Handler *handler){}; + virtual void child(std::shared_ptr<Handler> handler){}; }; /** @@ -176,95 +162,90 @@ using HandlerConstructor = Handler *(*)(const ParserContext &ctx, std::string name, State state, State parentState, bool isChild); +struct HandlerDescriptor; + /** - * The StateStack class is a pushdown automaton responsible for turning a - * command stream into a tree of Node instances. + * Used internally by ParserStack to store Handler instances and parameters + * from HandlerDescriptor that are not stored in the Handler instance + * itself. Instances of the HandlerInstance class can be created using the + * HandlerDescriptor "create" method. */ -class StateStack { -public: +struct HandlerInstance { /** - * Used internlly by StateStack to store Handler instances and parameters - * from HandlerDescriptor that are not stored in the Handler instance - * itself. Instances of the HandlerInstance class can be created using the - * HandlerDescriptor "create" method. + * Pointer at the actual handler instance. */ - struct HandlerInstance { - /** - * Pointer at the actual handler instance. - */ - std::unique_ptr<Handler> handler; - - /** - * Value of the arbitraryChildren flag stored in the HandlerDescriptor - * class. - */ - const bool arbitraryChildren; - - HandlerInstance(std::unique_ptr<Handler> handler, - bool arbitraryChildren) - : handler(handler), arbitraryChildren(arbitraryChildren) - { - } + std::shared_ptr<Handler> handler; + + const HandlerDescriptor *descr; + + HandlerInstance(Handler *handler, const HandlerDescriptor *descr) + : handler(handler), descr(descr) + { } +}; +/** + * Used internally by ParserStack to store the pushdown automaton + * description. + */ +struct HandlerDescriptor { /** - * Used internally by StateStack to store the pushdown automaton - * description. + * The valid parent states. */ - struct HandlerDescriptor { - /** - * The valid parent states. 
- */ - const std::set<State> parentStates; - - /** - * Pointer at a function which creates a new concrete Handler instance. - */ - const HandlerConstructor ctor; - - /** - * The target state for the registered handler. - */ - const State targetState; - - /** - * Set to true if this handler instance allows arbitrary children as - * tags. - */ - const bool arbitraryChildren; - - HandlerDescriptor(std::set<State> parentStates, HandlerConstructor ctor, - State targetState, bool arbitraryChildren = false) - : parentStates(std::move(parentStates)), - ctor(constructor), - targetState(targetState), - arbitraryChildren(arbitraryChildren) - { - } - - HandlerInstance create(const ParserContext &ctx, std::string name, - State parentState, bool isChild) - { - return HandlerInstance{ - ctor(ctx, name, targetState, parentState, isChild), - arbitraryChildren}; - } - }; + const std::set<State> parentStates; -private: /** - * Map containing all registered command names and the corresponding - * handler - * descriptor. + * Pointer at a function which creates a new concrete Handler instance. + */ + const HandlerConstructor ctor; + + /** + * The target state for the registered handler. + */ + const State targetState; + + /** + * Set to true if this handler instance allows arbitrary children as + * tags. */ - const std::multimap<std::string, HandlerDescriptor> handlers; + const bool arbitraryChildren; + + HandlerDescriptor(std::set<State> parentStates, HandlerConstructor ctor, + State targetState, bool arbitraryChildren = false) + : parentStates(std::move(parentStates)), + ctor(ctor), + targetState(targetState), + arbitraryChildren(arbitraryChildren) + { + } /** + * Creates an instance of the concrete Handler class represented by the + * HandlerDescriptor and calls its start function. 
+ */ + HandlerInstance create(const ParserContext &ctx, std::string name, + State parentState, bool isChild, char **attrs) const; +}; + +/** + * The ParserStack class is a pushdown automaton responsible for turning a + * command stream into a tree of Node instances. + */ +class ParserStack { +private: + /** * Reference at the parser context. */ const ParserContext &ctx; /** + * Map containing all registered command names and the corresponding + * handler + * descriptor. + */ + const std::multimap<std::string, HandlerDescriptor> &handlers; + + /** * Internal stack used for managing the currently active Handler instances. */ std::stack<HandlerInstance> stack; @@ -281,21 +262,54 @@ private: public: /** - * Creates a new instance of the StateStack class. + * Creates a new instance of the ParserStack class. * * @param handlers is a map containing the command names and the * corresponding HandlerDescriptor instances. */ - StateStack(const ParserContext &ctx, - std::multimap<std::string, HandlerDescriptor> handlers) - : handlers(std::move(handlers)), - ctx(ctx), - currentState(State::NONE), - arbitraryChildren(false); + ParserStack(const ParserContext &ctx, + const std::multimap<std::string, HandlerDescriptor> &handlers) + : ctx(ctx), handlers(handlers){}; + + /** + * Returns the state the ParserStack instance currently is in. + * + * @return the state of the currently active Handler instance or STATE_NONE + * if no handler is on the stack. + */ + State currentState() { + return stack.empty() ? STATE_NONE : stack.top().handler->state; + } + + /** + * Returns the command name that is currently being handled. + * + * @return the name of the command currently being handled by the active + * Handler instance or an empty string if no handler is currently active. + */ + std::string currentName() { + return stack.empty() ? std::string{} : stack.top().handler->name; + } + + /** + * Returns whether the current command handler allows arbitrary children. 
+ * + * @return true if the handler allows arbitrary children, false otherwise. + */ + bool currentArbitraryChildren() { + return stack.empty() ? false : stack.top().descr->arbitraryChildren; + } + + // TODO: Change signature + void start(std::string name, char **attrs); + + void end(); + + // TODO: Change signature + void data(const char *data, int len); }; } } -} -#endif /* _OUSIA_XML_STATES_HPP_ */ +#endif /* _OUSIA_PARSER_STACK_HPP_ */ diff --git a/src/core/parser/Scope.hpp b/src/core/parser/Scope.hpp index 0c37fbd..9c5504f 100644 --- a/src/core/parser/Scope.hpp +++ b/src/core/parser/Scope.hpp @@ -19,6 +19,8 @@ #ifndef _OUSIA_PARSER_SCOPE_H_ #define _OUSIA_PARSER_SCOPE_H_ +#include <deque> + #include <core/Node.hpp> /** diff --git a/src/core/parser/XmlStates.cpp b/src/core/parser/XmlStates.cpp deleted file mode 100644 index ec0f002..0000000 --- a/src/core/parser/XmlStates.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
-*/ - -#include "XmlStates.hpp" - -namespace ousia { -namespace parser { -namespace xml { - -std::set<std::string> StateStack::expectedCommands(State state) -{ - std::set<std::string> res; - for (const auto &v: handlers) { - if (v.second.parentStates.count(state)) { - res.insert(v.first); - } - } - return res; -} - -void StateStack::start(std::string tagName, char **attrs) { - // Fetch the current handler and the current state - const Handler *h = stack.empty() ? nullptr : stack.top(); - const State currentState = h ? State::NONE : h->state; - - // Fetch all handlers for the given tagName - auto range = handlers.equal_range(tagName); - if (range->first == handlers.end()) { - // There are no handlers registered for this tag name -- check whether - // the current handler supports arbitrary children - if (h && h->arbitraryChildren) - } -} - -} -} -} - |