From 5554f3594d00e267af447a24149f655ceff64d17 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 1 Dec 2014 21:27:08 +0100 Subject: working version of the ParserStack class plus unit tests --- src/core/parser/Parser.hpp | 13 ++ src/core/parser/ParserStack.cpp | 155 ++++++++++++++++++++ src/core/parser/ParserStack.hpp | 315 ++++++++++++++++++++++++++++++++++++++++ src/core/parser/Scope.hpp | 2 + src/core/parser/XmlStates.cpp | 53 ------- src/core/parser/XmlStates.hpp | 301 -------------------------------------- 6 files changed, 485 insertions(+), 354 deletions(-) create mode 100644 src/core/parser/ParserStack.cpp create mode 100644 src/core/parser/ParserStack.hpp delete mode 100644 src/core/parser/XmlStates.cpp delete mode 100644 src/core/parser/XmlStates.hpp (limited to 'src/core') diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index b8faf98..fa5dd49 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -86,6 +86,19 @@ struct ParserContext { : scope(scope), registry(registry), logger(logger){}; }; +struct StandaloneParserContext : public ParserContext { +private: + Logger logger; + Scope scope; + Registry registry; + +public: + StandaloneParserContext() + : ParserContext(scope, registry, logger), + scope(nullptr), + registry(logger){}; +}; + /** * Abstract parser class. This class builds the basic interface that should be * used by any parser which reads data from an input stream and transforms it diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp new file mode 100644 index 0000000..01fce3f --- /dev/null +++ b/src/core/parser/ParserStack.cpp @@ -0,0 +1,155 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "ParserStack.hpp" + +#include + +namespace ousia { +namespace parser { + +/* Class HandlerDescriptor */ + +HandlerInstance HandlerDescriptor::create(const ParserContext &ctx, + std::string name, State parentState, + bool isChild, char **attrs) const +{ + Handler *h = ctor(ctx, name, targetState, parentState, isChild); + h->start(attrs); + return HandlerInstance(h, this); +} + +/* Class ParserStack */ + +/** + * Function used internally to turn the elements of a collection into a string + * separated by the given delimiter. + */ +template +static std::string join(T es, const std::string &delim) +{ + std::stringstream res; + bool first = true; + for (auto &e : es) { + if (!first) { + res << delim; + } + res << e; + first = false; + } + return res.str(); +} + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + */ +static LoggableException invalidCommand(const std::string &name, + const std::set &expected) +{ + if (expected.empty()) { + return LoggableException{ + std::string{"No nested elements allowed, but got \""} + name + + std::string{"\""}}; + } else { + return LoggableException{ + std::string{"Expected "} + + (expected.size() == 1 ? std::string{"\""} + : std::string{"one of \""}) + + join(expected, "\", \"") + std::string{"\", but got \""} + name + + std::string{"\""}}; + } +} + +std::set ParserStack::expectedCommands(State state) +{ + std::set res; + for (const auto &v : handlers) { + if (v.second.parentStates.count(state)) { + res.insert(v.first); + } + } + return res; +} + +void ParserStack::start(std::string name, char **attrs) +{ + // Fetch the current handler and the current state + const HandlerInstance *h = stack.empty() ? nullptr : &stack.top(); + const State curState = currentState(); + bool isChild = false; + + // Fetch the correct Handler descriptor for this + const HandlerDescriptor *descr = nullptr; + auto range = handlers.equal_range(name); + for (auto it = range.first; it != range.second; it++) { + if (it->second.parentStates.count(curState)) { + descr = &(it->second); + break; + } + } + if (!descr && currentArbitraryChildren()) { + isChild = true; + descr = h->descr; + } + + // No descriptor found, throw an exception. + if (!descr) { + throw invalidCommand(name, expectedCommands(curState)); + } + + // Instantiate the handler and call its start function + stack.emplace(descr->create(ctx, name, curState, isChild, attrs)); +} + +void ParserStack::end() +{ + // Check whether the current command could be ended + if (stack.empty()) { + throw LoggableException{"No command to end."}; + } + + // Remove the current HandlerInstance from the stack + HandlerInstance inst{stack.top()}; + stack.pop(); + + // Call the end function of the last Handler + inst.handler->end(); + + // Call the "child" function of the parent Handler in the stack + // (if one exists). + if (!stack.empty()) { + stack.top().handler->child(inst.handler); + } +} + +void ParserStack::data(const char *data, int len) +{ + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException{"No command to receive data."}; + } + + // Pass the data to the current Handler instance + stack.top().handler->data(data, len); +} +} +} + diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp new file mode 100644 index 0000000..a777b1e --- /dev/null +++ b/src/core/parser/ParserStack.hpp @@ -0,0 +1,315 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file ParserStack.hpp + * + * Helper classes for document or description parsers. Contains the ParserStack + * class, which is an pushdown automaton responsible for accepting commands in + * the correct order and calling specified handlers. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_HPP_ +#define _OUSIA_PARSER_STACK_HPP_ + +#include + +#include +#include +#include +#include +#include + +#include "Parser.hpp" + +namespace ousia { +namespace parser { + +/** + * The State type alias is used to + */ +using State = int8_t; + +static const State STATE_ALL = -2; +static const State STATE_NONE = -1; + +/** + * The handler class provides a context for handling an XML tag. It has to be + * overridden and registered in the StateStack class to form handlers for + * concrete XML tags. + */ +class Handler { +private: + Rooted node; + +protected: + void setNode(Handle node) { this->node = node; } + +public: + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + const ParserContext &ctx; + + /** + * Contains the name of the tag that is being handled. + */ + const std::string name; + + /** + * Contains the current state of the state machine. + */ + const State state; + + /** + * Contains the state of the state machine when the parent node was handled. + */ + const State parentState; + + /** + * Set to true if the tag that is being handled is not the tag that was + * specified in the state machine but a child tag of that tag. + */ + const bool isChild; + + /** + * Constructor of the Handler class. + * + * @param ctx is the parser context the handler should be executed in. + * @param name is the name of the string. + * @param state is the state this handler was called for. + * @param parentState is the state of the parent command. + * @param isChild specifies whether this handler was called not for the + * command that was specified in the state machine but a child command. + */ + Handler(const ParserContext &ctx, std::string name, State state, + State parentState, bool isChild) + : ctx(ctx), + name(std::move(name)), + state(state), + parentState(parentState), + isChild(isChild){}; + + /** + * Virtual destructor. + */ + virtual ~Handler(){}; + + /** + * Returns the node instance that was created by the handler. + * + * @return the Node instance created by the handler. May be nullptr if no + * Node was created. + */ + Rooted getNode() { return node; } + + /** + * Called when the command that was specified in the constructor is + * instanciated. + * + * @param attrs contains the attributes that were specified for the command. + * TODO: Replace with StructInstance! + */ + virtual void start(char **attrs) = 0; + + /** + * Called whenever the command for which this handler + */ + virtual void end() = 0; + + /** + * Called whenever raw data (int the form of a string) is available for the + * Handler instance. + * + * TODO: Replace with std::string? + * + * @param data is a pointer at the character data that is available for the + * Handler instance. + */ + virtual void data(const char *data, int len){}; + + /** + * Called whenever a direct child element was created and has ended. + * + * @param handler is a reference at the child Handler instance. + */ + virtual void child(std::shared_ptr handler){}; +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + */ +using HandlerConstructor = Handler *(*)(const ParserContext &ctx, + std::string name, State state, + State parentState, bool isChild); + +struct HandlerDescriptor; + +/** + * Used internlly by StateStack to store Handler instances and parameters + * from HandlerDescriptor that are not stored in the Handler instance + * itself. Instances of the HandlerInstance class can be created using the + * HandlerDescriptor "create" method. + */ +struct HandlerInstance { + /** + * Pointer at the actual handler instance. + */ + std::shared_ptr handler; + + const HandlerDescriptor *descr; + + HandlerInstance(Handler *handler, const HandlerDescriptor *descr) + : handler(handler), descr(descr) + { + } +}; + +/** + * Used internally by StateStack to store the pushdown automaton + * description. + */ +struct HandlerDescriptor { + /** + * The valid parent states. + */ + const std::set parentStates; + + /** + * Pointer at a function which creates a new concrete Handler instance. + */ + const HandlerConstructor ctor; + + /** + * The target state for the registered handler. + */ + const State targetState; + + /** + * Set to true if this handler instance allows arbitrary children as + * tags. + */ + const bool arbitraryChildren; + + HandlerDescriptor(std::set parentStates, HandlerConstructor ctor, + State targetState, bool arbitraryChildren = false) + : parentStates(std::move(parentStates)), + ctor(ctor), + targetState(targetState), + arbitraryChildren(arbitraryChildren) + { + } + + /** + * Creates an instance of the concrete Handler class represented by the + * HandlerDescriptor and calls its start function. + */ + HandlerInstance create(const ParserContext &ctx, std::string name, + State parentState, bool isChild, char **attrs) const; +}; + +/** + * The ParserStack class is a pushdown automaton responsible for turning a + * command stream into a tree of Node instances. + */ +class ParserStack { +private: + /** + * Reference at the parser context. + */ + const ParserContext &ctx; + + /** + * Map containing all registered command names and the corresponding + * handler + * descriptor. + */ + const std::multimap &handlers; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::stack stack; + + /** + * Used internally to get all expected command names for the given state + * (does not work if the current Handler instance allows arbitrary + * children). This function is used to build error messages. + * + * @param state is the state for which all expected command names should be + * returned. + */ + std::set expectedCommands(State state); + +public: + /** + * Creates a new instance of the ParserStack class. + * + * @param handlers is a map containing the command names and the + * corresponding HandlerDescriptor instances. + */ + ParserStack(const ParserContext &ctx, + const std::multimap &handlers) + : ctx(ctx), handlers(handlers){}; + + /** + * Returns the state the ParserStack instance currently is in. + * + * @return the state of the currently active Handler instance or STATE_NONE + * if no handler is on the stack. + */ + State currentState() { + return stack.empty() ? STATE_NONE : stack.top().handler->state; + } + + /** + * Returns the command name that is currently being handled. + * + * @return the name of the command currently being handled by the active + * Handler instance or an empty string if no handler is currently active. + */ + std::string currentName() { + return stack.empty() ? std::string{} : stack.top().handler->name; + } + + /** + * Returns whether the current command handler allows arbitrary children. + * + * @return true if the handler allows arbitrary children, false otherwise. + */ + bool currentArbitraryChildren() { + return stack.empty() ? false : stack.top().descr->arbitraryChildren; + } + + // TODO: Change signature + void start(std::string name, char **attrs); + + void end(); + + // TODO: Change signature + void data(const char *data, int len); +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_HPP_ */ + diff --git a/src/core/parser/Scope.hpp b/src/core/parser/Scope.hpp index 0c37fbd..9c5504f 100644 --- a/src/core/parser/Scope.hpp +++ b/src/core/parser/Scope.hpp @@ -19,6 +19,8 @@ #ifndef _OUSIA_PARSER_SCOPE_H_ #define _OUSIA_PARSER_SCOPE_H_ +#include + #include /** diff --git a/src/core/parser/XmlStates.cpp b/src/core/parser/XmlStates.cpp deleted file mode 100644 index ec0f002..0000000 --- a/src/core/parser/XmlStates.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "XmlStates.hpp" - -namespace ousia { -namespace parser { -namespace xml { - -std::set StateStack::expectedCommands(State state) -{ - std::set res; - for (const auto &v: handlers) { - if (v.second.parentStates.count(state)) { - res.insert(v.first); - } - } - return res; -} - -void StateStack::start(std::string tagName, char **attrs) { - // Fetch the current handler and the current state - const Handler *h = stack.empty() ? nullptr : stack.top(); - const State currentState = h ? State::NONE : h->state; - - // Fetch all handlers for the given tagName - auto range = handlers.equal_range(tagName); - if (range->first == handlers.end()) { - // There are no handlers registered for this tag name -- check whether - // the current handler supports arbitrary children - if (h && h->arbitraryChildren) - } -} - -} -} -} - diff --git a/src/core/parser/XmlStates.hpp b/src/core/parser/XmlStates.hpp deleted file mode 100644 index 70e95f4..0000000 --- a/src/core/parser/XmlStates.hpp +++ /dev/null @@ -1,301 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_XML_STATES_HPP_ -#define _OUSIA_XML_STATES_HPP_ - -#include - -#include -#include -#include -#include - -namespace ousia { -namespace parser { - -class Scope; -class Registry; -class Logger; - -namespace xml { - -/** - * The State class represents all states the XML parser can be in. These states - * mostly represent single tags. - */ -enum class State : uint8_t { - /* Meta states */ - ALL = -1, - - /* Start state */ - NONE, - - /* Special commands */ - INCLUDE, - INLINE, - - /* Document states */ - DOCUMENT, - HEAD, - BODY, - - /* Domain description states */ - DOMAIN, - - /* Type system states */ - TYPESYSTEM, - TYPE, - TYPE_ELEM -}; - -/** - * The handler class provides a context for handling an XML tag. It has to be - * overridden and registered in the StateStack class to form handlers for - * concrete XML tags. - */ -class Handler { -private: - Rooted node; - -protected: - void setNode(Handle node) { this->node = node; } - -public: - /** - * Reference to the ParserContext instance that should be used to resolve - * references to nodes in the Graph. - */ - const ParserContext &ctx; - - /** - * Contains the name of the tag that is being handled. - */ - const std::string name; - - /** - * Contains the current state of the state machine. - */ - const State state; - - /** - * Contains the state of the state machine when the parent node was handled. - */ - const State parentState; - - /** - * Set to true if the tag that is being handled is not the tag that was - * specified in the state machine but a child tag of that tag. - */ - const bool isChild; - - /** - * Constructor of the Handler class. - * - * @param ctx is the parser context the handler should be executed in. - * @param name is the name of the string. - * @param state is the state this handler was called for. - * @param parentState is the state of the parent command. - * @param isChild specifies whether this handler was called not for the - * command that was specified in the state machine but a child command. - */ - Handler(const ParserContext &ctx, std::string name, State state, - State parentState, bool isChild) - : ctx(ctx), - name(std::move(name)), - state(state), - parentState(parentState), - isChild(isChild){}; - - /** - * Virtual destructor. - */ - virtual ~Handler(); - - /** - * Returns the node instance that was created by the handler. - * - * @return the Node instance created by the handler. May be nullptr if no - * Node was created. - */ - Rooted getNode() { return node; } - - /** - * Called when the command that was specified in the constructor is - * instanciated. - * - * @param attrs contains the attributes that were specified for the command. - * TODO: Replace with StructInstance! - */ - virtual void start(char **attrs) = 0; - - /** - * Called whenever the command for which this handler - */ - virtual void end() = 0; - - /** - * Called whenever raw data (int the form of a string) is available for the - * Handler instance. - * - * TODO: Replace with std::string? - * - * @param data is a pointer at the character data that is available for the - * Handler instance. - */ - virtual void data(char *data, int len){}; - - /** - * Called whenever a direct child element was created and has ended. - * - * @param handler is a reference at the child Handler instance. - */ - virtual void child(Handler *handler){}; -}; - -/** - * HandlerConstructor is a function pointer type used to create concrete - * instances of the Handler class. - */ -using HandlerConstructor = Handler *(*)(const ParserContext &ctx, - std::string name, State state, - State parentState, bool isChild); - -/** - * The StateStack class is a pushdown automaton responsible for turning a - * command stream into a tree of Node instances. - */ -class StateStack { -public: - /** - * Used internlly by StateStack to store Handler instances and parameters - * from HandlerDescriptor that are not stored in the Handler instance - * itself. Instances of the HandlerInstance class can be created using the - * HandlerDescriptor "create" method. - */ - struct HandlerInstance { - /** - * Pointer at the actual handler instance. - */ - std::unique_ptr handler; - - /** - * Value of the arbitraryChildren flag stored in the HandlerDescriptor - * class. - */ - const bool arbitraryChildren; - - HandlerInstance(std::unique_ptr handler, - bool arbitraryChildren) - : handler(handler), arbitraryChildren(arbitraryChildren) - { - } - } - - /** - * Used internally by StateStack to store the pushdown automaton - * description. - */ - struct HandlerDescriptor { - /** - * The valid parent states. - */ - const std::set parentStates; - - /** - * Pointer at a function which creates a new concrete Handler instance. - */ - const HandlerConstructor ctor; - - /** - * The target state for the registered handler. - */ - const State targetState; - - /** - * Set to true if this handler instance allows arbitrary children as - * tags. - */ - const bool arbitraryChildren; - - HandlerDescriptor(std::set parentStates, HandlerConstructor ctor, - State targetState, bool arbitraryChildren = false) - : parentStates(std::move(parentStates)), - ctor(constructor), - targetState(targetState), - arbitraryChildren(arbitraryChildren) - { - } - - HandlerInstance create(const ParserContext &ctx, std::string name, - State parentState, bool isChild) - { - return HandlerInstance{ - ctor(ctx, name, targetState, parentState, isChild), - arbitraryChildren}; - } - }; - -private: - /** - * Map containing all registered command names and the corresponding - * handler - * descriptor. - */ - const std::multimap handlers; - - /** - * Reference at the parser context. - */ - const ParserContext &ctx; - - /** - * Internal stack used for managing the currently active Handler instances. - */ - std::stack stack; - - /** - * Used internally to get all expected command names for the given state - * (does not work if the current Handler instance allows arbitrary - * children). This function is used to build error messages. - * - * @param state is the state for which all expected command names should be - * returned. - */ - std::set expectedCommands(State state); - -public: - /** - * Creates a new instance of the StateStack class. - * - * @param handlers is a map containing the command names and the - * corresponding HandlerDescriptor instances. - */ - StateStack(const ParserContext &ctx, - std::multimap handlers) - : handlers(std::move(handlers)), - ctx(ctx), - currentState(State::NONE), - arbitraryChildren(false); -}; -} -} -} - -#endif /* _OUSIA_XML_STATES_HPP_ */ - -- cgit v1.2.3