From cc281d91def921b7bbf5d3d4a0fce53afc5a317b Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:07:58 +0100 Subject: Renamed parser/generic to parser/stack and made filenames much shorter --- src/core/parser/stack/Callbacks.cpp | 23 +++ src/core/parser/stack/Callbacks.hpp | 99 ++++++++++++ src/core/parser/stack/Handler.cpp | 90 +++++++++++ src/core/parser/stack/Handler.hpp | 302 +++++++++++++++++++++++++++++++++++ src/core/parser/stack/Stack.cpp | 188 ++++++++++++++++++++++ src/core/parser/stack/Stack.hpp | 191 ++++++++++++++++++++++ src/core/parser/stack/State.cpp | 171 ++++++++++++++++++++ src/core/parser/stack/State.hpp | 307 ++++++++++++++++++++++++++++++++++++ 8 files changed, 1371 insertions(+) create mode 100644 src/core/parser/stack/Callbacks.cpp create mode 100644 src/core/parser/stack/Callbacks.hpp create mode 100644 src/core/parser/stack/Handler.cpp create mode 100644 src/core/parser/stack/Handler.hpp create mode 100644 src/core/parser/stack/Stack.cpp create mode 100644 src/core/parser/stack/Stack.hpp create mode 100644 src/core/parser/stack/State.cpp create mode 100644 src/core/parser/stack/State.hpp (limited to 'src/core/parser/stack') diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp new file mode 100644 index 0000000..6ebc549 --- /dev/null +++ b/src/core/parser/stack/Callbacks.cpp @@ -0,0 +1,23 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "Callbacks.hpp" + +namespace ousia { +} + diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp new file mode 100644 index 0000000..bb56e44 --- /dev/null +++ b/src/core/parser/stack/Callbacks.hpp @@ -0,0 +1,99 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Callbacks.hpp + * + * Contains an interface defining the callbacks that can be directed from a + * StateHandler to the StateStack, and from the StateStack to + * the actual parser. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STATE_CALLBACKS_HPP_ +#define _OUSIA_PARSER_STATE_CALLBACKS_HPP_ + +#include + +#include + +namespace ousia { +namespace parser_stack { + +/** + * Interface defining a set of callback functions that act as a basis for the + * StateStackCallbacks and the ParserCallbacks. + */ +class Callbacks { +public: + /** + * Virtual descructor. + */ + virtual ~Callbacks() {}; + + /** + * Sets the whitespace mode that specifies how string data should be + * processed. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; + + /** + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + virtual void registerToken(const std::string &token) = 0; + + /** + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. + */ + virtual void unregisterToken(const std::string &token) = 0; +}; + +/** + * Interface defining the callback functions that can be passed from a + * StateStack to the underlying parser. + */ +class ParserCallbacks : public Callbacks { + /** + * Checks whether the given token is supported by the parser. The parser + * returns true, if the token is supported, false if this token cannot be + * registered. Note that parsers that do not support the registration of + * tokens at all should always return "true". + * + * @param token is the token that should be checked for support. + * @return true if the token is generally supported (or the parser does not + * support registering tokens at all), false if the token is not supported, + * because e.g. it is a reserved token or it interferes with other tokens. + */ + virtual bool supportsToken(const std::string &token) = 0; +}; + +} +} + +#endif /* _OUSIA_PARSER_STATE_CALLBACKS_HPP_ */ + diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp new file mode 100644 index 0000000..66af2a4 --- /dev/null +++ b/src/core/parser/stack/Handler.cpp @@ -0,0 +1,90 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "Callbacks.hpp" +#include "Handler.hpp" +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class HandlerData */ + +HandlerData::HandlerData(ParserContext &ctx, Callbacks &callbacks, + std::string name, const State &state, + const SourceLocation &location) + : ctx(ctx), + callbacks(callbacks), + name(std::move(name)), + state(state), + location(location) +{ +} + +/* Class Handler */ + +Handler::Handler(const HandlerData &internalData) : internalData(internalData) +{ +} + +Handler::~Handler() {} + +ParserContext &Handler::context() { return internalData.ctx; } + +const std::string &Handler::name() { return internalData.name; } + +ParserScope &Handler::scope() { return internalData.ctx.getScope(); } + +Manager &Handler::manager() { return internalData.ctx.getManager(); } + +Logger &Handler::logger() { return internalData.ctx.getLogger(); } + +const State &Handler::state() { return internalData.state; } + +SourceLocation Handler::location() { return internalData.location; } + +void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) +{ + internalData.callbacks.setWhitespaceMode(whitespaceMode); +} + +void Handler::registerToken(const std::string &token) +{ + internalData.callbacks.registerToken(token); +} + +void Handler::unregisterToken(const std::string &token) +{ + internalData.callbacks.unregisterToken(token); +} + +/* Class DefaultHandler */ + +/*void DefaultHandler::start(Variant::mapType &args) {} + +void DefaultHandler::end() {} + +Handler *DefaultHandler::create(const data &data) +{ + return new DefaultHandler{data}; +}*/ +} +} + diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp new file mode 100644 index 0000000..0701343 --- /dev/null +++ b/src/core/parser/stack/Handler.hpp @@ -0,0 +1,302 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef _OUSIA_PARSER_STATE_HANDLER_HPP_ +#define _OUSIA_PARSER_STATE_HANDLER_HPP_ + +#include +#include + +#include +#include + +namespace ousia { + +// Forward declarations +class ParserContext; +class Callbacks; +class Logger; +class Project; + +namespace parser_stack { + +// More forward declarations +class State; + +/** + * Class collecting all the data that is being passed to a Handler + * instance. + */ +class HandlerData { +public: + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + ParserContext &ctx; + + /** + * Reference at an instance of the Callbacks class, used for + * modifying the behaviour of the parser (like registering tokens, setting + * the data type or changing the whitespace handling mode). + */ + Callbacks &callbacks; + + /** + * Contains the name of the command that is being handled. + */ + std::string name; + + /** + * Contains the current state of the state machine. + */ + const State &state; + + /** + * Current source code location. + */ + SourceLocation location; + + /** + * Constructor of the HandlerData class. + * + * @param ctx is the parser context the handler should be executed in. + * @param callbacks is an instance of Callbacks used to notify + * the parser about certain state changes. + * @param name is the name of the string. + * @param state is the state this handler was called for. + * @param location is the location at which the handler is created. + */ + HandlerData(ParserContext &ctx, Callbacks &callbacks, std::string name, + const State &state, const SourceLocation &location); +}; + +/** + * The Handler class provides a context for handling a generic stack element. + * It has to beoverridden and registered in the StateStack class to form + * handlers for concrete XML tags. + */ +class Handler { +private: + /** + * Structure containing the internal handler data. + */ + const HandlerData internalData; + +protected: + /** + * Constructor of the Handler class. + * + * @param data is a structure containing all data being passed to the + * handler. + */ + Handler(const HandlerData &internalData); + + /** + * Returns a reference at the ParserContext. + * + * @return a reference at the ParserContext. + */ + ParserContext &context(); + + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. + */ + const std::string &name(); + + /** + * Returns a reference at the ParserScope instance. + * + * @return a reference at the ParserScope instance. + */ + ParserScope &scope(); + + /** + * Returns a reference at the Manager instance which manages all nodes. + * + * @return a referance at the Manager instance. + */ + Manager &manager(); + + /** + * Returns a reference at the Logger instance used for logging error + * messages. + * + * @return a reference at the Logger instance. + */ + Logger &logger(); + + /** + * Reference at the State descriptor for which this Handler was created. + * + * @return a const reference at the constructing State descriptor. + */ + const State &state(); + + /** + * Returns the current location in the source file. + * + * @return the current location in the source file. + */ + SourceLocation location(); + +public: + /** + * Virtual destructor. + */ + virtual ~Handler(); + + /** + * Calls the corresponding function in the Callbacks instance. Sets the + * whitespace mode that specifies how string data should be processed. The + * calls to this function are placed on a stack by the underlying Stack + * class. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + void setWhitespaceMode(WhitespaceMode whitespaceMode); + + /** + * Calls the corresponding function in the Callbacks instance. + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + void registerToken(const std::string &token); + + /** + * Calls the corresponding function in the Callbacks instance. + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. + */ + void unregisterToken(const std::string &token); + + /** + * Called when the command that was specified in the constructor is + * instanciated. + * + * @param args is a map from strings to variants (argument name and value). + * @return true if the handler was successful in starting the element it + * represents, false otherwise. + */ + virtual bool start(Variant::mapType &args) = 0; + + /** + * Called before the command for which this handler is defined ends (is + * forever removed from the stack). + */ + virtual void end() = 0; + + /** + * Called when a new field starts, while the handler is active. This + * function should return true if the field is supported, false otherwise. + * No error should be logged if the field cannot be started, the caller will + * take care of that (since it is always valid to start a default field, + * even though the corresponding structure does not have a field, as long as + * no data is fed into the field). + * + * @param isDefaultField is set to true if the field that is being started + * is the default/tree field. The handler should set the value of this + * variable to true if the referenced field is indeed the default field. + * @param isImplicit is set to true if the field is implicitly being started + * by the stack (this field always implies isDefaultField being set to + * true). + * @param fieldIndex is the numerical index of the field. + */ + virtual bool fieldStart(bool &isDefaultField, bool isImplicit, + size_t fieldIndex) = 0; + + /** + * Called when a previously opened field ends, while the handler is active. + * Note that a "fieldStart" and "fieldEnd" are always called alternately. + */ + virtual void fieldEnd() = 0; + + /** + * Called whenever an annotation starts while this handler is active. The + * function should return true if starting the annotation was successful, + * false otherwise. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the name in the source code. + * @param args is a map from strings to variants (argument name and value). + * @return true if the mentioned annotation could be started here, false + * if an error occurred. + */ + virtual bool annotationStart(Variant className, Variant::mapType &args) = 0; + + /** + * Called whenever an annotation ends while this handler is active. The + * function should return true if ending the annotation was successful, + * false otherwise. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the class name in the source code. + * @param elementName is a string variant containing the name of the + * annotation class and the location of the element name in the source code. + * @return true if the mentioned annotation could be started here, false if + * an error occurred. + */ + virtual bool annotationEnd(Variant className, Variant elementName) = 0; + + /** + * Called whenever raw data (int the form of a string) is available for the + * Handler instance. + * + * @param data is a string variant containing the character data and its + * location. + */ + virtual void data(Variant data) = 0; +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + * + * @param handlerData is the data that should be passed to the new handler + * instance. + * @return a newly created handler instance. + */ +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * The DefaultHandler class is used in case no element handler is specified in + * the State descriptor. + */ +/*class EmptyHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData); +};*/ + +} +} + +#endif /* _OUSIA_PARSER_STATE_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp new file mode 100644 index 0000000..1d83a68 --- /dev/null +++ b/src/core/parser/stack/Stack.cpp @@ -0,0 +1,188 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include +#include +#include + +#include "Stack.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class StateStack */ + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + */ +static LoggableException InvalidCommand(const std::string &name, + const std::set &expected) +{ + if (expected.empty()) { + return LoggableException{ + std::string{"No nested elements allowed, but got \""} + name + + std::string{"\""}}; + } else { + return LoggableException{ + std::string{"Expected "} + + (expected.size() == 1 ? std::string{"\""} + : std::string{"one of \""}) + + Utils::join(expected, "\", \"") + std::string{"\", but got \""} + + name + std::string{"\""}}; + } +} + +StateStack::StateStack( + ParserContext &ctx, + const std::multimap &states) + : ctx(ctx), states(states) +{ +} + +bool StateStack::deduceState() +{ + // Assemble all states + std::vector states; + for (const auto &e : this->states) { + states.push_back(e.second); + } + + // Fetch the type signature of the scope and derive all possible states, + // abort if no unique parser state was found + std::vector possibleStates = + StateDeductor(ctx.getScope().getStackTypeSignature(), states) + .deduce(); + if (possibleStates.size() != 1) { + ctx.getLogger().error( + "Error while including file: Cannot deduce parser state."); + return false; + } + + // Switch to this state by creating a dummy handler + const State *state = possibleStates[0]; + Handler *handler = + DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}}); + stack.emplace(handler); + return true; +} + +std::set StateStack::expectedCommands() +{ + const State *currentState = &(this->currentState()); + std::set res; + for (const auto &v : states) { + if (v.second->parents.count(currentState)) { + res.insert(v.first); + } + } + return res; +} + +const State &StateStack::currentState() +{ + return stack.empty() ? States::None : stack.top()->state(); +} + +std::string StateStack::currentCommandName() +{ + return stack.empty() ? std::string{} : stack.top()->name(); +} + +const State *StateStack::findTargetState(const std::string &name) +{ + const State *currentState = &(this->currentState()); + auto range = states.equal_range(name); + for (auto it = range.first; it != range.second; it++) { + const StateSet &parents = it->second->parents; + if (parents.count(currentState) || parents.count(&States::All)) { + return it->second; + } + } + + return nullptr; +} + +void StateStack::start(const std::string &name, Variant::mapType &args, + const SourceLocation &location) +{ + State const *targetState = findTargetState(name); +// TODO: Andreas, please improve this. +// if (!Utils::isIdentifier(name)) { +// throw LoggableException(std::string("Invalid identifier \"") + name + +// std::string("\"")); +// } + + if (targetState == nullptr) { + targetState = findTargetState("*"); + } + if (targetState == nullptr) { + throw InvalidCommand(name, expectedCommands()); + } + + // Fetch the associated constructor + HandlerConstructor ctor = targetState->elementHandler + ? targetState->elementHandler + : DefaultHandler::create; + + // Canonicalize the arguments, allow additional arguments + targetState->arguments.validateMap(args, ctx.getLogger(), true); + + // Instantiate the handler and call its start function + Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); + handler->start(args); + stack.emplace(handler); +} + +void StateStack::start(std::string name, const Variant::mapType &args, + const SourceLocation &location) +{ + Variant::mapType argsCopy(args); + start(name, argsCopy); +} + +void StateStack::end() +{ + // Check whether the current command could be ended + if (stack.empty()) { + throw LoggableException{"No command to end."}; + } + + // Remove the current HandlerInstance from the stack + std::shared_ptr inst{stack.top()}; + stack.pop(); + + // Call the end function of the last Handler + inst->end(); +} + +void StateStack::data(const std::string &data, int field) +{ + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException{"No command to receive data."}; + } + + // Pass the data to the current Handler instance + stack.top()->data(data, field); +} +} +} + diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp new file mode 100644 index 0000000..b106475 --- /dev/null +++ b/src/core/parser/stack/Stack.hpp @@ -0,0 +1,191 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file ParserStateStack.hpp + * + * Helper classes for document or description parsers. Contains the + * ParserStateStack class, which is an pushdown automaton responsible for + * accepting commands in the correct order and calling specified handlers. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STATE_STACK_HPP_ +#define _OUSIA_PARSER_STATE_STACK_HPP_ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "Parser.hpp" +#include "ParserContext.hpp" +#include "ParserState.hpp" + +namespace ousia { + +/** + * The ParserStateStack class is a pushdown automaton responsible for turning a + * command stream into a tree of Node instances. + */ +class ParserStateStack { +private: + /** + * Reference at the parser context. + */ + ParserContext &ctx; + + /** + * Map containing all registered command names and the corresponding + * state descriptors. + */ + const std::multimap &states; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::stack> stack; + + /** + * Used internally to get all expected command names for the current state. + * This function is used to build error messages. + * + * @return a set of strings containing the names of the expected commands. + */ + std::set expectedCommands(); + + /** + * Returns the targetState for a command with the given name that can be + * reached from for the current state. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + *state + * otherwise. + */ + const ParserState *findTargetState(const std::string &name); + +public: + /** + * Creates a new instance of the ParserStateStack class. + * + * @param ctx is the parser context the parser stack is working on. + * @param states is a map containing the command names and pointers at the + * corresponding ParserState instances. + */ + ParserStateStack( + ParserContext &ctx, + const std::multimap &states); + + /** + * Tries to reconstruct the parser state from the Scope instance of the + * ParserContext given in the constructor. This functionality is needed for + * including files,as the Parser of the included file needs to be brought to + + an equivalent state as the one in the including file. + * + * @param scope is the ParserScope instance from which the ParserState + * should be reconstructed. + * @param logger is the logger instance to which error messages should be + * written. + * @return true if the operation was sucessful, false otherwise. + */ + bool deduceState(); + + /** + * Returns the state the ParserStateStack instance currently is in. + * + * @return the state of the currently active Handler instance or STATE_NONE + * if no handler is on the stack. + */ + const ParserState ¤tState(); + + /** + * Returns the command name that is currently being handled. + * + * @return the name of the command currently being handled by the active + * Handler instance or an empty string if no handler is currently active. + */ + std::string currentCommandName(); + + /** + * Function that should be called whenever a new command is reached. + * + * @param name is the name of the command (including the namespace + * separator ':') and its corresponding location. Must be a string variant. + * @param args is a map variant containing the arguments that were passed to + * the command. + */ + void command(Variant name, Variant args); + + /** + * Function that should be called whenever a new field starts. Fields of the + * same command may not be separated by calls to + */ + void fieldStart(); + + /** + * Function that should be called whenever a field ends. + */ + void fieldEnd(); + + /** + * Function that shuold be called whenever character data is found in the + * input stream. + * + * @param data is a variant of any type containing the data that was parsed + * as data. + */ + void data(Variant data); + + /** + * Function that should be called whenever an annotation starts. + * + * @param name is the name of the annotation class. + * @param args is a map variant containing the arguments that were passed + * to the annotation. + */ + void annotationStart(Variant name, Variant args); + + /** + * Function that should be called whenever an annotation ends. + * + * @param name is the name of the annotation class that was ended. + * @param annotationName is the name of the annotation that was ended. + */ + void annotationEnd(Variant name, Variant annotationName); + + /** + * Function that should be called whenever a previously registered token + * is found in the input stream. + * + * @param token is string variant containing the token that was encountered. + */ + void token(Variant token); +}; +} + +#endif /* _OUSIA_PARSER_STATE_STACK_HPP_ */ + diff --git a/src/core/parser/stack/State.cpp b/src/core/parser/stack/State.cpp new file mode 100644 index 0000000..d72f533 --- /dev/null +++ b/src/core/parser/stack/State.cpp @@ -0,0 +1,171 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class State */ + +State::State() : elementHandler(nullptr) {} + +State::State(StateSet parents, Arguments arguments, + RttiSet createdNodeTypes, + HandlerConstructor elementHandler, + bool supportsAnnotations) + : parents(parents), + arguments(arguments), + createdNodeTypes(createdNodeTypes), + elementHandler(elementHandler), + supportsAnnotations(supportsAnnotations) +{ +} + +State::State(const StateBuilder &builder) + : State(builder.build()) +{ +} + +/* Class StateBuilder */ + +StateBuilder &StateBuilder::copy(const State &state) +{ + this->state = state; + return *this; +} + +StateBuilder &StateBuilder::parent(const State *parent) +{ + state.parents = StateSet{parent}; + return *this; +} + +StateBuilder &StateBuilder::parents(const StateSet &parents) +{ + state.parents = parents; + return *this; +} + +StateBuilder &StateBuilder::arguments(const Arguments &arguments) +{ + state.arguments = arguments; + return *this; +} + +StateBuilder &StateBuilder::createdNodeType(const Rtti *type) +{ + state.createdNodeTypes = RttiSet{type}; + return *this; +} + +StateBuilder &StateBuilder::createdNodeTypes(const RttiSet &types) +{ + state.createdNodeTypes = types; + return *this; +} + +StateBuilder &StateBuilder::elementHandler( + HandlerConstructor elementHandler) +{ + state.elementHandler = elementHandler; + return *this; +} + +StateBuilder &StateBuilder::supportsAnnotations(bool supportsAnnotations) +{ + state.supportsAnnotations = supportsAnnotations; + return *this; +} + +const State &StateBuilder::build() const { return state; } + +/* Class StateDeductor */ + +StateDeductor::StateDeductor( + std::vector signature, + std::vector states) + : tbl(signature.size()), + signature(std::move(signature)), + states(std::move(states)) +{ +} + +bool StateDeductor::isActive(size_t d, const State *s) +{ + // Lookup the "active" state of (d, s), if it was not already set + // (e.second is true) we'll have to calculate it + auto e = tbl[d].emplace(s, false); + bool &res = e.first->second; + if (!e.second) { + return res; + } + + // Check whether this node is generative (may have produced the Node + // described by the current Signature element) + bool isGenerative = signature[d]->isOneOf(s->createdNodeTypes); + + if (isGenerative && d == 0) { + // End of recursion -- the last signature element is reached and the + // node was generative + res = true; + } else { + // Try repetition of this node + if (isGenerative && isActive(d - 1, s)) { + res = true; + } else { + // Check whether any of the parent nodes were active -- either for + // the previous element (if this one is generative) or for the + // current element (assuming this node was not generative) + for (const State *parent : s->parents) { + if ((isGenerative && isActive(d - 1, parent)) || + isActive(d, parent)) { + res = true; + break; + } + } + } + } + + return res; +} + +std::vector StateDeductor::deduce() +{ + std::vector res; + if (!signature.empty()) { + const size_t D = signature.size(); + for (auto s : states) { + if (signature[D - 1]->isOneOf(s->createdNodeTypes) && + isActive(D - 1, s)) { + res.push_back(s); + } + } + } + return res; +} + +/* Constant initializations */ + +namespace States { +const State All; +const State None; +} +} +} + diff --git a/src/core/parser/stack/State.hpp b/src/core/parser/stack/State.hpp new file mode 100644 index 0000000..ea326ec --- /dev/null +++ b/src/core/parser/stack/State.hpp @@ -0,0 +1,307 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file State.hpp + * + * Defines the State class used within the ParserStack pushdown + * automaton and the StateBuilder class for convenient construction of + * such classes. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STATE_HPP_ +#define _OUSIA_PARSER_STATE_HPP_ + +#include + +#include +#include + +namespace ousia { +namespace parser_stack { + +// Forward declarations +class StateBuilder; +class State; +class HandlerData; +class Handler; +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * Set of pointers of parser states -- used for specifying a set of parent + * states. + */ +using StateSet = std::unordered_set; + +/** + * Class used for the complete specification of a State. Stores possible + * parent states, state handlers and arguments to be passed to that state. + */ +struct State { + /** + * Vector containing all possible parent states. + */ + StateSet parents; + + /** + * Descriptor of the arguments that should be passed to the handler. + */ + Arguments arguments; + + /** + * Set containing the types of the nodes that may be created in this + * State. This information is needed for Parsers to reconstruct the + * current State from a given ParserScope when a file is included. + */ + RttiSet createdNodeTypes; + + /** + * Pointer at a function which creates a new concrete Handler instance for + * the elements described by this state. May be nullptr in which case no + * handler instance is created. + */ + HandlerConstructor elementHandler; + + /** + * Set to true if this handler does support annotations. This is almost + * always false (e.g. all description handlers), except for document + * element handlers. + */ + bool supportsAnnotations; + + /** + * Default constructor, initializes the handlers with nullptr. + */ + State(); + + /** + * Constructor taking values for all fields. Use the StateBuilder + * class for a more convenient construction of State instances. + * + * @param parents is a vector containing all possible parent states. + * @param arguments is a descriptor of arguments that should be passed to + * the handler. + * @param createdNodeTypes is a set containing the types of the nodes tha + * may be created in this State. This information is needed for + * Parsers to reconstruct the current State from a given ParserScope + * when a file is included. + * @param elementHandler is a pointer at a function which creates a new + * concrete Handler instance for the elements described by this state. May + * be nullptr in which case no handler instance is created. + * @param supportsAnnotations specifies whether annotations are supported + * here at all. + */ + State(StateSet parents, Arguments arguments = Arguments{}, + RttiSet createdNodeTypes = RttiSet{}, + HandlerConstructor elementHandler = nullptr, + bool supportsAnnotations = false); + + /** + * Creates this State from the given StateBuilder instance. + */ + State(const StateBuilder &builder); +}; + +/** + * The StateBuilder class is a class used for conveniently building new + * State instances. + */ +class StateBuilder { +private: + /** + * State instance that is currently being built by the + * StateBuilder. + */ + State state; + +public: + /** + * Copies the State instance and uses it as internal state. Overrides + * all changes made by the StateBuilder. + * + * @param state is the state that should be copied. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder ©(const State &state); + + /** + * Sets the possible parent states to the single given parent element. + * + * @param parent is a pointer at the parent State instance that should + * be the possible parent state. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &parent(const State *parent); + + /** + * Sets the State instances in the given StateSet as the list of + * supported parent states. + * + * @param parents is a set of pointers at State instances that should + * be the possible parent states. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &parents(const StateSet &parents); + + /** + * Sets the arguments that should be passed to the parser state handler to + * those given as argument. + * + * @param arguments is the Arguments instance describing the Arguments that + * should be parsed to a Handler for this State. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &arguments(const Arguments &arguments); + + /** + * Sets the Node types this state may produce to the given Rtti descriptor. + * + * @param type is the Rtti descriptor of the Type that may be produced by + * this state. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &createdNodeType(const Rtti *type); + + /** + * Sets the Node types this state may produce to the given Rtti descriptors. + * + * @param types is a set of Rtti descriptors of the Types that may be + * produced by this state. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &createdNodeTypes(const RttiSet &types); + + /** + * Sets the constructor for the element handler. The constructor creates a + * new concrete Handler instance for the elements described by this state. + * May be nullptr in which case no handler instance is created (this is + * the default value). + * + * @param elementHandler is the HandlerConstructor that should create a + * new Handler instance. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &elementHandler(HandlerConstructor elementHandler); + + /** + * Sets the state of the "supportsAnnotations" flags (default value is + * false) + * + * @param supportsAnnotations should be set to true, if annotations are + * supported for the handlers associated with this document. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &supportsAnnotations(bool supportsAnnotations); + + /** + * Returns a reference at the internal State instance that was built + * using the StateBuilder. + * + * @return the built State. + */ + const State &build() const; +}; + +/** + * Class used to deduce the State a Parser is currently in based on the + * types of the Nodes that currently are on the ParserStack. Uses dynamic + * programming in order to solve this problem. + */ +class StateDeductor { +public: + /** + * Type containing the dynamic programming table. + */ + using Table = std::vector>; + +private: + /** + * Dynamic programming table. + */ + Table tbl; + + /** + * Signature given in the constructor. + */ + const std::vector signature; + + /** + * List of states that should be checked for being active. + */ + const std::vector states; + + /** + * Used internally to check whether the given parser stack s may have been + * active for signature element d. + * + * @param d is the signature element. + * @param s is the parser state. + * @return true if the the given State may have been active. + */ + bool isActive(size_t d, const State *s); + +public: + /** + * Constructor of the StateDeductor class. + * + * @param signature a Node type signature describing the types of the nodes + * which currently reside on e.g. the ParserScope stack. + * @param states is a list of states that should be checked. + */ + StateDeductor(std::vector signature, + std::vector states); + + /** + * Selects all active states from the given states. Only considers those + * states that may have produced the last signature element. + * + * @return a list of states that may actually have been active. + */ + std::vector deduce(); +}; + +/** + * The States namespace contains all the global state constants used + * in the ParserStack class. + */ +namespace States { +/** + * State representing all states. + */ +extern const State All; + +/** + * State representing the initial state. + */ +extern const State None; +} +} +} + +#endif /* _OUSIA_PARSER_STATE_HPP_ */ + -- cgit v1.2.3