diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 21:32:54 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 21:32:54 +0100 |
commit | 8e5e08c4f293434585d2a88f7f331f8ce49b67b9 (patch) | |
tree | fa82a937b1ea80f45d7955938c333f68f8a0f3f6 /src/core/parser | |
parent | 2544749215bc2465bfeca431e271110ca86d8a83 (diff) | |
parent | 40f4666c43211d9071a827ad8a2524688e7f678f (diff) |
Merge branch 'astoecke_parser_stack_new'
Conflicts:
application/src/core/parser/stack/DocumentHandler.cpp
application/src/core/parser/stack/DocumentHandler.hpp
Diffstat (limited to 'src/core/parser')
27 files changed, 3540 insertions, 1005 deletions
diff --git a/src/core/parser/ParserScope.cpp b/src/core/parser/ParserScope.cpp index 3929abf..ce3dc94 100644 --- a/src/core/parser/ParserScope.cpp +++ b/src/core/parser/ParserScope.cpp @@ -351,8 +351,7 @@ bool ParserScope::resolveType(const std::string &name, Handle<Node> owner, return resolveType(Utils::split(name, '.'), owner, logger, resultCallback); } -bool ParserScope::resolveValue(Variant &data, Handle<Type> type, - Handle<Node> owner, Logger &logger) +bool ParserScope::resolveValue(Variant &data, Handle<Type> type, Logger &logger) { return type->build( data, logger, @@ -408,7 +407,7 @@ bool ParserScope::resolveTypeWithValue(const std::vector<std::string> &path, [=](Handle<Node> resolved, Handle<Node> owner, Logger &logger) mutable { if (resolved != nullptr) { Rooted<Type> type = resolved.cast<Type>(); - scope.resolveValue(*valuePtr, type, owner, logger); + scope.resolveValue(*valuePtr, type, logger); } // Call the result callback with the type diff --git a/src/core/parser/ParserScope.hpp b/src/core/parser/ParserScope.hpp index 58fc037..185b845 100644 --- a/src/core/parser/ParserScope.hpp +++ b/src/core/parser/ParserScope.hpp @@ -702,13 +702,11 @@ public: * (even in inner structures). The data will be passed to the "build" * function of the given type. * @param type is the Typesystem type the data should be interpreted with. - * @param owner is the node for which the resolution takes place. * @param logger is the logger instance into which resolution problems * should be logged. * @return true if the value was successfully built. 
*/ - bool resolveValue(Variant &data, Handle<Type> type, Handle<Node> owner, - Logger &logger); + bool resolveValue(Variant &data, Handle<Type> type, Logger &logger); /** * Resolves a type and makes sure the corresponding value is of the correct diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp deleted file mode 100644 index 1265851..0000000 --- a/src/core/parser/ParserStack.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <sstream> - -#include <core/common/Utils.hpp> -#include <core/common/Exceptions.hpp> -#include <core/model/Project.hpp> - -#include "ParserScope.hpp" -#include "ParserStack.hpp" - -namespace ousia { - -/* A default handler */ - -/** - * The DefaultHandler class is used in case no element handler is specified in - * the ParserState descriptor. 
- */ -class DefaultHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override {} - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new DefaultHandler{handlerData}; - } -}; - -/* Class Handler */ - -void Handler::data(const std::string &data, int field) -{ - if (Utils::hasNonWhitepaceChar(data)) { - logger().error("Expected command but found character data."); - } -} - -/* Class ParserStack */ - -/** - * Returns an Exception that should be thrown when a currently invalid command - * is thrown. - */ -static LoggableException InvalidCommand(const std::string &name, - const std::set<std::string> &expected) -{ - if (expected.empty()) { - return LoggableException{ - std::string{"No nested elements allowed, but got \""} + name + - std::string{"\""}}; - } else { - return LoggableException{ - std::string{"Expected "} + - (expected.size() == 1 ? std::string{"\""} - : std::string{"one of \""}) + - Utils::join(expected, "\", \"") + std::string{"\", but got \""} + - name + std::string{"\""}}; - } -} - -ParserStack::ParserStack( - ParserContext &ctx, - const std::multimap<std::string, const ParserState *> &states) - : ctx(ctx), states(states) -{ -} - -bool ParserStack::deduceState() -{ - // Assemble all states - std::vector<const ParserState *> states; - for (const auto &e : this->states) { - states.push_back(e.second); - } - - // Fetch the type signature of the scope and derive all possible states, - // abort if no unique parser state was found - std::vector<const ParserState *> possibleStates = - ParserStateDeductor(ctx.getScope().getStackTypeSignature(), states) - .deduce(); - if (possibleStates.size() != 1) { - ctx.getLogger().error( - "Error while including file: Cannot deduce parser state."); - return false; - } - - // Switch to this state by creating a dummy handler - const ParserState *state = possibleStates[0]; - Handler *handler = - DefaultHandler::create({ctx, "", 
*state, *state, SourceLocation{}}); - stack.emplace(handler); - return true; -} - -std::set<std::string> ParserStack::expectedCommands() -{ - const ParserState *currentState = &(this->currentState()); - std::set<std::string> res; - for (const auto &v : states) { - if (v.second->parents.count(currentState)) { - res.insert(v.first); - } - } - return res; -} - -const ParserState &ParserStack::currentState() -{ - return stack.empty() ? ParserStates::None : stack.top()->state(); -} - -std::string ParserStack::currentCommandName() -{ - return stack.empty() ? std::string{} : stack.top()->name(); -} - -const ParserState *ParserStack::findTargetState(const std::string &name) -{ - const ParserState *currentState = &(this->currentState()); - auto range = states.equal_range(name); - for (auto it = range.first; it != range.second; it++) { - const ParserStateSet &parents = it->second->parents; - if (parents.count(currentState) || parents.count(&ParserStates::All)) { - return it->second; - } - } - - return nullptr; -} - -void ParserStack::start(const std::string &name, Variant::mapType &args, - const SourceLocation &location) -{ - ParserState const *targetState = findTargetState(name); -// TODO: Andreas, please improve this. -// if (!Utils::isIdentifier(name)) { -// throw LoggableException(std::string("Invalid identifier \"") + name + -// std::string("\"")); -// } - - if (targetState == nullptr) { - targetState = findTargetState("*"); - } - if (targetState == nullptr) { - throw InvalidCommand(name, expectedCommands()); - } - - // Fetch the associated constructor - HandlerConstructor ctor = targetState->elementHandler - ? 
targetState->elementHandler - : DefaultHandler::create; - - // Canonicalize the arguments, allow additional arguments - targetState->arguments.validateMap(args, ctx.getLogger(), true); - - // Instantiate the handler and call its start function - Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); - handler->start(args); - stack.emplace(handler); -} - -void ParserStack::start(std::string name, const Variant::mapType &args, - const SourceLocation &location) -{ - Variant::mapType argsCopy(args); - start(name, argsCopy); -} - -void ParserStack::end() -{ - // Check whether the current command could be ended - if (stack.empty()) { - throw LoggableException{"No command to end."}; - } - - // Remove the current HandlerInstance from the stack - std::shared_ptr<Handler> inst{stack.top()}; - stack.pop(); - - // Call the end function of the last Handler - inst->end(); -} - -void ParserStack::data(const std::string &data, int field) -{ - // Check whether there is any command the data can be sent to - if (stack.empty()) { - throw LoggableException{"No command to receive data."}; - } - - // Pass the data to the current Handler instance - stack.top()->data(data, field); -} -} - diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp deleted file mode 100644 index efc4e4a..0000000 --- a/src/core/parser/ParserStack.hpp +++ /dev/null @@ -1,361 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file ParserStack.hpp - * - * Helper classes for document or description parsers. Contains the ParserStack - * class, which is an pushdown automaton responsible for accepting commands in - * the correct order and calling specified handlers. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PARSER_STACK_HPP_ -#define _OUSIA_PARSER_STACK_HPP_ - -#include <cstdint> - -#include <map> -#include <memory> -#include <set> -#include <stack> -#include <vector> - -#include <core/common/Variant.hpp> -#include <core/common/Logger.hpp> -#include <core/common/Argument.hpp> - -#include "Parser.hpp" -#include "ParserContext.hpp" -#include "ParserState.hpp" - -namespace ousia { - -/** - * Struct collecting all the data that is being passed to a Handler instance. - */ -struct HandlerData { - /** - * Reference to the ParserContext instance that should be used to resolve - * references to nodes in the Graph. - */ - ParserContext &ctx; - - /** - * Contains the name of the tag that is being handled. - */ - const std::string name; - - /** - * Contains the current state of the state machine. - */ - const ParserState &state; - - /** - * Contains the state of the state machine when the parent node was handled. - */ - const ParserState &parentState; - - /** - * Current source code location. - */ - const SourceLocation location; - - /** - * Constructor of the HandlerData class. - * - * @param ctx is the parser context the handler should be executed in. - * @param name is the name of the string. - * @param state is the state this handler was called for. - * @param parentState is the state of the parent command. - * @param location is the location at which the handler is created. 
- */ - HandlerData(ParserContext &ctx, std::string name, const ParserState &state, - const ParserState &parentState, const SourceLocation location) - : ctx(ctx), - name(std::move(name)), - state(state), - parentState(parentState), - location(location){}; -}; - -/** - * The handler class provides a context for handling an XML tag. It has to be - * overridden and registered in the StateStack class to form handlers for - * concrete XML tags. - */ -class Handler { -private: - /** - * Structure containing the internal handler data. - */ - const HandlerData handlerData; - -public: - /** - * Constructor of the Handler class. - * - * @param data is a structure containing all data being passed to the - * handler. - */ - Handler(const HandlerData &handlerData) : handlerData(handlerData){}; - - /** - * Virtual destructor. - */ - virtual ~Handler(){}; - - /** - * Returns a reference at the ParserContext. - * - * @return a reference at the ParserContext. - */ - ParserContext &context() { return handlerData.ctx; } - - /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. - */ - const std::string &name() { return handlerData.name; } - - /** - * Returns a reference at the ParserScope instance. - * - * @return a reference at the ParserScope instance. - */ - ParserScope &scope() { return handlerData.ctx.getScope(); } - - /** - * Returns a reference at the Manager instance which manages all nodes. - * - * @return a referance at the Manager instance. - */ - Manager &manager() { return handlerData.ctx.getManager(); } - - /** - * Returns a reference at the Logger instance used for logging error - * messages. - * - * @return a reference at the Logger instance. - */ - Logger &logger() { return handlerData.ctx.getLogger(); } - - /** - * Returns a reference at the Project Node, representing the project into - * which the file is currently being parsed. - * - * @return a referance at the Project Node. 
- */ - Rooted<Project> project() { return handlerData.ctx.getProject(); } - - /** - * Reference at the ParserState descriptor for which this Handler was - * created. - * - * @return a const reference at the constructing ParserState descriptor. - */ - const ParserState &state() { return handlerData.state; } - - /** - * Reference at the ParserState descriptor of the parent state of the state - * for which this Handler was created. Set to ParserStates::None if there - * is no parent state. - * - * @return a const reference at the parent state of the constructing - * ParserState descriptor. - */ - const ParserState &parentState() { return handlerData.parentState; } - - /** - * Returns the current location in the source file. - * - * @return the current location in the source file. - */ - SourceLocation location() { return handlerData.location; } - - /** - * Called when the command that was specified in the constructor is - * instanciated. - * - * @param args is a map from strings to variants (argument name and value). - */ - virtual void start(Variant::mapType &args) = 0; - - /** - * Called whenever the command for which this handler is defined ends. - */ - virtual void end() = 0; - - /** - * Called whenever raw data (int the form of a string) is available for the - * Handler instance. In the default handler an exception is raised if the - * received data contains non-whitespace characters. - * - * @param data is a pointer at the character data that is available for the - * Handler instance. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). - */ - virtual void data(const std::string &data, int field); -}; - -/** - * HandlerConstructor is a function pointer type used to create concrete - * instances of the Handler class. - * - * @param handlerData is the data that should be passed to the new handler - * instance. - * @return a newly created handler instance. 
- */ -using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); - -/** - * The ParserStack class is a pushdown automaton responsible for turning a - * command stream into a tree of Node instances. - */ -class ParserStack { -private: - /** - * Reference at the parser context. - */ - ParserContext &ctx; - - /** - * Map containing all registered command names and the corresponding - * state descriptors. - */ - const std::multimap<std::string, const ParserState *> &states; - - /** - * Internal stack used for managing the currently active Handler instances. - */ - std::stack<std::shared_ptr<Handler>> stack; - - /** - * Used internally to get all expected command names for the current state. - * This function is used to build error messages. - * - * @return a set of strings containing the names of the expected commands. - */ - std::set<std::string> expectedCommands(); - - /** - * Returns the targetState for a command with the given name that can be - * reached from for the current state. - * - * @param name is the name of the requested command. - * @return nullptr if no target state was found, a pointer at the target - *state - * otherwise. - */ - const ParserState *findTargetState(const std::string &name); - -public: - /** - * Creates a new instance of the ParserStack class. - * - * @param ctx is the parser context the parser stack is working on. - * @param states is a map containing the command names and pointers at the - * corresponding ParserState instances. - */ - ParserStack(ParserContext &ctx, - const std::multimap<std::string, const ParserState *> &states); - - /** - * Tries to reconstruct the parser state from the Scope instance of the - * ParserContext given in the constructor. This functionality is needed for - * including files,as the Parser of the included file needs to be brought to - + an equivalent state as the one in the including file. - * - * @param scope is the ParserScope instance from which the ParserState - * should be reconstructed. 
- * @param logger is the logger instance to which error messages should be - * written. - * @return true if the operation was sucessful, false otherwise. - */ - bool deduceState(); - - /** - * Returns the state the ParserStack instance currently is in. - * - * @return the state of the currently active Handler instance or STATE_NONE - * if no handler is on the stack. - */ - const ParserState &currentState(); - - /** - * Returns the command name that is currently being handled. - * - * @return the name of the command currently being handled by the active - * Handler instance or an empty string if no handler is currently active. - */ - std::string currentCommandName(); - - /** - * Function that should be called whenever a new command starts. - * - * @param name is the name of the command. - * @param args is a map from strings to variants (argument name and value). - * Note that the passed map will be modified. - * @param location is the location in the source file at which the command - * starts. - */ - void start(const std::string &name, Variant::mapType &args, - const SourceLocation &location = SourceLocation{}); - - /** - * Function that should be called whenever a new command starts. - * - * @param name is the name of the command. - * @param args is a map from strings to variants (argument name and value). - * @param location is the location in the source file at which the command - * starts. - */ - void start(std::string name, - const Variant::mapType &args = Variant::mapType{}, - const SourceLocation &location = SourceLocation{}); - - /** - * Function called whenever a command ends. - */ - void end(); - - /** - * Function that should be called whenever data is available for the - * command. - * - * @param data is the data that should be passed to the handler. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). 
- */ - void data(const std::string &data, int field = 0); - - /** - * Returns a reference to the parser context the parser stack is currently - * working on. - * - * @return a reference to the parser context. - */ - ParserContext &getContext() { return ctx; } -}; -} - -#endif /* _OUSIA_PARSER_STACK_HPP_ */ - diff --git a/src/core/parser/generic/GenericParser.cpp b/src/core/parser/generic/GenericParser.cpp deleted file mode 100644 index e69de29..0000000 --- a/src/core/parser/generic/GenericParser.cpp +++ /dev/null diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp new file mode 100644 index 0000000..6ebc549 --- /dev/null +++ b/src/core/parser/stack/Callbacks.cpp @@ -0,0 +1,23 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "Callbacks.hpp" + +namespace ousia { +} + diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp new file mode 100644 index 0000000..9c61000 --- /dev/null +++ b/src/core/parser/stack/Callbacks.hpp @@ -0,0 +1,99 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Callbacks.hpp + * + * Contains an interface defining the callbacks that can be directed from a + * StateHandler to the StateStack, and from the StateStack to + * the actual parser. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_CALLBACKS_HPP_ +#define _OUSIA_PARSER_STACK_CALLBACKS_HPP_ + +#include <string> + +#include <core/common/Whitespace.hpp> + +namespace ousia { +namespace parser_stack { + +/** + * Interface defining a set of callback functions that act as a basis for the + * StateStackCallbacks and the ParserCallbacks. + */ +class Callbacks { +public: + /** + * Virtual descructor. + */ + virtual ~Callbacks() {}; + + /** + * Sets the whitespace mode that specifies how string data should be + * processed. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. 
+ */ + virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; + + /** + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + virtual void registerToken(const std::string &token) = 0; + + /** + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. + */ + virtual void unregisterToken(const std::string &token) = 0; +}; + +/** + * Interface defining the callback functions that can be passed from a + * StateStack to the underlying parser. + */ +class ParserCallbacks : public Callbacks { + /** + * Checks whether the given token is supported by the parser. The parser + * returns true, if the token is supported, false if this token cannot be + * registered. Note that parsers that do not support the registration of + * tokens at all should always return "true". + * + * @param token is the token that should be checked for support. + * @return true if the token is generally supported (or the parser does not + * support registering tokens at all), false if the token is not supported, + * because e.g. it is a reserved token or it interferes with other tokens. + */ + virtual bool supportsToken(const std::string &token) = 0; +}; + +} +} + +#endif /* _OUSIA_PARSER_STACK_CALLBACKS_HPP_ */ + diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index 3647db3..d514701 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -16,28 +16,35 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "DocumentHandler.hpp" - #include <algorithm> #include <core/common/RttiBuilder.hpp> #include <core/common/Utils.hpp> +#include <core/common/VariantReader.hpp> #include <core/model/Document.hpp> #include <core/model/Domain.hpp> +#include <core/model/Project.hpp> #include <core/model/Typesystem.hpp> #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DocumentHandler.hpp" +#include "State.hpp" namespace ousia { +namespace parser_stack { /* DocumentHandler */ -void DocumentHandler::start(Variant::mapType &args) +bool DocumentHandler::start(Variant::mapType &args) { Rooted<Document> document = - project()->createDocument(args["name"].asString()); + context().getProject()->createDocument(args["name"].asString()); document->setLocation(location()); scope().push(document); scope().setFlag(ParserFlag::POST_HEAD, false); + + return true; } void DocumentHandler::end() { scope().pop(); } @@ -48,7 +55,7 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode, std::string &fieldName, DocumentEntity *&parent, bool &inField) { - // check if the parent in the structure tree was an explicit field + // Check if the parent in the structure tree was an explicit field // reference. inField = parentNode->isa(&RttiTypes::DocumentField); if (inField) { @@ -56,10 +63,11 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode, parentNode = scope().selectOrThrow( {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity}); } else { - // if it wasn't an explicit reference, we use the default field. + // If it wasn't an explicit reference, we use the default field. fieldName = DEFAULT_FIELD_NAME; } - // reference the parent entity explicitly. + + // Reference the parent entity explicitly. 
parent = nullptr; if (parentNode->isa(&RttiTypes::StructuredEntity)) { parent = static_cast<DocumentEntity *>( @@ -70,17 +78,13 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode, } } -static void createPath(const std::string &firstFieldName, - const NodeVector<Node> &path, DocumentEntity *&parent) +static void createPath(const NodeVector<Node> &path, DocumentEntity *&parent, + size_t p0 = 1) { - // add the first element - parent = static_cast<DocumentEntity *>( - parent->createChildStructuredEntity(path[0].cast<StructuredClass>(), - Variant::mapType{}, firstFieldName, - "").get()); - + // TODO (@benjamin): These should be pushed onto the scope and poped once + // the scope is left. Otherwise stuff may not be correclty resolved. size_t S = path.size(); - for (size_t p = 2; p < S; p = p + 2) { + for (size_t p = p0; p < S; p = p + 2) { parent = static_cast<DocumentEntity *>( parent->createChildStructuredEntity( path[p].cast<StructuredClass>(), Variant::mapType{}, @@ -88,18 +92,19 @@ static void createPath(const std::string &firstFieldName, } } -static void createPath(const NodeVector<Node> &path, DocumentEntity *&parent) +static void createPath(const std::string &firstFieldName, + const NodeVector<Node> &path, DocumentEntity *&parent) { - size_t S = path.size(); - for (size_t p = 1; p < S; p = p + 2) { - parent = static_cast<DocumentEntity *>( - parent->createChildStructuredEntity( - path[p].cast<StructuredClass>(), Variant::mapType{}, - path[p - 1]->getName(), "").get()); - } + // Add the first element + parent = static_cast<DocumentEntity *>( + parent->createChildStructuredEntity(path[0].cast<StructuredClass>(), + Variant::mapType{}, firstFieldName, + "").get()); + + createPath(path, parent, 2); } -void DocumentChildHandler::start(Variant::mapType &args) +bool DocumentChildHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); Rooted<Node> parentNode = scope().selectOrThrow( @@ -112,7 +117,7 @@ void 
DocumentChildHandler::start(Variant::mapType &args) preamble(parentNode, fieldName, parent, inField); - // try to find a FieldDescriptor for the given tag if we are not in a + // Try to find a FieldDescriptor for the given tag if we are not in a // field already. This does _not_ try to construct transparent paths // in between. if (!inField && parent != nullptr && @@ -121,7 +126,7 @@ void DocumentChildHandler::start(Variant::mapType &args) new DocumentField(parentNode->getManager(), name(), parentNode)}; field->setLocation(location()); scope().push(field); - return; + return true; } // Otherwise create a new StructuredEntity @@ -187,27 +192,39 @@ void DocumentChildHandler::start(Variant::mapType &args) } entity->setLocation(location()); scope().push(entity); + return true; } void DocumentChildHandler::end() { scope().pop(); } -std::pair<bool, Variant> DocumentChildHandler::convertData( - Handle<FieldDescriptor> field, Logger &logger, const std::string &data) +bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field, + Variant &data, Logger &logger) { - // if the content is supposed to be of type string, we can finish - // directly. - auto vts = field->getPrimitiveType()->getVariantTypes(); - if (std::find(vts.begin(), vts.end(), VariantType::STRING) != vts.end()) { - return std::make_pair(true, Variant::fromString(data)); + bool valid = true; + Rooted<Type> type = field->getPrimitiveType(); + + // If the content is supposed to be of type string, we only need to check + // for "magic" values -- otherwise just call the "parseGenericString" + // function on the string data + if (type->isa(&RttiTypes::StringType)) { + const std::string &str = data.asString(); + // TODO: Referencing constants with "." 
separator should also work + if (Utils::isIdentifier(str)) { + data.markAsMagic(); + } + } else { + // Parse the string as generic string, assign the result + auto res = VariantReader::parseGenericString( + data.asString(), logger, data.getLocation().getSourceId(), + data.getLocation().getStart()); + data = res.second; } - // then try to parse the content using the type specification. - auto res = field->getPrimitiveType()->read( - data, logger, location().getSourceId(), location().getStart()); - return res; + // Now try to resolve the value for the primitive type + return valid && scope().resolveValue(data, type, logger); } -void DocumentChildHandler::data(const std::string &data, int fieldIdx) +bool DocumentChildHandler::data(Variant &data) { Rooted<Node> parentNode = scope().selectOrThrow( {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity, @@ -222,11 +239,10 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx) Rooted<Descriptor> desc = strctParent->getDescriptor(); // The parent from which we need to connect to the primitive content. Rooted<Node> parentClass; - /* - * We distinguish two cases here: One for fields that are given. - */ + + // We distinguish two cases here: One for fields that are given. if (inField) { - // retrieve the actual FieldDescriptor + // Retrieve the actual FieldDescriptor Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName); if (field == nullptr) { logger().error( @@ -234,75 +250,102 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx) fieldName + "\" exists in descriptor\"" + desc->getName() + "\".", location()); - return; + return false; } - // if it is a primitive field directly, try to parse the content. + // If it is a primitive field directly, try to parse the content. if (field->isPrimitive()) { - auto res = convertData(field, logger(), data); - // add it as primitive content. 
- if (res.first) { - strctParent->createChildDocumentPrimitive(res.second, - fieldName); + // Add it as primitive content. + if (!convertData(field, data, logger())) { + return false; } - return; + + strctParent->createChildDocumentPrimitive(data, fieldName); + return true; } - // if it is not primitive we need to connect via transparent elements + // If it is not primitive we need to connect via transparent elements // and default fields. parentClass = field; } else { - // in case of default fields we need to construct via default fields + // In case of default fields we need to construct via default fields // and maybe transparent elements. parentClass = desc; } - /* - * Search through all permitted default fields of the parent class that - * allow primitive content at this point and could be constructed via - * transparent intermediate entities. - * We then try to parse the data using the type specified by the respective - * field. If that does not work we proceed to the next possible field. - */ - // retrieve all default fields at this point. + + // Search through all permitted default fields of the parent class that + // allow primitive content at this point and could be constructed via + // transparent intermediate entities. + + // Retrieve all default fields at this point, either from the field + // descriptor or the structured class NodeVector<FieldDescriptor> defaultFields; if (inField) { defaultFields = parentClass.cast<FieldDescriptor>()->getDefaultFields(); } else { defaultFields = parentClass.cast<StructuredClass>()->getDefaultFields(); } + + // Try to parse the data using the type specified by the respective field. + // If that does not work we proceed to the next possible field. std::vector<LoggerFork> forks; for (auto field : defaultFields) { - // then try to parse the content using the type specification. + // Then try to parse the content using the type specification. 
forks.emplace_back(logger().fork()); - auto res = convertData(field, forks.back(), data); - if (res.first) { - forks.back().commit(); - // if that worked, construct the necessary path. - if (inField) { - NodeVector<Node> path = - parentClass.cast<FieldDescriptor>()->pathTo(field, - logger()); - createPath(fieldName, path, strctParent); - } else { - auto pathRes = desc->pathTo(field, logger()); - assert(pathRes.second); - createPath(pathRes.first, strctParent); - } - // then create the primitive element. - strctParent->createChildDocumentPrimitive(res.second); - return; + if (!convertData(field, data, forks.back())) { + continue; } + + // The conversion worked, commit any possible warnings + forks.back().commit(); + + // Construct the necessary path + if (inField) { + NodeVector<Node> path = + parentClass.cast<FieldDescriptor>()->pathTo(field, logger()); + createPath(fieldName, path, strctParent); + } else { + auto pathRes = desc->pathTo(field, logger()); + assert(pathRes.second); + createPath(pathRes.first, strctParent); + } + + // Then create the primitive element + strctParent->createChildDocumentPrimitive(data); + return true; } - logger().error("Could not read data with any of the possible fields:"); + + // No field was found that might take the data -- dump the error messages + // from the loggers + logger().error("Could not read data with any of the possible fields:", + SourceLocation{}, MessageMode::NO_CONTEXT); size_t f = 0; for (auto field : defaultFields) { - logger().note(Utils::join(field->path(), ".") + ":", SourceLocation{}, - MessageMode::NO_CONTEXT); + logger().note(std::string("Field ") + Utils::join(field->path(), ".") + + std::string(":"), + SourceLocation{}, MessageMode::NO_CONTEXT); forks[f].commit(); f++; } + return false; +} + +namespace States { +const State Document = StateBuilder() + .parent(&None) + .createdNodeType(&RttiTypes::Document) + .elementHandler(DocumentHandler::create) + .arguments({Argument::String("name", "")}); + +const 
State DocumentChild = StateBuilder() + .parents({&Document, &DocumentChild}) + .createdNodeTypes({&RttiTypes::StructureNode, + &RttiTypes::AnnotationEntity, + &RttiTypes::DocumentField}) + .elementHandler(DocumentChildHandler::create); +} } namespace RttiTypes { -const Rtti DocumentField = - RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node); +const Rtti DocumentField = RttiBuilder<ousia::parser_stack::DocumentField>( + "DocumentField").parent(&Node); +} } -}
\ No newline at end of file diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index cb124aa..b339b96 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -19,14 +19,21 @@ /** * @file DocumentHandler.hpp * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + * Contains the Handler instances used for parsing actual documents. This file + * declares to classes: The Document handler which parses the "document" command + * that introduces a new document and the "DocumentChildHandler" which parses + * the actual user defined tags. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_DOCUMENT_HANDLER_HPP_ -#define _OUSIA_DOCUMENT_HANDLER_HPP_ +#ifndef _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ +#define _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> +#include <core/model/Node.hpp> + +#include "Handler.hpp" namespace ousia { @@ -35,51 +42,131 @@ class Rtti; class DocumentEntity; class FieldDescriptor; -class DocumentHandler : public Handler { +namespace parser_stack { +/** + * The DocumentHandler class parses the "document" tag that is used to introduce + * a new document. Note that this tag is not mandatory in osml files -- if the + * first command is not a typesystem, domain or any other declarative command, + * the DocumentHandler will be implicitly called. + */ +class DocumentHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the ImportHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. 
+ */ static Handler *create(const HandlerData &handlerData) { return new DocumentHandler{handlerData}; } }; +/** + * Temporary Node that is being pushed onto the ParserScope in order to indicate + * the field the parser is currently in. The name of the Node is stored in the + * "name" field of the parent Node class. + */ class DocumentField : public Node { public: using Node::Node; }; -class DocumentChildHandler : public Handler { +/** + * The DocumentChildHandler class performs the actual parsing of the user + * defined elements in an Ousía document. + */ +class DocumentChildHandler : public StaticHandler { private: + /** + * Code shared by both the start() and the end() method. Checks whether the + * parser currently is in a field and returns the name of this field. + * + * @param parentNode is the next possible parent node (a document, + * a structured entity, an annotation entity or a field). + * @param fieldName is an output parameter to which the name of the current + * field is written (or unchanged if we're not in a field). + * @param parent is an output parameter to which the parent document entity + * will be written. + * @param inField is set to true if we actually are in a field. + */ void preamble(Handle<Node> parentNode, std::string &fieldName, DocumentEntity *&parent, bool &inField); - std::pair<bool, Variant> convertData(Handle<FieldDescriptor> field, - Logger &logger, - const std::string &data); + /** + * Constructs all structured entites along the given path and inserts them + * into the document graph. + * + * @param path is a path containing an alternating series of structured + * classes and fields. + * @pram parent is the root entity from which the process should be started. + */ + void createPath(const NodeVector<Node> &path, DocumentEntity *&parent); + + /** + * Tries to convert the given data to the type that is specified in the + * given primitive field. + * + * @param field is the primitive field for which the data is intended. 
+ * @param data is the is the data that should be converted, the result is + * written into this argument as output variable. + * @param logger is the Logger instance to which error messages should be + * written. Needed to allow the convertData function to write to a forked + * Logger instance. + * @return true if the operation was successful, false otherwise. + */ + bool convertData(Handle<FieldDescriptor> field, Variant &data, + Logger &logger); public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; - - void data(const std::string &data, int fieldIdx) override; - + bool data(Variant &data) override; + + /** + * Creates a new instance of the DocumentChildHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new DocumentChildHandler{handlerData}; } }; +namespace States { +/** + * State constant representing the "document" tag. + */ +extern const State Document; + +/** + * State contstant representing any user-defined element within a document. + */ +extern const State DocumentChild; +} + +} + namespace RttiTypes { +/** + * RttiType for the internally used DocumentField class. + */ extern const Rtti DocumentField; } + } -#endif + +#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index 6571717..a2c8eec 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -16,29 +16,48 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "DomainHandler.hpp" - #include <core/common/RttiBuilder.hpp> +#include <core/model/Document.hpp> #include <core/model/Domain.hpp> +#include <core/model/Project.hpp> #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" namespace ousia { +namespace parser_stack { /* DomainHandler */ -void DomainHandler::start(Variant::mapType &args) +bool DomainHandler::start(Variant::mapType &args) { - Rooted<Domain> domain = project()->createDomain(args["name"].asString()); + // Create the Domain node + Rooted<Domain> domain = + context().getProject()->createDomain(args["name"].asString()); domain->setLocation(location()); + // If the domain is defined inside a document, add the reference to the + // document + Rooted<Document> document = scope().select<Document>(); + if (document != nullptr) { + document->reference(domain); + } + + // Push the typesystem onto the scope, set the POST_HEAD flag to true scope().push(domain); + scope().setFlag(ParserFlag::POST_HEAD, false); + return true; } void DomainHandler::end() { scope().pop(); } /* DomainStructHandler */ -void DomainStructHandler::start(Variant::mapType &args) +bool DomainStructHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -63,12 +82,13 @@ void DomainStructHandler::start(Variant::mapType &args) } scope().push(structuredClass); + return true; } void DomainStructHandler::end() { scope().pop(); } /* DomainAnnotationHandler */ -void DomainAnnotationHandler::start(Variant::mapType &args) +bool DomainAnnotationHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -79,13 +99,14 @@ void DomainAnnotationHandler::start(Variant::mapType &args) annotationClass->setLocation(location()); scope().push(annotationClass); + return true; } void DomainAnnotationHandler::end() { scope().pop(); } /* 
DomainAttributesHandler */ -void DomainAttributesHandler::start(Variant::mapType &args) +bool DomainAttributesHandler::start(Variant::mapType &args) { // Fetch the current typesystem and create the struct node Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); @@ -94,13 +115,14 @@ void DomainAttributesHandler::start(Variant::mapType &args) attrDesc->setLocation(location()); scope().push(attrDesc); + return true; } void DomainAttributesHandler::end() { scope().pop(); } /* DomainFieldHandler */ -void DomainFieldHandler::start(Variant::mapType &args) +bool DomainFieldHandler::start(Variant::mapType &args) { FieldDescriptor::FieldType type; if (args["isSubtree"].asBool()) { @@ -116,13 +138,14 @@ void DomainFieldHandler::start(Variant::mapType &args) field->setLocation(location()); scope().push(field); + return true; } void DomainFieldHandler::end() { scope().pop(); } /* DomainFieldRefHandler */ -void DomainFieldRefHandler::start(Variant::mapType &args) +bool DomainFieldRefHandler::start(Variant::mapType &args) { Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); @@ -135,13 +158,14 @@ void DomainFieldRefHandler::start(Variant::mapType &args) field.cast<FieldDescriptor>(), logger); } }); + return true; } void DomainFieldRefHandler::end() {} /* DomainPrimitiveHandler */ -void DomainPrimitiveHandler::start(Variant::mapType &args) +bool DomainPrimitiveHandler::start(Variant::mapType &args) { Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); @@ -167,13 +191,14 @@ void DomainPrimitiveHandler::start(Variant::mapType &args) }); scope().push(field); + return true; } void DomainPrimitiveHandler::end() { scope().pop(); } /* DomainChildHandler */ -void DomainChildHandler::start(Variant::mapType &args) +bool DomainChildHandler::start(Variant::mapType &args) { Rooted<FieldDescriptor> field = scope().selectOrThrow<FieldDescriptor>(); @@ -186,13 +211,12 @@ void DomainChildHandler::start(Variant::mapType &args) child.cast<StructuredClass>()); } 
}); + return true; } -void DomainChildHandler::end() {} - /* DomainParentHandler */ -void DomainParentHandler::start(Variant::mapType &args) +bool DomainParentHandler::start(Variant::mapType &args) { Rooted<StructuredClass> strct = scope().selectOrThrow<StructuredClass>(); @@ -200,12 +224,14 @@ void DomainParentHandler::start(Variant::mapType &args) new DomainParent(strct->getManager(), args["ref"].asString(), strct)}; parent->setLocation(location()); scope().push(parent); + return true; } void DomainParentHandler::end() { scope().pop(); } /* DomainParentFieldHandler */ -void DomainParentFieldHandler::start(Variant::mapType &args) + +bool DomainParentFieldHandler::start(Variant::mapType &args) { Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>(); FieldDescriptor::FieldType type; @@ -233,13 +259,12 @@ void DomainParentFieldHandler::start(Variant::mapType &args) field->addChild(strct.cast<StructuredClass>()); } }); + return true; } -void DomainParentFieldHandler::end() {} - /* DomainParentFieldRefHandler */ -void DomainParentFieldRefHandler::start(Variant::mapType &args) +bool DomainParentFieldRefHandler::start(Variant::mapType &args) { Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>(); @@ -265,12 +290,104 @@ void DomainParentFieldRefHandler::start(Variant::mapType &args) field->addChild(strct.cast<StructuredClass>()); } }); + return true; } -void DomainParentFieldRefHandler::end() {} +namespace States { +const State Domain = StateBuilder() + .parents({&None, &Document}) + .createdNodeType(&RttiTypes::Domain) + .elementHandler(DomainHandler::create) + .arguments({Argument::String("name")}); + +const State DomainStruct = + StateBuilder() + .parent(&Domain) + .createdNodeType(&RttiTypes::StructuredClass) + .elementHandler(DomainStructHandler::create) + .arguments({Argument::String("name"), + Argument::Cardinality("cardinality", Cardinality::any()), + Argument::Bool("isRoot", false), + Argument::Bool("transparent", 
false), + Argument::String("isa", "")}); + +const State DomainAnnotation = + StateBuilder() + .parent(&Domain) + .createdNodeType(&RttiTypes::AnnotationClass) + .elementHandler(DomainAnnotationHandler::create) + .arguments({Argument::String("name")}); + +const State DomainAttributes = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::StructType) + .elementHandler(DomainAttributesHandler::create) + .arguments({}); + +const State DomainAttribute = + StateBuilder() + .parent(&DomainAttributes) + .elementHandler(TypesystemStructFieldHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("default", Variant::fromObject(nullptr))}); + +const State DomainField = StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("isSubtree", false), + Argument::Bool("optional", false)}); + +const State DomainFieldRef = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainFieldRefHandler::create) + .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); + +const State DomainStructPrimitive = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainPrimitiveHandler::create) + .arguments( + {Argument::String("name", ""), Argument::Bool("isSubtree", false), + Argument::Bool("optional", false), Argument::String("type")}); + +const State DomainStructChild = StateBuilder() + .parent(&DomainField) + .elementHandler(DomainChildHandler::create) + .arguments({Argument::String("ref")}); + +const State DomainStructParent = + StateBuilder() + .parent(&DomainStruct) + .createdNodeType(&RttiTypes::DomainParent) + .elementHandler(DomainParentHandler::create) + 
.arguments({Argument::String("ref")}); + +const State DomainStructParentField = + StateBuilder() + .parent(&DomainStructParent) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainParentFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("isSubtree", false), + Argument::Bool("optional", false)}); + +const State DomainStructParentFieldRef = + StateBuilder() + .parent(&DomainStructParent) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainParentFieldRefHandler::create) + .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); +} +} namespace RttiTypes { -const Rtti DomainParent = - RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node); +const Rtti DomainParent = RttiBuilder<ousia::parser_stack::DomainParent>( + "DomainParent").parent(&Node); } } diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp index 7398812..76172d6 100644 --- a/src/core/parser/stack/DomainHandler.hpp +++ b/src/core/parser/stack/DomainHandler.hpp @@ -19,26 +19,34 @@ /** * @file DomainHandler.hpp * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + * Contains the Handler classes used for parsing Domain descriptors. This + * includes the "domain" tag and all describing tags below the "domain" tag. 
+ * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ #ifndef _OUSIA_DOMAIN_HANDLER_HPP_ #define _OUSIA_DOMAIN_HANDLER_HPP_ #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> +#include <core/model/Node.hpp> + +#include "Handler.hpp" namespace ousia { // Forward declarations class Rtti; -class DomainHandler : public Handler { -public: - using Handler::Handler; +namespace parser_stack { + +// TODO: Documentation - void start(Variant::mapType &args) override; +class DomainHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -47,12 +55,11 @@ public: } }; -class DomainStructHandler : public Handler { +class DomainStructHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -61,12 +68,11 @@ public: } }; -class DomainAnnotationHandler : public Handler { +class DomainAnnotationHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -75,12 +81,11 @@ public: } }; -class DomainAttributesHandler : public Handler { +class DomainAttributesHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -89,12 +94,11 @@ public: } }; -class DomainFieldHandler : public Handler { +class DomainFieldHandler : public StaticHandler { public: - 
using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -103,12 +107,11 @@ public: } }; -class DomainFieldRefHandler : public Handler { +class DomainFieldRefHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -117,12 +120,11 @@ public: } }; -class DomainPrimitiveHandler : public Handler { +class DomainPrimitiveHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -131,13 +133,11 @@ public: } }; -class DomainChildHandler : public Handler { +class DomainChildHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; - void end() override; + bool start(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -150,16 +150,11 @@ public: using Node::Node; }; -namespace RttiTypes { -extern const Rtti DomainParent; -} - -class DomainParentHandler : public Handler { +class DomainParentHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -168,13 +163,11 @@ public: } }; -class DomainParentFieldHandler : public Handler { +class DomainParentFieldHandler : public StaticHandler { public: - using 
Handler::Handler; + using StaticHandler::StaticHandler; - void start(Variant::mapType &args) override; - - void end() override; + bool start(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -182,18 +175,83 @@ public: } }; -class DomainParentFieldRefHandler : public Handler { +class DomainParentFieldRefHandler : public StaticHandler { public: - using Handler::Handler; + using StaticHandler::StaticHandler; - void start(Variant::mapType &args) override; - - void end() override; + bool start(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { return new DomainParentFieldRefHandler{handlerData}; } }; + +namespace States { +/** + * State representing a "domain" struct. + */ +extern const State Domain; + +/** + * State representing a "struct" tag within a domain description. + */ +extern const State DomainStruct; + +/** + * State representing an "annotation" tag within a domain description. + */ +extern const State DomainAnnotation; + +/** + * State representing an "attributes" tag within a structure or annotation. + */ +extern const State DomainAttributes; + +/** + * State representing an "attribute" tag within the "attributes". + */ +extern const State DomainAttribute; + +/** + * State representing a "field" tag within a structure or annotation. + */ +extern const State DomainField; + +/** + * State representing a "fieldref" tag within a structure or annotation. + */ +extern const State DomainFieldRef; + +/** + * State representing a "primitive" tag within a structure or annotation. + */ +extern const State DomainStructPrimitive; + +/** + * State representing a "child" tag within a structure or annotation. + */ +extern const State DomainStructChild; + +/** + * State representing a "parent" tag within a structure or annotation. + */ +extern const State DomainStructParent; + +/** + * State representing a "field" tag within a "parent" tag. 
+ */ +extern const State DomainStructParentField; + +/** + * State representing a "fieldRef" tag within a "parent" tag. + */ +extern const State DomainStructParentFieldRef; +} +} + +namespace RttiTypes { +extern const Rtti DomainParent; +} } #endif diff --git a/src/core/parser/stack/GenericParserStates.cpp b/src/core/parser/stack/GenericParserStates.cpp new file mode 100644 index 0000000..69a6e0e --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.cpp @@ -0,0 +1,53 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "GenericParserStates.hpp" +#include "ImportIncludeHandler.hpp" +#include "TypesystemHandler.hpp" + +namespace ousia { +namespace parser_stack { + +const std::multimap<std::string, const State *> GenericParserStates{ + {"document", &States::Document}, + {"*", &States::DocumentChild}, + {"domain", &States::Domain}, + {"struct", &States::DomainStruct}, + {"annotation", &States::DomainAnnotation}, + {"attributes", &States::DomainAttributes}, + {"attribute", &States::DomainAttribute}, + {"field", &States::DomainField}, + {"fieldRef", &States::DomainFieldRef}, + {"primitive", &States::DomainStructPrimitive}, + {"childRef", &States::DomainStructChild}, + {"parentRef", &States::DomainStructParent}, + {"field", &States::DomainStructParentField}, + {"fieldRef", &States::DomainStructParentFieldRef}, + {"typesystem", &States::Typesystem}, + {"enum", &States::TypesystemEnum}, + {"entry", &States::TypesystemEnumEntry}, + {"struct", &States::TypesystemStruct}, + {"field", &States::TypesystemStructField}, + {"constant", &States::TypesystemConstant}, + {"import", &States::Import}, + {"include", &States::Include}}; +} +} + diff --git a/src/core/parser/generic/GenericParser.hpp b/src/core/parser/stack/GenericParserStates.hpp index 4f29f94..552eee5 100644 --- a/src/core/parser/generic/GenericParser.hpp +++ b/src/core/parser/stack/GenericParserStates.hpp @@ -17,33 +17,33 @@ */ /** - * @file GenericParser.hpp + * @file GenericParserStates.hpp * - * The GenericParser class builds an abstraction layer that separates the - * underlying document format (e.g. osdm or osdmx) from the actual process of - * building the document model. It provides a set of genric functions that - * should be called by the inheriting concrete parser class, e.g. indicating a - * command with parameters, the start/end of a field or the start/end of an - * annotation. 
The GenericParser maintains an internal stack of - * ParserStateHandlers and relays the commands to the elements of this stack. + * Contains a multimap which maps between tag/command names to the corresponding + * state descriptors. This multimap is used to initialize the push down + * automaton residing inside the "Stack" class. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_GENERIC_PARSER_HPP_ -#define _OUSIA_GENERIC_PARSER_HPP_ +#ifndef _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ +#define _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ -#include <core/parser/Parseer.hpp> +#include <string> +#include <map> namespace ousia { +namespace parser_stack { -class GenericParser : public Parser { - - - -}; +// Forward declarations +class State; +/** + * Map between tagnames and references to the corresponding State instances. + */ +extern const std::multimap<std::string, const State *> GenericParserStates; +} } -#endif _OUSIA_GENERIC_PARSER_HPP_ +#endif /* _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ */ diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp new file mode 100644 index 0000000..bf5d4ea --- /dev/null +++ b/src/core/parser/stack/Handler.cpp @@ -0,0 +1,254 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <core/common/Exceptions.hpp> +#include <core/common/Logger.hpp> +#include <core/parser/ParserContext.hpp> + +#include "Callbacks.hpp" +#include "Handler.hpp" +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class HandlerData */ + +HandlerData::HandlerData(ParserContext &ctx, /*Callbacks &callbacks,*/ + const std::string &name, const State &state, + const SourceLocation &location) + : ctx(ctx), + /*callbacks(callbacks),*/ + name(name), + state(state), + location(location) +{ +} + +/* Class Handler */ + +Handler::Handler(const HandlerData &handlerData) + : handlerData(handlerData), internalLogger(nullptr) +{ +} + +Handler::~Handler() {} + +ParserContext &Handler::context() { return handlerData.ctx; } + +ParserScope &Handler::scope() { return handlerData.ctx.getScope(); } + +Manager &Handler::manager() { return handlerData.ctx.getManager(); } + +Logger &Handler::logger() +{ + if (internalLogger != nullptr) { + return *internalLogger; + } + return handlerData.ctx.getLogger(); +} + +const SourceLocation &Handler::location() const { return handlerData.location; } + +const std::string &Handler::name() const { return handlerData.name; } + +void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) +{ + /*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/ +} + +void Handler::registerToken(const std::string &token) +{ + /*handlerData.callbacks.registerToken(token);*/ +} + +void Handler::unregisterToken(const std::string &token) +{ + /*handlerData.callbacks.unregisterToken(token);*/ +} + +const std::string &Handler::getName() const { return name(); } + +const State &Handler::getState() const { return handlerData.state; } + +void Handler::setLogger(Logger &logger) { internalLogger = &logger; } + +void Handler::resetLogger() { internalLogger = nullptr; } + +const SourceLocation &Handler::getLocation() const { return location(); } + +/* Class EmptyHandler */ + +bool EmptyHandler::start(Variant::mapType &args) +{ + // Just 
accept anything + return true; +} + +void EmptyHandler::end() +{ + // Do nothing if a command ends +} + +bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex) +{ + // Accept any field + return true; +} + +void EmptyHandler::fieldEnd() +{ + // Do not handle fields +} + +bool EmptyHandler::annotationStart(const Variant &className, + Variant::mapType &args) +{ + // Accept any data + return true; +} + +bool EmptyHandler::annotationEnd(const Variant &className, + const Variant &elementName) +{ + // Accept any annotation + return true; +} + +bool EmptyHandler::data(Variant &data) +{ + // Support any data + return true; +} + +Handler *EmptyHandler::create(const HandlerData &handlerData) +{ + return new EmptyHandler(handlerData); +} + +/* Class StaticHandler */ + +bool StaticHandler::start(Variant::mapType &args) +{ + // Do nothing in the default implementation, accept anything + return true; +} + +void StaticHandler::end() +{ + // Do nothing here +} + +bool StaticHandler::fieldStart(bool &isDefault, size_t fieldIdx) +{ + // Return true if either the default field is requested or the field index + // is zero. 
This simulates that there is exactly one field (a default field) + if (fieldIdx == 0) { + isDefault = true; + return true; + } + return false; +} + +void StaticHandler::fieldEnd() +{ + // Do nothing here +} + +bool StaticHandler::annotationStart(const Variant &className, + Variant::mapType &args) +{ + // No annotations supported + return false; +} + +bool StaticHandler::annotationEnd(const Variant &className, + const Variant &elementName) +{ + // No annotations supported + return false; +} + +bool StaticHandler::data(Variant &data) +{ + logger().error("Did not expect any data here", data); + return false; +} + +/* Class StaticFieldHandler */ + +StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData, + const std::string &argName) + : StaticHandler(handlerData), argName(argName), handled(false) +{ +} + +bool StaticFieldHandler::start(Variant::mapType &args) +{ + if (!argName.empty()) { + auto it = args.find(argName); + if (it != args.end() && !it->second.toString().empty()) { + handled = true; + doHandle(it->second, args); + return true; + } + } + + this->args = args; + return true; +} + +void StaticFieldHandler::end() +{ + if (!handled) { + if (!argName.empty()) { + logger().error(std::string("Required argument \"") + argName + + std::string("\" is missing."), + location()); + } else { + logger().error("Command requires data, but no data given", + location()); + } + } +} + +bool StaticFieldHandler::data(Variant &data) +{ + // Call the doHandle function if this has not been done before + if (!handled) { + handled = true; + doHandle(data, args); + return true; + } + + // The doHandle function was already called, print an error message + logger().error( + std::string("Found data, but the corresponding argument \"") + argName + + std::string("\" was already specified"), + data); + + // Print the location at which the attribute was originally specified + auto it = args.find(argName); + if (it != args.end()) { + logger().note(std::string("Attribute was 
specified here:"), it->second); + } + return false; +} +} +} + diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp new file mode 100644 index 0000000..7cda7a4 --- /dev/null +++ b/src/core/parser/stack/Handler.hpp @@ -0,0 +1,421 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_PARSER_STACK_HANDLER_HPP_ +#define _OUSIA_PARSER_STACK_HANDLER_HPP_ + +#include <string> + +#include <core/common/Location.hpp> +#include <core/common/Variant.hpp> +#include <core/common/Whitespace.hpp> + +namespace ousia { + +// Forward declarations +class ParserScope; +class ParserContext; +class Logger; + +namespace parser_stack { + +// More forward declarations +class Callbacks; +class State; + +/** + * Class collecting all the data that is being passed to a Handler + * instance. + */ +class HandlerData { +public: + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + ParserContext &ctx; + + /** + * Reference at an instance of the Callbacks class, used for + * modifying the behaviour of the parser (like registering tokens, setting + * the data type or changing the whitespace handling mode). + */ + // Callbacks &callbacks; + + /** + * Contains the name of the command that is being handled. 
+ */ + std::string name; + + /** + * Contains the current state of the state machine. + */ + const State &state; + + /** + * Current source code location. + */ + SourceLocation location; + + /** + * Constructor of the HandlerData class. + * + * @param ctx is the parser context the handler should be executed in. + * @param callbacks is an instance of Callbacks used to notify + * the parser about certain state changes (parameter currently disabled). + * @param name is the name of the command that is being handled. + * @param state is the state this handler was called for. + * @param location is the location at which the handler is created. + */ + HandlerData(ParserContext &ctx, + /*Callbacks &callbacks,*/ const std::string &name, + const State &state, const SourceLocation &location); +}; + +/** + * The Handler class provides a context for handling a generic stack element. + * It has to be overridden and registered in the StateStack class to form + * handlers for concrete XML tags. + */ +class Handler { +private: + /** + * Structure containing the internal handler data. + */ + const HandlerData handlerData; + + /** + * Pointer at the current logger. If not nullptr, this will override the + * logger from the ParserContext specified in the handlerData. + */ + Logger *internalLogger; + +protected: + /** + * Constructor of the Handler class. + * + * @param handlerData is a structure containing all data being passed to + * the handler. + */ + Handler(const HandlerData &handlerData); + + /** + * Returns a reference at the ParserContext. + * + * @return a reference at the ParserContext. + */ + ParserContext &context(); + + /** + * Returns a reference at the ParserScope instance. + * + * @return a reference at the ParserScope instance. + */ + ParserScope &scope(); + + /** + * Returns a reference at the Manager instance which manages all nodes. + * + * @return a reference at the Manager instance. + */ + Manager &manager(); + + /** + * Returns a reference at the Logger instance used for logging error + * messages. 
+ * + * @return a reference at the Logger instance. + */ + Logger &logger(); + + /** + * Returns the location of the element in the source file, for which this + * Handler was created. + * + * @return the location of the Handler in the source file. + */ + const SourceLocation &location() const; + + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. + */ + const std::string &name() const; + +public: + /** + * Virtual destructor. + */ + virtual ~Handler(); + + /** + * Calls the corresponding function in the Callbacks instance. Sets the + * whitespace mode that specifies how string data should be processed. The + * calls to this function are placed on a stack by the underlying Stack + * class. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + void setWhitespaceMode(WhitespaceMode whitespaceMode); + + /** + * Calls the corresponding function in the Callbacks instance. + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + void registerToken(const std::string &token); + + /** + * Calls the corresponding function in the Callbacks instance. + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. + */ + void unregisterToken(const std::string &token); + + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. + */ + const std::string &getName() const; + + /** + * Reference at the State descriptor for which this Handler was created. + * + * @return a const reference at the constructing State descriptor. + */ + const State &getState() const; + + /** + * Sets the internal logger to the given logger instance. 
+ * + * @param logger is the Logger instance to which the logger should be set. + */ + void setLogger(Logger &logger); + + /** + * Resets the logger instance to the logger instance provided in the + * ParserContext. + */ + void resetLogger(); + + /** + * Returns the location of the element in the source file, for which this + * Handler was created. + * + * @return the location of the Handler in the source file. + */ + const SourceLocation &getLocation() const; + + /** + * Called when the command that was specified in the constructor is + * instanciated. + * + * @param args is a map from strings to variants (argument name and value). + * @return true if the handler was successful in starting the element it + * represents, false otherwise. + */ + virtual bool start(Variant::mapType &args) = 0; + + /** + * Called before the command for which this handler is defined ends (is + * forever removed from the stack). + */ + virtual void end() = 0; + + /** + * Called when a new field starts, while the handler is active. This + * function should return true if the field is supported, false otherwise. + * No error should be logged if the field cannot be started, the caller will + * take care of that (since it is always valid to start a default field, + * even though the corresponding structure does not have a field, as long as + * no data is fed into the field). + * + * @param isDefault is set to true if the field that is being started is the + * default/tree field. The handler should set the value of this variable to + * true if the referenced field is indeed the default field. + * @param fieldIdx is the numerical index of the field. + */ + virtual bool fieldStart(bool &isDefault, size_t fieldIdx) = 0; + + /** + * Called when a previously opened field ends, while the handler is active. + * Note that a "fieldStart" and "fieldEnd" are always called alternately. + */ + virtual void fieldEnd() = 0; + + /** + * Called whenever an annotation starts while this handler is active. 
The + * function should return true if starting the annotation was successful, + * false otherwise. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the name in the source code. + * @param args is a map from strings to variants (argument name and value). + * @return true if the mentioned annotation could be started here, false + * if an error occurred. + */ + virtual bool annotationStart(const Variant &className, + Variant::mapType &args) = 0; + + /** + * Called whenever an annotation ends while this handler is active. The + * function should return true if ending the annotation was successful, + * false otherwise. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the class name in the source code. + * @param elementName is a string variant containing the element name and + * the location of the element name in the source code. + * @return true if the mentioned annotation could be ended here, false if + * an error occurred. + */ + virtual bool annotationEnd(const Variant &className, + const Variant &elementName) = 0; + + /** + * Called whenever raw data (in the form of a string) is available for the + * Handler instance. Should return true if the data could be handled, false + * otherwise. + * + * @param data is a string variant containing the character data and its + * location. + * @return true if the data could be handled, false otherwise. + */ + virtual bool data(Variant &data) = 0; +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + * + * @param handlerData is the data that should be passed to the new handler + * instance. + * @return a newly created handler instance. 
+ */ +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * The EmptyHandler class is used in case no element handler is specified in + * the State descriptor. It just accepts all data and does nothing. + */ +class EmptyHandler : public Handler { +protected: + using Handler::Handler; + +public: + bool start(Variant::mapType &args) override; + void end() override; + bool fieldStart(bool &isDefault, size_t fieldIdx) override; + void fieldEnd() override; + bool annotationStart(const Variant &className, + Variant::mapType &args) override; + bool annotationEnd(const Variant &className, + const Variant &elementName) override; + bool data(Variant &data) override; + + /** + * Creates an instance of the EmptyHandler class. + */ + static Handler *create(const HandlerData &handlerData); +}; + +/** + * The StaticHandler class is used to handle predifined commands which do + * neither support annotations, nor multiple fields. Child classes can decide + * whether a single data field should be used. + */ +class StaticHandler : public Handler { +protected: + using Handler::Handler; + +public: + bool start(Variant::mapType &args) override; + void end() override; + bool fieldStart(bool &isDefault, size_t fieldIdx) override; + void fieldEnd() override; + bool annotationStart(const Variant &className, + Variant::mapType &args) override; + bool annotationEnd(const Variant &className, + const Variant &elementName) override; + bool data(Variant &data) override; +}; + +/** + * The StaticFieldHandler class is used to handle predifined commands which do + * neither support annotations, nor multiple fields. Additionally, it captures a + * data entry from a single default field. + */ +class StaticFieldHandler : public StaticHandler { +private: + /** + * Set to the name of the data argument that should be used instead of the + * data field, if no data field is given. + */ + std::string argName; + + /** + * Set to true, once the "doHandle" function has been called. 
+ */ + bool handled; + + /** + * Map containing the arguments given in the start function. + */ + Variant::mapType args; + +protected: + /** + * Constructor of the StaticFieldHandler class. + * + * @param handlerData is a structure containing the internal data that + * should be stored inside the handler. + * @param name of the data argument that -- if present -- should be used + * instead of the data field. If empty, data is not captured from the + * arguments. If both, data in the data field and the argument, are given, + * this results in an error. + */ + StaticFieldHandler(const HandlerData &handlerData, + const std::string &argName); + + /** + * Function that should be overriden in order to handle the field data and + * the other arguments. This function is not called if no data was given. + * + * @param fieldData is the captured field data. + * @param args are the arguments that were given in the "start" function. + */ + virtual void doHandle(const Variant &fieldData, + Variant::mapType &args) = 0; + +public: + bool start(Variant::mapType &args) override; + void end() override; + bool data(Variant &data) override; +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp index 94ee82d..d1ea97d 100644 --- a/src/core/parser/stack/ImportIncludeHandler.cpp +++ b/src/core/parser/stack/ImportIncludeHandler.cpp @@ -16,50 +16,22 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "ImportIncludeHandler.hpp" - +#include <core/model/RootNode.hpp> #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> -namespace ousia { - -/* ImportIncludeHandler */ - -void ImportIncludeHandler::start(Variant::mapType &args) -{ - rel = args["rel"].asString(); - type = args["type"].asString(); - src = args["src"].asString(); - srcInArgs = !src.empty(); -} +#include "DomainHandler.hpp" +#include "DocumentHandler.hpp" +#include "ImportIncludeHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" -void ImportIncludeHandler::data(const std::string &data, int field) -{ - if (srcInArgs) { - logger().error("\"src\" attribute has already been set"); - return; - } - if (field != 0) { - logger().error("Command has only one field."); - return; - } - src.append(data); -} +namespace ousia { +namespace parser_stack { /* ImportHandler */ -void ImportHandler::start(Variant::mapType &args) -{ - ImportIncludeHandler::start(args); - - // Make sure imports are still possible - if (scope().getFlag(ParserFlag::POST_HEAD)) { - logger().error("Imports must be listed before other commands.", - location()); - return; - } -} - -void ImportHandler::end() +void ImportHandler::doHandle(const Variant &fieldData, Variant::mapType &args) { // Fetch the last node and check whether an import is valid at this // position @@ -75,8 +47,9 @@ void ImportHandler::end() // Perform the actual import, register the imported node within the leaf // node - Rooted<Node> imported = - context().import(src, type, rel, leafRootNode->getReferenceTypes()); + Rooted<Node> imported = context().import( + fieldData.asString(), args["type"].asString(), args["rel"].asString(), + leafRootNode->getReferenceTypes()); if (imported != nullptr) { leafRootNode->reference(imported); } @@ -84,13 +57,26 @@ void ImportHandler::end() /* IncludeHandler */ -void IncludeHandler::start(Variant::mapType &args) +void IncludeHandler::doHandle(const Variant &fieldData, Variant::mapType 
&args) { - ImportIncludeHandler::start(args); + context().include(fieldData.asString(), args["type"].asString(), + args["rel"].asString(), {&RttiTypes::Node}); } -void IncludeHandler::end() -{ - context().include(src, type, rel, {&RttiTypes::Node}); +namespace States { +const State Import = + StateBuilder() + .parents({&Document, &Typesystem, &Domain}) + .elementHandler(ImportHandler::create) + .arguments({Argument::String("rel", ""), Argument::String("type", ""), + Argument::String("src", "")}); + +const State Include = + StateBuilder() + .parent(&All) + .elementHandler(IncludeHandler::create) + .arguments({Argument::String("rel", ""), Argument::String("type", ""), + Argument::String("src", "")}); +} } } diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp index b0767be..6168639 100644 --- a/src/core/parser/stack/ImportIncludeHandler.hpp +++ b/src/core/parser/stack/ImportIncludeHandler.hpp @@ -19,6 +19,9 @@ /** * @file ImportIncludeHandler.hpp * + * Contains the conceptually similar handlers for the "include" and "import" + * commands. + * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -26,51 +29,78 @@ #define _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_ #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> - -namespace ousia { -class ImportIncludeHandler : public Handler { -protected: - bool srcInArgs = false; - std::string rel; - std::string type; - std::string src; +#include "Handler.hpp" -public: - using Handler::Handler; - - void start(Variant::mapType &args) override; - - void data(const std::string &data, int field) override; -}; +namespace ousia { +namespace parser_stack { -class ImportHandler : public ImportIncludeHandler { +/** + * The ImportHandler is responsible for handling the "import" command. An import + * creates a reference to a specified file. The specified file is parsed (if + * this has not already been done) outside of the context of the current file. 
+ * If the specified resource has already been parsed, a reference to the already + * parsed file is inserted. Imports are only possible before no other content + * has been parsed. + */ +class ImportHandler : public StaticFieldHandler { public: - using ImportIncludeHandler::ImportIncludeHandler; - - void start(Variant::mapType &args) override; - - void end() override; - + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + Variant::mapType &args) override; + + /** + * Creates a new instance of the ImportHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { - return new ImportHandler{handlerData}; + return new ImportHandler{handlerData, "src"}; } }; -class IncludeHandler : public ImportIncludeHandler { +/** + * The IncludeHandler is responsible for handling the "include" command. The + * included file is parsed in the context of the current file and will change + * the content that is currently being parsed. Includes are possible at (almost) + * any position in the source file. + */ +class IncludeHandler : public StaticFieldHandler { public: - using ImportIncludeHandler::ImportIncludeHandler; - - void start(Variant::mapType &args) override; - - void end() override; - + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + Variant::mapType &args) override; + + /** + * Creates a new instance of the IncludeHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. 
+ */ static Handler *create(const HandlerData &handlerData) { - return new IncludeHandler{handlerData}; + return new IncludeHandler{handlerData, "src"}; } }; + +namespace States { +/** + * State representing the "import" command. + */ +extern const State Import; + +/** + * State representing the "include" command. + */ +extern const State Include; +} + +} } #endif diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp new file mode 100644 index 0000000..47f7d2c --- /dev/null +++ b/src/core/parser/stack/Stack.cpp @@ -0,0 +1,550 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <sstream> + +#include <core/common/Logger.hpp> +#include <core/common/Utils.hpp> +#include <core/common/Exceptions.hpp> +#include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "Handler.hpp" +#include "Stack.hpp" +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class HandlerInfo */ + +HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {} + +HandlerInfo::HandlerInfo(std::shared_ptr<Handler> handler) + : handler(handler), + fieldIdx(0), + valid(true), + implicit(false), + inField(false), + inDefaultField(false), + inImplicitDefaultField(false), + inValidField(false), + hadDefaultField(false) +{ +} + +HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField, + bool inDefaultField, bool inImplicitDefaultField, + bool inValidField) + : handler(nullptr), + fieldIdx(0), + valid(valid), + implicit(implicit), + inField(inField), + inDefaultField(inDefaultField), + inImplicitDefaultField(inImplicitDefaultField), + inValidField(inValidField), + hadDefaultField(false) +{ +} + +HandlerInfo::~HandlerInfo() +{ + // Do nothing +} + +void HandlerInfo::fieldStart(bool isDefault, bool isImplicit, bool isValid) +{ + inField = true; + inDefaultField = isDefault || isImplicit; + inImplicitDefaultField = isImplicit; + inValidField = isValid; + hadDefaultField = hadDefaultField || inDefaultField; + fieldIdx++; +} + +void HandlerInfo::fieldEnd() +{ + inField = false; + inDefaultField = false; + inImplicitDefaultField = false; + inValidField = false; +} + +/** + * Stub instance of HandlerInfo containing no handler information. + */ +static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true}; + +/* Helper functions */ + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + * + * @param name is the name of the command for which no state transition is + * found. + * @param expected is a set containing the names of the expected commands. 
+ */ +static LoggableException buildInvalidCommandException( + const std::string &name, const std::set<std::string> &expected) +{ + if (expected.empty()) { + return LoggableException{ + std::string{"No nested elements allowed, but got \""} + name + + std::string{"\""}}; + } else { + return LoggableException{ + std::string{"Expected "} + + (expected.size() == 1 ? std::string{"\""} + : std::string{"one of \""}) + + Utils::join(expected, "\", \"") + std::string{"\", but got \""} + + name + std::string{"\""}}; + } +} + +/* Class Stack */ + +Stack::Stack(ParserContext &ctx, + const std::multimap<std::string, const State *> &states) + : ctx(ctx), states(states) +{ + // If the scope instance is not empty we need to deduce the current parser + // state + if (!ctx.getScope().isEmpty()) { + deduceState(); + } +} + +Stack::~Stack() +{ + while (!stack.empty()) { + // Fetch the topmost stack element + HandlerInfo &info = currentInfo(); + + // It is an error if we're still in a field of an element while the + // Stack instance is destroyed. Log that + if (handlersValid()) { + if (info.inField && !info.implicit && + !info.inImplicitDefaultField) { + logger().error( + std::string("Reached end of stream, but command \"") + + info.handler->getName() + + "\" has not ended yet. 
Command was started here:", + info.handler->getLocation()); + } + } + + // Remove the command from the stack + endCurrentHandler(); + } +} + +void Stack::deduceState() +{ + // Assemble all states + std::vector<const State *> states; + for (const auto &e : this->states) { + states.push_back(e.second); + } + + // Fetch the type signature of the scope and derive all possible states, + // abort if no unique parser state was found + std::vector<const State *> possibleStates = + StateDeductor(ctx.getScope().getStackTypeSignature(), states).deduce(); + if (possibleStates.size() != 1U) { + throw LoggableException( + "Error while including file: Cannot deduce parser state."); + } + + // Switch to this state by creating a handler, but do not call its start + // function + const State &state = *possibleStates[0]; + HandlerConstructor ctor = + state.elementHandler ? state.elementHandler : EmptyHandler::create; + + std::shared_ptr<Handler> handler = + std::shared_ptr<Handler>{ctor({ctx, "", state, SourceLocation{}})}; + stack.emplace_back(handler); + + // Set the correct flags for this implicit handler + HandlerInfo &info = currentInfo(); + info.implicit = true; + info.fieldStart(true, false, true); +} + +std::set<std::string> Stack::expectedCommands() +{ + const State *currentState = &(this->currentState()); + std::set<std::string> res; + for (const auto &v : states) { + if (v.second->parents.count(currentState)) { + res.insert(v.first); + } + } + return res; +} + +const State &Stack::currentState() +{ + return stack.empty() ? States::None : stack.back().handler->getState(); +} + +std::string Stack::currentCommandName() +{ + return stack.empty() ? 
std::string{} : stack.back().handler->getName(); +} + +const State *Stack::findTargetState(const std::string &name) +{ + const State *currentState = &(this->currentState()); + auto range = states.equal_range(name); + for (auto it = range.first; it != range.second; it++) { + const StateSet &parents = it->second->parents; + if (parents.count(currentState) || parents.count(&States::All)) { + return it->second; + } + } + + return nullptr; +} + +const State *Stack::findTargetStateOrWildcard(const std::string &name) +{ + // Try to find the target state with the given name, if none is found, try + // find a matching "*" state. + State const *targetState = findTargetState(name); + if (targetState == nullptr) { + return findTargetState("*"); + } + return targetState; +} + +HandlerInfo &Stack::currentInfo() +{ + return stack.empty() ? EmptyHandlerInfo : stack.back(); +} +HandlerInfo &Stack::lastInfo() +{ + return stack.size() < 2U ? EmptyHandlerInfo : stack[stack.size() - 2]; +} + +void Stack::endCurrentHandler() +{ + if (!stack.empty()) { + // Fetch the handler info for the current top-level element + HandlerInfo &info = stack.back(); + + // Do not call any callback functions while the stack is marked as + // invalid or this is an elment marked as "implicit" + if (!info.implicit && handlersValid()) { + // Make sure the fieldEnd handler is called if the element still + // is in a field + if (info.inField) { + info.handler->fieldEnd(); + info.fieldEnd(); + } + + // Call the "end" function of the corresponding Handler instance + info.handler->end(); + } + + // Remove the element from the stack + stack.pop_back(); + } +} + +bool Stack::ensureHandlerIsInField() +{ + // If the current handler is not in a field (and actually has a handler) + // try to start a default field + HandlerInfo &info = currentInfo(); + if (!info.inField && info.handler != nullptr) { + // Abort if the element already had a default field + if (info.hadDefaultField) { + return false; + } + + // Try to start 
a new default field, abort if this did not work + bool isDefault = true; + if (!info.handler->fieldStart(isDefault, info.fieldIdx)) { + // The field was never opened, so "fieldEnd" must not be called. + // Do not remove the handler here either: both callers ("command" + // and "data") call endCurrentHandler() themselves if false is + // returned, ending it here as well would pop the parent, too + return false; + } + + // Mark the field as started + info.fieldStart(true, true, true); + } + return true; +} + +bool Stack::handlersValid() +{ + for (auto it = stack.crbegin(); it != stack.crend(); it++) { + if (!it->valid) { + return false; + } + } + return true; +} + +Logger &Stack::logger() { return ctx.getLogger(); } + +void Stack::command(const Variant &name, const Variant::mapType &args) +{ + // Make sure the given identifier is valid (preventing "*" from being + // maliciously passed to this function) + if (!Utils::isNamespacedIdentifier(name.asString())) { + throw LoggableException(std::string("Invalid identifier \"") + + name.asString() + std::string("\""), + name); + } + + while (true) { + // Try to find a target state for the given command, if none can be + // found and the current command does not have an open field, then try + // to create an empty default field, otherwise this is an exception + const State *targetState = findTargetStateOrWildcard(name.asString()); + if (targetState == nullptr) { + if (!currentInfo().inField) { + endCurrentHandler(); + continue; + } else { + throw buildInvalidCommandException(name.asString(), + expectedCommands()); + } + } + + // Make sure we're currently inside a field + if (!ensureHandlerIsInField()) { + endCurrentHandler(); + continue; + } + + // Fork the logger. We do not want any validation errors to skip + LoggerFork loggerFork = logger().fork(); + + // Instantiate the handler and push it onto the stack + HandlerConstructor ctor = targetState->elementHandler + ? 
targetState->elementHandler + : EmptyHandler::create; + std::shared_ptr<Handler> handler{ + ctor({ctx, name.asString(), *targetState, name.getLocation()})}; + stack.emplace_back(handler); + + // Fetch the HandlerInfo for the parent element and the current element + HandlerInfo &parentInfo = lastInfo(); + HandlerInfo &info = currentInfo(); + + // Call the "start" method of the handler, store the result of the start + // method as the validity of the handler -- do not call the start method + // if the stack is currently invalid (as this may cause further, + // unwanted errors) + bool validStack = handlersValid(); + info.valid = false; + if (validStack) { + // Canonicalize the arguments (if this has not already been done), + // allow additional arguments + Variant::mapType canonicalArgs = args; + targetState->arguments.validateMap(canonicalArgs, loggerFork, true); + + handler->setLogger(loggerFork); + try { + info.valid = handler->start(canonicalArgs); + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + handler->resetLogger(); + } + + // We started the command within an implicit default field and it is not + // valid -- remove both the new handler and the parent field from the + // stack + if (!info.valid && parentInfo.inImplicitDefaultField) { + endCurrentHandler(); + endCurrentHandler(); + continue; + } + + // If we ended up here, starting the command may or may not have worked, + // but after all, we cannot unroll the stack any further. Update the + // "valid" flag, commit any potential error messages and return. 
+ info.valid = parentInfo.valid && info.valid; + loggerFork.commit(); + return; + } +} + +void Stack::data(const Variant &data) +{ + while (true) { + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException("No command here to receive data."); + } + + // Fetch the current command handler information + HandlerInfo &info = currentInfo(); + + // Make sure the current handler has an open field + if (!ensureHandlerIsInField()) { + endCurrentHandler(); + continue; + } + + // If this field should not get any data, log an error and do not call + // the "data" handler + if (!info.inValidField) { + logger().error("Did not expect any data here", data); + } + + if (handlersValid() && info.inValidField) { + // Fork the logger and set it as temporary logger for the "start" + // method. We only want to keep error messages if this was not a try + // to implicitly open a default field. + LoggerFork loggerFork = logger().fork(); + info.handler->setLogger(loggerFork); + + // Pass the data to the current Handler instance + bool valid = false; + try { + Variant dataCopy = data; + valid = info.handler->data(dataCopy); + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + + // Reset the logger instance as soon as possible + info.handler->resetLogger(); + + // If placing the data here failed and we're currently in an + // implicitly opened field, just unroll the stack to the next field + // and try again + if (!valid && info.inImplicitDefaultField) { + endCurrentHandler(); + continue; + } + + // Commit the content of the logger fork. Do not change the valid + // flag. 
+ loggerFork.commit(); + } + + // There was no reason to unroll the stack any further, so continue + return; + } +} + +void Stack::fieldStart(bool isDefault) +{ + // Make sure the current handler stack is not empty + if (stack.empty()) { + throw LoggableException( + "No command for which a field could be started"); + } + + // Fetch the information attached to the current handler + HandlerInfo &info = currentInfo(); + if (info.inField) { + logger().error( + "Got field start, but there is no command for which to start the " + "field."); + return; + } + + // Copy the isDefault flag to a local variable, the fieldStart method will + // write into this variable + bool defaultField = isDefault; + + // Do not call the "fieldStart" function if we're in an invalid subtree + bool valid = false; + if (handlersValid()) { + try { + valid = info.handler->fieldStart(defaultField, info.fieldIdx); + } + catch (LoggableException ex) { + logger().log(ex); + } + if (!valid && !defaultField) { + logger().error( + std::string("Cannot start a new field here (index ") + + std::to_string(info.fieldIdx + 1) + + std::string("), field does not exist")); + } + } + + // Mark the field as started + info.fieldStart(defaultField, false, valid); +} + +void Stack::fieldEnd() +{ + // Make sure the current handler stack is not empty + if (stack.empty()) { + throw LoggableException("No command for which a field could be ended"); + } + + // Fetch the information attached to the current handler + HandlerInfo &info = currentInfo(); + if (!info.inField) { + logger().error( + "Got field end, but there is no command for which to end the " + "field."); + return; + } + + // Only continue if the current handler stack is in a valid state, do not + // call the fieldEnd function if something went wrong before + if (handlersValid()) { + try { + info.handler->fieldEnd(); + } + catch (LoggableException ex) { + logger().log(ex); + } + } + + // This command no longer is in a field + info.fieldEnd(); + + // As soon as 
this command had a default field, remove it from the stack + if (info.hadDefaultField) { + endCurrentHandler(); + } +} + +void Stack::annotationStart(const Variant &className, const Variant &args) +{ + // TODO +} + +void Stack::annotationEnd(const Variant &className, const Variant &elementName) +{ + // TODO +} + +void Stack::token(Variant token) +{ + // TODO +} +} +} + diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp new file mode 100644 index 0000000..76eefd9 --- /dev/null +++ b/src/core/parser/stack/Stack.hpp @@ -0,0 +1,341 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Stack.hpp + * + * Helper classes for document or description parsers. Contains the + * Stack class, which is an pushdown automaton responsible for + * accepting commands in the correct order and calling specified handlers. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_STACK_HPP_ +#define _OUSIA_PARSER_STACK_STACK_HPP_ + +#include <cstdint> + +#include <map> +#include <memory> +#include <set> +#include <vector> + +#include <core/common/Variant.hpp> +#include <core/parser/Parser.hpp> + +namespace ousia { + +// Forward declarations +class ParserContext; +class Logger; + +namespace parser_stack { + +// Forward declarations +class Handler; +class State; + +/** + * The HandlerInfo class is used internally by the stack to associate additional + * (mutable) data with a handler instance. + */ +class HandlerInfo { +public: + /** + * Pointer pointing at the actual handler instance. + */ + std::shared_ptr<Handler> handler; + + /** + * Next field index to be passed to the "fieldStart" function of the Handler + * class. + */ + size_t fieldIdx; + + /** + * Set to true if the handler is valid (which is the case if the "start" + * method has returned true). If the handler is invalid, no more calls are + * directed at it until it can be removed from the stack. + */ + bool valid : 1; + + /** + * Set to true if this is an implicit handler, that was created when the + * current stack state was deduced. + */ + bool implicit : 1; + + /** + * Set to true if the handler currently is in a field. + */ + bool inField : 1; + + /** + * Set to true if the handler currently is in the default field. + */ + bool inDefaultField : 1; + + /** + * Set to true if the handler currently is in an implicitly started default + * field. + */ + bool inImplicitDefaultField : 1; + + /** + * Set to false if this field is only opened pro-forma and does not accept + * any data. Otherwise set to true. + */ + bool inValidField : 1; + + /** + * Set to true, if the default field was already started. + */ + bool hadDefaultField : 1; + + /** + * Default constructor of the HandlerInfo class. 
+ */ + HandlerInfo(); + /** + * Constructor of the HandlerInfo class, allows to set all flags manually. + */ + HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField, + bool inImplicitDefaultField, bool inValidField); + + /** + * Constructor of the HandlerInfo class, taking a shared_ptr to the handler + * to which additional information should be attached. + */ + HandlerInfo(std::shared_ptr<Handler> handler); + + /** + * Destructor of the HandlerInfo class (to allow Handler to be forward + * declared). + */ + ~HandlerInfo(); + + /** + * Updates the "field" flags according to a "fieldStart" event. + */ + void fieldStart(bool isDefault, bool isImplicit, bool isValid); + + /** + * Updates the "fields" flags according to a "fieldEnd" event. + */ + void fieldEnd(); +}; + +/** + * The Stack class is a pushdown automaton responsible for turning a command + * stream into a tree of Node instances. It does so by following a state + * transition graph and creating a set of Handler instances, which are placed + * on the stack. + */ +class Stack { +private: + /** + * Reference at the parser context. + */ + ParserContext &ctx; + + /** + * Map containing all registered command names and the corresponding + * state descriptors. + */ + const std::multimap<std::string, const State *> &states; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::vector<HandlerInfo> stack; + + /** + * Return the reference in the Logger instance stored within the context. + */ + Logger &logger(); + + /** + * Used internally to get all expected command names for the current state. + * This function is used to build error messages. + * + * @return a set of strings containing the names of the expected commands. + */ + std::set<std::string> expectedCommands(); + + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state. + * + * @param name is the name of the requested command. 
+ * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetState(const std::string &name); + + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state, also including the wildcard "*" state. + * Throws an exception if the given target state is not a valid identifier. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetStateOrWildcard(const std::string &name); + + /** + * Tries to reconstruct the parser state from the Scope instance of the + * ParserContext given in the constructor. This functionality is needed for + * including files, as the Parser of the included file needs to be brought to + * an equivalent state as the one in the including file. + */ + void deduceState(); + + /** + * Returns a reference at the current HandlerInfo instance (or a stub + * HandlerInfo instance if the stack is empty). + */ + HandlerInfo &currentInfo(); + + /** + * Returns a reference at the last HandlerInfo instance (or a stub + * HandlerInfo instance if the stack has only one element). + */ + HandlerInfo &lastInfo(); + + /** + * Ends the current handler and removes the corresponding element from the + * stack. + */ + void endCurrentHandler(); + + /** + * Tries to start a default field for the current handler, if currently the + * handler is not inside a field and did not have a default field yet. + * + * @return true if the handler is inside a field, false if no field could + * be started. + */ + bool ensureHandlerIsInField(); + + /** + * Returns true if all handlers on the stack are currently valid, or false + * if at least one handler is invalid. + * + * @return true if all handlers on the stack are valid. + */ + bool handlersValid(); + +public: + /** + * Creates a new instance of the Stack class. 
+ * + * @param ctx is the parser context the parser stack is working on. + * @param states is a map containing the command names and pointers at the + * corresponding State instances. + */ + Stack(ParserContext &ctx, + const std::multimap<std::string, const State *> &states); + + /** + * Destructor of the Stack class. + */ + ~Stack(); + + /** + * Returns the state the Stack instance currently is in. + * + * @return the state of the currently active Handler instance or STATE_NONE + * if no handler is on the stack. + */ + const State &currentState(); + + /** + * Returns the command name that is currently being handled. + * + * @return the name of the command currently being handled by the active + * Handler instance or an empty string if no handler is currently active. + */ + std::string currentCommandName(); + + /** + * Function that should be called whenever a new command is reached. + * + * @param name is the name of the command (including the namespace + * separator ':') and its corresponding location. Must be a string variant. + * @param args is a map containing the arguments that were passed to the + * command. + */ + void command(const Variant &name, const Variant::mapType &args); + + /** + * Function that should be called whenever character data is found in the + * input stream. May only be called if there currently is a command on the + * stack. + * + * @param data is a string variant containing the data that has been found. + */ + void data(const Variant &data); + + /** + * Function that should be called whenever a new field starts. Fields of the + * same command may not be separated by calls to data or annotations. Doing + * so will result in a LoggableException. + * + * @param isDefault should be set to true if the started field explicitly + * is the default field. + */ + void fieldStart(bool isDefault); + + /** + * Function that should be called whenever a field ends. Calling this + * function if there is no field to end will result in a LoggableException. 
+ */ + void fieldEnd(); + + /** + * Function that should be called whenever an annotation starts. + * + * @param name is the name of the annotation class. + * @param args is a map variant containing the arguments that were passed + * to the annotation. + */ + void annotationStart(const Variant &className, const Variant &args); + + /** + * Function that should be called whenever an annotation ends. + * + * @param name is the name of the annotation class that was ended. + * @param annotationName is the name of the annotation that was ended. + */ + void annotationEnd(const Variant &className, const Variant &elementName); + + /** + * Function that should be called whenever a previously registered token + * is found in the input stream. + * + * @param token is string variant containing the token that was encountered. + */ + void token(Variant token); +}; +} +} + +#endif /* _OUSIA_STACK_HPP_ */ + diff --git a/src/core/parser/ParserState.cpp b/src/core/parser/stack/State.cpp index f635d86..d72f533 100644 --- a/src/core/parser/ParserState.cpp +++ b/src/core/parser/stack/State.cpp @@ -16,88 +16,97 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "ParserState.hpp" +#include "State.hpp" namespace ousia { +namespace parser_stack { -/* Class ParserState */ +/* Class State */ -ParserState::ParserState() : elementHandler(nullptr) {} +State::State() : elementHandler(nullptr) {} -ParserState::ParserState(ParserStateSet parents, Arguments arguments, +State::State(StateSet parents, Arguments arguments, RttiSet createdNodeTypes, - HandlerConstructor elementHandler) + HandlerConstructor elementHandler, + bool supportsAnnotations) : parents(parents), arguments(arguments), createdNodeTypes(createdNodeTypes), - elementHandler(elementHandler) + elementHandler(elementHandler), + supportsAnnotations(supportsAnnotations) { } -ParserState::ParserState(const ParserStateBuilder &builder) - : ParserState(builder.build()) +State::State(const StateBuilder &builder) + : State(builder.build()) { } -/* Class ParserStateBuilder */ +/* Class StateBuilder */ -ParserStateBuilder &ParserStateBuilder::copy(const ParserState &state) +StateBuilder &StateBuilder::copy(const State &state) { this->state = state; return *this; } -ParserStateBuilder &ParserStateBuilder::parent(const ParserState *parent) +StateBuilder &StateBuilder::parent(const State *parent) { - state.parents = ParserStateSet{parent}; + state.parents = StateSet{parent}; return *this; } -ParserStateBuilder &ParserStateBuilder::parents(const ParserStateSet &parents) +StateBuilder &StateBuilder::parents(const StateSet &parents) { state.parents = parents; return *this; } -ParserStateBuilder &ParserStateBuilder::arguments(const Arguments &arguments) +StateBuilder &StateBuilder::arguments(const Arguments &arguments) { state.arguments = arguments; return *this; } -ParserStateBuilder &ParserStateBuilder::createdNodeType(const Rtti *type) +StateBuilder &StateBuilder::createdNodeType(const Rtti *type) { state.createdNodeTypes = RttiSet{type}; return *this; } -ParserStateBuilder &ParserStateBuilder::createdNodeTypes(const RttiSet &types) +StateBuilder 
&StateBuilder::createdNodeTypes(const RttiSet &types) { state.createdNodeTypes = types; return *this; } -ParserStateBuilder &ParserStateBuilder::elementHandler( +StateBuilder &StateBuilder::elementHandler( HandlerConstructor elementHandler) { state.elementHandler = elementHandler; return *this; } -const ParserState &ParserStateBuilder::build() const { return state; } +StateBuilder &StateBuilder::supportsAnnotations(bool supportsAnnotations) +{ + state.supportsAnnotations = supportsAnnotations; + return *this; +} -/* Class ParserStateDeductor */ +const State &StateBuilder::build() const { return state; } -ParserStateDeductor::ParserStateDeductor( +/* Class StateDeductor */ + +StateDeductor::StateDeductor( std::vector<const Rtti *> signature, - std::vector<const ParserState *> states) + std::vector<const State *> states) : tbl(signature.size()), signature(std::move(signature)), states(std::move(states)) { } -bool ParserStateDeductor::isActive(size_t d, const ParserState *s) +bool StateDeductor::isActive(size_t d, const State *s) { // Lookup the "active" state of (d, s), if it was not already set // (e.second is true) we'll have to calculate it @@ -123,7 +132,7 @@ bool ParserStateDeductor::isActive(size_t d, const ParserState *s) // Check whether any of the parent nodes were active -- either for // the previous element (if this one is generative) or for the // current element (assuming this node was not generative) - for (const ParserState *parent : s->parents) { + for (const State *parent : s->parents) { if ((isGenerative && isActive(d - 1, parent)) || isActive(d, parent)) { res = true; @@ -136,9 +145,9 @@ bool ParserStateDeductor::isActive(size_t d, const ParserState *s) return res; } -std::vector<const ParserState *> ParserStateDeductor::deduce() +std::vector<const State *> StateDeductor::deduce() { - std::vector<const ParserState *> res; + std::vector<const State *> res; if (!signature.empty()) { const size_t D = signature.size(); for (auto s : states) { @@ -153,9 
+162,10 @@ std::vector<const ParserState *> ParserStateDeductor::deduce() /* Constant initializations */ -namespace ParserStates { -const ParserState All; -const ParserState None; +namespace States { +const State All; +const State None; +} } } diff --git a/src/core/parser/ParserState.hpp b/src/core/parser/stack/State.hpp index 6487fdd..4766235 100644 --- a/src/core/parser/ParserState.hpp +++ b/src/core/parser/stack/State.hpp @@ -17,10 +17,10 @@ */ /** - * @file ParserState.hpp + * @file State.hpp * - * Defines the ParserState class used within the ParserStack pushdown - * automaton and the ParserStateBuilder class for convenient construction of + * Defines the State class used within the ParserStack pushdown + * automaton and the StateBuilder class for convenient construction of * such classes. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) @@ -33,12 +33,14 @@ #include <core/common/Rtti.hpp> #include <core/common/Argument.hpp> +#include <core/common/Whitespace.hpp> namespace ousia { +namespace parser_stack { // Forward declarations -class ParserStateBuilder; -class ParserState; +class StateBuilder; +class State; class HandlerData; class Handler; using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); @@ -47,17 +49,17 @@ using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); * Set of pointers of parser states -- used for specifying a set of parent * states. */ -using ParserStateSet = std::unordered_set<const ParserState *>; +using StateSet = std::unordered_set<const State *>; /** - * Class used for the complete specification of a ParserState. Stores possible + * Class used for the complete specification of a State. Stores possible * parent states, state handlers and arguments to be passed to that state. */ -struct ParserState { +struct State { /** * Vector containing all possible parent states. */ - ParserStateSet parents; + StateSet parents; /** * Descriptor of the arguments that should be passed to the handler. 
@@ -66,8 +68,8 @@ struct ParserState { /** * Set containing the types of the nodes that may be created in this - * ParserState. This information is needed for Parsers to reconstruct the - * current ParserState from a given ParserScope when a file is included. + * State. This information is needed for Parsers to reconstruct the + * current State from a given ParserScope when a file is included. */ RttiSet createdNodeTypes; @@ -79,109 +81,119 @@ struct ParserState { HandlerConstructor elementHandler; /** + * Set to true if this handler does support annotations. This is almost + * always false (e.g. all description handlers), except for document + * element handlers. + */ + bool supportsAnnotations; + + /** * Default constructor, initializes the handlers with nullptr. */ - ParserState(); + State(); /** - * Constructor taking values for all fields. Use the ParserStateBuilder - * class for a more convenient construction of ParserState instances. + * Constructor taking values for all fields. Use the StateBuilder + * class for a more convenient construction of State instances. * * @param parents is a vector containing all possible parent states. * @param arguments is a descriptor of arguments that should be passed to * the handler. * @param createdNodeTypes is a set containing the types of the nodes tha - * may be created in this ParserState. This information is needed for - * Parsers to reconstruct the current ParserState from a given ParserScope + * may be created in this State. This information is needed for + * Parsers to reconstruct the current State from a given ParserScope * when a file is included. * @param elementHandler is a pointer at a function which creates a new * concrete Handler instance for the elements described by this state. May * be nullptr in which case no handler instance is created. + * @param supportsAnnotations specifies whether annotations are supported + * here at all. 
*/ - ParserState(ParserStateSet parents, Arguments arguments = Arguments{}, + State(StateSet parents, Arguments arguments = Arguments{}, RttiSet createdNodeTypes = RttiSet{}, - HandlerConstructor elementHandler = nullptr); + HandlerConstructor elementHandler = nullptr, + bool supportsAnnotations = false); /** - * Creates this ParserState from the given ParserStateBuilder instance. + * Creates this State from the given StateBuilder instance. */ - ParserState(const ParserStateBuilder &builder); + State(const StateBuilder &builder); }; /** - * The ParserStateBuilder class is a class used for conveniently building new - * ParserState instances. + * The StateBuilder class is a class used for conveniently building new + * State instances. */ -class ParserStateBuilder { +class StateBuilder { private: /** - * ParserState instance that is currently being built by the - * ParserStateBuilder. + * State instance that is currently being built by the + * StateBuilder. */ - ParserState state; + State state; public: /** - * Copies the ParserState instance and uses it as internal state. Overrides - * all changes made by the ParserStateBuilder. + * Copies the State instance and uses it as internal state. Overrides + * all changes made by the StateBuilder. * * @param state is the state that should be copied. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &copy(const ParserState &state); + StateBuilder &copy(const State &state); /** * Sets the possible parent states to the single given parent element. * - * @param parent is a pointer at the parent ParserState instance that should + * @param parent is a pointer at the parent State instance that should * be the possible parent state. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. 
*/ - ParserStateBuilder &parent(const ParserState *parent); + StateBuilder &parent(const State *parent); /** - * Sets the ParserState instances in the given ParserStateSet as the list of + * Sets the State instances in the given StateSet as the list of * supported parent states. * - * @param parents is a set of pointers at ParserState instances that should + * @param parents is a set of pointers at State instances that should * be the possible parent states. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &parents(const ParserStateSet &parents); + StateBuilder &parents(const StateSet &parents); /** * Sets the arguments that should be passed to the parser state handler to * those given as argument. * * @param arguments is the Arguments instance describing the Arguments that - * should be parsed to a Handler for this ParserState. - * @return a reference at this ParserStateBuilder instance for method + * should be parsed to a Handler for this State. + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &arguments(const Arguments &arguments); + StateBuilder &arguments(const Arguments &arguments); /** * Sets the Node types this state may produce to the given Rtti descriptor. * * @param type is the Rtti descriptor of the Type that may be produced by * this state. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &createdNodeType(const Rtti *type); + StateBuilder &createdNodeType(const Rtti *type); /** * Sets the Node types this state may produce to the given Rtti descriptors. * * @param types is a set of Rtti descriptors of the Types that may be * produced by this state. 
- * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &createdNodeTypes(const RttiSet &types); + StateBuilder &createdNodeTypes(const RttiSet &types); /** * Sets the constructor for the element handler. The constructor creates a @@ -191,31 +203,42 @@ public: * * @param elementHandler is the HandlerConstructor that should create a * new Handler instance. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &elementHandler(HandlerConstructor elementHandler); + StateBuilder &elementHandler(HandlerConstructor elementHandler); /** - * Returns a reference at the internal ParserState instance that was built - * using the ParserStateBuilder. + * Sets the state of the "supportsAnnotations" flags (default value is + * false) * - * @return the built ParserState. + * @param supportsAnnotations should be set to true, if annotations are + * supported for the handlers associated with this document. + * @return a reference at this StateBuilder instance for method + * chaining. */ - const ParserState &build() const; + StateBuilder &supportsAnnotations(bool supportsAnnotations); + + /** + * Returns a reference at the internal State instance that was built + * using the StateBuilder. + * + * @return the built State. + */ + const State &build() const; }; /** - * Class used to deduce the ParserState a Parser is currently in based on the + * Class used to deduce the State a Parser is currently in based on the * types of the Nodes that currently are on the ParserStack. Uses dynamic * programming in order to solve this problem. */ -class ParserStateDeductor { +class StateDeductor { public: /** * Type containing the dynamic programming table. 
*/ - using Table = std::vector<std::unordered_map<const ParserState *, bool>>; + using Table = std::vector<std::unordered_map<const State *, bool>>; private: /** @@ -231,7 +254,7 @@ private: /** * List of states that should be checked for being active. */ - const std::vector<const ParserState *> states; + const std::vector<const State *> states; /** * Used internally to check whether the given parser stack s may have been @@ -239,20 +262,20 @@ private: * * @param d is the signature element. * @param s is the parser state. - * @return true if the the given ParserState may have been active. + * @return true if the the given State may have been active. */ - bool isActive(size_t d, const ParserState *s); + bool isActive(size_t d, const State *s); public: /** - * Constructor of the ParserStateDeductor class. + * Constructor of the StateDeductor class. * * @param signature a Node type signature describing the types of the nodes * which currently reside on e.g. the ParserScope stack. * @param states is a list of states that should be checked. */ - ParserStateDeductor(std::vector<const Rtti *> signature, - std::vector<const ParserState *> states); + StateDeductor(std::vector<const Rtti *> signature, + std::vector<const State *> states); /** * Selects all active states from the given states. Only considers those @@ -260,23 +283,24 @@ public: * * @return a list of states that may actually have been active. */ - std::vector<const ParserState *> deduce(); + std::vector<const State *> deduce(); }; /** - * The ParserStates namespace contains all the global state constants used + * The States namespace contains all the global state constants used * in the ParserStack class. */ -namespace ParserStates { +namespace States { /** * State representing all states. */ -extern const ParserState All; +extern const State All; /** * State representing the initial state. 
*/ -extern const ParserState None; +extern const State None; +} } } diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index 2cc7dfb..8fd9525 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -16,32 +16,46 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "TypesystemHandler.hpp" - #include <core/model/Typesystem.hpp> +#include <core/model/Domain.hpp> #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" namespace ousia { +namespace parser_stack { /* TypesystemHandler */ -void TypesystemHandler::start(Variant::mapType &args) +bool TypesystemHandler::start(Variant::mapType &args) { // Create the typesystem instance Rooted<Typesystem> typesystem = - project()->createTypesystem(args["name"].asString()); + context().getProject()->createTypesystem(args["name"].asString()); typesystem->setLocation(location()); + // If the typesystem is defined inside a domain, add a reference to the + // typesystem to the domain + Rooted<Domain> domain = scope().select<Domain>(); + if (domain != nullptr) { + domain->reference(typesystem); + } + // Push the typesystem onto the scope, set the POST_HEAD flag to true scope().push(typesystem); scope().setFlag(ParserFlag::POST_HEAD, false); + + return true; } void TypesystemHandler::end() { scope().pop(); } /* TypesystemEnumHandler */ -void TypesystemEnumHandler::start(Variant::mapType &args) +bool TypesystemEnumHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -52,33 +66,24 @@ void TypesystemEnumHandler::start(Variant::mapType &args) enumType->setLocation(location()); scope().push(enumType); + + return true; } void TypesystemEnumHandler::end() { scope().pop(); } /* TypesystemEnumEntryHandler */ -void 
TypesystemEnumEntryHandler::start(Variant::mapType &args) {} - -void TypesystemEnumEntryHandler::end() +void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData, + Variant::mapType &args) { Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>(); - enumType->addEntry(entry, logger()); -} - -void TypesystemEnumEntryHandler::data(const std::string &data, int field) -{ - if (field != 0) { - // TODO: This should be stored in the HandlerData - logger().error("Enum entry only has one field."); - return; - } - entry.append(data); + enumType->addEntry(fieldData.asString(), logger()); } /* TypesystemStructHandler */ -void TypesystemStructHandler::start(Variant::mapType &args) +bool TypesystemStructHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -103,13 +108,15 @@ void TypesystemStructHandler::start(Variant::mapType &args) }); } scope().push(structType); + + return true; } void TypesystemStructHandler::end() { scope().pop(); } /* TypesystemStructFieldHandler */ -void TypesystemStructFieldHandler::start(Variant::mapType &args) +bool TypesystemStructFieldHandler::start(Variant::mapType &args) { // Read the argument values const std::string &name = args["name"].asString(); @@ -142,13 +149,13 @@ void TypesystemStructFieldHandler::start(Variant::mapType &args) } }); } -} -void TypesystemStructFieldHandler::end() {} + return true; +} /* TypesystemConstantHandler */ -void TypesystemConstantHandler::start(Variant::mapType &args) +bool TypesystemConstantHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -169,7 +176,51 @@ void TypesystemConstantHandler::start(Variant::mapType &args) constant.cast<Constant>()->setType(type.cast<Type>(), logger); } }); + + return true; } -void TypesystemConstantHandler::end() {} +namespace States { +const State Typesystem = StateBuilder() + .parents({&None, &Domain}) + .createdNodeType(&RttiTypes::Typesystem) + .elementHandler(TypesystemHandler::create) + 
.arguments({Argument::String("name", "")}); + +const State TypesystemEnum = StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::EnumType) + .elementHandler(TypesystemEnumHandler::create) + .arguments({Argument::String("name")}); + +const State TypesystemEnumEntry = + StateBuilder() + .parent(&TypesystemEnum) + .elementHandler(TypesystemEnumEntryHandler::create) + .arguments({}); + +const State TypesystemStruct = + StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::StructType) + .elementHandler(TypesystemStructHandler::create) + .arguments({Argument::String("name"), Argument::String("parent", "")}); + +const State TypesystemStructField = + StateBuilder() + .parent(&TypesystemStruct) + .elementHandler(TypesystemStructFieldHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("default", Variant::fromObject(nullptr))}); + +const State TypesystemConstant = + StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::Constant) + .elementHandler(TypesystemConstantHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("value")}); +} } +} + diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp index 76a7bc9..85494f1 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -19,6 +19,9 @@ /** * @file TypesystemHandler.hpp * + * Contains the Handler classes used to parse Typesystem descriptions. The + * Handlers parse all the tags found below and including the "typesystem" tag. + * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -26,96 +29,180 @@ #define _OUSIA_TYPESYSTEM_HANDLER_HPP_ #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> + +#include "Handler.hpp" namespace ousia { +namespace parser_stack { -class TypesystemHandler : public Handler { +/** + * Handles the occurance of the "typesystem" tag. 
Creates a new Typesystem + instance and places it on the ParserScope. + */ +class TypesystemHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemHandler{handlerData}; } }; -class TypesystemEnumHandler : public Handler { +/** + * Handles the occurrence of the "enum" tag. Creates a new EnumType instance and + * places it on the ParserScope. + */ +class TypesystemEnumHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemEnumHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemEnumHandler{handlerData}; } }; -class TypesystemEnumEntryHandler : public Handler { +/** + * Handles the occurrence of the "entry" tag within an "enum" tag. Creates a new + * EnumType instance and places it on the ParserScope. 
+ */ +class TypesystemEnumEntryHandler : public StaticFieldHandler { public: - using Handler::Handler; + using StaticFieldHandler::StaticFieldHandler; - std::string entry; - - void start(Variant::mapType &args) override; - - void end() override; - - void data(const std::string &data, int field) override; + void doHandle(const Variant &fieldData, Variant::mapType &args) override; + /** + * Creates a new instance of the TypesystemEnumEntryHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { - return new TypesystemEnumEntryHandler{handlerData}; + return new TypesystemEnumEntryHandler{handlerData, "name"}; } }; -class TypesystemStructHandler : public Handler { +/** + * Handles the occurrence of the "struct" tag within a typesystem description. + * Creates a new StructType instance and places it on the ParserScope. + */ +class TypesystemStructHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemStructHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemStructHandler{handlerData}; } }; -class TypesystemStructFieldHandler : public Handler { +/** + * Handles the occurrence of the "field" tag within a typesystem structure + * description. Places a new Attribute instance in the StructType instance + * that is currently at the top of the scope. 
+ */ +class TypesystemStructFieldHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; - void end() override; + bool start(Variant::mapType &args) override; + /** + * Creates a new instance of the TypesystemStructFieldHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemStructFieldHandler{handlerData}; } }; -class TypesystemConstantHandler : public Handler { +/** + * Handles the occurance of the "constant" tag within a typesystem structure + * description. Places a new Constant instance in the current typesystem. + */ +class TypesystemConstantHandler : public StaticHandler { public: - using Handler::Handler; + using StaticHandler::StaticHandler; - void start(Variant::mapType &args) override; - - void end() override; + bool start(Variant::mapType &args) override; + /** + * Creates a new instance of the TypesystemConstantHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemConstantHandler{handlerData}; } }; + +namespace States { +/** + * State representing the "typesystem" tag. + */ +extern const State Typesystem; +/** + * State representing the "enum" tag within a typesystem. + */ +extern const State TypesystemEnum; +/** + * State representing the "entry" tag within an enum. + */ +extern const State TypesystemEnumEntry; +/** + * State representing the "struct" tag within a typesystem. + */ +extern const State TypesystemStruct; +/** + * State representing the "field" tag within a typesystem structure. 
+ */ +extern const State TypesystemStructField; +/** + * State representing the "constant" tag within a typesystem. + */ +extern const State TypesystemConstant; +} +} } #endif diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp new file mode 100644 index 0000000..4a0430b --- /dev/null +++ b/src/core/parser/utils/TokenTrie.cpp @@ -0,0 +1,119 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "TokenTrie.hpp" + +namespace ousia { + +/* Class DynamicTokenTree::Node */ + +TokenTrie::Node::Node() : type(EmptyToken) {} + +/* Class DynamicTokenTree */ + +bool TokenTrie::registerToken(const std::string &token, + TokenTypeId type) noexcept +{ + // Abort if the token is empty -- this would taint the root node + if (token.empty()) { + return false; + } + + // Iterate over each character in the given string and insert them as + // (new) nodes + Node *node = &root; + for (size_t i = 0; i < token.size(); i++) { + // Insert a new node if this one does not exist + const char c = token[i]; + auto it = node->children.find(c); + if (it == node->children.end()) { + it = node->children.emplace(c, std::make_shared<Node>()).first; + } + node = it->second.get(); + } + + // If the resulting node already has a type set, we're screwed. 
+ if (node->type != EmptyToken) { + return false; + } + + // Otherwise just set the type to the given type. + node->type = type; + return true; +} + +bool TokenTrie::unregisterToken(const std::string &token) noexcept +{ + // We cannot remove empty tokens as we need to access the first character + // upfront + if (token.empty()) { + return false; + } + + // First pass -- search the node in the path that can be deleted + Node *subtreeRoot = &root; + char subtreeKey = token[0]; + Node *node = &root; + for (size_t i = 0; i < token.size(); i++) { + // Go to the next node, abort if the tree ends unexpectedly + auto it = node->children.find(token[i]); + if (it == node->children.end()) { + return false; + } + + // Reset the subtree handler if this node has another type + node = it->second.get(); + if ((node->type != EmptyToken || node->children.size() > 1) && + (i + 1 != token.size())) { + subtreeRoot = node; + subtreeKey = token[i + 1]; + } + } + + // If the node type is already EmptyToken, we cannot do anything here + if (node->type == EmptyToken) { + return false; + } + + // If the target node has children, we cannot delete the subtree. 
Set the + // type to EmptyToken instead + if (!node->children.empty()) { + node->type = EmptyToken; + return true; + } + + // If we end up here, we can safely delete the complete subtree + subtreeRoot->children.erase(subtreeKey); + return true; +} + +TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept +{ + Node const *node = &root; + for (size_t i = 0; i < token.size(); i++) { + const char c = token[i]; + auto it = node->children.find(c); + if (it == node->children.end()) { + return EmptyToken; + } + node = it->second.get(); + } + return node->type; +} +} + diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp new file mode 100644 index 0000000..36c2ffa --- /dev/null +++ b/src/core/parser/utils/TokenTrie.hpp @@ -0,0 +1,150 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file TokenTrie.hpp + * + * Class representing a token trie that can be updated dynamically. + * + * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de) + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_TOKEN_TRIE_HPP_ +#define _OUSIA_TOKEN_TRIE_HPP_ + +#include <cstdint> +#include <memory> +#include <limits> +#include <unordered_map> + +namespace ousia { + +/** + * The TokenTypeId is used to give each token type a unique id. 
+ */ +using TokenTypeId = uint32_t; + +/** + * Token which is not a token. + */ +constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max(); + +/** + * Token which represents a text token. + */ +constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1; + +/** + * The Tokenizer internally uses a TokenTrie to be efficiently able to identify + * the longest consecutive token in the text. This is equivalent to a prefix + * trie. + * + * A token trie is a construct that structures all special tokens a Tokenizer + * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and + * three. Then the token tree would look like this: + * + * \code{*.txt} + * ~ (0) + * / \ + * a (2) b (0) + * | | + * a (0) a (0) + * | | + * b (1) c (0) + * \endcode + * + * Where the number indicates the corresponding token descriptor identifier. + */ +class TokenTrie { +public: + /** + * Structure used to build the node tree. + */ + struct Node { + /** + * Type used for the child map. + */ + using ChildMap = std::unordered_map<char, std::shared_ptr<Node>>; + + /** + * Map from single characters at the corresponding child nodes. + */ + ChildMap children; + + /** + * Reference at the corresponding token descriptor. Set to nullptr if + * no token is attached to this node. + */ + TokenTypeId type; + + /** + * Default constructor, initializes the descriptor with nullptr. + */ + Node(); + }; + +private: + /** + * Root node of the internal token tree. + */ + Node root; + +public: + /** + * Registers a token containing the given string. Returns false if the + * token already exists, true otherwise. + * + * @param token is the character sequence that should be registered as + * token. + * @param type is the descriptor that should be set for this token. + * @return true if the operation is successful, false otherwise. + */ + bool registerToken(const std::string &token, TokenTypeId type) noexcept; + + /** + * Unregisters the token from the token tree. 
Returns true if the token was + * unregistered successfully, false otherwise. + * + * @param token is the character sequence that should be unregistered. + * @return true if the operation was successful, false otherwise. + */ + bool unregisterToken(const std::string &token) noexcept; + + /** + * Returns true, if the given token exists within the TokenTree. This + * function is mostly thought for debugging and unit testing. + * + * @param token is the character sequence that should be searched. + * @return the attached token descriptor or nullptr if the given token is + * not found. + */ + TokenTypeId hasToken(const std::string &token) const noexcept; + + /** + * Returns a reference at the root node to be used for traversing the token + * tree. + * + * @return a reference at the root node. + */ + const Node *getRoot() const noexcept { return &root; } +}; +} + +#endif /* _OUSIA_TOKEN_TRIE_HPP_ */ + diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp new file mode 100644 index 0000000..3c8177d --- /dev/null +++ b/src/core/parser/utils/Tokenizer.cpp @@ -0,0 +1,381 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <memory> +#include <vector> + +#include <core/common/CharReader.hpp> +#include <core/common/Exceptions.hpp> +#include <core/common/Utils.hpp> +#include <core/common/WhitespaceHandler.hpp> + +#include "Tokenizer.hpp" + +namespace ousia { + +namespace { + +/* Internal class TokenMatch */ + +/** + * Contains information about a matching token. + */ +struct TokenMatch { + /** + * Token that was matched. + */ + Token token; + + /** + * Current length of the data within the text handler. The text buffer needs + * to be trimmed to this length if this token matches. + */ + size_t textLength; + + /** + * End location of the current text handler. This location needs to be used + * for the text token that is emitted before the actual token. + */ + size_t textEnd; + + /** + * Constructor of the TokenMatch class. + */ + TokenMatch() : textLength(0), textEnd(0) {} + + /** + * Returns true if this TokenMatch instance actually represents a match. + */ + bool hasMatch() { return token.type != EmptyToken; } +}; + +/* Internal class TokenLookup */ + +/** + * The TokenLookup class is used to represent a thread in a running token + * lookup. + */ +class TokenLookup { +private: + /** + * Current node within the token trie. + */ + TokenTrie::Node const *node; + + /** + * Start offset within the source file. + */ + size_t start; + + /** + * Current length of the data within the text handler. The text buffer needs + * to be trimmed to this length if this token matches. + */ + size_t textLength; + + /** + * End location of the current text handler. This location needs to be used + * for the text token that is emitted before the actual token. + */ + size_t textEnd; + +public: + /** + * Constructor of the TokenLookup class. + * + * @param node is the current node. + * @param start is the start position. + * @param textLength is the text buffer length of the previous text token. + * @param textEnd is the current end location of the previous text token. 
+ */ + TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength, + size_t textEnd) + : node(node), start(start), textLength(textLength), textEnd(textEnd) + { + } + + /** + * Tries to extend the current path in the token trie with the given + * character. If a complete token is matched, stores this match in the + * tokens list (in case it is longer than any previous token). + * + * @param c is the character that should be appended to the current prefix. + * @param lookups is a list to which new TokenLookup instances are added -- + * which could potentially be expanded in the next iteration. + * @param match is the Token instance to which the matching token + * should be written. + * @param tokens is a reference at the internal token list of the + * Tokenizer. + * @param end is the end byte offset of the current character. + * @param sourceId is the source id of this file. + */ + void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match, + const std::vector<std::string> &tokens, SourceOffset end, + SourceId sourceId) + { + // Check whether we can continue the current token path with the given + // character without visiting an already visited node + auto it = node->children.find(c); + if (it == node->children.end()) { + return; + } + + // Check whether the new node represents a complete token and whether it + // is longer than the current token. If yes, replace the current token. + node = it->second.get(); + if (node->type != EmptyToken) { + const std::string &str = tokens[node->type]; + size_t len = str.size(); + if (len > match.token.content.size()) { + match.token = + Token{node->type, str, {sourceId, start, end}}; + match.textLength = textLength; + match.textEnd = textEnd; + } + } + + // If this state can possibly be advanced, store it in the states list. + if (!node->children.empty()) { + lookups.emplace_back(*this); + } + } +}; + +/** + * Transforms the given token into a text token containing the extracted + * text. 
+ * + * @param handler is the WhitespaceHandler containing the collected data. + * @param token is the output token to which the text should be written. + * @param sourceId is the source id of the underlying file. + */ +static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match, + SourceId sourceId) +{ + if (match.hasMatch()) { + match.token.content = + std::string{handler.textBuf.data(), match.textLength}; + match.token.location = + SourceLocation{sourceId, handler.textStart, match.textEnd}; + } else { + match.token.content = handler.toString(); + match.token.location = + SourceLocation{sourceId, handler.textStart, handler.textEnd}; + } + match.token.type = TextToken; +} +} + +/* Class Tokenizer */ + +Tokenizer::Tokenizer(WhitespaceMode whitespaceMode) + : whitespaceMode(whitespaceMode), nextTokenTypeId(0) +{ +} + +template <typename TextHandler, bool read> +bool Tokenizer::next(CharReader &reader, Token &token) +{ + // If we're in the read mode, reset the char reader peek position to the + // current read position + if (read) { + reader.resetPeek(); + } + + // Prepare the lookups in the token trie + const TokenTrie::Node *root = trie.getRoot(); + TokenMatch match; + std::vector<TokenLookup> lookups; + std::vector<TokenLookup> nextLookups; + + // Instantiate the text handler + TextHandler textHandler; + + // Peek characters from the reader and try to advance the current token tree + // cursor + char c; + size_t charStart = reader.getPeekOffset(); + const SourceId sourceId = reader.getSourceId(); + while (reader.peek(c)) { + const size_t charEnd = reader.getPeekOffset(); + const size_t textLength = textHandler.textBuf.size(); + const size_t textEnd = textHandler.textEnd; + + // If we do not have a match yet, start a new lookup from the root + if (!match.hasMatch()) { + TokenLookup{root, charStart, textLength, textEnd}.advance( + c, nextLookups, match, tokens, charEnd, sourceId); + } + + // Try to advance all other lookups with the new character 
+ for (TokenLookup &lookup : lookups) { + lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId); + } + + // We have found a token and there are no more states to advance or the + // text handler has found something -- abort to return the new token + if (match.hasMatch()) { + if ((nextLookups.empty() || textHandler.hasText())) { + break; + } + } else { + // Record all incomming characters + textHandler.append(c, charStart, charEnd); + } + + // Swap the lookups and the nextLookups list + lookups = std::move(nextLookups); + nextLookups.clear(); + + // Advance the offset + charStart = charEnd; + } + + // If we found text, emit that text + if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { + buildTextToken(textHandler, match, sourceId); + } + + // Move the read/peek cursor to the end of the token, abort if an error + // happens while doing so + if (match.hasMatch()) { + // Make sure we have a valid location + if (match.token.location.getEnd() == InvalidSourceOffset) { + throw OusiaException{"Token end position offset out of range"}; + } + + // Seek to the end of the current token + const size_t end = match.token.location.getEnd(); + if (read) { + reader.seek(end); + } else { + reader.seekPeekCursor(end); + } + token = match.token; + } else { + token = Token{}; + } + return match.hasMatch(); +} + +bool Tokenizer::read(CharReader &reader, Token &token) +{ + switch (whitespaceMode) { + case WhitespaceMode::PRESERVE: + return next<PreservingWhitespaceHandler, true>(reader, token); + case WhitespaceMode::TRIM: + return next<TrimmingWhitespaceHandler, true>(reader, token); + case WhitespaceMode::COLLAPSE: + return next<CollapsingWhitespaceHandler, true>(reader, token); + } + return false; +} + +bool Tokenizer::peek(CharReader &reader, Token &token) +{ + switch (whitespaceMode) { + case WhitespaceMode::PRESERVE: + return next<PreservingWhitespaceHandler, false>(reader, token); + case WhitespaceMode::TRIM: + return 
next<TrimmingWhitespaceHandler, false>(reader, token); + case WhitespaceMode::COLLAPSE: + return next<CollapsingWhitespaceHandler, false>(reader, token); + } + return false; +} + +TokenTypeId Tokenizer::registerToken(const std::string &token) +{ + // Abort if an empty token should be registered + if (token.empty()) { + return EmptyToken; + } + + // Search for a new slot in the tokens list + TokenTypeId type = EmptyToken; + for (size_t i = nextTokenTypeId; i < tokens.size(); i++) { + if (tokens[i].empty()) { + tokens[i] = token; + type = i; + break; + } + } + + // No existing slot was found, add a new one -- make sure we do not + // override the special token type handles + if (type == EmptyToken) { + type = tokens.size(); + if (type == TextToken || type == EmptyToken) { + throw OusiaException{"Token type ids depleted!"}; + } + tokens.emplace_back(token); + } + nextTokenTypeId = type + 1; + + // Try to register the token in the trie -- if this fails, remove it + // from the tokens list + if (!trie.registerToken(token, type)) { + tokens[type] = std::string{}; + nextTokenTypeId = type; + return EmptyToken; + } + return type; +} + +bool Tokenizer::unregisterToken(TokenTypeId type) +{ + // Unregister the token from the trie, abort if an invalid type is given + if (type < tokens.size() && trie.unregisterToken(tokens[type])) { + tokens[type] = std::string{}; + nextTokenTypeId = type; + return true; + } + return false; +} + +std::string Tokenizer::getTokenString(TokenTypeId type) +{ + if (type < tokens.size()) { + return tokens[type]; + } + return std::string{}; +} + +void Tokenizer::setWhitespaceMode(WhitespaceMode mode) +{ + whitespaceMode = mode; +} + +WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; } + +/* Explicitly instantiate all possible instantiations of the "next" member + function */ +template bool Tokenizer::next<PreservingWhitespaceHandler, false>( + CharReader &reader, Token &token); +template bool 
Tokenizer::next<TrimmingWhitespaceHandler, false>( + CharReader &reader, Token &token); +template bool Tokenizer::next<CollapsingWhitespaceHandler, false>( + CharReader &reader, Token &token); +template bool Tokenizer::next<PreservingWhitespaceHandler, true>( + CharReader &reader, Token &token); +template bool Tokenizer::next<TrimmingWhitespaceHandler, true>( + CharReader &reader, Token &token); +template bool Tokenizer::next<CollapsingWhitespaceHandler, true>( + CharReader &reader, Token &token); +} + diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp new file mode 100644 index 0000000..6b4e116 --- /dev/null +++ b/src/core/parser/utils/Tokenizer.hpp @@ -0,0 +1,231 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Tokenizer.hpp + * + * Tokenizer that can be reconfigured at runtime used for parsing the plain + * text format. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_ +#define _OUSIA_DYNAMIC_TOKENIZER_HPP_ + +#include <set> +#include <string> +#include <vector> + +#include <core/common/Location.hpp> +#include <core/common/Whitespace.hpp> + +#include "TokenTrie.hpp" + +namespace ousia { + +// Forward declarations +class CharReader; + +/** + * The Token structure describes a token discovered by the Tokenizer. + */ +struct Token { + /** + * Id of the type of this token. + */ + TokenTypeId type; + + /** + * String that was matched. + */ + std::string content; + + /** + * Location from which the string was extracted. + */ + SourceLocation location; + + /** + * Default constructor. + */ + Token() : type(EmptyToken) {} + + /** + * Constructor of the Token struct. + * + * @param id represents the token type. + * @param content is the string content that has been extracted. + * @param location is the location of the extracted string content in the + * source file. + */ + Token(TokenTypeId type, const std::string &content, + SourceLocation location) + : type(type), content(content), location(location) + { + } + + /** + * Constructor of the Token struct, only initializes the token type + * + * @param type is the id corresponding to the type of the token. + */ + Token(TokenTypeId type) : type(type) {} + + /** + * The getLocation function allows the tokens to be directly passed as + * parameter to Logger or LoggableException instances. + * + * @return a reference at the location field + */ + const SourceLocation &getLocation() const { return location; } +}; + +/** + * The Tokenizer is used to extract tokens and chunks of text from a + * CharReader. It allows to register and unregister tokens while parsing and + * to modify the handling of whitespace characters. Note that the + * Tokenizer always tries to extract the longest possible token from the + * tokenizer. 
+ */ +class Tokenizer { +private: + /** + * Internally used token trie. This object holds all registered tokens. + */ + TokenTrie trie; + + /** + * Flag defining whether whitespaces should be preserved or not. + */ + WhitespaceMode whitespaceMode; + + /** + * Vector containing all registered token types. + */ + std::vector<std::string> tokens; + + /** + * Next index in the tokens list where to search for a new token id. + */ + size_t nextTokenTypeId; + + /** + * Templated function used internally to read the current token. The + * function is templated in order to force code generation for all six + * combiations of whitespace modes and reading/peeking. + * + * @tparam TextHandler is the type to be used for the textHandler instance. + * @tparam read specifies whether the function should start from and advance + * the read pointer of the char reader. + * @param reader is the CharReader instance from which the data should be + * read. + * @param token is the token structure into which the token information + * should be written. + * @return false if the end of the stream has been reached, true otherwise. + */ + template <typename TextHandler, bool read> + bool next(CharReader &reader, Token &token); + +public: + /** + * Constructor of the Tokenizer class. + * + * @param whitespaceMode specifies how whitespace should be handled. + */ + Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); + + /** + * Registers the given string as a token. Returns a const pointer at a + * TokenDescriptor that will be used to reference the newly created token. + * + * @param token is the token string that should be registered. + * @return a unique identifier for the registered token or EmptyToken if + * an error occured. + */ + TokenTypeId registerToken(const std::string &token); + + /** + * Unregisters the token belonging to the given TokenTypeId. + * + * @param type is the token type that should be unregistered. 
The + *TokenTypeId + * must have been returned by registerToken. + * @return true if the operation was successful, false otherwise (e.g. + * because the given TokenDescriptor was already unregistered). + */ + bool unregisterToken(TokenTypeId type); + + /** + * Returns the token that was registered under the given TokenTypeId id or + *an + * empty string if an invalid TokenTypeId id is given. + * + * @param type is the TokenTypeId id for which the corresponding token + *string + * should be returned. + * @return the registered token string or an empty string if the given type + * was invalid. + */ + std::string getTokenString(TokenTypeId type); + + /** + * Sets the whitespace mode. + * + * @param whitespaceMode defines how whitespace should be treated in text + * tokens. + */ + void setWhitespaceMode(WhitespaceMode mode); + + /** + * Returns the current value of the whitespace mode. + * + * @return the whitespace mode. + */ + WhitespaceMode getWhitespaceMode(); + + /** + * Reads a new token from the CharReader and stores it in the given + * Token instance. + * + * @param reader is the CharReader instance from which the data should be + * read. + * @param token is a reference at the token instance into which the Token + * information should be written. + * @return true if a token could be read, false if the end of the stream + * has been reached. + */ + bool read(CharReader &reader, Token &token); + + /** + * The peek method does not advance the read position of the char reader, + * but reads the next token from the current char reader peek position. + * + * @param reader is the CharReader instance from which the data should be + * read. + * @param token is a reference at the token instance into which the Token + * information should be written. + * @return true if a token could be read, false if the end of the stream + * has been reached. + */ + bool peek(CharReader &reader, Token &token); +}; +} + +#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */ + |