summaryrefslogtreecommitdiff
path: root/src/core/parser
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-15 21:32:54 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-15 21:32:54 +0100
commit8e5e08c4f293434585d2a88f7f331f8ce49b67b9 (patch)
treefa82a937b1ea80f45d7955938c333f68f8a0f3f6 /src/core/parser
parent2544749215bc2465bfeca431e271110ca86d8a83 (diff)
parent40f4666c43211d9071a827ad8a2524688e7f678f (diff)
Merge branch 'astoecke_parser_stack_new'
Conflicts: application/src/core/parser/stack/DocumentHandler.cpp application/src/core/parser/stack/DocumentHandler.hpp
Diffstat (limited to 'src/core/parser')
-rw-r--r--src/core/parser/ParserScope.cpp5
-rw-r--r--src/core/parser/ParserScope.hpp4
-rw-r--r--src/core/parser/ParserStack.cpp216
-rw-r--r--src/core/parser/ParserStack.hpp361
-rw-r--r--src/core/parser/generic/GenericParser.cpp0
-rw-r--r--src/core/parser/stack/Callbacks.cpp23
-rw-r--r--src/core/parser/stack/Callbacks.hpp99
-rw-r--r--src/core/parser/stack/DocumentHandler.cpp213
-rw-r--r--src/core/parser/stack/DocumentHandler.hpp125
-rw-r--r--src/core/parser/stack/DomainHandler.cpp159
-rw-r--r--src/core/parser/stack/DomainHandler.hpp164
-rw-r--r--src/core/parser/stack/GenericParserStates.cpp53
-rw-r--r--src/core/parser/stack/GenericParserStates.hpp (renamed from src/core/parser/generic/GenericParser.hpp)34
-rw-r--r--src/core/parser/stack/Handler.cpp254
-rw-r--r--src/core/parser/stack/Handler.hpp421
-rw-r--r--src/core/parser/stack/ImportIncludeHandler.cpp76
-rw-r--r--src/core/parser/stack/ImportIncludeHandler.hpp94
-rw-r--r--src/core/parser/stack/Stack.cpp550
-rw-r--r--src/core/parser/stack/Stack.hpp341
-rw-r--r--src/core/parser/stack/State.cpp (renamed from src/core/parser/ParserState.cpp)66
-rw-r--r--src/core/parser/stack/State.hpp (renamed from src/core/parser/ParserState.hpp)152
-rw-r--r--src/core/parser/stack/TypesystemHandler.cpp101
-rw-r--r--src/core/parser/stack/TypesystemHandler.hpp153
-rw-r--r--src/core/parser/utils/TokenTrie.cpp119
-rw-r--r--src/core/parser/utils/TokenTrie.hpp150
-rw-r--r--src/core/parser/utils/Tokenizer.cpp381
-rw-r--r--src/core/parser/utils/Tokenizer.hpp231
27 files changed, 3540 insertions, 1005 deletions
diff --git a/src/core/parser/ParserScope.cpp b/src/core/parser/ParserScope.cpp
index 3929abf..ce3dc94 100644
--- a/src/core/parser/ParserScope.cpp
+++ b/src/core/parser/ParserScope.cpp
@@ -351,8 +351,7 @@ bool ParserScope::resolveType(const std::string &name, Handle<Node> owner,
return resolveType(Utils::split(name, '.'), owner, logger, resultCallback);
}
-bool ParserScope::resolveValue(Variant &data, Handle<Type> type,
- Handle<Node> owner, Logger &logger)
+bool ParserScope::resolveValue(Variant &data, Handle<Type> type, Logger &logger)
{
return type->build(
data, logger,
@@ -408,7 +407,7 @@ bool ParserScope::resolveTypeWithValue(const std::vector<std::string> &path,
[=](Handle<Node> resolved, Handle<Node> owner, Logger &logger) mutable {
if (resolved != nullptr) {
Rooted<Type> type = resolved.cast<Type>();
- scope.resolveValue(*valuePtr, type, owner, logger);
+ scope.resolveValue(*valuePtr, type, logger);
}
// Call the result callback with the type
diff --git a/src/core/parser/ParserScope.hpp b/src/core/parser/ParserScope.hpp
index 58fc037..185b845 100644
--- a/src/core/parser/ParserScope.hpp
+++ b/src/core/parser/ParserScope.hpp
@@ -702,13 +702,11 @@ public:
* (even in inner structures). The data will be passed to the "build"
* function of the given type.
* @param type is the Typesystem type the data should be interpreted with.
- * @param owner is the node for which the resolution takes place.
* @param logger is the logger instance into which resolution problems
* should be logged.
* @return true if the value was successfully built.
*/
- bool resolveValue(Variant &data, Handle<Type> type, Handle<Node> owner,
- Logger &logger);
+ bool resolveValue(Variant &data, Handle<Type> type, Logger &logger);
/**
* Resolves a type and makes sure the corresponding value is of the correct
diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp
deleted file mode 100644
index 1265851..0000000
--- a/src/core/parser/ParserStack.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <sstream>
-
-#include <core/common/Utils.hpp>
-#include <core/common/Exceptions.hpp>
-#include <core/model/Project.hpp>
-
-#include "ParserScope.hpp"
-#include "ParserStack.hpp"
-
-namespace ousia {
-
-/* A default handler */
-
-/**
- * The DefaultHandler class is used in case no element handler is specified in
- * the ParserState descriptor.
- */
-class DefaultHandler : public Handler {
-public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override {}
-
- void end() override {}
-
- static Handler *create(const HandlerData &handlerData)
- {
- return new DefaultHandler{handlerData};
- }
-};
-
-/* Class Handler */
-
-void Handler::data(const std::string &data, int field)
-{
- if (Utils::hasNonWhitepaceChar(data)) {
- logger().error("Expected command but found character data.");
- }
-}
-
-/* Class ParserStack */
-
-/**
- * Returns an Exception that should be thrown when a currently invalid command
- * is thrown.
- */
-static LoggableException InvalidCommand(const std::string &name,
- const std::set<std::string> &expected)
-{
- if (expected.empty()) {
- return LoggableException{
- std::string{"No nested elements allowed, but got \""} + name +
- std::string{"\""}};
- } else {
- return LoggableException{
- std::string{"Expected "} +
- (expected.size() == 1 ? std::string{"\""}
- : std::string{"one of \""}) +
- Utils::join(expected, "\", \"") + std::string{"\", but got \""} +
- name + std::string{"\""}};
- }
-}
-
-ParserStack::ParserStack(
- ParserContext &ctx,
- const std::multimap<std::string, const ParserState *> &states)
- : ctx(ctx), states(states)
-{
-}
-
-bool ParserStack::deduceState()
-{
- // Assemble all states
- std::vector<const ParserState *> states;
- for (const auto &e : this->states) {
- states.push_back(e.second);
- }
-
- // Fetch the type signature of the scope and derive all possible states,
- // abort if no unique parser state was found
- std::vector<const ParserState *> possibleStates =
- ParserStateDeductor(ctx.getScope().getStackTypeSignature(), states)
- .deduce();
- if (possibleStates.size() != 1) {
- ctx.getLogger().error(
- "Error while including file: Cannot deduce parser state.");
- return false;
- }
-
- // Switch to this state by creating a dummy handler
- const ParserState *state = possibleStates[0];
- Handler *handler =
- DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}});
- stack.emplace(handler);
- return true;
-}
-
-std::set<std::string> ParserStack::expectedCommands()
-{
- const ParserState *currentState = &(this->currentState());
- std::set<std::string> res;
- for (const auto &v : states) {
- if (v.second->parents.count(currentState)) {
- res.insert(v.first);
- }
- }
- return res;
-}
-
-const ParserState &ParserStack::currentState()
-{
- return stack.empty() ? ParserStates::None : stack.top()->state();
-}
-
-std::string ParserStack::currentCommandName()
-{
- return stack.empty() ? std::string{} : stack.top()->name();
-}
-
-const ParserState *ParserStack::findTargetState(const std::string &name)
-{
- const ParserState *currentState = &(this->currentState());
- auto range = states.equal_range(name);
- for (auto it = range.first; it != range.second; it++) {
- const ParserStateSet &parents = it->second->parents;
- if (parents.count(currentState) || parents.count(&ParserStates::All)) {
- return it->second;
- }
- }
-
- return nullptr;
-}
-
-void ParserStack::start(const std::string &name, Variant::mapType &args,
- const SourceLocation &location)
-{
- ParserState const *targetState = findTargetState(name);
-// TODO: Andreas, please improve this.
-// if (!Utils::isIdentifier(name)) {
-// throw LoggableException(std::string("Invalid identifier \"") + name +
-// std::string("\""));
-// }
-
- if (targetState == nullptr) {
- targetState = findTargetState("*");
- }
- if (targetState == nullptr) {
- throw InvalidCommand(name, expectedCommands());
- }
-
- // Fetch the associated constructor
- HandlerConstructor ctor = targetState->elementHandler
- ? targetState->elementHandler
- : DefaultHandler::create;
-
- // Canonicalize the arguments, allow additional arguments
- targetState->arguments.validateMap(args, ctx.getLogger(), true);
-
- // Instantiate the handler and call its start function
- Handler *handler = ctor({ctx, name, *targetState, currentState(), location});
- handler->start(args);
- stack.emplace(handler);
-}
-
-void ParserStack::start(std::string name, const Variant::mapType &args,
- const SourceLocation &location)
-{
- Variant::mapType argsCopy(args);
- start(name, argsCopy);
-}
-
-void ParserStack::end()
-{
- // Check whether the current command could be ended
- if (stack.empty()) {
- throw LoggableException{"No command to end."};
- }
-
- // Remove the current HandlerInstance from the stack
- std::shared_ptr<Handler> inst{stack.top()};
- stack.pop();
-
- // Call the end function of the last Handler
- inst->end();
-}
-
-void ParserStack::data(const std::string &data, int field)
-{
- // Check whether there is any command the data can be sent to
- if (stack.empty()) {
- throw LoggableException{"No command to receive data."};
- }
-
- // Pass the data to the current Handler instance
- stack.top()->data(data, field);
-}
-}
-
diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp
deleted file mode 100644
index efc4e4a..0000000
--- a/src/core/parser/ParserStack.hpp
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file ParserStack.hpp
- *
- * Helper classes for document or description parsers. Contains the ParserStack
- * class, which is an pushdown automaton responsible for accepting commands in
- * the correct order and calling specified handlers.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_PARSER_STACK_HPP_
-#define _OUSIA_PARSER_STACK_HPP_
-
-#include <cstdint>
-
-#include <map>
-#include <memory>
-#include <set>
-#include <stack>
-#include <vector>
-
-#include <core/common/Variant.hpp>
-#include <core/common/Logger.hpp>
-#include <core/common/Argument.hpp>
-
-#include "Parser.hpp"
-#include "ParserContext.hpp"
-#include "ParserState.hpp"
-
-namespace ousia {
-
-/**
- * Struct collecting all the data that is being passed to a Handler instance.
- */
-struct HandlerData {
- /**
- * Reference to the ParserContext instance that should be used to resolve
- * references to nodes in the Graph.
- */
- ParserContext &ctx;
-
- /**
- * Contains the name of the tag that is being handled.
- */
- const std::string name;
-
- /**
- * Contains the current state of the state machine.
- */
- const ParserState &state;
-
- /**
- * Contains the state of the state machine when the parent node was handled.
- */
- const ParserState &parentState;
-
- /**
- * Current source code location.
- */
- const SourceLocation location;
-
- /**
- * Constructor of the HandlerData class.
- *
- * @param ctx is the parser context the handler should be executed in.
- * @param name is the name of the string.
- * @param state is the state this handler was called for.
- * @param parentState is the state of the parent command.
- * @param location is the location at which the handler is created.
- */
- HandlerData(ParserContext &ctx, std::string name, const ParserState &state,
- const ParserState &parentState, const SourceLocation location)
- : ctx(ctx),
- name(std::move(name)),
- state(state),
- parentState(parentState),
- location(location){};
-};
-
-/**
- * The handler class provides a context for handling an XML tag. It has to be
- * overridden and registered in the StateStack class to form handlers for
- * concrete XML tags.
- */
-class Handler {
-private:
- /**
- * Structure containing the internal handler data.
- */
- const HandlerData handlerData;
-
-public:
- /**
- * Constructor of the Handler class.
- *
- * @param data is a structure containing all data being passed to the
- * handler.
- */
- Handler(const HandlerData &handlerData) : handlerData(handlerData){};
-
- /**
- * Virtual destructor.
- */
- virtual ~Handler(){};
-
- /**
- * Returns a reference at the ParserContext.
- *
- * @return a reference at the ParserContext.
- */
- ParserContext &context() { return handlerData.ctx; }
-
- /**
- * Returns the command name for which the handler was created.
- *
- * @return a const reference at the command name.
- */
- const std::string &name() { return handlerData.name; }
-
- /**
- * Returns a reference at the ParserScope instance.
- *
- * @return a reference at the ParserScope instance.
- */
- ParserScope &scope() { return handlerData.ctx.getScope(); }
-
- /**
- * Returns a reference at the Manager instance which manages all nodes.
- *
- * @return a referance at the Manager instance.
- */
- Manager &manager() { return handlerData.ctx.getManager(); }
-
- /**
- * Returns a reference at the Logger instance used for logging error
- * messages.
- *
- * @return a reference at the Logger instance.
- */
- Logger &logger() { return handlerData.ctx.getLogger(); }
-
- /**
- * Returns a reference at the Project Node, representing the project into
- * which the file is currently being parsed.
- *
- * @return a referance at the Project Node.
- */
- Rooted<Project> project() { return handlerData.ctx.getProject(); }
-
- /**
- * Reference at the ParserState descriptor for which this Handler was
- * created.
- *
- * @return a const reference at the constructing ParserState descriptor.
- */
- const ParserState &state() { return handlerData.state; }
-
- /**
- * Reference at the ParserState descriptor of the parent state of the state
- * for which this Handler was created. Set to ParserStates::None if there
- * is no parent state.
- *
- * @return a const reference at the parent state of the constructing
- * ParserState descriptor.
- */
- const ParserState &parentState() { return handlerData.parentState; }
-
- /**
- * Returns the current location in the source file.
- *
- * @return the current location in the source file.
- */
- SourceLocation location() { return handlerData.location; }
-
- /**
- * Called when the command that was specified in the constructor is
- * instanciated.
- *
- * @param args is a map from strings to variants (argument name and value).
- */
- virtual void start(Variant::mapType &args) = 0;
-
- /**
- * Called whenever the command for which this handler is defined ends.
- */
- virtual void end() = 0;
-
- /**
- * Called whenever raw data (int the form of a string) is available for the
- * Handler instance. In the default handler an exception is raised if the
- * received data contains non-whitespace characters.
- *
- * @param data is a pointer at the character data that is available for the
- * Handler instance.
- * @param field is the field number (the interpretation of this value
- * depends on the format that is being parsed).
- */
- virtual void data(const std::string &data, int field);
-};
-
-/**
- * HandlerConstructor is a function pointer type used to create concrete
- * instances of the Handler class.
- *
- * @param handlerData is the data that should be passed to the new handler
- * instance.
- * @return a newly created handler instance.
- */
-using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
-
-/**
- * The ParserStack class is a pushdown automaton responsible for turning a
- * command stream into a tree of Node instances.
- */
-class ParserStack {
-private:
- /**
- * Reference at the parser context.
- */
- ParserContext &ctx;
-
- /**
- * Map containing all registered command names and the corresponding
- * state descriptors.
- */
- const std::multimap<std::string, const ParserState *> &states;
-
- /**
- * Internal stack used for managing the currently active Handler instances.
- */
- std::stack<std::shared_ptr<Handler>> stack;
-
- /**
- * Used internally to get all expected command names for the current state.
- * This function is used to build error messages.
- *
- * @return a set of strings containing the names of the expected commands.
- */
- std::set<std::string> expectedCommands();
-
- /**
- * Returns the targetState for a command with the given name that can be
- * reached from for the current state.
- *
- * @param name is the name of the requested command.
- * @return nullptr if no target state was found, a pointer at the target
- *state
- * otherwise.
- */
- const ParserState *findTargetState(const std::string &name);
-
-public:
- /**
- * Creates a new instance of the ParserStack class.
- *
- * @param ctx is the parser context the parser stack is working on.
- * @param states is a map containing the command names and pointers at the
- * corresponding ParserState instances.
- */
- ParserStack(ParserContext &ctx,
- const std::multimap<std::string, const ParserState *> &states);
-
- /**
- * Tries to reconstruct the parser state from the Scope instance of the
- * ParserContext given in the constructor. This functionality is needed for
- * including files,as the Parser of the included file needs to be brought to
- + an equivalent state as the one in the including file.
- *
- * @param scope is the ParserScope instance from which the ParserState
- * should be reconstructed.
- * @param logger is the logger instance to which error messages should be
- * written.
- * @return true if the operation was sucessful, false otherwise.
- */
- bool deduceState();
-
- /**
- * Returns the state the ParserStack instance currently is in.
- *
- * @return the state of the currently active Handler instance or STATE_NONE
- * if no handler is on the stack.
- */
- const ParserState &currentState();
-
- /**
- * Returns the command name that is currently being handled.
- *
- * @return the name of the command currently being handled by the active
- * Handler instance or an empty string if no handler is currently active.
- */
- std::string currentCommandName();
-
- /**
- * Function that should be called whenever a new command starts.
- *
- * @param name is the name of the command.
- * @param args is a map from strings to variants (argument name and value).
- * Note that the passed map will be modified.
- * @param location is the location in the source file at which the command
- * starts.
- */
- void start(const std::string &name, Variant::mapType &args,
- const SourceLocation &location = SourceLocation{});
-
- /**
- * Function that should be called whenever a new command starts.
- *
- * @param name is the name of the command.
- * @param args is a map from strings to variants (argument name and value).
- * @param location is the location in the source file at which the command
- * starts.
- */
- void start(std::string name,
- const Variant::mapType &args = Variant::mapType{},
- const SourceLocation &location = SourceLocation{});
-
- /**
- * Function called whenever a command ends.
- */
- void end();
-
- /**
- * Function that should be called whenever data is available for the
- * command.
- *
- * @param data is the data that should be passed to the handler.
- * @param field is the field number (the interpretation of this value
- * depends on the format that is being parsed).
- */
- void data(const std::string &data, int field = 0);
-
- /**
- * Returns a reference to the parser context the parser stack is currently
- * working on.
- *
- * @return a reference to the parser context.
- */
- ParserContext &getContext() { return ctx; }
-};
-}
-
-#endif /* _OUSIA_PARSER_STACK_HPP_ */
-
diff --git a/src/core/parser/generic/GenericParser.cpp b/src/core/parser/generic/GenericParser.cpp
deleted file mode 100644
index e69de29..0000000
--- a/src/core/parser/generic/GenericParser.cpp
+++ /dev/null
diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp
new file mode 100644
index 0000000..6ebc549
--- /dev/null
+++ b/src/core/parser/stack/Callbacks.cpp
@@ -0,0 +1,23 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "Callbacks.hpp"
+
+namespace ousia {
+}
+
diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp
new file mode 100644
index 0000000..9c61000
--- /dev/null
+++ b/src/core/parser/stack/Callbacks.hpp
@@ -0,0 +1,99 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Callbacks.hpp
+ *
+ * Contains an interface defining the callbacks that can be directed from a
+ * StateHandler to the StateStack, and from the StateStack to
+ * the actual parser.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_PARSER_STACK_CALLBACKS_HPP_
+#define _OUSIA_PARSER_STACK_CALLBACKS_HPP_
+
+#include <string>
+
+#include <core/common/Whitespace.hpp>
+
+namespace ousia {
+namespace parser_stack {
+
+/**
+ * Interface defining a set of callback functions that act as a basis for the
+ * StateStackCallbacks and the ParserCallbacks.
+ */
+class Callbacks {
+public:
+ /**
+ * Virtual destructor.
+ */
+ virtual ~Callbacks() {};
+
+ /**
+ * Sets the whitespace mode that specifies how string data should be
+ * processed.
+ *
+ * @param whitespaceMode specifies one of the three WhitespaceMode constants
+ * PRESERVE, TRIM or COLLAPSE.
+ */
+ virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0;
+
+ /**
+ * Registers the given token as a token that should be reported to the
+ * handler using the "token" function.
+ *
+ * @param token is the token string that should be reported.
+ */
+ virtual void registerToken(const std::string &token) = 0;
+
+ /**
+ * Unregisters the given token; it will no longer be reported to the handler
+ * using the "token" function.
+ *
+ * @param token is the token string that should be unregistered.
+ */
+ virtual void unregisterToken(const std::string &token) = 0;
+};
+
+/**
+ * Interface defining the callback functions that can be passed from a
+ * StateStack to the underlying parser.
+ */
+class ParserCallbacks : public Callbacks {
+ /**
+ * Checks whether the given token is supported by the parser. The parser
+ * returns true, if the token is supported, false if this token cannot be
+ * registered. Note that parsers that do not support the registration of
+ * tokens at all should always return "true".
+ *
+ * @param token is the token that should be checked for support.
+ * @return true if the token is generally supported (or the parser does not
+ * support registering tokens at all), false if the token is not supported,
+ * because e.g. it is a reserved token or it interferes with other tokens.
+ */
+ virtual bool supportsToken(const std::string &token) = 0;
+};
+
+}
+}
+
+#endif /* _OUSIA_PARSER_STACK_CALLBACKS_HPP_ */
+
diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp
index 3647db3..d514701 100644
--- a/src/core/parser/stack/DocumentHandler.cpp
+++ b/src/core/parser/stack/DocumentHandler.cpp
@@ -16,28 +16,35 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "DocumentHandler.hpp"
-
#include <algorithm>
#include <core/common/RttiBuilder.hpp>
#include <core/common/Utils.hpp>
+#include <core/common/VariantReader.hpp>
#include <core/model/Document.hpp>
#include <core/model/Domain.hpp>
+#include <core/model/Project.hpp>
#include <core/model/Typesystem.hpp>
#include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
+
+#include "DocumentHandler.hpp"
+#include "State.hpp"
namespace ousia {
+namespace parser_stack {
/* DocumentHandler */
-void DocumentHandler::start(Variant::mapType &args)
+bool DocumentHandler::start(Variant::mapType &args)
{
Rooted<Document> document =
- project()->createDocument(args["name"].asString());
+ context().getProject()->createDocument(args["name"].asString());
document->setLocation(location());
scope().push(document);
scope().setFlag(ParserFlag::POST_HEAD, false);
+
+ return true;
}
void DocumentHandler::end() { scope().pop(); }
@@ -48,7 +55,7 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,
std::string &fieldName,
DocumentEntity *&parent, bool &inField)
{
- // check if the parent in the structure tree was an explicit field
+ // Check if the parent in the structure tree was an explicit field
// reference.
inField = parentNode->isa(&RttiTypes::DocumentField);
if (inField) {
@@ -56,10 +63,11 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,
parentNode = scope().selectOrThrow(
{&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity});
} else {
- // if it wasn't an explicit reference, we use the default field.
+ // If it wasn't an explicit reference, we use the default field.
fieldName = DEFAULT_FIELD_NAME;
}
- // reference the parent entity explicitly.
+
+ // Reference the parent entity explicitly.
parent = nullptr;
if (parentNode->isa(&RttiTypes::StructuredEntity)) {
parent = static_cast<DocumentEntity *>(
@@ -70,17 +78,13 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,
}
}
-static void createPath(const std::string &firstFieldName,
- const NodeVector<Node> &path, DocumentEntity *&parent)
+static void createPath(const NodeVector<Node> &path, DocumentEntity *&parent,
+ size_t p0 = 1)
{
- // add the first element
- parent = static_cast<DocumentEntity *>(
- parent->createChildStructuredEntity(path[0].cast<StructuredClass>(),
- Variant::mapType{}, firstFieldName,
- "").get());
-
+ // TODO (@benjamin): These should be pushed onto the scope and popped once
+ // the scope is left. Otherwise stuff may not be correctly resolved.
size_t S = path.size();
- for (size_t p = 2; p < S; p = p + 2) {
+ for (size_t p = p0; p < S; p = p + 2) {
parent = static_cast<DocumentEntity *>(
parent->createChildStructuredEntity(
path[p].cast<StructuredClass>(), Variant::mapType{},
@@ -88,18 +92,19 @@ static void createPath(const std::string &firstFieldName,
}
}
-static void createPath(const NodeVector<Node> &path, DocumentEntity *&parent)
+static void createPath(const std::string &firstFieldName,
+ const NodeVector<Node> &path, DocumentEntity *&parent)
{
- size_t S = path.size();
- for (size_t p = 1; p < S; p = p + 2) {
- parent = static_cast<DocumentEntity *>(
- parent->createChildStructuredEntity(
- path[p].cast<StructuredClass>(), Variant::mapType{},
- path[p - 1]->getName(), "").get());
- }
+ // Add the first element
+ parent = static_cast<DocumentEntity *>(
+ parent->createChildStructuredEntity(path[0].cast<StructuredClass>(),
+ Variant::mapType{}, firstFieldName,
+ "").get());
+
+ createPath(path, parent, 2);
}
-void DocumentChildHandler::start(Variant::mapType &args)
+bool DocumentChildHandler::start(Variant::mapType &args)
{
scope().setFlag(ParserFlag::POST_HEAD, true);
Rooted<Node> parentNode = scope().selectOrThrow(
@@ -112,7 +117,7 @@ void DocumentChildHandler::start(Variant::mapType &args)
preamble(parentNode, fieldName, parent, inField);
- // try to find a FieldDescriptor for the given tag if we are not in a
+ // Try to find a FieldDescriptor for the given tag if we are not in a
// field already. This does _not_ try to construct transparent paths
// in between.
if (!inField && parent != nullptr &&
@@ -121,7 +126,7 @@ void DocumentChildHandler::start(Variant::mapType &args)
new DocumentField(parentNode->getManager(), name(), parentNode)};
field->setLocation(location());
scope().push(field);
- return;
+ return true;
}
// Otherwise create a new StructuredEntity
@@ -187,27 +192,39 @@ void DocumentChildHandler::start(Variant::mapType &args)
}
entity->setLocation(location());
scope().push(entity);
+ return true;
}
void DocumentChildHandler::end() { scope().pop(); }
-std::pair<bool, Variant> DocumentChildHandler::convertData(
- Handle<FieldDescriptor> field, Logger &logger, const std::string &data)
+bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field,
+ Variant &data, Logger &logger)
{
- // if the content is supposed to be of type string, we can finish
- // directly.
- auto vts = field->getPrimitiveType()->getVariantTypes();
- if (std::find(vts.begin(), vts.end(), VariantType::STRING) != vts.end()) {
- return std::make_pair(true, Variant::fromString(data));
+ bool valid = true;
+ Rooted<Type> type = field->getPrimitiveType();
+
+ // If the content is supposed to be of type string, we only need to check
+ // for "magic" values -- otherwise just call the "parseGenericString"
+ // function on the string data
+ if (type->isa(&RttiTypes::StringType)) {
+ const std::string &str = data.asString();
+ // TODO: Referencing constants with "." separator should also work
+ if (Utils::isIdentifier(str)) {
+ data.markAsMagic();
+ }
+ } else {
+ // Parse the string as generic string, assign the result
+ auto res = VariantReader::parseGenericString(
+ data.asString(), logger, data.getLocation().getSourceId(),
+ data.getLocation().getStart());
+ data = res.second;
}
- // then try to parse the content using the type specification.
- auto res = field->getPrimitiveType()->read(
- data, logger, location().getSourceId(), location().getStart());
- return res;
+ // Now try to resolve the value for the primitive type
+ return valid && scope().resolveValue(data, type, logger);
}
-void DocumentChildHandler::data(const std::string &data, int fieldIdx)
+bool DocumentChildHandler::data(Variant &data)
{
Rooted<Node> parentNode = scope().selectOrThrow(
{&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity,
@@ -222,11 +239,10 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx)
Rooted<Descriptor> desc = strctParent->getDescriptor();
// The parent from which we need to connect to the primitive content.
Rooted<Node> parentClass;
- /*
- * We distinguish two cases here: One for fields that are given.
- */
+
+ // We distinguish two cases here: One for fields that are given.
if (inField) {
- // retrieve the actual FieldDescriptor
+ // Retrieve the actual FieldDescriptor
Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName);
if (field == nullptr) {
logger().error(
@@ -234,75 +250,102 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx)
fieldName + "\" exists in descriptor\"" + desc->getName() +
"\".",
location());
- return;
+ return false;
}
- // if it is a primitive field directly, try to parse the content.
+ // If it is a primitive field directly, try to parse the content.
if (field->isPrimitive()) {
- auto res = convertData(field, logger(), data);
- // add it as primitive content.
- if (res.first) {
- strctParent->createChildDocumentPrimitive(res.second,
- fieldName);
+ // Add it as primitive content.
+ if (!convertData(field, data, logger())) {
+ return false;
}
- return;
+
+ strctParent->createChildDocumentPrimitive(data, fieldName);
+ return true;
}
- // if it is not primitive we need to connect via transparent elements
+ // If it is not primitive we need to connect via transparent elements
// and default fields.
parentClass = field;
} else {
- // in case of default fields we need to construct via default fields
+ // In case of default fields we need to construct via default fields
// and maybe transparent elements.
parentClass = desc;
}
- /*
- * Search through all permitted default fields of the parent class that
- * allow primitive content at this point and could be constructed via
- * transparent intermediate entities.
- * We then try to parse the data using the type specified by the respective
- * field. If that does not work we proceed to the next possible field.
- */
- // retrieve all default fields at this point.
+
+ // Search through all permitted default fields of the parent class that
+ // allow primitive content at this point and could be constructed via
+ // transparent intermediate entities.
+
+ // Retrieve all default fields at this point, either from the field
+ // descriptor or the structured class
NodeVector<FieldDescriptor> defaultFields;
if (inField) {
defaultFields = parentClass.cast<FieldDescriptor>()->getDefaultFields();
} else {
defaultFields = parentClass.cast<StructuredClass>()->getDefaultFields();
}
+
+ // Try to parse the data using the type specified by the respective field.
+ // If that does not work we proceed to the next possible field.
std::vector<LoggerFork> forks;
for (auto field : defaultFields) {
- // then try to parse the content using the type specification.
+ // Then try to parse the content using the type specification.
forks.emplace_back(logger().fork());
- auto res = convertData(field, forks.back(), data);
- if (res.first) {
- forks.back().commit();
- // if that worked, construct the necessary path.
- if (inField) {
- NodeVector<Node> path =
- parentClass.cast<FieldDescriptor>()->pathTo(field,
- logger());
- createPath(fieldName, path, strctParent);
- } else {
- auto pathRes = desc->pathTo(field, logger());
- assert(pathRes.second);
- createPath(pathRes.first, strctParent);
- }
- // then create the primitive element.
- strctParent->createChildDocumentPrimitive(res.second);
- return;
+ if (!convertData(field, data, forks.back())) {
+ continue;
}
+
+ // The conversion worked, commit any possible warnings
+ forks.back().commit();
+
+ // Construct the necessary path
+ if (inField) {
+ NodeVector<Node> path =
+ parentClass.cast<FieldDescriptor>()->pathTo(field, logger());
+ createPath(fieldName, path, strctParent);
+ } else {
+ auto pathRes = desc->pathTo(field, logger());
+ assert(pathRes.second);
+ createPath(pathRes.first, strctParent);
+ }
+
+ // Then create the primitive element
+ strctParent->createChildDocumentPrimitive(data);
+ return true;
}
- logger().error("Could not read data with any of the possible fields:");
+
+ // No field was found that might take the data -- dump the error messages
+ // from the loggers
+ logger().error("Could not read data with any of the possible fields:",
+ SourceLocation{}, MessageMode::NO_CONTEXT);
size_t f = 0;
for (auto field : defaultFields) {
- logger().note(Utils::join(field->path(), ".") + ":", SourceLocation{},
- MessageMode::NO_CONTEXT);
+ logger().note(std::string("Field ") + Utils::join(field->path(), ".") +
+ std::string(":"),
+ SourceLocation{}, MessageMode::NO_CONTEXT);
forks[f].commit();
f++;
}
+ return false;
+}
+
+namespace States {
+const State Document = StateBuilder()
+ .parent(&None)
+ .createdNodeType(&RttiTypes::Document)
+ .elementHandler(DocumentHandler::create)
+ .arguments({Argument::String("name", "")});
+
+const State DocumentChild = StateBuilder()
+ .parents({&Document, &DocumentChild})
+ .createdNodeTypes({&RttiTypes::StructureNode,
+ &RttiTypes::AnnotationEntity,
+ &RttiTypes::DocumentField})
+ .elementHandler(DocumentChildHandler::create);
+}
}
namespace RttiTypes {
-const Rtti DocumentField =
- RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node);
+const Rtti DocumentField = RttiBuilder<ousia::parser_stack::DocumentField>(
+ "DocumentField").parent(&Node);
+}
}
-} \ No newline at end of file
diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp
index cb124aa..b339b96 100644
--- a/src/core/parser/stack/DocumentHandler.hpp
+++ b/src/core/parser/stack/DocumentHandler.hpp
@@ -19,14 +19,21 @@
/**
* @file DocumentHandler.hpp
*
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ * Contains the Handler instances used for parsing actual documents. This file
+ * declares two classes: The Document handler which parses the "document" command
+ * that introduces a new document and the "DocumentChildHandler" which parses
+ * the actual user defined tags.
+ *
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
*/
-#ifndef _OUSIA_DOCUMENT_HANDLER_HPP_
-#define _OUSIA_DOCUMENT_HANDLER_HPP_
+#ifndef _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_
+#define _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_
#include <core/common/Variant.hpp>
-#include <core/parser/ParserStack.hpp>
+#include <core/model/Node.hpp>
+
+#include "Handler.hpp"
namespace ousia {
@@ -35,51 +42,131 @@ class Rtti;
class DocumentEntity;
class FieldDescriptor;
-class DocumentHandler : public Handler {
+namespace parser_stack {
+/**
+ * The DocumentHandler class parses the "document" tag that is used to introduce
+ * a new document. Note that this tag is not mandatory in osml files -- if the
+ * first command is not a typesystem, domain or any other declarative command,
+ * the DocumentHandler will be implicitly called.
+ */
+class DocumentHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
+ /**
+ * Creates a new instance of the DocumentHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
return new DocumentHandler{handlerData};
}
};
+/**
+ * Temporary Node that is being pushed onto the ParserScope in order to indicate
+ * the field the parser is currently in. The name of the Node is stored in the
+ * "name" field of the parent Node class.
+ */
class DocumentField : public Node {
public:
using Node::Node;
};
-class DocumentChildHandler : public Handler {
+/**
+ * The DocumentChildHandler class performs the actual parsing of the user
+ * defined elements in an Ousía document.
+ */
+class DocumentChildHandler : public StaticHandler {
private:
+ /**
+ * Code shared by both the start() and the end() method. Checks whether the
+ * parser currently is in a field and returns the name of this field.
+ *
+ * @param parentNode is the next possible parent node (a document,
+ * a structured entity, an annotation entity or a field).
+ * @param fieldName is an output parameter to which the name of the current
+ * field is written (or unchanged if we're not in a field).
+ * @param parent is an output parameter to which the parent document entity
+ * will be written.
+ * @param inField is set to true if we actually are in a field.
+ */
void preamble(Handle<Node> parentNode, std::string &fieldName,
DocumentEntity *&parent, bool &inField);
- std::pair<bool, Variant> convertData(Handle<FieldDescriptor> field,
- Logger &logger,
- const std::string &data);
+ /**
+ * Constructs all structured entities along the given path and inserts them
+ * into the document graph.
+ *
+ * @param path is a path containing an alternating series of structured
+ * classes and fields.
+ * @param parent is the root entity from which the process should be started.
+ */
+ void createPath(const NodeVector<Node> &path, DocumentEntity *&parent);
+
+ /**
+ * Tries to convert the given data to the type that is specified in the
+ * given primitive field.
+ *
+ * @param field is the primitive field for which the data is intended.
+ * @param data is the data that should be converted, the result is
+ * written into this argument as output variable.
+ * @param logger is the Logger instance to which error messages should be
+ * written. Needed to allow the convertData function to write to a forked
+ * Logger instance.
+ * @return true if the operation was successful, false otherwise.
+ */
+ bool convertData(Handle<FieldDescriptor> field, Variant &data,
+ Logger &logger);
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
-
- void data(const std::string &data, int fieldIdx) override;
-
+ bool data(Variant &data) override;
+
+ /**
+ * Creates a new instance of the DocumentChildHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
return new DocumentChildHandler{handlerData};
}
};
+namespace States {
+/**
+ * State constant representing the "document" tag.
+ */
+extern const State Document;
+
+/**
+ * State constant representing any user-defined element within a document.
+ */
+extern const State DocumentChild;
+}
+
+}
+
namespace RttiTypes {
+/**
+ * RttiType for the internally used DocumentField class.
+ */
extern const Rtti DocumentField;
}
+
}
-#endif
+
+#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */
+
diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp
index 6571717..a2c8eec 100644
--- a/src/core/parser/stack/DomainHandler.cpp
+++ b/src/core/parser/stack/DomainHandler.cpp
@@ -16,29 +16,48 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "DomainHandler.hpp"
-
#include <core/common/RttiBuilder.hpp>
+#include <core/model/Document.hpp>
#include <core/model/Domain.hpp>
+#include <core/model/Project.hpp>
#include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
+
+#include "DocumentHandler.hpp"
+#include "DomainHandler.hpp"
+#include "State.hpp"
+#include "TypesystemHandler.hpp"
namespace ousia {
+namespace parser_stack {
/* DomainHandler */
-void DomainHandler::start(Variant::mapType &args)
+bool DomainHandler::start(Variant::mapType &args)
{
- Rooted<Domain> domain = project()->createDomain(args["name"].asString());
+ // Create the Domain node
+ Rooted<Domain> domain =
+ context().getProject()->createDomain(args["name"].asString());
domain->setLocation(location());
+ // If the domain is defined inside a document, add the reference to the
+ // document
+ Rooted<Document> document = scope().select<Document>();
+ if (document != nullptr) {
+ document->reference(domain);
+ }
+
+ // Push the domain onto the scope and reset the POST_HEAD flag to false
scope().push(domain);
+ scope().setFlag(ParserFlag::POST_HEAD, false);
+ return true;
}
void DomainHandler::end() { scope().pop(); }
/* DomainStructHandler */
-void DomainStructHandler::start(Variant::mapType &args)
+bool DomainStructHandler::start(Variant::mapType &args)
{
scope().setFlag(ParserFlag::POST_HEAD, true);
@@ -63,12 +82,13 @@ void DomainStructHandler::start(Variant::mapType &args)
}
scope().push(structuredClass);
+ return true;
}
void DomainStructHandler::end() { scope().pop(); }
/* DomainAnnotationHandler */
-void DomainAnnotationHandler::start(Variant::mapType &args)
+bool DomainAnnotationHandler::start(Variant::mapType &args)
{
scope().setFlag(ParserFlag::POST_HEAD, true);
@@ -79,13 +99,14 @@ void DomainAnnotationHandler::start(Variant::mapType &args)
annotationClass->setLocation(location());
scope().push(annotationClass);
+ return true;
}
void DomainAnnotationHandler::end() { scope().pop(); }
/* DomainAttributesHandler */
-void DomainAttributesHandler::start(Variant::mapType &args)
+bool DomainAttributesHandler::start(Variant::mapType &args)
{
// Fetch the current typesystem and create the struct node
Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
@@ -94,13 +115,14 @@ void DomainAttributesHandler::start(Variant::mapType &args)
attrDesc->setLocation(location());
scope().push(attrDesc);
+ return true;
}
void DomainAttributesHandler::end() { scope().pop(); }
/* DomainFieldHandler */
-void DomainFieldHandler::start(Variant::mapType &args)
+bool DomainFieldHandler::start(Variant::mapType &args)
{
FieldDescriptor::FieldType type;
if (args["isSubtree"].asBool()) {
@@ -116,13 +138,14 @@ void DomainFieldHandler::start(Variant::mapType &args)
field->setLocation(location());
scope().push(field);
+ return true;
}
void DomainFieldHandler::end() { scope().pop(); }
/* DomainFieldRefHandler */
-void DomainFieldRefHandler::start(Variant::mapType &args)
+bool DomainFieldRefHandler::start(Variant::mapType &args)
{
Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
@@ -135,13 +158,14 @@ void DomainFieldRefHandler::start(Variant::mapType &args)
field.cast<FieldDescriptor>(), logger);
}
});
+ return true;
}
void DomainFieldRefHandler::end() {}
/* DomainPrimitiveHandler */
-void DomainPrimitiveHandler::start(Variant::mapType &args)
+bool DomainPrimitiveHandler::start(Variant::mapType &args)
{
Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
@@ -167,13 +191,14 @@ void DomainPrimitiveHandler::start(Variant::mapType &args)
});
scope().push(field);
+ return true;
}
void DomainPrimitiveHandler::end() { scope().pop(); }
/* DomainChildHandler */
-void DomainChildHandler::start(Variant::mapType &args)
+bool DomainChildHandler::start(Variant::mapType &args)
{
Rooted<FieldDescriptor> field = scope().selectOrThrow<FieldDescriptor>();
@@ -186,13 +211,12 @@ void DomainChildHandler::start(Variant::mapType &args)
child.cast<StructuredClass>());
}
});
+ return true;
}
-void DomainChildHandler::end() {}
-
/* DomainParentHandler */
-void DomainParentHandler::start(Variant::mapType &args)
+bool DomainParentHandler::start(Variant::mapType &args)
{
Rooted<StructuredClass> strct = scope().selectOrThrow<StructuredClass>();
@@ -200,12 +224,14 @@ void DomainParentHandler::start(Variant::mapType &args)
new DomainParent(strct->getManager(), args["ref"].asString(), strct)};
parent->setLocation(location());
scope().push(parent);
+ return true;
}
void DomainParentHandler::end() { scope().pop(); }
/* DomainParentFieldHandler */
-void DomainParentFieldHandler::start(Variant::mapType &args)
+
+bool DomainParentFieldHandler::start(Variant::mapType &args)
{
Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>();
FieldDescriptor::FieldType type;
@@ -233,13 +259,12 @@ void DomainParentFieldHandler::start(Variant::mapType &args)
field->addChild(strct.cast<StructuredClass>());
}
});
+ return true;
}
-void DomainParentFieldHandler::end() {}
-
/* DomainParentFieldRefHandler */
-void DomainParentFieldRefHandler::start(Variant::mapType &args)
+bool DomainParentFieldRefHandler::start(Variant::mapType &args)
{
Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>();
@@ -265,12 +290,104 @@ void DomainParentFieldRefHandler::start(Variant::mapType &args)
field->addChild(strct.cast<StructuredClass>());
}
});
+ return true;
}
-void DomainParentFieldRefHandler::end() {}
+namespace States {
+const State Domain = StateBuilder()
+ .parents({&None, &Document})
+ .createdNodeType(&RttiTypes::Domain)
+ .elementHandler(DomainHandler::create)
+ .arguments({Argument::String("name")});
+
+const State DomainStruct =
+ StateBuilder()
+ .parent(&Domain)
+ .createdNodeType(&RttiTypes::StructuredClass)
+ .elementHandler(DomainStructHandler::create)
+ .arguments({Argument::String("name"),
+ Argument::Cardinality("cardinality", Cardinality::any()),
+ Argument::Bool("isRoot", false),
+ Argument::Bool("transparent", false),
+ Argument::String("isa", "")});
+
+const State DomainAnnotation =
+ StateBuilder()
+ .parent(&Domain)
+ .createdNodeType(&RttiTypes::AnnotationClass)
+ .elementHandler(DomainAnnotationHandler::create)
+ .arguments({Argument::String("name")});
+
+const State DomainAttributes =
+ StateBuilder()
+ .parents({&DomainStruct, &DomainAnnotation})
+ .createdNodeType(&RttiTypes::StructType)
+ .elementHandler(DomainAttributesHandler::create)
+ .arguments({});
+
+const State DomainAttribute =
+ StateBuilder()
+ .parent(&DomainAttributes)
+ .elementHandler(TypesystemStructFieldHandler::create)
+ .arguments({Argument::String("name"), Argument::String("type"),
+ Argument::Any("default", Variant::fromObject(nullptr))});
+
+const State DomainField = StateBuilder()
+ .parents({&DomainStruct, &DomainAnnotation})
+ .createdNodeType(&RttiTypes::FieldDescriptor)
+ .elementHandler(DomainFieldHandler::create)
+ .arguments({Argument::String("name", ""),
+ Argument::Bool("isSubtree", false),
+ Argument::Bool("optional", false)});
+
+const State DomainFieldRef =
+ StateBuilder()
+ .parents({&DomainStruct, &DomainAnnotation})
+ .createdNodeType(&RttiTypes::FieldDescriptor)
+ .elementHandler(DomainFieldRefHandler::create)
+ .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)});
+
+const State DomainStructPrimitive =
+ StateBuilder()
+ .parents({&DomainStruct, &DomainAnnotation})
+ .createdNodeType(&RttiTypes::FieldDescriptor)
+ .elementHandler(DomainPrimitiveHandler::create)
+ .arguments(
+ {Argument::String("name", ""), Argument::Bool("isSubtree", false),
+ Argument::Bool("optional", false), Argument::String("type")});
+
+const State DomainStructChild = StateBuilder()
+ .parent(&DomainField)
+ .elementHandler(DomainChildHandler::create)
+ .arguments({Argument::String("ref")});
+
+const State DomainStructParent =
+ StateBuilder()
+ .parent(&DomainStruct)
+ .createdNodeType(&RttiTypes::DomainParent)
+ .elementHandler(DomainParentHandler::create)
+ .arguments({Argument::String("ref")});
+
+const State DomainStructParentField =
+ StateBuilder()
+ .parent(&DomainStructParent)
+ .createdNodeType(&RttiTypes::FieldDescriptor)
+ .elementHandler(DomainParentFieldHandler::create)
+ .arguments({Argument::String("name", ""),
+ Argument::Bool("isSubtree", false),
+ Argument::Bool("optional", false)});
+
+const State DomainStructParentFieldRef =
+ StateBuilder()
+ .parent(&DomainStructParent)
+ .createdNodeType(&RttiTypes::FieldDescriptor)
+ .elementHandler(DomainParentFieldRefHandler::create)
+ .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)});
+}
+}
namespace RttiTypes {
-const Rtti DomainParent =
- RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node);
+const Rtti DomainParent = RttiBuilder<ousia::parser_stack::DomainParent>(
+ "DomainParent").parent(&Node);
}
}
diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp
index 7398812..76172d6 100644
--- a/src/core/parser/stack/DomainHandler.hpp
+++ b/src/core/parser/stack/DomainHandler.hpp
@@ -19,26 +19,34 @@
/**
* @file DomainHandler.hpp
*
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ * Contains the Handler classes used for parsing Domain descriptors. This
+ * includes the "domain" tag and all describing tags below the "domain" tag.
+ *
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
*/
#ifndef _OUSIA_DOMAIN_HANDLER_HPP_
#define _OUSIA_DOMAIN_HANDLER_HPP_
#include <core/common/Variant.hpp>
-#include <core/parser/ParserStack.hpp>
+#include <core/model/Node.hpp>
+
+#include "Handler.hpp"
namespace ousia {
// Forward declarations
class Rtti;
-class DomainHandler : public Handler {
-public:
- using Handler::Handler;
+namespace parser_stack {
+
+// TODO: Documentation
- void start(Variant::mapType &args) override;
+class DomainHandler : public StaticHandler {
+public:
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
static Handler *create(const HandlerData &handlerData)
@@ -47,12 +55,11 @@ public:
}
};
-class DomainStructHandler : public Handler {
+class DomainStructHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
static Handler *create(const HandlerData &handlerData)
@@ -61,12 +68,11 @@ public:
}
};
-class DomainAnnotationHandler : public Handler {
+class DomainAnnotationHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
static Handler *create(const HandlerData &handlerData)
@@ -75,12 +81,11 @@ public:
}
};
-class DomainAttributesHandler : public Handler {
+class DomainAttributesHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
static Handler *create(const HandlerData &handlerData)
@@ -89,12 +94,11 @@ public:
}
};
-class DomainFieldHandler : public Handler {
+class DomainFieldHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
static Handler *create(const HandlerData &handlerData)
@@ -103,12 +107,11 @@ public:
}
};
-class DomainFieldRefHandler : public Handler {
+class DomainFieldRefHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
static Handler *create(const HandlerData &handlerData)
@@ -117,12 +120,11 @@ public:
}
};
-class DomainPrimitiveHandler : public Handler {
+class DomainPrimitiveHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
static Handler *create(const HandlerData &handlerData)
@@ -131,13 +133,11 @@ public:
}
};
-class DomainChildHandler : public Handler {
+class DomainChildHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
- void end() override;
+ bool start(Variant::mapType &args) override;
static Handler *create(const HandlerData &handlerData)
{
@@ -150,16 +150,11 @@ public:
using Node::Node;
};
-namespace RttiTypes {
-extern const Rtti DomainParent;
-}
-
-class DomainParentHandler : public Handler {
+class DomainParentHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
static Handler *create(const HandlerData &handlerData)
@@ -168,13 +163,11 @@ public:
}
};
-class DomainParentFieldHandler : public Handler {
+class DomainParentFieldHandler : public StaticHandler {
public:
- using Handler::Handler;
+ using StaticHandler::StaticHandler;
- void start(Variant::mapType &args) override;
-
- void end() override;
+ bool start(Variant::mapType &args) override;
static Handler *create(const HandlerData &handlerData)
{
@@ -182,18 +175,83 @@ public:
}
};
-class DomainParentFieldRefHandler : public Handler {
+class DomainParentFieldRefHandler : public StaticHandler {
public:
- using Handler::Handler;
+ using StaticHandler::StaticHandler;
- void start(Variant::mapType &args) override;
-
- void end() override;
+ bool start(Variant::mapType &args) override;
static Handler *create(const HandlerData &handlerData)
{
return new DomainParentFieldRefHandler{handlerData};
}
};
+
+namespace States {
+/**
+ * State representing a "domain" tag.
+ */
+extern const State Domain;
+
+/**
+ * State representing a "struct" tag within a domain description.
+ */
+extern const State DomainStruct;
+
+/**
+ * State representing an "annotation" tag within a domain description.
+ */
+extern const State DomainAnnotation;
+
+/**
+ * State representing an "attributes" tag within a structure or annotation.
+ */
+extern const State DomainAttributes;
+
+/**
+ * State representing an "attribute" tag within the "attributes".
+ */
+extern const State DomainAttribute;
+
+/**
+ * State representing a "field" tag within a structure or annotation.
+ */
+extern const State DomainField;
+
+/**
+ * State representing a "fieldref" tag within a structure or annotation.
+ */
+extern const State DomainFieldRef;
+
+/**
+ * State representing a "primitive" tag within a structure or annotation.
+ */
+extern const State DomainStructPrimitive;
+
+/**
+ * State representing a "childRef" tag within a field.
+ */
+extern const State DomainStructChild;
+
+/**
+ * State representing a "parentRef" tag within a structure.
+ */
+extern const State DomainStructParent;
+
+/**
+ * State representing a "field" tag within a "parentRef" tag.
+ */
+extern const State DomainStructParentField;
+
+/**
+ * State representing a "fieldRef" tag within a "parentRef" tag.
+ */
+extern const State DomainStructParentFieldRef;
+}
+}
+
+namespace RttiTypes {
+extern const Rtti DomainParent;
+}
}
#endif
diff --git a/src/core/parser/stack/GenericParserStates.cpp b/src/core/parser/stack/GenericParserStates.cpp
new file mode 100644
index 0000000..69a6e0e
--- /dev/null
+++ b/src/core/parser/stack/GenericParserStates.cpp
@@ -0,0 +1,53 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "DocumentHandler.hpp"
+#include "DomainHandler.hpp"
+#include "GenericParserStates.hpp"
+#include "ImportIncludeHandler.hpp"
+#include "TypesystemHandler.hpp"
+
+namespace ousia {
+namespace parser_stack {
+
+const std::multimap<std::string, const State *> GenericParserStates{
+ {"document", &States::Document},
+ {"*", &States::DocumentChild},
+ {"domain", &States::Domain},
+ {"struct", &States::DomainStruct},
+ {"annotation", &States::DomainAnnotation},
+ {"attributes", &States::DomainAttributes},
+ {"attribute", &States::DomainAttribute},
+ {"field", &States::DomainField},
+ {"fieldRef", &States::DomainFieldRef},
+ {"primitive", &States::DomainStructPrimitive},
+ {"childRef", &States::DomainStructChild},
+ {"parentRef", &States::DomainStructParent},
+ {"field", &States::DomainStructParentField},
+ {"fieldRef", &States::DomainStructParentFieldRef},
+ {"typesystem", &States::Typesystem},
+ {"enum", &States::TypesystemEnum},
+ {"entry", &States::TypesystemEnumEntry},
+ {"struct", &States::TypesystemStruct},
+ {"field", &States::TypesystemStructField},
+ {"constant", &States::TypesystemConstant},
+ {"import", &States::Import},
+ {"include", &States::Include}};
+}
+}
+
diff --git a/src/core/parser/generic/GenericParser.hpp b/src/core/parser/stack/GenericParserStates.hpp
index 4f29f94..552eee5 100644
--- a/src/core/parser/generic/GenericParser.hpp
+++ b/src/core/parser/stack/GenericParserStates.hpp
@@ -17,33 +17,33 @@
*/
/**
- * @file GenericParser.hpp
+ * @file GenericParserStates.hpp
*
- * The GenericParser class builds an abstraction layer that separates the
- * underlying document format (e.g. osdm or osdmx) from the actual process of
- * building the document model. It provides a set of genric functions that
- * should be called by the inheriting concrete parser class, e.g. indicating a
- * command with parameters, the start/end of a field or the start/end of an
- * annotation. The GenericParser maintains an internal stack of
- * ParserStateHandlers and relays the commands to the elements of this stack.
+ * Contains a multimap which maps tag/command names to the corresponding
+ * state descriptors. This multimap is used to initialize the push down
+ * automaton residing inside the "Stack" class.
*
* @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
*/
-#ifndef _OUSIA_GENERIC_PARSER_HPP_
-#define _OUSIA_GENERIC_PARSER_HPP_
+#ifndef _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_
+#define _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_
-#include <core/parser/Parseer.hpp>
+#include <string>
+#include <map>
namespace ousia {
+namespace parser_stack {
-class GenericParser : public Parser {
-
-
-
-};
+// Forward declarations
+class State;
+/**
+ * Map between tagnames and references to the corresponding State instances.
+ */
+extern const std::multimap<std::string, const State *> GenericParserStates;
+}
}
-#endif _OUSIA_GENERIC_PARSER_HPP_
+#endif /* _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ */
diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp
new file mode 100644
index 0000000..bf5d4ea
--- /dev/null
+++ b/src/core/parser/stack/Handler.cpp
@@ -0,0 +1,254 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <core/common/Exceptions.hpp>
+#include <core/common/Logger.hpp>
+#include <core/parser/ParserContext.hpp>
+
+#include "Callbacks.hpp"
+#include "Handler.hpp"
+#include "State.hpp"
+
+namespace ousia {
+namespace parser_stack {
+
+/* Class HandlerData */
+
+HandlerData::HandlerData(ParserContext &ctx, /*Callbacks &callbacks,*/
+ const std::string &name, const State &state,
+ const SourceLocation &location)
+ : ctx(ctx),
+ /*callbacks(callbacks),*/
+ name(name),
+ state(state),
+ location(location)
+{
+}
+
+/* Class Handler */
+
+Handler::Handler(const HandlerData &handlerData)
+ : handlerData(handlerData), internalLogger(nullptr)
+{
+}
+
+Handler::~Handler() {}
+
+ParserContext &Handler::context() { return handlerData.ctx; }
+
+ParserScope &Handler::scope() { return handlerData.ctx.getScope(); }
+
+Manager &Handler::manager() { return handlerData.ctx.getManager(); }
+
+Logger &Handler::logger()
+{
+ if (internalLogger != nullptr) {
+ return *internalLogger;
+ }
+ return handlerData.ctx.getLogger();
+}
+
+const SourceLocation &Handler::location() const { return handlerData.location; }
+
+const std::string &Handler::name() const { return handlerData.name; }
+
+void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode)
+{
+ /*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/
+}
+
+void Handler::registerToken(const std::string &token)
+{
+ /*handlerData.callbacks.registerToken(token);*/
+}
+
+void Handler::unregisterToken(const std::string &token)
+{
+ /*handlerData.callbacks.unregisterToken(token);*/
+}
+
+const std::string &Handler::getName() const { return name(); }
+
+const State &Handler::getState() const { return handlerData.state; }
+
+void Handler::setLogger(Logger &logger) { internalLogger = &logger; }
+
+void Handler::resetLogger() { internalLogger = nullptr; }
+
+const SourceLocation &Handler::getLocation() const { return location(); }
+
+/* Class EmptyHandler */
+
+bool EmptyHandler::start(Variant::mapType &args)
+{
+ // Just accept anything
+ return true;
+}
+
+void EmptyHandler::end()
+{
+ // Do nothing if a command ends
+}
+
+bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex)
+{
+ // Accept any field
+ return true;
+}
+
+void EmptyHandler::fieldEnd()
+{
+ // Do not handle fields
+}
+
+bool EmptyHandler::annotationStart(const Variant &className,
+ Variant::mapType &args)
+{
+ // Accept any annotation start
+ return true;
+}
+
+bool EmptyHandler::annotationEnd(const Variant &className,
+ const Variant &elementName)
+{
+ // Accept any annotation
+ return true;
+}
+
+bool EmptyHandler::data(Variant &data)
+{
+ // Support any data
+ return true;
+}
+
+Handler *EmptyHandler::create(const HandlerData &handlerData)
+{
+ return new EmptyHandler(handlerData);
+}
+
+/* Class StaticHandler */
+
+bool StaticHandler::start(Variant::mapType &args)
+{
+ // Do nothing in the default implementation, accept anything
+ return true;
+}
+
+void StaticHandler::end()
+{
+ // Do nothing here
+}
+
+bool StaticHandler::fieldStart(bool &isDefault, size_t fieldIdx)
+{
+ // Accept only the field with index zero and report it as the default field.
+ // This simulates that there is exactly one field (a default field)
+ if (fieldIdx == 0) {
+ isDefault = true;
+ return true;
+ }
+ return false;
+}
+
+void StaticHandler::fieldEnd()
+{
+ // Do nothing here
+}
+
+bool StaticHandler::annotationStart(const Variant &className,
+ Variant::mapType &args)
+{
+ // No annotations supported
+ return false;
+}
+
+bool StaticHandler::annotationEnd(const Variant &className,
+ const Variant &elementName)
+{
+ // No annotations supported
+ return false;
+}
+
+bool StaticHandler::data(Variant &data)
+{
+ logger().error("Did not expect any data here", data);
+ return false;
+}
+
+/* Class StaticFieldHandler */
+
+StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData,
+ const std::string &argName)
+ : StaticHandler(handlerData), argName(argName), handled(false)
+{
+}
+
+bool StaticFieldHandler::start(Variant::mapType &args)
+{
+ if (!argName.empty()) {
+ auto it = args.find(argName);
+ if (it != args.end() && !it->second.toString().empty()) {
+ handled = true;
+ doHandle(it->second, args);
+ return true;
+ }
+ }
+
+ this->args = args;
+ return true;
+}
+
+void StaticFieldHandler::end()
+{
+ if (!handled) {
+ if (!argName.empty()) {
+ logger().error(std::string("Required argument \"") + argName +
+ std::string("\" is missing."),
+ location());
+ } else {
+ logger().error("Command requires data, but no data given",
+ location());
+ }
+ }
+}
+
+bool StaticFieldHandler::data(Variant &data)
+{
+ // Call the doHandle function if this has not been done before
+ if (!handled) {
+ handled = true;
+ doHandle(data, args);
+ return true;
+ }
+
+ // The doHandle function was already called, print an error message
+ logger().error(
+ std::string("Found data, but the corresponding argument \"") + argName +
+ std::string("\" was already specified"),
+ data);
+
+ // Print the location at which the attribute was originally specified
+ auto it = args.find(argName);
+ if (it != args.end()) {
+ logger().note(std::string("Attribute was specified here:"), it->second);
+ }
+ return false;
+}
+}
+}
+
diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp
new file mode 100644
index 0000000..7cda7a4
--- /dev/null
+++ b/src/core/parser/stack/Handler.hpp
@@ -0,0 +1,421 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_PARSER_STACK_HANDLER_HPP_
+#define _OUSIA_PARSER_STACK_HANDLER_HPP_
+
+#include <string>
+
+#include <core/common/Location.hpp>
+#include <core/common/Variant.hpp>
+#include <core/common/Whitespace.hpp>
+
+namespace ousia {
+
+// Forward declarations
+class ParserScope;
+class ParserContext;
+class Logger;
+
+namespace parser_stack {
+
+// More forward declarations
+class Callbacks;
+class State;
+
+/**
+ * Class collecting all the data that is being passed to a Handler
+ * instance.
+ */
+class HandlerData {
+public:
+ /**
+ * Reference to the ParserContext instance that should be used to resolve
+ * references to nodes in the Graph.
+ */
+ ParserContext &ctx;
+
+ /**
+ * Reference at an instance of the Callbacks class, used for
+ * modifying the behaviour of the parser (like registering tokens, setting
+ * the data type or changing the whitespace handling mode).
+ */
+ // Callbacks &callbacks;
+
+ /**
+ * Contains the name of the command that is being handled.
+ */
+ std::string name;
+
+ /**
+ * Contains the current state of the state machine.
+ */
+ const State &state;
+
+ /**
+ * Current source code location.
+ */
+ SourceLocation location;
+
+ /**
+ * Constructor of the HandlerData class.
+ *
+ * @param ctx is the parser context the handler should be executed in.
+ * @param callbacks is an instance of Callbacks used to notify
+ * the parser about certain state changes.
+ * @param name is the name of the command that is being handled.
+ * @param state is the state this handler was called for.
+ * @param location is the location at which the handler is created.
+ */
+ HandlerData(ParserContext &ctx,
+ /*Callbacks &callbacks,*/ const std::string &name,
+ const State &state, const SourceLocation &location);
+};
+
+/**
+ * The Handler class provides a context for handling a generic stack element.
+ * It has to be overridden and registered in the StateStack class to form
+ * handlers for concrete XML tags.
+ */
+class Handler {
+private:
+ /**
+ * Structure containing the internal handler data.
+ */
+ const HandlerData handlerData;
+
+ /**
+ * Reference at the current logger. If not nullptr, this will override the
+ * logger from the ParserContext specified in the handlerData.
+ */
+ Logger *internalLogger;
+
+protected:
+ /**
+ * Constructor of the Handler class.
+ *
+ * @param handlerData is a structure containing all data being passed to
+ * the handler.
+ */
+ Handler(const HandlerData &handlerData);
+
+ /**
+ * Returns a reference at the ParserContext.
+ *
+ * @return a reference at the ParserContext.
+ */
+ ParserContext &context();
+
+ /**
+ * Returns a reference at the ParserScope instance.
+ *
+ * @return a reference at the ParserScope instance.
+ */
+ ParserScope &scope();
+
+ /**
+ * Returns a reference at the Manager instance which manages all nodes.
+ *
+ * @return a reference at the Manager instance.
+ */
+ Manager &manager();
+
+ /**
+ * Returns a reference at the Logger instance used for logging error
+ * messages.
+ *
+ * @return a reference at the Logger instance.
+ */
+ Logger &logger();
+
+ /**
+ * Returns the location of the element in the source file, for which this
+ * Handler was created.
+ *
+ * @return the location of the Handler in the source file.
+ */
+ const SourceLocation &location() const;
+
+ /**
+ * Returns the command name for which the handler was created.
+ *
+ * @return a const reference at the command name.
+ */
+ const std::string &name() const;
+
+public:
+ /**
+ * Virtual destructor.
+ */
+ virtual ~Handler();
+
+ /**
+ * Calls the corresponding function in the Callbacks instance. Sets the
+ * whitespace mode that specifies how string data should be processed. The
+ * calls to this function are placed on a stack by the underlying Stack
+ * class.
+ *
+ * @param whitespaceMode specifies one of the three WhitespaceMode constants
+ * PRESERVE, TRIM or COLLAPSE.
+ */
+ void setWhitespaceMode(WhitespaceMode whitespaceMode);
+
+ /**
+ * Calls the corresponding function in the Callbacks instance.
+ * Registers the given token as token that should be reported to the handler
+ * using the "token" function.
+ *
+ * @param token is the token string that should be reported.
+ */
+ void registerToken(const std::string &token);
+
+ /**
+ * Calls the corresponding function in the Callbacks instance.
+ * Unregisters the given token, it will no longer be reported to the handler
+ * using the "token" function.
+ *
+ * @param token is the token string that should be unregistered.
+ */
+ void unregisterToken(const std::string &token);
+
+ /**
+ * Returns the command name for which the handler was created.
+ *
+ * @return a const reference at the command name.
+ */
+ const std::string &getName() const;
+
+ /**
+ * Reference at the State descriptor for which this Handler was created.
+ *
+ * @return a const reference at the constructing State descriptor.
+ */
+ const State &getState() const;
+
+ /**
+ * Sets the internal logger to the given logger instance.
+ *
+ * @param logger is the Logger instance to which the logger should be set.
+ */
+ void setLogger(Logger &logger);
+
+ /**
+ * Resets the logger instance to the logger instance provided in the
+ * ParserContext.
+ */
+ void resetLogger();
+
+ /**
+ * Returns the location of the element in the source file, for which this
+ * Handler was created.
+ *
+ * @return the location of the Handler in the source file.
+ */
+ const SourceLocation &getLocation() const;
+
+ /**
+ * Called when the command that was specified in the constructor is
+ * instantiated.
+ *
+ * @param args is a map from strings to variants (argument name and value).
+ * @return true if the handler was successful in starting the element it
+ * represents, false otherwise.
+ */
+ virtual bool start(Variant::mapType &args) = 0;
+
+ /**
+ * Called before the command for which this handler is defined ends (is
+ * forever removed from the stack).
+ */
+ virtual void end() = 0;
+
+ /**
+ * Called when a new field starts, while the handler is active. This
+ * function should return true if the field is supported, false otherwise.
+ * No error should be logged if the field cannot be started, the caller will
+ * take care of that (since it is always valid to start a default field,
+ * even though the corresponding structure does not have a field, as long as
+ * no data is fed into the field).
+ *
+ * @param isDefault is set to true if the field that is being started is the
+ * default/tree field. The handler should set the value of this variable to
+ * true if the referenced field is indeed the default field.
+ * @param fieldIdx is the numerical index of the field.
+ */
+ virtual bool fieldStart(bool &isDefault, size_t fieldIdx) = 0;
+
+ /**
+ * Called when a previously opened field ends, while the handler is active.
+ * Note that a "fieldStart" and "fieldEnd" are always called alternately.
+ */
+ virtual void fieldEnd() = 0;
+
+ /**
+ * Called whenever an annotation starts while this handler is active. The
+ * function should return true if starting the annotation was successful,
+ * false otherwise.
+ *
+ * @param className is a string variant containing the name of the
+ * annotation class and the location of the name in the source code.
+ * @param args is a map from strings to variants (argument name and value).
+ * @return true if the mentioned annotation could be started here, false
+ * if an error occurred.
+ */
+ virtual bool annotationStart(const Variant &className,
+ Variant::mapType &args) = 0;
+
+ /**
+ * Called whenever an annotation ends while this handler is active. The
+ * function should return true if ending the annotation was successful,
+ * false otherwise.
+ *
+ * @param className is a string variant containing the name of the
+ * annotation class and the location of the class name in the source code.
+ * @param elementName is a string variant containing the name of the
+ * annotation class and the location of the element name in the source code.
+ * @return true if the mentioned annotation could be ended here, false if
+ * an error occurred.
+ */
+ virtual bool annotationEnd(const Variant &className,
+ const Variant &elementName) = 0;
+
+ /**
+ * Called whenever raw data (in the form of a string) is available for the
+ * Handler instance. Should return true if the data could be handled, false
+ * otherwise.
+ *
+ * @param data is a string variant containing the character data and its
+ * location.
+ * @return true if the data could be handled, false otherwise.
+ */
+ virtual bool data(Variant &data) = 0;
+};
+
+/**
+ * HandlerConstructor is a function pointer type used to create concrete
+ * instances of the Handler class.
+ *
+ * @param handlerData is the data that should be passed to the new handler
+ * instance.
+ * @return a newly created handler instance.
+ */
+using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
+
+/**
+ * The EmptyHandler class is used in case no element handler is specified in
+ * the State descriptor. It just accepts all data and does nothing.
+ */
+class EmptyHandler : public Handler {
+protected:
+ using Handler::Handler;
+
+public:
+ bool start(Variant::mapType &args) override;
+ void end() override;
+ bool fieldStart(bool &isDefault, size_t fieldIdx) override;
+ void fieldEnd() override;
+ bool annotationStart(const Variant &className,
+ Variant::mapType &args) override;
+ bool annotationEnd(const Variant &className,
+ const Variant &elementName) override;
+ bool data(Variant &data) override;
+
+ /**
+ * Creates an instance of the EmptyHandler class.
+ */
+ static Handler *create(const HandlerData &handlerData);
+};
+
+/**
+ * The StaticHandler class is used to handle predefined commands which do
+ * neither support annotations, nor multiple fields. Child classes can decide
+ * whether a single data field should be used.
+ */
+class StaticHandler : public Handler {
+protected:
+ using Handler::Handler;
+
+public:
+ bool start(Variant::mapType &args) override;
+ void end() override;
+ bool fieldStart(bool &isDefault, size_t fieldIdx) override;
+ void fieldEnd() override;
+ bool annotationStart(const Variant &className,
+ Variant::mapType &args) override;
+ bool annotationEnd(const Variant &className,
+ const Variant &elementName) override;
+ bool data(Variant &data) override;
+};
+
+/**
+ * The StaticFieldHandler class is used to handle predefined commands which do
+ * neither support annotations, nor multiple fields. Additionally, it captures a
+ * data entry from a single default field.
+ */
+class StaticFieldHandler : public StaticHandler {
+private:
+ /**
+ * Set to the name of the data argument that should be used instead of the
+ * data field, if no data field is given.
+ */
+ std::string argName;
+
+ /**
+ * Set to true, once the "doHandle" function has been called.
+ */
+ bool handled;
+
+ /**
+ * Map containing the arguments given in the start function.
+ */
+ Variant::mapType args;
+
+protected:
+ /**
+ * Constructor of the StaticFieldHandler class.
+ *
+ * @param handlerData is a structure containing the internal data that
+ * should be stored inside the handler.
+ * @param argName is the name of the data argument that -- if present --
+ * should be used instead of the data field. If empty, data is not captured
+ * from the arguments. If both, data in the data field and the argument, are
+ * given, this results in an error.
+ */
+ StaticFieldHandler(const HandlerData &handlerData,
+ const std::string &argName);
+
+ /**
+ * Function that should be overridden in order to handle the field data and
+ * the other arguments. This function is not called if no data was given.
+ *
+ * @param fieldData is the captured field data.
+ * @param args are the arguments that were given in the "start" function.
+ */
+ virtual void doHandle(const Variant &fieldData,
+ Variant::mapType &args) = 0;
+
+public:
+ bool start(Variant::mapType &args) override;
+ void end() override;
+ bool data(Variant &data) override;
+};
+}
+}
+
+#endif /* _OUSIA_PARSER_STACK_HANDLER_HPP_ */
+
diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp
index 94ee82d..d1ea97d 100644
--- a/src/core/parser/stack/ImportIncludeHandler.cpp
+++ b/src/core/parser/stack/ImportIncludeHandler.cpp
@@ -16,50 +16,22 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "ImportIncludeHandler.hpp"
-
+#include <core/model/RootNode.hpp>
#include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
-namespace ousia {
-
-/* ImportIncludeHandler */
-
-void ImportIncludeHandler::start(Variant::mapType &args)
-{
- rel = args["rel"].asString();
- type = args["type"].asString();
- src = args["src"].asString();
- srcInArgs = !src.empty();
-}
+#include "DomainHandler.hpp"
+#include "DocumentHandler.hpp"
+#include "ImportIncludeHandler.hpp"
+#include "State.hpp"
+#include "TypesystemHandler.hpp"
-void ImportIncludeHandler::data(const std::string &data, int field)
-{
- if (srcInArgs) {
- logger().error("\"src\" attribute has already been set");
- return;
- }
- if (field != 0) {
- logger().error("Command has only one field.");
- return;
- }
- src.append(data);
-}
+namespace ousia {
+namespace parser_stack {
/* ImportHandler */
-void ImportHandler::start(Variant::mapType &args)
-{
- ImportIncludeHandler::start(args);
-
- // Make sure imports are still possible
- if (scope().getFlag(ParserFlag::POST_HEAD)) {
- logger().error("Imports must be listed before other commands.",
- location());
- return;
- }
-}
-
-void ImportHandler::end()
+void ImportHandler::doHandle(const Variant &fieldData, Variant::mapType &args)
{
// Fetch the last node and check whether an import is valid at this
// position
@@ -75,8 +47,9 @@ void ImportHandler::end()
// Perform the actual import, register the imported node within the leaf
// node
- Rooted<Node> imported =
- context().import(src, type, rel, leafRootNode->getReferenceTypes());
+ Rooted<Node> imported = context().import(
+ fieldData.asString(), args["type"].asString(), args["rel"].asString(),
+ leafRootNode->getReferenceTypes());
if (imported != nullptr) {
leafRootNode->reference(imported);
}
@@ -84,13 +57,26 @@ void ImportHandler::end()
/* IncludeHandler */
-void IncludeHandler::start(Variant::mapType &args)
+void IncludeHandler::doHandle(const Variant &fieldData, Variant::mapType &args)
{
- ImportIncludeHandler::start(args);
+ context().include(fieldData.asString(), args["type"].asString(),
+ args["rel"].asString(), {&RttiTypes::Node});
}
-void IncludeHandler::end()
-{
- context().include(src, type, rel, {&RttiTypes::Node});
+namespace States {
+const State Import =
+ StateBuilder()
+ .parents({&Document, &Typesystem, &Domain})
+ .elementHandler(ImportHandler::create)
+ .arguments({Argument::String("rel", ""), Argument::String("type", ""),
+ Argument::String("src", "")});
+
+const State Include =
+ StateBuilder()
+ .parent(&All)
+ .elementHandler(IncludeHandler::create)
+ .arguments({Argument::String("rel", ""), Argument::String("type", ""),
+ Argument::String("src", "")});
+}
}
}
diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp
index b0767be..6168639 100644
--- a/src/core/parser/stack/ImportIncludeHandler.hpp
+++ b/src/core/parser/stack/ImportIncludeHandler.hpp
@@ -19,6 +19,9 @@
/**
* @file ImportIncludeHandler.hpp
*
+ * Contains the conceptually similar handlers for the "include" and "import"
+ * commands.
+ *
* @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
*/
@@ -26,51 +29,78 @@
#define _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_
#include <core/common/Variant.hpp>
-#include <core/parser/ParserStack.hpp>
-
-namespace ousia {
-class ImportIncludeHandler : public Handler {
-protected:
- bool srcInArgs = false;
- std::string rel;
- std::string type;
- std::string src;
+#include "Handler.hpp"
-public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
-
- void data(const std::string &data, int field) override;
-};
+namespace ousia {
+namespace parser_stack {
-class ImportHandler : public ImportIncludeHandler {
+/**
+ * The ImportHandler is responsible for handling the "import" command. An import
+ * creates a reference to a specified file. The specified file is parsed (if
+ * this has not already been done) outside of the context of the current file.
+ * If the specified resource has already been parsed, a reference to the already
+ * parsed file is inserted. Imports are only possible before any other content
+ * has been parsed.
+ */
+class ImportHandler : public StaticFieldHandler {
public:
- using ImportIncludeHandler::ImportIncludeHandler;
-
- void start(Variant::mapType &args) override;
-
- void end() override;
-
+ using StaticFieldHandler::StaticFieldHandler;
+
+ void doHandle(const Variant &fieldData,
+ Variant::mapType &args) override;
+
+ /**
+ * Creates a new instance of the ImportHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
- return new ImportHandler{handlerData};
+ return new ImportHandler{handlerData, "src"};
}
};
-class IncludeHandler : public ImportIncludeHandler {
+/**
+ * The IncludeHandler is responsible for handling the "include" command. The
+ * included file is parsed in the context of the current file and will change
+ * the content that is currently being parsed. Includes are possible at (almost)
+ * any position in the source file.
+ */
+class IncludeHandler : public StaticFieldHandler {
public:
- using ImportIncludeHandler::ImportIncludeHandler;
-
- void start(Variant::mapType &args) override;
-
- void end() override;
-
+ using StaticFieldHandler::StaticFieldHandler;
+
+ void doHandle(const Variant &fieldData,
+ Variant::mapType &args) override;
+
+ /**
+ * Creates a new instance of the IncludeHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
- return new IncludeHandler{handlerData};
+ return new IncludeHandler{handlerData, "src"};
}
};
+
+namespace States {
+/**
+ * State representing the "import" command.
+ */
+extern const State Import;
+
+/**
+ * State representing the "include" command.
+ */
+extern const State Include;
+}
+
+}
}
#endif
diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp
new file mode 100644
index 0000000..47f7d2c
--- /dev/null
+++ b/src/core/parser/stack/Stack.cpp
@@ -0,0 +1,550 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <sstream>
+
+#include <core/common/Logger.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/Exceptions.hpp>
+#include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
+
+#include "Handler.hpp"
+#include "Stack.hpp"
+#include "State.hpp"
+
+namespace ousia {
+namespace parser_stack {
+
+/* Class HandlerInfo */
+
+HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {}
+
+HandlerInfo::HandlerInfo(std::shared_ptr<Handler> handler)
+ : handler(handler),
+ fieldIdx(0),
+ valid(true),
+ implicit(false),
+ inField(false),
+ inDefaultField(false),
+ inImplicitDefaultField(false),
+ inValidField(false),
+ hadDefaultField(false)
+{
+}
+
+HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField,
+ bool inDefaultField, bool inImplicitDefaultField,
+ bool inValidField)
+ : handler(nullptr),
+ fieldIdx(0),
+ valid(valid),
+ implicit(implicit),
+ inField(inField),
+ inDefaultField(inDefaultField),
+ inImplicitDefaultField(inImplicitDefaultField),
+ inValidField(inValidField),
+ hadDefaultField(false)
+{
+}
+
+HandlerInfo::~HandlerInfo()
+{
+ // Do nothing
+}
+
+void HandlerInfo::fieldStart(bool isDefault, bool isImplicit, bool isValid)
+{
+ inField = true;
+ inDefaultField = isDefault || isImplicit;
+ inImplicitDefaultField = isImplicit;
+ inValidField = isValid;
+ hadDefaultField = hadDefaultField || inDefaultField;
+ fieldIdx++;
+}
+
+void HandlerInfo::fieldEnd()
+{
+ inField = false;
+ inDefaultField = false;
+ inImplicitDefaultField = false;
+ inValidField = false;
+}
+
+/**
+ * Stub instance of HandlerInfo containing no handler information.
+ */
+static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true};
+
+/* Helper functions */
+
+/**
+ * Returns an Exception that should be thrown when a currently invalid command
+ * is encountered.
+ *
+ * @param name is the name of the command for which no state transition is
+ * found.
+ * @param expected is a set containing the names of the expected commands.
+ */
+static LoggableException buildInvalidCommandException(
+ const std::string &name, const std::set<std::string> &expected)
+{
+ if (expected.empty()) {
+ return LoggableException{
+ std::string{"No nested elements allowed, but got \""} + name +
+ std::string{"\""}};
+ } else {
+ return LoggableException{
+ std::string{"Expected "} +
+ (expected.size() == 1 ? std::string{"\""}
+ : std::string{"one of \""}) +
+ Utils::join(expected, "\", \"") + std::string{"\", but got \""} +
+ name + std::string{"\""}};
+ }
+}
+
+/* Class Stack */
+
+Stack::Stack(ParserContext &ctx,
+ const std::multimap<std::string, const State *> &states)
+ : ctx(ctx), states(states)
+{
+ // If the scope instance is not empty we need to deduce the current parser
+ // state
+ if (!ctx.getScope().isEmpty()) {
+ deduceState();
+ }
+}
+
+Stack::~Stack()
+{
+ while (!stack.empty()) {
+ // Fetch the topmost stack element
+ HandlerInfo &info = currentInfo();
+
+ // It is an error if we're still in a field of an element while the
+ // Stack instance is destroyed. Log that
+ if (handlersValid()) {
+ if (info.inField && !info.implicit &&
+ !info.inImplicitDefaultField) {
+ logger().error(
+ std::string("Reached end of stream, but command \"") +
+ info.handler->getName() +
+ "\" has not ended yet. Command was started here:",
+ info.handler->getLocation());
+ }
+ }
+
+ // Remove the command from the stack
+ endCurrentHandler();
+ }
+}
+
+void Stack::deduceState()
+{
+ // Assemble all states
+ std::vector<const State *> states;
+ for (const auto &e : this->states) {
+ states.push_back(e.second);
+ }
+
+ // Fetch the type signature of the scope and derive all possible states,
+ // abort if no unique parser state was found
+ std::vector<const State *> possibleStates =
+ StateDeductor(ctx.getScope().getStackTypeSignature(), states).deduce();
+ if (possibleStates.size() != 1U) {
+ throw LoggableException(
+ "Error while including file: Cannot deduce parser state.");
+ }
+
+ // Switch to this state by creating a handler, but do not call its start
+ // function
+ const State &state = *possibleStates[0];
+ HandlerConstructor ctor =
+ state.elementHandler ? state.elementHandler : EmptyHandler::create;
+
+ std::shared_ptr<Handler> handler =
+ std::shared_ptr<Handler>{ctor({ctx, "", state, SourceLocation{}})};
+ stack.emplace_back(handler);
+
+ // Set the correct flags for this implicit handler
+ HandlerInfo &info = currentInfo();
+ info.implicit = true;
+ info.fieldStart(true, false, true);
+}
+
+std::set<std::string> Stack::expectedCommands()
+{
+ const State *currentState = &(this->currentState());
+ std::set<std::string> res;
+ for (const auto &v : states) {
+ if (v.second->parents.count(currentState)) {
+ res.insert(v.first);
+ }
+ }
+ return res;
+}
+
+const State &Stack::currentState()
+{
+ return stack.empty() ? States::None : stack.back().handler->getState();
+}
+
+std::string Stack::currentCommandName()
+{
+ return stack.empty() ? std::string{} : stack.back().handler->getName();
+}
+
+const State *Stack::findTargetState(const std::string &name)
+{
+ const State *currentState = &(this->currentState());
+ auto range = states.equal_range(name);
+ for (auto it = range.first; it != range.second; it++) {
+ const StateSet &parents = it->second->parents;
+ if (parents.count(currentState) || parents.count(&States::All)) {
+ return it->second;
+ }
+ }
+
+ return nullptr;
+}
+
+const State *Stack::findTargetStateOrWildcard(const std::string &name)
+{
+ // Try to find the target state with the given name, if none is found, try
+ // to find a matching "*" state.
+ State const *targetState = findTargetState(name);
+ if (targetState == nullptr) {
+ return findTargetState("*");
+ }
+ return targetState;
+}
+
+HandlerInfo &Stack::currentInfo()
+{
+ return stack.empty() ? EmptyHandlerInfo : stack.back();
+}
+// Returns the HandlerInfo directly below the topmost one, or the shared
+// EmptyHandlerInfo stub if the stack holds fewer than two elements.
+HandlerInfo &Stack::lastInfo()
+{
+    const size_t depth = stack.size();
+    if (depth >= 2U) {
+        return stack[depth - 2];
+    }
+    return EmptyHandlerInfo;
+}
+
+// Ends the topmost handler and pops it from the stack. Does nothing if the
+// stack is empty.
+void Stack::endCurrentHandler()
+{
+    if (stack.empty()) {
+        return;
+    }
+
+    HandlerInfo &top = stack.back();
+
+    // Callbacks are suppressed for implicit elements and while any handler on
+    // the stack is marked as invalid
+    const bool notify = !top.implicit && handlersValid();
+    if (notify) {
+        // Close a still open field before ending the element itself
+        if (top.inField) {
+            top.handler->fieldEnd();
+            top.fieldEnd();
+        }
+        top.handler->end();
+    }
+
+    // The element is removed regardless of whether callbacks were issued
+    stack.pop_back();
+}
+
+// Makes sure the current handler is inside a field, implicitly starting its
+// default field if necessary.
+//
+// Returns true if the current handler is in a field afterwards (or if there is
+// no handler at all), false if no default field could be started. This
+// function never removes the handler from the stack: every caller reacts to a
+// "false" result by calling endCurrentHandler() itself, so popping here as
+// well would remove the parent handler too and leave "info" dangling.
+bool Stack::ensureHandlerIsInField()
+{
+    // If the current handler is not in a field (and actually has a handler)
+    // try to start a default field
+    HandlerInfo &info = currentInfo();
+    if (!info.inField && info.handler != nullptr) {
+        // Abort if the element already had a default field
+        if (info.hadDefaultField) {
+            return false;
+        }
+
+        // Try to start a new default field, abort if this did not work. The
+        // handler still receives the matching fieldEnd notification; ending
+        // the handler itself is left to the caller.
+        bool isDefault = true;
+        if (!info.handler->fieldStart(isDefault, info.fieldIdx)) {
+            info.handler->fieldEnd();
+            return false;
+        }
+
+        // Mark the field as started (default, implicit, valid)
+        info.fieldStart(true, true, true);
+    }
+    return true;
+}
+
+// Returns true if no element on the stack has its "valid" flag cleared (also
+// true for an empty stack).
+bool Stack::handlersValid()
+{
+    bool allValid = true;
+    for (const HandlerInfo &entry : stack) {
+        if (!entry.valid) {
+            allValid = false;
+            break;
+        }
+    }
+    return allValid;
+}
+
+// Shorthand for the Logger instance stored in the parser context.
+Logger &Stack::logger()
+{
+    return ctx.getLogger();
+}
+
+// Handles a new command: finds a state that may follow the current one,
+// unrolls the stack until the command can be placed, then instantiates the
+// corresponding handler and calls its "start" method.
+//
+// Throws a LoggableException if the name is not a valid namespaced identifier
+// or if the command is not allowed inside the currently open field.
+void Stack::command(const Variant &name, const Variant::mapType &args)
+{
+    // Make sure the given identifier is valid (preventing "*" from being
+    // maliciously passed to this function)
+    if (!Utils::isNamespacedIdentifier(name.asString())) {
+        throw LoggableException(std::string("Invalid identifier \"") +
+                                    name.asString() + std::string("\""),
+                                name);
+    }
+
+    while (true) {
+        // Try to find a target state for the given command. If none can be
+        // found and the current command does not have an open field, end the
+        // current command and retry one level up; otherwise this is an
+        // exception
+        const State *targetState = findTargetStateOrWildcard(name.asString());
+        if (targetState == nullptr) {
+            if (!currentInfo().inField) {
+                endCurrentHandler();
+                continue;
+            } else {
+                throw buildInvalidCommandException(name.asString(),
+                                                   expectedCommands());
+            }
+        }
+
+        // Make sure we're currently inside a field
+        if (!ensureHandlerIsInField()) {
+            endCurrentHandler();
+            continue;
+        }
+
+        // Fork the logger. Messages are only committed if the command is
+        // actually kept on the stack (see the end of the loop body)
+        LoggerFork loggerFork = logger().fork();
+
+        // Instantiate the handler and push it onto the stack
+        HandlerConstructor ctor = targetState->elementHandler
+                                      ? targetState->elementHandler
+                                      : EmptyHandler::create;
+        std::shared_ptr<Handler> handler{
+            ctor({ctx, name.asString(), *targetState, name.getLocation()})};
+        stack.emplace_back(handler);
+
+        // Fetch the HandlerInfo for the parent element and the current
+        // element. This must happen after the emplace_back above, which may
+        // reallocate the stack.
+        HandlerInfo &parentInfo = lastInfo();
+        HandlerInfo &info = currentInfo();
+
+        // Call the "start" method of the handler, store the result of the
+        // start method as the validity of the handler -- do not call the start
+        // method if the stack is currently invalid (as this may cause further,
+        // unwanted errors)
+        bool validStack = handlersValid();
+        info.valid = false;
+        if (validStack) {
+            // Canonicalize the arguments (if this has not already been done),
+            // allow additional arguments
+            Variant::mapType canonicalArgs = args;
+            targetState->arguments.validateMap(canonicalArgs, loggerFork, true);
+
+            handler->setLogger(loggerFork);
+            try {
+                info.valid = handler->start(canonicalArgs);
+            }
+            // Catch by const reference: avoids copying and slicing the
+            // exception object
+            catch (const LoggableException &ex) {
+                loggerFork.log(ex);
+            }
+            handler->resetLogger();
+        }
+
+        // We started the command within an implicit default field and it is
+        // not valid -- remove both the new handler and the parent field from
+        // the stack. The logger fork is dropped without being committed.
+        if (!info.valid && parentInfo.inImplicitDefaultField) {
+            endCurrentHandler();
+            endCurrentHandler();
+            continue;
+        }
+
+        // If we ended up here, starting the command may or may not have
+        // worked, but after all, we cannot unroll the stack any further.
+        // Update the "valid" flag, commit any potential error messages and
+        // return.
+        info.valid = parentInfo.valid && info.valid;
+        loggerFork.commit();
+        return;
+    }
+}
+
+// Passes character data to the current handler, unrolling the stack as long
+// as the data cannot be placed in an implicitly opened default field.
+//
+// Throws a LoggableException if there is no command on the stack.
+void Stack::data(const Variant &data)
+{
+    while (true) {
+        // Check whether there is any command the data can be sent to
+        if (stack.empty()) {
+            throw LoggableException("No command here to receive data.");
+        }
+
+        // Fetch the current command handler information
+        HandlerInfo &info = currentInfo();
+
+        // Make sure the current handler has an open field
+        if (!ensureHandlerIsInField()) {
+            endCurrentHandler();
+            continue;
+        }
+
+        // If this field should not get any data, log an error and do not call
+        // the "data" handler
+        if (!info.inValidField) {
+            logger().error("Did not expect any data here", data);
+        }
+
+        if (handlersValid() && info.inValidField) {
+            // Fork the logger and set it as temporary logger for the "data"
+            // method. We only want to keep error messages if this was not a
+            // try to implicitly open a default field.
+            LoggerFork loggerFork = logger().fork();
+            info.handler->setLogger(loggerFork);
+
+            // Pass a copy of the data to the current Handler instance
+            bool valid = false;
+            try {
+                Variant dataCopy = data;
+                valid = info.handler->data(dataCopy);
+            }
+            // Catch by const reference: avoids copying and slicing the
+            // exception object
+            catch (const LoggableException &ex) {
+                loggerFork.log(ex);
+            }
+
+            // Reset the logger instance as soon as possible
+            info.handler->resetLogger();
+
+            // If placing the data here failed and we're currently in an
+            // implicitly opened field, just unroll the stack to the next field
+            // and try again (the logger fork is dropped without a commit)
+            if (!valid && info.inImplicitDefaultField) {
+                endCurrentHandler();
+                continue;
+            }
+
+            // Commit the content of the logger fork. Do not change the valid
+            // flag.
+            loggerFork.commit();
+        }
+
+        // There was no reason to unroll the stack any further, so return
+        return;
+    }
+}
+
+// Explicitly starts a field for the current command.
+//
+// Throws a LoggableException if the stack is empty. If the current command
+// already is inside a field, an error is logged and the call is ignored.
+void Stack::fieldStart(bool isDefault)
+{
+    // Make sure the current handler stack is not empty
+    if (stack.empty()) {
+        throw LoggableException(
+            "No command for which a field could be started");
+    }
+
+    // Fetch the information attached to the current handler
+    HandlerInfo &info = currentInfo();
+    if (info.inField) {
+        logger().error(
+            "Got field start, but there is no command for which to start the "
+            "field.");
+        return;
+    }
+
+    // Copy the isDefault flag to a local variable, the fieldStart method will
+    // write into this variable
+    bool defaultField = isDefault;
+
+    // Do not call the "fieldStart" function if we're in an invalid subtree
+    bool valid = false;
+    if (handlersValid()) {
+        try {
+            valid = info.handler->fieldStart(defaultField, info.fieldIdx);
+        }
+        // Catch by const reference: avoids copying and slicing the exception
+        // object
+        catch (const LoggableException &ex) {
+            logger().log(ex);
+        }
+        if (!valid && !defaultField) {
+            logger().error(
+                std::string("Cannot start a new field here (index ") +
+                std::to_string(info.fieldIdx + 1) +
+                std::string("), field does not exist"));
+        }
+    }
+
+    // Mark the field as started (explicit field, hence not implicit); the
+    // field is tracked as open even if the handler rejected it
+    info.fieldStart(defaultField, false, valid);
+}
+
+// Ends the currently open field of the current command.
+//
+// Throws a LoggableException if the stack is empty. If the current command is
+// not inside a field, an error is logged and the call is ignored.
+void Stack::fieldEnd()
+{
+    // Make sure the current handler stack is not empty
+    if (stack.empty()) {
+        throw LoggableException("No command for which a field could be ended");
+    }
+
+    // Fetch the information attached to the current handler
+    HandlerInfo &info = currentInfo();
+    if (!info.inField) {
+        logger().error(
+            "Got field end, but there is no command for which to end the "
+            "field.");
+        return;
+    }
+
+    // Only continue if the current handler stack is in a valid state, do not
+    // call the fieldEnd function if something went wrong before
+    if (handlersValid()) {
+        try {
+            info.handler->fieldEnd();
+        }
+        // Catch by const reference: avoids copying and slicing the exception
+        // object
+        catch (const LoggableException &ex) {
+            logger().log(ex);
+        }
+    }
+
+    // This command no longer is in a field
+    info.fieldEnd();
+
+    // As soon as this command had a default field, remove it from the stack
+    if (info.hadDefaultField) {
+        endCurrentHandler();
+    }
+}
+
+// Not implemented yet: annotation start events are currently ignored, neither
+// argument is used.
+void Stack::annotationStart(const Variant &className, const Variant &args)
+{
+ // TODO
+}
+
+// Not implemented yet: annotation end events are currently ignored, neither
+// argument is used.
+void Stack::annotationEnd(const Variant &className, const Variant &elementName)
+{
+ // TODO
+}
+
+// Not implemented yet: token events are currently ignored, the argument is
+// not used.
+void Stack::token(Variant token)
+{
+ // TODO
+}
+}
+}
+
diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp
new file mode 100644
index 0000000..76eefd9
--- /dev/null
+++ b/src/core/parser/stack/Stack.hpp
@@ -0,0 +1,341 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Stack.hpp
+ *
+ * Helper classes for document or description parsers. Contains the
+ * Stack class, which is a pushdown automaton responsible for
+ * accepting commands in the correct order and calling specified handlers.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_PARSER_STACK_STACK_HPP_
+#define _OUSIA_PARSER_STACK_STACK_HPP_
+
+#include <cstdint>
+
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+#include <core/common/Variant.hpp>
+#include <core/parser/Parser.hpp>
+
+namespace ousia {
+
+// Forward declarations
+class ParserContext;
+class Logger;
+
+namespace parser_stack {
+
+// Forward declarations
+class Handler;
+class State;
+
+/**
+ * The HandlerInfo class is used internally by the stack to associate additional
+ * (mutable) data with a handler instance. One HandlerInfo is stored per
+ * element on the internal stack of the Stack class.
+ */
+class HandlerInfo {
+public:
+ /**
+ * Pointer pointing at the actual handler instance. May be nullptr (the
+ * Stack class checks for this before starting a default field).
+ */
+ std::shared_ptr<Handler> handler;
+
+ /**
+ * Next field index to be passed to the "fieldStart" function of the Handler
+ * class.
+ */
+ size_t fieldIdx;
+
+ /**
+ * Set to true if the handler is valid (which is the case if the "start"
+ * method has returned true). If the handler is invalid, no more calls are
+ * directed at it until it can be removed from the stack.
+ */
+ bool valid : 1;
+
+ /**
+ * Set to true if this is an implicit handler, that was created when the
+ * current stack state was deduced. No "fieldEnd" or "end" callbacks are
+ * issued for implicit handlers when they are removed from the stack.
+ */
+ bool implicit : 1;
+
+ /**
+ * Set to true if the handler currently is in a field.
+ */
+ bool inField : 1;
+
+ /**
+ * Set to true if the handler currently is in the default field.
+ */
+ bool inDefaultField : 1;
+
+ /**
+ * Set to true if the handler currently is in an implicitly started default
+ * field.
+ */
+ bool inImplicitDefaultField : 1;
+
+ /**
+ * Set to false if this field is only opened pro-forma and does not accept
+ * any data. Otherwise set to true.
+ */
+ bool inValidField : 1;
+
+ /**
+ * Set to true, if the default field was already started.
+ */
+ bool hadDefaultField : 1;
+
+ /**
+ * Default constructor of the HandlerInfo class.
+ */
+ HandlerInfo();
+ /**
+ * Constructor of the HandlerInfo class, allows to set the flags manually.
+ * Note that there is no parameter for the "hadDefaultField" flag.
+ *
+ * @param valid is the initial value of the "valid" flag.
+ * @param implicit is the initial value of the "implicit" flag.
+ * @param inField is the initial value of the "inField" flag.
+ * @param inDefaultField is the initial value of the "inDefaultField" flag.
+ * @param inImplicitDefaultField is the initial value of the
+ * "inImplicitDefaultField" flag.
+ * @param inValidField is the initial value of the "inValidField" flag.
+ */
+ HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField,
+ bool inImplicitDefaultField, bool inValidField);
+
+ /**
+ * Constructor of the HandlerInfo class, taking a shared_ptr to the handler
+ * to which additional information should be attached.
+ *
+ * @param handler is the handler instance this HandlerInfo belongs to.
+ */
+ HandlerInfo(std::shared_ptr<Handler> handler);
+
+ /**
+ * Destructor of the HandlerInfo class (to allow Handler to be forward
+ * declared).
+ */
+ ~HandlerInfo();
+
+ /**
+ * Updates the "field" flags according to a "fieldStart" event.
+ *
+ * @param isDefault should be true if the started field is the default
+ * field.
+ * @param isImplicit should be true if the field was started implicitly by
+ * the stack and not by an explicit "fieldStart" event.
+ * @param isValid should be true if the handler accepted the field.
+ */
+ void fieldStart(bool isDefault, bool isImplicit, bool isValid);
+
+ /**
+ * Updates the "fields" flags according to a "fieldEnd" event.
+ */
+ void fieldEnd();
+};
+
+/**
+ * The Stack class is a pushdown automaton responsible for turning a command
+ * stream into a tree of Node instances. It does so by following a state
+ * transition graph and creating a set of Handler instances, which are placed
+ * on the stack.
+ */
+class Stack {
+private:
+ /**
+ * Reference at the parser context.
+ */
+ ParserContext &ctx;
+
+ /**
+ * Map containing all registered command names and the corresponding
+ * state descriptors. Only a reference is stored, the map is not copied.
+ */
+ const std::multimap<std::string, const State *> &states;
+
+ /**
+ * Internal stack used for managing the currently active Handler instances.
+ */
+ std::vector<HandlerInfo> stack;
+
+ /**
+ * Returns a reference at the Logger instance stored within the context.
+ */
+ Logger &logger();
+
+ /**
+ * Used internally to get all expected command names for the current state.
+ * This function is used to build error messages.
+ *
+ * @return a set of strings containing the names of the expected commands.
+ */
+ std::set<std::string> expectedCommands();
+
+ /**
+ * Returns the targetState for a command with the given name that can be
+ * reached from the current state.
+ *
+ * @param name is the name of the requested command.
+ * @return nullptr if no target state was found, a pointer at the target
+ * state otherwise.
+ */
+ const State *findTargetState(const std::string &name);
+
+ /**
+ * Returns the targetState for a command with the given name that can be
+ * reached from the current state, also including the wildcard "*" state.
+ * This function does not validate the name itself; the "command" function
+ * checks that the name is a valid identifier before calling it.
+ *
+ * @param name is the name of the requested command.
+ * @return nullptr if no target state was found, a pointer at the target
+ * state otherwise.
+ */
+ const State *findTargetStateOrWildcard(const std::string &name);
+
+ /**
+ * Tries to reconstruct the parser state from the Scope instance of the
+ * ParserContext given in the constructor. This functionality is needed for
+ * including files, as the Parser of the included file needs to be brought
+ * to an equivalent state as the one in the including file.
+ */
+ void deduceState();
+
+ /**
+ * Returns a reference at the current HandlerInfo instance (or a stub
+ * HandlerInfo instance if the stack is empty).
+ */
+ HandlerInfo &currentInfo();
+
+ /**
+ * Returns a reference at the last HandlerInfo instance, i.e. the one
+ * directly below the current one (or a stub HandlerInfo instance if the
+ * stack has fewer than two elements).
+ */
+ HandlerInfo &lastInfo();
+
+ /**
+ * Ends the current handler and removes the corresponding element from the
+ * stack. Does nothing if the stack is empty.
+ */
+ void endCurrentHandler();
+
+ /**
+ * Tries to start a default field for the current handler, if currently the
+ * handler is not inside a field and did not have a default field yet.
+ *
+ * @return true if the handler is inside a field, false if no field could
+ * be started.
+ */
+ bool ensureHandlerIsInField();
+
+ /**
+ * Returns true if all handlers on the stack are currently valid, or false
+ * if at least one handler is invalid.
+ *
+ * @return true if all handlers on the stack are valid.
+ */
+ bool handlersValid();
+
+public:
+ /**
+ * Creates a new instance of the Stack class.
+ *
+ * @param ctx is the parser context the parser stack is working on.
+ * @param states is a map containing the command names and pointers at the
+ * corresponding State instances.
+ */
+ Stack(ParserContext &ctx,
+ const std::multimap<std::string, const State *> &states);
+
+ /**
+ * Destructor of the Stack class.
+ */
+ ~Stack();
+
+ /**
+ * Returns the state the Stack instance currently is in.
+ *
+ * @return the state of the currently active Handler instance or
+ * States::None if no handler is on the stack.
+ */
+ const State &currentState();
+
+ /**
+ * Returns the command name that is currently being handled.
+ *
+ * @return the name of the command currently being handled by the active
+ * Handler instance or an empty string if no handler is currently active.
+ */
+ std::string currentCommandName();
+
+ /**
+ * Function that should be called whenever a new command is reached. Throws
+ * a LoggableException if the name is not a valid namespaced identifier or
+ * if the command is not allowed within the currently open field.
+ *
+ * @param name is the name of the command (including the namespace
+ * separator ':') and its corresponding location. Must be a string variant.
+ * @param args is a map containing the arguments that were passed to the
+ * command.
+ */
+ void command(const Variant &name, const Variant::mapType &args);
+
+ /**
+ * Function that should be called whenever character data is found in the
+ * input stream. May only be called if there currently is a command on the
+ * stack, otherwise a LoggableException is thrown.
+ *
+ * @param data is a string variant containing the data that has been found.
+ */
+ void data(const Variant &data);
+
+ /**
+ * Function that should be called whenever a new field starts. Throws a
+ * LoggableException if there is no command on the stack. If the current
+ * command already is inside a field, an error is logged and the call is
+ * ignored.
+ *
+ * @param isDefault should be set to true if the started field explicitly
+ * is the default field.
+ */
+ void fieldStart(bool isDefault);
+
+ /**
+ * Function that should be called whenever a field ends. Throws a
+ * LoggableException if there is no command on the stack. If the current
+ * command is not inside a field, an error is logged and the call is
+ * ignored.
+ */
+ void fieldEnd();
+
+ /**
+ * Function that should be called whenever an annotation starts. Not
+ * implemented yet, the call currently has no effect.
+ *
+ * @param className is the name of the annotation class.
+ * @param args is a map variant containing the arguments that were passed
+ * to the annotation.
+ */
+ void annotationStart(const Variant &className, const Variant &args);
+
+ /**
+ * Function that should be called whenever an annotation ends. Not
+ * implemented yet, the call currently has no effect.
+ *
+ * @param className is the name of the annotation class that was ended.
+ * @param elementName is the name of the annotation that was ended.
+ */
+ void annotationEnd(const Variant &className, const Variant &elementName);
+
+ /**
+ * Function that should be called whenever a previously registered token
+ * is found in the input stream. Not implemented yet, the call currently
+ * has no effect.
+ *
+ * @param token is a string variant containing the token that was
+ * encountered.
+ */
+ void token(Variant token);
+};
+}
+}
+
+#endif /* _OUSIA_PARSER_STACK_STACK_HPP_ */
+
diff --git a/src/core/parser/ParserState.cpp b/src/core/parser/stack/State.cpp
index f635d86..d72f533 100644
--- a/src/core/parser/ParserState.cpp
+++ b/src/core/parser/stack/State.cpp
@@ -16,88 +16,97 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "ParserState.hpp"
+#include "State.hpp"
namespace ousia {
+namespace parser_stack {
-/* Class ParserState */
+/* Class State */
-ParserState::ParserState() : elementHandler(nullptr) {}
+// Default constructor: no element handler, annotations not supported. The
+// flag must be initialised explicitly, otherwise a default-constructed State
+// (e.g. the one inside StateBuilder) carries an indeterminate value.
+State::State() : elementHandler(nullptr), supportsAnnotations(false) {}
-ParserState::ParserState(ParserStateSet parents, Arguments arguments,
+State::State(StateSet parents, Arguments arguments,
RttiSet createdNodeTypes,
- HandlerConstructor elementHandler)
+ HandlerConstructor elementHandler,
+ bool supportsAnnotations)
: parents(parents),
arguments(arguments),
createdNodeTypes(createdNodeTypes),
- elementHandler(elementHandler)
+ elementHandler(elementHandler),
+ supportsAnnotations(supportsAnnotations)
{
}
-ParserState::ParserState(const ParserStateBuilder &builder)
- : ParserState(builder.build())
+State::State(const StateBuilder &builder)
+ : State(builder.build())
{
}
-/* Class ParserStateBuilder */
+/* Class StateBuilder */
-ParserStateBuilder &ParserStateBuilder::copy(const ParserState &state)
+StateBuilder &StateBuilder::copy(const State &state)
{
this->state = state;
return *this;
}
-ParserStateBuilder &ParserStateBuilder::parent(const ParserState *parent)
+StateBuilder &StateBuilder::parent(const State *parent)
{
- state.parents = ParserStateSet{parent};
+ state.parents = StateSet{parent};
return *this;
}
-ParserStateBuilder &ParserStateBuilder::parents(const ParserStateSet &parents)
+StateBuilder &StateBuilder::parents(const StateSet &parents)
{
state.parents = parents;
return *this;
}
-ParserStateBuilder &ParserStateBuilder::arguments(const Arguments &arguments)
+StateBuilder &StateBuilder::arguments(const Arguments &arguments)
{
state.arguments = arguments;
return *this;
}
-ParserStateBuilder &ParserStateBuilder::createdNodeType(const Rtti *type)
+StateBuilder &StateBuilder::createdNodeType(const Rtti *type)
{
state.createdNodeTypes = RttiSet{type};
return *this;
}
-ParserStateBuilder &ParserStateBuilder::createdNodeTypes(const RttiSet &types)
+StateBuilder &StateBuilder::createdNodeTypes(const RttiSet &types)
{
state.createdNodeTypes = types;
return *this;
}
-ParserStateBuilder &ParserStateBuilder::elementHandler(
+StateBuilder &StateBuilder::elementHandler(
HandlerConstructor elementHandler)
{
state.elementHandler = elementHandler;
return *this;
}
-const ParserState &ParserStateBuilder::build() const { return state; }
+// Stores the given "supportsAnnotations" flag in the state being built and
+// returns this builder to allow method chaining.
+StateBuilder &StateBuilder::supportsAnnotations(bool supportsAnnotations)
+{
+ state.supportsAnnotations = supportsAnnotations;
+ return *this;
+}
-/* Class ParserStateDeductor */
+// Returns a reference at the State instance held by this builder; no copy is
+// made here.
+const State &StateBuilder::build() const { return state; }
-ParserStateDeductor::ParserStateDeductor(
+/* Class StateDeductor */
+
+StateDeductor::StateDeductor(
std::vector<const Rtti *> signature,
- std::vector<const ParserState *> states)
+ std::vector<const State *> states)
: tbl(signature.size()),
signature(std::move(signature)),
states(std::move(states))
{
}
-bool ParserStateDeductor::isActive(size_t d, const ParserState *s)
+bool StateDeductor::isActive(size_t d, const State *s)
{
// Lookup the "active" state of (d, s), if it was not already set
// (e.second is true) we'll have to calculate it
@@ -123,7 +132,7 @@ bool ParserStateDeductor::isActive(size_t d, const ParserState *s)
// Check whether any of the parent nodes were active -- either for
// the previous element (if this one is generative) or for the
// current element (assuming this node was not generative)
- for (const ParserState *parent : s->parents) {
+ for (const State *parent : s->parents) {
if ((isGenerative && isActive(d - 1, parent)) ||
isActive(d, parent)) {
res = true;
@@ -136,9 +145,9 @@ bool ParserStateDeductor::isActive(size_t d, const ParserState *s)
return res;
}
-std::vector<const ParserState *> ParserStateDeductor::deduce()
+std::vector<const State *> StateDeductor::deduce()
{
- std::vector<const ParserState *> res;
+ std::vector<const State *> res;
if (!signature.empty()) {
const size_t D = signature.size();
for (auto s : states) {
@@ -153,9 +162,10 @@ std::vector<const ParserState *> ParserStateDeductor::deduce()
/* Constant initializations */
-namespace ParserStates {
-const ParserState All;
-const ParserState None;
+namespace States {
+const State All;
+const State None;
+}
}
}
diff --git a/src/core/parser/ParserState.hpp b/src/core/parser/stack/State.hpp
index 6487fdd..4766235 100644
--- a/src/core/parser/ParserState.hpp
+++ b/src/core/parser/stack/State.hpp
@@ -17,10 +17,10 @@
*/
/**
- * @file ParserState.hpp
+ * @file State.hpp
*
- * Defines the ParserState class used within the ParserStack pushdown
- * automaton and the ParserStateBuilder class for convenient construction of
+ * Defines the State class used within the Stack pushdown
+ * automaton and the StateBuilder class for convenient construction of
* such classes.
*
* @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
@@ -33,12 +33,14 @@
#include <core/common/Rtti.hpp>
#include <core/common/Argument.hpp>
+#include <core/common/Whitespace.hpp>
namespace ousia {
+namespace parser_stack {
// Forward declarations
-class ParserStateBuilder;
-class ParserState;
+class StateBuilder;
+class State;
class HandlerData;
class Handler;
using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
@@ -47,17 +49,17 @@ using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
* Set of pointers of parser states -- used for specifying a set of parent
* states.
*/
-using ParserStateSet = std::unordered_set<const ParserState *>;
+using StateSet = std::unordered_set<const State *>;
/**
- * Class used for the complete specification of a ParserState. Stores possible
+ * Class used for the complete specification of a State. Stores possible
* parent states, state handlers and arguments to be passed to that state.
*/
-struct ParserState {
+struct State {
/**
* Vector containing all possible parent states.
*/
- ParserStateSet parents;
+ StateSet parents;
/**
* Descriptor of the arguments that should be passed to the handler.
@@ -66,8 +68,8 @@ struct ParserState {
/**
* Set containing the types of the nodes that may be created in this
- * ParserState. This information is needed for Parsers to reconstruct the
- * current ParserState from a given ParserScope when a file is included.
+ * State. This information is needed for Parsers to reconstruct the
+ * current State from a given ParserScope when a file is included.
*/
RttiSet createdNodeTypes;
@@ -79,109 +81,119 @@ struct ParserState {
HandlerConstructor elementHandler;
/**
+ * Set to true if this handler does support annotations. This is almost
+ * always false (e.g. all description handlers), except for document
+ * element handlers.
+ */
+ bool supportsAnnotations;
+
+ /**
* Default constructor, initializes the handlers with nullptr.
*/
- ParserState();
+ State();
/**
- * Constructor taking values for all fields. Use the ParserStateBuilder
- * class for a more convenient construction of ParserState instances.
+ * Constructor taking values for all fields. Use the StateBuilder
+ * class for a more convenient construction of State instances.
*
* @param parents is a vector containing all possible parent states.
* @param arguments is a descriptor of arguments that should be passed to
* the handler.
* @param createdNodeTypes is a set containing the types of the nodes tha
- * may be created in this ParserState. This information is needed for
- * Parsers to reconstruct the current ParserState from a given ParserScope
+ * may be created in this State. This information is needed for
+ * Parsers to reconstruct the current State from a given ParserScope
* when a file is included.
* @param elementHandler is a pointer at a function which creates a new
* concrete Handler instance for the elements described by this state. May
* be nullptr in which case no handler instance is created.
+ * @param supportsAnnotations specifies whether annotations are supported
+ * here at all.
*/
- ParserState(ParserStateSet parents, Arguments arguments = Arguments{},
+ State(StateSet parents, Arguments arguments = Arguments{},
RttiSet createdNodeTypes = RttiSet{},
- HandlerConstructor elementHandler = nullptr);
+ HandlerConstructor elementHandler = nullptr,
+ bool supportsAnnotations = false);
/**
- * Creates this ParserState from the given ParserStateBuilder instance.
+ * Creates this State from the given StateBuilder instance.
*/
- ParserState(const ParserStateBuilder &builder);
+ State(const StateBuilder &builder);
};
/**
- * The ParserStateBuilder class is a class used for conveniently building new
- * ParserState instances.
+ * The StateBuilder class is a class used for conveniently building new
+ * State instances.
*/
-class ParserStateBuilder {
+class StateBuilder {
private:
/**
- * ParserState instance that is currently being built by the
- * ParserStateBuilder.
+ * State instance that is currently being built by the
+ * StateBuilder.
*/
- ParserState state;
+ State state;
public:
/**
- * Copies the ParserState instance and uses it as internal state. Overrides
- * all changes made by the ParserStateBuilder.
+ * Copies the State instance and uses it as internal state. Overrides
+ * all changes made by the StateBuilder.
*
* @param state is the state that should be copied.
- * @return a reference at this ParserStateBuilder instance for method
+ * @return a reference at this StateBuilder instance for method
* chaining.
*/
- ParserStateBuilder &copy(const ParserState &state);
+ StateBuilder &copy(const State &state);
/**
* Sets the possible parent states to the single given parent element.
*
- * @param parent is a pointer at the parent ParserState instance that should
+ * @param parent is a pointer at the parent State instance that should
* be the possible parent state.
- * @return a reference at this ParserStateBuilder instance for method
+ * @return a reference at this StateBuilder instance for method
* chaining.
*/
- ParserStateBuilder &parent(const ParserState *parent);
+ StateBuilder &parent(const State *parent);
/**
- * Sets the ParserState instances in the given ParserStateSet as the list of
+ * Sets the State instances in the given StateSet as the list of
* supported parent states.
*
- * @param parents is a set of pointers at ParserState instances that should
+ * @param parents is a set of pointers at State instances that should
* be the possible parent states.
- * @return a reference at this ParserStateBuilder instance for method
+ * @return a reference at this StateBuilder instance for method
* chaining.
*/
- ParserStateBuilder &parents(const ParserStateSet &parents);
+ StateBuilder &parents(const StateSet &parents);
/**
* Sets the arguments that should be passed to the parser state handler to
* those given as argument.
*
* @param arguments is the Arguments instance describing the Arguments that
- * should be parsed to a Handler for this ParserState.
- * @return a reference at this ParserStateBuilder instance for method
+ * should be parsed to a Handler for this State.
+ * @return a reference at this StateBuilder instance for method
* chaining.
*/
- ParserStateBuilder &arguments(const Arguments &arguments);
+ StateBuilder &arguments(const Arguments &arguments);
/**
* Sets the Node types this state may produce to the given Rtti descriptor.
*
* @param type is the Rtti descriptor of the Type that may be produced by
* this state.
- * @return a reference at this ParserStateBuilder instance for method
+ * @return a reference at this StateBuilder instance for method
* chaining.
*/
- ParserStateBuilder &createdNodeType(const Rtti *type);
+ StateBuilder &createdNodeType(const Rtti *type);
/**
* Sets the Node types this state may produce to the given Rtti descriptors.
*
* @param types is a set of Rtti descriptors of the Types that may be
* produced by this state.
- * @return a reference at this ParserStateBuilder instance for method
+ * @return a reference at this StateBuilder instance for method
* chaining.
*/
- ParserStateBuilder &createdNodeTypes(const RttiSet &types);
+ StateBuilder &createdNodeTypes(const RttiSet &types);
/**
* Sets the constructor for the element handler. The constructor creates a
@@ -191,31 +203,42 @@ public:
*
* @param elementHandler is the HandlerConstructor that should create a
* new Handler instance.
- * @return a reference at this ParserStateBuilder instance for method
+ * @return a reference at this StateBuilder instance for method
* chaining.
*/
- ParserStateBuilder &elementHandler(HandlerConstructor elementHandler);
+ StateBuilder &elementHandler(HandlerConstructor elementHandler);
/**
- * Returns a reference at the internal ParserState instance that was built
- * using the ParserStateBuilder.
+ * Sets the state of the "supportsAnnotations" flags (default value is
+ * false)
*
- * @return the built ParserState.
+ * @param supportsAnnotations should be set to true, if annotations are
+ * supported for the handlers associated with this document.
+ * @return a reference at this StateBuilder instance for method
+ * chaining.
*/
- const ParserState &build() const;
+ StateBuilder &supportsAnnotations(bool supportsAnnotations);
+
+ /**
+ * Returns a reference at the internal State instance that was built
+ * using the StateBuilder.
+ *
+ * @return the built State.
+ */
+ const State &build() const;
};
/**
- * Class used to deduce the ParserState a Parser is currently in based on the
+ * Class used to deduce the State a Parser is currently in based on the
* types of the Nodes that currently are on the ParserStack. Uses dynamic
* programming in order to solve this problem.
*/
-class ParserStateDeductor {
+class StateDeductor {
public:
/**
* Type containing the dynamic programming table.
*/
- using Table = std::vector<std::unordered_map<const ParserState *, bool>>;
+ using Table = std::vector<std::unordered_map<const State *, bool>>;
private:
/**
@@ -231,7 +254,7 @@ private:
/**
* List of states that should be checked for being active.
*/
- const std::vector<const ParserState *> states;
+ const std::vector<const State *> states;
/**
* Used internally to check whether the given parser stack s may have been
@@ -239,20 +262,20 @@ private:
*
* @param d is the signature element.
* @param s is the parser state.
- * @return true if the the given ParserState may have been active.
+ * @return true if the given State may have been active.
*/
- bool isActive(size_t d, const ParserState *s);
+ bool isActive(size_t d, const State *s);
public:
/**
- * Constructor of the ParserStateDeductor class.
+ * Constructor of the StateDeductor class.
*
* @param signature a Node type signature describing the types of the nodes
* which currently reside on e.g. the ParserScope stack.
* @param states is a list of states that should be checked.
*/
- ParserStateDeductor(std::vector<const Rtti *> signature,
- std::vector<const ParserState *> states);
+ StateDeductor(std::vector<const Rtti *> signature,
+ std::vector<const State *> states);
/**
* Selects all active states from the given states. Only considers those
@@ -260,23 +283,24 @@ public:
*
* @return a list of states that may actually have been active.
*/
- std::vector<const ParserState *> deduce();
+ std::vector<const State *> deduce();
};
/**
- * The ParserStates namespace contains all the global state constants used
+ * The States namespace contains all the global state constants used
* in the ParserStack class.
*/
-namespace ParserStates {
+namespace States {
/**
* State representing all states.
*/
-extern const ParserState All;
+extern const State All;
/**
* State representing the initial state.
*/
-extern const ParserState None;
+extern const State None;
+}
}
}
diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp
index 2cc7dfb..8fd9525 100644
--- a/src/core/parser/stack/TypesystemHandler.cpp
+++ b/src/core/parser/stack/TypesystemHandler.cpp
@@ -16,32 +16,46 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "TypesystemHandler.hpp"
-
#include <core/model/Typesystem.hpp>
+#include <core/model/Domain.hpp>
#include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
+
+#include "DomainHandler.hpp"
+#include "State.hpp"
+#include "TypesystemHandler.hpp"
namespace ousia {
+namespace parser_stack {
/* TypesystemHandler */
-void TypesystemHandler::start(Variant::mapType &args)
+bool TypesystemHandler::start(Variant::mapType &args)
{
// Create the typesystem instance
Rooted<Typesystem> typesystem =
- project()->createTypesystem(args["name"].asString());
+ context().getProject()->createTypesystem(args["name"].asString());
typesystem->setLocation(location());
+ // If the typesystem is defined inside a domain, add a reference to the
+ // typesystem to the domain
+ Rooted<Domain> domain = scope().select<Domain>();
+ if (domain != nullptr) {
+ domain->reference(typesystem);
+ }
+
// Push the typesystem onto the scope, set the POST_HEAD flag to true
scope().push(typesystem);
scope().setFlag(ParserFlag::POST_HEAD, false);
+
+ return true;
}
void TypesystemHandler::end() { scope().pop(); }
/* TypesystemEnumHandler */
-void TypesystemEnumHandler::start(Variant::mapType &args)
+bool TypesystemEnumHandler::start(Variant::mapType &args)
{
scope().setFlag(ParserFlag::POST_HEAD, true);
@@ -52,33 +66,24 @@ void TypesystemEnumHandler::start(Variant::mapType &args)
enumType->setLocation(location());
scope().push(enumType);
+
+ return true;
}
void TypesystemEnumHandler::end() { scope().pop(); }
/* TypesystemEnumEntryHandler */
-void TypesystemEnumEntryHandler::start(Variant::mapType &args) {}
-
-void TypesystemEnumEntryHandler::end()
+void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData,
+ Variant::mapType &args)
{
Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>();
- enumType->addEntry(entry, logger());
-}
-
-void TypesystemEnumEntryHandler::data(const std::string &data, int field)
-{
- if (field != 0) {
- // TODO: This should be stored in the HandlerData
- logger().error("Enum entry only has one field.");
- return;
- }
- entry.append(data);
+ enumType->addEntry(fieldData.asString(), logger());
}
/* TypesystemStructHandler */
-void TypesystemStructHandler::start(Variant::mapType &args)
+bool TypesystemStructHandler::start(Variant::mapType &args)
{
scope().setFlag(ParserFlag::POST_HEAD, true);
@@ -103,13 +108,15 @@ void TypesystemStructHandler::start(Variant::mapType &args)
});
}
scope().push(structType);
+
+ return true;
}
void TypesystemStructHandler::end() { scope().pop(); }
/* TypesystemStructFieldHandler */
-void TypesystemStructFieldHandler::start(Variant::mapType &args)
+bool TypesystemStructFieldHandler::start(Variant::mapType &args)
{
// Read the argument values
const std::string &name = args["name"].asString();
@@ -142,13 +149,13 @@ void TypesystemStructFieldHandler::start(Variant::mapType &args)
}
});
}
-}
-void TypesystemStructFieldHandler::end() {}
+ return true;
+}
/* TypesystemConstantHandler */
-void TypesystemConstantHandler::start(Variant::mapType &args)
+bool TypesystemConstantHandler::start(Variant::mapType &args)
{
scope().setFlag(ParserFlag::POST_HEAD, true);
@@ -169,7 +176,51 @@ void TypesystemConstantHandler::start(Variant::mapType &args)
constant.cast<Constant>()->setType(type.cast<Type>(), logger);
}
});
+
+ return true;
}
-void TypesystemConstantHandler::end() {}
+namespace States {
+const State Typesystem = StateBuilder()
+ .parents({&None, &Domain})
+ .createdNodeType(&RttiTypes::Typesystem)
+ .elementHandler(TypesystemHandler::create)
+ .arguments({Argument::String("name", "")});
+
+const State TypesystemEnum = StateBuilder()
+ .parent(&Typesystem)
+ .createdNodeType(&RttiTypes::EnumType)
+ .elementHandler(TypesystemEnumHandler::create)
+ .arguments({Argument::String("name")});
+
+const State TypesystemEnumEntry =
+ StateBuilder()
+ .parent(&TypesystemEnum)
+ .elementHandler(TypesystemEnumEntryHandler::create)
+ .arguments({});
+
+const State TypesystemStruct =
+ StateBuilder()
+ .parent(&Typesystem)
+ .createdNodeType(&RttiTypes::StructType)
+ .elementHandler(TypesystemStructHandler::create)
+ .arguments({Argument::String("name"), Argument::String("parent", "")});
+
+const State TypesystemStructField =
+ StateBuilder()
+ .parent(&TypesystemStruct)
+ .elementHandler(TypesystemStructFieldHandler::create)
+ .arguments({Argument::String("name"), Argument::String("type"),
+ Argument::Any("default", Variant::fromObject(nullptr))});
+
+const State TypesystemConstant =
+ StateBuilder()
+ .parent(&Typesystem)
+ .createdNodeType(&RttiTypes::Constant)
+ .elementHandler(TypesystemConstantHandler::create)
+ .arguments({Argument::String("name"), Argument::String("type"),
+ Argument::Any("value")});
+}
}
+}
+
diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp
index 76a7bc9..85494f1 100644
--- a/src/core/parser/stack/TypesystemHandler.hpp
+++ b/src/core/parser/stack/TypesystemHandler.hpp
@@ -19,6 +19,9 @@
/**
* @file TypesystemHandler.hpp
*
+ * Contains the Handler classes used to parse Typesystem descriptions. The
+ * Handlers parse all the tags found below and including the "typesystem" tag.
+ *
* @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
*/
@@ -26,96 +29,180 @@
#define _OUSIA_TYPESYSTEM_HANDLER_HPP_
#include <core/common/Variant.hpp>
-#include <core/parser/ParserStack.hpp>
+
+#include "Handler.hpp"
namespace ousia {
+namespace parser_stack {
-class TypesystemHandler : public Handler {
+/**
+ * Handles the occurrence of the "typesystem" tag. Creates a new Typesystem
+ * instance and places it on the ParserScope.
+ */
+class TypesystemHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
+ /**
+ * Creates a new instance of the TypesystemHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
return new TypesystemHandler{handlerData};
}
};
-class TypesystemEnumHandler : public Handler {
+/**
+ * Handles the occurrence of the "enum" tag. Creates a new EnumType instance and
+ * places it on the ParserScope.
+ */
+class TypesystemEnumHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
+ /**
+ * Creates a new instance of the TypesystemEnumHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
return new TypesystemEnumHandler{handlerData};
}
};
-class TypesystemEnumEntryHandler : public Handler {
+/**
+ * Handles the occurrence of the "entry" tag within an "enum" tag. Adds a new
+ * entry to the EnumType instance that is currently on the ParserScope.
+ */
+class TypesystemEnumEntryHandler : public StaticFieldHandler {
public:
- using Handler::Handler;
+ using StaticFieldHandler::StaticFieldHandler;
- std::string entry;
-
- void start(Variant::mapType &args) override;
-
- void end() override;
-
- void data(const std::string &data, int field) override;
+ void doHandle(const Variant &fieldData, Variant::mapType &args) override;
+ /**
+ * Creates a new instance of the TypesystemEnumEntryHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
- return new TypesystemEnumEntryHandler{handlerData};
+ return new TypesystemEnumEntryHandler{handlerData, "name"};
}
};
-class TypesystemStructHandler : public Handler {
+/**
+ * Handles the occurrence of the "struct" tag within a typesystem description.
+ * Creates a new StructType instance and places it on the ParserScope.
+ */
+class TypesystemStructHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
+ bool start(Variant::mapType &args) override;
void end() override;
+ /**
+ * Creates a new instance of the TypesystemStructHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
return new TypesystemStructHandler{handlerData};
}
};
-class TypesystemStructFieldHandler : public Handler {
+/**
+ * Handles the occurrence of the "field" tag within a typesystem structure
+ * description. Places a new Attribute instance in the StructType instance
+ * that is currently at the top of the scope.
+ */
+class TypesystemStructFieldHandler : public StaticHandler {
public:
- using Handler::Handler;
-
- void start(Variant::mapType &args) override;
+ using StaticHandler::StaticHandler;
- void end() override;
+ bool start(Variant::mapType &args) override;
+ /**
+ * Creates a new instance of the TypesystemStructFieldHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
return new TypesystemStructFieldHandler{handlerData};
}
};
-class TypesystemConstantHandler : public Handler {
+/**
+ * Handles the occurrence of the "constant" tag within a typesystem
+ * description. Places a new Constant instance in the current typesystem.
+ */
+class TypesystemConstantHandler : public StaticHandler {
public:
- using Handler::Handler;
+ using StaticHandler::StaticHandler;
- void start(Variant::mapType &args) override;
-
- void end() override;
+ bool start(Variant::mapType &args) override;
+ /**
+ * Creates a new instance of the TypesystemConstantHandler.
+ *
+ * @param handlerData is the data that is passed to the constructor of the
+ * Handler base class and used there to e.g. access the ParserContext and
+ * the Callbacks instance.
+ */
static Handler *create(const HandlerData &handlerData)
{
return new TypesystemConstantHandler{handlerData};
}
};
+
+namespace States {
+/**
+ * State representing the "typesystem" tag.
+ */
+extern const State Typesystem;
+/**
+ * State representing the "enum" tag within a typesystem.
+ */
+extern const State TypesystemEnum;
+/**
+ * State representing the "entry" tag within an enum.
+ */
+extern const State TypesystemEnumEntry;
+/**
+ * State representing the "struct" tag within a typesystem.
+ */
+extern const State TypesystemStruct;
+/**
+ * State representing the "field" tag within a typesystem structure.
+ */
+extern const State TypesystemStructField;
+/**
+ * State representing the "constant" tag within a typesystem.
+ */
+extern const State TypesystemConstant;
+}
+}
}
#endif
diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp
new file mode 100644
index 0000000..4a0430b
--- /dev/null
+++ b/src/core/parser/utils/TokenTrie.cpp
@@ -0,0 +1,119 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "TokenTrie.hpp"
+
+namespace ousia {
+
+/* Class TokenTrie::Node */
+
+TokenTrie::Node::Node() : type(EmptyToken) {}
+
+/* Class TokenTrie */
+
+bool TokenTrie::registerToken(const std::string &token,
+ TokenTypeId type) noexcept
+{
+ // Abort if the token is empty -- this would taint the root node
+ if (token.empty()) {
+ return false;
+ }
+
+ // Iterate over each character in the given string and insert them as
+ // (new) nodes
+ Node *node = &root;
+ for (size_t i = 0; i < token.size(); i++) {
+ // Insert a new node if this one does not exist
+ const char c = token[i];
+ auto it = node->children.find(c);
+ if (it == node->children.end()) {
+ it = node->children.emplace(c, std::make_shared<Node>()).first;
+ }
+ node = it->second.get();
+ }
+
+ // If the resulting node already has a type set, we're screwed.
+ if (node->type != EmptyToken) {
+ return false;
+ }
+
+ // Otherwise just set the type to the given type.
+ node->type = type;
+ return true;
+}
+
+bool TokenTrie::unregisterToken(const std::string &token) noexcept
+{
+ // We cannot remove empty tokens as we need to access the first character
+ // upfront
+ if (token.empty()) {
+ return false;
+ }
+
+ // First pass -- search the node in the path that can be deleted
+ Node *subtreeRoot = &root;
+ char subtreeKey = token[0];
+ Node *node = &root;
+ for (size_t i = 0; i < token.size(); i++) {
+ // Go to the next node, abort if the tree ends unexpectedly
+ auto it = node->children.find(token[i]);
+ if (it == node->children.end()) {
+ return false;
+ }
+
+ // Reset the subtree handler if this node has another type
+ node = it->second.get();
+ if ((node->type != EmptyToken || node->children.size() > 1) &&
+ (i + 1 != token.size())) {
+ subtreeRoot = node;
+ subtreeKey = token[i + 1];
+ }
+ }
+
+ // If the node type is already EmptyToken, we cannot do anything here
+ if (node->type == EmptyToken) {
+ return false;
+ }
+
+ // If the target node has children, we cannot delete the subtree. Set the
+ // type to EmptyToken instead
+ if (!node->children.empty()) {
+ node->type = EmptyToken;
+ return true;
+ }
+
+ // If we end up here, we can safely delete the complete subtree
+ subtreeRoot->children.erase(subtreeKey);
+ return true;
+}
+
+TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept
+{
+ Node const *node = &root;
+ for (size_t i = 0; i < token.size(); i++) {
+ const char c = token[i];
+ auto it = node->children.find(c);
+ if (it == node->children.end()) {
+ return EmptyToken;
+ }
+ node = it->second.get();
+ }
+ return node->type;
+}
+}
+
diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp
new file mode 100644
index 0000000..36c2ffa
--- /dev/null
+++ b/src/core/parser/utils/TokenTrie.hpp
@@ -0,0 +1,150 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file TokenTrie.hpp
+ *
+ * Class representing a token trie that can be updated dynamically.
+ *
+ * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de)
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_TOKEN_TRIE_HPP_
+#define _OUSIA_TOKEN_TRIE_HPP_
+
+#include <cstdint>
+#include <memory>
+#include <limits>
+#include <unordered_map>
+
+namespace ousia {
+
+/**
+ * The TokenTypeId is used to give each token type a unique id.
+ */
+using TokenTypeId = uint32_t;
+
+/**
+ * Token which is not a token.
+ */
+constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max();
+
+/**
+ * Token which represents a text token.
+ */
+constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1;
+
+/**
+ * The Tokenizer internally uses a TokenTrie to be efficiently able to identify
+ * the longest consecutive token in the text. This is equivalent to a prefix
+ * trie.
+ *
+ * A token trie is a construct that structures all special tokens a Tokenizer
+ * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and
+ * three. Then the token tree would look like this:
+ *
+ * \code{*.txt}
+ * ~ (0)
+ * / \
+ * a (2) b (0)
+ * | |
+ * a (0) a (0)
+ * | |
+ * b (1) c (3)
+ * \endcode
+ *
+ * Where the number indicates the corresponding token descriptor identifier.
+ */
+class TokenTrie {
+public:
+ /**
+ * Structure used to build the node tree.
+ */
+ struct Node {
+ /**
+ * Type used for the child map.
+ */
+ using ChildMap = std::unordered_map<char, std::shared_ptr<Node>>;
+
+ /**
+ * Map from single characters at the corresponding child nodes.
+ */
+ ChildMap children;
+
+ /**
+ * Id of the token type attached to this node. Set to EmptyToken if
+ * no token is attached to this node.
+ */
+ TokenTypeId type;
+
+ /**
+ * Default constructor, initializes the type with EmptyToken.
+ */
+ Node();
+ };
+
+private:
+ /**
+ * Root node of the internal token tree.
+ */
+ Node root;
+
+public:
+ /**
+ * Registers a token containing the given string. Returns false if the
+ * token is empty or already exists, true otherwise.
+ *
+ * @param token is the character sequence that should be registered as
+ * token.
+ * @param type is the descriptor that should be set for this token.
+ * @return true if the operation is successful, false otherwise.
+ */
+ bool registerToken(const std::string &token, TokenTypeId type) noexcept;
+
+ /**
+ * Unregisters the token from the token tree. Returns true if the token was
+ * unregistered successfully, false otherwise.
+ *
+ * @param token is the character sequence that should be unregistered.
+ * @return true if the operation was successful, false otherwise.
+ */
+ bool unregisterToken(const std::string &token) noexcept;
+
+ /**
+ * Returns the type id of the given token if it exists within the
+ * TokenTrie. Mostly intended for debugging and unit testing.
+ *
+ * @param token is the character sequence that should be searched.
+ * @return the attached token type id or EmptyToken if the given token is
+ * not found.
+ */
+ TokenTypeId hasToken(const std::string &token) const noexcept;
+
+ /**
+ * Returns a reference at the root node to be used for traversing the token
+ * tree.
+ *
+ * @return a reference at the root node.
+ */
+ const Node *getRoot() const noexcept { return &root; }
+};
+}
+
+#endif /* _OUSIA_TOKEN_TRIE_HPP_ */
+
diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp
new file mode 100644
index 0000000..3c8177d
--- /dev/null
+++ b/src/core/parser/utils/Tokenizer.cpp
@@ -0,0 +1,381 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <memory>
+#include <vector>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Exceptions.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/WhitespaceHandler.hpp>
+
+#include "Tokenizer.hpp"
+
+namespace ousia {
+
+namespace {
+
+/* Internal class TokenMatch */
+
+/**
+ * Contains information about a matching token.
+ */
+struct TokenMatch {
+ /**
+ * Token that was matched.
+ */
+ Token token;
+
+ /**
+ * Current length of the data within the text handler. The text buffer needs
+ * to be trimmed to this length if this token matches.
+ */
+ size_t textLength;
+
+ /**
+ * End location of the current text handler. This location needs to be used
+ * for the text token that is emitted before the actual token.
+ */
+ size_t textEnd;
+
+ /**
+ * Constructor of the TokenMatch class.
+ */
+ TokenMatch() : textLength(0), textEnd(0) {}
+
+ /**
+ * Returns true if this TokenMatch instance actually represents a match.
+ */
+ bool hasMatch() { return token.type != EmptyToken; }
+};
+
+/* Internal class TokenLookup */
+
+/**
+ * The TokenLookup class is used to represent a thread in a running token
+ * lookup.
+ */
+class TokenLookup {
+private:
+ /**
+ * Current node within the token trie.
+ */
+ TokenTrie::Node const *node;
+
+ /**
+ * Start offset within the source file.
+ */
+ size_t start;
+
+ /**
+ * Current length of the data within the text handler. The text buffer needs
+ * to be trimmed to this length if this token matches.
+ */
+ size_t textLength;
+
+ /**
+ * End location of the current text handler. This location needs to be used
+ * for the text token that is emitted before the actual token.
+ */
+ size_t textEnd;
+
+public:
+ /**
+ * Constructor of the TokenLookup class.
+ *
+ * @param node is the current node.
+ * @param start is the start position.
+ * @param textLength is the text buffer length of the previous text token.
+ * @param textEnd is the current end location of the previous text token.
+ */
+ TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength,
+ size_t textEnd)
+ : node(node), start(start), textLength(textLength), textEnd(textEnd)
+ {
+ }
+
+ /**
+ * Tries to extend the current path in the token trie with the given
+ * character. If a complete token is matched, stores this match in the
+ * tokens list (in case it is longer than any previous token).
+ *
+ * @param c is the character that should be appended to the current prefix.
+ * @param lookups is a list to which new TokenLookup instances are added --
+ * which could potentially be expanded in the next iteration.
+ * @param match is the Token instance to which the matching token
+ * should be written.
+ * @param tokens is a reference at the internal token list of the
+ * Tokenizer.
+ * @param end is the end byte offset of the current character.
+ * @param sourceId is the source id of this file.
+ */
+ void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match,
+ const std::vector<std::string> &tokens, SourceOffset end,
+ SourceId sourceId)
+ {
+ // Check whether we can continue the current token path with the given
+ // character without visiting an already visited node
+ auto it = node->children.find(c);
+ if (it == node->children.end()) {
+ return;
+ }
+
+ // Check whether the new node represents a complete token and whether it
+ // is longer than the current token. If yes, replace the current token.
+ node = it->second.get();
+ if (node->type != EmptyToken) {
+ const std::string &str = tokens[node->type];
+ size_t len = str.size();
+ if (len > match.token.content.size()) {
+ match.token =
+ Token{node->type, str, {sourceId, start, end}};
+ match.textLength = textLength;
+ match.textEnd = textEnd;
+ }
+ }
+
+ // If this state can possibly be advanced, store it in the states list.
+ if (!node->children.empty()) {
+ lookups.emplace_back(*this);
+ }
+ }
+};
+
+/**
+ * Transforms the given token into a text token containing the extracted
+ * text.
+ *
+ * @param handler is the WhitespaceHandler containing the collected data.
+ * @param match is the TokenMatch whose token is turned into the text token.
+ * @param sourceId is the source id of the underlying file.
+ */
+static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
+ SourceId sourceId)
+{
+ if (match.hasMatch()) {
+ match.token.content =
+ std::string{handler.textBuf.data(), match.textLength};
+ match.token.location =
+ SourceLocation{sourceId, handler.textStart, match.textEnd};
+ } else {
+ match.token.content = handler.toString();
+ match.token.location =
+ SourceLocation{sourceId, handler.textStart, handler.textEnd};
+ }
+ match.token.type = TextToken;
+}
+}
+
+/* Class Tokenizer */
+
+Tokenizer::Tokenizer(WhitespaceMode whitespaceMode)
+ : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
+{
+}
+
+template <typename TextHandler, bool read>
+bool Tokenizer::next(CharReader &reader, Token &token)
+{
+ // If we're in the read mode, reset the char reader peek position to the
+ // current read position
+ if (read) {
+ reader.resetPeek();
+ }
+
+ // Prepare the lookups in the token trie
+ const TokenTrie::Node *root = trie.getRoot();
+ TokenMatch match;
+ std::vector<TokenLookup> lookups;
+ std::vector<TokenLookup> nextLookups;
+
+ // Instantiate the text handler
+ TextHandler textHandler;
+
+ // Peek characters from the reader and try to advance the current token tree
+ // cursor
+ char c;
+ size_t charStart = reader.getPeekOffset();
+ const SourceId sourceId = reader.getSourceId();
+ while (reader.peek(c)) {
+ const size_t charEnd = reader.getPeekOffset();
+ const size_t textLength = textHandler.textBuf.size();
+ const size_t textEnd = textHandler.textEnd;
+
+ // If we do not have a match yet, start a new lookup from the root
+ if (!match.hasMatch()) {
+ TokenLookup{root, charStart, textLength, textEnd}.advance(
+ c, nextLookups, match, tokens, charEnd, sourceId);
+ }
+
+ // Try to advance all other lookups with the new character
+ for (TokenLookup &lookup : lookups) {
+ lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId);
+ }
+
+ // We have found a token and there are no more states to advance or the
+ // text handler has found something -- abort to return the new token
+ if (match.hasMatch()) {
+ if ((nextLookups.empty() || textHandler.hasText())) {
+ break;
+ }
+ } else {
+ // Record all incoming characters
+ textHandler.append(c, charStart, charEnd);
+ }
+
+ // Swap the lookups and the nextLookups list
+ lookups = std::move(nextLookups);
+ nextLookups.clear();
+
+ // Advance the offset
+ charStart = charEnd;
+ }
+
+ // If we found text, emit that text
+ if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) {
+ buildTextToken(textHandler, match, sourceId);
+ }
+
+ // Move the read/peek cursor to the end of the token, abort if an error
+ // happens while doing so
+ if (match.hasMatch()) {
+ // Make sure we have a valid location
+ if (match.token.location.getEnd() == InvalidSourceOffset) {
+ throw OusiaException{"Token end position offset out of range"};
+ }
+
+ // Seek to the end of the current token
+ const size_t end = match.token.location.getEnd();
+ if (read) {
+ reader.seek(end);
+ } else {
+ reader.seekPeekCursor(end);
+ }
+ token = match.token;
+ } else {
+ token = Token{};
+ }
+ return match.hasMatch();
+}
+
+bool Tokenizer::read(CharReader &reader, Token &token)
+{
+ switch (whitespaceMode) {
+ case WhitespaceMode::PRESERVE:
+ return next<PreservingWhitespaceHandler, true>(reader, token);
+ case WhitespaceMode::TRIM:
+ return next<TrimmingWhitespaceHandler, true>(reader, token);
+ case WhitespaceMode::COLLAPSE:
+ return next<CollapsingWhitespaceHandler, true>(reader, token);
+ }
+ return false;
+}
+
+bool Tokenizer::peek(CharReader &reader, Token &token)
+{
+ switch (whitespaceMode) {
+ case WhitespaceMode::PRESERVE:
+ return next<PreservingWhitespaceHandler, false>(reader, token);
+ case WhitespaceMode::TRIM:
+ return next<TrimmingWhitespaceHandler, false>(reader, token);
+ case WhitespaceMode::COLLAPSE:
+ return next<CollapsingWhitespaceHandler, false>(reader, token);
+ }
+ return false;
+}
+
+TokenTypeId Tokenizer::registerToken(const std::string &token)
+{
+ // Abort if an empty token should be registered
+ if (token.empty()) {
+ return EmptyToken;
+ }
+
+ // Search for a new slot in the tokens list
+ TokenTypeId type = EmptyToken;
+ for (size_t i = nextTokenTypeId; i < tokens.size(); i++) {
+ if (tokens[i].empty()) {
+ tokens[i] = token;
+ type = i;
+ break;
+ }
+ }
+
+ // No existing slot was found, add a new one -- make sure we do not
+ // override the special token type handles
+ if (type == EmptyToken) {
+ type = tokens.size();
+ if (type == TextToken || type == EmptyToken) {
+ throw OusiaException{"Token type ids depleted!"};
+ }
+ tokens.emplace_back(token);
+ }
+ nextTokenTypeId = type + 1;
+
+ // Try to register the token in the trie -- if this fails, remove it
+ // from the tokens list
+ if (!trie.registerToken(token, type)) {
+ tokens[type] = std::string{};
+ nextTokenTypeId = type;
+ return EmptyToken;
+ }
+ return type;
+}
+
+bool Tokenizer::unregisterToken(TokenTypeId type)
+{
+ // Unregister the token from the trie, abort if an invalid type is given
+ if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
+ tokens[type] = std::string{};
+ nextTokenTypeId = type;
+ return true;
+ }
+ return false;
+}
+
+std::string Tokenizer::getTokenString(TokenTypeId type)
+{
+ if (type < tokens.size()) {
+ return tokens[type];
+ }
+ return std::string{};
+}
+
+void Tokenizer::setWhitespaceMode(WhitespaceMode mode)
+{
+ whitespaceMode = mode;
+}
+
+WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; }
+
+/* Explicitly instantiate all possible instantiations of the "next" member
+ function */
+template bool Tokenizer::next<PreservingWhitespaceHandler, false>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<TrimmingWhitespaceHandler, false>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<CollapsingWhitespaceHandler, false>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<PreservingWhitespaceHandler, true>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<TrimmingWhitespaceHandler, true>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<CollapsingWhitespaceHandler, true>(
+ CharReader &reader, Token &token);
+}
+
diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp
new file mode 100644
index 0000000..6b4e116
--- /dev/null
+++ b/src/core/parser/utils/Tokenizer.hpp
@@ -0,0 +1,231 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Tokenizer.hpp
+ *
+ * Tokenizer that can be reconfigured at runtime. It is used for parsing the
+ * plain text format.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_
+#define _OUSIA_DYNAMIC_TOKENIZER_HPP_
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include <core/common/Location.hpp>
+#include <core/common/Whitespace.hpp>
+
+#include "TokenTrie.hpp"
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+
+/**
+ * The Token structure describes a token discovered by the Tokenizer.
+ */
+struct Token {
+ /**
+ * Id of the type of this token.
+ */
+ TokenTypeId type;
+
+ /**
+ * String that was matched.
+ */
+ std::string content;
+
+ /**
+ * Location from which the string was extracted.
+ */
+ SourceLocation location;
+
+ /**
+ * Default constructor, sets the token type to EmptyToken.
+ */
+ Token() : type(EmptyToken) {}
+
+ /**
+ * Constructor of the Token struct, initializes all three fields.
+ *
+ * @param type is the id corresponding to the type of the token.
+ * @param content is the string content that has been extracted.
+ * @param location is the location of the extracted string content in the
+ * source file.
+ */
+ Token(TokenTypeId type, const std::string &content,
+ SourceLocation location)
+ : type(type), content(content), location(location)
+ {
+ }
+
+ /**
+ * Constructor of the Token struct, only initializes the token type.
+ *
+ * @param type is the id corresponding to the type of the token.
+ */
+ Token(TokenTypeId type) : type(type) {}
+
+ /**
+ * The getLocation function allows the tokens to be directly passed as
+ * parameter to Logger or LoggableException instances.
+ *
+ * @return a const reference to the location field.
+ */
+ const SourceLocation &getLocation() const { return location; }
+};
+
+/**
+ * The Tokenizer is used to extract tokens and chunks of text from a
+ * CharReader. It allows to register and unregister tokens while parsing and
+ * to modify the handling of whitespace characters. Note that the
+ * Tokenizer always tries to extract the longest possible token from the
+ * input.
+ */
+class Tokenizer {
+private:
+ /**
+ * Internally used token trie. This object holds all registered tokens.
+ */
+ TokenTrie trie;
+
+ /**
+ * Mode defining how whitespace characters should be handled.
+ */
+ WhitespaceMode whitespaceMode;
+
+ /**
+ * Registered token strings indexed by TokenTypeId; free slots are empty.
+ */
+ std::vector<std::string> tokens;
+
+ /**
+ * Next index in the tokens list where to search for a new token id.
+ */
+ size_t nextTokenTypeId;
+
+ /**
+ * Templated function used internally to read the current token. The
+ * function is templated in order to force code generation for all six
+ * combinations of whitespace modes and reading/peeking.
+ *
+ * @tparam TextHandler is the type to be used for the textHandler instance.
+ * @tparam read specifies whether the function should start from and advance
+ * the read pointer of the char reader.
+ * @param reader is the CharReader instance from which the data should be
+ * read.
+ * @param token is the token structure into which the token information
+ * should be written.
+ * @return false if the end of the stream has been reached, true otherwise.
+ */
+ template <typename TextHandler, bool read>
+ bool next(CharReader &reader, Token &token);
+
+public:
+ /**
+ * Constructor of the Tokenizer class.
+ *
+ * @param whitespaceMode specifies how whitespace should be handled.
+ */
+ Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
+
+ /**
+ * Registers the given string as a token. Returns the TokenTypeId that
+ * will be used to reference the newly created token.
+ *
+ * @param token is the token string that should be registered.
+ * @return a unique identifier for the registered token or EmptyToken if
+ * an error occurred.
+ */
+ TokenTypeId registerToken(const std::string &token);
+
+ /**
+ * Unregisters the token belonging to the given TokenTypeId.
+ *
+ * @param type is the token type that should be unregistered. The
+ * TokenTypeId must have been returned by a previous call to
+ * registerToken.
+ * @return true if the operation was successful, false otherwise (e.g.
+ * because the given TokenTypeId was already unregistered).
+ */
+ bool unregisterToken(TokenTypeId type);
+
+ /**
+ * Returns the token string that was registered under the given
+ * TokenTypeId, or an empty string if an invalid TokenTypeId is
+ * given.
+ *
+ * @param type is the TokenTypeId for which the corresponding token
+ * string should be returned. Ids of unregistered tokens yield an empty
+ * string as well.
+ * @return the registered token string or an empty string if the given type
+ * was invalid.
+ */
+ std::string getTokenString(TokenTypeId type);
+
+ /**
+ * Sets the whitespace mode.
+ *
+ * @param mode defines how whitespace should be treated in text
+ * tokens.
+ */
+ void setWhitespaceMode(WhitespaceMode mode);
+
+ /**
+ * Returns the current value of the whitespace mode.
+ *
+ * @return the whitespace mode.
+ */
+ WhitespaceMode getWhitespaceMode();
+
+ /**
+ * Reads a new token from the CharReader and stores it in the given
+ * Token instance.
+ *
+ * @param reader is the CharReader instance from which the data should be
+ * read.
+ * @param token is a reference to the token instance into which the Token
+ * information should be written.
+ * @return true if a token could be read, false if the end of the stream
+ * has been reached.
+ */
+ bool read(CharReader &reader, Token &token);
+
+ /**
+ * The peek method does not advance the read position of the char reader,
+ * but reads the next token from the current char reader peek position.
+ *
+ * @param reader is the CharReader instance from which the data should be
+ * read.
+ * @param token is a reference to the token instance into which the Token
+ * information should be written.
+ * @return true if a token could be read, false if the end of the stream
+ * has been reached.
+ */
+ bool peek(CharReader &reader, Token &token);
+};
+}
+
+#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */
+