diff options
Diffstat (limited to 'src/core/parser')
-rw-r--r-- | src/core/parser/Parser.cpp | 33 | ||||
-rw-r--r-- | src/core/parser/Parser.hpp | 159 | ||||
-rw-r--r-- | src/core/parser/ParserStack.cpp | 150 | ||||
-rw-r--r-- | src/core/parser/ParserStack.hpp | 341 | ||||
-rw-r--r-- | src/core/parser/Scope.cpp | 26 | ||||
-rw-r--r-- | src/core/parser/Scope.hpp | 172 | ||||
-rw-r--r-- | src/core/parser/XmlParser.cpp | 134 | ||||
-rw-r--r-- | src/core/parser/XmlParser.hpp | 63 |
8 files changed, 881 insertions, 197 deletions
diff --git a/src/core/parser/Parser.cpp b/src/core/parser/Parser.cpp new file mode 100644 index 0000000..23fd9b7 --- /dev/null +++ b/src/core/parser/Parser.cpp @@ -0,0 +1,33 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <sstream> + +#include "Parser.hpp" + +namespace ousia { +namespace parser { + +Rooted<Node> Parser::parse(const std::string &str, ParserContext &ctx) +{ + std::istringstream is{str}; + return parse(is, ctx); +} +} +} + diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp new file mode 100644 index 0000000..fa5dd49 --- /dev/null +++ b/src/core/parser/Parser.hpp @@ -0,0 +1,159 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Parser.hpp + * + * Contains the abstract Parser class. Parsers are objects capable of reading + * a certain file format and transforming it into a node. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_HPP_ +#define _OUSIA_PARSER_HPP_ + +#include <istream> +#include <set> +#include <string> + +#include <core/Exceptions.hpp> +#include <core/Node.hpp> +#include <core/Logger.hpp> +#include <core/Registry.hpp> + +#include "Scope.hpp" + +namespace ousia { +namespace parser { + +// TODO: Implement a proper Mimetype class + +/** + * Exception to be thrown whenever an error occurs inside a specific parser. + */ +class ParserException : public LoggableException { +public: + using LoggableException::LoggableException; +}; + +/** + * Struct containing the objects that are passed to a parser instance. + */ +struct ParserContext { + /** + * Reference to the Scope instance that should be used within the parser. + */ + Scope &scope; + + /** + * Reference to the Registry instance that should be used within the parser. + */ + Registry &registry; + + /** + * Reference to the Logger the parser should log any messages to. + */ + Logger &logger; + + /** + * Constructor of the ParserContext class. + * + * @param scope is a reference to the Scope instance that should be used to + * lookup names. + * @param registry is a reference at the Registry class, which allows to + * obtain references at parsers for other formats or script engine + * implementations. + * @param logger is a reference to the Logger instance that should be used + * to log error messages and warnings that occur while parsing the document.
+ */ + ParserContext(Scope &scope, Registry &registry, Logger &logger) + : scope(scope), registry(registry), logger(logger){}; +}; + +struct StandaloneParserContext : public ParserContext { +private: + Logger logger; + Scope scope; + Registry registry; + +public: + StandaloneParserContext() + : ParserContext(scope, registry, logger), + scope(nullptr), + registry(logger){}; +}; + +/** + * Abstract parser class. This class builds the basic interface that should be + * used by any parser which reads data from an input stream and transforms it + * into an Ousía node graph. + */ +class Parser { +public: + Parser(){}; + Parser(const Parser &) = delete; + + /** + * Returns a set containing all mime types supported by the parser. The mime + * types are used to describe the type of the document that is read by the + * parser. The default implementation returns an empty set. This method + * should be overridden by derived classes. + * + * @return a set containing the string value of the supported mime types. + */ + virtual std::set<std::string> mimetypes() + { + return std::set<std::string>{}; + }; + + /** + * Parses the given input stream and returns a corresponding node for + * inclusion in the document graph. This method should be overridden by + * derived classes. + * + * @param is is a reference to the input stream that should be parsed. + * @param ctx is a reference to the context that should be used while + * parsing the document. + * @return a reference to the node representing the subgraph that has been + * created. The resulting node may point at not yet resolved entities, the + * calling code will try to resolve these. If no valid node can be produced, + * a corresponding LoggableException must be thrown by the parser. + */ + virtual Rooted<Node> parse(std::istream &is, ParserContext &ctx) = 0; + + /** + * Parses the given string and returns a corresponding node for + * inclusion in the document graph. This method should be overridden by + * derived classes.
+ * + * @param str is the string that should be parsed. + * @param ctx is a reference to the context that should be used while + * parsing the document. + * @return a reference to the node representing the subgraph that has been + * created. The resulting node may point at not yet resolved entities, the + * calling code will try to resolve these. If no valid node can be produced, + * a corresponding ParserException must be thrown by the parser. + */ + Rooted<Node> parse(const std::string &str, ParserContext &ctx); +}; +} +} + +#endif /* _OUSIA_PARSER_HPP_ */ + diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp new file mode 100644 index 0000000..dca7f35 --- /dev/null +++ b/src/core/parser/ParserStack.cpp @@ -0,0 +1,150 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <sstream> + +#include "ParserStack.hpp" + +#include <core/Utils.hpp> +#include <core/Exceptions.hpp> + +namespace ousia { +namespace parser { + +/* Class Handler */ + +void Handler::data(const std::string &data, int field) +{ + for (auto &c : data) { + if (!Utils::isWhitespace(c)) { + throw LoggableException{"No data allowed here."}; + } + } +} + +/* Class HandlerDescriptor */ + +HandlerInstance HandlerDescriptor::create(const ParserContext &ctx, + std::string name, State parentState, + bool isChild, + const Variant &args) const +{ + Handler *h = ctor(ctx, name, targetState, parentState, isChild); + h->start(args); + return HandlerInstance(h, this); +} + +/* Class ParserStack */ + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + */ +static LoggableException invalidCommand(const std::string &name, + const std::set<std::string> &expected) +{ + if (expected.empty()) { + return LoggableException{ + std::string{"No nested elements allowed, but got \""} + name + + std::string{"\""}}; + } else { + return LoggableException{ + std::string{"Expected "} + + (expected.size() == 1 ? std::string{"\""} + : std::string{"one of \""}) + + Utils::join(expected, "\", \"") + std::string{"\", but got \""} + name + + std::string{"\""}}; + } +} + +std::set<std::string> ParserStack::expectedCommands(State state) +{ + std::set<std::string> res; + for (const auto &v : handlers) { + if (v.second.parentStates.count(state)) { + res.insert(v.first); + } + } + return res; +} + +void ParserStack::start(std::string name, const Variant &args) +{ + // Fetch the current handler and the current state + const HandlerInstance *h = stack.empty() ? 
nullptr : &stack.top(); + const State curState = currentState(); + bool isChild = false; + + // Fetch the correct Handler descriptor for this + const HandlerDescriptor *descr = nullptr; + auto range = handlers.equal_range(name); + for (auto it = range.first; it != range.second; it++) { + const std::set<State> &parentStates = it->second.parentStates; + if (parentStates.count(curState) || parentStates.count(STATE_ALL)) { + descr = &(it->second); + break; + } + } + if (!descr && currentArbitraryChildren()) { + isChild = true; + descr = h->descr; + } + + // No descriptor found, throw an exception. + if (!descr) { + throw invalidCommand(name, expectedCommands(curState)); + } + + // Instantiate the handler and call its start function + stack.emplace(descr->create(ctx, name, curState, isChild, args)); +} + +void ParserStack::end() +{ + // Check whether the current command could be ended + if (stack.empty()) { + throw LoggableException{"No command to end."}; + } + + // Remove the current HandlerInstance from the stack + HandlerInstance inst{stack.top()}; + stack.pop(); + + // Call the end function of the last Handler + inst.handler->end(); + + // Call the "child" function of the parent Handler in the stack + // (if one exists). 
+ if (!stack.empty()) { + stack.top().handler->child(inst.handler); + } +} + +void ParserStack::data(const std::string &data, int field) +{ + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException{"No command to receive data."}; + } + + // Pass the data to the current Handler instance + stack.top().handler->data(data, field); +} +} +} + diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp new file mode 100644 index 0000000..c5ed4e4 --- /dev/null +++ b/src/core/parser/ParserStack.hpp @@ -0,0 +1,341 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file ParserStack.hpp + * + * Helper classes for document or description parsers. Contains the ParserStack + * class, which is an pushdown automaton responsible for accepting commands in + * the correct order and calling specified handlers. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_HPP_ +#define _OUSIA_PARSER_STACK_HPP_ + +#include <cstdint> + +#include <map> +#include <memory> +#include <set> +#include <stack> +#include <vector> + +#include <core/variant/Variant.hpp> + +#include "Parser.hpp" + +namespace ousia { +namespace parser { + +/** + * The State type alias is used to + */ +using State = int16_t; + +static const State STATE_ALL = -2; +static const State STATE_NONE = -1; + +/** + * The handler class provides a context for handling an XML tag. It has to be + * overridden and registered in the StateStack class to form handlers for + * concrete XML tags. + */ +class Handler { +private: + Rooted<Node> node; + +protected: + void setNode(Handle<Node> node) { this->node = node; } + +public: + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + const ParserContext &ctx; + + /** + * Contains the name of the tag that is being handled. + */ + const std::string name; + + /** + * Contains the current state of the state machine. + */ + const State state; + + /** + * Contains the state of the state machine when the parent node was handled. + */ + const State parentState; + + /** + * Set to true if the tag that is being handled is not the tag that was + * specified in the state machine but a child tag of that tag. + */ + const bool isChild; + + /** + * Constructor of the Handler class. + * + * @param ctx is the parser context the handler should be executed in. + * @param name is the name of the string. + * @param state is the state this handler was called for. + * @param parentState is the state of the parent command. + * @param isChild specifies whether this handler was called not for the + * command that was specified in the state machine but a child command. 
+ */ + Handler(const ParserContext &ctx, std::string name, State state, + State parentState, bool isChild) + : ctx(ctx), + name(std::move(name)), + state(state), + parentState(parentState), + isChild(isChild){}; + + /** + * Virtual destructor. + */ + virtual ~Handler(){}; + + /** + * Returns the node instance that was created by the handler. + * + * @return the Node instance created by the handler. May be nullptr if no + * Node was created. + */ + Rooted<Node> getNode() { return node; } + + /** + * Called when the command that was specified in the constructor is + * instanciated. + * + * @param args is a map from strings to variants (argument name and value). + */ + virtual void start(const Variant &args) = 0; + + /** + * Called whenever the command for which this handler + */ + virtual void end() = 0; + + /** + * Called whenever raw data (int the form of a string) is available for the + * Handler instance. In the default handler an exception is raised if the + * received data contains non-whitespace characters. + * + * @param data is a pointer at the character data that is available for the + * Handler instance. + * @param field is the field number (the interpretation of this value + * depends on the format that is being parsed). + */ + virtual void data(const std::string &data, int field); + + /** + * Called whenever a direct child element was created and has ended. + * + * @param handler is a reference at the child Handler instance. + */ + virtual void child(std::shared_ptr<Handler> handler){}; +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + */ +using HandlerConstructor = Handler *(*)(const ParserContext &ctx, + std::string name, State state, + State parentState, bool isChild); + +struct HandlerDescriptor; + +/** + * Used internlly by StateStack to store Handler instances and parameters + * from HandlerDescriptor that are not stored in the Handler instance + * itself. 
Instances of the HandlerInstance class can be created using the + * HandlerDescriptor "create" method. + */ +struct HandlerInstance { + /** + * Pointer at the actual handler instance. + */ + std::shared_ptr<Handler> handler; + + const HandlerDescriptor *descr; + + HandlerInstance(Handler *handler, const HandlerDescriptor *descr) + : handler(handler), descr(descr) + { + } +}; + +/** + * Used internally by StateStack to store the pushdown automaton + * description. + */ +struct HandlerDescriptor { + /** + * The valid parent states. + */ + const std::set<State> parentStates; + + /** + * Pointer at a function which creates a new concrete Handler instance. + */ + const HandlerConstructor ctor; + + /** + * The target state for the registered handler. + */ + const State targetState; + + /** + * Set to true if this handler instance allows arbitrary children as + * tags. + */ + const bool arbitraryChildren; + + HandlerDescriptor(std::set<State> parentStates, HandlerConstructor ctor, + State targetState, bool arbitraryChildren = false) + : parentStates(std::move(parentStates)), + ctor(ctor), + targetState(targetState), + arbitraryChildren(arbitraryChildren) + { + } + + /** + * Creates an instance of the concrete Handler class represented by the + * HandlerDescriptor and calls its start function. + */ + HandlerInstance create(const ParserContext &ctx, std::string name, + State parentState, bool isChild, + const Variant &args) const; +}; + +/** + * The ParserStack class is a pushdown automaton responsible for turning a + * command stream into a tree of Node instances. + */ +class ParserStack { +private: + /** + * Reference at the parser context. + */ + const ParserContext &ctx; + + /** + * User specified data that will be passed to all handlers. + */ + void *userData; + + /** + * Map containing all registered command names and the corresponding + * handler + * descriptor. 
+ */ + const std::multimap<std::string, HandlerDescriptor> &handlers; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::stack<HandlerInstance> stack; + + /** + * Used internally to get all expected command names for the given state + * (does not work if the current Handler instance allows arbitrary + * children). This function is used to build error messages. + * + * @param state is the state for which all expected command names should be + * returned. + */ + std::set<std::string> expectedCommands(State state); + +public: + /** + * Creates a new instance of the ParserStack class. + * + * @param handlers is a map containing the command names and the + * corresponding HandlerDescriptor instances. + */ + ParserStack(const ParserContext &ctx, + const std::multimap<std::string, HandlerDescriptor> &handlers) + : ctx(ctx), handlers(handlers){}; + + /** + * Returns the state the ParserStack instance currently is in. + * + * @return the state of the currently active Handler instance or STATE_NONE + * if no handler is on the stack. + */ + State currentState() + { + return stack.empty() ? STATE_NONE : stack.top().handler->state; + } + + /** + * Returns the command name that is currently being handled. + * + * @return the name of the command currently being handled by the active + * Handler instance or an empty string if no handler is currently active. + */ + std::string currentName() + { + return stack.empty() ? std::string{} : stack.top().handler->name; + } + + /** + * Returns whether the current command handler allows arbitrary children. + * + * @return true if the handler allows arbitrary children, false otherwise. + */ + bool currentArbitraryChildren() + { + return stack.empty() ? false : stack.top().descr->arbitraryChildren; + } + + /** + * Function that should be called whenever a new command starts. + * + * @param name is the name of the command. + * @param args is a map from strings to variants (argument name and value). 
+ */ + void start(std::string name, const Variant &args); + + /** + * Function called whenever a command ends. + */ + void end(); + + /** + * Function that should be called whenever data is available for the + * command. + * + * @param data is the data that should be passed to the handler. + * @param field is the field number (the interpretation of this value + * depends on the format that is being parsed). + */ + void data(const std::string &data, int field = 0); +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_HPP_ */ + diff --git a/src/core/parser/Scope.cpp b/src/core/parser/Scope.cpp new file mode 100644 index 0000000..a60ade0 --- /dev/null +++ b/src/core/parser/Scope.cpp @@ -0,0 +1,26 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Scope.hpp" + +namespace ousia { +namespace parser { + + +} +} diff --git a/src/core/parser/Scope.hpp b/src/core/parser/Scope.hpp new file mode 100644 index 0000000..9c5504f --- /dev/null +++ b/src/core/parser/Scope.hpp @@ -0,0 +1,172 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_PARSER_SCOPE_H_ +#define _OUSIA_PARSER_SCOPE_H_ + +#include <deque> + +#include <core/Node.hpp> + +/** + * @file Scope.hpp + * + * Contains the Scope class used for resolving references based on the current + * parser state. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +namespace ousia { +namespace parser { + +class Scope; + +/** + * The ScopedScope class takes care of pushing a Node instance into the + * name resolution stack of a Scope instance and poping this node once the + * ScopedScope instance is deletes. This way you cannot forget to pop a Node + * from a Scope instance as this operation is performed automatically. + */ +class ScopedScope { +private: + /** + * Reference at the backing scope instance. + */ + Scope *scope; + +public: + /** + * Creates a new ScopedScope instance. + * + * @param scope is the backing Scope instance. + * @param node is the Node instance that should be poped onto the stack of + * the Scope instance. + */ + ScopedScope(Scope *scope, Handle<Node> node); + + /** + * Pops the Node given in the constructor form the stack of the Scope + * instance. + */ + ~ScopedScope(); + + /** + * Copying a ScopedScope is invalid. + */ + ScopedScope(const ScopedScope &) = delete; + + /** + * Move constructor of the ScopedScope class. + */ + ScopedScope(ScopedScope &&); + + /** + * Provides access at the underlying Scope instance. + */ + Scope *operator->() { return scope; } + + /** + * Provides access at the underlying Scope instance. 
+ */ + Scope &operator*() { return *scope; } +}; + +/** + * Provides an interface for document parsers to resolve references based on the + * current position in the created document tree. The Scope class itself is + * represented as a chain of Scope objects where each element has a reference to + * a Node object attached to it. The descend method can be used to add a new + * scope element to the chain. + */ +class Scope { +private: + std::deque<Rooted<Node>> nodes; + +public: + /** + * Constructor of the Scope class. + * + * @param rootNode is the top-most Node from which elements can be looked + * up. + */ + Scope(Handle<Node> rootNode) { nodes.push_back(rootNode); } + + /** + * Returns a reference at the Manager instance all nodes belong to. + */ + Manager &getManager() { return getRoot()->getManager(); } + + /** + * Pushes a new node onto the scope. + * + * @param node is the node that should be used for local lookup. + */ + void push(Handle<Node> node) { nodes.push_back(node); } + + /** + * Removes the last pushed node from the scope. + */ + void pop() { nodes.pop_back(); } + + /** + * Returns a ScopedScope instance, which automatically pushes the given node + * into the Scope stack and pops it once the ScopedScope is destroyed. + */ + ScopedScope descend(Handle<Node> node) { return ScopedScope{this, node}; } + + /** + * Returns the top-most Node instance in the Scope hirarchy. + * + * @return a reference at the root node. + */ + Rooted<Node> getRoot() { return nodes.front(); } + + /** + * Returns the bottom-most Node instance in the Scope hirarchy, e.g. the + * node that was pushed last onto the stack. + * + * @return a reference at the leaf node. 
+ */ + Rooted<Node> getLeaf() { return nodes.back(); } +}; + +/* Class ScopedScope -- inline declaration of some methods */ + +inline ScopedScope::ScopedScope(Scope *scope, Handle<Node> node) : scope(scope) +{ + scope->push(node); +} + +inline ScopedScope::~ScopedScope() +{ + if (scope) { + scope->pop(); + } +} + +inline ScopedScope::ScopedScope(ScopedScope &&s) +{ + scope = s.scope; + s.scope = nullptr; +} +} +} + +#endif /* _OUSIA_PARSER_SCOPE_H_ */ + diff --git a/src/core/parser/XmlParser.cpp b/src/core/parser/XmlParser.cpp deleted file mode 100644 index f9bb43e..0000000 --- a/src/core/parser/XmlParser.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <expat.h> - -#include "XmlParser.hpp" - -namespace ousia { - -/** - * The XmlParserData struct holds all information relevant to the expat callback - * functions. - */ -struct XmlParserData { - Rooted<Node> context; - Logger &logger; - - XmlParserData(Handle<Node> context, Logger &logger) - : context(context), logger(logger) - { - } -}; - -/** - * Wrapper class around the XML_Parser pointer which safely frees it whenever - * the scope is left (e.g. because an exception was thrown). - */ -class ScopedExpatXmlParser { -private: - /** - * Internal pointer to the XML_Parser instance. 
- */ - XML_Parser parser; - -public: - /** - * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS - * from the expat library. Throws a parser exception if the XML parser - * cannot be initialized. - * - * @param encoding is the protocol-defined encoding passed to expat (or - * nullptr if expat should determine the encoding by itself). - * @param namespaceSeparator is the separator used to separate the namespace - * components in the node name given by expat. - */ - ScopedExpatXmlParser(const XML_Char *encoding, XML_Char namespaceSeparator) - : parser(nullptr) - { - parser = XML_ParserCreateNS("UTF-8", ':'); - if (!parser) { - throw ParserException{ - "Internal error: Could not create expat XML parser!"}; - } - } - - /** - * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance. - */ - ~ScopedExpatXmlParser() - { - if (parser) { - XML_ParserFree(parser); - parser = nullptr; - } - } - - /** - * Returns the XML_Parser pointer. - */ - XML_Parser operator&() { return parser; } -}; - -std::set<std::string> XmlParser::mimetypes() -{ - return std::set<std::string>{{"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}}; -} - -Rooted<Node> XmlParser::parse(std::istream &is, Handle<Node> context, - Logger &logger) -{ - // Create the parser object - ScopedExpatXmlParser p{"UTF-8", ':'}; - - // Set the callback functions, provide a pointer to a XmlParserData instance - // as user data. - XmlParserData ctx{context, logger}; - - // Feed data into expat while there is data to process - const std::streamsize BUFFER_SIZE = 4096; // TODO: Move to own header? 
- while (true) { - // Fetch a buffer from expat for the input data - char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE)); - if (!buf) { - throw ParserException{"Internal error: XML parser out of memory!"}; - } - - // Read the input data from the stream - const std::streamsize bytesRead = is.read(buf, BUFFER_SIZE).gcount(); - - // Parse the data and handle any XML error - if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { - const int line = XML_GetCurrentLineNumber(&p); - const int column = XML_GetCurrentColumnNumber(&p); - const XML_Error code = XML_GetErrorCode(&p); - const std::string msg = std::string{XML_ErrorString(code)}; - logger.error("XML: " + msg, line, column); - break; - } - - // Abort once there are no more bytes in the stream - if (bytesRead == 0) { - break; - } - } - - return nullptr; -} -} - diff --git a/src/core/parser/XmlParser.hpp b/src/core/parser/XmlParser.hpp deleted file mode 100644 index f6fb060..0000000 --- a/src/core/parser/XmlParser.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file XmlParser.hpp - * - * Contains the parser responsible for reading Ousía XML Documents (extension - * oxd) and Ousía XML Modules (extension oxm). 
- * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_XML_PARSER_HPP_ -#define _OUSIA_XML_PARSER_HPP_ - -#include <core/Parser.hpp> - -namespace ousia { - -/** - * The XmlParser class implements parsing the various types of Ousía XML - * documents using the expat stream XML parser. - */ -class XmlParser : public Parser { -public: - /** - * Returns the mimetype supported by the XmlParser which is - * "text/vnd.ousia.oxm" and "text/vnd.ousia.oxd". - * - * @return a list containing the mimetype supported by Ousía. - */ - std::set<std::string> mimetypes() override; - - /** - * Parses the given input stream as XML file and returns the parsed - * top-level node. Throws - * - * @param is is the input stream that will be parsed. - */ - Rooted<Node> parse(std::istream &is, Handle<Node> context, - Logger &logger) override; - - using Parser::parse; -}; -} - -#endif /* _OUSIA_XML_PARSER_HPP_ */ - |