diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/Exceptions.cpp | 7 | ||||
-rw-r--r-- | src/core/Logger.cpp | 2 | ||||
-rw-r--r-- | src/core/Logger.hpp | 90 | ||||
-rw-r--r-- | src/core/Registry.cpp | 47 | ||||
-rw-r--r-- | src/core/Registry.hpp | 51 | ||||
-rw-r--r-- | src/core/parser/Parser.cpp | 9 | ||||
-rw-r--r-- | src/core/parser/Parser.hpp | 72 | ||||
-rw-r--r-- | src/core/parser/Scope.cpp | 26 | ||||
-rw-r--r-- | src/core/parser/Scope.hpp | 170 | ||||
-rw-r--r-- | src/plugins/mozjs/MozJsScriptEngine.cpp | 2 | ||||
-rw-r--r-- | src/plugins/mozjs/MozJsScriptEngine.hpp | 2 | ||||
-rw-r--r-- | src/plugins/xml/XmlParser.cpp | 65 | ||||
-rw-r--r-- | src/plugins/xml/XmlParser.hpp | 14 | ||||
-rw-r--r-- | src/plugins/xml/XmlStates.cpp | 53 | ||||
-rw-r--r-- | src/plugins/xml/XmlStates.hpp | 301 |
15 files changed, 847 insertions, 64 deletions
diff --git a/src/core/Exceptions.cpp b/src/core/Exceptions.cpp index 92d9293..735dac6 100644 --- a/src/core/Exceptions.cpp +++ b/src/core/Exceptions.cpp @@ -29,16 +29,17 @@ std::string LoggableException::formatMessage(const std::string &msg, int column, bool fatal) { std::stringstream ss; + ss << "error "; if (!file.empty()) { ss << "while processing \"" << file << "\" "; } if (line >= 0) { - ss << "at line: " << line << " "; + ss << "at line " << line << ", "; if (column >= 0) { - ss << "col: " << column << " "; + ss << "column " << column << " "; } } - ss << "message: " << msg; + ss << "with message: " << msg; return ss.str(); } } diff --git a/src/core/Logger.cpp b/src/core/Logger.cpp index 1a3b6c6..17f55a6 100644 --- a/src/core/Logger.cpp +++ b/src/core/Logger.cpp @@ -149,7 +149,7 @@ void TerminalLogger::process(const Message &msg) os << t.color(Terminal::RED, true) << "error: "; break; case Severity::FATAL_ERROR: - os << t.color(Terminal::RED, true) << "error: "; + os << t.color(Terminal::RED, true) << "fatal: "; break; } os << t.reset(); diff --git a/src/core/Logger.hpp b/src/core/Logger.hpp index 260d010..a30374c 100644 --- a/src/core/Logger.hpp +++ b/src/core/Logger.hpp @@ -256,6 +256,22 @@ public: * the file name stack. * * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void debug(const std::string &msg, const std::string &file, int line = -1, int column = -1) + { + log(Severity::DEBUG, msg, file, line, column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. * @param line is the line in the above file at which the error occured. * Ignored if negative. 
* @param column is the column in the above file at which the error occured. @@ -263,7 +279,23 @@ public: */ void debug(const std::string &msg, int line = -1, int column = -1) { - log(Severity::DEBUG, msg, line, column); + debug(msg, currentFilename(), line, column); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void note(const std::string &msg, const std::string &file, int line = -1, int column = -1) + { + log(Severity::NOTE, msg, file, line, column); } /** @@ -278,7 +310,23 @@ public: */ void note(const std::string &msg, int line = -1, int column = -1) { - log(Severity::NOTE, msg, line, column); + note(msg, currentFilename(), line, column); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void warning(const std::string &msg, const std::string &file, int line = -1, int column = -1) + { + log(Severity::WARNING, msg, file, line, column); } /** @@ -293,7 +341,23 @@ public: */ void warning(const std::string &msg, int line = -1, int column = -1) { - log(Severity::WARNING, msg, line, column); + warning(msg, currentFilename(), line, column); + } + + /** + * Logs an error message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. 
+ * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void error(const std::string &msg, const std::string &file, int line = -1, int column = -1) + { + log(Severity::ERROR, msg, file, line, column); } /** @@ -308,7 +372,23 @@ public: */ void error(const std::string &msg, int line = -1, int column = -1) { - log(Severity::ERROR, msg, line, column); + error(msg, currentFilename(), line, column); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void fatalError(const std::string &msg, const std::string &file, int line = -1, int column = -1) + { + log(Severity::FATAL_ERROR, msg, file, line, column); } /** @@ -323,7 +403,7 @@ public: */ void fatalError(const std::string &msg, int line = -1, int column = -1) { - log(Severity::FATAL_ERROR, msg, line, column); + fatalError(msg, currentFilename(), line, column); } /** diff --git a/src/core/Registry.cpp b/src/core/Registry.cpp new file mode 100644 index 0000000..1961b35 --- /dev/null +++ b/src/core/Registry.cpp @@ -0,0 +1,47 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/Logger.hpp> + +#include <core/parser/Parser.hpp> + +namespace ousia { + +using namespace parser; + +/* Class Registry */ + +void Registry::registerParser(parser::Parser *parser) +{ + parsers.push_back(parser); + for (const auto &mime : parser.mimetypes()) { + parserMimetypes.insert(std::make_pair(mime, parser)); + } +} + +Parser* Registry::getParserForMimetype(const std::string &mimetype) +{ + const auto it = parserMimetypes.find(mimetype); + if (it != parserMimetypes.end()) { + return it->second; + } + return nullptr; +} + +} + diff --git a/src/core/Registry.hpp b/src/core/Registry.hpp new file mode 100644 index 0000000..235e427 --- /dev/null +++ b/src/core/Registry.hpp @@ -0,0 +1,51 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _OUSIA_REGISTRY_HPP_ +#define _OUSIA_REGISTRY_HPP_ + +#include <map> +#include <vector> + +namespace ousia { + +// TODO: Add support for ScriptEngine type + +class Logger; + +namespace parser { +class Parser; +} + +class Registry { +private: + Logger &logger; + std::vector<parser::Parser*> parsers; + std::map<std::string, parser::Parser*> parserMimetypes; + +public: + Registry(Logger &logger) : logger(logger) {} + + void registerParser(parser::Parser *parser); + + parser::Parser *getParserForMimetype(std::string mimetype); +}; +} + +#endif /* _OUSIA_REGISTRY_HPP_ */ + diff --git a/src/core/parser/Parser.cpp b/src/core/parser/Parser.cpp index bc98ac0..23fd9b7 100644 --- a/src/core/parser/Parser.cpp +++ b/src/core/parser/Parser.cpp @@ -21,12 +21,13 @@ #include "Parser.hpp" namespace ousia { +namespace parser { -Rooted<Node> Parser::parse(const std::string &str, Handle<Node> context, Logger &logger) +Rooted<Node> Parser::parse(const std::string &str, ParserContext &ctx) { - std::istringstream is(str); - return parse(is, context, logger); + std::istringstream is{str}; + return parse(is, ctx); +} } - } diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index 74a1988..b8faf98 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -19,7 +19,7 @@ /** * @file Parser.hpp * - * Contains the abstract "Parser" class. Parsers are objects capable of reading + * Contains the abstract Parser class. Parsers are objects capable of reading * a certain file format and transforming it into a node. 
* * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) @@ -32,11 +32,15 @@ #include <set> #include <string> -#include "Exceptions.hpp" -#include "Node.hpp" -#include "Logger.hpp" +#include <core/Exceptions.hpp> +#include <core/Node.hpp> +#include <core/Logger.hpp> +#include <core/Registry.hpp> + +#include "Scope.hpp" namespace ousia { +namespace parser { // TODO: Implement a proper Mimetype class @@ -49,15 +53,48 @@ public: }; /** + * Struct containing the objects that are passed to a parser instance. + */ +struct ParserContext { + /** + * Reference to the Scope instance that should be used within the parser. + */ + Scope &scope; + + /** + * Reference to the Registry instance that should be used within the parser. + */ + Registry &registry; + + /** + * Reference to the Logger the parser should log any messages to. + */ + Logger &logger; + + /** + * Constructor of the ParserContext class. + * + * @param scope is a reference to the Scope instance that should be used to + * lookup names. + * @param registry is a reference at the Registry class, which allows to + * obtain references at parsers for other formats or script engine + * implementations. + * @param logger is a reference to the Logger instance that should be used + * to log error messages and warnings that occur while parsing the document. + */ + ParserContext(Scope &scope, Registry &registry, Logger &logger) + : scope(scope), registry(registry), logger(logger){}; +}; + +/** * Abstract parser class. This class builds the basic interface that should be * used by any parser which reads data from an input stream and transforms it * into an Ousía node graph. */ class Parser { public: - - Parser() {}; - Parser(const Parser&) = delete; + Parser(){}; + Parser(const Parser &) = delete; /** * Returns a set containing all mime types supported by the parser. The mime @@ -78,18 +115,14 @@ public: * derived classes. * * @param is is a reference to the input stream that should be parsed. 
- * @param context defines the context in which the input stream should be - * parsed. The context represents the scope from which element names should - * be looked up. - * @param logger is a reference to the Logger instance that should be used - * to log error messages and warnings that occur while parsing the document. + * @param ctx is a reference to the context that should be used while + * parsing the document. * @return a reference to the node representing the subgraph that has been * created. The resulting node may point at not yet resolved entities, the * calling code will try to resolve these. If no valid node can be produced, * a corresponding LoggableException must be thrown by the parser. */ - virtual Rooted<Node> parse(std::istream &is, Handle<Node> context, - Logger &logger) = 0; + virtual Rooted<Node> parse(std::istream &is, ParserContext &ctx) = 0; /** * Parses the given string and returns a corresponding node for @@ -97,20 +130,17 @@ public: * derived classes. * * @param str is the string that should be parsed. - * @param context defines the context in which the input stream should be - * parsed. The context represents the scope from which element names should - * be looked up. - * @param logger is a reference to the Logger instance that should be used - * to log error messages and warnings that occur while parsing the document. + * @param ctx is a reference to the context that should be used while + * parsing the document. * @return a reference to the node representing the subgraph that has been * created. The resulting node may point at not yet resolved entities, the * calling code will try to resolve these. If no valid node can be produced, * a corresponding ParserException must be thrown by the parser. 
*/ - Rooted<Node> parse(const std::string &str, Handle<Node> context, - Logger &logger); + Rooted<Node> parse(const std::string &str, ParserContext &ctx); }; } +} #endif /* _OUSIA_PARSER_HPP_ */ diff --git a/src/core/parser/Scope.cpp b/src/core/parser/Scope.cpp new file mode 100644 index 0000000..a60ade0 --- /dev/null +++ b/src/core/parser/Scope.cpp @@ -0,0 +1,26 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Scope.hpp" + +namespace ousia { +namespace parser { + + +} +} diff --git a/src/core/parser/Scope.hpp b/src/core/parser/Scope.hpp new file mode 100644 index 0000000..0c37fbd --- /dev/null +++ b/src/core/parser/Scope.hpp @@ -0,0 +1,170 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_PARSER_SCOPE_H_ +#define _OUSIA_PARSER_SCOPE_H_ + +#include <core/Node.hpp> + +/** + * @file Scope.hpp + * + * Contains the Scope class used for resolving references based on the current + * parser state. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +namespace ousia { +namespace parser { + +class Scope; + +/** + * The ScopedScope class takes care of pushing a Node instance into the + * name resolution stack of a Scope instance and popping this node once the + * ScopedScope instance is deleted. This way you cannot forget to pop a Node + * from a Scope instance as this operation is performed automatically. + */ +class ScopedScope { +private: + /** + * Reference at the backing scope instance. + */ + Scope *scope; + +public: + /** + * Creates a new ScopedScope instance. + * + * @param scope is the backing Scope instance. + * @param node is the Node instance that should be pushed onto the stack of + * the Scope instance. + */ + ScopedScope(Scope *scope, Handle<Node> node); + + /** + * Pops the Node given in the constructor from the stack of the Scope + * instance. + */ + ~ScopedScope(); + + /** + * Copying a ScopedScope is invalid. + */ + ScopedScope(const ScopedScope &) = delete; + + /** + * Move constructor of the ScopedScope class. + */ + ScopedScope(ScopedScope &&); + + /** + * Provides access at the underlying Scope instance. + */ + Scope *operator->() { return scope; } + + /** + * Provides access at the underlying Scope instance. + */ + Scope &operator*() { return *scope; } +}; + +/** + * Provides an interface for document parsers to resolve references based on the + * current position in the created document tree. The Scope class itself is + * represented as a chain of Scope objects where each element has a reference to + * a Node object attached to it. The descend method can be used to add a new + * scope element to the chain. 
+ */ +class Scope { +private: + std::deque<Rooted<Node>> nodes; + +public: + /** + * Constructor of the Scope class. + * + * @param rootNode is the top-most Node from which elements can be looked + * up. + */ + Scope(Handle<Node> rootNode) { nodes.push_back(rootNode); } + + /** + * Returns a reference at the Manager instance all nodes belong to. + */ + Manager &getManager() { return getRoot()->getManager(); } + + /** + * Pushes a new node onto the scope. + * + * @param node is the node that should be used for local lookup. + */ + void push(Handle<Node> node) { nodes.push_back(node); } + + /** + * Removes the last pushed node from the scope. + */ + void pop() { nodes.pop_back(); } + + /** + * Returns a ScopedScope instance, which automatically pushes the given node + * into the Scope stack and pops it once the ScopedScope is destroyed. + */ + ScopedScope descend(Handle<Node> node) { return ScopedScope{this, node}; } + + /** + * Returns the top-most Node instance in the Scope hierarchy. + * + * @return a reference at the root node. + */ + Rooted<Node> getRoot() { return nodes.front(); } + + /** + * Returns the bottom-most Node instance in the Scope hierarchy, i.e. the + * node that was pushed last onto the stack. + * + * @return a reference at the leaf node. 
+ */ + Rooted<Node> getLeaf() { return nodes.back(); } +}; + +/* Class ScopedScope -- inline declaration of some methods */ + +inline ScopedScope::ScopedScope(Scope *scope, Handle<Node> node) : scope(scope) +{ + scope->push(node); +} + +inline ScopedScope::~ScopedScope() +{ + if (scope) { + scope->pop(); + } +} + +inline ScopedScope::ScopedScope(ScopedScope &&s) +{ + scope = s.scope; + s.scope = nullptr; +} +} +} + +#endif /* _OUSIA_PARSER_SCOPE_H_ */ + diff --git a/src/plugins/mozjs/MozJsScriptEngine.cpp b/src/plugins/mozjs/MozJsScriptEngine.cpp index f269eb7..47394a0 100644 --- a/src/plugins/mozjs/MozJsScriptEngine.cpp +++ b/src/plugins/mozjs/MozJsScriptEngine.cpp @@ -27,6 +27,7 @@ namespace ousia { namespace script { +namespace mozjs { /* * Some important links to the SpiderMonkey (mozjs) documentation: @@ -504,4 +505,5 @@ MozJsScriptEngineScope *MozJsScriptEngine::createScope() { } } } +} diff --git a/src/plugins/mozjs/MozJsScriptEngine.hpp b/src/plugins/mozjs/MozJsScriptEngine.hpp index 72e8ad7..385c676 100644 --- a/src/plugins/mozjs/MozJsScriptEngine.hpp +++ b/src/plugins/mozjs/MozJsScriptEngine.hpp @@ -46,6 +46,7 @@ typedef Rooted<Value> RootedValue; namespace ousia { namespace script { +namespace mozjs { class MozJsScriptEngineScope; @@ -121,6 +122,7 @@ public: }; } } +} #endif /* _MOZ_JS_SCRIPT_ENGINE_HPP_ */ diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp index f9bb43e..f6891a8 100644 --- a/src/plugins/xml/XmlParser.cpp +++ b/src/plugins/xml/XmlParser.cpp @@ -16,25 +16,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <iostream> + #include <expat.h> #include "XmlParser.hpp" namespace ousia { - -/** - * The XmlParserData struct holds all information relevant to the expat callback - * functions. 
- */ -struct XmlParserData { - Rooted<Node> context; - Logger &logger; - - XmlParserData(Handle<Node> context, Logger &logger) - : context(context), logger(logger) - { - } -}; +namespace parser { +namespace xml { /** * Wrapper class around the XML_Parser pointer which safely frees it whenever @@ -55,13 +45,11 @@ public: * * @param encoding is the protocol-defined encoding passed to expat (or * nullptr if expat should determine the encoding by itself). - * @param namespaceSeparator is the separator used to separate the namespace - * components in the node name given by expat. */ - ScopedExpatXmlParser(const XML_Char *encoding, XML_Char namespaceSeparator) + ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) { - parser = XML_ParserCreateNS("UTF-8", ':'); + parser = XML_ParserCreate(encoding); if (!parser) { throw ParserException{ "Internal error: Could not create expat XML parser!"}; @@ -85,20 +73,43 @@ public: XML_Parser operator&() { return parser; } }; +static void xmlStartElementHandler(void *userData, const XML_Char *name, + const XML_Char **attrs) +{ + std::cout << "start tag: " << name << std::endl; + const XML_Char **attr = attrs; + while (*attr) { + std::cout << "\t" << *attr; + attr++; + std::cout << " -> " << *attr << std::endl; + attr++; + } +} + +static void xmlEndElementHandler(void *userData, const XML_Char *name) { + std::cout << "end tag: " << name << std::endl; +} + + +static void xmlCharacterDataHandler(void *userData, const XML_Char *s, int len) { + std::cout << "\tdata: " << std::string(s, len) << std::endl; +} + std::set<std::string> XmlParser::mimetypes() { return std::set<std::string>{{"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}}; } -Rooted<Node> XmlParser::parse(std::istream &is, Handle<Node> context, - Logger &logger) +Rooted<Node> XmlParser::parse(std::istream &is, ParserContext &ctx) { // Create the parser object - ScopedExpatXmlParser p{"UTF-8", ':'}; + ScopedExpatXmlParser p{"UTF-8"}; + XML_SetUserData(&p, &ctx); - // Set 
the callback functions, provide a pointer to a XmlParserData instance - // as user data. - XmlParserData ctx{context, logger}; + // Set the callback functions + XML_SetStartElementHandler(&p, xmlStartElementHandler); + XML_SetEndElementHandler(&p, xmlEndElementHandler); + XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler); // Feed data into expat while there is data to process const std::streamsize BUFFER_SIZE = 4096; // TODO: Move to own header? @@ -118,8 +129,8 @@ Rooted<Node> XmlParser::parse(std::istream &is, Handle<Node> context, const int column = XML_GetCurrentColumnNumber(&p); const XML_Error code = XML_GetErrorCode(&p); const std::string msg = std::string{XML_ErrorString(code)}; - logger.error("XML: " + msg, line, column); - break; + throw ParserException{"XML Syntax Error: " + msg, line, column, + false}; } // Abort once there are no more bytes in the stream @@ -131,4 +142,6 @@ Rooted<Node> XmlParser::parse(std::istream &is, Handle<Node> context, return nullptr; } } +} +} diff --git a/src/plugins/xml/XmlParser.hpp b/src/plugins/xml/XmlParser.hpp index f6fb060..b19af1e 100644 --- a/src/plugins/xml/XmlParser.hpp +++ b/src/plugins/xml/XmlParser.hpp @@ -28,9 +28,11 @@ #ifndef _OUSIA_XML_PARSER_HPP_ #define _OUSIA_XML_PARSER_HPP_ -#include <core/Parser.hpp> +#include <core/parser/Parser.hpp> namespace ousia { +namespace parser { +namespace xml { /** * The XmlParser class implements parsing the various types of Ousía XML @@ -48,15 +50,19 @@ public: /** * Parses the given input stream as XML file and returns the parsed - * top-level node. Throws + * top-level node. * * @param is is the input stream that will be parsed. + * @param ctx is a reference to the ParserContext instance that should be + * used. 
*/ - Rooted<Node> parse(std::istream &is, Handle<Node> context, - Logger &logger) override; + Rooted<Node> parse(std::istream &is, ParserContext &ctx) override; using Parser::parse; }; + +} +} } #endif /* _OUSIA_XML_PARSER_HPP_ */ diff --git a/src/plugins/xml/XmlStates.cpp b/src/plugins/xml/XmlStates.cpp new file mode 100644 index 0000000..ec0f002 --- /dev/null +++ b/src/plugins/xml/XmlStates.cpp @@ -0,0 +1,53 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "XmlStates.hpp" + +namespace ousia { +namespace parser { +namespace xml { + +std::set<std::string> StateStack::expectedCommands(State state) +{ + std::set<std::string> res; + for (const auto &v: handlers) { + if (v.second.parentStates.count(state)) { + res.insert(v.first); + } + } + return res; +} + +void StateStack::start(std::string tagName, char **attrs) { + // Fetch the current handler and the current state + const Handler *h = stack.empty() ? nullptr : stack.top(); + const State currentState = h ? 
State::NONE : h->state; + + // Fetch all handlers for the given tagName + auto range = handlers.equal_range(tagName); + if (range->first == handlers.end()) { + // There are no handlers registered for this tag name -- check whether + // the current handler supports arbitrary children + if (h && h->arbitraryChildren) + } +} + +} +} +} + diff --git a/src/plugins/xml/XmlStates.hpp b/src/plugins/xml/XmlStates.hpp new file mode 100644 index 0000000..70e95f4 --- /dev/null +++ b/src/plugins/xml/XmlStates.hpp @@ -0,0 +1,301 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_XML_STATES_HPP_ +#define _OUSIA_XML_STATES_HPP_ + +#include <cstdint> + +#include <map> +#include <set> +#include <stack> +#include <vector> + +namespace ousia { +namespace parser { + +class Scope; +class Registry; +class Logger; + +namespace xml { + +/** + * The State class represents all states the XML parser can be in. These states + * mostly represent single tags. 
+ */ +enum class State : uint8_t { + /* Meta states */ + ALL = -1, + + /* Start state */ + NONE, + + /* Special commands */ + INCLUDE, + INLINE, + + /* Document states */ + DOCUMENT, + HEAD, + BODY, + + /* Domain description states */ + DOMAIN, + + /* Type system states */ + TYPESYSTEM, + TYPE, + TYPE_ELEM +}; + +/** + * The handler class provides a context for handling an XML tag. It has to be + * overridden and registered in the StateStack class to form handlers for + * concrete XML tags. + */ +class Handler { +private: + Rooted<Node> node; + +protected: + void setNode(Handle<Node> node) { this->node = node; } + +public: + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + const ParserContext &ctx; + + /** + * Contains the name of the tag that is being handled. + */ + const std::string name; + + /** + * Contains the current state of the state machine. + */ + const State state; + + /** + * Contains the state of the state machine when the parent node was handled. + */ + const State parentState; + + /** + * Set to true if the tag that is being handled is not the tag that was + * specified in the state machine but a child tag of that tag. + */ + const bool isChild; + + /** + * Constructor of the Handler class. + * + * @param ctx is the parser context the handler should be executed in. + * @param name is the name of the string. + * @param state is the state this handler was called for. + * @param parentState is the state of the parent command. + * @param isChild specifies whether this handler was called not for the + * command that was specified in the state machine but a child command. + */ + Handler(const ParserContext &ctx, std::string name, State state, + State parentState, bool isChild) + : ctx(ctx), + name(std::move(name)), + state(state), + parentState(parentState), + isChild(isChild){}; + + /** + * Virtual destructor. 
+ */ + virtual ~Handler(); + + /** + * Returns the node instance that was created by the handler. + * + * @return the Node instance created by the handler. May be nullptr if no + * Node was created. + */ + Rooted<Node> getNode() { return node; } + + /** + * Called when the command that was specified in the constructor is + * instanciated. + * + * @param attrs contains the attributes that were specified for the command. + * TODO: Replace with StructInstance! + */ + virtual void start(char **attrs) = 0; + + /** + * Called whenever the command for which this handler + */ + virtual void end() = 0; + + /** + * Called whenever raw data (int the form of a string) is available for the + * Handler instance. + * + * TODO: Replace with std::string? + * + * @param data is a pointer at the character data that is available for the + * Handler instance. + */ + virtual void data(char *data, int len){}; + + /** + * Called whenever a direct child element was created and has ended. + * + * @param handler is a reference at the child Handler instance. + */ + virtual void child(Handler *handler){}; +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + */ +using HandlerConstructor = Handler *(*)(const ParserContext &ctx, + std::string name, State state, + State parentState, bool isChild); + +/** + * The StateStack class is a pushdown automaton responsible for turning a + * command stream into a tree of Node instances. + */ +class StateStack { +public: + /** + * Used internlly by StateStack to store Handler instances and parameters + * from HandlerDescriptor that are not stored in the Handler instance + * itself. Instances of the HandlerInstance class can be created using the + * HandlerDescriptor "create" method. + */ + struct HandlerInstance { + /** + * Pointer at the actual handler instance. 
+ */ + std::unique_ptr<Handler> handler; + + /** + * Value of the arbitraryChildren flag stored in the HandlerDescriptor + * class. + */ + const bool arbitraryChildren; + + HandlerInstance(std::unique_ptr<Handler> handler, + bool arbitraryChildren) + : handler(handler), arbitraryChildren(arbitraryChildren) + { + } + } + + /** + * Used internally by StateStack to store the pushdown automaton + * description. + */ + struct HandlerDescriptor { + /** + * The valid parent states. + */ + const std::set<State> parentStates; + + /** + * Pointer at a function which creates a new concrete Handler instance. + */ + const HandlerConstructor ctor; + + /** + * The target state for the registered handler. + */ + const State targetState; + + /** + * Set to true if this handler instance allows arbitrary children as + * tags. + */ + const bool arbitraryChildren; + + HandlerDescriptor(std::set<State> parentStates, HandlerConstructor ctor, + State targetState, bool arbitraryChildren = false) + : parentStates(std::move(parentStates)), + ctor(constructor), + targetState(targetState), + arbitraryChildren(arbitraryChildren) + { + } + + HandlerInstance create(const ParserContext &ctx, std::string name, + State parentState, bool isChild) + { + return HandlerInstance{ + ctor(ctx, name, targetState, parentState, isChild), + arbitraryChildren}; + } + }; + +private: + /** + * Map containing all registered command names and the corresponding + * handler + * descriptor. + */ + const std::multimap<std::string, HandlerDescriptor> handlers; + + /** + * Reference at the parser context. + */ + const ParserContext &ctx; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::stack<HandlerInstance> stack; + + /** + * Used internally to get all expected command names for the given state + * (does not work if the current Handler instance allows arbitrary + * children). This function is used to build error messages. 
+ * + * @param state is the state for which all expected command names should be + * returned. + */ + std::set<std::string> expectedCommands(State state); + +public: + /** + * Creates a new instance of the StateStack class. + * + * @param handlers is a map containing the command names and the + * corresponding HandlerDescriptor instances. + */ + StateStack(const ParserContext &ctx, + std::multimap<std::string, HandlerDescriptor> handlers) + : handlers(std::move(handlers)), + ctx(ctx), + currentState(State::NONE), + arbitraryChildren(false); +}; +} +} +} + +#endif /* _OUSIA_XML_STATES_HPP_ */ + |