diff options
Diffstat (limited to 'src/core/parser')
| -rw-r--r-- | src/core/parser/Parser.cpp | 32 | ||||
| -rw-r--r-- | src/core/parser/Parser.hpp | 116 | ||||
| -rw-r--r-- | src/core/parser/XmlParser.cpp | 134 | ||||
| -rw-r--r-- | src/core/parser/XmlParser.hpp | 63 | 
4 files changed, 148 insertions, 197 deletions
| diff --git a/src/core/parser/Parser.cpp b/src/core/parser/Parser.cpp new file mode 100644 index 0000000..bc98ac0 --- /dev/null +++ b/src/core/parser/Parser.cpp @@ -0,0 +1,32 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <sstream> + +#include "Parser.hpp" + +namespace ousia { + +Rooted<Node> Parser::parse(const std::string &str, Handle<Node> context, Logger &logger) +{ +	std::istringstream is(str); +	return parse(is, context, logger); +} + +} + diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp new file mode 100644 index 0000000..74a1988 --- /dev/null +++ b/src/core/parser/Parser.hpp @@ -0,0 +1,116 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Parser.hpp + * + * Contains the abstract "Parser" class. Parsers are objects capable of reading + * a certain file format and transforming it into a node. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_HPP_ +#define _OUSIA_PARSER_HPP_ + +#include <istream> +#include <set> +#include <string> + +#include "Exceptions.hpp" +#include "Node.hpp" +#include "Logger.hpp" + +namespace ousia { + +// TODO: Implement a proper Mimetype class + +/** + * Exception to be thrown whenever an error occurs inside a specific parser. + */ +class ParserException : public LoggableException { +public: +	using LoggableException::LoggableException; +}; + +/** + * Abstract parser class. This class builds the basic interface that should be + * used by any parser which reads data from an input stream and transforms it + * into an Ousía node graph. + */ +class Parser { +public: + +	Parser() {}; +	Parser(const Parser&) = delete; + +	/** +	 * Returns a set containing all mime types supported by the parser. The mime +	 * types are used to describe the type of the document that is read by the +	 * parser. The default implementation returns an empty set. This method +	 * should be overridden by derived classes. +	 * +	 * @return a set containing the string value of the supported mime types. +	 */ +	virtual std::set<std::string> mimetypes() +	{ +		return std::set<std::string>{}; +	}; + +	/** +	 * Parses the given input stream and returns a corresponding node for +	 * inclusion in the document graph. This method should be overridden by +	 * derived classes. +	 * +	 * @param is is a reference to the input stream that should be parsed. +	 * @param context defines the context in which the input stream should be +	 * parsed. The context represents the scope from which element names should +	 * be looked up. +	 * @param logger is a reference to the Logger instance that should be used +	 * to log error messages and warnings that occur while parsing the document. +	 * @return a reference to the node representing the subgraph that has been +	 * created. The resulting node may point at not yet resolved entities, the +	 * calling code will try to resolve these. If no valid node can be produced, +	 * a corresponding LoggableException must be thrown by the parser. +	 */ +	virtual Rooted<Node> parse(std::istream &is, Handle<Node> context, +	                           Logger &logger) = 0; + +	/** +	 * Parses the given string and returns a corresponding node for +	 * inclusion in the document graph. This method should be overridden by +	 * derived classes. +	 * +	 * @param str is the string that should be parsed. +	 * @param context defines the context in which the input stream should be +	 * parsed. The context represents the scope from which element names should +	 * be looked up. +	 * @param logger is a reference to the Logger instance that should be used +	 * to log error messages and warnings that occur while parsing the document. +	 * @return a reference to the node representing the subgraph that has been +	 * created. The resulting node may point at not yet resolved entities, the +	 * calling code will try to resolve these. If no valid node can be produced, +	 * a corresponding ParserException must be thrown by the parser. +	 */ +	Rooted<Node> parse(const std::string &str, Handle<Node> context, +	                   Logger &logger); +}; +} + +#endif /* _OUSIA_PARSER_HPP_ */ + diff --git a/src/core/parser/XmlParser.cpp b/src/core/parser/XmlParser.cpp deleted file mode 100644 index f9bb43e..0000000 --- a/src/core/parser/XmlParser.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <expat.h> - -#include "XmlParser.hpp" - -namespace ousia { - -/** - * The XmlParserData struct holds all information relevant to the expat callback - * functions. - */ -struct XmlParserData { -	Rooted<Node> context; -	Logger &logger; - -	XmlParserData(Handle<Node> context, Logger &logger) -	    : context(context), logger(logger) -	{ -	} -}; - -/** - * Wrapper class around the XML_Parser pointer which safely frees it whenever - * the scope is left (e.g. because an exception was thrown). - */ -class ScopedExpatXmlParser { -private: -	/** -	 * Internal pointer to the XML_Parser instance. -	 */ -	XML_Parser parser; - -public: -	/** -	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS -	 * from the expat library. Throws a parser exception if the XML parser -	 * cannot be initialized. -	 * -	 * @param encoding is the protocol-defined encoding passed to expat (or -	 * nullptr if expat should determine the encoding by itself). -	 * @param namespaceSeparator is the separator used to separate the namespace -	 * components in the node name given by expat. -	 */ -	ScopedExpatXmlParser(const XML_Char *encoding, XML_Char namespaceSeparator) -	    : parser(nullptr) -	{ -		parser = XML_ParserCreateNS("UTF-8", ':'); -		if (!parser) { -			throw ParserException{ -			    "Internal error: Could not create expat XML parser!"}; -		} -	} - -	/** -	 * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance. -	 */ -	~ScopedExpatXmlParser() -	{ -		if (parser) { -			XML_ParserFree(parser); -			parser = nullptr; -		} -	} - -	/** -	 * Returns the XML_Parser pointer. -	 */ -	XML_Parser operator&() { return parser; } -}; - -std::set<std::string> XmlParser::mimetypes() -{ -	return std::set<std::string>{{"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}}; -} - -Rooted<Node> XmlParser::parse(std::istream &is, Handle<Node> context, -                              Logger &logger) -{ -	// Create the parser object -	ScopedExpatXmlParser p{"UTF-8", ':'}; - -	// Set the callback functions, provide a pointer to a XmlParserData instance -	// as user data. -	XmlParserData ctx{context, logger}; - -	// Feed data into expat while there is data to process -	const std::streamsize BUFFER_SIZE = 4096;  // TODO: Move to own header? -	while (true) { -		// Fetch a buffer from expat for the input data -		char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE)); -		if (!buf) { -			throw ParserException{"Internal error: XML parser out of memory!"}; -		} - -		// Read the input data from the stream -		const std::streamsize bytesRead = is.read(buf, BUFFER_SIZE).gcount(); - -		// Parse the data and handle any XML error -		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { -			const int line = XML_GetCurrentLineNumber(&p); -			const int column = XML_GetCurrentColumnNumber(&p); -			const XML_Error code = XML_GetErrorCode(&p); -			const std::string msg = std::string{XML_ErrorString(code)}; -			logger.error("XML: " + msg, line, column); -			break; -		} - -		// Abort once there are no more bytes in the stream -		if (bytesRead == 0) { -			break; -		} -	} - -	return nullptr; -} -} - diff --git a/src/core/parser/XmlParser.hpp b/src/core/parser/XmlParser.hpp deleted file mode 100644 index f6fb060..0000000 --- a/src/core/parser/XmlParser.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file XmlParser.hpp - * - * Contains the parser responsible for reading Ousía XML Documents (extension - * oxd) and Ousía XML Modules (extension oxm). - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_XML_PARSER_HPP_ -#define _OUSIA_XML_PARSER_HPP_ - -#include <core/Parser.hpp> - -namespace ousia { - -/** - * The XmlParser class implements parsing the various types of Ousía XML - * documents using the expat stream XML parser. - */ -class XmlParser : public Parser { -public: -	/** -	 * Returns the mimetype supported by the XmlParser which is -	 * "text/vnd.ousia.oxm" and "text/vnd.ousia.oxd". -	 * -	 * @return a list containing the mimetype supported by Ousía. -	 */ -	std::set<std::string> mimetypes() override; - -	/** -	 * Parses the given input stream as XML file and returns the parsed -	 * top-level node. Throws -	 * -	 * @param is is the input stream that will be parsed. -	 */ -	Rooted<Node> parse(std::istream &is, Handle<Node> context, -	                   Logger &logger) override; - -	using Parser::parse; -}; -} - -#endif /* _OUSIA_XML_PARSER_HPP_ */ - | 
