diff options
| author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 21:32:54 +0100 | 
|---|---|---|
| committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 21:32:54 +0100 | 
| commit | 8e5e08c4f293434585d2a88f7f331f8ce49b67b9 (patch) | |
| tree | fa82a937b1ea80f45d7955938c333f68f8a0f3f6 | |
| parent | 2544749215bc2465bfeca431e271110ca86d8a83 (diff) | |
| parent | 40f4666c43211d9071a827ad8a2524688e7f678f (diff) | |
Merge branch 'astoecke_parser_stack_new'
Conflicts:
	application/src/core/parser/stack/DocumentHandler.cpp
	application/src/core/parser/stack/DocumentHandler.hpp
71 files changed, 6378 insertions, 2805 deletions
| diff --git a/CMakeLists.txt b/CMakeLists.txt index ab31dab..ec1bb4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,18 @@ FIND_PACKAGE(Boost COMPONENTS system filesystem program_options REQUIRED)  SET(UTF8CPP_INCLUDE_DIR "lib/utf8cpp")  ################################################################################ +# Check the gcc version                                                        # +################################################################################ + +IF(CMAKE_COMPILER_IS_GNUCC) +	EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} -dumpversion +		OUTPUT_VARIABLE GCC_VERSION) +	IF(GCC_VERSION VERSION_LESS 4.8) +		ERROR("This projects requires at last GCC 4.8 or newer to be built") +	ENDIF() +ENDIF() + +################################################################################  # Inclusion of doxygen                                                         #  ################################################################################ @@ -115,9 +127,7 @@ ADD_DEFINITIONS(  )  ADD_LIBRARY(ousia_core -	src/core/CodeTokenizer  	src/core/Registry -	src/core/Tokenizer  	src/core/XML  	src/core/RangeSet  	src/core/common/Argument @@ -152,12 +162,17 @@ ADD_LIBRARY(ousia_core  	src/core/parser/Parser  	src/core/parser/ParserContext  	src/core/parser/ParserScope -	src/core/parser/ParserStack -	src/core/parser/ParserState +	src/core/parser/stack/Callbacks  	src/core/parser/stack/DocumentHandler  	src/core/parser/stack/DomainHandler +	src/core/parser/stack/GenericParserStates +	src/core/parser/stack/Handler  	src/core/parser/stack/ImportIncludeHandler +	src/core/parser/stack/State +	src/core/parser/stack/Stack  	src/core/parser/stack/TypesystemHandler +	src/core/parser/utils/Tokenizer +	src/core/parser/utils/TokenTrie  	src/core/resource/Resource  	src/core/resource/ResourceLocator  	src/core/resource/ResourceManager @@ -165,14 +180,39 @@ ADD_LIBRARY(ousia_core  #	src/core/script/ScriptEngine  ) -ADD_LIBRARY(ousia_css -	
src/plugins/css/CSSParser +# Format libraries + +#ADD_LIBRARY(ousia_css +#	src/plugins/css/CodeTokenizer +#	src/plugins/css/Tokenizer +#	src/plugins/css/CSSParser +#) + +#TARGET_LINK_LIBRARIES(ousia_css +#	ousia_core +#) + +ADD_LIBRARY(ousia_osml +	src/formats/osml/OsmlStreamParser +) + +TARGET_LINK_LIBRARIES(ousia_osml +	ousia_core +) + +ADD_LIBRARY(ousia_osxml +	src/formats/osxml/OsxmlAttributeLocator +	src/formats/osxml/OsxmlEventParser +	src/formats/osxml/OsxmlParser  ) -TARGET_LINK_LIBRARIES(ousia_css +TARGET_LINK_LIBRARIES(ousia_osxml  	ousia_core +	${EXPAT_LIBRARIES}  ) +# Resource locators +  ADD_LIBRARY(ousia_filesystem  	src/plugins/filesystem/FileLocator  	src/plugins/filesystem/SpecialPaths @@ -183,6 +223,8 @@ TARGET_LINK_LIBRARIES(ousia_filesystem  	${Boost_LIBRARIES}  ) +# Output libraries +  ADD_LIBRARY(ousia_html  	src/plugins/html/DemoOutput  ) @@ -191,27 +233,6 @@ TARGET_LINK_LIBRARIES(ousia_html  	ousia_core  ) -ADD_LIBRARY(ousia_xml -	src/plugins/xml/XmlOutput -	src/plugins/xml/XmlParser -) - -TARGET_LINK_LIBRARIES(ousia_xml -	ousia_core -	${EXPAT_LIBRARIES} -) - -ADD_LIBRARY(ousia_osdm -	src/formats/osdm/DynamicTokenizer -	src/formats/osdm/TokenTrie -	src/formats/osdm/OsdmStreamParser -) - -TARGET_LINK_LIBRARIES(ousia_osdm -	ousia_core -) - -  #ADD_LIBRARY(ousia_mozjs  #	src/plugins/mozjs/MozJsScriptEngine  #) @@ -223,18 +244,17 @@ TARGET_LINK_LIBRARIES(ousia_osdm  # Command line interface -ADD_EXECUTABLE(ousia -	src/cli/Main -) +#ADD_EXECUTABLE(ousia +#	src/cli/Main +#) -TARGET_LINK_LIBRARIES(ousia -	ousia_core -	ousia_css -	ousia_filesystem -	ousia_html -	ousia_xml -	${Boost_LIBRARIES} -) +#TARGET_LINK_LIBRARIES(ousia +#	ousia_core +#	ousia_filesystem +#	ousia_html +#	ousia_xml +#	${Boost_LIBRARIES} +#)  # If testing is enabled, build the unit tests  IF(TEST) @@ -245,10 +265,8 @@ IF(TEST)  	)  	ADD_EXECUTABLE(ousia_test_core -		test/core/CodeTokenizerTest  		test/core/RangeSetTest  		test/core/RegistryTest -		test/core/TokenizerTest  		
test/core/XMLTest  		test/core/common/ArgumentTest  		test/core/common/CharReaderTest @@ -274,8 +292,10 @@ IF(TEST)  		test/core/model/StyleTest  		test/core/model/TypesystemTest  		test/core/parser/ParserScopeTest -		test/core/parser/ParserStackTest -		test/core/parser/ParserStateTest +		test/core/parser/stack/StackTest +		test/core/parser/stack/StateTest +		test/core/parser/utils/TokenizerTest +		test/core/parser/utils/TokenTrieTest  		test/core/resource/ResourceLocatorTest  		test/core/resource/ResourceRequestTest  #		test/core/script/FunctionTest @@ -298,15 +318,17 @@ IF(TEST)  		ousia_filesystem  	) -	ADD_EXECUTABLE(ousia_test_css -		test/plugins/css/CSSParserTest -	) +#	ADD_EXECUTABLE(ousia_test_css +#		test/plugins/css/Tokenizer +#		test/plugins/css/CodeTokenizerTest +#		test/plugins/css/CSSParserTest +#	) -	TARGET_LINK_LIBRARIES(ousia_test_css -		${GTEST_LIBRARIES} -		ousia_core -		ousia_css -	) +#	TARGET_LINK_LIBRARIES(ousia_test_css +#		${GTEST_LIBRARIES} +#		ousia_core +#		ousia_css +#	)  	ADD_EXECUTABLE(ousia_test_html  		test/plugins/html/DemoOutputTest @@ -318,27 +340,26 @@ IF(TEST)  		ousia_html  	) -	ADD_EXECUTABLE(ousia_test_xml -		test/plugins/xml/XmlParserTest +	ADD_EXECUTABLE(ousia_test_osml +		test/formats/osml/OsmlStreamParserTest  	) -	TARGET_LINK_LIBRARIES(ousia_test_xml +	TARGET_LINK_LIBRARIES(ousia_test_osml  		${GTEST_LIBRARIES}  		ousia_core -		ousia_xml -		ousia_filesystem +		ousia_osml  	) -	ADD_EXECUTABLE(ousia_test_osdm -		test/formats/osdm/TokenTrieTest -		test/formats/osdm/DynamicTokenizerTest -		test/formats/osdm/OsdmStreamParserTest +	ADD_EXECUTABLE(ousia_test_osxml +		test/formats/osxml/OsxmlEventParserTest +		test/formats/osxml/OsxmlParserTest  	) -	TARGET_LINK_LIBRARIES(ousia_test_osdm +	TARGET_LINK_LIBRARIES(ousia_test_osxml  		${GTEST_LIBRARIES}  		ousia_core -		ousia_osdm +		ousia_osxml +		ousia_filesystem  	)  #	ADD_EXECUTABLE(ousia_test_mozjs @@ -354,10 +375,10 @@ IF(TEST)  	# Register the unit tests  	
ADD_TEST(ousia_test_core ousia_test_core)  	ADD_TEST(ousia_test_filesystem ousia_test_filesystem) -	ADD_TEST(ousia_test_css ousia_test_css) +#	ADD_TEST(ousia_test_css ousia_test_css)  	ADD_TEST(ousia_test_html ousia_test_html) -	ADD_TEST(ousia_test_xml ousia_test_xml) -	ADD_TEST(ousia_test_osdm ousia_test_osdm) +	ADD_TEST(ousia_test_osml ousia_test_osml) +	ADD_TEST(ousia_test_osxml ousia_test_osxml)  #	ADD_TEST(ousia_test_mozjs ousia_test_mozjs)  ENDIF() @@ -375,6 +396,6 @@ INSTALL(DIRECTORY data/ DESTINATION share/ousia  				OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE  ) -INSTALL(TARGETS ousia -		RUNTIME DESTINATION bin -) +#INSTALL(TARGETS ousia +#		RUNTIME DESTINATION bin +#) diff --git a/contrib/gtksourceview-3.0/language-specs/ousia.lang b/contrib/gtksourceview-3.0/language-specs/ousia.lang index 4cefac3..7a91d1e 100644 --- a/contrib/gtksourceview-3.0/language-specs/ousia.lang +++ b/contrib/gtksourceview-3.0/language-specs/ousia.lang @@ -24,7 +24,7 @@  <language id="ousia" _name="Ousia" version="2.0" _section="Markup">    <metadata>      <property name="mimetypes">text/vnd.ousia</property> -    <property name="globs">*.osdm</property> +    <property name="globs">*.osml</property>      <property name="line-comment-start">%</property>      <property name="block-comment-start">%{</property>      <property name="block-comment-end">}%</property> diff --git a/contrib/test.osdm b/contrib/test.osml index 100bc77..100bc77 100644 --- a/contrib/test.osdm +++ b/contrib/test.osml diff --git a/src/core/common/Argument.cpp b/src/core/common/Argument.cpp index bfe74a4..b10fad3 100644 --- a/src/core/common/Argument.cpp +++ b/src/core/common/Argument.cpp @@ -302,10 +302,10 @@ bool Arguments::validateMap(Variant::mapType &map, Logger &logger,  		} else {  			if (ignoreUnknown) {  				logger.note(std::string("Ignoring argument \"") + e.first + -				            std::string("\"")); +				            std::string("\""), e.second);  			} else {  				logger.error(std::string("Unknown 
argument \"") + e.first + -				             std::string("\"")); +				             std::string("\""), e.second);  				ok = false;  			}  		} diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 563fe2a..f8b53c6 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -18,19 +18,13 @@  #include <algorithm>  #include <cctype> -#include <limits>  #include <string>  #include "Utils.hpp" +#include "WhitespaceHandler.hpp"  namespace ousia { -std::string Utils::trim(const std::string &s) -{ -	std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace); -	return s.substr(bounds.first, bounds.second - bounds.first); -} -  bool Utils::isIdentifier(const std::string &name)  {  	bool first = true; @@ -43,7 +37,27 @@ bool Utils::isIdentifier(const std::string &name)  		}  		first = false;  	} -	return true; +	return !first; +} + +bool Utils::isIdentifierOrEmpty(const std::string &name) +{ +	return name.empty() || isIdentifier(name); +} + +bool Utils::isNamespacedIdentifier(const std::string &name) +{ +	bool first = true; +	for (char c : name) { +		if (first && !isIdentifierStartCharacter(c)) { +			return false; +		} +		if (!first && (!isIdentifierCharacter(c) && c != ':')) { +			return false; +		} +		first = (c == ':'); +	} +	return !first;  }  bool Utils::hasNonWhitepaceChar(const std::string &s) @@ -94,5 +108,29 @@ std::string Utils::extractFileExtension(const std::string &filename)  	}  	return std::string{};  } + +std::string Utils::trim(const std::string &s) +{ +	std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace); +	return s.substr(bounds.first, bounds.second - bounds.first); +} + +std::string Utils::collapse(const std::string &s) +{ +	CollapsingWhitespaceHandler h; +	appendToWhitespaceHandler(h, s, 0); +	return h.toString(); +} + +bool Utils::startsWith(const std::string &s, const std::string &prefix) +{ +	return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix; +} + +bool Utils::endsWith(const 
std::string &s, const std::string &suffix) +{ +	return suffix.size() <= s.size() && +	       s.substr(s.size() - suffix.size(), suffix.size()) == suffix; +}  } diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 2c8a5b3..b5a54fc 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -74,16 +74,45 @@ public:  	}  	/** -	 * Returns true if the given character is in [A-Za-z][A-Za-z0-9_-]* +	 * Returns true if the given string is in +	 * \code{.txt} +	 * [A-Za-z][A-Za-z0-9_-]* +	 * \endCode +	 * +	 * @param name is the string that should be tested. +	 * @return true if the string matches the regular expression given above,  +	 * false otherwise.  	 */  	static bool isIdentifier(const std::string &name);  	/** +	 * Returns true if the given string is an identifier or an empty string. +	 */ +	static bool isIdentifierOrEmpty(const std::string &name); + +	/** +	 * Returns true if the given string is in +	 * \code{.txt} +	 * ([A-Za-z][A-Za-z0-9_-]*)(:[A-Za-z][A-Za-z0-9_-]*)* +	 * \endCode +	 * +	 * @param name is the string that should be tested. +	 * @return true if the string matches the regular expression given above,  +	 * false otherwise. +	 */ +	static bool isNamespacedIdentifier(const std::string &name); + +	/** +	 * Returns true if the given character is a linebreak character. +	 */ +	static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); } + +	/**  	 * Returns true if the given character is a whitespace character.  	 */  	static bool isWhitespace(const char c)  	{ -		return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'); +		return (c == ' ') || (c == '\t') || isLinebreak(c);  	}  	/** @@ -95,11 +124,6 @@ public:  	static bool hasNonWhitepaceChar(const std::string &s);  	/** -	 * Returns true if the given character is a whitespace character. 
-	 */ -	static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); } - -	/**  	 * Removes whitespace at the beginning and the end of the given string.  	 *  	 * @param s is the string that should be trimmed. @@ -120,8 +144,25 @@ public:  	template <class T, class Filter>  	static std::pair<size_t, size_t> trim(const T &s, Filter f)  	{ +		return trim(s, s.size(), f); +	} + +	/** +	 * Trims the given string or vector of chars by returning the start and end +	 * index. +	 * +	 * @param s is the container that should be trimmed. +	 * @param len is the number of elements in the container. +	 * @param f is a function that returns true for values that should be +	 * removed. +	 * @return start and end index. Note that "end" points at the character +	 * beyond the end, thus "end" minus "start" +	 */ +	template <class T, class Filter> +	static std::pair<size_t, size_t> trim(const T &s, size_t len, Filter f) +	{  		size_t start = 0; -		for (size_t i = 0; i < s.size(); i++) { +		for (size_t i = 0; i < len; i++) {  			if (!f(s[i])) {  				start = i;  				break; @@ -129,7 +170,7 @@ public:  		}  		size_t end = 0; -		for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) { +		for (ssize_t i = len - 1; i >= static_cast<ssize_t>(start); i--) {  			if (!f(s[i])) {  				end = i + 1;  				break; @@ -145,6 +186,15 @@ public:  	}  	/** +	 * Collapses the whitespaces in the given string (trims the string and +	 * replaces all whitespace characters by a single one). +	 * +	 * @param s is the string in which the whitespace should be collapsed. +	 * @return a copy of s with collapsed whitespace. +	 */ +	static std::string collapse(const std::string &s); + +	/**  	 * Turns the elements of a collection into a string separated by the  	 * given delimiter.  	 * @@ -205,6 +255,24 @@ public:  	static std::string extractFileExtension(const std::string &filename);  	/** +	 * Checks whether the given string starts with the given prefix. 
+	 * +	 * @param s is the string. +	 * @param prefix is the string which should be checked for being a prefix of +	 * s. +	 */ +	static bool startsWith(const std::string &s, const std::string &prefix); + +	/** +	 * Checks whether the given string ends with the given suffix. +	 * +	 * @param s is the string. +	 * @param suffix is the string which should be checked for being a suffix of +	 * s. +	 */ +	static bool endsWith(const std::string &s, const std::string &suffix); + +	/**  	 * Hash functional to be used for enum classes.  	 * See http://stackoverflow.com/a/24847480/2188211  	 */ diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp index 6eae7e1..ddd17d7 100644 --- a/src/core/common/Variant.hpp +++ b/src/core/common/Variant.hpp @@ -884,6 +884,21 @@ public:  	}  	/** +	 * If the value of the variant already is a string, the markAsMagic function +	 * marks this string as a "magic" value (a variant which might also be an +	 * identifier). Throws an exception if the variant is not a string or magic +	 * value. +	 */ +	void markAsMagic() +	{ +		if (getType() == VariantType::STRING) { +			meta.setType(VariantType::MAGIC); +			return; +		} +		throw TypeException{getType(), VariantType::STRING}; +	} + +	/**  	 * Returns the value of the Variant as boolean, performs type conversion.  	 *  	 * @return the Variant value converted to a boolean value. @@ -1146,10 +1161,7 @@ public:  	 *  	 * @retun true if the  	 */ -	bool hasLocation() const -	{ -		return meta.hasLocation(); -	} +	bool hasLocation() const { return meta.hasLocation(); }  	/**  	 * Unpacks ans returns the stored source location. Note that the returned @@ -1158,10 +1170,7 @@ public:  	 *  	 * @return the stored SourceLocation.  	 */ -	SourceLocation getLocation() const -	{ -		return meta.getLocation(); -	} +	SourceLocation getLocation() const { return meta.getLocation(); }  	/**  	 * Packs the given source location and stores it in the metadata. 
Not all diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp index 3f02226..fb93ad0 100644 --- a/src/core/common/VariantReader.cpp +++ b/src/core/common/VariantReader.cpp @@ -495,7 +495,7 @@ std::pair<bool, Variant::boolType> VariantReader::parseBool(CharReader &reader,  	bool val = false;  	CharReaderFork readerFork = reader.fork();  	LoggerFork loggerFork = logger.fork(); -	auto res = parseToken(readerFork, loggerFork, {}); +	auto res = parseToken(readerFork, loggerFork, std::unordered_set<char>{});  	if (res.first) {  		bool valid = false;  		if (res.second == "true") { diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp index 1232f6e..44132a0 100644 --- a/src/core/common/VariantReader.hpp +++ b/src/core/common/VariantReader.hpp @@ -322,7 +322,7 @@ public:  	 */  	static std::pair<bool, Variant> parseTyped(  	    VariantType type, CharReader &reader, Logger &logger, -	    const std::unordered_set<char> &delims = {}); +	    const std::unordered_set<char> &delims = std::unordered_set<char>{});  	/**  	 * Tries to parse an instance of the given type from the given string. The  	 * called method is one of the parse methods defined here and adheres to the diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp new file mode 100644 index 0000000..72a2291 --- /dev/null +++ b/src/core/common/Whitespace.hpp @@ -0,0 +1,60 @@ +/* +    Ousía +    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Whitespace.hpp + * + * Contains the WhitespaceMode enum used in various places. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_WHITESPACE_HPP_ +#define _OUSIA_WHITESPACE_HPP_ + +#include <string> +#include <utility> + +namespace ousia { + +/** + * Enum specifying the whitespace handling mode of the tokenizer and the + * parsers. + */ +enum class WhitespaceMode { +	/** +     * Preserves all whitespaces as they are found in the source file. +     */ +	PRESERVE, + +	/** +     * Trims whitespace at the beginning and the end of the found text. +     */ +	TRIM, + +	/** +     * Whitespaces are trimmed and collapsed, multiple whitespace characters +     * are replaced by a single space character. +     */ +	COLLAPSE +}; + +} + +#endif /* _OUSIA_WHITESPACE_HPP_ */ + diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp new file mode 100644 index 0000000..ed52ea3 --- /dev/null +++ b/src/core/common/WhitespaceHandler.hpp @@ -0,0 +1,284 @@ +/* +    Ousía +    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/ + +/** + * @file WhitespaceHandler.hpp + * + * Contains the WhitespaceHandler classes which are used in multiple places to + * trim, compact or preserve whitespaces while at the same time maintaining the + * position information associated with the input strings. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_ +#define _OUSIA_WHITESPACE_HANDLER_HPP_ + +#include <string> +#include <vector> + +#include "Utils.hpp" + +namespace ousia { + +/** + * WhitespaceHandler is a based class that can be used to collect text on a + * character-by-character basis. Note that this class and its descendants are + * hoped to be inlined by the compiler (and used in conjunction with templates), + * thus they are fully defined inside this header. + */ +class WhitespaceHandler { +public: +	/** +	 * Start position of the extracted text. +	 */ +	size_t textStart; + +	/** +	 * End position of the extracted text. +	 */ +	size_t textEnd; + +	/** +	 * Buffer containing the extracted text. +	 */ +	std::vector<char> textBuf; + +	/** +	 * Constructor of the TextHandlerBase base class. Initializes the start and +	 * end position with zeros. +	 */ +	WhitespaceHandler() : textStart(0), textEnd(0) {} + +	/** +	 * Returns true if this whitespace handler has found any text and a text +	 * token could be emitted. +	 * +	 * @return true if the internal data buffer is non-empty. +	 */ +	bool hasText() { return !textBuf.empty(); } + +	/** +	 * Returns the content of the WhitespaceHandler as string. +	 */ +	std::string toString() const +	{ +		return std::string(textBuf.data(), textBuf.size()); +	} +}; + +/** + * The PreservingWhitespaceHandler class preserves all characters unmodified, + * including whitepace characters. + */ +class PreservingWhitespaceHandler : public WhitespaceHandler { +public: +	/** +	 * Appends the given character to the internal text buffer, does not +	 * eliminate whitespace. 
+	 * +	 * @param c is the character that should be appended to the internal buffer. +	 * @param start is the start byte offset of the given character. +	 * @param end is the end byte offset of the given character. +	 */ +	void append(char c, size_t start, size_t end) +	{ +		append(c, start, end, textBuf, textStart, textEnd); +	} + +	/** +	 * Static version of PreservingWhitespaceHandler append +	 * +	 * @param c is the character that should be appended to the internal buffer. +	 * @param start is the start byte offset of the given character. +	 * @param end is the end byte offset of the given character. +	 * @param textBuf is a reference at the text buffer that is to be used. +	 * @param textStart is a reference at the text start variable that is to be +	 * used. +	 * @param textEnd is a reference at the text end variable that is to be +	 * used. +	 */ +	static void append(char c, size_t start, size_t end, +	                   std::vector<char> &textBuf, size_t &textStart, +	                   size_t &textEnd) +	{ +		if (textBuf.empty()) { +			textStart = start; +		} +		textEnd = end; +		textBuf.push_back(c); +	} +}; + +/** + * The TrimmingTextHandler class trims all whitespace characters at the begin + * and the end of a text section but leaves all other characters unmodified, + * including whitepace characters. + */ +class TrimmingWhitespaceHandler : public WhitespaceHandler { +public: +	/** +	 * Buffer used internally to temporarily store all whitespace characters. +	 * They are only added to the output buffer if another non-whitespace +	 * character is reached. +	 */ +	std::vector<char> whitespaceBuf; + +	/** +	 * Appends the given character to the internal text buffer, eliminates +	 * whitespace characters at the begin and end of the text. +	 * +	 * @param c is the character that should be appended to the internal buffer. +	 * @param start is the start byte offset of the given character. +	 * @param end is the end byte offset of the given character. 
+	 */ +	void append(char c, size_t start, size_t end) +	{ +		append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); +	} + +	/** +	 * Static version of TrimmingWhitespaceHandler append +	 * +	 * @param c is the character that should be appended to the internal buffer. +	 * @param start is the start byte offset of the given character. +	 * @param end is the end byte offset of the given character. +	 * @param textBuf is a reference at the text buffer that is to be used. +	 * @param textStart is a reference at the text start variable that is to be +	 * used. +	 * @param textEnd is a reference at the text end variable that is to be +	 * used. +	 * @param whitespaceBuf is a reference at the buffer for storing whitespace +	 * characters. +	 */ +	static void append(char c, size_t start, size_t end, +	                   std::vector<char> &textBuf, size_t &textStart, +	                   size_t &textEnd, std::vector<char> &whitespaceBuf) +	{ +		// Handle whitespace characters +		if (Utils::isWhitespace(c)) { +			if (!textBuf.empty()) { +				whitespaceBuf.push_back(c); +			} +			return; +		} + +		// Set the start and end offset correctly +		if (textBuf.empty()) { +			textStart = start; +		} +		textEnd = end; + +		// Store the character +		if (!whitespaceBuf.empty()) { +			textBuf.insert(textBuf.end(), whitespaceBuf.begin(), +			               whitespaceBuf.end()); +			whitespaceBuf.clear(); +		} +		textBuf.push_back(c); +	} +}; + +/** + * The CollapsingTextHandler trims characters at the beginning and end of the + * text and reduced multiple whitespace characters to a single blank. + */ +class CollapsingWhitespaceHandler : public WhitespaceHandler { +public: +	/** +	 * Flag set to true if a whitespace character was reached. +	 */ +	bool hasWhitespace = false; + +	/** +	 * Appends the given character to the internal text buffer, eliminates +	 * redundant whitespace characters. +	 * +	 * @param c is the character that should be appended to the internal buffer. 
+	 * @param start is the start byte offset of the given character. +	 * @param end is the end byte offset of the given character. +	 */ +	void append(char c, size_t start, size_t end) +	{ +		append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); +	} + +	/** +	 * Static version of CollapsingWhitespaceHandler append +	 * +	 * @param c is the character that should be appended to the internal buffer. +	 * @param start is the start byte offset of the given character. +	 * @param end is the end byte offset of the given character. +	 * @param textBuf is a reference at the text buffer that is to be used. +	 * @param textStart is a reference at the text start variable that is to be +	 * used. +	 * @param textEnd is a reference at the text end variable that is to be +	 * used. +	 * @param hasWhitespace is a reference at the "hasWhitespace" flag. +	 */ +	static void append(char c, size_t start, size_t end, +	                   std::vector<char> &textBuf, size_t &textStart, +	                   size_t &textEnd, bool &hasWhitespace) +	{ +		// Handle whitespace characters +		if (Utils::isWhitespace(c)) { +			if (!textBuf.empty()) { +				hasWhitespace = true; +			} +			return; +		} + +		// Set the start and end offset correctly +		if (textBuf.empty()) { +			textStart = start; +		} +		textEnd = end; + +		// Store the character +		if (hasWhitespace) { +			textBuf.push_back(' '); +			hasWhitespace = false; +		} +		textBuf.push_back(c); +	} +}; + +/** + * Function that can be used to append the given buffer (e.g. a string or a + * vector) to the whitespace handler. + * + * @tparam WhitespaceHandler is one of the WhitespaceHandler classes. + * @tparam Buffer is an iterable type. + * @param handler is the handler to which the characters of the Buffer should be + * appended. + * @param buf is the buffer from which the characters should be read. + * @param start is the start byte offset. Each character is counted as one byte. 
+ */ +template <typename WhitespaceHandler, typename Buffer> +inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, +                                      size_t start) +{ +	for (auto elem : buf) { +		handler.append(elem, start, start + 1); +		start++; +	} +} +} + +#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */ + diff --git a/src/core/model/Node.cpp b/src/core/model/Node.cpp index 39ee2e4..ce15cad 100644 --- a/src/core/model/Node.cpp +++ b/src/core/model/Node.cpp @@ -448,7 +448,7 @@ bool Node::doValidate(Logger &logger) const { return true; }  bool Node::validateName(Logger &logger) const  { -	if (!Utils::isIdentifier(name)) { +	if (!Utils::isIdentifierOrEmpty(name)) {  		logger.error(type()->name + std::string(" name \"") + name +  		                 std::string("\" is not a valid identifier"),  		             this); diff --git a/src/core/model/Typesystem.cpp b/src/core/model/Typesystem.cpp index 506bd31..df2b9fb 100644 --- a/src/core/model/Typesystem.cpp +++ b/src/core/model/Typesystem.cpp @@ -21,7 +21,6 @@  #include <core/common/RttiBuilder.hpp>  #include <core/common/Utils.hpp>  #include <core/common/VariantConverter.hpp> -#include <core/common/VariantReader.hpp>  namespace ousia { @@ -68,65 +67,6 @@ bool Type::build(Variant &data, Logger &logger) const  	return build(data, logger, NullMagicCallback);  } -std::pair<bool, Variant> Type::read(CharReader &reader, Logger &logger, -                                    const std::unordered_set<char> &delims) -{ -	// try all variant types of this type and use the first successful one. 
-	Variant v; -	bool success = false; -	for (auto t : getVariantTypes()) { -		auto res = VariantReader::parseTyped(t, reader, logger, delims); -		if (res.first) { -			v = res.second; -			success = true; -			break; -		} -	} - -	if (!success) { -		return std::make_pair(false, Variant{}); -	} -	if (!build(v, logger)) { -		return std::make_pair(false, Variant{}); -	} -	return std::make_pair(true, v); -} - -std::pair<bool, Variant> Type::read(const std::string &str, Logger &logger, -                                    SourceId sourceId, size_t offs) -{ -	// try all variant types of this type and use the first successful one. -	Variant v; -	bool success = false; -	std::vector<LoggerFork> forks; -	auto vts = getVariantTypes(); -	for (auto vt : vts) { -		forks.emplace_back(logger.fork()); -		auto res = -		    VariantReader::parseTyped(vt, str, forks.back(), sourceId, offs); -		if (res.first) { -			v = res.second; -			success = true; -			forks.back().commit(); -			break; -		} -	} - -	if (!success) { -		logger.error("Could not read data with any of the possible types:"); -		for (size_t t = 0; t < forks.size(); t++) { -			logger.note(std::string(Variant::getTypeName(vts[t])) + ":", -			            SourceLocation{}, MessageMode::NO_CONTEXT); -			forks[t].commit(); -		} -		return std::make_pair(false, Variant{}); -	} -	if (!build(v, logger)) { -		return std::make_pair(false, Variant{}); -	} -	return std::make_pair(true, v); -} -  bool Type::doCheckIsa(Handle<const Type> type) const { return false; }  bool Type::checkIsa(Handle<const Type> type) const diff --git a/src/core/model/Typesystem.hpp b/src/core/model/Typesystem.hpp index ca4f206..39f777f 100644 --- a/src/core/model/Typesystem.hpp +++ b/src/core/model/Typesystem.hpp @@ -59,7 +59,27 @@ class SystemTypesystem;   */  class Type : public Node {  public: -	enum class MagicCallbackResult { NOT_FOUND, FOUND_INVALID, FOUND_VALID }; +	/** +	 * Enum describing the result of the MagicCallback. 
+	 */ +	enum class MagicCallbackResult { +		/** +		 * A magic value with the given name could not be resolved. +		 */ +		NOT_FOUND, + +		/** +		 * A magic value with the given name could be resolved, but is of the +		 * wrong type. +		 */ +		FOUND_INVALID, + +		/** +		 * A magic value with the given name could be resolved and is of the +		 * correct type. +		 */ +		FOUND_VALID +	};  	/**  	 * Callback function called when a variant with "magic" value is reached. @@ -70,7 +90,9 @@ public:  	 * to which the value of the looked up constant should be written.  	 * @param type is a const pointer at the type. TODO: Replace this with a  	 * "ConstHandle". -	 * @return true if a constant was found, false otherwise. +	 * @return a MagicCallbackResult describing whether the magic value could +	 * not be resolved, could be resolved but is of the wrong type or could be +	 * resolved and is of the correct type.  	 */  	using MagicCallback =  	    std::function<MagicCallbackResult(Variant &data, const Type *type)>; @@ -169,32 +191,6 @@ public:  	bool build(Variant &data, Logger &logger) const;  	/** -	 * Tries to parse an instance of this type from the given stream. -	 * -	 * @param reader is a reference to the CharReader instance which is -	 * the source for the character data. The reader will be positioned -	 * at the end of the type instance (or the delimiting character). -	 * @param delims is a set of characters which will terminate the typed -	 * instance if the according parser uses delimiting characters. -	 * These characters are not included in the result. May not be nullptr. -	 */ -	std::pair<bool, Variant> read(CharReader &reader, Logger &logger, -	                              const std::unordered_set<char> &delims = {}); - -	/** -	 * Tries to parse an instance of this type from the given string. -	 * -	 * @param str is the string from which the value should be read. 
-	 * @param sourceId is an optional descriptor of the source file from which -	 * the element is being read. -	 * @param offs is the by offset in the source file at which the string -	 * starts. -	 */ -	std::pair<bool, Variant> read(const std::string &str, Logger &logger, -	                              SourceId sourceId = InvalidSourceId, -	                              size_t offs = 0); - -	/**  	 * Returns true if and only if the given Variant adheres to this Type. In  	 * essence this just calls the build method on a copy of the input Variant.  	 * @@ -230,23 +226,6 @@ public:  	{  		return this->getParent().cast<Typesystem>();  	} - -	/** -	 * Returns the VariantTypes whose instances are proper input for building an -	 * instance of this type. -	 * More specifically: Every returned VariantType T should be such that: -	 * If a string s can be parsed according to T to a Variant v then the call -	 * build(v, logger) should only fail (return false) if the variant content -	 * does not adhere to the specific type specification. But it should be a -	 * properly typed input for build. -	 * The order of the types returned by this function determines the order in -	 * which a parser should try to interpret an input string s. -	 * -	 * @return the VariantTypes that arethe basis for parsing an instance of -	 *this -	 * type. -	 */ -	virtual std::vector<VariantType> getVariantTypes() const = 0;  };  /** @@ -287,16 +266,6 @@ public:  	 * @return a variant containing an empty string.  	 */  	Variant create() const override { return Variant{""}; } - -	/** -	 * Returns the String VariantType. -	 * -	 * @return the String VariantType. -	 */ -	std::vector<VariantType> getVariantTypes() const override -	{ -		return {VariantType::STRING}; -	}  };  /** @@ -336,16 +305,6 @@ public:  	 * @return the integer value zero.  	 */  	Variant create() const override { return Variant{0}; } - -	/** -	 * Returns the Int VariantType. -	 * -	 * @return the Int VariantType. 
-	 */ -	std::vector<VariantType> getVariantTypes() const override -	{ -		return {VariantType::INT}; -	}  };  /** @@ -385,16 +344,6 @@ public:  	 * @return the double value zero.  	 */  	Variant create() const override { return Variant{0.0}; } - -	/** -	 * Returns the Double VariantType. -	 * -	 * @return the Double VariantType. -	 */ -	std::vector<VariantType> getVariantTypes() const override -	{ -		return {VariantType::DOUBLE}; -	}  };  /** @@ -434,16 +383,6 @@ public:  	 * @return a Variant with the boolean value false.  	 */  	Variant create() const override { return Variant{false}; } - -	/** -	 * Returns the bool VariantType. -	 * -	 * @return the bool VariantType. -	 */ -	std::vector<VariantType> getVariantTypes() const override -	{ -		return {VariantType::BOOL}; -	}  };  /** @@ -609,16 +548,6 @@ public:  	 * name. Throws a LoggableException if the string does not exist.  	 */  	Ordinal valueOf(const std::string &name) const; - -	/** -	 * Returns the int and string VariantTypes. -	 * -	 * @return the int and string VariantTypes. -	 */ -	std::vector<VariantType> getVariantTypes() const override -	{ -		return {VariantType::INT, VariantType::STRING}; -	}  };  /** @@ -1054,15 +983,6 @@ public:  	 * @return true if the requested attribute name exists, false otherwise.  	 */  	bool hasAttribute(const std::string &name) const; -	/** -	 * Returns the array and map VariantTypes. -	 * -	 * @return the array and map VariantTypes. -	 */ -	std::vector<VariantType> getVariantTypes() const override -	{ -		return {VariantType::MAP}; -	}  };  /** @@ -1128,15 +1048,6 @@ public:  	 * @return Rooted reference pointing at the innerType.  	 */  	Rooted<Type> getInnerType() { return innerType; } -	/** -	 * Returns the array VariantType. -	 * -	 * @return the array VariantType. -	 */ -	std::vector<VariantType> getVariantTypes() const override -	{ -		return {VariantType::ARRAY}; -	}  };  /** @@ -1175,20 +1086,6 @@ public:  	 * @return a Variant instance with nullptr value.  	 
*/  	Variant create() const override; -	/** -	 * Returns all parseable VariantTypes (bool, int, double, array, map, -	 *cardinality, object, string). -	 * -	 * @return all parseable VariantTypes (bool, int, double, array, map, -	 *cardinality, object, string). -	 */ -	std::vector<VariantType> getVariantTypes() const override -	{ -		return {VariantType::BOOL,   VariantType::INT, -		        VariantType::DOUBLE, VariantType::ARRAY, -		        VariantType::MAP,    VariantType::CARDINALITY, -		        VariantType::OBJECT, VariantType::STRING}; -	}  };  /** diff --git a/src/core/parser/ParserScope.cpp b/src/core/parser/ParserScope.cpp index 3929abf..ce3dc94 100644 --- a/src/core/parser/ParserScope.cpp +++ b/src/core/parser/ParserScope.cpp @@ -351,8 +351,7 @@ bool ParserScope::resolveType(const std::string &name, Handle<Node> owner,  	return resolveType(Utils::split(name, '.'), owner, logger, resultCallback);  } -bool ParserScope::resolveValue(Variant &data, Handle<Type> type, -                               Handle<Node> owner, Logger &logger) +bool ParserScope::resolveValue(Variant &data, Handle<Type> type, Logger &logger)  {  	return type->build(  	    data, logger, @@ -408,7 +407,7 @@ bool ParserScope::resolveTypeWithValue(const std::vector<std::string> &path,  	    [=](Handle<Node> resolved, Handle<Node> owner, Logger &logger) mutable {  		    if (resolved != nullptr) {  			    Rooted<Type> type = resolved.cast<Type>(); -			    scope.resolveValue(*valuePtr, type, owner, logger); +			    scope.resolveValue(*valuePtr, type, logger);  		    }  		    // Call the result callback with the type diff --git a/src/core/parser/ParserScope.hpp b/src/core/parser/ParserScope.hpp index 58fc037..185b845 100644 --- a/src/core/parser/ParserScope.hpp +++ b/src/core/parser/ParserScope.hpp @@ -702,13 +702,11 @@ public:  	 * (even in inner structures). The data will be passed to the "build"  	 * function of the given type.  	 
* @param type is the Typesystem type the data should be interpreted with. -	 * @param owner is the node for which the resolution takes place.  	 * @param logger is the logger instance into which resolution problems  	 * should be logged.  	 * @return true if the value was successfully built.  	 */ -	bool resolveValue(Variant &data, Handle<Type> type, Handle<Node> owner, -	                  Logger &logger); +	bool resolveValue(Variant &data, Handle<Type> type, Logger &logger);  	/**  	 * Resolves a type and makes sure the corresponding value is of the correct diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp deleted file mode 100644 index 1265851..0000000 --- a/src/core/parser/ParserStack.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <sstream> - -#include <core/common/Utils.hpp> -#include <core/common/Exceptions.hpp> -#include <core/model/Project.hpp> - -#include "ParserScope.hpp" -#include "ParserStack.hpp" - -namespace ousia { - -/* A default handler */ - -/** - * The DefaultHandler class is used in case no element handler is specified in - * the ParserState descriptor. 
- */ -class DefaultHandler : public Handler { -public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override {} - -	void end() override {} - -	static Handler *create(const HandlerData &handlerData) -	{ -		return new DefaultHandler{handlerData}; -	} -}; - -/* Class Handler */ - -void Handler::data(const std::string &data, int field) -{ -	if (Utils::hasNonWhitepaceChar(data)) { -		logger().error("Expected command but found character data."); -	} -} - -/* Class ParserStack */ - -/** - * Returns an Exception that should be thrown when a currently invalid command - * is thrown. - */ -static LoggableException InvalidCommand(const std::string &name, -                                        const std::set<std::string> &expected) -{ -	if (expected.empty()) { -		return LoggableException{ -		    std::string{"No nested elements allowed, but got \""} + name + -		    std::string{"\""}}; -	} else { -		return LoggableException{ -		    std::string{"Expected "} + -		    (expected.size() == 1 ? 
std::string{"\""} -		                          : std::string{"one of \""}) + -		    Utils::join(expected, "\", \"") + std::string{"\", but got \""} + -		    name + std::string{"\""}}; -	} -} - -ParserStack::ParserStack( -    ParserContext &ctx, -    const std::multimap<std::string, const ParserState *> &states) -    : ctx(ctx), states(states) -{ -} - -bool ParserStack::deduceState() -{ -	// Assemble all states -	std::vector<const ParserState *> states; -	for (const auto &e : this->states) { -		states.push_back(e.second); -	} - -	// Fetch the type signature of the scope and derive all possible states, -	// abort if no unique parser state was found -	std::vector<const ParserState *> possibleStates = -	    ParserStateDeductor(ctx.getScope().getStackTypeSignature(), states) -	        .deduce(); -	if (possibleStates.size() != 1) { -		ctx.getLogger().error( -		    "Error while including file: Cannot deduce parser state."); -		return false; -	} - -	// Switch to this state by creating a dummy handler -	const ParserState *state = possibleStates[0]; -	Handler *handler = -	    DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}}); -	stack.emplace(handler); -	return true; -} - -std::set<std::string> ParserStack::expectedCommands() -{ -	const ParserState *currentState = &(this->currentState()); -	std::set<std::string> res; -	for (const auto &v : states) { -		if (v.second->parents.count(currentState)) { -			res.insert(v.first); -		} -	} -	return res; -} - -const ParserState &ParserStack::currentState() -{ -	return stack.empty() ? ParserStates::None : stack.top()->state(); -} - -std::string ParserStack::currentCommandName() -{ -	return stack.empty() ? 
std::string{} : stack.top()->name(); -} - -const ParserState *ParserStack::findTargetState(const std::string &name) -{ -	const ParserState *currentState = &(this->currentState()); -	auto range = states.equal_range(name); -	for (auto it = range.first; it != range.second; it++) { -		const ParserStateSet &parents = it->second->parents; -		if (parents.count(currentState) || parents.count(&ParserStates::All)) { -			return it->second; -		} -	} - -	return nullptr; -} - -void ParserStack::start(const std::string &name, Variant::mapType &args, -                        const SourceLocation &location) -{ -	ParserState const *targetState = findTargetState(name); -// TODO: Andreas, please improve this. -//	if (!Utils::isIdentifier(name)) { -//		throw LoggableException(std::string("Invalid identifier \"") + name + -//		                        std::string("\"")); -//	} - -	if (targetState == nullptr) { -		targetState = findTargetState("*"); -	} -	if (targetState == nullptr) { -		throw InvalidCommand(name, expectedCommands()); -	} - -	// Fetch the associated constructor -	HandlerConstructor ctor = targetState->elementHandler -	                              ? 
targetState->elementHandler -	                              : DefaultHandler::create; - -	// Canonicalize the arguments, allow additional arguments -	targetState->arguments.validateMap(args, ctx.getLogger(), true); - -	// Instantiate the handler and call its start function -	Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); -	handler->start(args); -	stack.emplace(handler); -} - -void ParserStack::start(std::string name, const Variant::mapType &args, -                        const SourceLocation &location) -{ -	Variant::mapType argsCopy(args); -	start(name, argsCopy); -} - -void ParserStack::end() -{ -	// Check whether the current command could be ended -	if (stack.empty()) { -		throw LoggableException{"No command to end."}; -	} - -	// Remove the current HandlerInstance from the stack -	std::shared_ptr<Handler> inst{stack.top()}; -	stack.pop(); - -	// Call the end function of the last Handler -	inst->end(); -} - -void ParserStack::data(const std::string &data, int field) -{ -	// Check whether there is any command the data can be sent to -	if (stack.empty()) { -		throw LoggableException{"No command to receive data."}; -	} - -	// Pass the data to the current Handler instance -	stack.top()->data(data, field); -} -} - diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp deleted file mode 100644 index efc4e4a..0000000 --- a/src/core/parser/ParserStack.hpp +++ /dev/null @@ -1,361 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file ParserStack.hpp - * - * Helper classes for document or description parsers. Contains the ParserStack - * class, which is an pushdown automaton responsible for accepting commands in - * the correct order and calling specified handlers. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PARSER_STACK_HPP_ -#define _OUSIA_PARSER_STACK_HPP_ - -#include <cstdint> - -#include <map> -#include <memory> -#include <set> -#include <stack> -#include <vector> - -#include <core/common/Variant.hpp> -#include <core/common/Logger.hpp> -#include <core/common/Argument.hpp> - -#include "Parser.hpp" -#include "ParserContext.hpp" -#include "ParserState.hpp" - -namespace ousia { - -/** - * Struct collecting all the data that is being passed to a Handler instance. - */ -struct HandlerData { -	/** -	 * Reference to the ParserContext instance that should be used to resolve -	 * references to nodes in the Graph. -	 */ -	ParserContext &ctx; - -	/** -	 * Contains the name of the tag that is being handled. -	 */ -	const std::string name; - -	/** -	 * Contains the current state of the state machine. -	 */ -	const ParserState &state; - -	/** -	 * Contains the state of the state machine when the parent node was handled. -	 */ -	const ParserState &parentState; - -	/** -	 * Current source code location. -	 */ -	const SourceLocation location; - -	/** -	 * Constructor of the HandlerData class. -	 * -	 * @param ctx is the parser context the handler should be executed in. -	 * @param name is the name of the string. -	 * @param state is the state this handler was called for. -	 * @param parentState is the state of the parent command. -	 * @param location is the location at which the handler is created. 
-	 */ -	HandlerData(ParserContext &ctx, std::string name, const ParserState &state, -	            const ParserState &parentState, const SourceLocation location) -	    : ctx(ctx), -	      name(std::move(name)), -	      state(state), -	      parentState(parentState), -	      location(location){}; -}; - -/** - * The handler class provides a context for handling an XML tag. It has to be - * overridden and registered in the StateStack class to form handlers for - * concrete XML tags. - */ -class Handler { -private: -	/** -	 * Structure containing the internal handler data. -	 */ -	const HandlerData handlerData; - -public: -	/** -	 * Constructor of the Handler class. -	 * -	 * @param data is a structure containing all data being passed to the -	 * handler. -	 */ -	Handler(const HandlerData &handlerData) : handlerData(handlerData){}; - -	/** -	 * Virtual destructor. -	 */ -	virtual ~Handler(){}; - -	/** -	 * Returns a reference at the ParserContext. -	 * -	 * @return a reference at the ParserContext. -	 */ -	ParserContext &context() { return handlerData.ctx; } - -	/** -	 * Returns the command name for which the handler was created. -	 * -	 * @return a const reference at the command name. -	 */ -	const std::string &name() { return handlerData.name; } - -	/** -	 * Returns a reference at the ParserScope instance. -	 * -	 * @return a reference at the ParserScope instance. -	 */ -	ParserScope &scope() { return handlerData.ctx.getScope(); } - -	/** -	 * Returns a reference at the Manager instance which manages all nodes. -	 * -	 * @return a referance at the Manager instance. -	 */ -	Manager &manager() { return handlerData.ctx.getManager(); } - -	/** -	 * Returns a reference at the Logger instance used for logging error -	 * messages. -	 * -	 * @return a reference at the Logger instance. 
-	 */ -	Logger &logger() { return handlerData.ctx.getLogger(); } - -	/** -	 * Returns a reference at the Project Node, representing the project into -	 * which the file is currently being parsed. -	 * -	 * @return a referance at the Project Node. -	 */ -	Rooted<Project> project() { return handlerData.ctx.getProject(); } - -	/** -	 * Reference at the ParserState descriptor for which this Handler was -	 * created. -	 * -	 * @return a const reference at the constructing ParserState descriptor. -	 */ -	const ParserState &state() { return handlerData.state; } - -	/** -	 * Reference at the ParserState descriptor of the parent state of the state -	 * for which this Handler was created. Set to ParserStates::None if there -	 * is no parent state. -	 * -	 * @return a const reference at the parent state of the constructing -	 * ParserState descriptor. -	 */ -	const ParserState &parentState() { return handlerData.parentState; } - -	/** -	 * Returns the current location in the source file. -	 * -	 * @return the current location in the source file. -	 */ -	SourceLocation location() { return handlerData.location; } - -	/** -	 * Called when the command that was specified in the constructor is -	 * instanciated. -	 * -	 * @param args is a map from strings to variants (argument name and value). -	 */ -	virtual void start(Variant::mapType &args) = 0; - -	/** -	 * Called whenever the command for which this handler is defined ends. -	 */ -	virtual void end() = 0; - -	/** -	 * Called whenever raw data (int the form of a string) is available for the -	 * Handler instance. In the default handler an exception is raised if the -	 * received data contains non-whitespace characters. -	 * -	 * @param data is a pointer at the character data that is available for the -	 * Handler instance. -	 * @param field is the field number (the interpretation of this value -	 * depends on the format that is being parsed). 
-	 */ -	virtual void data(const std::string &data, int field); -}; - -/** - * HandlerConstructor is a function pointer type used to create concrete - * instances of the Handler class. - * - * @param handlerData is the data that should be passed to the new handler - * instance. - * @return a newly created handler instance. - */ -using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); - -/** - * The ParserStack class is a pushdown automaton responsible for turning a - * command stream into a tree of Node instances. - */ -class ParserStack { -private: -	/** -	 * Reference at the parser context. -	 */ -	ParserContext &ctx; - -	/** -	 * Map containing all registered command names and the corresponding -	 * state descriptors. -	 */ -	const std::multimap<std::string, const ParserState *> &states; - -	/** -	 * Internal stack used for managing the currently active Handler instances. -	 */ -	std::stack<std::shared_ptr<Handler>> stack; - -	/** -	 * Used internally to get all expected command names for the current state. -	 * This function is used to build error messages. -	 * -	 * @return a set of strings containing the names of the expected commands. -	 */ -	std::set<std::string> expectedCommands(); - -	/** -	 * Returns the targetState for a command with the given name that can be -	 * reached from for the current state. -	 * -	 * @param name is the name of the requested command. -	 * @return nullptr if no target state was found, a pointer at the target -	 *state -	 * otherwise. -	 */ -	const ParserState *findTargetState(const std::string &name); - -public: -	/** -	 * Creates a new instance of the ParserStack class. -	 * -	 * @param ctx is the parser context the parser stack is working on. -	 * @param states is a map containing the command names and pointers at the -	 * corresponding ParserState instances. 
-	 */ -	ParserStack(ParserContext &ctx, -	            const std::multimap<std::string, const ParserState *> &states); - -	/** -	 * Tries to reconstruct the parser state from the Scope instance of the -	 * ParserContext given in the constructor. This functionality is needed for -	 * including files,as the Parser of the included file needs to be brought to -	 + an equivalent state as the one in the including file. -	 * -	 * @param scope is the ParserScope instance from which the ParserState -	 * should be reconstructed. -	 * @param logger is the logger instance to which error messages should be -	 * written. -	 * @return true if the operation was sucessful, false otherwise. -	 */ -	bool deduceState(); - -	/** -	 * Returns the state the ParserStack instance currently is in. -	 * -	 * @return the state of the currently active Handler instance or STATE_NONE -	 * if no handler is on the stack. -	 */ -	const ParserState &currentState(); - -	/** -	 * Returns the command name that is currently being handled. -	 * -	 * @return the name of the command currently being handled by the active -	 * Handler instance or an empty string if no handler is currently active. -	 */ -	std::string currentCommandName(); - -	/** -	 * Function that should be called whenever a new command starts. -	 * -	 * @param name is the name of the command. -	 * @param args is a map from strings to variants (argument name and value). -	 * Note that the passed map will be modified. -	 * @param location is the location in the source file at which the command -	 * starts. 
-	 */ -	void start(std::string name, -	           const Variant::mapType &args = Variant::mapType{}, -	           const SourceLocation &location = SourceLocation{}); - -	/** -	 * Function called whenever a command ends. -	 */ -	void end(); - -	/** -	 * Function that should be called whenever data is available for the -	 * command. -	 * -	 * @param data is the data that should be passed to the handler. -	 * @param field is the field number (the interpretation of this value -	 * depends on the format that is being parsed). -	 */ -	void data(const std::string &data, int field = 0); - -	/** -	 * Returns a reference to the parser context the parser stack is currently -	 * working on. -	 * -	 * @return a reference to the parser context. -	 */ -	ParserContext &getContext() { return ctx; } -}; -} - -#endif /* _OUSIA_PARSER_STACK_HPP_ */ - diff --git a/src/core/parser/generic/GenericParser.cpp b/src/core/parser/generic/GenericParser.cpp deleted file mode 100644 index e69de29..0000000 --- a/src/core/parser/generic/GenericParser.cpp +++ /dev/null diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp new file mode 100644 index 0000000..6ebc549 --- /dev/null +++ b/src/core/parser/stack/Callbacks.cpp @@ -0,0 +1,23 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  
If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Callbacks.hpp" + +namespace ousia { +} + diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp new file mode 100644 index 0000000..9c61000 --- /dev/null +++ b/src/core/parser/stack/Callbacks.hpp @@ -0,0 +1,99 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Callbacks.hpp + * + * Contains an interface defining the callbacks that can be directed from a + * StateHandler to the StateStack, and from the StateStack to + * the actual parser. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_CALLBACKS_HPP_ +#define _OUSIA_PARSER_STACK_CALLBACKS_HPP_ + +#include <string> + +#include <core/common/Whitespace.hpp> + +namespace ousia { +namespace parser_stack { + +/** + * Interface defining a set of callback functions that act as a basis for the + * StateStackCallbacks and the ParserCallbacks. + */ +class Callbacks { +public: +	/** +	 * Virtual descructor. +	 */ +	virtual ~Callbacks() {}; + +	/** +	 * Sets the whitespace mode that specifies how string data should be +	 * processed. +	 * +	 * @param whitespaceMode specifies one of the three WhitespaceMode constants +	 * PRESERVE, TRIM or COLLAPSE. 
+	 */ +	virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; + +	/** +	 * Registers the given token as token that should be reported to the handler +	 * using the "token" function. +	 * +	 * @param token is the token string that should be reported. +	 */ +	virtual void registerToken(const std::string &token) = 0; + +	/** +	 * Unregisters the given token, it will no longer be reported to the handler +	 * using the "token" function. +	 * +	 * @param token is the token string that should be unregistered. +	 */ +	virtual void unregisterToken(const std::string &token) = 0; +}; + +/** + * Interface defining the callback functions that can be passed from a + * StateStack to the underlying parser. + */ +class ParserCallbacks : public Callbacks { +	/** +	 * Checks whether the given token is supported by the parser. The parser +	 * returns true, if the token is supported, false if this token cannot be +	 * registered. Note that parsers that do not support the registration of +	 * tokens at all should always return "true". +	 * +	 * @param token is the token that should be checked for support. +	 * @return true if the token is generally supported (or the parser does not +	 * support registering tokens at all), false if the token is not supported, +	 * because e.g. it is a reserved token or it interferes with other tokens. +	 */ +	virtual bool supportsToken(const std::string &token) = 0; +}; + +} +} + +#endif /* _OUSIA_PARSER_STACK_CALLBACKS_HPP_ */ + diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index 3647db3..d514701 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -16,28 +16,35 @@      along with this program.  If not, see <http://www.gnu.org/licenses/>.  
*/ -#include "DocumentHandler.hpp" -  #include <algorithm>  #include <core/common/RttiBuilder.hpp>  #include <core/common/Utils.hpp> +#include <core/common/VariantReader.hpp>  #include <core/model/Document.hpp>  #include <core/model/Domain.hpp> +#include <core/model/Project.hpp>  #include <core/model/Typesystem.hpp>  #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DocumentHandler.hpp" +#include "State.hpp"  namespace ousia { +namespace parser_stack {  /* DocumentHandler */ -void DocumentHandler::start(Variant::mapType &args) +bool DocumentHandler::start(Variant::mapType &args)  {  	Rooted<Document> document = -	    project()->createDocument(args["name"].asString()); +	    context().getProject()->createDocument(args["name"].asString());  	document->setLocation(location());  	scope().push(document);  	scope().setFlag(ParserFlag::POST_HEAD, false); + +	return true;  }  void DocumentHandler::end() { scope().pop(); } @@ -48,7 +55,7 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,                                      std::string &fieldName,                                      DocumentEntity *&parent, bool &inField)  { -	// check if the parent in the structure tree was an explicit field +	// Check if the parent in the structure tree was an explicit field  	// reference.  	inField = parentNode->isa(&RttiTypes::DocumentField);  	if (inField) { @@ -56,10 +63,11 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,  		parentNode = scope().selectOrThrow(  		    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity});  	} else { -		// if it wasn't an explicit reference, we use the default field. +		// If it wasn't an explicit reference, we use the default field.  		fieldName = DEFAULT_FIELD_NAME;  	} -	// reference the parent entity explicitly. + +	// Reference the parent entity explicitly.  	
parent = nullptr;  	if (parentNode->isa(&RttiTypes::StructuredEntity)) {  		parent = static_cast<DocumentEntity *>( @@ -70,17 +78,13 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,  	}  } -static void createPath(const std::string &firstFieldName, -                       const NodeVector<Node> &path, DocumentEntity *&parent) +static void createPath(const NodeVector<Node> &path, DocumentEntity *&parent, +                       size_t p0 = 1)  { -	// add the first element -	parent = static_cast<DocumentEntity *>( -	    parent->createChildStructuredEntity(path[0].cast<StructuredClass>(), -	                                        Variant::mapType{}, firstFieldName, -	                                        "").get()); - +	// TODO (@benjamin): These should be pushed onto the scope and poped once +	// the scope is left. Otherwise stuff may not be correclty resolved.  	size_t S = path.size(); -	for (size_t p = 2; p < S; p = p + 2) { +	for (size_t p = p0; p < S; p = p + 2) {  		parent = static_cast<DocumentEntity *>(  		    parent->createChildStructuredEntity(  		                path[p].cast<StructuredClass>(), Variant::mapType{}, @@ -88,18 +92,19 @@ static void createPath(const std::string &firstFieldName,  	}  } -static void createPath(const NodeVector<Node> &path, DocumentEntity *&parent) +static void createPath(const std::string &firstFieldName, +                       const NodeVector<Node> &path, DocumentEntity *&parent)  { -	size_t S = path.size(); -	for (size_t p = 1; p < S; p = p + 2) { -		parent = static_cast<DocumentEntity *>( -		    parent->createChildStructuredEntity( -		                path[p].cast<StructuredClass>(), Variant::mapType{}, -		                path[p - 1]->getName(), "").get()); -	} +	// Add the first element +	parent = static_cast<DocumentEntity *>( +	    parent->createChildStructuredEntity(path[0].cast<StructuredClass>(), +	                                        Variant::mapType{}, firstFieldName, +	                              
          "").get()); + +	createPath(path, parent, 2);  } -void DocumentChildHandler::start(Variant::mapType &args) +bool DocumentChildHandler::start(Variant::mapType &args)  {  	scope().setFlag(ParserFlag::POST_HEAD, true);  	Rooted<Node> parentNode = scope().selectOrThrow( @@ -112,7 +117,7 @@ void DocumentChildHandler::start(Variant::mapType &args)  	preamble(parentNode, fieldName, parent, inField); -	// try to find a FieldDescriptor for the given tag if we are not in a +	// Try to find a FieldDescriptor for the given tag if we are not in a  	// field already. This does _not_ try to construct transparent paths  	// in between.  	if (!inField && parent != nullptr && @@ -121,7 +126,7 @@ void DocumentChildHandler::start(Variant::mapType &args)  		    new DocumentField(parentNode->getManager(), name(), parentNode)};  		field->setLocation(location());  		scope().push(field); -		return; +		return true;  	}  	// Otherwise create a new StructuredEntity @@ -187,27 +192,39 @@ void DocumentChildHandler::start(Variant::mapType &args)  	}  	entity->setLocation(location());  	scope().push(entity); +	return true;  }  void DocumentChildHandler::end() { scope().pop(); } -std::pair<bool, Variant> DocumentChildHandler::convertData( -    Handle<FieldDescriptor> field, Logger &logger, const std::string &data) +bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field, +                                       Variant &data, Logger &logger)  { -	// if the content is supposed to be of type string, we can finish -	// directly. 
-	auto vts = field->getPrimitiveType()->getVariantTypes(); -	if (std::find(vts.begin(), vts.end(), VariantType::STRING) != vts.end()) { -		return std::make_pair(true, Variant::fromString(data)); +	bool valid = true; +	Rooted<Type> type = field->getPrimitiveType(); + +	// If the content is supposed to be of type string, we only need to check +	// for "magic" values -- otherwise just call the "parseGenericString" +	// function on the string data +	if (type->isa(&RttiTypes::StringType)) { +		const std::string &str = data.asString(); +		// TODO: Referencing constants with "." separator should also work +		if (Utils::isIdentifier(str)) { +			data.markAsMagic(); +		} +	} else { +		// Parse the string as generic string, assign the result +		auto res = VariantReader::parseGenericString( +		    data.asString(), logger, data.getLocation().getSourceId(), +		    data.getLocation().getStart()); +		data = res.second;  	} -	// then try to parse the content using the type specification. -	auto res = field->getPrimitiveType()->read( -	    data, logger, location().getSourceId(), location().getStart()); -	return res; +	// Now try to resolve the value for the primitive type +	return valid && scope().resolveValue(data, type, logger);  } -void DocumentChildHandler::data(const std::string &data, int fieldIdx) +bool DocumentChildHandler::data(Variant &data)  {  	Rooted<Node> parentNode = scope().selectOrThrow(  	    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity, @@ -222,11 +239,10 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx)  	Rooted<Descriptor> desc = strctParent->getDescriptor();  	// The parent from which we need to connect to the primitive content.  	Rooted<Node> parentClass; -	/* -	 * We distinguish two cases here: One for fields that are given. -	 */ + +	// We distinguish two cases here: One for fields that are given.  	
if (inField) { -		// retrieve the actual FieldDescriptor +		// Retrieve the actual FieldDescriptor  		Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName);  		if (field == nullptr) {  			logger().error( @@ -234,75 +250,102 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx)  			        fieldName + "\" exists in descriptor\"" + desc->getName() +  			        "\".",  			    location()); -			return; +			return false;  		} -		// if it is a primitive field directly, try to parse the content. +		// If it is a primitive field directly, try to parse the content.  		if (field->isPrimitive()) { -			auto res = convertData(field, logger(), data); -			// add it as primitive content. -			if (res.first) { -				strctParent->createChildDocumentPrimitive(res.second, -				                                          fieldName); +			// Add it as primitive content. +			if (!convertData(field, data, logger())) { +				return false;  			} -			return; + +			strctParent->createChildDocumentPrimitive(data, fieldName); +			return true;  		} -		// if it is not primitive we need to connect via transparent elements +		// If it is not primitive we need to connect via transparent elements  		// and default fields.  		parentClass = field;  	} else { -		// in case of default fields we need to construct via default fields +		// In case of default fields we need to construct via default fields  		// and maybe transparent elements.  		parentClass = desc;  	} -	/* -	 * Search through all permitted default fields of the parent class that -	 * allow primitive content at this point and could be constructed via -	 * transparent intermediate entities. -	 * We then try to parse the data using the type specified by the respective -	 * field. If that does not work we proceed to the next possible field. -	 */ -	// retrieve all default fields at this point. 
+ +	// Search through all permitted default fields of the parent class that +	// allow primitive content at this point and could be constructed via +	// transparent intermediate entities. + +	// Retrieve all default fields at this point, either from the field +	// descriptor or the structured class  	NodeVector<FieldDescriptor> defaultFields;  	if (inField) {  		defaultFields = parentClass.cast<FieldDescriptor>()->getDefaultFields();  	} else {  		defaultFields = parentClass.cast<StructuredClass>()->getDefaultFields();  	} + +	// Try to parse the data using the type specified by the respective field. +	// If that does not work we proceed to the next possible field.  	std::vector<LoggerFork> forks;  	for (auto field : defaultFields) { -		// then try to parse the content using the type specification. +		// Then try to parse the content using the type specification.  		forks.emplace_back(logger().fork()); -		auto res = convertData(field, forks.back(), data); -		if (res.first) { -			forks.back().commit(); -			// if that worked, construct the necessary path. -			if (inField) { -				NodeVector<Node> path = -				    parentClass.cast<FieldDescriptor>()->pathTo(field, -				                                                logger()); -				createPath(fieldName, path, strctParent); -			} else { -				auto pathRes = desc->pathTo(field, logger()); -				assert(pathRes.second); -				createPath(pathRes.first, strctParent); -			} -			// then create the primitive element. 
-			strctParent->createChildDocumentPrimitive(res.second); -			return; +		if (!convertData(field, data, forks.back())) { +			continue;  		} + +		// The conversion worked, commit any possible warnings +		forks.back().commit(); + +		// Construct the necessary path +		if (inField) { +			NodeVector<Node> path = +			    parentClass.cast<FieldDescriptor>()->pathTo(field, logger()); +			createPath(fieldName, path, strctParent); +		} else { +			auto pathRes = desc->pathTo(field, logger()); +			assert(pathRes.second); +			createPath(pathRes.first, strctParent); +		} + +		// Then create the primitive element +		strctParent->createChildDocumentPrimitive(data); +		return true;  	} -	logger().error("Could not read data with any of the possible fields:"); + +	// No field was found that might take the data -- dump the error messages +	// from the loggers +	logger().error("Could not read data with any of the possible fields:", +	               SourceLocation{}, MessageMode::NO_CONTEXT);  	size_t f = 0;  	for (auto field : defaultFields) { -		logger().note(Utils::join(field->path(), ".") + ":", SourceLocation{}, -		              MessageMode::NO_CONTEXT); +		logger().note(std::string("Field ") + Utils::join(field->path(), ".") + +		                  std::string(":"), +		              SourceLocation{}, MessageMode::NO_CONTEXT);  		forks[f].commit();  		f++;  	} +	return false; +} + +namespace States { +const State Document = StateBuilder() +                           .parent(&None) +                           .createdNodeType(&RttiTypes::Document) +                           .elementHandler(DocumentHandler::create) +                           .arguments({Argument::String("name", "")}); + +const State DocumentChild = StateBuilder() +                                .parents({&Document, &DocumentChild}) +                                .createdNodeTypes({&RttiTypes::StructureNode, +                                                   &RttiTypes::AnnotationEntity, +                         
                          &RttiTypes::DocumentField}) +                                .elementHandler(DocumentChildHandler::create); +}  }  namespace RttiTypes { -const Rtti DocumentField = -    RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node); +const Rtti DocumentField = RttiBuilder<ousia::parser_stack::DocumentField>( +                               "DocumentField").parent(&Node); +}  } -}
\ No newline at end of file diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index cb124aa..b339b96 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -19,14 +19,21 @@  /**   * @file DocumentHandler.hpp   * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + * Contains the Handler instances used for parsing actual documents. This file + * declares to classes: The Document handler which parses the "document" command + * that introduces a new document and the "DocumentChildHandler" which parses + * the actual user defined tags. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)   */ -#ifndef _OUSIA_DOCUMENT_HANDLER_HPP_ -#define _OUSIA_DOCUMENT_HANDLER_HPP_ +#ifndef _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ +#define _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_  #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> +#include <core/model/Node.hpp> + +#include "Handler.hpp"  namespace ousia { @@ -35,51 +42,131 @@ class Rtti;  class DocumentEntity;  class FieldDescriptor; -class DocumentHandler : public Handler { +namespace parser_stack { +/** + * The DocumentHandler class parses the "document" tag that is used to introduce + * a new document. Note that this tag is not mandatory in osml files -- if the + * first command is not a typesystem, domain or any other declarative command, + * the DocumentHandler will be implicitly called. + */ +class DocumentHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override; +	/** +	 * Creates a new instance of the ImportHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. 
+	 */  	static Handler *create(const HandlerData &handlerData)  	{  		return new DocumentHandler{handlerData};  	}  }; +/** + * Temporary Node that is being pushed onto the ParserScope in order to indicate + * the field the parser is currently in. The name of the Node is stored in the + * "name" field of the parent Node class. + */  class DocumentField : public Node {  public:  	using Node::Node;  }; -class DocumentChildHandler : public Handler { +/** + * The DocumentChildHandler class performs the actual parsing of the user + * defined elements in an Ousía document. + */ +class DocumentChildHandler : public StaticHandler {  private: +	/** +	 * Code shared by both the start() and the end() method. Checks whether the +	 * parser currently is in a field and returns the name of this field. +	 * +	 * @param parentNode is the next possible parent node (a document, +	 * a structured entity, an annotation entity or a field). +	 * @param fieldName is an output parameter to which the name of the current +	 * field is written (or unchanged if we're not in a field). +	 * @param parent is an output parameter to which the parent document entity +	 * will be written. +	 * @param inField is set to true if we actually are in a field. +	 */  	void preamble(Handle<Node> parentNode, std::string &fieldName,  	              DocumentEntity *&parent, bool &inField); -	std::pair<bool, Variant> convertData(Handle<FieldDescriptor> field, -	                                     Logger &logger, -	                                     const std::string &data); +	/** +	 * Constructs all structured entites along the given path and inserts them +	 * into the document graph. +	 * +	 * @param path is a path containing an alternating series of structured +	 * classes and fields. +	 * @pram parent is the root entity from which the process should be started. 
+	 */ +	void createPath(const NodeVector<Node> &path, DocumentEntity *&parent); + +	/** +	 * Tries to convert the given data to the type that is specified in the +	 * given primitive field. +	 * +	 * @param field is the primitive field for which the data is intended. +	 * @param data is the is the data that should be converted, the result is +	 * written into this argument as output variable. +	 * @param logger is the Logger instance to which error messages should be +	 * written. Needed to allow the convertData function to write to a forked +	 * Logger instance. +	 * @return true if the operation was successful, false otherwise. +	 */ +	bool convertData(Handle<FieldDescriptor> field, Variant &data, +	                 Logger &logger);  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override; - -	void data(const std::string &data, int fieldIdx) override; - +	bool data(Variant &data) override; + +	/** +	 * Creates a new instance of the DocumentChildHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{  		return new DocumentChildHandler{handlerData};  	}  }; +namespace States { +/** + * State constant representing the "document" tag. + */ +extern const State Document; + +/** + * State contstant representing any user-defined element within a document. + */ +extern const State DocumentChild; +} + +} +  namespace RttiTypes { +/** + * RttiType for the internally used DocumentField class. 
+ */  extern const Rtti DocumentField;  } +  } -#endif + +#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index 6571717..a2c8eec 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -16,29 +16,48 @@      along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ -#include "DomainHandler.hpp" -  #include <core/common/RttiBuilder.hpp> +#include <core/model/Document.hpp>  #include <core/model/Domain.hpp> +#include <core/model/Project.hpp>  #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp"  namespace ousia { +namespace parser_stack {  /* DomainHandler */ -void DomainHandler::start(Variant::mapType &args) +bool DomainHandler::start(Variant::mapType &args)  { -	Rooted<Domain> domain = project()->createDomain(args["name"].asString()); +	// Create the Domain node +	Rooted<Domain> domain = +	    context().getProject()->createDomain(args["name"].asString());  	domain->setLocation(location()); +	// If the domain is defined inside a document, add the reference to the +	// document +	Rooted<Document> document = scope().select<Document>(); +	if (document != nullptr) { +		document->reference(domain); +	} + +	// Push the typesystem onto the scope, set the POST_HEAD flag to true  	scope().push(domain); +	scope().setFlag(ParserFlag::POST_HEAD, false); +	return true;  }  void DomainHandler::end() { scope().pop(); }  /* DomainStructHandler */ -void DomainStructHandler::start(Variant::mapType &args) +bool DomainStructHandler::start(Variant::mapType &args)  {  	scope().setFlag(ParserFlag::POST_HEAD, true); @@ -63,12 +82,13 @@ void DomainStructHandler::start(Variant::mapType &args)  	}  	scope().push(structuredClass); +	return true;  }  void DomainStructHandler::end() { 
scope().pop(); }  /* DomainAnnotationHandler */ -void DomainAnnotationHandler::start(Variant::mapType &args) +bool DomainAnnotationHandler::start(Variant::mapType &args)  {  	scope().setFlag(ParserFlag::POST_HEAD, true); @@ -79,13 +99,14 @@ void DomainAnnotationHandler::start(Variant::mapType &args)  	annotationClass->setLocation(location());  	scope().push(annotationClass); +	return true;  }  void DomainAnnotationHandler::end() { scope().pop(); }  /* DomainAttributesHandler */ -void DomainAttributesHandler::start(Variant::mapType &args) +bool DomainAttributesHandler::start(Variant::mapType &args)  {  	// Fetch the current typesystem and create the struct node  	Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); @@ -94,13 +115,14 @@ void DomainAttributesHandler::start(Variant::mapType &args)  	attrDesc->setLocation(location());  	scope().push(attrDesc); +	return true;  }  void DomainAttributesHandler::end() { scope().pop(); }  /* DomainFieldHandler */ -void DomainFieldHandler::start(Variant::mapType &args) +bool DomainFieldHandler::start(Variant::mapType &args)  {  	FieldDescriptor::FieldType type;  	if (args["isSubtree"].asBool()) { @@ -116,13 +138,14 @@ void DomainFieldHandler::start(Variant::mapType &args)  	field->setLocation(location());  	scope().push(field); +	return true;  }  void DomainFieldHandler::end() { scope().pop(); }  /* DomainFieldRefHandler */ -void DomainFieldRefHandler::start(Variant::mapType &args) +bool DomainFieldRefHandler::start(Variant::mapType &args)  {  	Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); @@ -135,13 +158,14 @@ void DomainFieldRefHandler::start(Variant::mapType &args)  			        field.cast<FieldDescriptor>(), logger);  		    }  		}); +	return true;  }  void DomainFieldRefHandler::end() {}  /* DomainPrimitiveHandler */ -void DomainPrimitiveHandler::start(Variant::mapType &args) +bool DomainPrimitiveHandler::start(Variant::mapType &args)  {  	Rooted<Descriptor> parent = 
scope().selectOrThrow<Descriptor>(); @@ -167,13 +191,14 @@ void DomainPrimitiveHandler::start(Variant::mapType &args)  	});  	scope().push(field); +	return true;  }  void DomainPrimitiveHandler::end() { scope().pop(); }  /* DomainChildHandler */ -void DomainChildHandler::start(Variant::mapType &args) +bool DomainChildHandler::start(Variant::mapType &args)  {  	Rooted<FieldDescriptor> field = scope().selectOrThrow<FieldDescriptor>(); @@ -186,13 +211,12 @@ void DomainChildHandler::start(Variant::mapType &args)  			        child.cast<StructuredClass>());  		    }  		}); +	return true;  } -void DomainChildHandler::end() {} -  /* DomainParentHandler */ -void DomainParentHandler::start(Variant::mapType &args) +bool DomainParentHandler::start(Variant::mapType &args)  {  	Rooted<StructuredClass> strct = scope().selectOrThrow<StructuredClass>(); @@ -200,12 +224,14 @@ void DomainParentHandler::start(Variant::mapType &args)  	    new DomainParent(strct->getManager(), args["ref"].asString(), strct)};  	parent->setLocation(location());  	scope().push(parent); +	return true;  }  void DomainParentHandler::end() { scope().pop(); }  /* DomainParentFieldHandler */ -void DomainParentFieldHandler::start(Variant::mapType &args) + +bool DomainParentFieldHandler::start(Variant::mapType &args)  {  	Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>();  	FieldDescriptor::FieldType type; @@ -233,13 +259,12 @@ void DomainParentFieldHandler::start(Variant::mapType &args)  			    field->addChild(strct.cast<StructuredClass>());  		    }  		}); +	return true;  } -void DomainParentFieldHandler::end() {} -  /* DomainParentFieldRefHandler */ -void DomainParentFieldRefHandler::start(Variant::mapType &args) +bool DomainParentFieldRefHandler::start(Variant::mapType &args)  {  	Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>(); @@ -265,12 +290,104 @@ void DomainParentFieldRefHandler::start(Variant::mapType &args)  			    
field->addChild(strct.cast<StructuredClass>());  		    }  		}); +	return true;  } -void DomainParentFieldRefHandler::end() {} +namespace States { +const State Domain = StateBuilder() +                         .parents({&None, &Document}) +                         .createdNodeType(&RttiTypes::Domain) +                         .elementHandler(DomainHandler::create) +                         .arguments({Argument::String("name")}); + +const State DomainStruct = +    StateBuilder() +        .parent(&Domain) +        .createdNodeType(&RttiTypes::StructuredClass) +        .elementHandler(DomainStructHandler::create) +        .arguments({Argument::String("name"), +                    Argument::Cardinality("cardinality", Cardinality::any()), +                    Argument::Bool("isRoot", false), +                    Argument::Bool("transparent", false), +                    Argument::String("isa", "")}); + +const State DomainAnnotation = +    StateBuilder() +        .parent(&Domain) +        .createdNodeType(&RttiTypes::AnnotationClass) +        .elementHandler(DomainAnnotationHandler::create) +        .arguments({Argument::String("name")}); + +const State DomainAttributes = +    StateBuilder() +        .parents({&DomainStruct, &DomainAnnotation}) +        .createdNodeType(&RttiTypes::StructType) +        .elementHandler(DomainAttributesHandler::create) +        .arguments({}); + +const State DomainAttribute = +    StateBuilder() +        .parent(&DomainAttributes) +        .elementHandler(TypesystemStructFieldHandler::create) +        .arguments({Argument::String("name"), Argument::String("type"), +                    Argument::Any("default", Variant::fromObject(nullptr))}); + +const State DomainField = StateBuilder() +                              .parents({&DomainStruct, &DomainAnnotation}) +                              .createdNodeType(&RttiTypes::FieldDescriptor) +                              .elementHandler(DomainFieldHandler::create) +                              
.arguments({Argument::String("name", ""), +                                          Argument::Bool("isSubtree", false), +                                          Argument::Bool("optional", false)}); + +const State DomainFieldRef = +    StateBuilder() +        .parents({&DomainStruct, &DomainAnnotation}) +        .createdNodeType(&RttiTypes::FieldDescriptor) +        .elementHandler(DomainFieldRefHandler::create) +        .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); + +const State DomainStructPrimitive = +    StateBuilder() +        .parents({&DomainStruct, &DomainAnnotation}) +        .createdNodeType(&RttiTypes::FieldDescriptor) +        .elementHandler(DomainPrimitiveHandler::create) +        .arguments( +            {Argument::String("name", ""), Argument::Bool("isSubtree", false), +             Argument::Bool("optional", false), Argument::String("type")}); + +const State DomainStructChild = StateBuilder() +                                    .parent(&DomainField) +                                    .elementHandler(DomainChildHandler::create) +                                    .arguments({Argument::String("ref")}); + +const State DomainStructParent = +    StateBuilder() +        .parent(&DomainStruct) +        .createdNodeType(&RttiTypes::DomainParent) +        .elementHandler(DomainParentHandler::create) +        .arguments({Argument::String("ref")}); + +const State DomainStructParentField = +    StateBuilder() +        .parent(&DomainStructParent) +        .createdNodeType(&RttiTypes::FieldDescriptor) +        .elementHandler(DomainParentFieldHandler::create) +        .arguments({Argument::String("name", ""), +                    Argument::Bool("isSubtree", false), +                    Argument::Bool("optional", false)}); + +const State DomainStructParentFieldRef = +    StateBuilder() +        .parent(&DomainStructParent) +        .createdNodeType(&RttiTypes::FieldDescriptor) +        .elementHandler(DomainParentFieldRefHandler::create) +    
    .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); +} +}  namespace RttiTypes { -const Rtti DomainParent = -    RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node); +const Rtti DomainParent = RttiBuilder<ousia::parser_stack::DomainParent>( +                              "DomainParent").parent(&Node);  }  } diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp index 7398812..76172d6 100644 --- a/src/core/parser/stack/DomainHandler.hpp +++ b/src/core/parser/stack/DomainHandler.hpp @@ -19,26 +19,34 @@  /**   * @file DomainHandler.hpp   * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + * Contains the Handler classes used for parsing Domain descriptors. This + * includes the "domain" tag and all describing tags below the "domain" tag. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)   */  #ifndef _OUSIA_DOMAIN_HANDLER_HPP_  #define _OUSIA_DOMAIN_HANDLER_HPP_  #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> +#include <core/model/Node.hpp> + +#include "Handler.hpp"  namespace ousia {  // Forward declarations  class Rtti; -class DomainHandler : public Handler { -public: -	using Handler::Handler; +namespace parser_stack { + +// TODO: Documentation -	void start(Variant::mapType &args) override; +class DomainHandler : public StaticHandler { +public: +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override;  	static Handler *create(const HandlerData &handlerData) @@ -47,12 +55,11 @@ public:  	}  }; -class DomainStructHandler : public Handler { +class DomainStructHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override;  	static Handler *create(const HandlerData &handlerData) @@ -61,12 +68,11 @@ public:  	}  }; -class 
DomainAnnotationHandler : public Handler { +class DomainAnnotationHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override;  	static Handler *create(const HandlerData &handlerData) @@ -75,12 +81,11 @@ public:  	}  }; -class DomainAttributesHandler : public Handler { +class DomainAttributesHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override;  	static Handler *create(const HandlerData &handlerData) @@ -89,12 +94,11 @@ public:  	}  }; -class DomainFieldHandler : public Handler { +class DomainFieldHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override;  	static Handler *create(const HandlerData &handlerData) @@ -103,12 +107,11 @@ public:  	}  }; -class DomainFieldRefHandler : public Handler { +class DomainFieldRefHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override;  	static Handler *create(const HandlerData &handlerData) @@ -117,12 +120,11 @@ public:  	}  }; -class DomainPrimitiveHandler : public Handler { +class DomainPrimitiveHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override;  	static Handler *create(const HandlerData &handlerData) @@ -131,13 +133,11 @@ public:  	}  }; -class DomainChildHandler : public Handler { +class 
DomainChildHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; -	void end() override; +	bool start(Variant::mapType &args) override;  	static Handler *create(const HandlerData &handlerData)  	{ @@ -150,16 +150,11 @@ public:  	using Node::Node;  }; -namespace RttiTypes { -extern const Rtti DomainParent; -} - -class DomainParentHandler : public Handler { +class DomainParentHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override;  	static Handler *create(const HandlerData &handlerData) @@ -168,13 +163,11 @@ public:  	}  }; -class DomainParentFieldHandler : public Handler { +class DomainParentFieldHandler : public StaticHandler {  public: -	using Handler::Handler; +	using StaticHandler::StaticHandler; -	void start(Variant::mapType &args) override; - -	void end() override; +	bool start(Variant::mapType &args) override;  	static Handler *create(const HandlerData &handlerData)  	{ @@ -182,18 +175,83 @@ public:  	}  }; -class DomainParentFieldRefHandler : public Handler { +class DomainParentFieldRefHandler : public StaticHandler {  public: -	using Handler::Handler; +	using StaticHandler::StaticHandler; -	void start(Variant::mapType &args) override; - -	void end() override; +	bool start(Variant::mapType &args) override;  	static Handler *create(const HandlerData &handlerData)  	{  		return new DomainParentFieldRefHandler{handlerData};  	}  }; + +namespace States { +/** + * State representing a "domain" struct. + */ +extern const State Domain; + +/** + * State representing a "struct" tag within a domain description. + */ +extern const State DomainStruct; + +/** + * State representing an "annotation" tag within a domain description. 
+ */ +extern const State DomainAnnotation; + +/** + * State representing an "attributes" tag within a structure or annotation. + */ +extern const State DomainAttributes; + +/** + * State representing an "attribute" tag within the "attributes". + */ +extern const State DomainAttribute; + +/** + * State representing a "field" tag within a structure or annotation. + */ +extern const State DomainField; + +/** + * State representing a "fieldref" tag within a structure or annotation. + */ +extern const State DomainFieldRef; + +/** + * State representing a "primitive" tag within a structure or annotation. + */ +extern const State DomainStructPrimitive; + +/** + * State representing a "child" tag within a structure or annotation. + */ +extern const State DomainStructChild; + +/** + * State representing a "parent" tag within a structure or annotation. + */ +extern const State DomainStructParent; + +/** + * State representing a "field" tag within a "parent" tag. + */ +extern const State DomainStructParentField; + +/** + * State representing a "fieldRef" tag within a "parent" tag. + */ +extern const State DomainStructParentFieldRef; +} +} + +namespace RttiTypes { +extern const Rtti DomainParent; +}  }  #endif diff --git a/src/core/parser/stack/GenericParserStates.cpp b/src/core/parser/stack/GenericParserStates.cpp new file mode 100644 index 0000000..69a6e0e --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.cpp @@ -0,0 +1,53 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "GenericParserStates.hpp" +#include "ImportIncludeHandler.hpp" +#include "TypesystemHandler.hpp" + +namespace ousia { +namespace parser_stack { + +const std::multimap<std::string, const State *> GenericParserStates{ +    {"document", &States::Document}, +    {"*", &States::DocumentChild}, +    {"domain", &States::Domain}, +    {"struct", &States::DomainStruct}, +    {"annotation", &States::DomainAnnotation}, +    {"attributes", &States::DomainAttributes}, +    {"attribute", &States::DomainAttribute}, +    {"field", &States::DomainField}, +    {"fieldRef", &States::DomainFieldRef}, +    {"primitive", &States::DomainStructPrimitive}, +    {"childRef", &States::DomainStructChild}, +    {"parentRef", &States::DomainStructParent}, +    {"field", &States::DomainStructParentField}, +    {"fieldRef", &States::DomainStructParentFieldRef}, +    {"typesystem", &States::Typesystem}, +    {"enum", &States::TypesystemEnum}, +    {"entry", &States::TypesystemEnumEntry}, +    {"struct", &States::TypesystemStruct}, +    {"field", &States::TypesystemStructField}, +    {"constant", &States::TypesystemConstant}, +    {"import", &States::Import}, +    {"include", &States::Include}}; +} +} + diff --git a/src/core/parser/stack/GenericParserStates.hpp b/src/core/parser/stack/GenericParserStates.hpp new file mode 100644 index 0000000..552eee5 --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.hpp @@ -0,0 +1,49 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the 
License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file GenericParserStates.hpp + * + * Contains a multimap which maps between tag/command names to the corresponding + * state descriptors. This multimap is used to initialize the push down + * automaton residing inside the "Stack" class. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ +#define _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ + +#include <string> +#include <map> + +namespace ousia { +namespace parser_stack { + +// Forward declarations +class State; + +/** + * Map between tagnames and references to the corresponding State instances. + */ +extern const std::multimap<std::string, const State *> GenericParserStates; +} +} + +#endif /* _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ */ + diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp new file mode 100644 index 0000000..bf5d4ea --- /dev/null +++ b/src/core/parser/stack/Handler.cpp @@ -0,0 +1,254 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/common/Exceptions.hpp> +#include <core/common/Logger.hpp> +#include <core/parser/ParserContext.hpp> + +#include "Callbacks.hpp" +#include "Handler.hpp" +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class HandlerData */ + +HandlerData::HandlerData(ParserContext &ctx, /*Callbacks &callbacks,*/ +                         const std::string &name, const State &state, +                         const SourceLocation &location) +    : ctx(ctx), +      /*callbacks(callbacks),*/ +      name(name), +      state(state), +      location(location) +{ +} + +/* Class Handler */ + +Handler::Handler(const HandlerData &handlerData) +    : handlerData(handlerData), internalLogger(nullptr) +{ +} + +Handler::~Handler() {} + +ParserContext &Handler::context() { return handlerData.ctx; } + +ParserScope &Handler::scope() { return handlerData.ctx.getScope(); } + +Manager &Handler::manager() { return handlerData.ctx.getManager(); } + +Logger &Handler::logger() +{ +	if (internalLogger != nullptr) { +		return *internalLogger; +	} +	return handlerData.ctx.getLogger(); +} + +const SourceLocation &Handler::location() const { return handlerData.location; } + +const std::string &Handler::name() const { return handlerData.name; } + +void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) +{ +	/*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/ +} + +void Handler::registerToken(const std::string &token) +{ +	/*handlerData.callbacks.registerToken(token);*/ +} + +void Handler::unregisterToken(const std::string &token) +{ +	/*handlerData.callbacks.unregisterToken(token);*/ +} + +const std::string &Handler::getName() const { return name(); } + +const State &Handler::getState() const { return handlerData.state; } + +void 
Handler::setLogger(Logger &logger) { internalLogger = &logger; } + +void Handler::resetLogger() { internalLogger = nullptr; } + +const SourceLocation &Handler::getLocation() const { return location(); } + +/* Class EmptyHandler */ + +bool EmptyHandler::start(Variant::mapType &args) +{ +	// Just accept anything +	return true; +} + +void EmptyHandler::end() +{ +	// Do nothing if a command ends +} + +bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex) +{ +	// Accept any field +	return true; +} + +void EmptyHandler::fieldEnd() +{ +	// Do not handle fields +} + +bool EmptyHandler::annotationStart(const Variant &className, +                                   Variant::mapType &args) +{ +	// Accept any data +	return true; +} + +bool EmptyHandler::annotationEnd(const Variant &className, +                                 const Variant &elementName) +{ +	// Accept any annotation +	return true; +} + +bool EmptyHandler::data(Variant &data) +{ +	// Support any data +	return true; +} + +Handler *EmptyHandler::create(const HandlerData &handlerData) +{ +	return new EmptyHandler(handlerData); +} + +/* Class StaticHandler */ + +bool StaticHandler::start(Variant::mapType &args) +{ +	// Do nothing in the default implementation, accept anything +	return true; +} + +void StaticHandler::end() +{ +	// Do nothing here +} + +bool StaticHandler::fieldStart(bool &isDefault, size_t fieldIdx) +{ +	// Return true if either the default field is requested or the field index +	// is zero. 
This simulates that there is exactly one field (a default field) +	if (fieldIdx == 0) { +		isDefault = true; +		return true; +	} +	return false; +} + +void StaticHandler::fieldEnd() +{ +	// Do nothing here +} + +bool StaticHandler::annotationStart(const Variant &className, +                                    Variant::mapType &args) +{ +	// No annotations supported +	return false; +} + +bool StaticHandler::annotationEnd(const Variant &className, +                                  const Variant &elementName) +{ +	// No annotations supported +	return false; +} + +bool StaticHandler::data(Variant &data) +{ +	logger().error("Did not expect any data here", data); +	return false; +} + +/* Class StaticFieldHandler */ + +StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData, +                                       const std::string &argName) +    : StaticHandler(handlerData), argName(argName), handled(false) +{ +} + +bool StaticFieldHandler::start(Variant::mapType &args) +{ +	if (!argName.empty()) { +		auto it = args.find(argName); +		if (it != args.end() && !it->second.toString().empty()) { +			handled = true; +			doHandle(it->second, args); +			return true; +		} +	} + +	this->args = args; +	return true; +} + +void StaticFieldHandler::end() +{ +	if (!handled) { +		if (!argName.empty()) { +			logger().error(std::string("Required argument \"") + argName + +			                   std::string("\" is missing."), +			               location()); +		} else { +			logger().error("Command requires data, but no data given", +			               location()); +		} +	} +} + +bool StaticFieldHandler::data(Variant &data) +{ +	// Call the doHandle function if this has not been done before +	if (!handled) { +		handled = true; +		doHandle(data, args); +		return true; +	} + +	// The doHandle function was already called, print an error message +	logger().error( +	    std::string("Found data, but the corresponding argument \"") + argName + +	        std::string("\" was already 
specified"), +	    data); + +	// Print the location at which the attribute was originally specified +	auto it = args.find(argName); +	if (it != args.end()) { +		logger().note(std::string("Attribute was specified here:"), it->second); +	} +	return false; +} +} +} + diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp new file mode 100644 index 0000000..7cda7a4 --- /dev/null +++ b/src/core/parser/stack/Handler.hpp @@ -0,0 +1,421 @@ +/* +    Ousía +    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_PARSER_STACK_HANDLER_HPP_ +#define _OUSIA_PARSER_STACK_HANDLER_HPP_ + +#include <string> + +#include <core/common/Location.hpp> +#include <core/common/Variant.hpp> +#include <core/common/Whitespace.hpp> + +namespace ousia { + +// Forward declarations +class ParserScope; +class ParserContext; +class Logger; + +namespace parser_stack { + +// More forward declarations +class Callbacks; +class State; + +/** + * Class collecting all the data that is being passed to a Handler + * instance. + */ +class HandlerData { +public: +	/** +	 * Reference to the ParserContext instance that should be used to resolve +	 * references to nodes in the Graph. 
+	 */ +	ParserContext &ctx; + +	/** +	 * Reference at an instance of the Callbacks class, used for +	 * modifying the behaviour of the parser (like registering tokens, setting +	 * the data type or changing the whitespace handling mode). +	 */ +	//	Callbacks &callbacks; + +	/** +	 * Contains the name of the command that is being handled. +	 */ +	std::string name; + +	/** +	 * Contains the current state of the state machine. +	 */ +	const State &state; + +	/** +	 * Current source code location. +	 */ +	SourceLocation location; + +	/** +	 * Constructor of the HandlerData class. +	 * +	 * @param ctx is the parser context the handler should be executed in. +	 * @param callbacks is an instance of Callbacks used to notify +	 * the parser about certain state changes. +	 * @param name is the name of the command. +	 * @param state is the state this handler was called for. +	 * @param location is the location at which the handler is created. +	 */ +	HandlerData(ParserContext &ctx, +	            /*Callbacks &callbacks,*/ const std::string &name, +	            const State &state, const SourceLocation &location); +}; + +/** + * The Handler class provides a context for handling a generic stack element. + * It has to be overridden and registered in the StateStack class to form + * handlers for concrete XML tags. + */ +class Handler { +private: +	/** +	 * Structure containing the internal handler data. +	 */ +	const HandlerData handlerData; + +	/** +	 * Reference at the current logger. If not nullptr, this will override the +	 * logger from the ParserContext specified in the handlerData. +	 */ +	Logger *internalLogger; + +protected: +	/** +	 * Constructor of the Handler class. +	 * +	 * @param handlerData is a structure containing all data being passed to the +	 * handler. +	 */ +	Handler(const HandlerData &handlerData); + +	/** +	 * Returns a reference at the ParserContext. +	 * +	 * @return a reference at the ParserContext. 
+	 */ +	ParserContext &context(); + +	/** +	 * Returns a reference at the ParserScope instance. +	 * +	 * @return a reference at the ParserScope instance. +	 */ +	ParserScope &scope(); + +	/** +	 * Returns a reference at the Manager instance which manages all nodes. +	 * +	 * @return a reference at the Manager instance. +	 */ +	Manager &manager(); + +	/** +	 * Returns a reference at the Logger instance used for logging error +	 * messages. +	 * +	 * @return a reference at the Logger instance. +	 */ +	Logger &logger(); + +	/** +	 * Returns the location of the element in the source file, for which this +	 * Handler was created. +	 * +	 * @return the location of the Handler in the source file. +	 */ +	const SourceLocation &location() const; + +	/** +	 * Returns the command name for which the handler was created. +	 * +	 * @return a const reference at the command name. +	 */ +	const std::string &name() const; + +public: +	/** +	 * Virtual destructor. +	 */ +	virtual ~Handler(); + +	/** +	 * Calls the corresponding function in the Callbacks instance. Sets the +	 * whitespace mode that specifies how string data should be processed. The +	 * calls to this function are placed on a stack by the underlying Stack +	 * class. +	 * +	 * @param whitespaceMode specifies one of the three WhitespaceMode constants +	 * PRESERVE, TRIM or COLLAPSE. +	 */ +	void setWhitespaceMode(WhitespaceMode whitespaceMode); + +	/** +	 * Calls the corresponding function in the Callbacks instance. +	 * Registers the given token as token that should be reported to the handler +	 * using the "token" function. +	 * +	 * @param token is the token string that should be reported. +	 */ +	void registerToken(const std::string &token); + +	/** +	 * Calls the corresponding function in the Callbacks instance. +	 * Unregisters the given token, it will no longer be reported to the handler +	 * using the "token" function. +	 * +	 * @param token is the token string that should be unregistered. 
+	 */ +	void unregisterToken(const std::string &token); + +	/** +	 * Returns the command name for which the handler was created. +	 * +	 * @return a const reference at the command name. +	 */ +	const std::string &getName() const; + +	/** +	 * Reference at the State descriptor for which this Handler was created. +	 * +	 * @return a const reference at the constructing State descriptor. +	 */ +	const State &getState() const; + +	/** +	 * Sets the internal logger to the given logger instance. +	 * +	 * @param logger is the Logger instance to which the logger should be set. +	 */ +	void setLogger(Logger &logger); + +	/** +	 * Resets the logger instance to the logger instance provided in the +	 * ParserContext. +	 */ +	void resetLogger(); + +	/** +	 * Returns the location of the element in the source file, for which this +	 * Handler was created. +	 * +	 * @return the location of the Handler in the source file. +	 */ +	const SourceLocation &getLocation() const; + +	/** +	 * Called when the command that was specified in the constructor is +	 * instantiated. +	 * +	 * @param args is a map from strings to variants (argument name and value). +	 * @return true if the handler was successful in starting the element it +	 * represents, false otherwise. +	 */ +	virtual bool start(Variant::mapType &args) = 0; + +	/** +	 * Called before the command for which this handler is defined ends (is +	 * forever removed from the stack). +	 */ +	virtual void end() = 0; + +	/** +	 * Called when a new field starts, while the handler is active. This +	 * function should return true if the field is supported, false otherwise. +	 * No error should be logged if the field cannot be started, the caller will +	 * take care of that (since it is always valid to start a default field, +	 * even though the corresponding structure does not have a field, as long as +	 * no data is fed into the field). 
+	 * +	 * @param isDefault is set to true if the field that is being started is the +	 * default/tree field. The handler should set the value of this variable to +	 * true if the referenced field is indeed the default field. +	 * @param fieldIdx is the numerical index of the field. +	 */ +	virtual bool fieldStart(bool &isDefault, size_t fieldIdx) = 0; + +	/** +	 * Called when a previously opened field ends, while the handler is active. +	 * Note that a "fieldStart" and "fieldEnd" are always called alternately. +	 */ +	virtual void fieldEnd() = 0; + +	/** +	 * Called whenever an annotation starts while this handler is active. The +	 * function should return true if starting the annotation was successful, +	 * false otherwise. +	 * +	 * @param className is a string variant containing the name of the +	 * annotation class and the location of the name in the source code. +	 * @param args is a map from strings to variants (argument name and value). +	 * @return true if the mentioned annotation could be started here, false +	 * if an error occurred. +	 */ +	virtual bool annotationStart(const Variant &className, +	                             Variant::mapType &args) = 0; + +	/** +	 * Called whenever an annotation ends while this handler is active. The +	 * function should return true if ending the annotation was successful, +	 * false otherwise. +	 * +	 * @param className is a string variant containing the name of the +	 * annotation class and the location of the class name in the source code. +	 * @param elementName is a string variant containing the name of the +	 * annotation class and the location of the element name in the source code. +	 * @return true if the mentioned annotation could be ended here, false if +	 * an error occurred. +	 */ +	virtual bool annotationEnd(const Variant &className, +	                           const Variant &elementName) = 0; + +	/** +	 * Called whenever raw data (in the form of a string) is available for the +	 * Handler instance. 
Should return true if the data could be handled, false +	 * otherwise. +	 * +	 * @param data is a string variant containing the character data and its +	 * location. +	 * @return true if the data could be handled, false otherwise. +	 */ +	virtual bool data(Variant &data) = 0; +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + * + * @param handlerData is the data that should be passed to the new handler + * instance. + * @return a newly created handler instance. + */ +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * The EmptyHandler class is used in case no element handler is specified in + * the State descriptor. It just accepts all data and does nothing. + */ +class EmptyHandler : public Handler { +protected: +	using Handler::Handler; + +public: +	bool start(Variant::mapType &args) override; +	void end() override; +	bool fieldStart(bool &isDefault, size_t fieldIdx) override; +	void fieldEnd() override; +	bool annotationStart(const Variant &className, +	                     Variant::mapType &args) override; +	bool annotationEnd(const Variant &className, +	                   const Variant &elementName) override; +	bool data(Variant &data) override; + +	/** +	 * Creates an instance of the EmptyHandler class. +	 */ +	static Handler *create(const HandlerData &handlerData); +}; + +/** + * The StaticHandler class is used to handle predefined commands which do + * neither support annotations, nor multiple fields. Child classes can decide + * whether a single data field should be used. 
+ */ +class StaticHandler : public Handler { +protected: +	using Handler::Handler; + +public: +	bool start(Variant::mapType &args) override; +	void end() override; +	bool fieldStart(bool &isDefault, size_t fieldIdx) override; +	void fieldEnd() override; +	bool annotationStart(const Variant &className, +	                     Variant::mapType &args) override; +	bool annotationEnd(const Variant &className, +	                   const Variant &elementName) override; +	bool data(Variant &data) override; +}; + +/** + * The StaticFieldHandler class is used to handle predefined commands which do + * neither support annotations, nor multiple fields. Additionally, it captures a + * data entry from a single default field. + */ +class StaticFieldHandler : public StaticHandler { +private: +	/** +	 * Set to the name of the data argument that should be used instead of the +	 * data field, if no data field is given. +	 */ +	std::string argName; + +	/** +	 * Set to true, once the "doHandle" function has been called. +	 */ +	bool handled; + +	/** +	 * Map containing the arguments given in the start function. +	 */ +	Variant::mapType args; + +protected: +	/** +	 * Constructor of the StaticFieldHandler class. +	 * +	 * @param handlerData is a structure containing the internal data that +	 * should be stored inside the handler. +	 * @param argName is the name of the data argument that -- if present -- should be used +	 * instead of the data field. If empty, data is not captured from the +	 * arguments. If both, data in the data field and the argument, are given, +	 * this results in an error. +	 */ +	StaticFieldHandler(const HandlerData &handlerData, +	                   const std::string &argName); + +	/** +	 * Function that should be overridden in order to handle the field data and +	 * the other arguments. This function is not called if no data was given. +	 * +	 * @param fieldData is the captured field data. +	 * @param args are the arguments that were given in the "start" function. 
+	 */ +	virtual void doHandle(const Variant &fieldData, +	                      Variant::mapType &args) = 0; + +public: +	bool start(Variant::mapType &args) override; +	void end() override; +	bool data(Variant &data) override; +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp index 94ee82d..d1ea97d 100644 --- a/src/core/parser/stack/ImportIncludeHandler.cpp +++ b/src/core/parser/stack/ImportIncludeHandler.cpp @@ -16,50 +16,22 @@      along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ -#include "ImportIncludeHandler.hpp" - +#include <core/model/RootNode.hpp>  #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> -namespace ousia { - -/* ImportIncludeHandler */ - -void ImportIncludeHandler::start(Variant::mapType &args) -{ -	rel = args["rel"].asString(); -	type = args["type"].asString(); -	src = args["src"].asString(); -	srcInArgs = !src.empty(); -} +#include "DomainHandler.hpp" +#include "DocumentHandler.hpp" +#include "ImportIncludeHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" -void ImportIncludeHandler::data(const std::string &data, int field) -{ -	if (srcInArgs) { -		logger().error("\"src\" attribute has already been set"); -		return; -	} -	if (field != 0) { -		logger().error("Command has only one field."); -		return; -	} -	src.append(data); -} +namespace ousia { +namespace parser_stack {  /* ImportHandler */ -void ImportHandler::start(Variant::mapType &args) -{ -	ImportIncludeHandler::start(args); - -	// Make sure imports are still possible -	if (scope().getFlag(ParserFlag::POST_HEAD)) { -		logger().error("Imports must be listed before other commands.", -		               location()); -		return; -	} -} - -void ImportHandler::end() +void ImportHandler::doHandle(const Variant &fieldData, Variant::mapType &args)  {  	// Fetch the last node and check whether an import is valid 
at this  	// position @@ -75,8 +47,9 @@ void ImportHandler::end()  	// Perform the actual import, register the imported node within the leaf  	// node -	Rooted<Node> imported = -	    context().import(src, type, rel, leafRootNode->getReferenceTypes()); +	Rooted<Node> imported = context().import( +	    fieldData.asString(), args["type"].asString(), args["rel"].asString(), +	    leafRootNode->getReferenceTypes());  	if (imported != nullptr) {  		leafRootNode->reference(imported);  	} @@ -84,13 +57,26 @@ void ImportHandler::end()  /* IncludeHandler */ -void IncludeHandler::start(Variant::mapType &args) +void IncludeHandler::doHandle(const Variant &fieldData, Variant::mapType &args)  { -	ImportIncludeHandler::start(args); +	context().include(fieldData.asString(), args["type"].asString(), +	                  args["rel"].asString(), {&RttiTypes::Node});  } -void IncludeHandler::end() -{ -	context().include(src, type, rel, {&RttiTypes::Node}); +namespace States { +const State Import = +    StateBuilder() +        .parents({&Document, &Typesystem, &Domain}) +        .elementHandler(ImportHandler::create) +        .arguments({Argument::String("rel", ""), Argument::String("type", ""), +                    Argument::String("src", "")}); + +const State Include = +    StateBuilder() +        .parent(&All) +        .elementHandler(IncludeHandler::create) +        .arguments({Argument::String("rel", ""), Argument::String("type", ""), +                    Argument::String("src", "")}); +}  }  } diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp index b0767be..6168639 100644 --- a/src/core/parser/stack/ImportIncludeHandler.hpp +++ b/src/core/parser/stack/ImportIncludeHandler.hpp @@ -19,6 +19,9 @@  /**   * @file ImportIncludeHandler.hpp   * + * Contains the conceptually similar handlers for the "include" and "import" + * commands. 
+ *   * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)   */ @@ -26,51 +29,78 @@  #define _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_  #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> - -namespace ousia { -class ImportIncludeHandler : public Handler { -protected: -	bool srcInArgs = false; -	std::string rel; -	std::string type; -	std::string src; +#include "Handler.hpp" -public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; - -	void data(const std::string &data, int field) override; -}; +namespace ousia { +namespace parser_stack { -class ImportHandler : public ImportIncludeHandler { +/** + * The ImportHandler is responsible for handling the "import" command. An import + * creates a reference to a specified file. The specified file is parsed (if + * this has not already been done) outside of the context of the current file. + * If the specified resource has already been parsed, a reference to the already + * parsed file is inserted. Imports are only possible before no other content + * has been parsed. + */ +class ImportHandler : public StaticFieldHandler {  public: -	using ImportIncludeHandler::ImportIncludeHandler; - -	void start(Variant::mapType &args) override; - -	void end() override; - +	using StaticFieldHandler::StaticFieldHandler; + +	void doHandle(const Variant &fieldData, +	              Variant::mapType &args) override; + +	/** +	 * Creates a new instance of the ImportHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{ -		return new ImportHandler{handlerData}; +		return new ImportHandler{handlerData, "src"};  	}  }; -class IncludeHandler : public ImportIncludeHandler { +/** + * The IncludeHandler is responsible for handling the "include" command. 
The + * included file is parsed in the context of the current file and will change + * the content that is currently being parsed. Includes are possible at (almost) + * any position in the source file. + */ +class IncludeHandler : public StaticFieldHandler {  public: -	using ImportIncludeHandler::ImportIncludeHandler; - -	void start(Variant::mapType &args) override; - -	void end() override; - +	using StaticFieldHandler::StaticFieldHandler; + +	void doHandle(const Variant &fieldData, +	              Variant::mapType &args) override; + +	/** +	 * Creates a new instance of the IncludeHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{ -		return new IncludeHandler{handlerData}; +		return new IncludeHandler{handlerData, "src"};  	}  }; + +namespace States { +/** + * State representing the "import" command. + */ +extern const State Import; + +/** + * State representing the "include" command. + */ +extern const State Include; +} + +}  }  #endif diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp new file mode 100644 index 0000000..47f7d2c --- /dev/null +++ b/src/core/parser/stack/Stack.cpp @@ -0,0 +1,550 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. 
+ +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <sstream> + +#include <core/common/Logger.hpp> +#include <core/common/Utils.hpp> +#include <core/common/Exceptions.hpp> +#include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "Handler.hpp" +#include "Stack.hpp" +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class HandlerInfo */ + +HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {} + +HandlerInfo::HandlerInfo(std::shared_ptr<Handler> handler) +    : handler(handler), +      fieldIdx(0), +      valid(true), +      implicit(false), +      inField(false), +      inDefaultField(false), +      inImplicitDefaultField(false), +      inValidField(false), +      hadDefaultField(false) +{ +} + +HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField, +                         bool inDefaultField, bool inImplicitDefaultField, +                         bool inValidField) +    : handler(nullptr), +      fieldIdx(0), +      valid(valid), +      implicit(implicit), +      inField(inField), +      inDefaultField(inDefaultField), +      inImplicitDefaultField(inImplicitDefaultField), +      inValidField(inValidField), +      hadDefaultField(false) +{ +} + +HandlerInfo::~HandlerInfo() +{ +	// Do nothing +} + +void HandlerInfo::fieldStart(bool isDefault, bool isImplicit, bool isValid) +{ +	inField = true; +	inDefaultField = isDefault || isImplicit; +	inImplicitDefaultField = isImplicit; +	inValidField = isValid; +	hadDefaultField = hadDefaultField || inDefaultField; +	fieldIdx++; +} + +void HandlerInfo::fieldEnd() +{ +	inField = false; +	inDefaultField = false; +	inImplicitDefaultField = false; +	inValidField = false; +} + +/** + * Stub instance of HandlerInfo containing no handler information. 
+ */ +static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true}; + +/* Helper functions */ + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + * + * @param name is the name of the command for which no state transition is + * found. + * @param expected is a set containing the names of the expected commands. + */ +static LoggableException buildInvalidCommandException( +    const std::string &name, const std::set<std::string> &expected) +{ +	if (expected.empty()) { +		return LoggableException{ +		    std::string{"No nested elements allowed, but got \""} + name + +		    std::string{"\""}}; +	} else { +		return LoggableException{ +		    std::string{"Expected "} + +		    (expected.size() == 1 ? std::string{"\""} +		                          : std::string{"one of \""}) + +		    Utils::join(expected, "\", \"") + std::string{"\", but got \""} + +		    name + std::string{"\""}}; +	} +} + +/* Class Stack */ + +Stack::Stack(ParserContext &ctx, +             const std::multimap<std::string, const State *> &states) +    : ctx(ctx), states(states) +{ +	// If the scope instance is not empty we need to deduce the current parser +	// state +	if (!ctx.getScope().isEmpty()) { +		deduceState(); +	} +} + +Stack::~Stack() +{ +	while (!stack.empty()) { +		// Fetch the topmost stack element +		HandlerInfo &info = currentInfo(); + +		// It is an error if we're still in a field of an element while the +		// Stack instance is destroyed. Log that +		if (handlersValid()) { +			if (info.inField && !info.implicit && +			    !info.inImplicitDefaultField) { +				logger().error( +				    std::string("Reached end of stream, but command \"") + +				        info.handler->getName() + +				        "\" has not ended yet. 
Command was started here:", +				    info.handler->getLocation()); +			} +		} + +		// Remove the command from the stack +		endCurrentHandler(); +	} +} + +void Stack::deduceState() +{ +	// Assemble all states +	std::vector<const State *> states; +	for (const auto &e : this->states) { +		states.push_back(e.second); +	} + +	// Fetch the type signature of the scope and derive all possible states, +	// abort if no unique parser state was found +	std::vector<const State *> possibleStates = +	    StateDeductor(ctx.getScope().getStackTypeSignature(), states).deduce(); +	if (possibleStates.size() != 1U) { +		throw LoggableException( +		    "Error while including file: Cannot deduce parser state."); +	} + +	// Switch to this state by creating a handler, but do not call its start +	// function +	const State &state = *possibleStates[0]; +	HandlerConstructor ctor = +	    state.elementHandler ? state.elementHandler : EmptyHandler::create; + +	std::shared_ptr<Handler> handler = +	    std::shared_ptr<Handler>{ctor({ctx, "", state, SourceLocation{}})}; +	stack.emplace_back(handler); + +	// Set the correct flags for this implicit handler +	HandlerInfo &info = currentInfo(); +	info.implicit = true; +	info.fieldStart(true, false, true); +} + +std::set<std::string> Stack::expectedCommands() +{ +	const State *currentState = &(this->currentState()); +	std::set<std::string> res; +	for (const auto &v : states) { +		if (v.second->parents.count(currentState)) { +			res.insert(v.first); +		} +	} +	return res; +} + +const State &Stack::currentState() +{ +	return stack.empty() ? States::None : stack.back().handler->getState(); +} + +std::string Stack::currentCommandName() +{ +	return stack.empty() ? 
std::string{} : stack.back().handler->getName(); +} + +const State *Stack::findTargetState(const std::string &name) +{ +	const State *currentState = &(this->currentState()); +	auto range = states.equal_range(name); +	for (auto it = range.first; it != range.second; it++) { +		const StateSet &parents = it->second->parents; +		if (parents.count(currentState) || parents.count(&States::All)) { +			return it->second; +		} +	} + +	return nullptr; +} + +const State *Stack::findTargetStateOrWildcard(const std::string &name) +{ +	// Try to find the target state with the given name, if none is found, try +	// find a matching "*" state. +	State const *targetState = findTargetState(name); +	if (targetState == nullptr) { +		return findTargetState("*"); +	} +	return targetState; +} + +HandlerInfo &Stack::currentInfo() +{ +	return stack.empty() ? EmptyHandlerInfo : stack.back(); +} +HandlerInfo &Stack::lastInfo() +{ +	return stack.size() < 2U ? EmptyHandlerInfo : stack[stack.size() - 2]; +} + +void Stack::endCurrentHandler() +{ +	if (!stack.empty()) { +		// Fetch the handler info for the current top-level element +		HandlerInfo &info = stack.back(); + +		// Do not call any callback functions while the stack is marked as +		// invalid or this is an elment marked as "implicit" +		if (!info.implicit && handlersValid()) { +			// Make sure the fieldEnd handler is called if the element still +			// is in a field +			if (info.inField) { +				info.handler->fieldEnd(); +				info.fieldEnd(); +			} + +			// Call the "end" function of the corresponding Handler instance +			info.handler->end(); +		} + +		// Remove the element from the stack +		stack.pop_back(); +	} +} + +bool Stack::ensureHandlerIsInField() +{ +	// If the current handler is not in a field (and actually has a handler) +	// try to start a default field +	HandlerInfo &info = currentInfo(); +	if (!info.inField && info.handler != nullptr) { +		// Abort if the element already had a default field +		if (info.hadDefaultField) { +			
return false; +		} + +		// Try to start a new default field, abort if this did not work +		bool isDefault = true; +		if (!info.handler->fieldStart(isDefault, info.fieldIdx)) { +			info.handler->fieldEnd(); +			endCurrentHandler(); +			return false; +		} + +		// Mark the field as started +		info.fieldStart(true, true, true); +	} +	return true; +} + +bool Stack::handlersValid() +{ +	for (auto it = stack.crbegin(); it != stack.crend(); it++) { +		if (!it->valid) { +			return false; +		} +	} +	return true; +} + +Logger &Stack::logger() { return ctx.getLogger(); } + +void Stack::command(const Variant &name, const Variant::mapType &args) +{ +	// Make sure the given identifier is valid (preventing "*" from being +	// malicously passed to this function) +	if (!Utils::isNamespacedIdentifier(name.asString())) { +		throw LoggableException(std::string("Invalid identifier \"") + +		                            name.asString() + std::string("\""), +		                        name); +	} + +	while (true) { +		// Try to find a target state for the given command, if none can be +		// found and the current command does not have an open field, then try +		// to create an empty default field, otherwise this is an exception +		const State *targetState = findTargetStateOrWildcard(name.asString()); +		if (targetState == nullptr) { +			if (!currentInfo().inField) { +				endCurrentHandler(); +				continue; +			} else { +				throw buildInvalidCommandException(name.asString(), +				                                   expectedCommands()); +			} +		} + +		// Make sure we're currently inside a field +		if (!ensureHandlerIsInField()) { +			endCurrentHandler(); +			continue; +		} + +		// Fork the logger. We do not want any validation errors to skip +		LoggerFork loggerFork = logger().fork(); + +		// Instantiate the handler and push it onto the stack +		HandlerConstructor ctor = targetState->elementHandler +		                              ? 
targetState->elementHandler +		                              : EmptyHandler::create; +		std::shared_ptr<Handler> handler{ +		    ctor({ctx, name.asString(), *targetState, name.getLocation()})}; +		stack.emplace_back(handler); + +		// Fetch the HandlerInfo for the parent element and the current element +		HandlerInfo &parentInfo = lastInfo(); +		HandlerInfo &info = currentInfo(); + +		// Call the "start" method of the handler, store the result of the start +		// method as the validity of the handler -- do not call the start method +		// if the stack is currently invalid (as this may cause further, +		// unwanted errors) +		bool validStack = handlersValid(); +		info.valid = false; +		if (validStack) { +			// Canonicalize the arguments (if this has not already been done), +			// allow additional arguments +			Variant::mapType canonicalArgs = args; +			targetState->arguments.validateMap(canonicalArgs, loggerFork, true); + +			handler->setLogger(loggerFork); +			try { +				info.valid = handler->start(canonicalArgs); +			} +			catch (LoggableException ex) { +				loggerFork.log(ex); +			} +			handler->resetLogger(); +		} + +		// We started the command within an implicit default field and it is not +		// valid -- remove both the new handler and the parent field from the +		// stack +		if (!info.valid && parentInfo.inImplicitDefaultField) { +			endCurrentHandler(); +			endCurrentHandler(); +			continue; +		} + +		// If we ended up here, starting the command may or may not have worked, +		// but after all, we cannot unroll the stack any further. Update the +		// "valid" flag, commit any potential error messages and return. 
+		info.valid = parentInfo.valid && info.valid; +		loggerFork.commit(); +		return; +	} +} + +void Stack::data(const Variant &data) +{ +	while (true) { +		// Check whether there is any command the data can be sent to +		if (stack.empty()) { +			throw LoggableException("No command here to receive data."); +		} + +		// Fetch the current command handler information +		HandlerInfo &info = currentInfo(); + +		// Make sure the current handler has an open field +		if (!ensureHandlerIsInField()) { +			endCurrentHandler(); +			continue; +		} + +		// If this field should not get any data, log an error and do not call +		// the "data" handler +		if (!info.inValidField) { +			logger().error("Did not expect any data here", data); +		} + +		if (handlersValid() && info.inValidField) { +			// Fork the logger and set it as temporary logger for the "start" +			// method. We only want to keep error messages if this was not a try +			// to implicitly open a default field. +			LoggerFork loggerFork = logger().fork(); +			info.handler->setLogger(loggerFork); + +			// Pass the data to the current Handler instance +			bool valid = false; +			try { +				Variant dataCopy = data; +				valid = info.handler->data(dataCopy); +			} +			catch (LoggableException ex) { +				loggerFork.log(ex); +			} + +			// Reset the logger instance as soon as possible +			info.handler->resetLogger(); + +			// If placing the data here failed and we're currently in an +			// implicitly opened field, just unroll the stack to the next field +			// and try again +			if (!valid && info.inImplicitDefaultField) { +				endCurrentHandler(); +				continue; +			} + +			// Commit the content of the logger fork. Do not change the valid +			// flag. 
+			loggerFork.commit(); +		} + +		// There was no reason to unroll the stack any further, so continue +		return; +	} +} + +void Stack::fieldStart(bool isDefault) +{ +	// Make sure the current handler stack is not empty +	if (stack.empty()) { +		throw LoggableException( +		    "No command for which a field could be started"); +	} + +	// Fetch the information attached to the current handler +	HandlerInfo &info = currentInfo(); +	if (info.inField) { +		logger().error( +		    "Got field start, but there is no command for which to start the " +		    "field."); +		return; +	} + +	// Copy the isDefault flag to a local variable, the fieldStart method will +	// write into this variable +	bool defaultField = isDefault; + +	// Do not call the "fieldStart" function if we're in an invalid subtree +	bool valid = false; +	if (handlersValid()) { +		try { +			valid = info.handler->fieldStart(defaultField, info.fieldIdx); +		} +		catch (LoggableException ex) { +			logger().log(ex); +		} +		if (!valid && !defaultField) { +			logger().error( +			    std::string("Cannot start a new field here (index ") + +			    std::to_string(info.fieldIdx + 1) + +			    std::string("), field does not exist")); +		} +	} + +	// Mark the field as started +	info.fieldStart(defaultField, false, valid); +} + +void Stack::fieldEnd() +{ +	// Make sure the current handler stack is not empty +	if (stack.empty()) { +		throw LoggableException("No command for which a field could be ended"); +	} + +	// Fetch the information attached to the current handler +	HandlerInfo &info = currentInfo(); +	if (!info.inField) { +		logger().error( +		    "Got field end, but there is no command for which to end the " +		    "field."); +		return; +	} + +	// Only continue if the current handler stack is in a valid state, do not +	// call the fieldEnd function if something went wrong before +	if (handlersValid()) { +		try { +			info.handler->fieldEnd(); +		} +		catch (LoggableException ex) { +			logger().log(ex); +		} +	} + +	// 
This command no longer is in a field +	info.fieldEnd(); + +	// As soon as this command had a default field, remove it from the stack +	if (info.hadDefaultField) { +		endCurrentHandler(); +	} +} + +void Stack::annotationStart(const Variant &className, const Variant &args) +{ +	// TODO +} + +void Stack::annotationEnd(const Variant &className, const Variant &elementName) +{ +	// TODO +} + +void Stack::token(Variant token) +{ +	// TODO +} +} +} + diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp new file mode 100644 index 0000000..76eefd9 --- /dev/null +++ b/src/core/parser/stack/Stack.hpp @@ -0,0 +1,341 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Stack.hpp + * + * Helper classes for document or description parsers. Contains the + * Stack class, which is an pushdown automaton responsible for + * accepting commands in the correct order and calling specified handlers. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_STACK_HPP_ +#define _OUSIA_PARSER_STACK_STACK_HPP_ + +#include <cstdint> + +#include <map> +#include <memory> +#include <set> +#include <vector> + +#include <core/common/Variant.hpp> +#include <core/parser/Parser.hpp> + +namespace ousia { + +// Forward declarations +class ParserContext; +class Logger; + +namespace parser_stack { + +// Forward declarations +class Handler; +class State; + +/** + * The HandlerInfo class is used internally by the stack to associate additional + * (mutable) data with a handler instance. + */ +class HandlerInfo { +public: +	/** +	 * Pointer pointing at the actual handler instance. +	 */ +	std::shared_ptr<Handler> handler; + +	/** +	 * Next field index to be passed to the "fieldStart" function of the Handler +	 * class. +	 */ +	size_t fieldIdx; + +	/** +	 * Set to true if the handler is valid (which is the case if the "start" +	 * method has returned true). If the handler is invalid, no more calls are +	 * directed at it until it can be removed from the stack. +	 */ +	bool valid : 1; + +	/** +	 * Set to true if this is an implicit handler, that was created when the +	 * current stack state was deduced. +	 */ +	bool implicit : 1; + +	/** +	 * Set to true if the handler currently is in a field. +	 */ +	bool inField : 1; + +	/** +	 * Set to true if the handler currently is in the default field. +	 */ +	bool inDefaultField : 1; + +	/** +	 * Set to true if the handler currently is in an implicitly started default +	 * field. +	 */ +	bool inImplicitDefaultField : 1; + +	/** +	 * Set to false if this field is only opened pro-forma and does not accept +	 * any data. Otherwise set to true. +	 */ +	bool inValidField : 1; + +	/** +	 * Set to true, if the default field was already started. +	 */ +	bool hadDefaultField : 1; + +	/** +	 * Default constructor of the HandlerInfo class. 
+	 */ +	HandlerInfo(); +	/** +	 * Constructor of the HandlerInfo class, allows to set all flags manually. +	 */ +	HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField, +	            bool inImplicitDefaultField, bool inValidField); + +	/** +	 * Constructor of the HandlerInfo class, taking a shared_ptr to the handler +	 * to which additional information should be attached. +	 */ +	HandlerInfo(std::shared_ptr<Handler> handler); + +	/** +	 * Destructor of the HandlerInfo class (to allow Handler to be forward +	 * declared). +	 */ +	~HandlerInfo(); + +	/** +	 * Updates the "field" flags according to a "fieldStart" event. +	 */ +	void fieldStart(bool isDefault, bool isImplicit, bool isValid); + +	/** +	 * Updates the "fields" flags according to a "fieldEnd" event. +	 */ +	void fieldEnd(); +}; + +/** + * The Stack class is a pushdown automaton responsible for turning a command + * stream into a tree of Node instances. It does so by following a state + * transition graph and creating a set of Handler instances, which are placed + * on the stack. + */ +class Stack { +private: +	/** +	 * Reference at the parser context. +	 */ +	ParserContext &ctx; + +	/** +	 * Map containing all registered command names and the corresponding +	 * state descriptors. +	 */ +	const std::multimap<std::string, const State *> &states; + +	/** +	 * Internal stack used for managing the currently active Handler instances. +	 */ +	std::vector<HandlerInfo> stack; + +	/** +	 * Return the reference in the Logger instance stored within the context. +	 */ +	Logger &logger(); + +	/** +	 * Used internally to get all expected command names for the current state. +	 * This function is used to build error messages. +	 * +	 * @return a set of strings containing the names of the expected commands. +	 */ +	std::set<std::string> expectedCommands(); + +	/** +	 * Returns the targetState for a command with the given name that can be +	 * reached from the current state. 
+	 * +	 * @param name is the name of the requested command. +	 * @return nullptr if no target state was found, a pointer at the target +	 * state otherwise. +	 */ +	const State *findTargetState(const std::string &name); + +	/** +	 * Returns the targetState for a command with the given name that can be +	 * reached from the current state, also including the wildcard "*" state. +	 * Throws an exception if the given target state is not a valid identifier. +	 * +	 * @param name is the name of the requested command. +	 * @return nullptr if no target state was found, a pointer at the target +	 * state otherwise. +	 */ +	const State *findTargetStateOrWildcard(const std::string &name); + +	/** +	 * Tries to reconstruct the parser state from the Scope instance of the +	 * ParserContext given in the constructor. This functionality is needed for +	 * including files,as the Parser of the included file needs to be brought to +	 * an equivalent state as the one in the including file. +	 */ +	void deduceState(); + +	/** +	 * Returns a reference at the current HandlerInfo instance (or a stub +	 * HandlerInfo instance if the stack is empty). +	 */ +	HandlerInfo &currentInfo(); + +	/** +	 * Returns a reference at the last HandlerInfo instance (or a stub +	 * HandlerInfo instance if the stack has only one element). +	 */ +	HandlerInfo &lastInfo(); + +	/** +	 * Ends the current handler and removes the corresponding element from the +	 * stack. +	 */ +	void endCurrentHandler(); + +	/** +	 * Tries to start a default field for the current handler, if currently the +	 * handler is not inside a field and did not have a default field yet. +	 * +	 * @return true if the handler is inside a field, false if no field could +	 * be started. +	 */ +	bool ensureHandlerIsInField(); + +	/** +	 * Returns true if all handlers on the stack are currently valid, or false +	 * if at least one handler is invalid. +	 * +	 * @return true if all handlers on the stack are valid. 
+	 */ +	bool handlersValid(); + +public: +	/** +	 * Creates a new instance of the Stack class. +	 * +	 * @param ctx is the parser context the parser stack is working on. +	 * @param states is a map containing the command names and pointers at the +	 * corresponding State instances. +	 */ +	Stack(ParserContext &ctx, +	      const std::multimap<std::string, const State *> &states); + +	/** +	 * Destructor of the Stack class. +	 */ +	~Stack(); + +	/** +	 * Returns the state the Stack instance currently is in. +	 * +	 * @return the state of the currently active Handler instance or STATE_NONE +	 * if no handler is on the stack. +	 */ +	const State &currentState(); + +	/** +	 * Returns the command name that is currently being handled. +	 * +	 * @return the name of the command currently being handled by the active +	 * Handler instance or an empty string if no handler is currently active. +	 */ +	std::string currentCommandName(); + +	/** +	 * Function that should be called whenever a new command is reached. +	 * +	 * @param name is the name of the command (including the namespace +	 * separator ':') and its corresponding location. Must be a string variant. +	 * @param args is a map containing the arguments that were passed to the +	 * command. +	 */ +	void command(const Variant &name, const Variant::mapType &args); + +	/** +	 * Function that shuold be called whenever character data is found in the +	 * input stream. May only be called if the currently is a command on the +	 * stack. +	 * +	 * @param data is a string variant containing the data that has been found. +	 */ +	void data(const Variant &data); + +	/** +	 * Function that should be called whenever a new field starts. Fields of the +	 * same command may not be separated by calls to data or annotations. Doing +	 * so will result in a LoggableException. +	 * +	 * @param isDefault should be set to true if the started field explicitly +	 * is the default field. 
+	 */ +	void fieldStart(bool isDefault); + +	/** +	 * Function that should be called whenever a field ends. Calling this +	 * function if there is no field to end will result in a LoggableException. +	 */ +	void fieldEnd(); + +	/** +	 * Function that should be called whenever an annotation starts. +	 * +	 * @param name is the name of the annotation class. +	 * @param args is a map variant containing the arguments that were passed +	 * to the annotation. +	 */ +	void annotationStart(const Variant &className, const Variant &args); + +	/** +	 * Function that should be called whenever an annotation ends. +	 * +	 * @param name is the name of the annotation class that was ended. +	 * @param annotationName is the name of the annotation that was ended. +	 */ +	void annotationEnd(const Variant &className, const Variant &elementName); + +	/** +	 * Function that should be called whenever a previously registered token +	 * is found in the input stream. +	 * +	 * @param token is string variant containing the token that was encountered. +	 */ +	void token(Variant token); +}; +} +} + +#endif /* _OUSIA_STACK_HPP_ */ + diff --git a/src/core/parser/ParserState.cpp b/src/core/parser/stack/State.cpp index f635d86..d72f533 100644 --- a/src/core/parser/ParserState.cpp +++ b/src/core/parser/stack/State.cpp @@ -16,88 +16,97 @@      along with this program.  If not, see <http://www.gnu.org/licenses/>.  
*/ -#include "ParserState.hpp" +#include "State.hpp"  namespace ousia { +namespace parser_stack { -/* Class ParserState */ +/* Class State */ -ParserState::ParserState() : elementHandler(nullptr) {} +State::State() : elementHandler(nullptr) {} -ParserState::ParserState(ParserStateSet parents, Arguments arguments, +State::State(StateSet parents, Arguments arguments,                           RttiSet createdNodeTypes, -                         HandlerConstructor elementHandler) +                         HandlerConstructor elementHandler, +                         bool supportsAnnotations)      : parents(parents),        arguments(arguments),        createdNodeTypes(createdNodeTypes), -      elementHandler(elementHandler) +      elementHandler(elementHandler), +      supportsAnnotations(supportsAnnotations)  {  } -ParserState::ParserState(const ParserStateBuilder &builder) -    : ParserState(builder.build()) +State::State(const StateBuilder &builder) +    : State(builder.build())  {  } -/* Class ParserStateBuilder */ +/* Class StateBuilder */ -ParserStateBuilder &ParserStateBuilder::copy(const ParserState &state) +StateBuilder &StateBuilder::copy(const State &state)  {  	this->state = state;  	return *this;  } -ParserStateBuilder &ParserStateBuilder::parent(const ParserState *parent) +StateBuilder &StateBuilder::parent(const State *parent)  { -	state.parents = ParserStateSet{parent}; +	state.parents = StateSet{parent};  	return *this;  } -ParserStateBuilder &ParserStateBuilder::parents(const ParserStateSet &parents) +StateBuilder &StateBuilder::parents(const StateSet &parents)  {  	state.parents = parents;  	return *this;  } -ParserStateBuilder &ParserStateBuilder::arguments(const Arguments &arguments) +StateBuilder &StateBuilder::arguments(const Arguments &arguments)  {  	state.arguments = arguments;  	return *this;  } -ParserStateBuilder &ParserStateBuilder::createdNodeType(const Rtti *type) +StateBuilder &StateBuilder::createdNodeType(const Rtti *type)  {  	
state.createdNodeTypes = RttiSet{type};  	return *this;  } -ParserStateBuilder &ParserStateBuilder::createdNodeTypes(const RttiSet &types) +StateBuilder &StateBuilder::createdNodeTypes(const RttiSet &types)  {  	state.createdNodeTypes = types;  	return *this;  } -ParserStateBuilder &ParserStateBuilder::elementHandler( +StateBuilder &StateBuilder::elementHandler(      HandlerConstructor elementHandler)  {  	state.elementHandler = elementHandler;  	return *this;  } -const ParserState &ParserStateBuilder::build() const { return state; } +StateBuilder &StateBuilder::supportsAnnotations(bool supportsAnnotations) +{ +	state.supportsAnnotations = supportsAnnotations; +	return *this; +} -/* Class ParserStateDeductor */ +const State &StateBuilder::build() const { return state; } -ParserStateDeductor::ParserStateDeductor( +/* Class StateDeductor */ + +StateDeductor::StateDeductor(      std::vector<const Rtti *> signature, -    std::vector<const ParserState *> states) +    std::vector<const State *> states)      : tbl(signature.size()),        signature(std::move(signature)),        states(std::move(states))  {  } -bool ParserStateDeductor::isActive(size_t d, const ParserState *s) +bool StateDeductor::isActive(size_t d, const State *s)  {  	// Lookup the "active" state of (d, s), if it was not already set  	// (e.second is true) we'll have to calculate it @@ -123,7 +132,7 @@ bool ParserStateDeductor::isActive(size_t d, const ParserState *s)  			// Check whether any of the parent nodes were active -- either for  			// the previous element (if this one is generative) or for the  			// current element (assuming this node was not generative) -			for (const ParserState *parent : s->parents) { +			for (const State *parent : s->parents) {  				if ((isGenerative && isActive(d - 1, parent)) ||  					isActive(d, parent)) {  					res = true; @@ -136,9 +145,9 @@ bool ParserStateDeductor::isActive(size_t d, const ParserState *s)  	return res;  } -std::vector<const ParserState *> 
ParserStateDeductor::deduce() +std::vector<const State *> StateDeductor::deduce()  { -	std::vector<const ParserState *> res; +	std::vector<const State *> res;  	if (!signature.empty()) {  		const size_t D = signature.size();  		for (auto s : states) { @@ -153,9 +162,10 @@ std::vector<const ParserState *> ParserStateDeductor::deduce()  /* Constant initializations */ -namespace ParserStates { -const ParserState All; -const ParserState None; +namespace States { +const State All; +const State None; +}  }  } diff --git a/src/core/parser/ParserState.hpp b/src/core/parser/stack/State.hpp index 6487fdd..4766235 100644 --- a/src/core/parser/ParserState.hpp +++ b/src/core/parser/stack/State.hpp @@ -17,10 +17,10 @@  */  /** - * @file ParserState.hpp + * @file State.hpp   * - * Defines the ParserState class used within the ParserStack pushdown - * automaton and the ParserStateBuilder class for convenient construction of + * Defines the State class used within the ParserStack pushdown + * automaton and the StateBuilder class for convenient construction of   * such classes.   *   * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) @@ -33,12 +33,14 @@  #include <core/common/Rtti.hpp>  #include <core/common/Argument.hpp> +#include <core/common/Whitespace.hpp>  namespace ousia { +namespace parser_stack {  // Forward declarations -class ParserStateBuilder; -class ParserState; +class StateBuilder; +class State;  class HandlerData;  class Handler;  using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); @@ -47,17 +49,17 @@ using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);   * Set of pointers of parser states -- used for specifying a set of parent   * states.   */ -using ParserStateSet = std::unordered_set<const ParserState *>; +using StateSet = std::unordered_set<const State *>;  /** - * Class used for the complete specification of a ParserState. Stores possible + * Class used for the complete specification of a State. 
Stores possible   * parent states, state handlers and arguments to be passed to that state.   */ -struct ParserState { +struct State {  	/**  	 * Vector containing all possible parent states.  	 */ -	ParserStateSet parents; +	StateSet parents;  	/**  	 * Descriptor of the arguments that should be passed to the handler. @@ -66,8 +68,8 @@ struct ParserState {  	/**  	 * Set containing the types of the nodes that may be created in this -	 * ParserState. This information is needed for Parsers to reconstruct the -	 * current ParserState from a given ParserScope when a file is included. +	 * State. This information is needed for Parsers to reconstruct the +	 * current State from a given ParserScope when a file is included.  	 */  	RttiSet createdNodeTypes; @@ -79,109 +81,119 @@ struct ParserState {  	HandlerConstructor elementHandler;  	/** +	 * Set to true if this handler does support annotations. This is almost +	 * always false (e.g. all description handlers), except for document  +	 * element handlers. +	 */ +	bool supportsAnnotations; + +	/**  	 * Default constructor, initializes the handlers with nullptr.  	 */ -	ParserState(); +	State();  	/** -	 * Constructor taking values for all fields. Use the ParserStateBuilder -	 * class for a more convenient construction of ParserState instances. +	 * Constructor taking values for all fields. Use the StateBuilder +	 * class for a more convenient construction of State instances.  	 *  	 * @param parents is a vector containing all possible parent states.  	 * @param arguments is a descriptor of arguments that should be passed to  	 * the handler.  	 * @param createdNodeTypes is a set containing the types of the nodes tha -	 * may be created in this ParserState. This information is needed for -	 * Parsers to reconstruct the current ParserState from a given ParserScope +	 * may be created in this State. 
This information is needed for +	 * Parsers to reconstruct the current State from a given ParserScope  	 * when a file is included.  	 * @param elementHandler is a pointer at a function which creates a new  	 * concrete Handler instance for the elements described by this state. May  	 * be nullptr in which case no handler instance is created. +	 * @param supportsAnnotations specifies whether annotations are supported +	 * here at all.  	 */ -	ParserState(ParserStateSet parents, Arguments arguments = Arguments{}, +	State(StateSet parents, Arguments arguments = Arguments{},  	            RttiSet createdNodeTypes = RttiSet{}, -	            HandlerConstructor elementHandler = nullptr); +	            HandlerConstructor elementHandler = nullptr, +	            bool supportsAnnotations = false);  	/** -	 * Creates this ParserState from the given ParserStateBuilder instance. +	 * Creates this State from the given StateBuilder instance.  	 */ -	ParserState(const ParserStateBuilder &builder); +	State(const StateBuilder &builder);  };  /** - * The ParserStateBuilder class is a class used for conveniently building new - * ParserState instances. + * The StateBuilder class is a class used for conveniently building new + * State instances.   */ -class ParserStateBuilder { +class StateBuilder {  private:  	/** -	 * ParserState instance that is currently being built by the -	 * ParserStateBuilder. +	 * State instance that is currently being built by the +	 * StateBuilder.  	 */ -	ParserState state; +	State state;  public:  	/** -	 * Copies the ParserState instance and uses it as internal state. Overrides -	 * all changes made by the ParserStateBuilder. +	 * Copies the State instance and uses it as internal state. Overrides +	 * all changes made by the StateBuilder.  	 *  	 * @param state is the state that should be copied. -	 * @return a reference at this ParserStateBuilder instance for method +	 * @return a reference at this StateBuilder instance for method  	 * chaining.  	 
*/ -	ParserStateBuilder &copy(const ParserState &state); +	StateBuilder &copy(const State &state);  	/**  	 * Sets the possible parent states to the single given parent element.  	 * -	 * @param parent is a pointer at the parent ParserState instance that should +	 * @param parent is a pointer at the parent State instance that should  	 * be the possible parent state. -	 * @return a reference at this ParserStateBuilder instance for method +	 * @return a reference at this StateBuilder instance for method  	 * chaining.  	 */ -	ParserStateBuilder &parent(const ParserState *parent); +	StateBuilder &parent(const State *parent);  	/** -	 * Sets the ParserState instances in the given ParserStateSet as the list of +	 * Sets the State instances in the given StateSet as the list of  	 * supported parent states.  	 * -	 * @param parents is a set of pointers at ParserState instances that should +	 * @param parents is a set of pointers at State instances that should  	 * be the possible parent states. -	 * @return a reference at this ParserStateBuilder instance for method +	 * @return a reference at this StateBuilder instance for method  	 * chaining.  	 */ -	ParserStateBuilder &parents(const ParserStateSet &parents); +	StateBuilder &parents(const StateSet &parents);  	/**  	 * Sets the arguments that should be passed to the parser state handler to  	 * those given as argument.  	 *  	 * @param arguments is the Arguments instance describing the Arguments that -	 * should be parsed to a Handler for this ParserState. -	 * @return a reference at this ParserStateBuilder instance for method +	 * should be parsed to a Handler for this State. +	 * @return a reference at this StateBuilder instance for method  	 * chaining.  	 */ -	ParserStateBuilder &arguments(const Arguments &arguments); +	StateBuilder &arguments(const Arguments &arguments);  	/**  	 * Sets the Node types this state may produce to the given Rtti descriptor.  	 
*  	 * @param type is the Rtti descriptor of the Type that may be produced by  	 * this state. -	 * @return a reference at this ParserStateBuilder instance for method +	 * @return a reference at this StateBuilder instance for method  	 * chaining.  	 */ -	ParserStateBuilder &createdNodeType(const Rtti *type); +	StateBuilder &createdNodeType(const Rtti *type);  	/**  	 * Sets the Node types this state may produce to the given Rtti descriptors.  	 *  	 * @param types is a set of Rtti descriptors of the Types that may be  	 * produced by this state. -	 * @return a reference at this ParserStateBuilder instance for method +	 * @return a reference at this StateBuilder instance for method  	 * chaining.  	 */ -	ParserStateBuilder &createdNodeTypes(const RttiSet &types); +	StateBuilder &createdNodeTypes(const RttiSet &types);  	/**  	 * Sets the constructor for the element handler. The constructor creates a @@ -191,31 +203,42 @@ public:  	 *  	 * @param elementHandler is the HandlerConstructor that should create a  	 * new Handler instance. -	 * @return a reference at this ParserStateBuilder instance for method +	 * @return a reference at this StateBuilder instance for method  	 * chaining.  	 */ -	ParserStateBuilder &elementHandler(HandlerConstructor elementHandler); +	StateBuilder &elementHandler(HandlerConstructor elementHandler);  	/** -	 * Returns a reference at the internal ParserState instance that was built -	 * using the ParserStateBuilder. +	 * Sets the state of the "supportsAnnotations" flags (default value is +	 * false)  	 * -	 * @return the built ParserState. +	 * @param supportsAnnotations should be set to true, if annotations are +	 * supported for the handlers associated with this document. +	 * @return a reference at this StateBuilder instance for method +	 * chaining.  	 
*/ -	const ParserState &build() const; +	StateBuilder &supportsAnnotations(bool supportsAnnotations); + +	/** +	 * Returns a reference at the internal State instance that was built +	 * using the StateBuilder. +	 * +	 * @return the built State. +	 */ +	const State &build() const;  };  /** - * Class used to deduce the ParserState a Parser is currently in based on the + * Class used to deduce the State a Parser is currently in based on the   * types of the Nodes that currently are on the ParserStack. Uses dynamic   * programming in order to solve this problem.   */ -class ParserStateDeductor { +class StateDeductor {  public:  	/**  	 * Type containing the dynamic programming table.  	 */ -	using Table = std::vector<std::unordered_map<const ParserState *, bool>>; +	using Table = std::vector<std::unordered_map<const State *, bool>>;  private:  	/** @@ -231,7 +254,7 @@ private:  	/**  	 * List of states that should be checked for being active.  	 */ -	const std::vector<const ParserState *> states; +	const std::vector<const State *> states;  	/**  	 * Used internally to check whether the given parser stack s may have been @@ -239,20 +262,20 @@ private:  	 *  	 * @param d is the signature element.  	 * @param s is the parser state. -	 * @return true if the the given ParserState may have been active. +	 * @return true if the the given State may have been active.  	 */ -	bool isActive(size_t d, const ParserState *s); +	bool isActive(size_t d, const State *s);  public:  	/** -	 * Constructor of the ParserStateDeductor class. +	 * Constructor of the StateDeductor class.  	 *  	 * @param signature a Node type signature describing the types of the nodes  	 * which currently reside on e.g. the ParserScope stack.  	 * @param states is a list of states that should be checked.  	 
*/ -	ParserStateDeductor(std::vector<const Rtti *> signature, -	                    std::vector<const ParserState *> states); +	StateDeductor(std::vector<const Rtti *> signature, +	                    std::vector<const State *> states);  	/**  	 * Selects all active states from the given states. Only considers those @@ -260,23 +283,24 @@ public:  	 *  	 * @return a list of states that may actually have been active.  	 */ -	std::vector<const ParserState *> deduce(); +	std::vector<const State *> deduce();  };  /** - * The ParserStates namespace contains all the global state constants used + * The States namespace contains all the global state constants used   * in the ParserStack class.   */ -namespace ParserStates { +namespace States {  /**   * State representing all states.   */ -extern const ParserState All; +extern const State All;  /**   * State representing the initial state.   */ -extern const ParserState None; +extern const State None; +}  }  } diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index 2cc7dfb..8fd9525 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -16,32 +16,46 @@      along with this program.  If not, see <http://www.gnu.org/licenses/>.  
*/ -#include "TypesystemHandler.hpp" -  #include <core/model/Typesystem.hpp> +#include <core/model/Domain.hpp>  #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp"  namespace ousia { +namespace parser_stack {  /* TypesystemHandler */ -void TypesystemHandler::start(Variant::mapType &args) +bool TypesystemHandler::start(Variant::mapType &args)  {  	// Create the typesystem instance  	Rooted<Typesystem> typesystem = -	    project()->createTypesystem(args["name"].asString()); +	    context().getProject()->createTypesystem(args["name"].asString());  	typesystem->setLocation(location()); +	// If the typesystem is defined inside a domain, add a reference to the +	// typesystem to the domain +	Rooted<Domain> domain = scope().select<Domain>(); +	if (domain != nullptr) { +		domain->reference(typesystem); +	} +  	// Push the typesystem onto the scope, set the POST_HEAD flag to true  	scope().push(typesystem);  	scope().setFlag(ParserFlag::POST_HEAD, false); + +	return true;  }  void TypesystemHandler::end() { scope().pop(); }  /* TypesystemEnumHandler */ -void TypesystemEnumHandler::start(Variant::mapType &args) +bool TypesystemEnumHandler::start(Variant::mapType &args)  {  	scope().setFlag(ParserFlag::POST_HEAD, true); @@ -52,33 +66,24 @@ void TypesystemEnumHandler::start(Variant::mapType &args)  	enumType->setLocation(location());  	scope().push(enumType); + +	return true;  }  void TypesystemEnumHandler::end() { scope().pop(); }  /* TypesystemEnumEntryHandler */ -void TypesystemEnumEntryHandler::start(Variant::mapType &args) {} - -void TypesystemEnumEntryHandler::end() +void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData, +                                          Variant::mapType &args)  {  	Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>(); -	enumType->addEntry(entry, logger()); -} - -void TypesystemEnumEntryHandler::data(const 
std::string &data, int field) -{ -	if (field != 0) { -		// TODO: This should be stored in the HandlerData -		logger().error("Enum entry only has one field."); -		return; -	} -	entry.append(data); +	enumType->addEntry(fieldData.asString(), logger());  }  /* TypesystemStructHandler */ -void TypesystemStructHandler::start(Variant::mapType &args) +bool TypesystemStructHandler::start(Variant::mapType &args)  {  	scope().setFlag(ParserFlag::POST_HEAD, true); @@ -103,13 +108,15 @@ void TypesystemStructHandler::start(Variant::mapType &args)  			});  	}  	scope().push(structType); + +	return true;  }  void TypesystemStructHandler::end() { scope().pop(); }  /* TypesystemStructFieldHandler */ -void TypesystemStructFieldHandler::start(Variant::mapType &args) +bool TypesystemStructFieldHandler::start(Variant::mapType &args)  {  	// Read the argument values  	const std::string &name = args["name"].asString(); @@ -142,13 +149,13 @@ void TypesystemStructFieldHandler::start(Variant::mapType &args)  			}  		});  	} -} -void TypesystemStructFieldHandler::end() {} +	return true; +}  /* TypesystemConstantHandler */ -void TypesystemConstantHandler::start(Variant::mapType &args) +bool TypesystemConstantHandler::start(Variant::mapType &args)  {  	scope().setFlag(ParserFlag::POST_HEAD, true); @@ -169,7 +176,51 @@ void TypesystemConstantHandler::start(Variant::mapType &args)  			    constant.cast<Constant>()->setType(type.cast<Type>(), logger);  		    }  		}); + +	return true;  } -void TypesystemConstantHandler::end() {} +namespace States { +const State Typesystem = StateBuilder() +                             .parents({&None, &Domain}) +                             .createdNodeType(&RttiTypes::Typesystem) +                             .elementHandler(TypesystemHandler::create) +                             .arguments({Argument::String("name", "")}); + +const State TypesystemEnum = StateBuilder() +                                 .parent(&Typesystem) +                                 
.createdNodeType(&RttiTypes::EnumType) +                                 .elementHandler(TypesystemEnumHandler::create) +                                 .arguments({Argument::String("name")}); + +const State TypesystemEnumEntry = +    StateBuilder() +        .parent(&TypesystemEnum) +        .elementHandler(TypesystemEnumEntryHandler::create) +        .arguments({}); + +const State TypesystemStruct = +    StateBuilder() +        .parent(&Typesystem) +        .createdNodeType(&RttiTypes::StructType) +        .elementHandler(TypesystemStructHandler::create) +        .arguments({Argument::String("name"), Argument::String("parent", "")}); + +const State TypesystemStructField = +    StateBuilder() +        .parent(&TypesystemStruct) +        .elementHandler(TypesystemStructFieldHandler::create) +        .arguments({Argument::String("name"), Argument::String("type"), +                    Argument::Any("default", Variant::fromObject(nullptr))}); + +const State TypesystemConstant = +    StateBuilder() +        .parent(&Typesystem) +        .createdNodeType(&RttiTypes::Constant) +        .elementHandler(TypesystemConstantHandler::create) +        .arguments({Argument::String("name"), Argument::String("type"), +                    Argument::Any("value")}); +}  } +} + diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp index 76a7bc9..85494f1 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -19,6 +19,9 @@  /**   * @file TypesystemHandler.hpp   * + * Contains the Handler classes used to parse Typesystem descriptions. The + * Handlers parse all the tags found below and including the "typesystem" tag. 
+ *   * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)   */ @@ -26,96 +29,180 @@  #define _OUSIA_TYPESYSTEM_HANDLER_HPP_  #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> + +#include "Handler.hpp"  namespace ousia { +namespace parser_stack { -class TypesystemHandler : public Handler { +/** + * Handles the occurance of the "typesystem" tag. Creates a new Typesystem + * instance and places it on the ParserScope. + */ +class TypesystemHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override; +	/** +	 * Creates a new instance of the TypesystemHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{  		return new TypesystemHandler{handlerData};  	}  }; -class TypesystemEnumHandler : public Handler { +/** + * Handles the occurance of the "enum" tag. Creates a new EnumType instance and + * places it on the ParserScope. + */ +class TypesystemEnumHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override; +	/** +	 * Creates a new instance of the TypesystemEnumHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{  		return new TypesystemEnumHandler{handlerData};  	}  }; -class TypesystemEnumEntryHandler : public Handler { +/** + * Handles the occurance of the "entry" tag within an "enum" tag. 
Creates a new + * EnumType instance and places it on the ParserScope. + */ +class TypesystemEnumEntryHandler : public StaticFieldHandler {  public: -	using Handler::Handler; +	using StaticFieldHandler::StaticFieldHandler; -	std::string entry; - -	void start(Variant::mapType &args) override; - -	void end() override; - -	void data(const std::string &data, int field) override; +	void doHandle(const Variant &fieldData, Variant::mapType &args) override; +	/** +	 * Creates a new instance of the TypesystemEnumEntryHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{ -		return new TypesystemEnumEntryHandler{handlerData}; +		return new TypesystemEnumEntryHandler{handlerData, "name"};  	}  }; -class TypesystemStructHandler : public Handler { +/** + * Handles the occurance of the "struct" tag within a typesystem description. + * Creates a new StructType instance and places it on the ParserScope. + */ +class TypesystemStructHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; +	bool start(Variant::mapType &args) override;  	void end() override; +	/** +	 * Creates a new instance of the TypesystemStructHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{  		return new TypesystemStructHandler{handlerData};  	}  }; -class TypesystemStructFieldHandler : public Handler { +/** + * Handles the occurance of the "field" tag within a typesystem structure + * description. Places a new Attribute instance in the StructType instance + * that is currently at the top of the scope. 
+ */ +class TypesystemStructFieldHandler : public StaticHandler {  public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override; +	using StaticHandler::StaticHandler; -	void end() override; +	bool start(Variant::mapType &args) override; +	/** +	 * Creates a new instance of the TypesystemStructFieldHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{  		return new TypesystemStructFieldHandler{handlerData};  	}  }; -class TypesystemConstantHandler : public Handler { +/** + * Handles the occurance of the "constant" tag within a typesystem structure + * description. Places a new Constant instance in the current typesystem. + */ +class TypesystemConstantHandler : public StaticHandler {  public: -	using Handler::Handler; +	using StaticHandler::StaticHandler; -	void start(Variant::mapType &args) override; - -	void end() override; +	bool start(Variant::mapType &args) override; +	/** +	 * Creates a new instance of the TypesystemConstantHandler. +	 * +	 * @param handlerData is the data that is passed to the constructor of the +	 * Handler base class and used there to e.g. access the ParserContext and +	 * the Callbacks instance. +	 */  	static Handler *create(const HandlerData &handlerData)  	{  		return new TypesystemConstantHandler{handlerData};  	}  }; + +namespace States { +/** + * State representing the "typesystem" tag. + */ +extern const State Typesystem; +/** + * State representing the "enum" tag within a typesystem. + */ +extern const State TypesystemEnum; +/** + * State representing the "entry" tag within an enum. + */ +extern const State TypesystemEnumEntry; +/** + * State representing the "struct" tag within a typesystem. 
+ */ +extern const State TypesystemStruct; +/** + * State representing the "field" tag within a typesystem structure. + */ +extern const State TypesystemStructField; +/** + * State representing the "constant" tag within a typesystem. + */ +extern const State TypesystemConstant; +} +}  }  #endif diff --git a/src/formats/osdm/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp index 4a0430b..4a0430b 100644 --- a/src/formats/osdm/TokenTrie.cpp +++ b/src/core/parser/utils/TokenTrie.cpp diff --git a/src/formats/osdm/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp index 36c2ffa..36c2ffa 100644 --- a/src/formats/osdm/TokenTrie.hpp +++ b/src/core/parser/utils/TokenTrie.hpp diff --git a/src/formats/osdm/DynamicTokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index f2cfcd1..3c8177d 100644 --- a/src/formats/osdm/DynamicTokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -22,8 +22,9 @@  #include <core/common/CharReader.hpp>  #include <core/common/Exceptions.hpp>  #include <core/common/Utils.hpp> +#include <core/common/WhitespaceHandler.hpp> -#include "DynamicTokenizer.hpp" +#include "Tokenizer.hpp"  namespace ousia { @@ -38,7 +39,7 @@ struct TokenMatch {  	/**  	 * Token that was matched.  	 */ -	DynamicToken token; +	Token token;  	/**  	 * Current length of the data within the text handler. The text buffer needs @@ -102,8 +103,8 @@ public:  	 * @param textLength is the text buffer length of the previous text token.  	 * @param textEnd is the current end location of the previous text token.  	 */ -	TokenLookup(const TokenTrie::Node *node, size_t start, -	            size_t textLength, size_t textEnd) +	TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength, +	            size_t textEnd)  	    : node(node), start(start), textLength(textLength), textEnd(textEnd)  	{  	} @@ -116,10 +117,10 @@ public:  	 * @param c is the character that should be appended to the current prefix.  	 
* @param lookups is a list to which new TokeLookup instances are added --  	 * which could potentially be expanded in the next iteration. -	 * @param match is the DynamicToken instance to which the matching token +	 * @param match is the Token instance to which the matching token  	 * should be written.  	 * @param tokens is a reference at the internal token list of the -	 * DynamicTokenizer. +	 * Tokenizer.  	 * @param end is the end byte offset of the current character.  	 * @param sourceId is the source if of this file.  	 */ @@ -142,7 +143,7 @@ public:  			size_t len = str.size();  			if (len > match.token.content.size()) {  				match.token = -				    DynamicToken{node->type, str, {sourceId, start, end}}; +				    Token{node->type, str, {sourceId, start, end}};  				match.textLength = textLength;  				match.textEnd = textEnd;  			} @@ -155,203 +156,40 @@ public:  	}  }; -/* Internal class TextHandlerBase */ - -/** - * Base class used for those classes that may be used as TextHandler in the - * DynamicTokenizer::next function. - */ -class TextHandlerBase { -public: -	/** -	 * Start position of the extracted text. -	 */ -	size_t textStart; - -	/** -	 * End position of the extracted text. -	 */ -	size_t textEnd; - -	/** -	 * Buffer containing the extracted text. -	 */ -	std::vector<char> textBuf; - -	/** -	 * Constructor of the TextHandlerBase base class. Initializes the start and -	 * end position with zeros. -	 */ -	TextHandlerBase() : textStart(0), textEnd(0) {} - -	/** -	 * Transforms the given token into a text token containing the extracted -	 * text. -	 * -	 * @param token is the output token to which the text should be written. -	 * @param sourceId is the source id of the underlying file. 
-	 */ -	void buildTextToken(TokenMatch &match, SourceId sourceId) -	{ -		if (match.hasMatch()) { -			match.token.content = -			    std::string{textBuf.data(), match.textLength}; -			match.token.location = -			    SourceLocation{sourceId, textStart, match.textEnd}; -		} else { -			match.token.content = std::string{textBuf.data(), textBuf.size()}; -			match.token.location = SourceLocation{sourceId, textStart, textEnd}; -		} -		match.token.type = TextToken; -	} - -	/** -	 * Returns true if this whitespace handler has found any text and a text -	 * token could be emitted. -	 * -	 * @return true if the internal data buffer is non-empty. -	 */ -	bool hasText() { return !textBuf.empty(); } -}; - -/* Internal class PreservingTextHandler */ - -/** - * The PreservingTextHandler class preserves all characters unmodified, - * including whitepace characters. - */ -class PreservingTextHandler : public TextHandlerBase { -public: -	using TextHandlerBase::TextHandlerBase; - -	/** -	 * Appends the given character to the internal text buffer, does not -	 * eliminate whitespace. -	 * -	 * @param c is the character that should be appended to the internal buffer. -	 * @param start is the start byte offset of the given character. -	 * @param end is the end byte offset of the given character. -	 */ -	void append(char c, size_t start, size_t end) -	{ -		if (textBuf.empty()) { -			textStart = start; -		} -		textEnd = end; -		textBuf.push_back(c); -	} -}; - -/* Internal class TrimmingTextHandler */ -  /** - * The TrimmingTextHandler class trims all whitespace characters at the begin - * and the end of a text section but leaves all other characters unmodified, - * including whitepace characters. + * Transforms the given token into a text token containing the extracted + * text. + * + * @param handler is the WhitespaceHandler containing the collected data. + * @param token is the output token to which the text should be written. + * @param sourceId is the source id of the underlying file.   
*/ -class TrimmingTextHandler : public TextHandlerBase { -public: -	using TextHandlerBase::TextHandlerBase; - -	/** -	 * Buffer used internally to temporarily store all whitespace characters. -	 * They are only added to the output buffer if another non-whitespace -	 * character is reached. -	 */ -	std::vector<char> whitespaceBuf; - -	/** -	 * Appends the given character to the internal text buffer, eliminates -	 * whitespace characters at the begin and end of the text. -	 * -	 * @param c is the character that should be appended to the internal buffer. -	 * @param start is the start byte offset of the given character. -	 * @param end is the end byte offset of the given character. -	 */ -	void append(char c, size_t start, size_t end) -	{ -		// Handle whitespace characters -		if (Utils::isWhitespace(c)) { -			if (!textBuf.empty()) { -				whitespaceBuf.push_back(c); -			} -			return; -		} - -		// Set the start and end offset correctly -		if (textBuf.empty()) { -			textStart = start; -		} -		textEnd = end; - -		// Store the character -		if (!whitespaceBuf.empty()) { -			textBuf.insert(textBuf.end(), whitespaceBuf.begin(), -			               whitespaceBuf.end()); -			whitespaceBuf.clear(); -		} -		textBuf.push_back(c); -	} -}; - -/* Internal class CollapsingTextHandler */ - -/** - * The CollapsingTextHandler trims characters at the beginning and end of the - * text and reduced multiple whitespace characters to a single blank. - */ -class CollapsingTextHandler : public TextHandlerBase { -public: -	using TextHandlerBase::TextHandlerBase; - -	/** -	 * Flag set to true if a whitespace character was reached. -	 */ -	bool hasWhitespace = false; - -	/** -	 * Appends the given character to the internal text buffer, eliminates -	 * redundant whitespace characters. -	 * -	 * @param c is the character that should be appended to the internal buffer. -	 * @param start is the start byte offset of the given character. -	 * @param end is the end byte offset of the given character. 
-	 */ -	void append(char c, size_t start, size_t end) -	{ -		// Handle whitespace characters -		if (Utils::isWhitespace(c)) { -			if (!textBuf.empty()) { -				hasWhitespace = true; -			} -			return; -		} - -		// Set the start and end offset correctly -		if (textBuf.empty()) { -			textStart = start; -		} -		textEnd = end; - -		// Store the character -		if (hasWhitespace) { -			textBuf.push_back(' '); -			hasWhitespace = false; -		} -		textBuf.push_back(c); +static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match, +                           SourceId sourceId) +{ +	if (match.hasMatch()) { +		match.token.content = +		    std::string{handler.textBuf.data(), match.textLength}; +		match.token.location = +		    SourceLocation{sourceId, handler.textStart, match.textEnd}; +	} else { +		match.token.content = handler.toString(); +		match.token.location = +		    SourceLocation{sourceId, handler.textStart, handler.textEnd};  	} -}; +	match.token.type = TextToken; +}  } -/* Class DynamicTokenizer */ +/* Class Tokenizer */ -DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode) +Tokenizer::Tokenizer(WhitespaceMode whitespaceMode)      : whitespaceMode(whitespaceMode), nextTokenTypeId(0)  {  }  template <typename TextHandler, bool read> -bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token) +bool Tokenizer::next(CharReader &reader, Token &token)  {  	// If we're in the read mode, reset the char reader peek position to the  	// current read position @@ -409,9 +247,8 @@ bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)  	}  	// If we found text, emit that text -	if (textHandler.hasText() && -	    (!match.hasMatch() || match.textLength > 0)) { -		textHandler.buildTextToken(match, sourceId); +	if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { +		buildTextToken(textHandler, match, sourceId);  	}  	// Move the read/peek cursor to the end of the token, abort if an error @@ -431,38 +268,38 @@ 
bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)  		}  		token = match.token;  	} else { -		token = DynamicToken{}; +		token = Token{};  	}  	return match.hasMatch();  } -bool DynamicTokenizer::read(CharReader &reader,DynamicToken &token) +bool Tokenizer::read(CharReader &reader, Token &token)  {  	switch (whitespaceMode) {  		case WhitespaceMode::PRESERVE: -			return next<PreservingTextHandler, true>(reader, token); +			return next<PreservingWhitespaceHandler, true>(reader, token);  		case WhitespaceMode::TRIM: -			return next<TrimmingTextHandler, true>(reader, token); +			return next<TrimmingWhitespaceHandler, true>(reader, token);  		case WhitespaceMode::COLLAPSE: -			return next<CollapsingTextHandler, true>(reader, token); +			return next<CollapsingWhitespaceHandler, true>(reader, token);  	}  	return false;  } -bool DynamicTokenizer::peek(CharReader &reader,DynamicToken &token) +bool Tokenizer::peek(CharReader &reader, Token &token)  {  	switch (whitespaceMode) {  		case WhitespaceMode::PRESERVE: -			return next<PreservingTextHandler, false>(reader, token); +			return next<PreservingWhitespaceHandler, false>(reader, token);  		case WhitespaceMode::TRIM: -			return next<TrimmingTextHandler, false>(reader, token); +			return next<TrimmingWhitespaceHandler, false>(reader, token);  		case WhitespaceMode::COLLAPSE: -			return next<CollapsingTextHandler, false>(reader, token); +			return next<CollapsingWhitespaceHandler, false>(reader, token);  	}  	return false;  } -TokenTypeId DynamicTokenizer::registerToken(const std::string &token) +TokenTypeId Tokenizer::registerToken(const std::string &token)  {  	// Abort if an empty token should be registered  	if (token.empty()) { @@ -493,14 +330,14 @@ TokenTypeId DynamicTokenizer::registerToken(const std::string &token)  	// Try to register the token in the trie -- if this fails, remove it  	// from the tokens list  	if (!trie.registerToken(token, type)) { -		tokens[type] = std::string(); +		
tokens[type] = std::string{};  		nextTokenTypeId = type;  		return EmptyToken;  	}  	return type;  } -bool DynamicTokenizer::unregisterToken(TokenTypeId type) +bool Tokenizer::unregisterToken(TokenTypeId type)  {  	// Unregister the token from the trie, abort if an invalid type is given  	if (type < tokens.size() && trie.unregisterToken(tokens[type])) { @@ -511,7 +348,7 @@ bool DynamicTokenizer::unregisterToken(TokenTypeId type)  	return false;  } -std::string DynamicTokenizer::getTokenString(TokenTypeId type) +std::string Tokenizer::getTokenString(TokenTypeId type)  {  	if (type < tokens.size()) {  		return tokens[type]; @@ -519,26 +356,26 @@ std::string DynamicTokenizer::getTokenString(TokenTypeId type)  	return std::string{};  } -void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode) +void Tokenizer::setWhitespaceMode(WhitespaceMode mode)  {  	whitespaceMode = mode;  } -WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; } +WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; }  /* Explicitly instantiate all possible instantiations of the "next" member     function */ -template bool DynamicTokenizer::next<PreservingTextHandler, false>( -    CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next<TrimmingTextHandler, false>( -    CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next<CollapsingTextHandler, false>( -    CharReader &reader,DynamicToken &token); -template bool DynamicTokenizer::next<PreservingTextHandler, true>( -    CharReader &reader,DynamicToken &token); -template bool DynamicTokenizer::next<TrimmingTextHandler, true>( -    CharReader &reader,DynamicToken &token); -template bool DynamicTokenizer::next<CollapsingTextHandler, true>( -    CharReader &reader,DynamicToken &token); +template bool Tokenizer::next<PreservingWhitespaceHandler, false>( +    CharReader &reader, Token &token); +template bool Tokenizer::next<TrimmingWhitespaceHandler, false>( + 
   CharReader &reader, Token &token); +template bool Tokenizer::next<CollapsingWhitespaceHandler, false>( +    CharReader &reader, Token &token); +template bool Tokenizer::next<PreservingWhitespaceHandler, true>( +    CharReader &reader, Token &token); +template bool Tokenizer::next<TrimmingWhitespaceHandler, true>( +    CharReader &reader, Token &token); +template bool Tokenizer::next<CollapsingWhitespaceHandler, true>( +    CharReader &reader, Token &token);  } diff --git a/src/formats/osdm/DynamicTokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp index 0cac2e8..6b4e116 100644 --- a/src/formats/osdm/DynamicTokenizer.hpp +++ b/src/core/parser/utils/Tokenizer.hpp @@ -17,7 +17,7 @@  */  /** - * @file DynamicTokenizer.hpp + * @file Tokenizer.hpp   *   * Tokenizer that can be reconfigured at runtime used for parsing the plain   * text format. @@ -33,6 +33,7 @@  #include <vector>  #include <core/common/Location.hpp> +#include <core/common/Whitespace.hpp>  #include "TokenTrie.hpp" @@ -42,9 +43,9 @@ namespace ousia {  class CharReader;  /** - * The DynamicToken structure describes a token discovered by the Tokenizer. + * The Token structure describes a token discovered by the Tokenizer.   */ -struct DynamicToken { +struct Token {  	/**  	 * Id of the type of this token.  	 */ @@ -63,28 +64,28 @@ struct DynamicToken {  	/**  	 * Default constructor.  	 */ -	DynamicToken() : type(EmptyToken) {} +	Token() : type(EmptyToken) {}  	/** -	 * Constructor of the DynamicToken struct. +	 * Constructor of the Token struct.  	 *  	 * @param id represents the token type.  	 * @param content is the string content that has been extracted.  	 * @param location is the location of the extracted string content in the  	 * source file.  	 
*/ -	DynamicToken(TokenTypeId type, const std::string &content, +	Token(TokenTypeId type, const std::string &content,  	             SourceLocation location)  	    : type(type), content(content), location(location)  	{  	}  	/** -	 * Constructor of the DynamicToken struct, only initializes the token type +	 * Constructor of the Token struct, only initializes the token type  	 *  	 * @param type is the id corresponding to the type of the token.  	 */ -	DynamicToken(TokenTypeId type) : type(type) {} +	Token(TokenTypeId type) : type(type) {}  	/**  	 * The getLocation function allows the tokens to be directly passed as @@ -96,35 +97,13 @@ struct DynamicToken {  };  /** - * Enum specifying the whitespace handling of the DynamicTokenizer class when - * reading non-token text. - */ -enum class WhitespaceMode { -	/** -     * Preserves all whitespaces as they are found in the source file. -     */ -	PRESERVE, - -	/** -     * Trims whitespace at the beginning and the end of the found text. -     */ -	TRIM, - -	/** -     * Whitespaces are trimmed and collapsed, multiple whitespace characters -     * are replaced by a single space character. -     */ -	COLLAPSE -}; - -/** - * The DynamicTokenizer is used to extract tokens and chunks of text from a + * The Tokenizer is used to extract tokens and chunks of text from a   * CharReader. It allows to register and unregister tokens while parsing and   * to modify the handling of whitespace characters. Note that the - * DynamicTokenizer always tries to extract the longest possible token from the + * Tokenizer always tries to extract the longest possible token from the   * tokenizer.   */ -class DynamicTokenizer { +class Tokenizer {  private:  	/**  	 * Internally used token trie. This object holds all registered tokens. @@ -161,15 +140,15 @@ private:  	 * @return false if the end of the stream has been reached, true otherwise.  	 
*/  	template <typename TextHandler, bool read> -	bool next(CharReader &reader, DynamicToken &token); +	bool next(CharReader &reader, Token &token);  public:  	/** -	 * Constructor of the DynamicTokenizer class. +	 * Constructor of the Tokenizer class.  	 *  	 * @param whitespaceMode specifies how whitespace should be handled.  	 */ -	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); +	Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);  	/**  	 * Registers the given string as a token. Returns a const pointer at a @@ -222,7 +201,7 @@ public:  	/**  	 * Reads a new token from the CharReader and stores it in the given -	 * DynamicToken instance. +	 * Token instance.  	 *  	 * @param reader is the CharReader instance from which the data should be  	 * read. @@ -231,7 +210,7 @@ public:  	 * @return true if a token could be read, false if the end of the stream  	 * has been reached.  	 */ -	bool read(CharReader &reader, DynamicToken &token); +	bool read(CharReader &reader, Token &token);  	/**  	 * The peek method does not advance the read position of the char reader, @@ -244,7 +223,7 @@ public:  	 * @return true if a token could be read, false if the end of the stream  	 * has been reached.  	 */ -	bool peek(CharReader &reader, DynamicToken &token); +	bool peek(CharReader &reader, Token &token);  };  } diff --git a/src/formats/osml/OsmlParser.cpp b/src/formats/osml/OsmlParser.cpp new file mode 100644 index 0000000..4973639 --- /dev/null +++ b/src/formats/osml/OsmlParser.cpp @@ -0,0 +1,57 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. 
+ +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/parser/generic/ParserStateCallbacks.hpp> +#include <core/parser/generic/ParserStateStack.hpp> + +#include "OsdmParser.hpp" +#include "OsdmStreamParser.hpp" + +namespace ousia { + +namespace { + +/** + * The OsdmParserImplementation class contains the actual implementation of the + * parsing process and is created in the "doParse" function of the OsdmParser. +  + */ +class OsdmParserImplementation : public ParserStateCallbacks { +private: +	/** +	 * OsdmStreamParser instance. +	 */ +	OsdmStreamParser parser; + +	/** +	 * Instance of the ParserStateStack. +	 */ +	ParserStateStack stack; + +public: +	OsdmParserImplementation parser(reader, ctx) : parser(reader), stack(ctx, std::multimap) +}; +} + +void OsdmParser::doParse(CharReader &reader, ParserContext &ctx) +{ +	OsdmParserImplementation parser(reader, ctx); +	parser.parse(); +} + +} diff --git a/src/core/parser/generic/GenericParser.hpp b/src/formats/osml/OsmlParser.hpp index 4f29f94..37505b4 100644 --- a/src/core/parser/generic/GenericParser.hpp +++ b/src/formats/osml/OsmlParser.hpp @@ -17,33 +17,32 @@  */  /** - * @file GenericParser.hpp + * @file OsdmParser.hpp   * - * The GenericParser class builds an abstraction layer that separates the - * underlying document format (e.g. osdm or osdmx) from the actual process of - * building the document model. It provides a set of genric functions that - * should be called by the inheriting concrete parser class, e.g. indicating a - * command with parameters, the start/end of a field or the start/end of an - * annotation. 
The GenericParser maintains an internal stack of - * ParserStateHandlers and relays the commands to the elements of this stack. + * Contains the parser of the osdm format, the standard plain-text format used + * by Ousía for documents.   *   * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)   */ -#ifndef _OUSIA_GENERIC_PARSER_HPP_ -#define _OUSIA_GENERIC_PARSER_HPP_ +#ifndef _OUSIA_OSDM_PARSER_HPP_ +#define _OUSIA_OSDM_PARSER_HPP_ -#include <core/parser/Parseer.hpp> +#include <core/parser/Parser.hpp>  namespace ousia { -class GenericParser : public Parser { - - - +/** + * OsdmParser is a small wrapper implementing the Parser interface. The actual + * parsing is performed with the OsdmStreamParser in conjunction with the + * ParserStateStack. + */ +class OsdmParser : public Parser { +protected: +	void doParse(CharReader &reader, ParserContext &ctx) override;  };  } -#endif _OUSIA_GENERIC_PARSER_HPP_ +#endif /* _OUSIA_OSDM_PARSER_HPP_ */ diff --git a/src/formats/osdm/OsdmStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index 8cb8caf..0174fa4 100644 --- a/src/formats/osdm/OsdmStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -21,14 +21,14 @@  #include <core/common/Utils.hpp>  #include <core/common/VariantReader.hpp> -#include "OsdmStreamParser.hpp" +#include "OsmlStreamParser.hpp"  namespace ousia {  /**   * Plain format default tokenizer.   */ -class PlainFormatTokens : public DynamicTokenizer { +class PlainFormatTokens : public Tokenizer {  public:  	/**  	 * Id of the backslash token. @@ -61,6 +61,21 @@ public:  	TokenTypeId FieldEnd;  	/** +	 * Id of the default field start token. +	 */ +	TokenTypeId DefaultFieldStart; + +	/** +	 * Id of the annotation start token. +	 */ +	TokenTypeId AnnotationStart; + +	/** +	 * Id of the annotation end token. +	 */ +	TokenTypeId AnnotationEnd; + +	/**  	 * Registers the plain format tokens in the internal tokenizer.  	 
*/  	PlainFormatTokens() @@ -71,6 +86,9 @@ public:  		BlockCommentEnd = registerToken("}%");  		FieldStart = registerToken("{");  		FieldEnd = registerToken("}"); +		DefaultFieldStart = registerToken("{!"); +		AnnotationStart = registerToken("<\\"); +		AnnotationEnd = registerToken("\\>");  	}  }; @@ -160,14 +178,14 @@ public:  	}  }; -OsdmStreamParser::OsdmStreamParser(CharReader &reader, Logger &logger) +OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger)      : reader(reader), logger(logger), tokenizer(Tokens)  {  	// Place an intial command representing the complete file on the stack -	commands.push(Command{"", Variant::mapType{}, true, true, true}); +	commands.push(Command{"", Variant::mapType{}, true, true, true, false});  } -Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep) +Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep)  {  	bool first = true;  	bool hasCharSiceNSSep = false; @@ -210,7 +228,7 @@ Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep)  	return res;  } -OsdmStreamParser::State OsdmStreamParser::parseBeginCommand() +OsmlStreamParser::State OsmlStreamParser::parseBeginCommand()  {  	// Expect a '{' after the command  	reader.consumeWhitespace(); @@ -251,7 +269,7 @@ OsdmStreamParser::State OsdmStreamParser::parseBeginCommand()  	return State::COMMAND;  } -static bool checkStillInField(const OsdmStreamParser::Command &cmd, +static bool checkStillInField(const OsmlStreamParser::Command &cmd,                                const Variant &endName, Logger &logger)  {  	if (cmd.inField && !cmd.inRangeField) { @@ -264,7 +282,7 @@ static bool checkStillInField(const OsdmStreamParser::Command &cmd,  	return false;  } -OsdmStreamParser::State OsdmStreamParser::parseEndCommand() +OsmlStreamParser::State OsmlStreamParser::parseEndCommand()  {  	// Expect a '{' after the command  	if (!reader.expect('{')) { @@ -327,7 +345,7 @@ OsdmStreamParser::State 
OsdmStreamParser::parseEndCommand()  	return cmd.inRangeField ? State::FIELD_END : State::NONE;  } -Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName) +Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName)  {  	// Parse the arguments using the universal VariantReader  	Variant commandArguments; @@ -353,7 +371,7 @@ Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName)  	return commandArguments;  } -void OsdmStreamParser::pushCommand(Variant commandName, +void OsmlStreamParser::pushCommand(Variant commandName,                                     Variant commandArguments, bool hasRange)  {  	// Store the location on the stack @@ -365,10 +383,11 @@ void OsdmStreamParser::pushCommand(Variant commandName,  		commands.pop();  	}  	commands.push(Command{std::move(commandName), std::move(commandArguments), -	                      hasRange, false, false}); +	                      hasRange, false, false, false});  } -OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start) +OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, +                                                       bool isAnnotation)  {  	// Parse the commandName as a first identifier  	Variant commandName = parseIdentifier(start, true); @@ -382,6 +401,9 @@ OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)  	    Utils::split(commandName.asString(), ':');  	const bool isBegin = commandNameComponents[0] == "begin";  	const bool isEnd = commandNameComponents[0] == "end"; + +	// Parse the begin or end command +	State res = State::COMMAND;  	if (isBegin || isEnd) {  		if (commandNameComponents.size() > 1) {  			logger.error( @@ -390,35 +412,81 @@ OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)  			    commandName);  		}  		if (isBegin) { -			return parseBeginCommand(); +			res = parseBeginCommand();  		} else if (isEnd) { -			return parseEndCommand(); +			res = parseEndCommand();  		} +	} 
else { +		// Check whether the next character is a '#', indicating the start of +		// the command name +		Variant commandArgName; +		start = reader.getOffset(); +		if (reader.expect('#')) { +			commandArgName = parseIdentifier(start); +			if (commandArgName.asString().empty()) { +				logger.error("Expected identifier after \"#\"", commandArgName); +			} +		} + +		// Parse the arugments +		Variant commandArguments = +		    parseCommandArguments(std::move(commandArgName)); + +		// Push the command onto the command stack +		pushCommand(std::move(commandName), std::move(commandArguments), false);  	} -	// Check whether the next character is a '#', indicating the start of the -	// command name -	Variant commandArgName; -	start = reader.getOffset(); -	if (reader.expect('#')) { -		commandArgName = parseIdentifier(start); -		if (commandArgName.asString().empty()) { -			logger.error("Expected identifier after \"#\"", commandArgName); +	// Check whether a ">" character is the next character that is to be read. +	// In that case the current command could be an annotation end command! +	char c; +	if (reader.fetch(c) && c == '>') { +		// Ignore the character after a begin or end command +		if (isBegin || isEnd) { +			logger.warning( +			    "Ignoring annotation end character \">\" after special " +			    "commands \"begin\" or \"end\". Write \"\\>\" to end a " +			    "\"begin\"/\"end\" enclosed annotation.", +			    reader); +			return res;  		} -	} -	// Parse the arugments -	Variant commandArguments = parseCommandArguments(std::move(commandArgName)); +		// If this should be an annoation, ignore the character +		if (isAnnotation) { +			logger.warning( +			    "Ignoring annotation end character \">\" after annotation " +			    "start command. 
Write \"\\>\" to end the annotation.", +			    reader); +		} else { +			// Make sure no arguments apart from the "name" argument are given +			// to an annotation end +			Variant::mapType &map = commands.top().arguments.asMap(); +			if (!map.empty()) { +				if (map.count("name") == 0 || map.size() > 1U) { +					logger.error( +					    "An annotation end command may not have any arguments " +					    "other than \"name\""); +					return res; +				} +			} -	// Push the command onto the command stack -	pushCommand(std::move(commandName), std::move(commandArguments), false); +			// If we got here, this is a valid ANNOTATION_END command, issue it +			reader.peek(c); +			reader.consumePeek(); +			return State::ANNOTATION_END; +		} +	} -	return State::COMMAND; +	// If we're starting an annotation, return the command as annotation start +	// instead of command +	if (isAnnotation && res == State::COMMAND) { +		return State::ANNOTATION_START; +	} +	return res;  } -void OsdmStreamParser::parseBlockComment() +void OsmlStreamParser::parseBlockComment()  { -	DynamicToken token; +	Token token;  	size_t depth = 1;  	while (tokenizer.read(reader, token)) {  		if (token.type == Tokens.BlockCommentEnd) { @@ -436,7 +504,7 @@ void OsdmStreamParser::parseBlockComment()  	logger.error("File ended while being in a block comment", reader);  } -void OsdmStreamParser::parseLineComment() +void OsmlStreamParser::parseLineComment()  {  	char c;  	while (reader.read(c)) { @@ -446,7 +514,7 @@ void OsdmStreamParser::parseLineComment()  	}  } -bool OsdmStreamParser::checkIssueData(DataHandler &handler) +bool OsmlStreamParser::checkIssueData(DataHandler &handler)  {  	if (!handler.isEmpty()) {  		data = handler.toVariant(reader.getSourceId()); @@ -457,7 +525,7 @@ bool OsdmStreamParser::checkIssueData(DataHandler &handler)  	return false;  } -bool OsdmStreamParser::checkIssueFieldStart() +bool OsmlStreamParser::checkIssueFieldStart()  {  	// Fetch the current command, and check whether we're 
currently inside a  	// field of this command @@ -482,18 +550,41 @@ bool OsdmStreamParser::checkIssueFieldStart()  	return false;  } -OsdmStreamParser::State OsdmStreamParser::parse() +bool OsmlStreamParser::closeField() +{ +	// Try to end an open field of the current command -- if the current command +	// is not inside an open field, end this command and try to close the next +	// one +	for (int i = 0; i < 2 && commands.size() > 1; i++) { +		Command &cmd = commands.top(); +		if (!cmd.inRangeField) { +			if (cmd.inField) { +				cmd.inField = false; +				if (cmd.inDefaultField) { +					commands.pop(); +				} +				return true; +			} +			commands.pop(); +		} else { +			return false; +		} +	} +	return false; +} + +OsmlStreamParser::State OsmlStreamParser::parse()  {  	// Handler for incomming data  	DataHandler handler;  	// Read tokens until the outer loop should be left -	DynamicToken token; +	Token token;  	while (tokenizer.peek(reader, token)) {  		const TokenTypeId type = token.type;  		// Special handling for Backslash and Text -		if (type == Tokens.Backslash) { +		if (type == Tokens.Backslash || type == Tokens.AnnotationStart) {  			// Before appending anything to the output data or starting a new  			// command, check whether FIELD_START has to be issued, as the  			// current command is a command with range @@ -519,7 +610,8 @@ OsdmStreamParser::State OsdmStreamParser::parse()  				}  				// Parse the actual command -				State res = parseCommand(token.location.getStart()); +				State res = parseCommand(token.location.getStart(), +				                         type == Tokens.AnnotationStart);  				switch (res) {  					case State::ERROR:  						throw LoggableException( @@ -536,6 +628,14 @@ OsdmStreamParser::State OsdmStreamParser::parse()  			// to the data buffer, use the escape character start as start  			// location and the peek offset as end location  			reader.peek(c);  // Peek the previously fetched character + +			// If this was an annotation start token, 
add the parsed < to the +			// output +			if (type == Tokens.AnnotationStart) { +				handler.append('<', token.location.getStart(), +				               token.location.getStart() + 1); +			} +  			handler.append(c, token.location.getStart(),  			               reader.getPeekOffset());  			reader.consumePeek(); @@ -579,28 +679,37 @@ OsdmStreamParser::State OsdmStreamParser::parse()  			}  			logger.error(  			    "Got field start token \"{\", but no command for which to " -			    "start the field. Did you mean \"\\{\"?", +			    "start the field. Write \"\\{\" to insert this sequence as " +			    "text.",  			    token);  		} else if (token.type == Tokens.FieldEnd) { -			// Try to end an open field of the current command -- if the current -			// command is not inside an open field, end this command and try to -			// close the next one -			for (int i = 0; i < 2 && commands.size() > 1; i++) { -				Command &cmd = commands.top(); -				if (!cmd.inRangeField) { -					if (cmd.inField) { -						cmd.inField = false; -						return State::FIELD_END; -					} -					commands.pop(); -				} else { -					break; -				} +			if (closeField()) { +				return State::FIELD_END;  			}  			logger.error( -			    "Got field end token \"}\", but there is no field to end. Did " -			    "you mean \"\\}\"?", +			    "Got field end token \"}\", but there is no field to end. " +			    "Write \"\\}\" to insert this sequence as text.",  			    token); +		} else if (token.type == Tokens.DefaultFieldStart) { +			// Try to start a default field the first time the token is reached +			Command &topCmd = commands.top(); +			if (!topCmd.inField) { +				topCmd.inField = true; +				topCmd.inDefaultField = true; +				return State::FIELD_START; +			} +			logger.error( +			    "Got default field start token \"{!\", but no command for " +			    "which to start the field. 
Write \"\\{!\" to insert this " +			    "sequence as text", +			    token); +		} else if (token.type == Tokens.AnnotationEnd) { +			// We got a single annotation end token "\>" -- simply issue the +			// ANNOTATION_END event +			Variant annotationName = Variant::fromString(""); +			annotationName.setLocation(token.location); +			pushCommand(annotationName, Variant::mapType{}, false); +			return State::ANNOTATION_END;  		} else {  			logger.error("Unexpected token \"" + token.content + "\"", token);  		} @@ -627,14 +736,19 @@ OsdmStreamParser::State OsdmStreamParser::parse()  	return State::END;  } -const Variant &OsdmStreamParser::getCommandName() +const Variant &OsmlStreamParser::getCommandName() const  {  	return commands.top().name;  } -const Variant &OsdmStreamParser::getCommandArguments() +const Variant &OsmlStreamParser::getCommandArguments() const  {  	return commands.top().arguments;  } + +bool OsmlStreamParser::inDefaultField() const +{ +	return commands.top().inRangeField || commands.top().inDefaultField; +}  } diff --git a/src/formats/osdm/OsdmStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index 48d8fb7..dc3034c 100644 --- a/src/formats/osdm/OsdmStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -17,23 +17,22 @@  */  /** - * @file OsdmStreamParser.hpp + * @file OsmlStreamParser.hpp   * - * Provides classes for low-level classes for reading the TeX-esque osdm + * Provides classes for low-level classes for reading the TeX-esque osml   * format. The class provided here does not build any model objects and does not   * implement the Parser interface.   
*   * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)   */ -#ifndef _OUSIA_OSDM_STREAM_PARSER_HPP_ -#define _OUSIA_OSDM_STREAM_PARSER_HPP_ +#ifndef _OUSIA_OSML_STREAM_PARSER_HPP_ +#define _OUSIA_OSML_STREAM_PARSER_HPP_  #include <stack>  #include <core/common/Variant.hpp> - -#include "DynamicTokenizer.hpp" +#include <core/parser/utils/Tokenizer.hpp>  namespace ousia { @@ -43,7 +42,7 @@ class Logger;  class DataHandler;  /** - * The OsdmStreamParser class provides a low-level reader for the TeX-esque osdm + * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml   * format. The parser is constructed around a "parse" function, which reads data   * from the underlying CharReader until a new state is reached and indicates   * this state in a return value. The calling code then has to pull corresponding @@ -53,10 +52,10 @@ class DataHandler;   * fields, as this would lead to too many consecutive errors) a   * LoggableException is thrown.   */ -class OsdmStreamParser { +class OsmlStreamParser {  public:  	/** -	 * Enum used to indicate which state the OsdmStreamParser class is in +	 * Enum used to indicate which state the OsmlStreamParser class is in  	 * after calling the "parse" function.  	 */  	enum class State { @@ -140,23 +139,35 @@ public:  		/**  		 * Set to true if this is a command with clear begin and end.  		 */ -		bool hasRange; +		bool hasRange : 1;  		/**  		 * Set to true if we are currently inside a field of this command.  		 */ -		bool inField; +		bool inField : 1;  		/**  		 * Set to true if we are currently in the range field of the command  		 * (implies inField being set to true).  		 */ -		bool inRangeField; +		bool inRangeField : 1; + +		/** +		 * Set to true if we are currently in a field that has been especially +		 * marked as default field (using the "|") syntax. +		 */ +		bool inDefaultField : 1;  		/**  		 * Default constructor.  		 
*/ -		Command() : hasRange(false), inField(false), inRangeField(false) {} +		Command() +		    : hasRange(false), +		      inField(false), +		      inRangeField(false), +		      inDefaultField() +		{ +		}  		/**  		 * Constructor of the Command class. @@ -169,16 +180,19 @@ public:  		 * explicit range.  		 * @param inField is set to true if we currently are inside a field  		 * of this command. -		 * @param inRangeField is set to true if we currently inside the outer -		 * field of the command. +		 * @param inRangeField is set to true if we currently are inside the +		 * outer field of a ranged command. +		 * @param inDefaultField is set to true if we currently are in a +		 * specially marked default field.  		 */ -		Command(Variant name, Variant arguments, bool hasRange, bool inField, -		        bool inRangeField) +		Command(Variant name, Variant arguments, bool hasRange, +		        bool inField, bool inRangeField, bool inDefaultField)  		    : name(std::move(name)),  		      arguments(std::move(arguments)),  		      hasRange(hasRange),  		      inField(inField), -		      inRangeField(inRangeField) +		      inRangeField(inRangeField), +		      inDefaultField(inDefaultField)  		{  		}  	}; @@ -198,7 +212,7 @@ private:  	/**  	 * Tokenizer instance used to read individual tokens from the text.  	 */ -	DynamicTokenizer tokenizer; +	Tokenizer tokenizer;  	/**  	 * Stack containing the current commands. @@ -258,9 +272,11 @@ private:  	 *  	 * @param start is the start byte offset of the command (including the  	 * backslash) +	 * @param isAnnotation if true, the command is not returned as command, but +	 * as annotation start.  	 * @return true if a command was actuall parsed, false otherwise.  	 */ -	State parseCommand(size_t start); +	State parseCommand(size_t start, bool isAnnotation);  	/**  	 * Function used internally to parse a block comment. @@ -290,16 +306,26 @@ private:  	 */  	bool checkIssueFieldStart(); +	/** +	 * Closes a currently open field. 
Note that the command will be removed from +	 * the internal command stack if the field that is being closed is a +	 * field marked as default field. +	 * +	 * @return true if the field could be closed, false if there was no field +	 * to close. +	 */ +	bool closeField(); +  public:  	/** -	 * Constructor of the OsdmStreamParser class. Attaches the new -	 * OsdmStreamParser to the given CharReader and Logger instances. +	 * Constructor of the OsmlStreamParser class. Attaches the new +	 * OsmlStreamParser to the given CharReader and Logger instances.  	 *  	 * @param reader is the reader instance from which incomming characters  	 * should be read.  	 * @param logger is the logger instance to which errors should be written.  	 */ -	OsdmStreamParser(CharReader &reader, Logger &logger); +	OsmlStreamParser(CharReader &reader, Logger &logger);  	/**  	 * Continues parsing. Returns one of the states defined in the State enum. @@ -318,7 +344,7 @@ public:  	 * @return a reference at a variant containing the data parsed by the  	 * "parse" function.  	 */ -	const Variant &getData() { return data; } +	const Variant &getData() const { return data; }  	/**  	 * Returns a reference at the internally stored command name. Only valid if @@ -327,7 +353,7 @@ public:  	 * @return a reference at a variant containing name and location of the  	 * parsed command.  	 */ -	const Variant &getCommandName(); +	const Variant &getCommandName() const;  	/**  	 * Returns a reference at the internally stored command name. Only valid if @@ -336,16 +362,24 @@ public:  	 * @return a reference at a variant containing arguments given to the  	 * command.  	 */ -	const Variant &getCommandArguments(); +	const Variant &getCommandArguments() const; + +	/** +	 * Returns true if the current field is the "default" field. This is true if +	 * the parser either is in the outer range of a range command or inside a +	 * field that has been especially marked as "default" field (using the "|" +	 * syntax). 
+	 */ +	bool inDefaultField() const;  	/**  	 * Returns a reference at the char reader.  	 *  	 * @return the last internal token location.  	 */ -	SourceLocation &getLocation() { return location; } +	const SourceLocation &getLocation() const { return location; }  };  } -#endif /* _OUSIA_OSDM_STREAM_PARSER_HPP_ */ +#endif /* _OUSIA_OSML_STREAM_PARSER_HPP_ */ diff --git a/src/formats/osxml/OsxmlAttributeLocator.cpp b/src/formats/osxml/OsxmlAttributeLocator.cpp new file mode 100644 index 0000000..e37446a --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.cpp @@ -0,0 +1,144 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/common/Location.hpp> +#include <core/common/CharReader.hpp> +#include <core/common/Utils.hpp> + +#include "OsxmlAttributeLocator.hpp" + +namespace ousia { + +/** + * Enum used internally in the statemachine of the xml argument parser. + */ +enum class XmlAttributeState { +	IN_TAG_NAME, +	SEARCH_ATTR, +	IN_ATTR_NAME, +	HAS_ATTR_NAME, +	HAS_ATTR_EQUALS, +	IN_ATTR_DATA +}; + +std::map<std::string, SourceLocation> OsxmlAttributeLocator::locate( +    CharReader &reader, size_t offs) +{ +	std::map<std::string, SourceLocation> res; + +	// Fork the reader, we don't want to mess up the XML parsing process, do we? 
+	CharReaderFork readerFork = reader.fork(); + +	// Move the read cursor to the start location, abort if this does not work +	if (offs != readerFork.seek(offs)) { +		return res; +	} + +	// Now all we need to do is to implement one half of an XML parser. As this +	// is inherently complicated we'll totaly fail at it. Don't care. All we +	// want to get is those darn offsets for pretty error messages... (and we +	// can assume the XML is valid as it was already read by expat) +	XmlAttributeState state = XmlAttributeState::IN_TAG_NAME; +	char c; +	std::stringstream attrName; +	while (readerFork.read(c)) { +		// Abort at the end of the tag +		if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) { +			return res; +		} + +		// One state machine to rule them all, one state machine to find them, +		// One state machine to bring them all and in the darkness bind them +		// (the byte offsets) +		switch (state) { +			case XmlAttributeState::IN_TAG_NAME: +				if (Utils::isWhitespace(c)) { +					res.emplace("$tag", +					            SourceLocation{reader.getSourceId(), offs + 1, +					                           readerFork.getOffset() - 1}); +					state = XmlAttributeState::SEARCH_ATTR; +				} +				break; +			case XmlAttributeState::SEARCH_ATTR: +				if (!Utils::isWhitespace(c)) { +					state = XmlAttributeState::IN_ATTR_NAME; +					attrName << c; +				} +				break; +			case XmlAttributeState::IN_ATTR_NAME: +				if (Utils::isWhitespace(c)) { +					state = XmlAttributeState::HAS_ATTR_NAME; +				} else if (c == '=') { +					state = XmlAttributeState::HAS_ATTR_EQUALS; +				} else { +					attrName << c; +				} +				break; +			case XmlAttributeState::HAS_ATTR_NAME: +				if (!Utils::isWhitespace(c)) { +					if (c == '=') { +						state = XmlAttributeState::HAS_ATTR_EQUALS; +						break; +					} +					// Well, this is a strange XML file... We expected to +					// see a '=' here! 
Try to continue with the +					// "HAS_ATTR_EQUALS" state as this state will hopefully +					// include some error recovery +				} else { +					// Skip whitespace here +					break; +				} +			// Fallthrough +			case XmlAttributeState::HAS_ATTR_EQUALS: +				if (!Utils::isWhitespace(c)) { +					if (c == '"') { +						// Here we are! We have found the beginning of an +						// attribute. Let's quickly lock the current offset away +						// in the result map +						res.emplace(attrName.str(), +						            SourceLocation{reader.getSourceId(), +						                           readerFork.getOffset()}); +						state = XmlAttributeState::IN_ATTR_DATA; +					} else { +						// Not well formed: restart the attribute name with c +						attrName.str(std::string{}); +						attrName << c; +						state = XmlAttributeState::IN_ATTR_NAME; +					} +				} +				break; +			case XmlAttributeState::IN_ATTR_DATA: +				if (c == '"') { +					// We're at the end of the attribute data, set the end +					// location +					auto it = res.find(attrName.str()); +					if (it != res.end()) { +						it->second.setEnd(readerFork.getOffset() - 1); +					} + +					// Reset the attribute name and restart the search +					attrName.str(std::string{}); +					state = XmlAttributeState::SEARCH_ATTR; +				} +				break; +		} +	} +	return res; +} +} + diff --git a/src/formats/osxml/OsxmlAttributeLocator.hpp b/src/formats/osxml/OsxmlAttributeLocator.hpp new file mode 100644 index 0000000..f9a3437 --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.hpp @@ -0,0 +1,67 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. 
+ +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file OsxmlAttributeLocator.hpp + * + * Contains a class used for locating the byte offsets of the attributes given + * in a XML tag. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ +#define _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ + +#include <map> + +namespace ousia { + +// Forward declarations +class CharReader; +class SourceLocation; + +/** + * Class containing one static function for locating the byte offsets of the + * attributes in a XML tag. This are not retrieved by our xml parser, so we have + * to do this manually. + */ +class OsxmlAttributeLocator { +public: +	/** +	 * Function used to reconstruct the location of the attributes of a XML tag +	 * in the source code. This is necessary, as the xml parser only returns an +	 * offset to the begining of a tag and not to the position of the individual +	 * arguments. +	 * +	 * @param reader is the char reader from which the character data should be +	 * read. +	 * @param offs is a byte offset in the xml file pointing at the "<" +	 * character of the tag. +	 * @return a map from attribute keys to the corresponding location +	 * (including range) of the atribute. Also contains the location of the +	 * tagname in the form of the virtual attribute "$tag". 
+	 */ +	static std::map<std::string, SourceLocation> locate(CharReader &reader, +	                                                    size_t offs); +}; + +} + +#endif /* _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ */ + diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp new file mode 100644 index 0000000..7404960 --- /dev/null +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -0,0 +1,547 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <expat.h> + +#include <vector> + +#include <core/common/CharReader.hpp> +#include <core/common/Logger.hpp> +#include <core/common/Variant.hpp> +#include <core/common/VariantReader.hpp> +#include <core/common/Utils.hpp> +#include <core/common/WhitespaceHandler.hpp> + +#include "OsxmlAttributeLocator.hpp" +#include "OsxmlEventParser.hpp" + +namespace ousia { + +/* Class OsxmlEventParser */ + +/** + * Class containing data used by the internal functions. + */ +class OsxmlEventParserData { +public: +	/** +	 * Contains the current depth of the parsing process. +	 */ +	ssize_t depth; + +	/** +	 * Set to a value larger or equal to zero if the parser is currently inside +	 * an annotation end tag -- the value represents the depth in which the +	 * tag was opened. 
+	 */ +	ssize_t annotationEndTagDepth; + +	/** +	 * Current character data buffer. +	 */ +	std::vector<char> textBuf; + +	/** +	 * Current whitespace buffer (for the trimming whitespace mode) +	 */ +	std::vector<char> whitespaceBuf; + +	/** +	 * Flag indicating whether a whitespace character was present (for the +	 * collapsing whitespace mode). +	 */ +	bool hasWhitespace; + +	/** +	 * Current character data start. +	 */ +	size_t textStart; + +	/** +	 * Current character data end. +	 */ +	size_t textEnd; + +	/** +	 * Default constructor. +	 */ +	OsxmlEventParserData(); + +	/** +	 * Increments the depth. +	 */ +	void incrDepth(); + +	/** +	 * Decrements the depth; resets annotationEndTagDepth once depth is below it. +	 */ +	void decrDepth(); + +	/** +	 * Returns true if we're currently inside an annotation end tag. +	 */ +	bool inAnnotationEndTag(); + +	/** +	 * Returns true if character data is available. +	 * +	 * @return true if character data is available. +	 */ +	bool hasText(); + +	/** +	 * Returns a Variant containing the character data and its location. +	 * +	 * @return a string variant containing the text data and the character +	 * location. +	 */ +	Variant getText(SourceId sourceId); +}; + +/* Class GuardedExpatXmlParser */ + +/** + * Wrapper class around the XML_Parser pointer which safely frees it whenever + * the scope is left (e.g. because an exception was thrown). + */ +class GuardedExpatXmlParser { +private: +	/** +	 * Internal pointer to the XML_Parser instance. +	 */ +	XML_Parser parser; + +public: +	/** +	 * Constructor of the GuardedExpatXmlParser class. Calls XML_ParserCreate +	 * from the expat library. Throws a LoggableException if the XML parser +	 * cannot be initialized. +	 * +	 * @param encoding is the protocol-defined encoding passed to expat (or +	 * nullptr if expat should determine the encoding by itself). 
+	 */ +	GuardedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) +	{ +		parser = XML_ParserCreate(encoding); +		if (!parser) { +			throw LoggableException{ +			    "Internal error: Could not create expat XML parser!"}; +		} +	} + +	/** +	 * Destructor of the GuardedExpatXmlParser, frees the XML parser instance. +	 */ +	~GuardedExpatXmlParser() +	{ +		if (parser) { +			XML_ParserFree(parser); +			parser = nullptr; +		} +	} + +	/** +	 * Returns the XML_Parser pointer. +	 */ +	XML_Parser operator&() { return parser; } +}; + +/** + * Name of the special outer tag used for allowing multiple top-level elements + * in an xml file. + */ +static const std::string TOP_LEVEL_TAG{"ousia"}; + +/** + * Prefix used to indicate the start of an annotation (note the trailing colon) + */ +static const std::string ANNOTATION_START_PREFIX{"a:start:"}; + +/** + * Prefix used to indicate the end of an annotation. + */ +static const std::string ANNOTATION_END_PREFIX{"a:end"}; + +/** + * Synchronizes the position of the xml parser with the default location of the + * logger instance. + * + * @param p is a pointer at the xml parser instance. + * @param len is the length of the string that should be referred to. + * @return the SourceLocation that has been set in the logger. + */ +static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0) +{ +	// Fetch the OsxmlEventParser instance +	OsxmlEventParser *parser = +	    static_cast<OsxmlEventParser *>(XML_GetUserData(p)); + +	// Fetch the current location in the XML file and set the default location +	// in the logger +	size_t offs = XML_GetCurrentByteIndex(p); +	SourceLocation loc = +	    SourceLocation{parser->getReader().getSourceId(), offs, offs + len}; +	parser->getLogger().setDefaultLocation(loc); + +	// Return the fetched location +	return loc; +} + +/** + * Callback called by eXpat whenever a start tag is reached. 
+ */ +static void xmlStartElementHandler(void *ref, const XML_Char *name, +                                   const XML_Char **attrs) +{ +	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser +	XML_Parser p = static_cast<XML_Parser>(ref); +	OsxmlEventParser *parser = +	    static_cast<OsxmlEventParser *>(XML_GetUserData(p)); + +	// If there is any text data in the buffer, issue that first +	if (parser->getData().hasText()) { +		parser->getEvents().data( +		    parser->getData().getText(parser->getReader().getSourceId())); +	} + +	// Read the argument locations -- this is only a stupid and slow hack, +	// but it is necessary, as expat doesn't give use the byte offset of the +	// arguments. +	std::map<std::string, SourceLocation> attributeOffsets = +	    OsxmlAttributeLocator::locate(parser->getReader(), +	                                  XML_GetCurrentByteIndex(p)); + +	// Update the logger position +	SourceLocation loc = xmlSyncLoggerPosition(p); + +	// Fetch the location of the name +	SourceLocation nameLoc = loc; +	auto it = attributeOffsets.find("$tag"); +	if (it != attributeOffsets.end()) { +		nameLoc = it->second; +	} +	// Increment the current depth +	parser->getData().incrDepth(); + +	// Make sure we're currently not inside an annotation end tag -- this would +	// be highly illegal! 
+	if (parser->getData().inAnnotationEndTag()) { +		parser->getLogger().error( +		    "No tags allowed inside an annotation end tag", nameLoc); +		return; +	} + +	// Assemble the arguments +	Variant::mapType args; +	const XML_Char **attr = attrs; +	while (*attr) { +		// Convert the C string to a std::string +		const std::string key{*(attr++)}; + +		// Search the location of the key +		SourceLocation keyLoc; +		auto it = attributeOffsets.find(key); +		if (it != attributeOffsets.end()) { +			keyLoc = it->second; +		} + +		// Parse the string, pass the location of the key +		std::pair<bool, Variant> value = VariantReader::parseGenericString( +		    *(attr++), parser->getLogger(), keyLoc.getSourceId(), +		    keyLoc.getStart()); + +		// Set the overall location of the parsed element to the attribute +		// location +		value.second.setLocation(keyLoc); + +		// Store the keys in the map +		args.emplace(key, value.second).second; +	} + +	// Fetch the name of the tag, check for special tags +	std::string nameStr(name); +	if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 1) { +		// We're in the top-level and the magic tag is reached -- just +		// ignore it and issue a warning for each argument that has been given +		for (const auto &arg : args) { +			parser->getLogger().warning(std::string("Ignoring attribute \"") + +			                                arg.first + +			                                std::string("\" for magic tag \"") + +			                                TOP_LEVEL_TAG + std::string("\""), +			                            arg.second); +		} +	} else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) { +		// Assemble a name variant containing the name minus the prefix +		Variant nameVar = +		    Variant::fromString(nameStr.substr(ANNOTATION_START_PREFIX.size())); +		nameVar.setLocation(nameLoc); + +		// Issue the "annotationStart" event +		parser->getEvents().annotationStart(nameVar, args); +	} else if (Utils::startsWith(nameStr, 
ANNOTATION_END_PREFIX)) { +		// Assemble a name variant containing the name minus the prefix +		nameStr = nameStr.substr(ANNOTATION_END_PREFIX.size()); + +		// Discard a potentially leading colon +		if (!nameStr.empty() && nameStr[0] == ':') { +			nameStr = nameStr.substr(1); +		} + +		// Assemble the variant containing the name and its location +		Variant nameVar = Variant::fromString(nameStr); +		nameVar.setLocation(nameLoc); + +		// Check whether a "name" attribute was given +		Variant elementName; +		for (const auto &arg : args) { +			if (arg.first == "name") { +				elementName = arg.second; +			} else { +				parser->getLogger().warning( +				    std::string("Ignoring attribute \"") + arg.first + +				        "\" in annotation end tag", +				    arg.second); +			} +		} + +		// Set the annotationEndTagDepth to disallow any further tags to be +		// opened inside the annotation end tag. +		parser->getData().annotationEndTagDepth = parser->getData().depth; + +		// Issue the "annotationEnd" event +		parser->getEvents().annotationEnd(nameVar, elementName); +	} else { +		// Just issue a "commandStart" event in any other case +		Variant nameVar = Variant::fromString(nameStr); +		nameVar.setLocation(nameLoc); +		parser->getEvents().command(nameVar, args); +	} +} + +static void xmlEndElementHandler(void *ref, const XML_Char *name) +{ +	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser +	XML_Parser p = static_cast<XML_Parser>(ref); +	OsxmlEventParser *parser = +	    static_cast<OsxmlEventParser *>(XML_GetUserData(p)); + +	// Synchronize the position of the logger with the position +	xmlSyncLoggerPosition(p); + +	// Abort as long as we're in an annotation end tag +	if (parser->getData().inAnnotationEndTag()) { +		parser->getData().decrDepth(); +		return; +	} + +	// Decrement the current depth +	parser->getData().decrDepth(); + +	// If there is any text data in the buffer, issue that first +	if (parser->getData().hasText()) { +		parser->getEvents().data( +		  
  parser->getData().getText(parser->getReader().getSourceId())); +	} + +	// Abort if the special ousia tag ends here +	std::string nameStr{name}; +	if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 0) { +		return; +	} + +	// Issue the "fieldEnd" event +	parser->getEvents().fieldEnd(); +} + +static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) +{ +	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser +	XML_Parser p = static_cast<XML_Parser>(ref); +	OsxmlEventParser *parser = +	    static_cast<OsxmlEventParser *>(XML_GetUserData(p)); + +	// Abort as long as we're in an annotation end tag +	if (parser->getData().inAnnotationEndTag()) { +		return; +	} + +	// Convert the signed (smell the 90's C library here?) length to an usigned +	// value +	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0; + +	// Synchronize the logger position +	SourceLocation loc = xmlSyncLoggerPosition(p, ulen); + +	// Fetch some variables for convenience +	const WhitespaceMode mode = parser->getWhitespaceMode(); +	OsxmlEventParserData &data = parser->getData(); +	std::vector<char> &textBuf = data.textBuf; +	std::vector<char> &whitespaceBuf = data.whitespaceBuf; +	bool &hasWhitespace = data.hasWhitespace; +	size_t &textStart = data.textStart; +	size_t &textEnd = data.textEnd; + +	size_t pos = loc.getStart(); +	for (size_t i = 0; i < ulen; i++, pos++) { +		switch (mode) { +			case WhitespaceMode::PRESERVE: +				PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, +				                                    textStart, textEnd); +				break; +			case WhitespaceMode::TRIM: +				TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, +				                                  textStart, textEnd, +				                                  whitespaceBuf); +				break; +			case WhitespaceMode::COLLAPSE: +				CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, +				                                    textStart, textEnd, +				          
                          hasWhitespace); +				break; +		} +	} +} + +/* Class OsxmlEvents */ + +OsxmlEvents::~OsxmlEvents() {} + +/* Class OsxmlEventParser */ + +OsxmlEventParserData::OsxmlEventParserData() +    : depth(0), +      annotationEndTagDepth(-1), +      hasWhitespace(false), +      textStart(0), +      textEnd(0) +{ +} + +void OsxmlEventParserData::incrDepth() { depth++; } + +void OsxmlEventParserData::decrDepth() +{ +	if (depth > 0) { +		depth--; +	} +	if (depth < annotationEndTagDepth) { +		annotationEndTagDepth = -1; +	} +} + +bool OsxmlEventParserData::inAnnotationEndTag() +{ +	return (annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth); +} + +bool OsxmlEventParserData::hasText() { return !textBuf.empty(); } + +Variant OsxmlEventParserData::getText(SourceId sourceId) +{ +	// Create a variant containing the string data and the location +	Variant var = +	    Variant::fromString(std::string{textBuf.data(), textBuf.size()}); +	var.setLocation({sourceId, textStart, textEnd}); + +	// Reset the text buffers +	textBuf.clear(); +	whitespaceBuf.clear(); +	hasWhitespace = false; +	textStart = 0; +	textEnd = 0; + +	// Return the variant +	return var; +} + +/* Class OsxmlEventParser */ + +OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, +                                   Logger &logger) +    : reader(reader), +      events(events), +      logger(logger), +      whitespaceMode(WhitespaceMode::TRIM), +      data(new OsxmlEventParserData()) +{ +} + +OsxmlEventParser::~OsxmlEventParser() {} + +void OsxmlEventParser::parse() +{ +	// Create the parser object +	GuardedExpatXmlParser p{"UTF-8"}; + +	// Reset the depth +	data->depth = 0; + +	// Pass the reference to this parser instance to the XML handler +	XML_SetUserData(&p, this); +	XML_UseParserAsHandlerArg(&p); + +	// Set the callback functions +	XML_SetStartElementHandler(&p, xmlStartElementHandler); +	XML_SetEndElementHandler(&p, xmlEndElementHandler); +	
XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler); + +	// Feed data into expat while there is data to process +	constexpr size_t BUFFER_SIZE = 64 * 1024; +	while (true) { +		// Fetch a buffer from expat for the input data +		char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE)); +		if (!buf) { +			throw OusiaException{"Internal error: XML parser out of memory!"}; +		} + +		// Read into the buffer +		size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE); + +		// Parse the data and handle any XML error as exception +		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { +			throw LoggableException{ +			    "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))}, +			    xmlSyncLoggerPosition(&p)}; +		} + +		// Abort once there are no more bytes in the stream +		if (bytesRead == 0) { +			break; +		} +	} +} + +void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode) +{ +	this->whitespaceMode = whitespaceMode; +} + +WhitespaceMode OsxmlEventParser::getWhitespaceMode() const +{ +	return whitespaceMode; +} + +CharReader &OsxmlEventParser::getReader() const { return reader; } + +Logger &OsxmlEventParser::getLogger() const { return logger; } + +OsxmlEvents &OsxmlEventParser::getEvents() const { return events; } + +OsxmlEventParserData &OsxmlEventParser::getData() const { return *data; } +} + diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp new file mode 100644 index 0000000..e39245f --- /dev/null +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -0,0 +1,217 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. 
+ +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file OsxmlEventParser.hpp + * + * The OsxmlEventParser class is responsible for parsing an XML file and calling + * the corresponding event handler functions if an XML item is found. Event + * handling is performed using a listener interface. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OSXML_EVENT_PARSER_HPP_ +#define _OSXML_EVENT_PARSER_HPP_ + +#include <memory> +#include <string> + +#include <core/common/Whitespace.hpp> + +namespace ousia { + +// Forward declarations +class Logger; +class Variant; +class OsxmlEventParserData; + +/** + * Interface which defines the callback functions which are called by the + * OsxmlEventParser whenever an event occurs. + */ +class OsxmlEvents { +public: +	/** +	 * Virtual destructor. +	 */ +	virtual ~OsxmlEvents(); + +	/** +	 * Called whenever a command starts. Note that this implicitly always starts +	 * the default field of the command. +	 * +	 * @param name is a string variant containing name and location of the +	 * command. +	 * @param args is a map containing the arguments that were given to the +	 * command. +	 */ +	virtual void command(const Variant &name, const Variant::mapType &args) = 0; + +	/** +	 * Called whenever an annotation starts. Note that this implicitly always +	 * starts the default field of the annotation. +	 * +	 * @param className is a string variant containing the name of the +	 * annotation class and the location of the annotation definition. +	 * @param args is a map variant containing the arguments that were given +	 * to the annotation definition. 
+	 */ +	virtual void annotationStart(const Variant &className, +	                             const Variant::mapType &args) = 0; + +	/** +	 * Called whenever the range of an annotation ends. The callee must +	 * disambiguate the actual annotation that is finished here. +	 * +	 * @param className is a string variant containing the name of the +	 * annotation class that should end here. May be empty (or nullptr), if no +	 * elementName has been specified at the end of the annotation. +	 * @param elementName is the name of the annotation element that should be +	 * ended here. May be empty (or nullptr), if no elementName has been +	 * specified at the end of the annotation. +	 */ +	virtual void annotationEnd(const Variant &className, +	                           const Variant &elementName) = 0; + +	/** +	 * Called whenever the default field which was implicitly started by +	 * commandStart or annotationStart ends. Note that this does not end the +	 * range of an annotation, but the default field of the annotation. To +	 * signal the end of the annotation this, the annotationEnd method will be +	 * invoked. +	 */ +	virtual void fieldEnd() = 0; + +	/** +	 * Called whenever data is found. Whitespace data is handled as specified +	 * and the data has been parsed to the specified variant type. This function +	 * is not called if the parsing failed, the parser prints an error message +	 * instead. +	 * +	 * @param data is the already parsed data that should be passed to the +	 * handler. +	 */ +	virtual void data(const Variant &data) = 0; +}; + +/** + * The OsxmlEventParser class is a wrapper around eXpat which implements the + * specialities of the osxml formats class (like annotation ranges). It notifies + * a specified event handler whenever a command, annotation or data has been + * reached. + */ +class OsxmlEventParser { +private: +	/** +	 * Reference at the internal CharReader instance. 
+	 */ +	CharReader &reader; + +	/** +	 * Set of callback functions to be called whenever an event is triggered. +	 */ +	OsxmlEvents &events; + +	/** +	 * Reference at the Logger object to which error messages or warnings should +	 * be logged. +	 */ +	Logger &logger; + +	/** +	 * Current whitespace mode. +	 */ +	WhitespaceMode whitespaceMode; + +	/** +	 * Data to be used by the internal functions. +	 */ +	std::unique_ptr<OsxmlEventParserData> data; + +public: +	/** +	 * Constructor fo the OsxmlEventParser. Takes a reference at the OsxmlEvents +	 * of which the callback functions are called. +	 * +	 * @param reader is a reference to the CharReader instance from which the +	 * XML should be read. +	 * @param events is a refence at an instance of the OsxmlEvents class. All +	 * events are forwarded to this class. +	 * @param logger is the Logger instance to which log messages should be +	 * written. +	 */ +	OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger); + +	/** +	 * Destructor of OsxmlEventParser (needed for unique_ptr to incomplete type) +	 */ +	~OsxmlEventParser(); + +	/** +	 * Performs the actual parsing. Reads the XML using eXpat and calles the +	 * callbacks in the event listener instance whenever something interesting +	 * happens. +	 */ +	void parse(); + +	/** +	 * Sets the whitespace handling mode. +	 * +	 * @param whitespaceMode defines how whitespace in the data should be +	 * handled. +	 */ +	void setWhitespaceMode(WhitespaceMode whitespaceMode); + +	/** +	 * Returns the current whitespace handling mode. +	 * +	 * @return the currently set whitespace handling mode. +	 */ +	WhitespaceMode getWhitespaceMode() const; + +	/** +	 * Returns the internal CharReader reference. +	 * +	 * @return the CharReader reference. +	 */ +	CharReader &getReader() const; + +	/** +	 * Returns the internal Logger reference. +	 * +	 * @return the internal Logger reference. 
+	 */ +	Logger &getLogger() const; + +	/** +	 * Returns the internal OsxmlEvents reference. +	 * +	 * @return the internal OsxmlEvents reference. +	 */ +	OsxmlEvents &getEvents() const; + +	/** +	 * Returns a reference at the internal data. +	 */ +	OsxmlEventParserData &getData() const; +}; +} + +#endif /* _OSXML_EVENT_PARSER_HPP_ */ + diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp new file mode 100644 index 0000000..c216855 --- /dev/null +++ b/src/formats/osxml/OsxmlParser.cpp @@ -0,0 +1,98 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/parser/stack/GenericParserStates.hpp> +#include <core/parser/stack/Stack.hpp> +#include <core/parser/ParserContext.hpp> + +#include "OsxmlEventParser.hpp" +#include "OsxmlParser.hpp" + +namespace ousia { + +using namespace parser_stack; + +/** + * Class containing the actual OsxmlParser implementation. + */ +class OsxmlParserImplementation : public OsxmlEvents { +private: +	/** +	 * Actual xml parser -- converts the xml stream into a set of events. +	 */ +	OsxmlEventParser parser; + +	/** +	 * Pushdown automaton responsible for converting the xml events into an +	 * actual Node tree. +	 */ +	Stack stack; + +public: +	/** +	 * Constructor of the OsxmlParserImplementation class. 
+	 * +	 * @param reader is a reference to the CharReader instance from which the +	 * XML should be read. +	 * @param ctx is a reference to the ParserContext instance that should be +	 * used. +	 */ +	OsxmlParserImplementation(CharReader &reader, ParserContext &ctx) +	    : parser(reader, *this, ctx.getLogger()), +	      stack(ctx, GenericParserStates) +	{ +	} + +	/** +	 * Starts the actual parsing process. +	 */ +	void parse() { parser.parse(); } + +	void command(const Variant &name, const Variant::mapType &args) override +	{ +		stack.command(name, args); +		stack.fieldStart(true); +	} + +	void annotationStart(const Variant &name, +	                     const Variant::mapType &args) override +	{ +		stack.annotationStart(name, args); +		stack.fieldStart(true); +	} + +	void annotationEnd(const Variant &className, +	                   const Variant &elementName) override +	{ +		stack.annotationEnd(className, elementName); +	} + +	void fieldEnd() override { stack.fieldEnd(); } + +	void data(const Variant &data) override { stack.data(data); } +}; + +/* Class OsxmlParser */ + +void OsxmlParser::doParse(CharReader &reader, ParserContext &ctx) +{ +	OsxmlParserImplementation impl(reader, ctx); +	impl.parse(); +} +} + diff --git a/src/plugins/xml/XmlParser.hpp b/src/formats/osxml/OsxmlParser.hpp index c8b6302..0fbf83c 100644 --- a/src/plugins/xml/XmlParser.hpp +++ b/src/formats/osxml/OsxmlParser.hpp @@ -17,7 +17,7 @@  */  /** - * @file XmlParser.hpp + * @file OsxmlParser.hpp   *   * Contains the parser responsible for reading Ousía XML Documents (extension   * oxd) and Ousía XML Modules (extension oxm). 
@@ -25,18 +25,18 @@   * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)   */ -#ifndef _OUSIA_XML_PARSER_HPP_ -#define _OUSIA_XML_PARSER_HPP_ +#ifndef _OUSIA_OSXML_PARSER_HPP_ +#define _OUSIA_OSXML_PARSER_HPP_  #include <core/parser/Parser.hpp>  namespace ousia {  /** - * The XmlParser class implements parsing the various types of Ousía XML - * documents using the expat stream XML parser. + * The OsxmlParser class implements parsing the various types of Ousía XML + * documents using the OsxmlEventParser and Stack classes.   */ -class XmlParser : public Parser { +class OsxmlParser : public Parser {  protected:  	/**  	 * Parses the given input stream as XML file and returns the parsed @@ -51,5 +51,5 @@ protected:  } -#endif /* _OUSIA_XML_PARSER_HPP_ */ +#endif /* _OUSIA_OSXML_PARSER_HPP_ */ diff --git a/src/core/CodeTokenizer.cpp b/src/plugins/css/CodeTokenizer.cpp index d65c514..d65c514 100644 --- a/src/core/CodeTokenizer.cpp +++ b/src/plugins/css/CodeTokenizer.cpp diff --git a/src/core/CodeTokenizer.hpp b/src/plugins/css/CodeTokenizer.hpp index 154f949..154f949 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/plugins/css/CodeTokenizer.hpp diff --git a/src/core/Tokenizer.cpp b/src/plugins/css/Tokenizer.cpp index ab4735a..ab4735a 100644 --- a/src/core/Tokenizer.cpp +++ b/src/plugins/css/Tokenizer.cpp diff --git a/src/core/Tokenizer.hpp b/src/plugins/css/Tokenizer.hpp index 50e458c..50e458c 100644 --- a/src/core/Tokenizer.hpp +++ b/src/plugins/css/Tokenizer.hpp diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp deleted file mode 100644 index 6dfad49..0000000 --- a/src/plugins/xml/XmlParser.cpp +++ /dev/null @@ -1,575 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at 
your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <iostream> -#include <map> -#include <sstream> -#include <vector> - -#include <expat.h> - -#include <core/common/CharReader.hpp> -#include <core/common/Utils.hpp> -#include <core/common/VariantReader.hpp> -#include <core/parser/ParserScope.hpp> -#include <core/parser/ParserStack.hpp> -#include <core/parser/stack/DocumentHandler.hpp> -#include <core/parser/stack/DomainHandler.hpp> -#include <core/parser/stack/ImportIncludeHandler.hpp> -#include <core/parser/stack/TypesystemHandler.hpp> -#include <core/model/Document.hpp> -#include <core/model/Domain.hpp> -#include <core/model/Typesystem.hpp> - -#include "XmlParser.hpp" - -namespace ousia { - -namespace ParserStates { -/* Document states */ -static const ParserState Document = -    ParserStateBuilder() -        .parent(&None) -        .createdNodeType(&RttiTypes::Document) -        .elementHandler(DocumentHandler::create) -        .arguments({Argument::String("name", "")}); - -static const ParserState DocumentChild = -    ParserStateBuilder() -        .parents({&Document, &DocumentChild}) -        .createdNodeTypes({&RttiTypes::StructureNode, -                           &RttiTypes::AnnotationEntity, -                           &RttiTypes::DocumentField}) -        .elementHandler(DocumentChildHandler::create); - -/* Domain states */ -static const ParserState Domain = ParserStateBuilder() -                                      .parents({&None, &Document}) -                                      .createdNodeType(&RttiTypes::Domain) -                                      
.elementHandler(DomainHandler::create) -                                      .arguments({Argument::String("name")}); - -static const ParserState DomainStruct = -    ParserStateBuilder() -        .parent(&Domain) -        .createdNodeType(&RttiTypes::StructuredClass) -        .elementHandler(DomainStructHandler::create) -        .arguments({Argument::String("name"), -                    Argument::Cardinality("cardinality", Cardinality::any()), -                    Argument::Bool("isRoot", false), -                    Argument::Bool("transparent", false), -                    Argument::String("isa", "")}); - -static const ParserState DomainAnnotation = -    ParserStateBuilder() -        .parent(&Domain) -        .createdNodeType(&RttiTypes::AnnotationClass) -        .elementHandler(DomainAnnotationHandler::create) -        .arguments({Argument::String("name")}); - -static const ParserState DomainAttributes = -    ParserStateBuilder() -        .parents({&DomainStruct, &DomainAnnotation}) -        .createdNodeType(&RttiTypes::StructType) -        .elementHandler(DomainAttributesHandler::create) -        .arguments({}); - -static const ParserState DomainAttribute = -    ParserStateBuilder() -        .parent(&DomainAttributes) -        .elementHandler(TypesystemStructFieldHandler::create) -        .arguments({Argument::String("name"), Argument::String("type"), -                    Argument::Any("default", Variant::fromObject(nullptr))}); - -static const ParserState DomainField = -    ParserStateBuilder() -        .parents({&DomainStruct, &DomainAnnotation}) -        .createdNodeType(&RttiTypes::FieldDescriptor) -        .elementHandler(DomainFieldHandler::create) -        .arguments({Argument::String("name", ""), -                    Argument::Bool("isSubtree", false), -                    Argument::Bool("optional", false)}); - -static const ParserState DomainFieldRef = -    ParserStateBuilder() -        .parents({&DomainStruct, &DomainAnnotation}) -        
.createdNodeType(&RttiTypes::FieldDescriptor) -        .elementHandler(DomainFieldRefHandler::create) -        .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); - -static const ParserState DomainStructPrimitive = -    ParserStateBuilder() -        .parents({&DomainStruct, &DomainAnnotation}) -        .createdNodeType(&RttiTypes::FieldDescriptor) -        .elementHandler(DomainPrimitiveHandler::create) -        .arguments( -            {Argument::String("name", ""), Argument::Bool("isSubtree", false), -             Argument::Bool("optional", false), Argument::String("type")}); - -static const ParserState DomainStructChild = -    ParserStateBuilder() -        .parent(&DomainField) -        .elementHandler(DomainChildHandler::create) -        .arguments({Argument::String("ref")}); - -static const ParserState DomainStructParent = -    ParserStateBuilder() -        .parent(&DomainStruct) -        .createdNodeType(&RttiTypes::DomainParent) -        .elementHandler(DomainParentHandler::create) -        .arguments({Argument::String("ref")}); - -static const ParserState DomainStructParentField = -    ParserStateBuilder() -        .parent(&DomainStructParent) -        .createdNodeType(&RttiTypes::FieldDescriptor) -        .elementHandler(DomainParentFieldHandler::create) -        .arguments({Argument::String("name", ""), -                    Argument::Bool("isSubtree", false), -                    Argument::Bool("optional", false)}); - -static const ParserState DomainStructParentFieldRef = -    ParserStateBuilder() -        .parent(&DomainStructParent) -        .createdNodeType(&RttiTypes::FieldDescriptor) -        .elementHandler(DomainParentFieldRefHandler::create) -        .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); - -/* Typesystem states */ -static const ParserState Typesystem = -    ParserStateBuilder() -        .parents({&None, &Domain}) -        .createdNodeType(&RttiTypes::Typesystem) -        .elementHandler(TypesystemHandler::create) -      
  .arguments({Argument::String("name", "")}); - -static const ParserState TypesystemEnum = -    ParserStateBuilder() -        .parent(&Typesystem) -        .createdNodeType(&RttiTypes::EnumType) -        .elementHandler(TypesystemEnumHandler::create) -        .arguments({Argument::String("name")}); - -static const ParserState TypesystemEnumEntry = -    ParserStateBuilder() -        .parent(&TypesystemEnum) -        .elementHandler(TypesystemEnumEntryHandler::create) -        .arguments({}); - -static const ParserState TypesystemStruct = -    ParserStateBuilder() -        .parent(&Typesystem) -        .createdNodeType(&RttiTypes::StructType) -        .elementHandler(TypesystemStructHandler::create) -        .arguments({Argument::String("name"), Argument::String("parent", "")}); - -static const ParserState TypesystemStructField = -    ParserStateBuilder() -        .parent(&TypesystemStruct) -        .elementHandler(TypesystemStructFieldHandler::create) -        .arguments({Argument::String("name"), Argument::String("type"), -                    Argument::Any("default", Variant::fromObject(nullptr))}); - -static const ParserState TypesystemConstant = -    ParserStateBuilder() -        .parent(&Typesystem) -        .createdNodeType(&RttiTypes::Constant) -        .elementHandler(TypesystemConstantHandler::create) -        .arguments({Argument::String("name"), Argument::String("type"), -                    Argument::Any("value")}); - -/* Special states for import and include */ -static const ParserState Import = -    ParserStateBuilder() -        .parents({&Document, &Typesystem, &Domain}) -        .elementHandler(ImportHandler::create) -        .arguments({Argument::String("rel", ""), Argument::String("type", ""), -                    Argument::String("src", "")}); - -static const ParserState Include = -    ParserStateBuilder() -        .parent(&All) -        .elementHandler(IncludeHandler::create) -        .arguments({Argument::String("rel", ""), 
Argument::String("type", ""), -                    Argument::String("src", "")}); - -static const std::multimap<std::string, const ParserState *> XmlStates{ -    {"document", &Document}, -    {"*", &DocumentChild}, -    {"domain", &Domain}, -    {"struct", &DomainStruct}, -    {"annotation", &DomainAnnotation}, -    {"attributes", &DomainAttributes}, -    {"attribute", &DomainAttribute}, -    {"field", &DomainField}, -    {"fieldRef", &DomainFieldRef}, -    {"primitive", &DomainStructPrimitive}, -    {"childRef", &DomainStructChild}, -    {"parentRef", &DomainStructParent}, -    {"field", &DomainStructParentField}, -    {"fieldRef", &DomainStructParentFieldRef}, -    {"typesystem", &Typesystem}, -    {"enum", &TypesystemEnum}, -    {"entry", &TypesystemEnumEntry}, -    {"struct", &TypesystemStruct}, -    {"field", &TypesystemStructField}, -    {"constant", &TypesystemConstant}, -    {"import", &Import}, -    {"include", &Include}}; -} - -/** - * Structue containing the private data that is being passed to the - * XML-Handlers. - */ -struct XMLUserData { -	/** -	 * Containing the depth of the current XML file -	 */ -	size_t depth; - -	/** -	 * Reference at the ParserStack instance. -	 */ -	ParserStack *stack; - -	/** -	 * Reference at the CharReader instance. -	 */ -	CharReader *reader; - -	/** -	 * Constructor of the XMLUserData struct. -	 * -	 * @param stack is a pointer at the ParserStack instance. -	 * @param reader is a pointer at the CharReader instance. -	 */ -	XMLUserData(ParserStack *stack, CharReader *reader) -	    : depth(0), stack(stack), reader(reader) -	{ -	} -}; - -/** - * Wrapper class around the XML_Parser pointer which safely frees it whenever - * the scope is left (e.g. because an exception was thrown). - */ -class ScopedExpatXmlParser { -private: -	/** -	 * Internal pointer to the XML_Parser instance. -	 */ -	XML_Parser parser; - -public: -	/** -	 * Constructor of the ScopedExpatXmlParser class. 
Calls XML_ParserCreateNS -	 * from the expat library. Throws a parser exception if the XML parser -	 * cannot be initialized. -	 * -	 * @param encoding is the protocol-defined encoding passed to expat (or -	 * nullptr if expat should determine the encoding by itself). -	 */ -	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) -	{ -		parser = XML_ParserCreate(encoding); -		if (!parser) { -			throw LoggableException{ -			    "Internal error: Could not create expat XML parser!"}; -		} -	} - -	/** -	 * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance. -	 */ -	~ScopedExpatXmlParser() -	{ -		if (parser) { -			XML_ParserFree(parser); -			parser = nullptr; -		} -	} - -	/** -	 * Returns the XML_Parser pointer. -	 */ -	XML_Parser operator&() { return parser; } -}; - -/* Adapter Expat -> ParserStack */ - -static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0) -{ -	// Fetch the parser stack and the associated user data -	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); -	ParserStack *stack = userData->stack; - -	// Fetch the current location in the XML file -	size_t offs = XML_GetCurrentByteIndex(p); - -	// Build the source location and update the default location of the -	// current -	// logger instance -	SourceLocation loc{stack->getContext().getSourceId(), offs, offs + len}; -	stack->getContext().getLogger().setDefaultLocation(loc); -	return loc; -} - -enum class XMLAttributeState { -	IN_TAG_NAME, -	SEARCH_ATTR, -	IN_ATTR_NAME, -	HAS_ATTR_NAME, -	HAS_ATTR_EQUALS, -	IN_ATTR_DATA -}; - -static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets( -    CharReader &reader, SourceLocation location) -{ -	std::map<std::string, SourceLocation> res; - -	// Fork the reader, we don't want to mess up the XML parsing process, do we? 
-	CharReaderFork readerFork = reader.fork(); - -	// Move the read cursor to the start location, abort if this does not work -	size_t offs = location.getStart(); -	if (!location.isValid() || offs != readerFork.seek(offs)) { -		return res; -	} - -	// Now all we need to do is to implement one half of an XML parser. As this -	// is inherently complicated we'll totaly fail at it. Don't care. All we -	// want to get is those darn offsets for pretty error messages... (and we -	// can assume the XML is valid as it was already read by expat) -	XMLAttributeState state = XMLAttributeState::IN_TAG_NAME; -	char c; -	std::stringstream attrName; -	while (readerFork.read(c)) { -		// Abort at the end of the tag -		if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) { -			return res; -		} - -		// One state machine to rule them all, one state machine to find them, -		// One state machine to bring them all and in the darkness bind them -		// (the byte offsets) -		switch (state) { -			case XMLAttributeState::IN_TAG_NAME: -				if (Utils::isWhitespace(c)) { -					state = XMLAttributeState::SEARCH_ATTR; -				} -				break; -			case XMLAttributeState::SEARCH_ATTR: -				if (!Utils::isWhitespace(c)) { -					state = XMLAttributeState::IN_ATTR_NAME; -					attrName << c; -				} -				break; -			case XMLAttributeState::IN_ATTR_NAME: -				if (Utils::isWhitespace(c)) { -					state = XMLAttributeState::HAS_ATTR_NAME; -				} else if (c == '=') { -					state = XMLAttributeState::HAS_ATTR_EQUALS; -				} else { -					attrName << c; -				} -				break; -			case XMLAttributeState::HAS_ATTR_NAME: -				if (!Utils::isWhitespace(c)) { -					if (c == '=') { -						state = XMLAttributeState::HAS_ATTR_EQUALS; -						break; -					} -					// Well, this is a strange XML file... We expected to -					// see a '=' here! 
Try to continue with the -					// "HAS_ATTR_EQUALS" state as this state will hopefully -					// inlcude some error recovery -				} else { -					// Skip whitespace here -					break; -				} -			// Fallthrough -			case XMLAttributeState::HAS_ATTR_EQUALS: -				if (!Utils::isWhitespace(c)) { -					if (c == '"') { -						// Here we are! We have found the beginning of an -						// attribute. Let's quickly lock the current offset away -						// in the result map -						res.emplace(attrName.str(), -						            SourceLocation{reader.getSourceId(), -						                           readerFork.getOffset()}); -						attrName.str(std::string{}); -						state = XMLAttributeState::IN_ATTR_DATA; -					} else { -						// No, this XML file is not well formed. Assume we're in -						// an attribute name once again -						attrName.str(std::string{&c, 1}); -						state = XMLAttributeState::IN_ATTR_NAME; -					} -				} -				break; -			case XMLAttributeState::IN_ATTR_DATA: -				if (c == '"') { -					// We're at the end of the attribute data, start anew -					state = XMLAttributeState::SEARCH_ATTR; -				} -				break; -		} -	} -	return res; -} - -static void xmlStartElementHandler(void *p, const XML_Char *name, -                                   const XML_Char **attrs) -{ -	XML_Parser parser = static_cast<XML_Parser>(p); -	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); -	ParserStack *stack = userData->stack; - -	SourceLocation loc = syncLoggerPosition(parser); - -	// Read the argument locations -- this is only a stupid and slow hack, -	// but it is necessary, as expat doesn't give use the byte offset of the -	// arguments. 
-	std::map<std::string, SourceLocation> offs = -	    reconstructXMLAttributeOffsets(*userData->reader, loc); - -	// Assemble the arguments -	Variant::mapType args; - -	const XML_Char **attr = attrs; -	while (*attr) { -		// Convert the C string to a std::string -		const std::string key{*(attr++)}; - -		// Search the location of the key -		SourceLocation keyLoc; -		auto it = offs.find(key); -		if (it != offs.end()) { -			keyLoc = it->second; -		} - -		// Parse the string, pass the location of the key -		std::pair<bool, Variant> value = VariantReader::parseGenericString( -		    *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(), -		    keyLoc.getStart()); -		args.emplace(key, value.second); -	} - -	// Call the start function -	std::string nameStr(name); -	if (nameStr != "ousia" || userData->depth > 0) { -		stack->start(std::string(name), args, loc); -	} - -	// Increment the current depth -	userData->depth++; -} - -static void xmlEndElementHandler(void *p, const XML_Char *name) -{ -	XML_Parser parser = static_cast<XML_Parser>(p); -	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); -	ParserStack *stack = userData->stack; - -	syncLoggerPosition(parser); - -	// Decrement the current depth -	userData->depth--; - -	// Call the end function -	std::string nameStr(name); -	if (nameStr != "ousia" || userData->depth > 0) { -		stack->end(); -	} -} - -static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len) -{ -	XML_Parser parser = static_cast<XML_Parser>(p); -	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); -	ParserStack *stack = userData->stack; - -	size_t ulen = len > 0 ? 
static_cast<size_t>(len) : 0; -	syncLoggerPosition(parser, ulen); -	const std::string data = Utils::trim(std::string{s, ulen}); -	if (!data.empty()) { -		stack->data(data); -	} -} - -/* Class XmlParser */ - -void XmlParser::doParse(CharReader &reader, ParserContext &ctx) -{ -	// Create the parser object -	ScopedExpatXmlParser p{"UTF-8"}; - -	// Create the parser stack instance, if we're starting on a non-empty scope, -	// try to deduce the parser state -	ParserStack stack(ctx, ParserStates::XmlStates); -	if (!ctx.getScope().isEmpty()) { -		if (!stack.deduceState()) { -			return; -		} -	} - -	// Pass the reference to the ParserStack to the XML handler -	XMLUserData data(&stack, &reader); -	XML_SetUserData(&p, &data); -	XML_UseParserAsHandlerArg(&p); - -	// Set the callback functions -	XML_SetStartElementHandler(&p, xmlStartElementHandler); -	XML_SetEndElementHandler(&p, xmlEndElementHandler); -	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler); - -	// Feed data into expat while there is data to process -	constexpr size_t BUFFER_SIZE = 64 * 1024; -	while (true) { -		// Fetch a buffer from expat for the input data -		char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE)); -		if (!buf) { -			throw LoggableException{ -			    "Internal error: XML parser out of memory!"}; -		} - -		// Read into the buffer -		size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE); - -		// Parse the data and handle any XML error -		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { -			// Fetch the xml parser byte offset -			size_t offs = XML_GetCurrentByteIndex(&p); - -			// Throw a corresponding exception -			XML_Error code = XML_GetErrorCode(&p); -			std::string msg = std::string{XML_ErrorString(code)}; -			throw LoggableException{"XML: " + msg, -			                        SourceLocation{ctx.getSourceId(), offs}}; -		} - -		// Abort once there are no more bytes in the stream -		if (bytesRead == 0) { -			break; -		} -	} -} -} - diff --git a/test/core/RangeSetTest.cpp 
b/test/core/RangeSetTest.cpp index cbf8f59..446ee51 100644 --- a/test/core/RangeSetTest.cpp +++ b/test/core/RangeSetTest.cpp @@ -110,7 +110,7 @@ TEST(RangeSet, Merge)  	s.merge(Range<int>(40, 50));  	s.merge(Range<int>(60, 70));  	{ -		ASSERT_EQ(ranges.size(), 4); +		ASSERT_EQ(ranges.size(), 4U);  		auto it = ranges.begin();  		ASSERT_EQ((*it).start, 0); @@ -132,7 +132,7 @@ TEST(RangeSet, Merge)  	// Now insert an element which spans the second and third element  	s.merge(Range<int>(15, 55));  	{ -		ASSERT_EQ(ranges.size(), 3); +		ASSERT_EQ(ranges.size(), 3U);  		auto it = ranges.begin();  		ASSERT_EQ((*it).start, 0); @@ -150,7 +150,7 @@ TEST(RangeSet, Merge)  	// Now insert an element which expands the first element  	s.merge(Range<int>(-10, 11));  	{ -		ASSERT_EQ(ranges.size(), 3); +		ASSERT_EQ(ranges.size(), 3U);  		auto it = ranges.begin();  		ASSERT_EQ((*it).start, -10); @@ -168,7 +168,7 @@ TEST(RangeSet, Merge)  	// Now insert an element which merges the last two elements  	s.merge(Range<int>(13, 70));  	{ -		ASSERT_EQ(ranges.size(), 2); +		ASSERT_EQ(ranges.size(), 2U);  		auto it = ranges.begin();  		ASSERT_EQ((*it).start, -10); @@ -182,7 +182,7 @@ TEST(RangeSet, Merge)  	// Now insert an element which merges the remaining elements  	s.merge(Range<int>(-9, 12));  	{ -		ASSERT_EQ(ranges.size(), 1); +		ASSERT_EQ(ranges.size(), 1U);  		auto it = ranges.begin();  		ASSERT_EQ((*it).start, -10); diff --git a/test/core/StandaloneEnvironment.hpp b/test/core/StandaloneEnvironment.hpp index a9dcdce..790bad4 100644 --- a/test/core/StandaloneEnvironment.hpp +++ b/test/core/StandaloneEnvironment.hpp @@ -31,6 +31,10 @@  namespace ousia { +/** + * StandaloneEnvironment is a class used for quickly setting up an entire + * environment needed for running an Ousia instance. 
+ */  struct StandaloneEnvironment {  	ConcreteLogger &logger;  	Manager manager; diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 917f45c..7801296 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -24,22 +24,40 @@ namespace ousia {  TEST(Utils, isIdentifier)  { -	ASSERT_TRUE(Utils::isIdentifier("test")); -	ASSERT_TRUE(Utils::isIdentifier("t0-_est")); -	ASSERT_FALSE(Utils::isIdentifier("_t0-_EST")); -	ASSERT_FALSE(Utils::isIdentifier("-t0-_EST")); -	ASSERT_FALSE(Utils::isIdentifier("0t-_EST")); -	ASSERT_FALSE(Utils::isIdentifier("invalid key")); +	EXPECT_TRUE(Utils::isIdentifier("test")); +	EXPECT_TRUE(Utils::isIdentifier("t0-_est")); +	EXPECT_FALSE(Utils::isIdentifier("_t0-_EST")); +	EXPECT_FALSE(Utils::isIdentifier("-t0-_EST")); +	EXPECT_FALSE(Utils::isIdentifier("0t-_EST")); +	EXPECT_FALSE(Utils::isIdentifier("_A")); +	EXPECT_FALSE(Utils::isIdentifier("invalid key")); +	EXPECT_FALSE(Utils::isIdentifier(""));  } -TEST(Utils, trim) + +TEST(Utils, isNamespacedIdentifier)  { -	ASSERT_EQ("hello world", Utils::trim("\t hello world   \n\r\t")); -	ASSERT_EQ("hello world", Utils::trim("hello world   \n\r\t")); -	ASSERT_EQ("hello world", Utils::trim("   hello world")); -	ASSERT_EQ("hello world", Utils::trim("hello world")); +	EXPECT_TRUE(Utils::isNamespacedIdentifier("test")); +	EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("_t0-_EST")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("-t0-_EST")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("0t-_EST")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("invalid key")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("_A")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier(":")); +	EXPECT_TRUE(Utils::isNamespacedIdentifier("test:a")); +	EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:b")); +	EXPECT_TRUE(Utils::isNamespacedIdentifier("test:test")); +	
EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:t0-_est")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("test:_A")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("test::a")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier(":test")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est:_t0-_EST")); +	EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est: b"));  } +  TEST(Utils, split)  {  	ASSERT_EQ(std::vector<std::string>({"ab"}), Utils::split("ab", '.')); @@ -73,5 +91,23 @@ TEST(Utils, extractFileExtension)  	ASSERT_EQ("ext", Utils::extractFileExtension("foo.bar/test.EXT"));  } +TEST(Utils, startsWith) +{ +	ASSERT_TRUE(Utils::startsWith("foobar", "foo")); +	ASSERT_TRUE(Utils::startsWith("foo", "foo")); +	ASSERT_FALSE(Utils::startsWith("foo", "foobar")); +	ASSERT_FALSE(Utils::startsWith("foobar", "bar")); +	ASSERT_TRUE(Utils::startsWith("foo", "")); +} + +TEST(Utils, endsWith) +{ +	ASSERT_FALSE(Utils::endsWith("foobar", "foo")); +	ASSERT_TRUE(Utils::endsWith("foo", "foo")); +	ASSERT_FALSE(Utils::endsWith("foo", "foobar")); +	ASSERT_TRUE(Utils::endsWith("foobar", "bar")); +	ASSERT_TRUE(Utils::endsWith("foo", "")); +} +  } diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp index 8fcbdf2..4cb4331 100644 --- a/test/core/model/DomainTest.cpp +++ b/test/core/model/DomainTest.cpp @@ -242,7 +242,7 @@ TEST(Descriptor, getDefaultFields)  	    A->createPrimitiveFieldDescriptor(sys->getStringType(), logger);  	// now we should find that.  	auto fields = A->getDefaultFields(); -	ASSERT_EQ(1, fields.size()); +	ASSERT_EQ(1U, fields.size());  	ASSERT_EQ(A_prim_field, fields[0]);  	// remove that field from A and add it to another class. @@ -258,7 +258,7 @@ TEST(Descriptor, getDefaultFields)  	// but we should find it again if we set B as superclass of A.  	
A->setSuperclass(B, logger);  	fields = A->getDefaultFields(); -	ASSERT_EQ(1, fields.size()); +	ASSERT_EQ(1U, fields.size());  	ASSERT_EQ(A_prim_field, fields[0]);  	// and we should not be able to find it if we override the field. @@ -277,7 +277,7 @@ TEST(Descriptor, getDefaultFields)  	// now we should find that.  	fields = A->getDefaultFields(); -	ASSERT_EQ(1, fields.size()); +	ASSERT_EQ(1U, fields.size());  	ASSERT_EQ(C_field, fields[0]);  	// add another transparent child class to A with a daughter class that has @@ -296,7 +296,7 @@ TEST(Descriptor, getDefaultFields)  	// now we should find both primitive fields, but the C field first.  	fields = A->getDefaultFields(); -	ASSERT_EQ(2, fields.size()); +	ASSERT_EQ(2U, fields.size());  	ASSERT_EQ(C_field, fields[0]);  	ASSERT_EQ(F_field, fields[1]);  } @@ -321,7 +321,7 @@ TEST(Descriptor, getPermittedChildren)  	 * in between.  	 */  	NodeVector<StructuredClass> children = book->getPermittedChildren(); -	ASSERT_EQ(3, children.size()); +	ASSERT_EQ(3U, children.size());  	ASSERT_EQ(section, children[0]);  	ASSERT_EQ(paragraph, children[1]);  	ASSERT_EQ(text, children[2]); @@ -331,7 +331,7 @@ TEST(Descriptor, getPermittedChildren)  	    mgr, "Subclass", domain, Cardinality::any(), text, true, false)};  	// And that should be in the result list as well now.  	
children = book->getPermittedChildren(); -	ASSERT_EQ(4, children.size()); +	ASSERT_EQ(4U, children.size());  	ASSERT_EQ(section, children[0]);  	ASSERT_EQ(paragraph, children[1]);  	ASSERT_EQ(text, children[2]); diff --git a/test/core/parser/ParserStackTest.cpp b/test/core/parser/ParserStackTest.cpp deleted file mode 100644 index 3a0decb..0000000 --- a/test/core/parser/ParserStackTest.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
-*/ - -#include <iostream> - -#include <gtest/gtest.h> - -#include <core/parser/ParserStack.hpp> -#include <core/StandaloneEnvironment.hpp> - -namespace ousia { - -ConcreteLogger logger; - -static int startCount = 0; -static int endCount = 0; -static int dataCount = 0; - -class TestHandler : public Handler { -public: -	using Handler::Handler; - -	void start(Variant::mapType &args) override { startCount++; } - -	void end() override { endCount++; } - -	void data(const std::string &data, int field) override { dataCount++; } - -	static Handler *create(const HandlerData &data) -	{ -		return new TestHandler(data); -	} -}; - -namespace ParserStates { -static const ParserState Document = -    ParserStateBuilder().parent(&None).elementHandler(TestHandler::create); -static const ParserState Body = ParserStateBuilder() -                                    .parent(&Document) -                                    .elementHandler(TestHandler::create); -static const ParserState Empty = -    ParserStateBuilder().parent(&Document).elementHandler(TestHandler::create); -static const ParserState Special = -    ParserStateBuilder().parent(&All).elementHandler(TestHandler::create); -static const ParserState Arguments = -    ParserStateBuilder() -        .parent(&None) -        .elementHandler(TestHandler::create) -        .arguments({Argument::Int("a"), Argument::String("b")}); -static const ParserState BodyChildren = -    ParserStateBuilder() -        .parent(&Body) -        .elementHandler(TestHandler::create); - -static const std::multimap<std::string, const ParserState *> TestHandlers{ -    {"document", &Document}, -    {"body", &Body}, -    {"empty", &Empty}, -    {"special", &Special}, -    {"arguments", &Arguments}, -    {"*", &BodyChildren}}; -} - -TEST(ParserStack, simpleTest) -{ -	StandaloneEnvironment env(logger); -	ParserStack s{env.context, ParserStates::TestHandlers}; - -	startCount = 0; -	endCount = 0; -	dataCount = 0; - -	EXPECT_EQ("", s.currentCommandName()); -	
EXPECT_EQ(&ParserStates::None, &s.currentState()); - -	s.start("document", {}); -	s.data("test1"); - -	EXPECT_EQ("document", s.currentCommandName()); -	EXPECT_EQ(&ParserStates::Document, &s.currentState()); -	EXPECT_EQ(1, startCount); -	EXPECT_EQ(1, dataCount); - -	s.start("body", {}); -	s.data("test2"); -	EXPECT_EQ("body", s.currentCommandName()); -	EXPECT_EQ(&ParserStates::Body, &s.currentState()); -	EXPECT_EQ(2, startCount); -	EXPECT_EQ(2, dataCount); - -	s.start("inner", {}); -	EXPECT_EQ("inner", s.currentCommandName()); -	EXPECT_EQ(&ParserStates::BodyChildren, &s.currentState()); -	s.end(); -	EXPECT_EQ(3, startCount); -	EXPECT_EQ(1, endCount); - -	s.end(); -	EXPECT_EQ(2, endCount); - -	EXPECT_EQ("document", s.currentCommandName()); -	EXPECT_EQ(&ParserStates::Document, &s.currentState()); - -	s.start("body", {}); -	s.data("test3"); -	EXPECT_EQ("body", s.currentCommandName()); -	EXPECT_EQ(&ParserStates::Body, &s.currentState()); -	s.end(); -	EXPECT_EQ(4, startCount); -	EXPECT_EQ(3, dataCount); -	EXPECT_EQ(3, endCount); - -	EXPECT_EQ("document", s.currentCommandName()); -	EXPECT_EQ(&ParserStates::Document, &s.currentState()); - -	s.end(); -	EXPECT_EQ(4, endCount); - -	EXPECT_EQ("", s.currentCommandName()); -	EXPECT_EQ(&ParserStates::None, &s.currentState()); -} - -TEST(ParserStack, errorHandling) -{ -	StandaloneEnvironment env(logger); -	ParserStack s{env.context, ParserStates::TestHandlers}; - -	EXPECT_THROW(s.start("body", {}), OusiaException); -	s.start("document", {}); -	EXPECT_THROW(s.start("document", {}), OusiaException); -	s.start("empty", {}); -	EXPECT_THROW(s.start("body", {}), OusiaException); -	s.start("special", {}); -	s.end(); -	s.end(); -	s.end(); -	EXPECT_EQ(&ParserStates::None, &s.currentState()); -	ASSERT_THROW(s.end(), OusiaException); -	ASSERT_THROW(s.data("test", 1), OusiaException); -} - -TEST(ParserStack, validation) -{ -	StandaloneEnvironment env(logger); -	ParserStack s{env.context, ParserStates::TestHandlers}; - -	logger.reset(); -	
s.start("arguments", {}); -	EXPECT_TRUE(logger.hasError()); -	s.end(); - -	s.start("arguments", {{"a", 5}}); -	EXPECT_TRUE(logger.hasError()); -	s.end(); - -	logger.reset(); -	s.start("arguments", {{"a", 5}, {"b", "test"}}); -	EXPECT_FALSE(logger.hasError()); -	s.end(); -} -} - diff --git a/test/core/parser/ParserStateTest.cpp b/test/core/parser/ParserStateTest.cpp deleted file mode 100644 index 91d8dcd..0000000 --- a/test/core/parser/ParserStateTest.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
-*/ - -#include <gtest/gtest.h> - -#include <core/common/Rtti.hpp> -#include <core/parser/ParserState.hpp> - -namespace ousia { - -static const Rtti t1; -static const Rtti t2; -static const Rtti t3; -static const Rtti t4; -static const Rtti t5; - -static const ParserState s1 = ParserStateBuilder().createdNodeType(&t1); -static const ParserState s2a = -    ParserStateBuilder().parent(&s1).createdNodeType(&t2); -static const ParserState s2b = -    ParserStateBuilder().parent(&s1).createdNodeType(&t2); -static const ParserState s3 = -    ParserStateBuilder().parents({&s2a, &s1}).createdNodeType(&t3); -static const ParserState s4 = -    ParserStateBuilder().parent(&s3).createdNodeType(&t4); -static const ParserState s5 = -    ParserStateBuilder().parent(&s2b).createdNodeType(&t5); - -TEST(ParserStateDeductor, deduce) -{ -	using Result = std::vector<const ParserState *>; -	using Signature = std::vector<const Rtti *>; -	std::vector<const ParserState *> states{&s1, &s2a, &s2b, &s3, &s4, &s5}; - -	// Should not crash on empty signature -	ASSERT_EQ(Result{}, ParserStateDeductor(Signature{}, states).deduce()); - -	// Try repeating signature elements -	ASSERT_EQ(Result({&s1}), -	          ParserStateDeductor(Signature({&t1}), states).deduce()); -	ASSERT_EQ(Result({&s1}), -	          ParserStateDeductor(Signature({&t1, &t1}), states).deduce()); -	ASSERT_EQ(Result({&s1}), -	          ParserStateDeductor(Signature({&t1, &t1, &t1}), states).deduce()); - -	// Go to another state -	ASSERT_EQ(Result({&s2a, &s2b}), -	          ParserStateDeductor(Signature({&t1, &t1, &t2}), states).deduce()); -	ASSERT_EQ(Result({&s4}), -	          ParserStateDeductor(Signature({&t1, &t3, &t4}), states).deduce()); - -	// Skip one state -	ASSERT_EQ(Result({&s4}), -	          ParserStateDeductor(Signature({&t2, &t4}), states).deduce()); - -	// Impossible signature -	ASSERT_EQ(Result({}), -	          ParserStateDeductor(Signature({&t4, &t5}), states).deduce()); - -} -} - diff --git 
a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp new file mode 100644 index 0000000..321d471 --- /dev/null +++ b/test/core/parser/stack/StackTest.cpp @@ -0,0 +1,666 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <iostream> + +#include <gtest/gtest.h> + +#include <core/frontend/TerminalLogger.hpp> +#include <core/parser/stack/Handler.hpp> +#include <core/parser/stack/Stack.hpp> +#include <core/parser/stack/State.hpp> + +#include <core/StandaloneEnvironment.hpp> + +namespace ousia { +namespace parser_stack { + +// Build an instance of the StandaloneEnvironment used for this unit test +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; +static StandaloneEnvironment env(logger); + +namespace { + +struct Tracker { +	int startCount; +	int endCount; +	int fieldStartCount; +	int fieldEndCount; +	int annotationStartCount; +	int annotationEndCount; +	int dataCount; + +	Variant::mapType startArgs; +	bool fieldStartIsDefault; +	size_t fieldStartIdx; +	Variant annotationStartClassName; +	Variant::mapType annotationStartArgs; +	Variant annotationEndClassName; +	Variant annotationEndElementName; +	Variant dataData; + +	bool startResult; +	bool fieldStartSetIsDefault; +	bool fieldStartResult; +	bool 
annotationStartResult; +	bool annotationEndResult; +	bool dataResult; + +	Tracker() { reset(); } + +	void reset() +	{ +		startCount = 0; +		endCount = 0; +		fieldStartCount = 0; +		fieldEndCount = 0; +		annotationStartCount = 0; +		annotationEndCount = 0; +		dataCount = 0; + +		startArgs = Variant::mapType{}; +		fieldStartIsDefault = false; +		fieldStartIdx = 0; +		annotationStartClassName = Variant::fromString(std::string{}); +		annotationStartArgs = Variant::mapType{}; +		annotationEndClassName = Variant::fromString(std::string{}); +		annotationEndElementName = Variant::fromString(std::string{}); +		dataData = Variant::fromString(std::string{}); + +		startResult = true; +		fieldStartSetIsDefault = false; +		fieldStartResult = true; +		annotationStartResult = true; +		annotationEndResult = true; +		dataResult = true; +	} + +	void expect(int startCount, int endCount, int fieldStartCount, +	            int fieldEndCount, int annotationStartCount, +	            int annotationEndCount, int dataCount) +	{ +		EXPECT_EQ(startCount, this->startCount); +		EXPECT_EQ(endCount, this->endCount); +		EXPECT_EQ(fieldStartCount, this->fieldStartCount); +		EXPECT_EQ(fieldEndCount, this->fieldEndCount); +		EXPECT_EQ(annotationStartCount, this->annotationStartCount); +		EXPECT_EQ(annotationEndCount, this->annotationEndCount); +		EXPECT_EQ(dataCount, this->dataCount); +	} +}; + +static Tracker tracker; + +class TestHandler : public Handler { +private: +	TestHandler(const HandlerData &handlerData) : Handler(handlerData) {} + +public: +	bool start(Variant::mapType &args) override +	{ +		tracker.startCount++; +		tracker.startArgs = args; +		if (!tracker.startResult) { +			logger().error( +			    "The TestHandler was told not to allow a field start. So it " +			    "doesn't. 
The TestHandler always obeys its master."); +		} +		return tracker.startResult; +	} + +	void end() override { tracker.endCount++; } + +	bool fieldStart(bool &isDefault, size_t fieldIdx) override +	{ +		tracker.fieldStartCount++; +		tracker.fieldStartIsDefault = isDefault; +		tracker.fieldStartIdx = fieldIdx; +		if (tracker.fieldStartSetIsDefault) { +			isDefault = true; +		} +		return tracker.fieldStartResult; +	} + +	void fieldEnd() override { tracker.fieldEndCount++; } + +	bool annotationStart(const Variant &className, +	                     Variant::mapType &args) override +	{ +		tracker.annotationStartCount++; +		tracker.annotationStartClassName = className; +		tracker.annotationStartArgs = args; +		return tracker.annotationStartResult; +	} + +	bool annotationEnd(const Variant &className, +	                   const Variant &elementName) override +	{ +		tracker.annotationEndCount++; +		tracker.annotationEndClassName = className; +		tracker.annotationEndElementName = elementName; +		return tracker.annotationEndResult; +	} + +	bool data(Variant &data) override +	{ +		tracker.dataCount++; +		tracker.dataData = data; +		return tracker.dataResult; +	} + +	static Handler *create(const HandlerData &handlerData) +	{ +		return new TestHandler(handlerData); +	} +}; +} + +namespace States { +static const State Document = +    StateBuilder().parent(&None).elementHandler(TestHandler::create); +static const State Body = +    StateBuilder().parent(&Document).elementHandler(TestHandler::create); +static const State Empty = +    StateBuilder().parent(&Document).elementHandler(TestHandler::create); +static const State Special = +    StateBuilder().parent(&All).elementHandler(TestHandler::create); +static const State Arguments = +    StateBuilder().parent(&None).elementHandler(TestHandler::create).arguments( +        {Argument::Int("a"), Argument::String("b")}); +static const State BodyChildren = +    StateBuilder().parent(&Body).elementHandler(TestHandler::create); +static const 
State Any = +    StateBuilder().parents({&None, &Any}).elementHandler(TestHandler::create); + +static const std::multimap<std::string, const State *> TestHandlers{ +    {"document", &Document}, +    {"body", &Body}, +    {"empty", &Empty}, +    {"special", &Special}, +    {"arguments", &Arguments}, +    {"*", &BodyChildren}}; + +static const std::multimap<std::string, const State *> AnyHandlers{{"*", &Any}}; +} + +TEST(Stack, basicTest) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{env.context, States::TestHandlers}; + +		EXPECT_EQ("", s.currentCommandName()); +		EXPECT_EQ(&States::None, &s.currentState()); + +		s.command("document", {}); +		s.fieldStart(true); +		s.data("test1"); + +		EXPECT_EQ("document", s.currentCommandName()); +		EXPECT_EQ(&States::Document, &s.currentState()); +		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc + +		s.command("body", {}); +		s.fieldStart(true); +		s.data("test2"); +		EXPECT_EQ("body", s.currentCommandName()); +		EXPECT_EQ(&States::Body, &s.currentState()); +		tracker.expect(2, 0, 2, 0, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc + +		s.command("inner", {}); +		s.fieldStart(true); +		EXPECT_EQ("inner", s.currentCommandName()); +		EXPECT_EQ(&States::BodyChildren, &s.currentState()); + +		s.fieldEnd(); +		tracker.expect(3, 1, 3, 1, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc + +		s.fieldEnd(); +		EXPECT_EQ("document", s.currentCommandName()); +		EXPECT_EQ(&States::Document, &s.currentState()); +		tracker.expect(3, 2, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc + +		s.command("body", {}); +		s.fieldStart(true); +		s.data("test3"); +		EXPECT_EQ("body", s.currentCommandName()); +		EXPECT_EQ(&States::Body, &s.currentState()); +		s.fieldEnd(); +		tracker.expect(4, 3, 4, 3, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc + +		EXPECT_EQ("document", s.currentCommandName()); +		EXPECT_EQ(&States::Document, &s.currentState()); + +		s.fieldEnd(); +		tracker.expect(4, 4, 4, 4, 0, 0, 3);  // sc, ec, fsc, 
fse, asc, aec, dc + +		EXPECT_EQ("", s.currentCommandName()); +		EXPECT_EQ(&States::None, &s.currentState()); +	} +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, errorInvalidCommands) +{ +	Stack s{env.context, States::TestHandlers}; +	tracker.reset(); +	EXPECT_THROW(s.command("body", {}), LoggableException); +	s.command("document", {}); +	s.fieldStart(true); +	EXPECT_THROW(s.command("document", {}), LoggableException); +	s.command("empty", {}); +	s.fieldStart(true); +	EXPECT_THROW(s.command("body", {}), LoggableException); +	s.command("special", {}); +	s.fieldStart(true); +	s.fieldEnd(); +	s.fieldEnd(); +	s.fieldEnd(); +	EXPECT_EQ(&States::None, &s.currentState()); +	ASSERT_THROW(s.fieldEnd(), LoggableException); +	ASSERT_THROW(s.data("test"), LoggableException); +} + +TEST(Stack, validation) +{ +	Stack s{env.context, States::TestHandlers}; +	tracker.reset(); +	logger.reset(); + +	s.command("arguments", {}); +	EXPECT_TRUE(logger.hasError()); +	s.fieldStart(true); +	s.fieldEnd(); + +	logger.reset(); +	s.command("arguments", {{"a", 5}}); +	EXPECT_TRUE(logger.hasError()); +	s.fieldStart(true); +	s.fieldEnd(); + +	logger.reset(); +	s.command("arguments", {{"a", 5}, {"b", "test"}}); +	EXPECT_FALSE(logger.hasError()); +	s.fieldStart(true); +	s.fieldEnd(); +} + +TEST(Stack, invalidCommandName) +{ +	Stack s{env.context, States::AnyHandlers}; +	tracker.reset(); +	logger.reset(); + +	s.command("a", {}); +	s.fieldStart(true); +	s.fieldEnd(); +	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +	s.command("a_", {}); +	s.fieldStart(true); +	s.fieldEnd(); +	tracker.expect(2, 2, 2, 2, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +	s.command("a_:b", {}); +	s.fieldStart(true); +	s.fieldEnd(); +	tracker.expect(3, 3, 3, 3, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +	ASSERT_THROW(s.command("_a", {}), LoggableException); +	ASSERT_THROW(s.command("a:", {}), LoggableException); +	ASSERT_THROW(s.command("a:_b", {}), LoggableException); +	
tracker.expect(3, 3, 3, 3, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, multipleFields) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{env.context, States::AnyHandlers}; + +		s.command("a", {{"a", false}}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		EXPECT_EQ("a", s.currentCommandName()); +		EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs); + +		s.fieldStart(false); +		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		EXPECT_FALSE(tracker.fieldStartIsDefault); +		EXPECT_EQ(0U, tracker.fieldStartIdx); + +		s.data("test"); +		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +		EXPECT_EQ("test", tracker.dataData); + +		s.fieldEnd(); +		tracker.expect(1, 0, 1, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc + +		s.fieldStart(false); +		tracker.expect(1, 0, 2, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +		EXPECT_FALSE(tracker.fieldStartIsDefault); +		EXPECT_EQ(1U, tracker.fieldStartIdx); + +		s.data("test2"); +		tracker.expect(1, 0, 2, 1, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +		EXPECT_EQ("test2", tracker.dataData); + +		s.fieldEnd(); +		tracker.expect(1, 0, 2, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc + +		s.fieldStart(true); +		tracker.expect(1, 0, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc +		EXPECT_TRUE(tracker.fieldStartIsDefault); +		EXPECT_EQ(2U, tracker.fieldStartIdx); + +		s.data("test3"); +		tracker.expect(1, 0, 3, 2, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc +		EXPECT_EQ("test3", tracker.dataData); + +		s.fieldEnd(); +		tracker.expect(1, 1, 3, 3, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc +	} +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnNewCommand) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{env.context, States::AnyHandlers}; + +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +		s.command("b", {}); +		
tracker.expect(2, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(2, 2, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnNewCommandWithExplicitDefaultField) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{env.context, States::AnyHandlers}; + +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("a", s.currentCommandName()); + +		s.command("b", {}); +		tracker.expect(2, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("b", s.currentCommandName()); +		s.fieldStart(true); +		s.fieldEnd(); +		tracker.expect(2, 1, 2, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("a", s.currentCommandName()); +	} +	tracker.expect(2, 2, 2, 2, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, noImplicitDefaultFieldOnIncompatibleCommand) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{env.context, States::AnyHandlers}; + +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("a", s.currentCommandName()); + +		tracker.fieldStartResult = false; +		s.command("b", {}); +		tracker.expect(2, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("b", s.currentCommandName()); +	} +	tracker.expect(2, 2, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{env.context, States::AnyHandlers}; + +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("a", s.currentCommandName()); +		s.fieldStart(true); +		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("a", s.currentCommandName()); +		s.fieldEnd(); +		tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, 
ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("", s.currentCommandName()); + +		s.command("b", {}); +		tracker.expect(2, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("b", s.currentCommandName()); +	} +	tracker.expect(2, 2, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, noEndIfStartFails) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{env.context, States::AnyHandlers}; + +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("a", s.currentCommandName()); + +		tracker.startResult = false; +		s.command("b", {}); +		tracker.expect(3, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		ASSERT_EQ("b", s.currentCommandName()); +	} +	tracker.expect(3, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_TRUE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnData) +{ +	tracker.reset(); +	logger.reset(); +	{ +		Stack s{env.context, States::AnyHandlers}; + +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +		s.data("test"); +		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(1, 1, 1, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, autoFieldEnd) +{ +	tracker.reset(); +	logger.reset(); + +	{ +		Stack s{env.context, States::AnyHandlers}; +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(1, 1, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, autoImplicitFieldEnd) +{ +	tracker.reset(); +	logger.reset(); + +	{ +		Stack s{env.context, States::AnyHandlers}; +		s.command("a", {}); +		s.command("b", {}); +		s.command("c", {}); +		s.command("d", {}); +		s.command("e", {}); +		s.fieldStart(true); +		s.fieldEnd(); +		tracker.expect(5, 1, 5, 1, 0, 0, 0);  // sc, 
ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(5, 5, 5, 5, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, invalidDefaultField) +{ +	tracker.reset(); +	logger.reset(); + +	{ +		Stack s{env.context, States::AnyHandlers}; +		s.command("a", {}); +		tracker.fieldStartResult = false; +		s.fieldStart(true); +		s.fieldEnd(); +		tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, errorInvalidDefaultFieldData) +{ +	tracker.reset(); +	logger.reset(); + +	{ +		Stack s{env.context, States::AnyHandlers}; +		s.command("a", {}); +		tracker.fieldStartResult = false; +		s.fieldStart(true); +		ASSERT_FALSE(logger.hasError()); +		s.data("test"); +		ASSERT_TRUE(logger.hasError()); +		s.fieldEnd(); +		tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorInvalidFieldData) +{ +	tracker.reset(); +	logger.reset(); + +	{ +		Stack s{env.context, States::AnyHandlers}; +		s.command("a", {}); +		tracker.fieldStartResult = false; +		ASSERT_FALSE(logger.hasError()); +		s.fieldStart(false); +		ASSERT_TRUE(logger.hasError()); +		s.data("test"); +		s.fieldEnd(); +		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorFieldStartNoCommand) +{ +	tracker.reset(); +	logger.reset(); + +	Stack s{env.context, States::AnyHandlers}; +	ASSERT_THROW(s.fieldStart(false), LoggableException); +	ASSERT_THROW(s.fieldStart(true), LoggableException); +	tracker.expect(0, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorMutlipleFieldStarts) +{ +	tracker.reset(); +	logger.reset(); + +	{ +		Stack s{env.context, States::AnyHandlers}; +		s.command("a", {}); +		
tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +		s.fieldStart(false); +		ASSERT_FALSE(logger.hasError()); +		s.fieldStart(false); +		ASSERT_TRUE(logger.hasError()); +		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +		s.fieldEnd(); +		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorMutlipleFieldEnds) +{ +	tracker.reset(); +	logger.reset(); + +	{ +		Stack s{env.context, States::AnyHandlers}; +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +		s.fieldStart(false); +		s.fieldEnd(); +		ASSERT_FALSE(logger.hasError()); +		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +		s.fieldEnd(); +		ASSERT_TRUE(logger.hasError()); +		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +	} +	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorOpenField) +{ +	tracker.reset(); +	logger.reset(); + +	{ +		Stack s{env.context, States::AnyHandlers}; +		s.command("a", {}); +		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc + +		s.fieldStart(false); +		ASSERT_FALSE(logger.hasError()); +	} +	ASSERT_TRUE(logger.hasError()); +	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc +} +} +} + diff --git a/test/core/parser/stack/StateTest.cpp b/test/core/parser/stack/StateTest.cpp new file mode 100644 index 0000000..e503d30 --- /dev/null +++ b/test/core/parser/stack/StateTest.cpp @@ -0,0 +1,79 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. 
+ +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <core/common/Rtti.hpp> +#include <core/parser/stack/State.hpp> + +namespace ousia { +namespace parser_stack { + +static const Rtti t1; +static const Rtti t2; +static const Rtti t3; +static const Rtti t4; +static const Rtti t5; + +static const State s1 = StateBuilder().createdNodeType(&t1); +static const State s2a = +    StateBuilder().parent(&s1).createdNodeType(&t2); +static const State s2b = +    StateBuilder().parent(&s1).createdNodeType(&t2); +static const State s3 = +    StateBuilder().parents({&s2a, &s1}).createdNodeType(&t3); +static const State s4 = +    StateBuilder().parent(&s3).createdNodeType(&t4); +static const State s5 = +    StateBuilder().parent(&s2b).createdNodeType(&t5); + +TEST(StateDeductor, deduce) +{ +	using Result = std::vector<const State *>; +	using Signature = std::vector<const Rtti *>; +	std::vector<const State *> states{&s1, &s2a, &s2b, &s3, &s4, &s5}; + +	// Should not crash on empty signature +	ASSERT_EQ(Result{}, StateDeductor(Signature{}, states).deduce()); + +	// Try repeating signature elements +	ASSERT_EQ(Result({&s1}), +	          StateDeductor(Signature({&t1}), states).deduce()); +	ASSERT_EQ(Result({&s1}), +	          StateDeductor(Signature({&t1, &t1}), states).deduce()); +	ASSERT_EQ(Result({&s1}), +	          StateDeductor(Signature({&t1, &t1, &t1}), states).deduce()); + +	// Go to another state +	ASSERT_EQ(Result({&s2a, &s2b}), +	          StateDeductor(Signature({&t1, &t1, &t2}), states).deduce()); +	ASSERT_EQ(Result({&s4}), +	          StateDeductor(Signature({&t1, &t3, &t4}), 
states).deduce()); + +	// Skip one state +	ASSERT_EQ(Result({&s4}), +	          StateDeductor(Signature({&t2, &t4}), states).deduce()); + +	// Impossible signature +	ASSERT_EQ(Result({}), +	          StateDeductor(Signature({&t4, &t5}), states).deduce()); + +} +} +} + diff --git a/test/formats/osdm/TokenTrieTest.cpp b/test/core/parser/utils/TokenTrieTest.cpp index aacd6c0..087e6e6 100644 --- a/test/formats/osdm/TokenTrieTest.cpp +++ b/test/core/parser/utils/TokenTrieTest.cpp @@ -18,7 +18,7 @@  #include <gtest/gtest.h> -#include <formats/osdm/TokenTrie.hpp> +#include <core/parser/utils/TokenTrie.hpp>  namespace ousia { diff --git a/test/formats/osdm/DynamicTokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index c1f8785..8565057 100644 --- a/test/formats/osdm/DynamicTokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -19,13 +19,13 @@  #include <gtest/gtest.h>  #include <core/common/CharReader.hpp> -#include <formats/osdm/DynamicTokenizer.hpp> +#include <core/parser/utils/Tokenizer.hpp>  namespace ousia { -TEST(DynamicTokenizer, tokenRegistration) +TEST(Tokenizer, tokenRegistration)  { -	DynamicTokenizer tokenizer; +	Tokenizer tokenizer;  	ASSERT_EQ(EmptyToken, tokenizer.registerToken("")); @@ -50,15 +50,15 @@ TEST(DynamicTokenizer, tokenRegistration)  	ASSERT_EQ("d", tokenizer.getTokenString(1U));  } -TEST(DynamicTokenizer, textTokenPreserveWhitespace) +TEST(Tokenizer, textTokenPreserveWhitespace)  {  	{  		CharReader reader{" this \t is only a  \n\n test   text   "};  		//                 012345 6789012345678 9 0123456789012345  		//                 0          1           2         3 -		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; +		Tokenizer tokenizer{WhitespaceMode::PRESERVE}; -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type);  		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content); @@ -74,9 +74,9 @@ TEST(DynamicTokenizer, 
textTokenPreserveWhitespace)  		CharReader reader{"this \t is only a  \n\n test   text"};  		//                 01234 5678901234567 8 9012345678901  		//                 0          1           2         3 -		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; +		Tokenizer tokenizer{WhitespaceMode::PRESERVE}; -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type);  		ASSERT_EQ("this \t is only a  \n\n test   text", token.content); @@ -89,15 +89,15 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)  	}  } -TEST(DynamicTokenizer, textTokenTrimWhitespace) +TEST(Tokenizer, textTokenTrimWhitespace)  {  	{  		CharReader reader{" this \t is only a  \n\n test   text   "};  		//                 012345 6789012345678 9 0123456789012345  		//                 0          1           2         3 -		DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; +		Tokenizer tokenizer{WhitespaceMode::TRIM}; -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type);  		ASSERT_EQ("this \t is only a  \n\n test   text", token.content); @@ -113,9 +113,9 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)  		CharReader reader{"this \t is only a  \n\n test   text"};  		//                 01234 5678901234567 8 9012345678901  		//                 0          1           2         3 -		DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; +		Tokenizer tokenizer{WhitespaceMode::TRIM}; -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type);  		ASSERT_EQ("this \t is only a  \n\n test   text", token.content); @@ -128,15 +128,15 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)  	}  } -TEST(DynamicTokenizer, textTokenCollapseWhitespace) +TEST(Tokenizer, textTokenCollapseWhitespace)  {  	{  		CharReader reader{" this \t is only a  \n\n test   text   "};  		//                 012345 6789012345678 9 0123456789012345  	
	//                 0          1           2         3 -		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; +		Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type);  		ASSERT_EQ("this is only a test text", token.content); @@ -152,9 +152,9 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)  		CharReader reader{"this \t is only a  \n\n test   text"};  		//                 01234 5678901234567 8 9012345678901  		//                 0          1           2         3 -		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; +		Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type);  		ASSERT_EQ("this is only a test text", token.content); @@ -167,16 +167,16 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)  	}  } -TEST(DynamicTokenizer, simpleReadToken) +TEST(Tokenizer, simpleReadToken)  {  	CharReader reader{"test1:test2"}; -	DynamicTokenizer tokenizer; +	Tokenizer tokenizer;  	const TokenTypeId tid = tokenizer.registerToken(":");  	ASSERT_EQ(0U, tid);  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type); @@ -192,7 +192,7 @@ TEST(DynamicTokenizer, simpleReadToken)  	}  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(tid, token.type); @@ -208,7 +208,7 @@ TEST(DynamicTokenizer, simpleReadToken)  	}  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type); @@ -223,16 +223,16 @@ TEST(DynamicTokenizer, simpleReadToken)  	}  } -TEST(DynamicTokenizer, simplePeekToken) +TEST(Tokenizer, simplePeekToken)  {  	CharReader reader{"test1:test2"}; -	DynamicTokenizer tokenizer; +	Tokenizer tokenizer;  	const TokenTypeId tid = tokenizer.registerToken(":");  	
ASSERT_EQ(0U, tid);  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.peek(reader, token));  		ASSERT_EQ(TextToken, token.type); @@ -246,7 +246,7 @@ TEST(DynamicTokenizer, simplePeekToken)  	}  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.peek(reader, token));  		ASSERT_EQ(tid, token.type); @@ -260,7 +260,7 @@ TEST(DynamicTokenizer, simplePeekToken)  	}  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.peek(reader, token));  		ASSERT_EQ(TextToken, token.type); @@ -274,7 +274,7 @@ TEST(DynamicTokenizer, simplePeekToken)  	}  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type); @@ -288,7 +288,7 @@ TEST(DynamicTokenizer, simplePeekToken)  	}  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(tid, token.type); @@ -302,7 +302,7 @@ TEST(DynamicTokenizer, simplePeekToken)  	}  	{ -		DynamicToken token; +		Token token;  		ASSERT_TRUE(tokenizer.read(reader, token));  		ASSERT_EQ(TextToken, token.type); @@ -316,10 +316,10 @@ TEST(DynamicTokenizer, simplePeekToken)  	}  } -TEST(DynamicTokenizer, ambiguousTokens) +TEST(Tokenizer, ambiguousTokens)  {  	CharReader reader{"abc"}; -	DynamicTokenizer tokenizer; +	Tokenizer tokenizer;  	TokenTypeId t1 = tokenizer.registerToken("abd");  	TokenTypeId t2 = tokenizer.registerToken("bc"); @@ -327,7 +327,7 @@ TEST(DynamicTokenizer, ambiguousTokens)  	ASSERT_EQ(0U, t1);  	ASSERT_EQ(1U, t2); -	DynamicToken token; +	Token token;  	ASSERT_TRUE(tokenizer.read(reader, token));  	ASSERT_EQ(TextToken, token.type); @@ -349,18 +349,18 @@ TEST(DynamicTokenizer, ambiguousTokens)  	ASSERT_FALSE(tokenizer.read(reader, token));  } -TEST(DynamicTokenizer, commentTestWhitespacePreserve) +TEST(Tokenizer, commentTestWhitespacePreserve)  {  	CharReader reader{"Test/Test /* Block Comment */", 0};  	//                 012345678901234567890123456789  	//                 0        1  
       2 -	DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE); +	Tokenizer tokenizer(WhitespaceMode::PRESERVE);  	const TokenTypeId t1 = tokenizer.registerToken("/");  	const TokenTypeId t2 = tokenizer.registerToken("/*");  	const TokenTypeId t3 = tokenizer.registerToken("*/"); -	std::vector<DynamicToken> expected = { +	std::vector<Token> expected = {  	    {TextToken, "Test", SourceLocation{0, 0, 4}},  	    {t1, "/", SourceLocation{0, 4, 5}},  	    {TextToken, "Test ", SourceLocation{0, 5, 10}}, @@ -368,7 +368,7 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)  	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},  	    {t3, "*/", SourceLocation{0, 27, 29}}}; -	DynamicToken t; +	Token t;  	for (auto &te : expected) {  		EXPECT_TRUE(tokenizer.read(reader, t));  		EXPECT_EQ(te.type, t.type); @@ -380,18 +380,18 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)  	ASSERT_FALSE(tokenizer.read(reader, t));  } -TEST(DynamicTokenizer, commentTestWhitespaceCollapse) +TEST(Tokenizer, commentTestWhitespaceCollapse)  {  	CharReader reader{"Test/Test /* Block Comment */", 0};  	//                 012345678901234567890123456789  	//                 0        1         2 -	DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE); +	Tokenizer tokenizer(WhitespaceMode::COLLAPSE);  	const TokenTypeId t1 = tokenizer.registerToken("/");  	const TokenTypeId t2 = tokenizer.registerToken("/*");  	const TokenTypeId t3 = tokenizer.registerToken("*/"); -	std::vector<DynamicToken> expected = { +	std::vector<Token> expected = {  	    {TextToken, "Test", SourceLocation{0, 0, 4}},  	    {t1, "/", SourceLocation{0, 4, 5}},  	    {TextToken, "Test", SourceLocation{0, 5, 9}}, @@ -399,7 +399,7 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)  	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},  	    {t3, "*/", SourceLocation{0, 27, 29}}}; -	DynamicToken t; +	Token t;  	for (auto &te : expected) {  		EXPECT_TRUE(tokenizer.read(reader, t));  		
EXPECT_EQ(te.type, t.type); @@ -410,6 +410,5 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)  	}  	ASSERT_FALSE(tokenizer.read(reader, t));  } -  } diff --git a/test/formats/osdm/OsdmStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 46f4cf6..d52fa5b 100644 --- a/test/formats/osdm/OsdmStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -23,95 +23,205 @@  #include <core/common/CharReader.hpp>  #include <core/frontend/TerminalLogger.hpp> -#include <formats/osdm/OsdmStreamParser.hpp> +#include <formats/osml/OsmlStreamParser.hpp>  namespace ousia {  static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; -TEST(OsdmStreamParser, empty) +static void assertCommand(OsmlStreamParser &reader, const std::string &name, +                          SourceOffset start = InvalidSourceOffset, +                          SourceOffset end = InvalidSourceOffset) +{ +	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); +	EXPECT_EQ(name, reader.getCommandName().asString()); +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); +		EXPECT_EQ(start, reader.getLocation().getStart()); +	} +	if (end != InvalidSourceOffset) { +		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); +		EXPECT_EQ(end, reader.getLocation().getEnd()); +	} +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, +                          const Variant::mapType &args, +                          SourceOffset start = InvalidSourceOffset, +                          SourceOffset end = InvalidSourceOffset) +{ +	assertCommand(reader, name, start, end); +	EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertData(OsmlStreamParser &reader, const std::string &data, +                       SourceOffset start = InvalidSourceOffset, +                       SourceOffset end = InvalidSourceOffset) +{ +	
ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); +	EXPECT_EQ(data, reader.getData().asString()); +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, reader.getData().getLocation().getStart()); +		EXPECT_EQ(start, reader.getLocation().getStart()); +	} +	if (end != InvalidSourceOffset) { +		EXPECT_EQ(end, reader.getData().getLocation().getEnd()); +		EXPECT_EQ(end, reader.getLocation().getEnd()); +	} +} + +static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, +                             SourceOffset start = InvalidSourceOffset, +                             SourceOffset end = InvalidSourceOffset) +{ +	ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); +	EXPECT_EQ(defaultField, reader.inDefaultField()); +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, reader.getLocation().getStart()); +	} +	if (end != InvalidSourceOffset) { +		EXPECT_EQ(end, reader.getLocation().getEnd()); +	} +} + +static void assertFieldEnd(OsmlStreamParser &reader, +                           SourceOffset start = InvalidSourceOffset, +                           SourceOffset end = InvalidSourceOffset) +{ +	ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, reader.getLocation().getStart()); +	} +	if (end != InvalidSourceOffset) { +		EXPECT_EQ(end, reader.getLocation().getEnd()); +	} +} + +static void assertAnnotationStart(OsmlStreamParser &reader, +                                  const std::string &name, +                                  SourceOffset start = InvalidSourceOffset, +                                  SourceOffset end = InvalidSourceOffset) +{ +	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); +	EXPECT_EQ(name, reader.getCommandName().asString()); +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); +		EXPECT_EQ(start, reader.getLocation().getStart()); +	} +	if (end != 
InvalidSourceOffset) { +		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); +		EXPECT_EQ(end, reader.getLocation().getEnd()); +	} +} + +static void assertAnnotationStart(OsmlStreamParser &reader, +                                  const std::string &name, +                                  const Variant::mapType &args, +                                  SourceOffset start = InvalidSourceOffset, +                                  SourceOffset end = InvalidSourceOffset) +{ +	assertAnnotationStart(reader, name, start, end); +	EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertAnnotationEnd(OsmlStreamParser &reader, +                                const std::string &name, +                                const std::string &elementName, +                                SourceOffset start = InvalidSourceOffset, +                                SourceOffset end = InvalidSourceOffset) +{ +	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); +	ASSERT_EQ(name, reader.getCommandName().asString()); +	if (!elementName.empty()) { +		ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); +		ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name")); + +		auto it = reader.getCommandArguments().asMap().find("name"); +		ASSERT_EQ(elementName, it->second.asString()); +	} +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, reader.getLocation().getStart()); +	} +	if (end != InvalidSourceOffset) { +		EXPECT_EQ(end, reader.getLocation().getEnd()); +	} +} + +static void assertEnd(OsmlStreamParser &reader, +                      SourceOffset start = InvalidSourceOffset, +                      SourceOffset end = InvalidSourceOffset) +{ +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +	if (start != InvalidSourceOffset) { +		EXPECT_EQ(start, reader.getLocation().getStart()); +	} +	if (end != InvalidSourceOffset) { +		EXPECT_EQ(end, reader.getLocation().getEnd()); +	} +} + +TEST(OsmlStreamParser, empty)  {  	const 
char *testString = "";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger); -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -TEST(OsdmStreamParser, oneCharacter) +TEST(OsmlStreamParser, oneCharacter)  {  	const char *testString = "a";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); - -	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); -	ASSERT_EQ("a", reader.getData().asString()); +	OsmlStreamParser reader(charReader, logger); -	SourceLocation loc = reader.getData().getLocation(); -	ASSERT_EQ(0U, loc.getStart()); -	ASSERT_EQ(1U, loc.getEnd()); +	assertData(reader, "a", 0, 1);  } -TEST(OsdmStreamParser, whitespaceElimination) +TEST(OsmlStreamParser, whitespaceElimination)  {  	const char *testString = " hello \t world ";  	//                        0123456 78901234  	//                        0          1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); - -	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); -	ASSERT_EQ("hello world", reader.getData().asString()); +	OsmlStreamParser reader(charReader, logger); -	SourceLocation loc = reader.getData().getLocation(); -	ASSERT_EQ(1U, loc.getStart()); -	ASSERT_EQ(14U, loc.getEnd()); +	assertData(reader, "hello world", 1, 14);  } -TEST(OsdmStreamParser, whitespaceEliminationWithLinebreak) +TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak)  {  	const char *testString = " hello \n world ";  	//                        0123456 78901234  	//                        0          1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); - -	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); -	ASSERT_EQ("hello world", reader.getData().asString()); +	OsmlStreamParser reader(charReader, logger); -	SourceLocation loc = reader.getData().getLocation(); -	
ASSERT_EQ(1U, loc.getStart()); -	ASSERT_EQ(14U, loc.getEnd()); -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	assertData(reader, "hello world", 1, 14);  } -TEST(OsdmStreamParser, escapeWhitespace) +TEST(OsmlStreamParser, escapeWhitespace)  {  	const char *testString = " hello\\ \\ world ";  	//                        012345 67 89012345  	//                        0           1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger); -	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); -	ASSERT_EQ("hello  world", reader.getData().asString()); - -	SourceLocation loc = reader.getData().getLocation(); -	ASSERT_EQ(1U, loc.getStart()); -	ASSERT_EQ(15U, loc.getEnd()); -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	assertData(reader, "hello  world", 1, 15);  }  static void testEscapeSpecialCharacter(const std::string &c)  {  	CharReader charReader(std::string("\\") + c); -	OsdmStreamParser reader(charReader, logger); -	EXPECT_EQ(OsdmStreamParser::State::DATA, reader.parse()); +	OsmlStreamParser reader(charReader, logger); +	EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse());  	EXPECT_EQ(c, reader.getData().asString());  	SourceLocation loc = reader.getData().getLocation(); @@ -119,32 +229,30 @@ static void testEscapeSpecialCharacter(const std::string &c)  	EXPECT_EQ(1U + c.size(), loc.getEnd());  } -TEST(OsdmStreamParser, escapeSpecialCharacters) +TEST(OsmlStreamParser, escapeSpecialCharacters)  {  	testEscapeSpecialCharacter("\\");  	testEscapeSpecialCharacter("{");  	testEscapeSpecialCharacter("}"); -	testEscapeSpecialCharacter("<"); -	testEscapeSpecialCharacter(">");  } -TEST(OsdmStreamParser, simpleSingleLineComment) +TEST(OsmlStreamParser, simpleSingleLineComment)  {  	const char *testString = "% This is a single line comment";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); -	ASSERT_EQ(OsdmStreamParser::State::END, 
reader.parse()); +	OsmlStreamParser reader(charReader, logger); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -TEST(OsdmStreamParser, singleLineComment) +TEST(OsmlStreamParser, singleLineComment)  {  	const char *testString = "a% This is a single line comment\nb";  	//                        01234567890123456789012345678901 23  	//                        0         1         2         3  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	{ -		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); +		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());  		ASSERT_EQ("a", reader.getData().asString());  		SourceLocation loc = reader.getData().getLocation();  		ASSERT_EQ(0U, loc.getStart()); @@ -152,25 +260,25 @@ TEST(OsdmStreamParser, singleLineComment)  	}  	{ -		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); +		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());  		ASSERT_EQ("b", reader.getData().asString());  		SourceLocation loc = reader.getData().getLocation();  		ASSERT_EQ(33U, loc.getStart());  		ASSERT_EQ(34U, loc.getEnd());  	} -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -TEST(OsdmStreamParser, multilineComment) +TEST(OsmlStreamParser, multilineComment)  {  	const char *testString = "a%{ This is a\n\n multiline line comment}%b";  	//                        0123456789012 3 456789012345678901234567890  	//                        0         1           2         3         4  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	{ -		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); +		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());  		ASSERT_EQ("a", reader.getData().asString());  		SourceLocation loc = reader.getData().getLocation();  		ASSERT_EQ(0U, loc.getStart()); @@ 
-178,25 +286,25 @@ TEST(OsdmStreamParser, multilineComment)  	}  	{ -		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); +		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());  		ASSERT_EQ("b", reader.getData().asString());  		SourceLocation loc = reader.getData().getLocation();  		ASSERT_EQ(40U, loc.getStart());  		ASSERT_EQ(41U, loc.getEnd());  	} -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -TEST(OsdmStreamParser, nestedMultilineComment) +TEST(OsmlStreamParser, nestedMultilineComment)  {  	const char *testString = "a%{%{Another\n\n}%multiline line comment}%b";  	//                        0123456789012 3 456789012345678901234567890  	//                        0         1           2         3         4  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	{ -		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); +		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());  		ASSERT_EQ("a", reader.getData().asString());  		SourceLocation loc = reader.getData().getLocation();  		ASSERT_EQ(0U, loc.getStart()); @@ -204,23 +312,23 @@ TEST(OsdmStreamParser, nestedMultilineComment)  	}  	{ -		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); +		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());  		ASSERT_EQ("b", reader.getData().asString());  		SourceLocation loc = reader.getData().getLocation();  		ASSERT_EQ(40U, loc.getStart());  		ASSERT_EQ(41U, loc.getEnd());  	} -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -TEST(OsdmStreamParser, simpleCommand) +TEST(OsmlStreamParser, simpleCommand)  {  	const char *testString = "\\test";  	//                        0 12345  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); -	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); +	
OsmlStreamParser reader(charReader, logger); +	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());  	Variant commandName = reader.getCommandName();  	ASSERT_EQ("test", commandName.asString()); @@ -230,16 +338,16 @@ TEST(OsdmStreamParser, simpleCommand)  	ASSERT_EQ(5U, loc.getEnd());  	ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -TEST(OsdmStreamParser, simpleCommandWithName) +TEST(OsmlStreamParser, simpleCommandWithName)  {  	const char *testString = "\\test#bla";  	//                        0 12345678  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); -	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); +	OsmlStreamParser reader(charReader, logger); +	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());  	Variant commandName = reader.getCommandName();  	ASSERT_EQ("test", commandName.asString()); @@ -257,17 +365,17 @@ TEST(OsdmStreamParser, simpleCommandWithName)  	ASSERT_EQ(5U, loc.getStart());  	ASSERT_EQ(9U, loc.getEnd()); -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -TEST(OsdmStreamParser, simpleCommandWithArguments) +TEST(OsmlStreamParser, simpleCommandWithArguments)  {  	const char *testString = "\\test[a=1,b=2,c=\"test\"]";  	//                        0 123456789012345 678901 2  	//                        0          1          2  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); -	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); +	OsmlStreamParser reader(charReader, logger); +	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());  	Variant commandName = reader.getCommandName();  	ASSERT_EQ("test", commandName.asString()); @@ -297,17 +405,17 @@ TEST(OsdmStreamParser, simpleCommandWithArguments)  	ASSERT_EQ(16U, loc.getStart());  	
ASSERT_EQ(22U, loc.getEnd()); -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -TEST(OsdmStreamParser, simpleCommandWithArgumentsAndName) +TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName)  {  	const char *testString = "\\test#bla[a=1,b=2,c=\"test\"]";  	//                        0 1234567890123456789 01234 56  	//                        0          1          2  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); -	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); +	OsmlStreamParser reader(charReader, logger); +	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());  	Variant commandName = reader.getCommandName();  	ASSERT_EQ("test", commandName.asString()); @@ -343,126 +451,46 @@ TEST(OsdmStreamParser, simpleCommandWithArgumentsAndName)  	ASSERT_EQ(5U, loc.getStart());  	ASSERT_EQ(9U, loc.getEnd()); -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); +	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());  } -static void assertCommand(OsdmStreamParser &reader, const std::string &name, -                          SourceOffset start = InvalidSourceOffset, -                          SourceOffset end = InvalidSourceOffset) -{ -	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); -	EXPECT_EQ(name, reader.getCommandName().asString()); -	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); -		EXPECT_EQ(start, reader.getLocation().getStart()); -	} -	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); -		EXPECT_EQ(end, reader.getLocation().getEnd()); -	} -} - -static void assertCommand(OsdmStreamParser &reader, const std::string &name, -                          const Variant::mapType &args, -                          SourceOffset start = InvalidSourceOffset, -                          SourceOffset end = InvalidSourceOffset) -{ 
-	assertCommand(reader, name, start, end); -	EXPECT_EQ(args, reader.getCommandArguments()); -} - -static void assertData(OsdmStreamParser &reader, const std::string &data, -                       SourceOffset start = InvalidSourceOffset, -                       SourceOffset end = InvalidSourceOffset) -{ -	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); -	EXPECT_EQ(data, reader.getData().asString()); -	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getData().getLocation().getStart()); -		EXPECT_EQ(start, reader.getLocation().getStart()); -	} -	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getData().getLocation().getEnd()); -		EXPECT_EQ(end, reader.getLocation().getEnd()); -	} -} - -static void assertFieldStart(OsdmStreamParser &reader, -                             SourceOffset start = InvalidSourceOffset, -                             SourceOffset end = InvalidSourceOffset) -{ -	ASSERT_EQ(OsdmStreamParser::State::FIELD_START, reader.parse()); -	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getLocation().getStart()); -	} -	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getLocation().getEnd()); -	} -} - -static void assertFieldEnd(OsdmStreamParser &reader, -                           SourceOffset start = InvalidSourceOffset, -                           SourceOffset end = InvalidSourceOffset) -{ -	ASSERT_EQ(OsdmStreamParser::State::FIELD_END, reader.parse()); -	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getLocation().getStart()); -	} -	if (end != InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getLocation().getEnd()); -	} -} - -static void assertEnd(OsdmStreamParser &reader, -                      SourceOffset start = InvalidSourceOffset, -                      SourceOffset end = InvalidSourceOffset) -{ -	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); -	if (start != InvalidSourceOffset) { -		EXPECT_EQ(start, reader.getLocation().getStart()); -	} -	if (end != 
InvalidSourceOffset) { -		EXPECT_EQ(end, reader.getLocation().getEnd()); -	} -} - -TEST(OsdmStreamParser, fields) +TEST(OsmlStreamParser, fields)  {  	const char *testString = "\\test{a}{b}{c}";  	//                         01234567890123  	//                         0         1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, 5, 6); +	assertFieldStart(reader, false, 5, 6);  	assertData(reader, "a", 6, 7);  	assertFieldEnd(reader, 7, 8); -	assertFieldStart(reader, 8, 9); +	assertFieldStart(reader, false, 8, 9);  	assertData(reader, "b", 9, 10);  	assertFieldEnd(reader, 10, 11); -	assertFieldStart(reader, 11, 12); +	assertFieldStart(reader, false, 11, 12);  	assertData(reader, "c", 12, 13);  	assertFieldEnd(reader, 13, 14);  	assertEnd(reader, 14, 14);  } -TEST(OsdmStreamParser, dataOutsideField) +TEST(OsmlStreamParser, dataOutsideField)  {  	const char *testString = "\\test{a}{b} c";  	//                         0123456789012  	//                         0         1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, 5, 6); +	assertFieldStart(reader, false, 5, 6);  	assertData(reader, "a", 6, 7);  	assertFieldEnd(reader, 7, 8); -	assertFieldStart(reader, 8, 9); +	assertFieldStart(reader, false, 8, 9);  	assertData(reader, "b", 9, 10);  	assertFieldEnd(reader, 10, 11); @@ -470,24 +498,24 @@ TEST(OsdmStreamParser, dataOutsideField)  	assertEnd(reader, 13, 13);  } -TEST(OsdmStreamParser, nestedCommand) +TEST(OsmlStreamParser, nestedCommand)  {  	const char *testString = "\\test{a}{\\test2{b} c} d";  	//                         012345678 90123456789012  	//                         0          1         2  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, 
logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, 5, 6); +	assertFieldStart(reader, false, 5, 6);  	assertData(reader, "a", 6, 7);  	assertFieldEnd(reader, 7, 8); -	assertFieldStart(reader, 8, 9); +	assertFieldStart(reader, false, 8, 9);  	{  		assertCommand(reader, "test2", 9, 15); -		assertFieldStart(reader, 15, 16); +		assertFieldStart(reader, false, 15, 16);  		assertData(reader, "b", 16, 17);  		assertFieldEnd(reader, 17, 18);  	} @@ -497,19 +525,19 @@ TEST(OsdmStreamParser, nestedCommand)  	assertEnd(reader, 23, 23);  } -TEST(OsdmStreamParser, nestedCommandImmediateEnd) +TEST(OsmlStreamParser, nestedCommandImmediateEnd)  {  	const char *testString = "\\test{\\test2{b}} d";  	//                         012345 678901234567  	//                         0          1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, 5, 6); +	assertFieldStart(reader, false, 5, 6);  	{  		assertCommand(reader, "test2", 6, 12); -		assertFieldStart(reader, 12, 13); +		assertFieldStart(reader, false, 12, 13);  		assertData(reader, "b", 13, 14);  		assertFieldEnd(reader, 14, 15);  	} @@ -518,27 +546,27 @@ TEST(OsdmStreamParser, nestedCommandImmediateEnd)  	assertEnd(reader, 18, 18);  } -TEST(OsdmStreamParser, nestedCommandNoData) +TEST(OsmlStreamParser, nestedCommandNoData)  {  	const char *testString = "\\test{\\test2}";  	//                         012345 6789012  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, 5, 6); +	assertFieldStart(reader, false, 5, 6);  	assertCommand(reader, "test2", 6, 12);  	assertFieldEnd(reader, 12, 13);  	assertEnd(reader, 13, 13);  } -TEST(OsdmStreamParser, multipleCommands) 
+TEST(OsmlStreamParser, multipleCommands)  {  	const char *testString = "\\a \\b \\c \\d";  	//                         012 345 678 90  	//                         0            1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "a", 0, 2);  	assertCommand(reader, "b", 3, 5); @@ -547,33 +575,33 @@ TEST(OsdmStreamParser, multipleCommands)  	assertEnd(reader, 11, 11);  } -TEST(OsdmStreamParser, fieldsWithSpaces) +TEST(OsmlStreamParser, fieldsWithSpaces)  {  	const char *testString = "\\a {\\b \\c}   \n\n {\\d}";  	//                         0123 456 789012 3 456 789  	//                         0           1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "a", 0, 2); -	assertFieldStart(reader, 3, 4); +	assertFieldStart(reader, false, 3, 4);  	assertCommand(reader, "b", 4, 6);  	assertCommand(reader, "c", 7, 9);  	assertFieldEnd(reader, 9, 10); -	assertFieldStart(reader, 16, 17); +	assertFieldStart(reader, false, 16, 17);  	assertCommand(reader, "d", 17, 19);  	assertFieldEnd(reader, 19, 20);  	assertEnd(reader, 20, 20);  } -TEST(OsdmStreamParser, errorNoFieldToStart) +TEST(OsmlStreamParser, errorNoFieldToStart)  {  	const char *testString = "\\a b {";  	//                         012345  	//                         0  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	assertCommand(reader, "a", 0, 2); @@ -583,14 +611,14 @@ TEST(OsdmStreamParser, errorNoFieldToStart)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorNoFieldToEnd) +TEST(OsmlStreamParser, errorNoFieldToEnd)  {  	const char *testString = "\\a b }";  	//                         012345  	//                         0  	CharReader charReader(testString); -	OsdmStreamParser 
reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	assertCommand(reader, "a", 0, 2); @@ -600,20 +628,20 @@ TEST(OsdmStreamParser, errorNoFieldToEnd)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorNoFieldEndNested) +TEST(OsmlStreamParser, errorNoFieldEndNested)  {  	const char *testString = "\\test{\\test2{}}}";  	//                         012345 6789012345  	//                         0          1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, 5, 6); +	assertFieldStart(reader, false, 5, 6);  	assertCommand(reader, "test2", 6, 12); -	assertFieldStart(reader, 12, 13); +	assertFieldStart(reader, false, 12, 13);  	assertFieldEnd(reader, 13, 14);  	assertFieldEnd(reader, 14, 15);  	ASSERT_FALSE(logger.hasError()); @@ -621,20 +649,20 @@ TEST(OsdmStreamParser, errorNoFieldEndNested)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorNoFieldEndNestedData) +TEST(OsmlStreamParser, errorNoFieldEndNestedData)  {  	const char *testString = "\\test{\\test2{}}a}";  	//                         012345 67890123456  	//                         0          1  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	assertCommand(reader, "test", 0, 5); -	assertFieldStart(reader, 5, 6); +	assertFieldStart(reader, false, 5, 6);  	assertCommand(reader, "test2", 6, 12); -	assertFieldStart(reader, 12, 13); +	assertFieldStart(reader, false, 12, 13);  	assertFieldEnd(reader, 13, 14);  	assertFieldEnd(reader, 14, 15);  	assertData(reader, "a", 15, 16); @@ -643,53 +671,53 @@ TEST(OsdmStreamParser, errorNoFieldEndNestedData)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, beginEnd) +TEST(OsmlStreamParser, beginEnd)  {  	const char *testString = 
"\\begin{book}\\end{book}";  	//                         012345678901 2345678901  	//                         0         1          2  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "book", 7, 11); -	assertFieldStart(reader, 12, 13); +	assertFieldStart(reader, true, 12, 13);  	assertFieldEnd(reader, 17, 21);  	assertEnd(reader, 22, 22);  } -TEST(OsdmStreamParser, beginEndWithName) +TEST(OsmlStreamParser, beginEndWithName)  {  	const char *testString = "\\begin{book#a}\\end{book}";  	//                         01234567890123 4567890123  	//                         0         1          2  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "book", {{"name", "a"}}, 7, 11); -	assertFieldStart(reader, 14, 15); +	assertFieldStart(reader, true, 14, 15);  	assertFieldEnd(reader, 19, 23);  	assertEnd(reader, 24, 24);  } -TEST(OsdmStreamParser, beginEndWithNameAndArgs) +TEST(OsmlStreamParser, beginEndWithNameAndArgs)  {  	const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}";  	//                         0123456789012345678901234 56789 01 2345678901  	//                         0         1         2           3          4  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "book",  	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); -	assertFieldStart(reader, 32, 33); +	assertFieldStart(reader, true, 32, 33);  	assertFieldEnd(reader, 37, 41);  	assertEnd(reader, 42, 42);  } -TEST(OsdmStreamParser, beginEndWithNameAndArgsMultipleFields) +TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields)  {  	const char *testString =  	    "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; @@ -697,67 +725,100 @@ 
TEST(OsdmStreamParser, beginEndWithNameAndArgsMultipleFields)  	//    0         1         2           3          4          5          6  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "book",  	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); -	assertFieldStart(reader, 32, 33); +	assertFieldStart(reader, false, 32, 33);  	assertData(reader, "a", 33, 34);  	assertCommand(reader, "test", Variant::mapType{}, 35, 40);  	assertFieldEnd(reader, 40, 41); -	assertFieldStart(reader, 41, 42); +	assertFieldStart(reader, false, 41, 42);  	assertData(reader, "b", 42, 43);  	assertCommand(reader, "test", Variant::mapType{}, 44, 49); -	assertFieldStart(reader, 49, 50); +	assertFieldStart(reader, false, 49, 50);  	assertFieldEnd(reader, 50, 51);  	assertFieldEnd(reader, 51, 52); -	assertFieldStart(reader, 52, 53); +	assertFieldStart(reader, true, 52, 53);  	assertFieldEnd(reader, 57, 61);  	assertEnd(reader, 62, 62);  } -TEST(OsdmStreamParser, beginEndWithData) +TEST(OsmlStreamParser, beginEndWithData)  {  	const char *testString = "\\begin{book}a\\end{book}";  	//                         0123456789012 3456789012  	//                         0         1          2  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "book", 7, 11); -	assertFieldStart(reader, 12, 13); +	assertFieldStart(reader, true, 12, 13);  	assertData(reader, "a", 12, 13);  	assertFieldEnd(reader, 18, 22);  	assertEnd(reader, 23, 23);  } -TEST(OsdmStreamParser, beginEndWithCommand) +TEST(OsmlStreamParser, beginEndNested) +{ +	const char *testString = +	    "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; +	//    012345678901234 5678901234567890 123456 7890123 4567890 +	//    0         1          2         3           4          5 +	CharReader charReader(testString); + +	
OsmlStreamParser reader(charReader, logger); + +	assertCommand(reader, "a", 7, 8); +	assertFieldStart(reader, false, 9, 10); +	assertData(reader, "b", 10, 11); +	assertFieldEnd(reader, 11, 12); +	assertFieldStart(reader, true, 13, 14); +	assertData(reader, "c", 13, 14); +	assertCommand(reader, "d", 22, 23); +	assertFieldStart(reader, false, 24, 25); +	assertData(reader, "e", 25, 26); +	assertFieldEnd(reader, 26, 27); +	assertFieldStart(reader, false, 27, 28); +	assertData(reader, "f", 28, 29); +	assertFieldEnd(reader, 29, 30); +	assertFieldStart(reader, true, 31, 32); +	assertCommand(reader, "g", 31, 33); +	assertFieldStart(reader, false, 33, 34); +	assertData(reader, "h", 34, 35); +	assertFieldEnd(reader, 35, 36); +	assertFieldEnd(reader, 42, 43); +	assertFieldEnd(reader, 49, 50); +	assertEnd(reader, 51, 51); +} + +TEST(OsmlStreamParser, beginEndWithCommand)  {  	const char *testString = "\\begin{book}\\a{test}\\end{book}";  	//                         012345678901 23456789 0123456789  	//                         0         1           2  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "book", 7, 11); -	assertFieldStart(reader, 12, 13); +	assertFieldStart(reader, true, 12, 13);  	assertCommand(reader, "a", 12, 14); -	assertFieldStart(reader, 14, 15); +	assertFieldStart(reader, false, 14, 15);  	assertData(reader, "test", 15, 19);  	assertFieldEnd(reader, 19, 20);  	assertFieldEnd(reader, 25, 29);  	assertEnd(reader, 30, 30);  } -TEST(OsdmStreamParser, errorBeginNoBraceOpen) +TEST(OsmlStreamParser, errorBeginNoBraceOpen)  {  	const char *testString = "\\begin a";  	//                         01234567  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -765,12 +826,12 @@ TEST(OsdmStreamParser, errorBeginNoBraceOpen)  	
ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorBeginNoIdentifier) +TEST(OsmlStreamParser, errorBeginNoIdentifier)  {  	const char *testString = "\\begin{!";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -778,12 +839,12 @@ TEST(OsdmStreamParser, errorBeginNoIdentifier)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorBeginNoBraceClose) +TEST(OsmlStreamParser, errorBeginNoBraceClose)  {  	const char *testString = "\\begin{a";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -791,12 +852,12 @@ TEST(OsdmStreamParser, errorBeginNoBraceClose)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorBeginNoName) +TEST(OsmlStreamParser, errorBeginNoName)  {  	const char *testString = "\\begin{a#}";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -808,13 +869,13 @@ TEST(OsdmStreamParser, errorBeginNoName)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorEndNoBraceOpen) +TEST(OsmlStreamParser, errorEndNoBraceOpen)  {  	const char *testString = "\\end a";  	//                         012345  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -822,12 +883,12 @@ TEST(OsdmStreamParser, errorEndNoBraceOpen)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorEndNoIdentifier) +TEST(OsmlStreamParser, errorEndNoIdentifier)  {  	const char *testString = "\\end{!";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser 
reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -835,12 +896,12 @@ TEST(OsdmStreamParser, errorEndNoIdentifier)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorEndNoBraceClose) +TEST(OsmlStreamParser, errorEndNoBraceClose)  {  	const char *testString = "\\end{a";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -848,12 +909,12 @@ TEST(OsdmStreamParser, errorEndNoBraceClose)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorEndNoBegin) +TEST(OsmlStreamParser, errorEndNoBegin)  {  	const char *testString = "\\end{a}";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -861,91 +922,91 @@ TEST(OsdmStreamParser, errorEndNoBegin)  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, errorBeginEndMismatch) +TEST(OsmlStreamParser, errorBeginEndMismatch)  {  	const char *testString = "\\begin{a} \\begin{b} test \\end{a}";  	//                         0123456789 012345678901234 5678901  	//                         0          1         2          3  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	assertCommand(reader, "a", 7, 8); -	assertFieldStart(reader, 10, 11); +	assertFieldStart(reader, true, 10, 11);  	assertCommand(reader, "b", 17, 18); -	assertFieldStart(reader, 20, 24); +	assertFieldStart(reader, true, 20, 24);  	assertData(reader, "test", 20, 24);  	ASSERT_FALSE(logger.hasError());  	ASSERT_THROW(reader.parse(), LoggableException);  	ASSERT_TRUE(logger.hasError());  } -TEST(OsdmStreamParser, commandWithNSSep) +TEST(OsmlStreamParser, commandWithNSSep)  {  	const char *testString = "\\test1:test2";  	//                      
   012345678901  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "test1:test2", 0, 12);  	assertEnd(reader, 12, 12);  } -TEST(OsdmStreamParser, beginEndWithNSSep) +TEST(OsmlStreamParser, beginEndWithNSSep)  {  	const char *testString = "\\begin{test1:test2}\\end{test1:test2}";  	//                         0123456789012345678 90123456789012345  	//                         0         1          2         3  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	assertCommand(reader, "test1:test2", 7, 18); -	assertFieldStart(reader, 19, 20); +	assertFieldStart(reader, true, 19, 20);  	assertFieldEnd(reader, 24, 35);  	assertEnd(reader, 36, 36);  } -TEST(OsdmStreamParser, errorBeginNSSep) +TEST(OsmlStreamParser, errorBeginNSSep)  {  	const char *testString = "\\begin:test{blub}\\end{blub}";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError());  	assertCommand(reader, "blub");  	ASSERT_TRUE(logger.hasError()); -	assertFieldStart(reader); +	assertFieldStart(reader, true);  	assertFieldEnd(reader);  	assertEnd(reader);  } -TEST(OsdmStreamParser, errorEndNSSep) +TEST(OsmlStreamParser, errorEndNSSep)  {  	const char *testString = "\\begin{blub}\\end:test{blub}";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	assertCommand(reader, "blub"); -	assertFieldStart(reader); +	assertFieldStart(reader, true);  	ASSERT_FALSE(logger.hasError());  	assertFieldEnd(reader);  	ASSERT_TRUE(logger.hasError());  	assertEnd(reader);  } -TEST(OsdmStreamParser, errorEmptyNs) +TEST(OsmlStreamParser, errorEmptyNs)  {  	const char *testString = "\\test:";  	CharReader 
charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -955,12 +1016,12 @@ TEST(OsdmStreamParser, errorEmptyNs)  	assertEnd(reader);  } -TEST(OsdmStreamParser, errorRepeatedNs) +TEST(OsmlStreamParser, errorRepeatedNs)  {  	const char *testString = "\\test::";  	CharReader charReader(testString); -	OsdmStreamParser reader(charReader, logger); +	OsmlStreamParser reader(charReader, logger);  	logger.reset();  	ASSERT_FALSE(logger.hasError()); @@ -969,5 +1030,232 @@ TEST(OsdmStreamParser, errorRepeatedNs)  	assertData(reader, "::");  	assertEnd(reader);  } + +TEST(OsmlStreamParser, explicitDefaultField) +{ +	const char *testString = "\\a{!b}c"; +	//                         01234567 +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertCommand(reader, "a", 0, 2); +	assertFieldStart(reader, true, 2, 4); +	assertData(reader, "b", 4, 5); +	assertFieldEnd(reader, 5, 6); +	assertData(reader, "c", 6, 7); +	assertEnd(reader, 7, 7); +} + +TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) +{ +	const char *testString = "\\a{!\\b}c"; +	//                         0123 4567 +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertCommand(reader, "a", 0, 2); +	assertFieldStart(reader, true, 2, 4); +	assertCommand(reader, "b", 4, 6); +	assertFieldEnd(reader, 6, 7); +	assertData(reader, "c", 7, 8); +	assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) +{ +	const char *testString = "\\a{!\\b}{c}"; +	//                         0123 456789 +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	logger.reset(); +	assertCommand(reader, "a", 0, 2); +	assertFieldStart(reader, true, 2, 4); +	assertCommand(reader, "b", 4, 6); +	assertFieldEnd(reader, 6, 7); +	ASSERT_FALSE(logger.hasError()); +	assertData(reader, "c", 8, 
9); +	ASSERT_TRUE(logger.hasError()); +	assertEnd(reader, 10, 10); +} + +TEST(OsmlStreamParser, annotationStart) +{ +	const char *testString = "<\\a"; +	//                        0 12 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); +	assertEnd(reader, 3, 3); +} + +TEST(OsmlStreamParser, annotationStartWithName) +{ +	const char *testString = "<\\annotationWithName#aName"; +	//                        0 1234567890123456789012345 +	//                        0          1         2 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertAnnotationStart(reader, "annotationWithName", +	                      Variant::mapType{{"name", "aName"}}, 0, 20); +	assertEnd(reader, 26, 26); +} + +TEST(OsmlStreamParser, annotationStartWithArguments) +{ +	const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; +	//                        0 1234567890123456789012345678901234 +	//                        0          1         2         3 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertAnnotationStart( +	    reader, "annotationWithName", +	    Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); +	assertEnd(reader, 35, 35); +} + +TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) +{ +	const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; +	//                        0 123456789012345678901234567 89012345 67 +	//                        0          1         2          3 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertAnnotationStart( +	    reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, +	    10); +	assertFieldStart(reader, true, 26, 27); +	assertData(reader, "a", 26, 27); +	assertFieldEnd(reader, 33, 35); +	assertAnnotationEnd(reader, "", "", 36, 38); +	assertEnd(reader, 38, 38); +} + 
+TEST(OsmlStreamParser, annotationEnd) +{ +	const char *testString = "\\a>"; +	//                         012 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertAnnotationEnd(reader, "a", "", 0, 2); +	assertEnd(reader, 3, 3); +} + +TEST(OsmlStreamParser, annotationEndWithName) +{ +	const char *testString = "\\a#name>"; +	//                         01234567 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertAnnotationEnd(reader, "a", "name", 0, 2); +	assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, annotationEndWithNameAsArgs) +{ +	const char *testString = "\\a[name=name]>"; +	//                         01234567890123 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertAnnotationEnd(reader, "a", "name", 0, 2); +	assertEnd(reader, 14, 14); +} + +TEST(OsmlStreamParser, errorAnnotationEndWithArguments) +{ +	const char *testString = "\\a[foo=bar]>"; +	//                         012345678901 +	//                         0         1 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	logger.reset(); +	ASSERT_FALSE(logger.hasError()); +	assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); +	ASSERT_TRUE(logger.hasError()); +	assertData(reader, ">", 11, 12); +	assertEnd(reader, 12, 12); +} + +TEST(OsmlStreamParser, closingAnnotation) +{ +	const char *testString = "<\\a>"; +	//                        0 123 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); +	assertData(reader, ">", 3, 4); +	assertEnd(reader, 4, 4); +} + +TEST(OsmlStreamParser, annotationWithFields) +{ +	const char *testString = "a <\\b{c}{d}{!e} f \\> g"; +	//                        012 345678901234567 8901 +	//                        0          1          2 + +	CharReader charReader(testString); + +	
OsmlStreamParser reader(charReader, logger); + +	assertData(reader, "a", 0, 1); +	assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); +	assertFieldStart(reader, false, 5, 6); +	assertData(reader, "c", 6, 7); +	assertFieldEnd(reader, 7, 8); +	assertFieldStart(reader, false, 8, 9); +	assertData(reader, "d", 9, 10); +	assertFieldEnd(reader, 10, 11); +	assertFieldStart(reader, true, 11, 13); +	assertData(reader, "e", 13, 14); +	assertFieldEnd(reader, 14, 15); +	assertData(reader, "f", 16, 17); +	assertAnnotationEnd(reader, "", "", 18, 20); +	assertData(reader, "g", 21, 22); +	assertEnd(reader, 22, 22); +} + +TEST(OsmlStreamParser, annotationStartEscape) +{ +	const char *testString = "<\\%test"; +	//                        0 123456 +	//                        0 + +	CharReader charReader(testString); + +	OsmlStreamParser reader(charReader, logger); + +	assertData(reader, "<%test", 0, 7); +	assertEnd(reader, 7, 7); +}  } diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp new file mode 100644 index 0000000..3293370 --- /dev/null +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -0,0 +1,217 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <gtest/gtest.h> + +#include <core/frontend/TerminalLogger.hpp> +#include <core/common/CharReader.hpp> +#include <core/common/Variant.hpp> + +#include <formats/osxml/OsxmlEventParser.hpp> + +namespace ousia { + +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; + +namespace { +enum class OsxmlEvent { +	COMMAND, +	ANNOTATION_START, +	ANNOTATION_END, +	FIELD_END, +	DATA +}; + +class TestOsxmlEventListener : public OsxmlEvents { +public: +	std::vector<std::pair<OsxmlEvent, Variant>> events; + +	void command(const Variant &name, const Variant::mapType &args) override +	{ +		events.emplace_back(OsxmlEvent::COMMAND, +		                    Variant::arrayType{name, args}); +	} + +	void annotationStart(const Variant &className, +	                     const Variant::mapType &args) override +	{ +		events.emplace_back(OsxmlEvent::ANNOTATION_START, +		                    Variant::arrayType{className, args}); +	} + +	void annotationEnd(const Variant &className, +	                   const Variant &elementName) override +	{ +		events.emplace_back(OsxmlEvent::ANNOTATION_END, +		                    Variant::arrayType{className, elementName}); +	} + +	void fieldEnd() override +	{ +		events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); +	} + +	void data(const Variant &data) override +	{ +		events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); +	} +}; + +static std::vector<std::pair<OsxmlEvent, Variant>> parseXml( +    const char *testString, +    WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) +{ +	TestOsxmlEventListener listener; +	CharReader reader(testString); +	OsxmlEventParser parser(reader, listener, logger); +	parser.setWhitespaceMode(whitespaceMode); +	parser.parse(); +	return listener.events; +} +} + +TEST(OsxmlEventParser, simpleCommandWithArgs) +{ +	const char *testString = "<a name=\"test\" a=\"1\" b=\"2\" c=\"blub\"/>"; +	//                        01234567 89012 3456 78 9012 34 5678 90123 456 +	// 
                       0          1            2            3 + +	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ +	    {OsxmlEvent::COMMAND, +	     Variant::arrayType{ +	         "a", Variant::mapType{ +	                  {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, +	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + +	auto events = parseXml(testString); +	ASSERT_EQ(expectedEvents, events); + +	// Check the locations (I'll do this one time and then just assume it works) +	ASSERT_EQ(1U, events[0].second.asArray()[0].getLocation().getStart()); +	ASSERT_EQ(2U, events[0].second.asArray()[0].getLocation().getEnd()); +	ASSERT_EQ( +	    9U, +	    events[0].second.asArray()[1].asMap()["name"].getLocation().getStart()); +	ASSERT_EQ( +	    13U, +	    events[0].second.asArray()[1].asMap()["name"].getLocation().getEnd()); +	ASSERT_EQ( +	    18U, +	    events[0].second.asArray()[1].asMap()["a"].getLocation().getStart()); +	ASSERT_EQ( +	    19U, events[0].second.asArray()[1].asMap()["a"].getLocation().getEnd()); +	ASSERT_EQ( +	    24U, +	    events[0].second.asArray()[1].asMap()["b"].getLocation().getStart()); +	ASSERT_EQ( +	    25U, events[0].second.asArray()[1].asMap()["b"].getLocation().getEnd()); +	ASSERT_EQ( +	    30U, +	    events[0].second.asArray()[1].asMap()["c"].getLocation().getStart()); +	ASSERT_EQ( +	    34U, events[0].second.asArray()[1].asMap()["c"].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, magicTopLevelTag) +{ +	const char *testString = "<ousia><a/><b/></ousia>"; + +	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ +	    {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, +	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}, +	    {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}}, +	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + +	auto events = parseXml(testString); +	ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, magicTopLevelTagInside) +{ +	const char 
*testString = "<a><ousia/></a>"; + +	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ +	    {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, +	    {OsxmlEvent::COMMAND, +	     Variant::arrayType{{"ousia", Variant::mapType{}}}}, +	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}, +	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + +	auto events = parseXml(testString); +	ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +{ +	const char *testString = "<a>  hello  \n world </a>"; +	//                        012345678901 234567890123 +	//                        0         1          2 + +	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ +	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, +	    {OsxmlEvent::DATA, Variant::arrayType{"  hello  \n world "}}, +	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + +	auto events = parseXml(testString, WhitespaceMode::PRESERVE); +	ASSERT_EQ(expectedEvents, events); + +	// Check the location of the text +	ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); +	ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataTrimWhitespace) +{ +	const char *testString = "<a>  hello  \n world </a>"; +	//                        012345678901 234567890123 +	//                        0         1          2 + +	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ +	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, +	    {OsxmlEvent::DATA, Variant::arrayType{"hello  \n world"}}, +	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + +	auto events = parseXml(testString, WhitespaceMode::TRIM); +	ASSERT_EQ(expectedEvents, events); + +	// Check the location of the text +	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); +	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + 
+TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) +{ +	const char *testString = "<a>  hello  \n world </a>"; +	//                        012345678901 234567890123 +	//                        0         1          2 + +	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ +	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, +	    {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, +	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + +	auto events = parseXml(testString, WhitespaceMode::COLLAPSE); +	ASSERT_EQ(expectedEvents, events); + +	// Check the location of the text +	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); +	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} +} + diff --git a/test/plugins/xml/XmlParserTest.cpp b/test/formats/osxml/OsxmlParserTest.cpp index fdae779..fe8ed34 100644 --- a/test/plugins/xml/XmlParserTest.cpp +++ b/test/formats/osxml/OsxmlParserTest.cpp @@ -30,7 +30,7 @@  #include <core/StandaloneEnvironment.hpp>  #include <plugins/filesystem/FileLocator.hpp> -#include <plugins/xml/XmlParser.hpp> +#include <formats/osxml/OsxmlParser.hpp>  namespace ousia { @@ -41,7 +41,7 @@ extern const Rtti Typesystem;  }  struct XmlStandaloneEnvironment : public StandaloneEnvironment { -	XmlParser xmlParser; +	OsxmlParser parser;  	FileLocator fileLocator;  	XmlStandaloneEnvironment(ConcreteLogger &logger) @@ -52,21 +52,21 @@ struct XmlStandaloneEnvironment : public StandaloneEnvironment {  		registry.registerDefaultExtensions();  		registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}, -		                        {&RttiTypes::Node}, &xmlParser); +		                        {&RttiTypes::Node}, &parser);  		registry.registerResourceLocator(&fileLocator);  	}  };  static TerminalLogger logger(std::cerr, true); -TEST(XmlParser, mismatchedTag) +TEST(OsxmlParser, mismatchedTag)  {  	XmlStandaloneEnvironment env(logger);  	env.parse("mismatchedTag.oxm", "", "", 
RttiSet{&RttiTypes::Document});  	ASSERT_TRUE(logger.hasError());  } -TEST(XmlParser, generic) +TEST(OsxmlParser, generic)  {  	XmlStandaloneEnvironment env(logger);  	env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node}); @@ -186,7 +186,7 @@ static void checkFieldDescriptor(      Handle<Type> primitiveType = nullptr, bool optional = false)  {  	auto res = desc->resolve(&RttiTypes::FieldDescriptor, name); -	ASSERT_EQ(1, res.size()); +	ASSERT_EQ(1U, res.size());  	checkFieldDescriptor(res[0].node, name, parent, children, type,  	                     primitiveType, optional);  } @@ -201,7 +201,7 @@ static void checkFieldDescriptor(  	                     optional);  } -TEST(XmlParser, domainParsing) +TEST(OsxmlParser, domainParsing)  {  	XmlStandaloneEnvironment env(logger);  	Rooted<Node> book_domain_node = @@ -332,10 +332,10 @@ static void checkText(Handle<Node> p, Handle<Node> expectedParent,  {  	checkStructuredEntity(p, expectedParent, doc, "paragraph");  	Rooted<StructuredEntity> par = p.cast<StructuredEntity>(); -	ASSERT_EQ(1, par->getField().size()); +	ASSERT_EQ(1U, par->getField().size());  	checkStructuredEntity(par->getField()[0], par, doc, "text");  	Rooted<StructuredEntity> text = par->getField()[0].cast<StructuredEntity>(); -	ASSERT_EQ(1, text->getField().size()); +	ASSERT_EQ(1U, text->getField().size());  	Handle<StructureNode> d = text->getField()[0];  	ASSERT_FALSE(d == nullptr); @@ -345,7 +345,7 @@ static void checkText(Handle<Node> p, Handle<Node> expectedParent,  	ASSERT_EQ(expected, prim->getContent());  } -TEST(XmlParser, documentParsing) +TEST(OsxmlParser, documentParsing)  {  	XmlStandaloneEnvironment env(logger);  	Rooted<Node> book_document_node = @@ -357,7 +357,7 @@ TEST(XmlParser, documentParsing)  	checkStructuredEntity(doc->getRoot(), doc, doc, "book");  	{  		Rooted<StructuredEntity> book = doc->getRoot(); -		ASSERT_EQ(2, book->getField().size()); +		ASSERT_EQ(2U, book->getField().size());  		checkText(book->getField()[0], book, 
doc,  		          "This might be some introductory text or a dedication.");  		checkStructuredEntity(book->getField()[1], book, doc, "chapter", @@ -365,7 +365,7 @@ TEST(XmlParser, documentParsing)  		{  			Rooted<StructuredEntity> chapter =  			    book->getField()[1].cast<StructuredEntity>(); -			ASSERT_EQ(3, chapter->getField().size()); +			ASSERT_EQ(3U, chapter->getField().size());  			checkText(chapter->getField()[0], chapter, doc,  			          "Here we might have an introduction to the chapter.");  			checkStructuredEntity(chapter->getField()[1], chapter, doc, @@ -374,7 +374,7 @@ TEST(XmlParser, documentParsing)  			{  				Rooted<StructuredEntity> section =  				    chapter->getField()[1].cast<StructuredEntity>(); -				ASSERT_EQ(1, section->getField().size()); +				ASSERT_EQ(1U, section->getField().size());  				checkText(section->getField()[0], section, doc,  				          "Here we might find the actual section content.");  			} @@ -384,7 +384,7 @@ TEST(XmlParser, documentParsing)  			{  				Rooted<StructuredEntity> section =  				    chapter->getField()[2].cast<StructuredEntity>(); -				ASSERT_EQ(1, section->getField().size()); +				ASSERT_EQ(1U, section->getField().size());  				checkText(section->getField()[0], section, doc,  				          "Here we might find the actual section content.");  			} diff --git a/test/core/CodeTokenizerTest.cpp b/test/plugins/css/CodeTokenizerTest.cpp index 2d4d5a7..2d4d5a7 100644 --- a/test/core/CodeTokenizerTest.cpp +++ b/test/plugins/css/CodeTokenizerTest.cpp diff --git a/test/core/TokenizerTest.cpp b/test/plugins/css/TokenizerTest.cpp index c53f93d..c53f93d 100644 --- a/test/core/TokenizerTest.cpp +++ b/test/plugins/css/TokenizerTest.cpp | 
