diff options
71 files changed, 6378 insertions, 2805 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index ab31dab..ec1bb4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,18 @@ FIND_PACKAGE(Boost COMPONENTS system filesystem program_options REQUIRED) SET(UTF8CPP_INCLUDE_DIR "lib/utf8cpp") ################################################################################ +# Check the gcc version # +################################################################################ + +IF(CMAKE_COMPILER_IS_GNUCC) + EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} -dumpversion + OUTPUT_VARIABLE GCC_VERSION) + IF(GCC_VERSION VERSION_LESS 4.8) + ERROR("This projects requires at last GCC 4.8 or newer to be built") + ENDIF() +ENDIF() + +################################################################################ # Inclusion of doxygen # ################################################################################ @@ -115,9 +127,7 @@ ADD_DEFINITIONS( ) ADD_LIBRARY(ousia_core - src/core/CodeTokenizer src/core/Registry - src/core/Tokenizer src/core/XML src/core/RangeSet src/core/common/Argument @@ -152,12 +162,17 @@ ADD_LIBRARY(ousia_core src/core/parser/Parser src/core/parser/ParserContext src/core/parser/ParserScope - src/core/parser/ParserStack - src/core/parser/ParserState + src/core/parser/stack/Callbacks src/core/parser/stack/DocumentHandler src/core/parser/stack/DomainHandler + src/core/parser/stack/GenericParserStates + src/core/parser/stack/Handler src/core/parser/stack/ImportIncludeHandler + src/core/parser/stack/State + src/core/parser/stack/Stack src/core/parser/stack/TypesystemHandler + src/core/parser/utils/Tokenizer + src/core/parser/utils/TokenTrie src/core/resource/Resource src/core/resource/ResourceLocator src/core/resource/ResourceManager @@ -165,14 +180,39 @@ ADD_LIBRARY(ousia_core # src/core/script/ScriptEngine ) -ADD_LIBRARY(ousia_css - src/plugins/css/CSSParser +# Format libraries + +#ADD_LIBRARY(ousia_css +# src/plugins/css/CodeTokenizer +# src/plugins/css/Tokenizer +# src/plugins/css/CSSParser +#) + +#TARGET_LINK_LIBRARIES(ousia_css +# ousia_core +#) + +ADD_LIBRARY(ousia_osml + src/formats/osml/OsmlStreamParser +) + +TARGET_LINK_LIBRARIES(ousia_osml + ousia_core +) + +ADD_LIBRARY(ousia_osxml + src/formats/osxml/OsxmlAttributeLocator + src/formats/osxml/OsxmlEventParser + src/formats/osxml/OsxmlParser ) -TARGET_LINK_LIBRARIES(ousia_css +TARGET_LINK_LIBRARIES(ousia_osxml ousia_core + ${EXPAT_LIBRARIES} ) +# Resource locators + ADD_LIBRARY(ousia_filesystem src/plugins/filesystem/FileLocator src/plugins/filesystem/SpecialPaths @@ -183,6 +223,8 @@ TARGET_LINK_LIBRARIES(ousia_filesystem ${Boost_LIBRARIES} ) +# Output libraries + ADD_LIBRARY(ousia_html src/plugins/html/DemoOutput ) @@ -191,27 +233,6 @@ TARGET_LINK_LIBRARIES(ousia_html ousia_core ) -ADD_LIBRARY(ousia_xml - src/plugins/xml/XmlOutput - src/plugins/xml/XmlParser -) - -TARGET_LINK_LIBRARIES(ousia_xml - ousia_core - ${EXPAT_LIBRARIES} -) - -ADD_LIBRARY(ousia_osdm - src/formats/osdm/DynamicTokenizer - src/formats/osdm/TokenTrie - src/formats/osdm/OsdmStreamParser -) - -TARGET_LINK_LIBRARIES(ousia_osdm - ousia_core -) - - #ADD_LIBRARY(ousia_mozjs # src/plugins/mozjs/MozJsScriptEngine #) @@ -223,18 +244,17 @@ TARGET_LINK_LIBRARIES(ousia_osdm # Command line interface -ADD_EXECUTABLE(ousia - src/cli/Main -) +#ADD_EXECUTABLE(ousia +# src/cli/Main +#) -TARGET_LINK_LIBRARIES(ousia - ousia_core - ousia_css - ousia_filesystem - ousia_html - ousia_xml - ${Boost_LIBRARIES} -) +#TARGET_LINK_LIBRARIES(ousia +# ousia_core +# ousia_filesystem +# ousia_html +# ousia_xml +# ${Boost_LIBRARIES} +#) # If testing is enabled, build the unit tests IF(TEST) @@ -245,10 +265,8 @@ IF(TEST) ) ADD_EXECUTABLE(ousia_test_core - test/core/CodeTokenizerTest test/core/RangeSetTest test/core/RegistryTest - test/core/TokenizerTest test/core/XMLTest test/core/common/ArgumentTest test/core/common/CharReaderTest @@ -274,8 +292,10 @@ IF(TEST) test/core/model/StyleTest test/core/model/TypesystemTest test/core/parser/ParserScopeTest - test/core/parser/ParserStackTest - test/core/parser/ParserStateTest + test/core/parser/stack/StackTest + test/core/parser/stack/StateTest + test/core/parser/utils/TokenizerTest + test/core/parser/utils/TokenTrieTest test/core/resource/ResourceLocatorTest test/core/resource/ResourceRequestTest # test/core/script/FunctionTest @@ -298,15 +318,17 @@ IF(TEST) ousia_filesystem ) - ADD_EXECUTABLE(ousia_test_css - test/plugins/css/CSSParserTest - ) +# ADD_EXECUTABLE(ousia_test_css +# test/plugins/css/Tokenizer +# test/plugins/css/CodeTokenizerTest +# test/plugins/css/CSSParserTest +# ) - TARGET_LINK_LIBRARIES(ousia_test_css - ${GTEST_LIBRARIES} - ousia_core - ousia_css - ) +# TARGET_LINK_LIBRARIES(ousia_test_css +# ${GTEST_LIBRARIES} +# ousia_core +# ousia_css +# ) ADD_EXECUTABLE(ousia_test_html test/plugins/html/DemoOutputTest @@ -318,27 +340,26 @@ IF(TEST) ousia_html ) - ADD_EXECUTABLE(ousia_test_xml - test/plugins/xml/XmlParserTest + ADD_EXECUTABLE(ousia_test_osml + test/formats/osml/OsmlStreamParserTest ) - TARGET_LINK_LIBRARIES(ousia_test_xml + TARGET_LINK_LIBRARIES(ousia_test_osml ${GTEST_LIBRARIES} ousia_core - ousia_xml - ousia_filesystem + ousia_osml ) - ADD_EXECUTABLE(ousia_test_osdm - test/formats/osdm/TokenTrieTest - test/formats/osdm/DynamicTokenizerTest - test/formats/osdm/OsdmStreamParserTest + ADD_EXECUTABLE(ousia_test_osxml + test/formats/osxml/OsxmlEventParserTest + test/formats/osxml/OsxmlParserTest ) - TARGET_LINK_LIBRARIES(ousia_test_osdm + TARGET_LINK_LIBRARIES(ousia_test_osxml ${GTEST_LIBRARIES} ousia_core - ousia_osdm + ousia_osxml + ousia_filesystem ) # ADD_EXECUTABLE(ousia_test_mozjs @@ -354,10 +375,10 @@ IF(TEST) # Register the unit tests ADD_TEST(ousia_test_core ousia_test_core) ADD_TEST(ousia_test_filesystem ousia_test_filesystem) - ADD_TEST(ousia_test_css ousia_test_css) +# ADD_TEST(ousia_test_css ousia_test_css) ADD_TEST(ousia_test_html ousia_test_html) - ADD_TEST(ousia_test_xml ousia_test_xml) - ADD_TEST(ousia_test_osdm ousia_test_osdm) + ADD_TEST(ousia_test_osml ousia_test_osml) + ADD_TEST(ousia_test_osxml ousia_test_osxml) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) ENDIF() @@ -375,6 +396,6 @@ INSTALL(DIRECTORY data/ DESTINATION share/ousia OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE ) -INSTALL(TARGETS ousia - RUNTIME DESTINATION bin -) +#INSTALL(TARGETS ousia +# RUNTIME DESTINATION bin +#) diff --git a/contrib/gtksourceview-3.0/language-specs/ousia.lang b/contrib/gtksourceview-3.0/language-specs/ousia.lang index 4cefac3..7a91d1e 100644 --- a/contrib/gtksourceview-3.0/language-specs/ousia.lang +++ b/contrib/gtksourceview-3.0/language-specs/ousia.lang @@ -24,7 +24,7 @@ <language id="ousia" _name="Ousia" version="2.0" _section="Markup"> <metadata> <property name="mimetypes">text/vnd.ousia</property> - <property name="globs">*.osdm</property> + <property name="globs">*.osml</property> <property name="line-comment-start">%</property> <property name="block-comment-start">%{</property> <property name="block-comment-end">}%</property> diff --git a/contrib/test.osdm b/contrib/test.osml index 100bc77..100bc77 100644 --- a/contrib/test.osdm +++ b/contrib/test.osml diff --git a/src/core/common/Argument.cpp b/src/core/common/Argument.cpp index bfe74a4..b10fad3 100644 --- a/src/core/common/Argument.cpp +++ b/src/core/common/Argument.cpp @@ -302,10 +302,10 @@ bool Arguments::validateMap(Variant::mapType &map, Logger &logger, } else { if (ignoreUnknown) { logger.note(std::string("Ignoring argument \"") + e.first + - std::string("\"")); + std::string("\""), e.second); } else { logger.error(std::string("Unknown argument \"") + e.first + - std::string("\"")); + std::string("\""), e.second); ok = false; } } diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 563fe2a..f8b53c6 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -18,19 +18,13 @@ #include <algorithm> #include <cctype> -#include <limits> #include <string> #include "Utils.hpp" +#include "WhitespaceHandler.hpp" namespace ousia { -std::string Utils::trim(const std::string &s) -{ - std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace); - return s.substr(bounds.first, bounds.second - bounds.first); -} - bool Utils::isIdentifier(const std::string &name) { bool first = true; @@ -43,7 +37,27 @@ bool Utils::isIdentifier(const std::string &name) } first = false; } - return true; + return !first; +} + +bool Utils::isIdentifierOrEmpty(const std::string &name) +{ + return name.empty() || isIdentifier(name); +} + +bool Utils::isNamespacedIdentifier(const std::string &name) +{ + bool first = true; + for (char c : name) { + if (first && !isIdentifierStartCharacter(c)) { + return false; + } + if (!first && (!isIdentifierCharacter(c) && c != ':')) { + return false; + } + first = (c == ':'); + } + return !first; } bool Utils::hasNonWhitepaceChar(const std::string &s) @@ -94,5 +108,29 @@ std::string Utils::extractFileExtension(const std::string &filename) } return std::string{}; } + +std::string Utils::trim(const std::string &s) +{ + std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace); + return s.substr(bounds.first, bounds.second - bounds.first); +} + +std::string Utils::collapse(const std::string &s) +{ + CollapsingWhitespaceHandler h; + appendToWhitespaceHandler(h, s, 0); + return h.toString(); +} + +bool Utils::startsWith(const std::string &s, const std::string &prefix) +{ + return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix; +} + +bool Utils::endsWith(const std::string &s, const std::string &suffix) +{ + return suffix.size() <= s.size() && + s.substr(s.size() - suffix.size(), suffix.size()) == suffix; +} } diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 2c8a5b3..b5a54fc 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -74,16 +74,45 @@ public: } /** - * Returns true if the given character is in [A-Za-z][A-Za-z0-9_-]* + * Returns true if the given string is in + * \code{.txt} + * [A-Za-z][A-Za-z0-9_-]* + * \endCode + * + * @param name is the string that should be tested. + * @return true if the string matches the regular expression given above, + * false otherwise. */ static bool isIdentifier(const std::string &name); /** + * Returns true if the given string is an identifier or an empty string. + */ + static bool isIdentifierOrEmpty(const std::string &name); + + /** + * Returns true if the given string is in + * \code{.txt} + * ([A-Za-z][A-Za-z0-9_-]*)(:[A-Za-z][A-Za-z0-9_-]*)* + * \endCode + * + * @param name is the string that should be tested. + * @return true if the string matches the regular expression given above, + * false otherwise. + */ + static bool isNamespacedIdentifier(const std::string &name); + + /** + * Returns true if the given character is a linebreak character. + */ + static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); } + + /** * Returns true if the given character is a whitespace character. */ static bool isWhitespace(const char c) { - return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'); + return (c == ' ') || (c == '\t') || isLinebreak(c); } /** @@ -95,11 +124,6 @@ public: static bool hasNonWhitepaceChar(const std::string &s); /** - * Returns true if the given character is a whitespace character. - */ - static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); } - - /** * Removes whitespace at the beginning and the end of the given string. * * @param s is the string that should be trimmed. @@ -120,8 +144,25 @@ public: template <class T, class Filter> static std::pair<size_t, size_t> trim(const T &s, Filter f) { + return trim(s, s.size(), f); + } + + /** + * Trims the given string or vector of chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param len is the number of elements in the container. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template <class T, class Filter> + static std::pair<size_t, size_t> trim(const T &s, size_t len, Filter f) + { size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { + for (size_t i = 0; i < len; i++) { if (!f(s[i])) { start = i; break; @@ -129,7 +170,7 @@ public: } size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) { + for (ssize_t i = len - 1; i >= static_cast<ssize_t>(start); i--) { if (!f(s[i])) { end = i + 1; break; @@ -145,6 +186,15 @@ public: } /** + * Collapses the whitespaces in the given string (trims the string and + * replaces all whitespace characters by a single one). + * + * @param s is the string in which the whitespace should be collapsed. + * @return a copy of s with collapsed whitespace. + */ + static std::string collapse(const std::string &s); + + /** * Turns the elements of a collection into a string separated by the * given delimiter. * @@ -205,6 +255,24 @@ public: static std::string extractFileExtension(const std::string &filename); /** + * Checks whether the given string starts with the given prefix. + * + * @param s is the string. + * @param prefix is the string which should be checked for being a prefix of + * s. + */ + static bool startsWith(const std::string &s, const std::string &prefix); + + /** + * Checks whether the given string ends with the given suffix. + * + * @param s is the string. + * @param suffix is the string which should be checked for being a suffix of + * s. + */ + static bool endsWith(const std::string &s, const std::string &suffix); + + /** * Hash functional to be used for enum classes. * See http://stackoverflow.com/a/24847480/2188211 */ diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp index 6eae7e1..ddd17d7 100644 --- a/src/core/common/Variant.hpp +++ b/src/core/common/Variant.hpp @@ -884,6 +884,21 @@ public: } /** + * If the value of the variant already is a string, the markAsMagic function + * marks this string as a "magic" value (a variant which might also be an + * identifier). Throws an exception if the variant is not a string or magic + * value. + */ + void markAsMagic() + { + if (getType() == VariantType::STRING) { + meta.setType(VariantType::MAGIC); + return; + } + throw TypeException{getType(), VariantType::STRING}; + } + + /** * Returns the value of the Variant as boolean, performs type conversion. * * @return the Variant value converted to a boolean value. @@ -1146,10 +1161,7 @@ public: * * @retun true if the */ - bool hasLocation() const - { - return meta.hasLocation(); - } + bool hasLocation() const { return meta.hasLocation(); } /** * Unpacks ans returns the stored source location. Note that the returned @@ -1158,10 +1170,7 @@ public: * * @return the stored SourceLocation. */ - SourceLocation getLocation() const - { - return meta.getLocation(); - } + SourceLocation getLocation() const { return meta.getLocation(); } /** * Packs the given source location and stores it in the metadata. Not all diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp index 3f02226..fb93ad0 100644 --- a/src/core/common/VariantReader.cpp +++ b/src/core/common/VariantReader.cpp @@ -495,7 +495,7 @@ std::pair<bool, Variant::boolType> VariantReader::parseBool(CharReader &reader, bool val = false; CharReaderFork readerFork = reader.fork(); LoggerFork loggerFork = logger.fork(); - auto res = parseToken(readerFork, loggerFork, {}); + auto res = parseToken(readerFork, loggerFork, std::unordered_set<char>{}); if (res.first) { bool valid = false; if (res.second == "true") { diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp index 1232f6e..44132a0 100644 --- a/src/core/common/VariantReader.hpp +++ b/src/core/common/VariantReader.hpp @@ -322,7 +322,7 @@ public: */ static std::pair<bool, Variant> parseTyped( VariantType type, CharReader &reader, Logger &logger, - const std::unordered_set<char> &delims = {}); + const std::unordered_set<char> &delims = std::unordered_set<char>{}); /** * Tries to parse an instance of the given type from the given string. The * called method is one of the parse methods defined here and adheres to the diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp new file mode 100644 index 0000000..72a2291 --- /dev/null +++ b/src/core/common/Whitespace.hpp @@ -0,0 +1,60 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Whitespace.hpp + * + * Contains the WhitespaceMode enum used in various places. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_WHITESPACE_HPP_ +#define _OUSIA_WHITESPACE_HPP_ + +#include <string> +#include <utility> + +namespace ousia { + +/** + * Enum specifying the whitespace handling mode of the tokenizer and the + * parsers. + */ +enum class WhitespaceMode { + /** + * Preserves all whitespaces as they are found in the source file. + */ + PRESERVE, + + /** + * Trims whitespace at the beginning and the end of the found text. + */ + TRIM, + + /** + * Whitespaces are trimmed and collapsed, multiple whitespace characters + * are replaced by a single space character. + */ + COLLAPSE +}; + +} + +#endif /* _OUSIA_WHITESPACE_HPP_ */ + diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp new file mode 100644 index 0000000..ed52ea3 --- /dev/null +++ b/src/core/common/WhitespaceHandler.hpp @@ -0,0 +1,284 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file WhitespaceHandler.hpp + * + * Contains the WhitespaceHandler classes which are used in multiple places to + * trim, compact or preserve whitespaces while at the same time maintaining the + * position information associated with the input strings. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_ +#define _OUSIA_WHITESPACE_HANDLER_HPP_ + +#include <string> +#include <vector> + +#include "Utils.hpp" + +namespace ousia { + +/** + * WhitespaceHandler is a based class that can be used to collect text on a + * character-by-character basis. Note that this class and its descendants are + * hoped to be inlined by the compiler (and used in conjunction with templates), + * thus they are fully defined inside this header. + */ +class WhitespaceHandler { +public: + /** + * Start position of the extracted text. + */ + size_t textStart; + + /** + * End position of the extracted text. + */ + size_t textEnd; + + /** + * Buffer containing the extracted text. + */ + std::vector<char> textBuf; + + /** + * Constructor of the TextHandlerBase base class. Initializes the start and + * end position with zeros. + */ + WhitespaceHandler() : textStart(0), textEnd(0) {} + + /** + * Returns true if this whitespace handler has found any text and a text + * token could be emitted. + * + * @return true if the internal data buffer is non-empty. + */ + bool hasText() { return !textBuf.empty(); } + + /** + * Returns the content of the WhitespaceHandler as string. + */ + std::string toString() const + { + return std::string(textBuf.data(), textBuf.size()); + } +}; + +/** + * The PreservingWhitespaceHandler class preserves all characters unmodified, + * including whitepace characters. + */ +class PreservingWhitespaceHandler : public WhitespaceHandler { +public: + /** + * Appends the given character to the internal text buffer, does not + * eliminate whitespace. + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + */ + void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd); + } + + /** + * Static version of PreservingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + */ + static void append(char c, size_t start, size_t end, + std::vector<char> &textBuf, size_t &textStart, + size_t &textEnd) + { + if (textBuf.empty()) { + textStart = start; + } + textEnd = end; + textBuf.push_back(c); + } +}; + +/** + * The TrimmingTextHandler class trims all whitespace characters at the begin + * and the end of a text section but leaves all other characters unmodified, + * including whitepace characters. + */ +class TrimmingWhitespaceHandler : public WhitespaceHandler { +public: + /** + * Buffer used internally to temporarily store all whitespace characters. + * They are only added to the output buffer if another non-whitespace + * character is reached. + */ + std::vector<char> whitespaceBuf; + + /** + * Appends the given character to the internal text buffer, eliminates + * whitespace characters at the begin and end of the text. + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + */ + void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); + } + + /** + * Static version of TrimmingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param whitespaceBuf is a reference at the buffer for storing whitespace + * characters. + */ + static void append(char c, size_t start, size_t end, + std::vector<char> &textBuf, size_t &textStart, + size_t &textEnd, std::vector<char> &whitespaceBuf) + { + // Handle whitespace characters + if (Utils::isWhitespace(c)) { + if (!textBuf.empty()) { + whitespaceBuf.push_back(c); + } + return; + } + + // Set the start and end offset correctly + if (textBuf.empty()) { + textStart = start; + } + textEnd = end; + + // Store the character + if (!whitespaceBuf.empty()) { + textBuf.insert(textBuf.end(), whitespaceBuf.begin(), + whitespaceBuf.end()); + whitespaceBuf.clear(); + } + textBuf.push_back(c); + } +}; + +/** + * The CollapsingTextHandler trims characters at the beginning and end of the + * text and reduced multiple whitespace characters to a single blank. + */ +class CollapsingWhitespaceHandler : public WhitespaceHandler { +public: + /** + * Flag set to true if a whitespace character was reached. + */ + bool hasWhitespace = false; + + /** + * Appends the given character to the internal text buffer, eliminates + * redundant whitespace characters. + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + */ + void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); + } + + /** + * Static version of CollapsingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param hasWhitespace is a reference at the "hasWhitespace" flag. + */ + static void append(char c, size_t start, size_t end, + std::vector<char> &textBuf, size_t &textStart, + size_t &textEnd, bool &hasWhitespace) + { + // Handle whitespace characters + if (Utils::isWhitespace(c)) { + if (!textBuf.empty()) { + hasWhitespace = true; + } + return; + } + + // Set the start and end offset correctly + if (textBuf.empty()) { + textStart = start; + } + textEnd = end; + + // Store the character + if (hasWhitespace) { + textBuf.push_back(' '); + hasWhitespace = false; + } + textBuf.push_back(c); + } +}; + +/** + * Function that can be used to append the given buffer (e.g. a string or a + * vector) to the whitespace handler. + * + * @tparam WhitespaceHandler is one of the WhitespaceHandler classes. + * @tparam Buffer is an iterable type. + * @param handler is the handler to which the characters of the Buffer should be + * appended. + * @param buf is the buffer from which the characters should be read. + * @param start is the start byte offset. Each character is counted as one byte. + */ +template <typename WhitespaceHandler, typename Buffer> +inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, + size_t start) +{ + for (auto elem : buf) { + handler.append(elem, start, start + 1); + start++; + } +} +} + +#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */ + diff --git a/src/core/model/Node.cpp b/src/core/model/Node.cpp index 39ee2e4..ce15cad 100644 --- a/src/core/model/Node.cpp +++ b/src/core/model/Node.cpp @@ -448,7 +448,7 @@ bool Node::doValidate(Logger &logger) const { return true; } bool Node::validateName(Logger &logger) const { - if (!Utils::isIdentifier(name)) { + if (!Utils::isIdentifierOrEmpty(name)) { logger.error(type()->name + std::string(" name \"") + name + std::string("\" is not a valid identifier"), this); diff --git a/src/core/model/Typesystem.cpp b/src/core/model/Typesystem.cpp index 506bd31..df2b9fb 100644 --- a/src/core/model/Typesystem.cpp +++ b/src/core/model/Typesystem.cpp @@ -21,7 +21,6 @@ #include <core/common/RttiBuilder.hpp> #include <core/common/Utils.hpp> #include <core/common/VariantConverter.hpp> -#include <core/common/VariantReader.hpp> namespace ousia { @@ -68,65 +67,6 @@ bool Type::build(Variant &data, Logger &logger) const return build(data, logger, NullMagicCallback); } -std::pair<bool, Variant> Type::read(CharReader &reader, Logger &logger, - const std::unordered_set<char> &delims) -{ - // try all variant types of this type and use the first successful one. - Variant v; - bool success = false; - for (auto t : getVariantTypes()) { - auto res = VariantReader::parseTyped(t, reader, logger, delims); - if (res.first) { - v = res.second; - success = true; - break; - } - } - - if (!success) { - return std::make_pair(false, Variant{}); - } - if (!build(v, logger)) { - return std::make_pair(false, Variant{}); - } - return std::make_pair(true, v); -} - -std::pair<bool, Variant> Type::read(const std::string &str, Logger &logger, - SourceId sourceId, size_t offs) -{ - // try all variant types of this type and use the first successful one. - Variant v; - bool success = false; - std::vector<LoggerFork> forks; - auto vts = getVariantTypes(); - for (auto vt : vts) { - forks.emplace_back(logger.fork()); - auto res = - VariantReader::parseTyped(vt, str, forks.back(), sourceId, offs); - if (res.first) { - v = res.second; - success = true; - forks.back().commit(); - break; - } - } - - if (!success) { - logger.error("Could not read data with any of the possible types:"); - for (size_t t = 0; t < forks.size(); t++) { - logger.note(std::string(Variant::getTypeName(vts[t])) + ":", - SourceLocation{}, MessageMode::NO_CONTEXT); - forks[t].commit(); - } - return std::make_pair(false, Variant{}); - } - if (!build(v, logger)) { - return std::make_pair(false, Variant{}); - } - return std::make_pair(true, v); -} - bool Type::doCheckIsa(Handle<const Type> type) const { return false; } bool Type::checkIsa(Handle<const Type> type) const diff --git a/src/core/model/Typesystem.hpp b/src/core/model/Typesystem.hpp index ca4f206..39f777f 100644 --- a/src/core/model/Typesystem.hpp +++ b/src/core/model/Typesystem.hpp @@ -59,7 +59,27 @@ class SystemTypesystem; */ class Type : public Node { public: - enum class MagicCallbackResult { NOT_FOUND, FOUND_INVALID, FOUND_VALID }; + /** + * Enum describing the result of the MagicCallback. + */ + enum class MagicCallbackResult { + /** + * A magic value with the given name could not be resolved. + */ + NOT_FOUND, + + /** + * A magic value with the given name could be resolved, but is of the + * wrong type. + */ + FOUND_INVALID, + + /** + * A magic value with the given name could be resolved and is of the + * correct type. + */ + FOUND_VALID + }; /** * Callback function called when a variant with "magic" value is reached. @@ -70,7 +90,9 @@ public: * to which the value of the looked up constant should be written. * @param type is a const pointer at the type. TODO: Replace this with a * "ConstHandle". - * @return true if a constant was found, false otherwise. + * @return a MagicCallbackResult describing whether the magic value could + * not be resolved, could be resolved but is of the wrong type or could be + * resolved and is of the correct type. */ using MagicCallback = std::function<MagicCallbackResult(Variant &data, const Type *type)>; @@ -169,32 +191,6 @@ public: bool build(Variant &data, Logger &logger) const; /** - * Tries to parse an instance of this type from the given stream. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned - * at the end of the type instance (or the delimiting character). - * @param delims is a set of characters which will terminate the typed - * instance if the according parser uses delimiting characters. - * These characters are not included in the result. May not be nullptr. - */ - std::pair<bool, Variant> read(CharReader &reader, Logger &logger, - const std::unordered_set<char> &delims = {}); - - /** - * Tries to parse an instance of this type from the given string. - * - * @param str is the string from which the value should be read. - * @param sourceId is an optional descriptor of the source file from which - * the element is being read. - * @param offs is the by offset in the source file at which the string - * starts. - */ - std::pair<bool, Variant> read(const std::string &str, Logger &logger, - SourceId sourceId = InvalidSourceId, - size_t offs = 0); - - /** * Returns true if and only if the given Variant adheres to this Type. In * essence this just calls the build method on a copy of the input Variant. * @@ -230,23 +226,6 @@ public: { return this->getParent().cast<Typesystem>(); } - - /** - * Returns the VariantTypes whose instances are proper input for building an - * instance of this type. - * More specifically: Every returned VariantType T should be such that: - * If a string s can be parsed according to T to a Variant v then the call - * build(v, logger) should only fail (return false) if the variant content - * does not adhere to the specific type specification. But it should be a - * properly typed input for build. - * The order of the types returned by this function determines the order in - * which a parser should try to interpret an input string s. - * - * @return the VariantTypes that arethe basis for parsing an instance of - *this - * type. - */ - virtual std::vector<VariantType> getVariantTypes() const = 0; }; /** @@ -287,16 +266,6 @@ public: * @return a variant containing an empty string. */ Variant create() const override { return Variant{""}; } - - /** - * Returns the String VariantType. - * - * @return the String VariantType. - */ - std::vector<VariantType> getVariantTypes() const override - { - return {VariantType::STRING}; - } }; /** @@ -336,16 +305,6 @@ public: * @return the integer value zero. */ Variant create() const override { return Variant{0}; } - - /** - * Returns the Int VariantType. - * - * @return the Int VariantType. - */ - std::vector<VariantType> getVariantTypes() const override - { - return {VariantType::INT}; - } }; /** @@ -385,16 +344,6 @@ public: * @return the double value zero. */ Variant create() const override { return Variant{0.0}; } - - /** - * Returns the Double VariantType. - * - * @return the Double VariantType. - */ - std::vector<VariantType> getVariantTypes() const override - { - return {VariantType::DOUBLE}; - } }; /** @@ -434,16 +383,6 @@ public: * @return a Variant with the boolean value false. */ Variant create() const override { return Variant{false}; } - - /** - * Returns the bool VariantType. - * - * @return the bool VariantType. - */ - std::vector<VariantType> getVariantTypes() const override - { - return {VariantType::BOOL}; - } }; /** @@ -609,16 +548,6 @@ public: * name. Throws a LoggableException if the string does not exist. */ Ordinal valueOf(const std::string &name) const; - - /** - * Returns the int and string VariantTypes. - * - * @return the int and string VariantTypes. - */ - std::vector<VariantType> getVariantTypes() const override - { - return {VariantType::INT, VariantType::STRING}; - } }; /** @@ -1054,15 +983,6 @@ public: * @return true if the requested attribute name exists, false otherwise. */ bool hasAttribute(const std::string &name) const; - /** - * Returns the array and map VariantTypes. - * - * @return the array and map VariantTypes. - */ - std::vector<VariantType> getVariantTypes() const override - { - return {VariantType::MAP}; - } }; /** @@ -1128,15 +1048,6 @@ public: * @return Rooted reference pointing at the innerType. */ Rooted<Type> getInnerType() { return innerType; } - /** - * Returns the array VariantType. - * - * @return the array VariantType. - */ - std::vector<VariantType> getVariantTypes() const override - { - return {VariantType::ARRAY}; - } }; /** @@ -1175,20 +1086,6 @@ public: * @return a Variant instance with nullptr value. */ Variant create() const override; - /** - * Returns all parseable VariantTypes (bool, int, double, array, map, - *cardinality, object, string). - * - * @return all parseable VariantTypes (bool, int, double, array, map, - *cardinality, object, string). - */ - std::vector<VariantType> getVariantTypes() const override - { - return {VariantType::BOOL, VariantType::INT, - VariantType::DOUBLE, VariantType::ARRAY, - VariantType::MAP, VariantType::CARDINALITY, - VariantType::OBJECT, VariantType::STRING}; - } }; /** diff --git a/src/core/parser/ParserScope.cpp b/src/core/parser/ParserScope.cpp index 3929abf..ce3dc94 100644 --- a/src/core/parser/ParserScope.cpp +++ b/src/core/parser/ParserScope.cpp @@ -351,8 +351,7 @@ bool ParserScope::resolveType(const std::string &name, Handle<Node> owner, return resolveType(Utils::split(name, '.'), owner, logger, resultCallback); } -bool ParserScope::resolveValue(Variant &data, Handle<Type> type, - Handle<Node> owner, Logger &logger) +bool ParserScope::resolveValue(Variant &data, Handle<Type> type, Logger &logger) { return type->build( data, logger, @@ -408,7 +407,7 @@ bool ParserScope::resolveTypeWithValue(const std::vector<std::string> &path, [=](Handle<Node> resolved, Handle<Node> owner, Logger &logger) mutable { if (resolved != nullptr) { Rooted<Type> type = resolved.cast<Type>(); - scope.resolveValue(*valuePtr, type, owner, logger); + scope.resolveValue(*valuePtr, type, logger); } // Call the result callback with the type diff --git a/src/core/parser/ParserScope.hpp b/src/core/parser/ParserScope.hpp index 58fc037..185b845 100644 --- a/src/core/parser/ParserScope.hpp +++ b/src/core/parser/ParserScope.hpp @@ -702,13 +702,11 @@ public: * (even in inner structures). The data will be passed to the "build" * function of the given type. * @param type is the Typesystem type the data should be interpreted with. - * @param owner is the node for which the resolution takes place. * @param logger is the logger instance into which resolution problems * should be logged. * @return true if the value was successfully built. */ - bool resolveValue(Variant &data, Handle<Type> type, Handle<Node> owner, - Logger &logger); + bool resolveValue(Variant &data, Handle<Type> type, Logger &logger); /** * Resolves a type and makes sure the corresponding value is of the correct diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp deleted file mode 100644 index 1265851..0000000 --- a/src/core/parser/ParserStack.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <sstream> - -#include <core/common/Utils.hpp> -#include <core/common/Exceptions.hpp> -#include <core/model/Project.hpp> - -#include "ParserScope.hpp" -#include "ParserStack.hpp" - -namespace ousia { - -/* A default handler */ - -/** - * The DefaultHandler class is used in case no element handler is specified in - * the ParserState descriptor. - */ -class DefaultHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override {} - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new DefaultHandler{handlerData}; - } -}; - -/* Class Handler */ - -void Handler::data(const std::string &data, int field) -{ - if (Utils::hasNonWhitepaceChar(data)) { - logger().error("Expected command but found character data."); - } -} - -/* Class ParserStack */ - -/** - * Returns an Exception that should be thrown when a currently invalid command - * is thrown. - */ -static LoggableException InvalidCommand(const std::string &name, - const std::set<std::string> &expected) -{ - if (expected.empty()) { - return LoggableException{ - std::string{"No nested elements allowed, but got \""} + name + - std::string{"\""}}; - } else { - return LoggableException{ - std::string{"Expected "} + - (expected.size() == 1 ? std::string{"\""} - : std::string{"one of \""}) + - Utils::join(expected, "\", \"") + std::string{"\", but got \""} + - name + std::string{"\""}}; - } -} - -ParserStack::ParserStack( - ParserContext &ctx, - const std::multimap<std::string, const ParserState *> &states) - : ctx(ctx), states(states) -{ -} - -bool ParserStack::deduceState() -{ - // Assemble all states - std::vector<const ParserState *> states; - for (const auto &e : this->states) { - states.push_back(e.second); - } - - // Fetch the type signature of the scope and derive all possible states, - // abort if no unique parser state was found - std::vector<const ParserState *> possibleStates = - ParserStateDeductor(ctx.getScope().getStackTypeSignature(), states) - .deduce(); - if (possibleStates.size() != 1) { - ctx.getLogger().error( - "Error while including file: Cannot deduce parser state."); - return false; - } - - // Switch to this state by creating a dummy handler - const ParserState *state = possibleStates[0]; - Handler *handler = - DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}}); - stack.emplace(handler); - return true; -} - -std::set<std::string> ParserStack::expectedCommands() -{ - const ParserState *currentState = &(this->currentState()); - std::set<std::string> res; - for (const auto &v : states) { - if (v.second->parents.count(currentState)) { - res.insert(v.first); - } - } - return res; -} - -const ParserState &ParserStack::currentState() -{ - return stack.empty() ? ParserStates::None : stack.top()->state(); -} - -std::string ParserStack::currentCommandName() -{ - return stack.empty() ? std::string{} : stack.top()->name(); -} - -const ParserState *ParserStack::findTargetState(const std::string &name) -{ - const ParserState *currentState = &(this->currentState()); - auto range = states.equal_range(name); - for (auto it = range.first; it != range.second; it++) { - const ParserStateSet &parents = it->second->parents; - if (parents.count(currentState) || parents.count(&ParserStates::All)) { - return it->second; - } - } - - return nullptr; -} - -void ParserStack::start(const std::string &name, Variant::mapType &args, - const SourceLocation &location) -{ - ParserState const *targetState = findTargetState(name); -// TODO: Andreas, please improve this. -// if (!Utils::isIdentifier(name)) { -// throw LoggableException(std::string("Invalid identifier \"") + name + -// std::string("\"")); -// } - - if (targetState == nullptr) { - targetState = findTargetState("*"); - } - if (targetState == nullptr) { - throw InvalidCommand(name, expectedCommands()); - } - - // Fetch the associated constructor - HandlerConstructor ctor = targetState->elementHandler - ? targetState->elementHandler - : DefaultHandler::create; - - // Canonicalize the arguments, allow additional arguments - targetState->arguments.validateMap(args, ctx.getLogger(), true); - - // Instantiate the handler and call its start function - Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); - handler->start(args); - stack.emplace(handler); -} - -void ParserStack::start(std::string name, const Variant::mapType &args, - const SourceLocation &location) -{ - Variant::mapType argsCopy(args); - start(name, argsCopy); -} - -void ParserStack::end() -{ - // Check whether the current command could be ended - if (stack.empty()) { - throw LoggableException{"No command to end."}; - } - - // Remove the current HandlerInstance from the stack - std::shared_ptr<Handler> inst{stack.top()}; - stack.pop(); - - // Call the end function of the last Handler - inst->end(); -} - -void ParserStack::data(const std::string &data, int field) -{ - // Check whether there is any command the data can be sent to - if (stack.empty()) { - throw LoggableException{"No command to receive data."}; - } - - // Pass the data to the current Handler instance - stack.top()->data(data, field); -} -} - diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp deleted file mode 100644 index efc4e4a..0000000 --- a/src/core/parser/ParserStack.hpp +++ /dev/null @@ -1,361 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file ParserStack.hpp - * - * Helper classes for document or description parsers. Contains the ParserStack - * class, which is an pushdown automaton responsible for accepting commands in - * the correct order and calling specified handlers. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PARSER_STACK_HPP_ -#define _OUSIA_PARSER_STACK_HPP_ - -#include <cstdint> - -#include <map> -#include <memory> -#include <set> -#include <stack> -#include <vector> - -#include <core/common/Variant.hpp> -#include <core/common/Logger.hpp> -#include <core/common/Argument.hpp> - -#include "Parser.hpp" -#include "ParserContext.hpp" -#include "ParserState.hpp" - -namespace ousia { - -/** - * Struct collecting all the data that is being passed to a Handler instance. - */ -struct HandlerData { - /** - * Reference to the ParserContext instance that should be used to resolve - * references to nodes in the Graph. - */ - ParserContext &ctx; - - /** - * Contains the name of the tag that is being handled. - */ - const std::string name; - - /** - * Contains the current state of the state machine. - */ - const ParserState &state; - - /** - * Contains the state of the state machine when the parent node was handled. - */ - const ParserState &parentState; - - /** - * Current source code location. - */ - const SourceLocation location; - - /** - * Constructor of the HandlerData class. - * - * @param ctx is the parser context the handler should be executed in. - * @param name is the name of the string. - * @param state is the state this handler was called for. - * @param parentState is the state of the parent command. - * @param location is the location at which the handler is created. - */ - HandlerData(ParserContext &ctx, std::string name, const ParserState &state, - const ParserState &parentState, const SourceLocation location) - : ctx(ctx), - name(std::move(name)), - state(state), - parentState(parentState), - location(location){}; -}; - -/** - * The handler class provides a context for handling an XML tag. It has to be - * overridden and registered in the StateStack class to form handlers for - * concrete XML tags. - */ -class Handler { -private: - /** - * Structure containing the internal handler data. - */ - const HandlerData handlerData; - -public: - /** - * Constructor of the Handler class. - * - * @param data is a structure containing all data being passed to the - * handler. - */ - Handler(const HandlerData &handlerData) : handlerData(handlerData){}; - - /** - * Virtual destructor. - */ - virtual ~Handler(){}; - - /** - * Returns a reference at the ParserContext. - * - * @return a reference at the ParserContext. - */ - ParserContext &context() { return handlerData.ctx; } - - /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. - */ - const std::string &name() { return handlerData.name; } - - /** - * Returns a reference at the ParserScope instance. - * - * @return a reference at the ParserScope instance. - */ - ParserScope &scope() { return handlerData.ctx.getScope(); } - - /** - * Returns a reference at the Manager instance which manages all nodes. - * - * @return a referance at the Manager instance. - */ - Manager &manager() { return handlerData.ctx.getManager(); } - - /** - * Returns a reference at the Logger instance used for logging error - * messages. - * - * @return a reference at the Logger instance. - */ - Logger &logger() { return handlerData.ctx.getLogger(); } - - /** - * Returns a reference at the Project Node, representing the project into - * which the file is currently being parsed. - * - * @return a referance at the Project Node. - */ - Rooted<Project> project() { return handlerData.ctx.getProject(); } - - /** - * Reference at the ParserState descriptor for which this Handler was - * created. - * - * @return a const reference at the constructing ParserState descriptor. - */ - const ParserState &state() { return handlerData.state; } - - /** - * Reference at the ParserState descriptor of the parent state of the state - * for which this Handler was created. Set to ParserStates::None if there - * is no parent state. - * - * @return a const reference at the parent state of the constructing - * ParserState descriptor. - */ - const ParserState &parentState() { return handlerData.parentState; } - - /** - * Returns the current location in the source file. - * - * @return the current location in the source file. - */ - SourceLocation location() { return handlerData.location; } - - /** - * Called when the command that was specified in the constructor is - * instanciated. - * - * @param args is a map from strings to variants (argument name and value). - */ - virtual void start(Variant::mapType &args) = 0; - - /** - * Called whenever the command for which this handler is defined ends. - */ - virtual void end() = 0; - - /** - * Called whenever raw data (int the form of a string) is available for the - * Handler instance. In the default handler an exception is raised if the - * received data contains non-whitespace characters. - * - * @param data is a pointer at the character data that is available for the - * Handler instance. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). - */ - virtual void data(const std::string &data, int field); -}; - -/** - * HandlerConstructor is a function pointer type used to create concrete - * instances of the Handler class. - * - * @param handlerData is the data that should be passed to the new handler - * instance. - * @return a newly created handler instance. - */ -using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); - -/** - * The ParserStack class is a pushdown automaton responsible for turning a - * command stream into a tree of Node instances. - */ -class ParserStack { -private: - /** - * Reference at the parser context. - */ - ParserContext &ctx; - - /** - * Map containing all registered command names and the corresponding - * state descriptors. - */ - const std::multimap<std::string, const ParserState *> &states; - - /** - * Internal stack used for managing the currently active Handler instances. - */ - std::stack<std::shared_ptr<Handler>> stack; - - /** - * Used internally to get all expected command names for the current state. - * This function is used to build error messages. - * - * @return a set of strings containing the names of the expected commands. - */ - std::set<std::string> expectedCommands(); - - /** - * Returns the targetState for a command with the given name that can be - * reached from for the current state. - * - * @param name is the name of the requested command. - * @return nullptr if no target state was found, a pointer at the target - *state - * otherwise. - */ - const ParserState *findTargetState(const std::string &name); - -public: - /** - * Creates a new instance of the ParserStack class. - * - * @param ctx is the parser context the parser stack is working on. - * @param states is a map containing the command names and pointers at the - * corresponding ParserState instances. - */ - ParserStack(ParserContext &ctx, - const std::multimap<std::string, const ParserState *> &states); - - /** - * Tries to reconstruct the parser state from the Scope instance of the - * ParserContext given in the constructor. This functionality is needed for - * including files,as the Parser of the included file needs to be brought to - + an equivalent state as the one in the including file. - * - * @param scope is the ParserScope instance from which the ParserState - * should be reconstructed. - * @param logger is the logger instance to which error messages should be - * written. - * @return true if the operation was sucessful, false otherwise. - */ - bool deduceState(); - - /** - * Returns the state the ParserStack instance currently is in. - * - * @return the state of the currently active Handler instance or STATE_NONE - * if no handler is on the stack. - */ - const ParserState ¤tState(); - - /** - * Returns the command name that is currently being handled. - * - * @return the name of the command currently being handled by the active - * Handler instance or an empty string if no handler is currently active. - */ - std::string currentCommandName(); - - /** - * Function that should be called whenever a new command starts. - * - * @param name is the name of the command. - * @param args is a map from strings to variants (argument name and value). - * Note that the passed map will be modified. - * @param location is the location in the source file at which the command - * starts. - */ - void start(const std::string &name, Variant::mapType &args, - const SourceLocation &location = SourceLocation{}); - - /** - * Function that should be called whenever a new command starts. - * - * @param name is the name of the command. - * @param args is a map from strings to variants (argument name and value). - * @param location is the location in the source file at which the command - * starts. - */ - void start(std::string name, - const Variant::mapType &args = Variant::mapType{}, - const SourceLocation &location = SourceLocation{}); - - /** - * Function called whenever a command ends. - */ - void end(); - - /** - * Function that should be called whenever data is available for the - * command. - * - * @param data is the data that should be passed to the handler. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). - */ - void data(const std::string &data, int field = 0); - - /** - * Returns a reference to the parser context the parser stack is currently - * working on. - * - * @return a reference to the parser context. - */ - ParserContext &getContext() { return ctx; } -}; -} - -#endif /* _OUSIA_PARSER_STACK_HPP_ */ - diff --git a/src/core/parser/generic/GenericParser.cpp b/src/core/parser/generic/GenericParser.cpp deleted file mode 100644 index e69de29..0000000 --- a/src/core/parser/generic/GenericParser.cpp +++ /dev/null diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp new file mode 100644 index 0000000..6ebc549 --- /dev/null +++ b/src/core/parser/stack/Callbacks.cpp @@ -0,0 +1,23 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Callbacks.hpp" + +namespace ousia { +} + diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp new file mode 100644 index 0000000..9c61000 --- /dev/null +++ b/src/core/parser/stack/Callbacks.hpp @@ -0,0 +1,99 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Callbacks.hpp + * + * Contains an interface defining the callbacks that can be directed from a + * StateHandler to the StateStack, and from the StateStack to + * the actual parser. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_CALLBACKS_HPP_ +#define _OUSIA_PARSER_STACK_CALLBACKS_HPP_ + +#include <string> + +#include <core/common/Whitespace.hpp> + +namespace ousia { +namespace parser_stack { + +/** + * Interface defining a set of callback functions that act as a basis for the + * StateStackCallbacks and the ParserCallbacks. + */ +class Callbacks { +public: + /** + * Virtual descructor. + */ + virtual ~Callbacks() {}; + + /** + * Sets the whitespace mode that specifies how string data should be + * processed. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; + + /** + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + virtual void registerToken(const std::string &token) = 0; + + /** + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. + */ + virtual void unregisterToken(const std::string &token) = 0; +}; + +/** + * Interface defining the callback functions that can be passed from a + * StateStack to the underlying parser. + */ +class ParserCallbacks : public Callbacks { + /** + * Checks whether the given token is supported by the parser. The parser + * returns true, if the token is supported, false if this token cannot be + * registered. Note that parsers that do not support the registration of + * tokens at all should always return "true". + * + * @param token is the token that should be checked for support. + * @return true if the token is generally supported (or the parser does not + * support registering tokens at all), false if the token is not supported, + * because e.g. it is a reserved token or it interferes with other tokens. + */ + virtual bool supportsToken(const std::string &token) = 0; +}; + +} +} + +#endif /* _OUSIA_PARSER_STACK_CALLBACKS_HPP_ */ + diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index 3647db3..d514701 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -16,28 +16,35 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "DocumentHandler.hpp" - #include <algorithm> #include <core/common/RttiBuilder.hpp> #include <core/common/Utils.hpp> +#include <core/common/VariantReader.hpp> #include <core/model/Document.hpp> #include <core/model/Domain.hpp> +#include <core/model/Project.hpp> #include <core/model/Typesystem.hpp> #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DocumentHandler.hpp" +#include "State.hpp" namespace ousia { +namespace parser_stack { /* DocumentHandler */ -void DocumentHandler::start(Variant::mapType &args) +bool DocumentHandler::start(Variant::mapType &args) { Rooted<Document> document = - project()->createDocument(args["name"].asString()); + context().getProject()->createDocument(args["name"].asString()); document->setLocation(location()); scope().push(document); scope().setFlag(ParserFlag::POST_HEAD, false); + + return true; } void DocumentHandler::end() { scope().pop(); } @@ -48,7 +55,7 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode, std::string &fieldName, DocumentEntity *&parent, bool &inField) { - // check if the parent in the structure tree was an explicit field + // Check if the parent in the structure tree was an explicit field // reference. inField = parentNode->isa(&RttiTypes::DocumentField); if (inField) { @@ -56,10 +63,11 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode, parentNode = scope().selectOrThrow( {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity}); } else { - // if it wasn't an explicit reference, we use the default field. + // If it wasn't an explicit reference, we use the default field. fieldName = DEFAULT_FIELD_NAME; } - // reference the parent entity explicitly. + + // Reference the parent entity explicitly. parent = nullptr; if (parentNode->isa(&RttiTypes::StructuredEntity)) { parent = static_cast<DocumentEntity *>( @@ -70,17 +78,13 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode, } } -static void createPath(const std::string &firstFieldName, - const NodeVector<Node> &path, DocumentEntity *&parent) +static void createPath(const NodeVector<Node> &path, DocumentEntity *&parent, + size_t p0 = 1) { - // add the first element - parent = static_cast<DocumentEntity *>( - parent->createChildStructuredEntity(path[0].cast<StructuredClass>(), - Variant::mapType{}, firstFieldName, - "").get()); - + // TODO (@benjamin): These should be pushed onto the scope and poped once + // the scope is left. Otherwise stuff may not be correclty resolved. size_t S = path.size(); - for (size_t p = 2; p < S; p = p + 2) { + for (size_t p = p0; p < S; p = p + 2) { parent = static_cast<DocumentEntity *>( parent->createChildStructuredEntity( path[p].cast<StructuredClass>(), Variant::mapType{}, @@ -88,18 +92,19 @@ static void createPath(const std::string &firstFieldName, } } -static void createPath(const NodeVector<Node> &path, DocumentEntity *&parent) +static void createPath(const std::string &firstFieldName, + const NodeVector<Node> &path, DocumentEntity *&parent) { - size_t S = path.size(); - for (size_t p = 1; p < S; p = p + 2) { - parent = static_cast<DocumentEntity *>( - parent->createChildStructuredEntity( - path[p].cast<StructuredClass>(), Variant::mapType{}, - path[p - 1]->getName(), "").get()); - } + // Add the first element + parent = static_cast<DocumentEntity *>( + parent->createChildStructuredEntity(path[0].cast<StructuredClass>(), + Variant::mapType{}, firstFieldName, + "").get()); + + createPath(path, parent, 2); } -void DocumentChildHandler::start(Variant::mapType &args) +bool DocumentChildHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); Rooted<Node> parentNode = scope().selectOrThrow( @@ -112,7 +117,7 @@ void DocumentChildHandler::start(Variant::mapType &args) preamble(parentNode, fieldName, parent, inField); - // try to find a FieldDescriptor for the given tag if we are not in a + // Try to find a FieldDescriptor for the given tag if we are not in a // field already. This does _not_ try to construct transparent paths // in between. if (!inField && parent != nullptr && @@ -121,7 +126,7 @@ void DocumentChildHandler::start(Variant::mapType &args) new DocumentField(parentNode->getManager(), name(), parentNode)}; field->setLocation(location()); scope().push(field); - return; + return true; } // Otherwise create a new StructuredEntity @@ -187,27 +192,39 @@ void DocumentChildHandler::start(Variant::mapType &args) } entity->setLocation(location()); scope().push(entity); + return true; } void DocumentChildHandler::end() { scope().pop(); } -std::pair<bool, Variant> DocumentChildHandler::convertData( - Handle<FieldDescriptor> field, Logger &logger, const std::string &data) +bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field, + Variant &data, Logger &logger) { - // if the content is supposed to be of type string, we can finish - // directly. - auto vts = field->getPrimitiveType()->getVariantTypes(); - if (std::find(vts.begin(), vts.end(), VariantType::STRING) != vts.end()) { - return std::make_pair(true, Variant::fromString(data)); + bool valid = true; + Rooted<Type> type = field->getPrimitiveType(); + + // If the content is supposed to be of type string, we only need to check + // for "magic" values -- otherwise just call the "parseGenericString" + // function on the string data + if (type->isa(&RttiTypes::StringType)) { + const std::string &str = data.asString(); + // TODO: Referencing constants with "." separator should also work + if (Utils::isIdentifier(str)) { + data.markAsMagic(); + } + } else { + // Parse the string as generic string, assign the result + auto res = VariantReader::parseGenericString( + data.asString(), logger, data.getLocation().getSourceId(), + data.getLocation().getStart()); + data = res.second; } - // then try to parse the content using the type specification. - auto res = field->getPrimitiveType()->read( - data, logger, location().getSourceId(), location().getStart()); - return res; + // Now try to resolve the value for the primitive type + return valid && scope().resolveValue(data, type, logger); } -void DocumentChildHandler::data(const std::string &data, int fieldIdx) +bool DocumentChildHandler::data(Variant &data) { Rooted<Node> parentNode = scope().selectOrThrow( {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity, @@ -222,11 +239,10 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx) Rooted<Descriptor> desc = strctParent->getDescriptor(); // The parent from which we need to connect to the primitive content. Rooted<Node> parentClass; - /* - * We distinguish two cases here: One for fields that are given. - */ + + // We distinguish two cases here: One for fields that are given. if (inField) { - // retrieve the actual FieldDescriptor + // Retrieve the actual FieldDescriptor Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName); if (field == nullptr) { logger().error( @@ -234,75 +250,102 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx) fieldName + "\" exists in descriptor\"" + desc->getName() + "\".", location()); - return; + return false; } - // if it is a primitive field directly, try to parse the content. + // If it is a primitive field directly, try to parse the content. if (field->isPrimitive()) { - auto res = convertData(field, logger(), data); - // add it as primitive content. - if (res.first) { - strctParent->createChildDocumentPrimitive(res.second, - fieldName); + // Add it as primitive content. + if (!convertData(field, data, logger())) { + return false; } - return; + + strctParent->createChildDocumentPrimitive(data, fieldName); + return true; } - // if it is not primitive we need to connect via transparent elements + // If it is not primitive we need to connect via transparent elements // and default fields. parentClass = field; } else { - // in case of default fields we need to construct via default fields + // In case of default fields we need to construct via default fields // and maybe transparent elements. parentClass = desc; } - /* - * Search through all permitted default fields of the parent class that - * allow primitive content at this point and could be constructed via - * transparent intermediate entities. - * We then try to parse the data using the type specified by the respective - * field. If that does not work we proceed to the next possible field. - */ - // retrieve all default fields at this point. + + // Search through all permitted default fields of the parent class that + // allow primitive content at this point and could be constructed via + // transparent intermediate entities. + + // Retrieve all default fields at this point, either from the field + // descriptor or the structured class NodeVector<FieldDescriptor> defaultFields; if (inField) { defaultFields = parentClass.cast<FieldDescriptor>()->getDefaultFields(); } else { defaultFields = parentClass.cast<StructuredClass>()->getDefaultFields(); } + + // Try to parse the data using the type specified by the respective field. + // If that does not work we proceed to the next possible field. std::vector<LoggerFork> forks; for (auto field : defaultFields) { - // then try to parse the content using the type specification. + // Then try to parse the content using the type specification. forks.emplace_back(logger().fork()); - auto res = convertData(field, forks.back(), data); - if (res.first) { - forks.back().commit(); - // if that worked, construct the necessary path. - if (inField) { - NodeVector<Node> path = - parentClass.cast<FieldDescriptor>()->pathTo(field, - logger()); - createPath(fieldName, path, strctParent); - } else { - auto pathRes = desc->pathTo(field, logger()); - assert(pathRes.second); - createPath(pathRes.first, strctParent); - } - // then create the primitive element. - strctParent->createChildDocumentPrimitive(res.second); - return; + if (!convertData(field, data, forks.back())) { + continue; } + + // The conversion worked, commit any possible warnings + forks.back().commit(); + + // Construct the necessary path + if (inField) { + NodeVector<Node> path = + parentClass.cast<FieldDescriptor>()->pathTo(field, logger()); + createPath(fieldName, path, strctParent); + } else { + auto pathRes = desc->pathTo(field, logger()); + assert(pathRes.second); + createPath(pathRes.first, strctParent); + } + + // Then create the primitive element + strctParent->createChildDocumentPrimitive(data); + return true; } - logger().error("Could not read data with any of the possible fields:"); + + // No field was found that might take the data -- dump the error messages + // from the loggers + logger().error("Could not read data with any of the possible fields:", + SourceLocation{}, MessageMode::NO_CONTEXT); size_t f = 0; for (auto field : defaultFields) { - logger().note(Utils::join(field->path(), ".") + ":", SourceLocation{}, - MessageMode::NO_CONTEXT); + logger().note(std::string("Field ") + Utils::join(field->path(), ".") + + std::string(":"), + SourceLocation{}, MessageMode::NO_CONTEXT); forks[f].commit(); f++; } + return false; +} + +namespace States { +const State Document = StateBuilder() + .parent(&None) + .createdNodeType(&RttiTypes::Document) + .elementHandler(DocumentHandler::create) + .arguments({Argument::String("name", "")}); + +const State DocumentChild = StateBuilder() + .parents({&Document, &DocumentChild}) + .createdNodeTypes({&RttiTypes::StructureNode, + &RttiTypes::AnnotationEntity, + &RttiTypes::DocumentField}) + .elementHandler(DocumentChildHandler::create); +} } namespace RttiTypes { -const Rtti DocumentField = - RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node); +const Rtti DocumentField = RttiBuilder<ousia::parser_stack::DocumentField>( + "DocumentField").parent(&Node); +} } -}
\ No newline at end of file diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index cb124aa..b339b96 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -19,14 +19,21 @@ /** * @file DocumentHandler.hpp * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + * Contains the Handler instances used for parsing actual documents. This file + * declares to classes: The Document handler which parses the "document" command + * that introduces a new document and the "DocumentChildHandler" which parses + * the actual user defined tags. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_DOCUMENT_HANDLER_HPP_ -#define _OUSIA_DOCUMENT_HANDLER_HPP_ +#ifndef _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ +#define _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> +#include <core/model/Node.hpp> + +#include "Handler.hpp" namespace ousia { @@ -35,51 +42,131 @@ class Rtti; class DocumentEntity; class FieldDescriptor; -class DocumentHandler : public Handler { +namespace parser_stack { +/** + * The DocumentHandler class parses the "document" tag that is used to introduce + * a new document. Note that this tag is not mandatory in osml files -- if the + * first command is not a typesystem, domain or any other declarative command, + * the DocumentHandler will be implicitly called. + */ +class DocumentHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the ImportHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new DocumentHandler{handlerData}; } }; +/** + * Temporary Node that is being pushed onto the ParserScope in order to indicate + * the field the parser is currently in. The name of the Node is stored in the + * "name" field of the parent Node class. + */ class DocumentField : public Node { public: using Node::Node; }; -class DocumentChildHandler : public Handler { +/** + * The DocumentChildHandler class performs the actual parsing of the user + * defined elements in an Ousía document. + */ +class DocumentChildHandler : public StaticHandler { private: + /** + * Code shared by both the start() and the end() method. Checks whether the + * parser currently is in a field and returns the name of this field. + * + * @param parentNode is the next possible parent node (a document, + * a structured entity, an annotation entity or a field). + * @param fieldName is an output parameter to which the name of the current + * field is written (or unchanged if we're not in a field). + * @param parent is an output parameter to which the parent document entity + * will be written. + * @param inField is set to true if we actually are in a field. + */ void preamble(Handle<Node> parentNode, std::string &fieldName, DocumentEntity *&parent, bool &inField); - std::pair<bool, Variant> convertData(Handle<FieldDescriptor> field, - Logger &logger, - const std::string &data); + /** + * Constructs all structured entites along the given path and inserts them + * into the document graph. + * + * @param path is a path containing an alternating series of structured + * classes and fields. + * @pram parent is the root entity from which the process should be started. + */ + void createPath(const NodeVector<Node> &path, DocumentEntity *&parent); + + /** + * Tries to convert the given data to the type that is specified in the + * given primitive field. + * + * @param field is the primitive field for which the data is intended. + * @param data is the is the data that should be converted, the result is + * written into this argument as output variable. + * @param logger is the Logger instance to which error messages should be + * written. Needed to allow the convertData function to write to a forked + * Logger instance. + * @return true if the operation was successful, false otherwise. + */ + bool convertData(Handle<FieldDescriptor> field, Variant &data, + Logger &logger); public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; - - void data(const std::string &data, int fieldIdx) override; - + bool data(Variant &data) override; + + /** + * Creates a new instance of the DocumentChildHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new DocumentChildHandler{handlerData}; } }; +namespace States { +/** + * State constant representing the "document" tag. + */ +extern const State Document; + +/** + * State contstant representing any user-defined element within a document. + */ +extern const State DocumentChild; +} + +} + namespace RttiTypes { +/** + * RttiType for the internally used DocumentField class. + */ extern const Rtti DocumentField; } + } -#endif + +#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index 6571717..a2c8eec 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -16,29 +16,48 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "DomainHandler.hpp" - #include <core/common/RttiBuilder.hpp> +#include <core/model/Document.hpp> #include <core/model/Domain.hpp> +#include <core/model/Project.hpp> #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" namespace ousia { +namespace parser_stack { /* DomainHandler */ -void DomainHandler::start(Variant::mapType &args) +bool DomainHandler::start(Variant::mapType &args) { - Rooted<Domain> domain = project()->createDomain(args["name"].asString()); + // Create the Domain node + Rooted<Domain> domain = + context().getProject()->createDomain(args["name"].asString()); domain->setLocation(location()); + // If the domain is defined inside a document, add the reference to the + // document + Rooted<Document> document = scope().select<Document>(); + if (document != nullptr) { + document->reference(domain); + } + + // Push the typesystem onto the scope, set the POST_HEAD flag to true scope().push(domain); + scope().setFlag(ParserFlag::POST_HEAD, false); + return true; } void DomainHandler::end() { scope().pop(); } /* DomainStructHandler */ -void DomainStructHandler::start(Variant::mapType &args) +bool DomainStructHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -63,12 +82,13 @@ void DomainStructHandler::start(Variant::mapType &args) } scope().push(structuredClass); + return true; } void DomainStructHandler::end() { scope().pop(); } /* DomainAnnotationHandler */ -void DomainAnnotationHandler::start(Variant::mapType &args) +bool DomainAnnotationHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -79,13 +99,14 @@ void DomainAnnotationHandler::start(Variant::mapType &args) annotationClass->setLocation(location()); scope().push(annotationClass); + return true; } void DomainAnnotationHandler::end() { scope().pop(); } /* DomainAttributesHandler */ -void DomainAttributesHandler::start(Variant::mapType &args) +bool DomainAttributesHandler::start(Variant::mapType &args) { // Fetch the current typesystem and create the struct node Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); @@ -94,13 +115,14 @@ void DomainAttributesHandler::start(Variant::mapType &args) attrDesc->setLocation(location()); scope().push(attrDesc); + return true; } void DomainAttributesHandler::end() { scope().pop(); } /* DomainFieldHandler */ -void DomainFieldHandler::start(Variant::mapType &args) +bool DomainFieldHandler::start(Variant::mapType &args) { FieldDescriptor::FieldType type; if (args["isSubtree"].asBool()) { @@ -116,13 +138,14 @@ void DomainFieldHandler::start(Variant::mapType &args) field->setLocation(location()); scope().push(field); + return true; } void DomainFieldHandler::end() { scope().pop(); } /* DomainFieldRefHandler */ -void DomainFieldRefHandler::start(Variant::mapType &args) +bool DomainFieldRefHandler::start(Variant::mapType &args) { Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); @@ -135,13 +158,14 @@ void DomainFieldRefHandler::start(Variant::mapType &args) field.cast<FieldDescriptor>(), logger); } }); + return true; } void DomainFieldRefHandler::end() {} /* DomainPrimitiveHandler */ -void DomainPrimitiveHandler::start(Variant::mapType &args) +bool DomainPrimitiveHandler::start(Variant::mapType &args) { Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); @@ -167,13 +191,14 @@ void DomainPrimitiveHandler::start(Variant::mapType &args) }); scope().push(field); + return true; } void DomainPrimitiveHandler::end() { scope().pop(); } /* DomainChildHandler */ -void DomainChildHandler::start(Variant::mapType &args) +bool DomainChildHandler::start(Variant::mapType &args) { Rooted<FieldDescriptor> field = scope().selectOrThrow<FieldDescriptor>(); @@ -186,13 +211,12 @@ void DomainChildHandler::start(Variant::mapType &args) child.cast<StructuredClass>()); } }); + return true; } -void DomainChildHandler::end() {} - /* DomainParentHandler */ -void DomainParentHandler::start(Variant::mapType &args) +bool DomainParentHandler::start(Variant::mapType &args) { Rooted<StructuredClass> strct = scope().selectOrThrow<StructuredClass>(); @@ -200,12 +224,14 @@ void DomainParentHandler::start(Variant::mapType &args) new DomainParent(strct->getManager(), args["ref"].asString(), strct)}; parent->setLocation(location()); scope().push(parent); + return true; } void DomainParentHandler::end() { scope().pop(); } /* DomainParentFieldHandler */ -void DomainParentFieldHandler::start(Variant::mapType &args) + +bool DomainParentFieldHandler::start(Variant::mapType &args) { Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>(); FieldDescriptor::FieldType type; @@ -233,13 +259,12 @@ void DomainParentFieldHandler::start(Variant::mapType &args) field->addChild(strct.cast<StructuredClass>()); } }); + return true; } -void DomainParentFieldHandler::end() {} - /* DomainParentFieldRefHandler */ -void DomainParentFieldRefHandler::start(Variant::mapType &args) +bool DomainParentFieldRefHandler::start(Variant::mapType &args) { Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>(); @@ -265,12 +290,104 @@ void DomainParentFieldRefHandler::start(Variant::mapType &args) field->addChild(strct.cast<StructuredClass>()); } }); + return true; } -void DomainParentFieldRefHandler::end() {} +namespace States { +const State Domain = StateBuilder() + .parents({&None, &Document}) + .createdNodeType(&RttiTypes::Domain) + .elementHandler(DomainHandler::create) + .arguments({Argument::String("name")}); + +const State DomainStruct = + StateBuilder() + .parent(&Domain) + .createdNodeType(&RttiTypes::StructuredClass) + .elementHandler(DomainStructHandler::create) + .arguments({Argument::String("name"), + Argument::Cardinality("cardinality", Cardinality::any()), + Argument::Bool("isRoot", false), + Argument::Bool("transparent", false), + Argument::String("isa", "")}); + +const State DomainAnnotation = + StateBuilder() + .parent(&Domain) + .createdNodeType(&RttiTypes::AnnotationClass) + .elementHandler(DomainAnnotationHandler::create) + .arguments({Argument::String("name")}); + +const State DomainAttributes = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::StructType) + .elementHandler(DomainAttributesHandler::create) + .arguments({}); + +const State DomainAttribute = + StateBuilder() + .parent(&DomainAttributes) + .elementHandler(TypesystemStructFieldHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("default", Variant::fromObject(nullptr))}); + +const State DomainField = StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("isSubtree", false), + Argument::Bool("optional", false)}); + +const State DomainFieldRef = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainFieldRefHandler::create) + .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); + +const State DomainStructPrimitive = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainPrimitiveHandler::create) + .arguments( + {Argument::String("name", ""), Argument::Bool("isSubtree", false), + Argument::Bool("optional", false), Argument::String("type")}); + +const State DomainStructChild = StateBuilder() + .parent(&DomainField) + .elementHandler(DomainChildHandler::create) + .arguments({Argument::String("ref")}); + +const State DomainStructParent = + StateBuilder() + .parent(&DomainStruct) + .createdNodeType(&RttiTypes::DomainParent) + .elementHandler(DomainParentHandler::create) + .arguments({Argument::String("ref")}); + +const State DomainStructParentField = + StateBuilder() + .parent(&DomainStructParent) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainParentFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("isSubtree", false), + Argument::Bool("optional", false)}); + +const State DomainStructParentFieldRef = + StateBuilder() + .parent(&DomainStructParent) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainParentFieldRefHandler::create) + .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); +} +} namespace RttiTypes { -const Rtti DomainParent = - RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node); +const Rtti DomainParent = RttiBuilder<ousia::parser_stack::DomainParent>( + "DomainParent").parent(&Node); } } diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp index 7398812..76172d6 100644 --- a/src/core/parser/stack/DomainHandler.hpp +++ b/src/core/parser/stack/DomainHandler.hpp @@ -19,26 +19,34 @@ /** * @file DomainHandler.hpp * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + * Contains the Handler classes used for parsing Domain descriptors. This + * includes the "domain" tag and all describing tags below the "domain" tag. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ #ifndef _OUSIA_DOMAIN_HANDLER_HPP_ #define _OUSIA_DOMAIN_HANDLER_HPP_ #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> +#include <core/model/Node.hpp> + +#include "Handler.hpp" namespace ousia { // Forward declarations class Rtti; -class DomainHandler : public Handler { -public: - using Handler::Handler; +namespace parser_stack { + +// TODO: Documentation - void start(Variant::mapType &args) override; +class DomainHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -47,12 +55,11 @@ public: } }; -class DomainStructHandler : public Handler { +class DomainStructHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -61,12 +68,11 @@ public: } }; -class DomainAnnotationHandler : public Handler { +class DomainAnnotationHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -75,12 +81,11 @@ public: } }; -class DomainAttributesHandler : public Handler { +class DomainAttributesHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -89,12 +94,11 @@ public: } }; -class DomainFieldHandler : public Handler { +class DomainFieldHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -103,12 +107,11 @@ public: } }; -class DomainFieldRefHandler : public Handler { +class DomainFieldRefHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -117,12 +120,11 @@ public: } }; -class DomainPrimitiveHandler : public Handler { +class DomainPrimitiveHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -131,13 +133,11 @@ public: } }; -class DomainChildHandler : public Handler { +class DomainChildHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; - void end() override; + bool start(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -150,16 +150,11 @@ public: using Node::Node; }; -namespace RttiTypes { -extern const Rtti DomainParent; -} - -class DomainParentHandler : public Handler { +class DomainParentHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -168,13 +163,11 @@ public: } }; -class DomainParentFieldHandler : public Handler { +class DomainParentFieldHandler : public StaticHandler { public: - using Handler::Handler; + using StaticHandler::StaticHandler; - void start(Variant::mapType &args) override; - - void end() override; + bool start(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -182,18 +175,83 @@ public: } }; -class DomainParentFieldRefHandler : public Handler { +class DomainParentFieldRefHandler : public StaticHandler { public: - using Handler::Handler; + using StaticHandler::StaticHandler; - void start(Variant::mapType &args) override; - - void end() override; + bool start(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { return new DomainParentFieldRefHandler{handlerData}; } }; + +namespace States { +/** + * State representing a "domain" struct. + */ +extern const State Domain; + +/** + * State representing a "struct" tag within a domain description. + */ +extern const State DomainStruct; + +/** + * State representing an "annotation" tag within a domain description. + */ +extern const State DomainAnnotation; + +/** + * State representing an "attributes" tag within a structure or annotation. + */ +extern const State DomainAttributes; + +/** + * State representing an "attribute" tag within the "attributes". + */ +extern const State DomainAttribute; + +/** + * State representing a "field" tag within a structure or annotation. + */ +extern const State DomainField; + +/** + * State representing a "fieldref" tag within a structure or annotation. + */ +extern const State DomainFieldRef; + +/** + * State representing a "primitive" tag within a structure or annotation. + */ +extern const State DomainStructPrimitive; + +/** + * State representing a "child" tag within a structure or annotation. + */ +extern const State DomainStructChild; + +/** + * State representing a "parent" tag within a structure or annotation. + */ +extern const State DomainStructParent; + +/** + * State representing a "field" tag within a "parent" tag. + */ +extern const State DomainStructParentField; + +/** + * State representing a "fieldRef" tag within a "parent" tag. + */ +extern const State DomainStructParentFieldRef; +} +} + +namespace RttiTypes { +extern const Rtti DomainParent; +} } #endif diff --git a/src/core/parser/stack/GenericParserStates.cpp b/src/core/parser/stack/GenericParserStates.cpp new file mode 100644 index 0000000..69a6e0e --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.cpp @@ -0,0 +1,53 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "GenericParserStates.hpp" +#include "ImportIncludeHandler.hpp" +#include "TypesystemHandler.hpp" + +namespace ousia { +namespace parser_stack { + +const std::multimap<std::string, const State *> GenericParserStates{ + {"document", &States::Document}, + {"*", &States::DocumentChild}, + {"domain", &States::Domain}, + {"struct", &States::DomainStruct}, + {"annotation", &States::DomainAnnotation}, + {"attributes", &States::DomainAttributes}, + {"attribute", &States::DomainAttribute}, + {"field", &States::DomainField}, + {"fieldRef", &States::DomainFieldRef}, + {"primitive", &States::DomainStructPrimitive}, + {"childRef", &States::DomainStructChild}, + {"parentRef", &States::DomainStructParent}, + {"field", &States::DomainStructParentField}, + {"fieldRef", &States::DomainStructParentFieldRef}, + {"typesystem", &States::Typesystem}, + {"enum", &States::TypesystemEnum}, + {"entry", &States::TypesystemEnumEntry}, + {"struct", &States::TypesystemStruct}, + {"field", &States::TypesystemStructField}, + {"constant", &States::TypesystemConstant}, + {"import", &States::Import}, + {"include", &States::Include}}; +} +} + diff --git a/src/core/parser/stack/GenericParserStates.hpp b/src/core/parser/stack/GenericParserStates.hpp new file mode 100644 index 0000000..552eee5 --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.hpp @@ -0,0 +1,49 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file GenericParserStates.hpp + * + * Contains a multimap which maps between tag/command names to the corresponding + * state descriptors. This multimap is used to initialize the push down + * automaton residing inside the "Stack" class. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ +#define _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ + +#include <string> +#include <map> + +namespace ousia { +namespace parser_stack { + +// Forward declarations +class State; + +/** + * Map between tagnames and references to the corresponding State instances. + */ +extern const std::multimap<std::string, const State *> GenericParserStates; +} +} + +#endif /* _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ */ + diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp new file mode 100644 index 0000000..bf5d4ea --- /dev/null +++ b/src/core/parser/stack/Handler.cpp @@ -0,0 +1,254 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/common/Exceptions.hpp> +#include <core/common/Logger.hpp> +#include <core/parser/ParserContext.hpp> + +#include "Callbacks.hpp" +#include "Handler.hpp" +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class HandlerData */ + +HandlerData::HandlerData(ParserContext &ctx, /*Callbacks &callbacks,*/ + const std::string &name, const State &state, + const SourceLocation &location) + : ctx(ctx), + /*callbacks(callbacks),*/ + name(name), + state(state), + location(location) +{ +} + +/* Class Handler */ + +Handler::Handler(const HandlerData &handlerData) + : handlerData(handlerData), internalLogger(nullptr) +{ +} + +Handler::~Handler() {} + +ParserContext &Handler::context() { return handlerData.ctx; } + +ParserScope &Handler::scope() { return handlerData.ctx.getScope(); } + +Manager &Handler::manager() { return handlerData.ctx.getManager(); } + +Logger &Handler::logger() +{ + if (internalLogger != nullptr) { + return *internalLogger; + } + return handlerData.ctx.getLogger(); +} + +const SourceLocation &Handler::location() const { return handlerData.location; } + +const std::string &Handler::name() const { return handlerData.name; } + +void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) +{ + /*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/ +} + +void Handler::registerToken(const std::string &token) +{ + /*handlerData.callbacks.registerToken(token);*/ +} + +void Handler::unregisterToken(const std::string &token) +{ + /*handlerData.callbacks.unregisterToken(token);*/ +} + +const std::string &Handler::getName() const { return name(); } + +const State &Handler::getState() const { return handlerData.state; } + +void Handler::setLogger(Logger &logger) { internalLogger = &logger; } + +void Handler::resetLogger() { internalLogger = nullptr; } + +const SourceLocation &Handler::getLocation() const { return location(); } + +/* Class EmptyHandler */ + +bool EmptyHandler::start(Variant::mapType &args) +{ + // Just accept anything + return true; +} + +void EmptyHandler::end() +{ + // Do nothing if a command ends +} + +bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex) +{ + // Accept any field + return true; +} + +void EmptyHandler::fieldEnd() +{ + // Do not handle fields +} + +bool EmptyHandler::annotationStart(const Variant &className, + Variant::mapType &args) +{ + // Accept any data + return true; +} + +bool EmptyHandler::annotationEnd(const Variant &className, + const Variant &elementName) +{ + // Accept any annotation + return true; +} + +bool EmptyHandler::data(Variant &data) +{ + // Support any data + return true; +} + +Handler *EmptyHandler::create(const HandlerData &handlerData) +{ + return new EmptyHandler(handlerData); +} + +/* Class StaticHandler */ + +bool StaticHandler::start(Variant::mapType &args) +{ + // Do nothing in the default implementation, accept anything + return true; +} + +void StaticHandler::end() +{ + // Do nothing here +} + +bool StaticHandler::fieldStart(bool &isDefault, size_t fieldIdx) +{ + // Return true if either the default field is requested or the field index + // is zero. This simulates that there is exactly one field (a default field) + if (fieldIdx == 0) { + isDefault = true; + return true; + } + return false; +} + +void StaticHandler::fieldEnd() +{ + // Do nothing here +} + +bool StaticHandler::annotationStart(const Variant &className, + Variant::mapType &args) +{ + // No annotations supported + return false; +} + +bool StaticHandler::annotationEnd(const Variant &className, + const Variant &elementName) +{ + // No annotations supported + return false; +} + +bool StaticHandler::data(Variant &data) +{ + logger().error("Did not expect any data here", data); + return false; +} + +/* Class StaticFieldHandler */ + +StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData, + const std::string &argName) + : StaticHandler(handlerData), argName(argName), handled(false) +{ +} + +bool StaticFieldHandler::start(Variant::mapType &args) +{ + if (!argName.empty()) { + auto it = args.find(argName); + if (it != args.end() && !it->second.toString().empty()) { + handled = true; + doHandle(it->second, args); + return true; + } + } + + this->args = args; + return true; +} + +void StaticFieldHandler::end() +{ + if (!handled) { + if (!argName.empty()) { + logger().error(std::string("Required argument \"") + argName + + std::string("\" is missing."), + location()); + } else { + logger().error("Command requires data, but no data given", + location()); + } + } +} + +bool StaticFieldHandler::data(Variant &data) +{ + // Call the doHandle function if this has not been done before + if (!handled) { + handled = true; + doHandle(data, args); + return true; + } + + // The doHandle function was already called, print an error message + logger().error( + std::string("Found data, but the corresponding argument \"") + argName + + std::string("\" was already specified"), + data); + + // Print the location at which the attribute was originally specified + auto it = args.find(argName); + if (it != args.end()) { + logger().note(std::string("Attribute was specified here:"), it->second); + } + return false; +} +} +} + diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp new file mode 100644 index 0000000..7cda7a4 --- /dev/null +++ b/src/core/parser/stack/Handler.hpp @@ -0,0 +1,421 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_PARSER_STACK_HANDLER_HPP_ +#define _OUSIA_PARSER_STACK_HANDLER_HPP_ + +#include <string> + +#include <core/common/Location.hpp> +#include <core/common/Variant.hpp> +#include <core/common/Whitespace.hpp> + +namespace ousia { + +// Forward declarations +class ParserScope; +class ParserContext; +class Logger; + +namespace parser_stack { + +// More forward declarations +class Callbacks; +class State; + +/** + * Class collecting all the data that is being passed to a Handler + * instance. + */ +class HandlerData { +public: + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + ParserContext &ctx; + + /** + * Reference at an instance of the Callbacks class, used for + * modifying the behaviour of the parser (like registering tokens, setting + * the data type or changing the whitespace handling mode). + */ + // Callbacks &callbacks; + + /** + * Contains the name of the command that is being handled. + */ + std::string name; + + /** + * Contains the current state of the state machine. + */ + const State &state; + + /** + * Current source code location. + */ + SourceLocation location; + + /** + * Constructor of the HandlerData class. + * + * @param ctx is the parser context the handler should be executed in. + * @param callbacks is an instance of Callbacks used to notify + * the parser about certain state changes. + * @param name is the name of the string. + * @param state is the state this handler was called for. + * @param location is the location at which the handler is created. + */ + HandlerData(ParserContext &ctx, + /*Callbacks &callbacks,*/ const std::string &name, + const State &state, const SourceLocation &location); +}; + +/** + * The Handler class provides a context for handling a generic stack element. + * It has to beoverridden and registered in the StateStack class to form + * handlers for concrete XML tags. + */ +class Handler { +private: + /** + * Structure containing the internal handler data. + */ + const HandlerData handlerData; + + /** + * Reference at the current logger. If not nullptr, this will override the + * logger from the ParserContext specified in the handlerData. + */ + Logger *internalLogger; + +protected: + /** + * Constructor of the Handler class. + * + * @param data is a structure containing all data being passed to the + * handler. + */ + Handler(const HandlerData &handlerData); + + /** + * Returns a reference at the ParserContext. + * + * @return a reference at the ParserContext. + */ + ParserContext &context(); + + /** + * Returns a reference at the ParserScope instance. + * + * @return a reference at the ParserScope instance. + */ + ParserScope &scope(); + + /** + * Returns a reference at the Manager instance which manages all nodes. + * + * @return a referance at the Manager instance. + */ + Manager &manager(); + + /** + * Returns a reference at the Logger instance used for logging error + * messages. + * + * @return a reference at the Logger instance. + */ + Logger &logger(); + + /** + * Returns the location of the element in the source file, for which this + * Handler was created. + * + * @return the location of the Handler in the source file. + */ + const SourceLocation &location() const; + + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. + */ + const std::string &name() const; + +public: + /** + * Virtual destructor. + */ + virtual ~Handler(); + + /** + * Calls the corresponding function in the Callbacks instance. Sets the + * whitespace mode that specifies how string data should be processed. The + * calls to this function are placed on a stack by the underlying Stack + * class. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + void setWhitespaceMode(WhitespaceMode whitespaceMode); + + /** + * Calls the corresponding function in the Callbacks instance. + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + void registerToken(const std::string &token); + + /** + * Calls the corresponding function in the Callbacks instance. + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. + */ + void unregisterToken(const std::string &token); + + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. + */ + const std::string &getName() const; + + /** + * Reference at the State descriptor for which this Handler was created. + * + * @return a const reference at the constructing State descriptor. + */ + const State &getState() const; + + /** + * Sets the internal logger to the given logger instance. + * + * @param logger is the Logger instance to which the logger should be set. + */ + void setLogger(Logger &logger); + + /** + * Resets the logger instance to the logger instance provided in the + * ParserContext. + */ + void resetLogger(); + + /** + * Returns the location of the element in the source file, for which this + * Handler was created. + * + * @return the location of the Handler in the source file. + */ + const SourceLocation &getLocation() const; + + /** + * Called when the command that was specified in the constructor is + * instanciated. + * + * @param args is a map from strings to variants (argument name and value). + * @return true if the handler was successful in starting the element it + * represents, false otherwise. + */ + virtual bool start(Variant::mapType &args) = 0; + + /** + * Called before the command for which this handler is defined ends (is + * forever removed from the stack). + */ + virtual void end() = 0; + + /** + * Called when a new field starts, while the handler is active. This + * function should return true if the field is supported, false otherwise. + * No error should be logged if the field cannot be started, the caller will + * take care of that (since it is always valid to start a default field, + * even though the corresponding structure does not have a field, as long as + * no data is fed into the field). + * + * @param isDefault is set to true if the field that is being started is the + * default/tree field. The handler should set the value of this variable to + * true if the referenced field is indeed the default field. + * @param fieldIdx is the numerical index of the field. + */ + virtual bool fieldStart(bool &isDefault, size_t fieldIdx) = 0; + + /** + * Called when a previously opened field ends, while the handler is active. + * Note that a "fieldStart" and "fieldEnd" are always called alternately. + */ + virtual void fieldEnd() = 0; + + /** + * Called whenever an annotation starts while this handler is active. The + * function should return true if starting the annotation was successful, + * false otherwise. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the name in the source code. + * @param args is a map from strings to variants (argument name and value). + * @return true if the mentioned annotation could be started here, false + * if an error occurred. + */ + virtual bool annotationStart(const Variant &className, + Variant::mapType &args) = 0; + + /** + * Called whenever an annotation ends while this handler is active. The + * function should return true if ending the annotation was successful, + * false otherwise. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the class name in the source code. + * @param elementName is a string variant containing the name of the + * annotation class and the location of the element name in the source code. + * @return true if the mentioned annotation could be started here, false if + * an error occurred. + */ + virtual bool annotationEnd(const Variant &className, + const Variant &elementName) = 0; + + /** + * Called whenever raw data (int the form of a string) is available for the + * Handler instance. Should return true if the data could be handled, false + * otherwise. + * + * @param data is a string variant containing the character data and its + * location. + * @return true if the data could be handled, false otherwise. + */ + virtual bool data(Variant &data) = 0; +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + * + * @param handlerData is the data that should be passed to the new handler + * instance. + * @return a newly created handler instance. + */ +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * The EmptyHandler class is used in case no element handler is specified in + * the State descriptor. It just accepts all data and does nothing. + */ +class EmptyHandler : public Handler { +protected: + using Handler::Handler; + +public: + bool start(Variant::mapType &args) override; + void end() override; + bool fieldStart(bool &isDefault, size_t fieldIdx) override; + void fieldEnd() override; + bool annotationStart(const Variant &className, + Variant::mapType &args) override; + bool annotationEnd(const Variant &className, + const Variant &elementName) override; + bool data(Variant &data) override; + + /** + * Creates an instance of the EmptyHandler class. + */ + static Handler *create(const HandlerData &handlerData); +}; + +/** + * The StaticHandler class is used to handle predifined commands which do + * neither support annotations, nor multiple fields. Child classes can decide + * whether a single data field should be used. + */ +class StaticHandler : public Handler { +protected: + using Handler::Handler; + +public: + bool start(Variant::mapType &args) override; + void end() override; + bool fieldStart(bool &isDefault, size_t fieldIdx) override; + void fieldEnd() override; + bool annotationStart(const Variant &className, + Variant::mapType &args) override; + bool annotationEnd(const Variant &className, + const Variant &elementName) override; + bool data(Variant &data) override; +}; + +/** + * The StaticFieldHandler class is used to handle predifined commands which do + * neither support annotations, nor multiple fields. Additionally, it captures a + * data entry from a single default field. + */ +class StaticFieldHandler : public StaticHandler { +private: + /** + * Set to the name of the data argument that should be used instead of the + * data field, if no data field is given. + */ + std::string argName; + + /** + * Set to true, once the "doHandle" function has been called. + */ + bool handled; + + /** + * Map containing the arguments given in the start function. + */ + Variant::mapType args; + +protected: + /** + * Constructor of the StaticFieldHandler class. + * + * @param handlerData is a structure containing the internal data that + * should be stored inside the handler. + * @param name of the data argument that -- if present -- should be used + * instead of the data field. If empty, data is not captured from the + * arguments. If both, data in the data field and the argument, are given, + * this results in an error. + */ + StaticFieldHandler(const HandlerData &handlerData, + const std::string &argName); + + /** + * Function that should be overriden in order to handle the field data and + * the other arguments. This function is not called if no data was given. + * + * @param fieldData is the captured field data. + * @param args are the arguments that were given in the "start" function. + */ + virtual void doHandle(const Variant &fieldData, + Variant::mapType &args) = 0; + +public: + bool start(Variant::mapType &args) override; + void end() override; + bool data(Variant &data) override; +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp index 94ee82d..d1ea97d 100644 --- a/src/core/parser/stack/ImportIncludeHandler.cpp +++ b/src/core/parser/stack/ImportIncludeHandler.cpp @@ -16,50 +16,22 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "ImportIncludeHandler.hpp" - +#include <core/model/RootNode.hpp> #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> -namespace ousia { - -/* ImportIncludeHandler */ - -void ImportIncludeHandler::start(Variant::mapType &args) -{ - rel = args["rel"].asString(); - type = args["type"].asString(); - src = args["src"].asString(); - srcInArgs = !src.empty(); -} +#include "DomainHandler.hpp" +#include "DocumentHandler.hpp" +#include "ImportIncludeHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" -void ImportIncludeHandler::data(const std::string &data, int field) -{ - if (srcInArgs) { - logger().error("\"src\" attribute has already been set"); - return; - } - if (field != 0) { - logger().error("Command has only one field."); - return; - } - src.append(data); -} +namespace ousia { +namespace parser_stack { /* ImportHandler */ -void ImportHandler::start(Variant::mapType &args) -{ - ImportIncludeHandler::start(args); - - // Make sure imports are still possible - if (scope().getFlag(ParserFlag::POST_HEAD)) { - logger().error("Imports must be listed before other commands.", - location()); - return; - } -} - -void ImportHandler::end() +void ImportHandler::doHandle(const Variant &fieldData, Variant::mapType &args) { // Fetch the last node and check whether an import is valid at this // position @@ -75,8 +47,9 @@ void ImportHandler::end() // Perform the actual import, register the imported node within the leaf // node - Rooted<Node> imported = - context().import(src, type, rel, leafRootNode->getReferenceTypes()); + Rooted<Node> imported = context().import( + fieldData.asString(), args["type"].asString(), args["rel"].asString(), + leafRootNode->getReferenceTypes()); if (imported != nullptr) { leafRootNode->reference(imported); } @@ -84,13 +57,26 @@ void ImportHandler::end() /* IncludeHandler */ -void IncludeHandler::start(Variant::mapType &args) +void IncludeHandler::doHandle(const Variant &fieldData, Variant::mapType &args) { - ImportIncludeHandler::start(args); + context().include(fieldData.asString(), args["type"].asString(), + args["rel"].asString(), {&RttiTypes::Node}); } -void IncludeHandler::end() -{ - context().include(src, type, rel, {&RttiTypes::Node}); +namespace States { +const State Import = + StateBuilder() + .parents({&Document, &Typesystem, &Domain}) + .elementHandler(ImportHandler::create) + .arguments({Argument::String("rel", ""), Argument::String("type", ""), + Argument::String("src", "")}); + +const State Include = + StateBuilder() + .parent(&All) + .elementHandler(IncludeHandler::create) + .arguments({Argument::String("rel", ""), Argument::String("type", ""), + Argument::String("src", "")}); +} } } diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp index b0767be..6168639 100644 --- a/src/core/parser/stack/ImportIncludeHandler.hpp +++ b/src/core/parser/stack/ImportIncludeHandler.hpp @@ -19,6 +19,9 @@ /** * @file ImportIncludeHandler.hpp * + * Contains the conceptually similar handlers for the "include" and "import" + * commands. + * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -26,51 +29,78 @@ #define _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_ #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> - -namespace ousia { -class ImportIncludeHandler : public Handler { -protected: - bool srcInArgs = false; - std::string rel; - std::string type; - std::string src; +#include "Handler.hpp" -public: - using Handler::Handler; - - void start(Variant::mapType &args) override; - - void data(const std::string &data, int field) override; -}; +namespace ousia { +namespace parser_stack { -class ImportHandler : public ImportIncludeHandler { +/** + * The ImportHandler is responsible for handling the "import" command. An import + * creates a reference to a specified file. The specified file is parsed (if + * this has not already been done) outside of the context of the current file. + * If the specified resource has already been parsed, a reference to the already + * parsed file is inserted. Imports are only possible before no other content + * has been parsed. + */ +class ImportHandler : public StaticFieldHandler { public: - using ImportIncludeHandler::ImportIncludeHandler; - - void start(Variant::mapType &args) override; - - void end() override; - + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + Variant::mapType &args) override; + + /** + * Creates a new instance of the ImportHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { - return new ImportHandler{handlerData}; + return new ImportHandler{handlerData, "src"}; } }; -class IncludeHandler : public ImportIncludeHandler { +/** + * The IncludeHandler is responsible for handling the "include" command. The + * included file is parsed in the context of the current file and will change + * the content that is currently being parsed. Includes are possible at (almost) + * any position in the source file. + */ +class IncludeHandler : public StaticFieldHandler { public: - using ImportIncludeHandler::ImportIncludeHandler; - - void start(Variant::mapType &args) override; - - void end() override; - + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + Variant::mapType &args) override; + + /** + * Creates a new instance of the IncludeHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { - return new IncludeHandler{handlerData}; + return new IncludeHandler{handlerData, "src"}; } }; + +namespace States { +/** + * State representing the "import" command. + */ +extern const State Import; + +/** + * State representing the "include" command. + */ +extern const State Include; +} + +} } #endif diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp new file mode 100644 index 0000000..47f7d2c --- /dev/null +++ b/src/core/parser/stack/Stack.cpp @@ -0,0 +1,550 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <sstream> + +#include <core/common/Logger.hpp> +#include <core/common/Utils.hpp> +#include <core/common/Exceptions.hpp> +#include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "Handler.hpp" +#include "Stack.hpp" +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class HandlerInfo */ + +HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {} + +HandlerInfo::HandlerInfo(std::shared_ptr<Handler> handler) + : handler(handler), + fieldIdx(0), + valid(true), + implicit(false), + inField(false), + inDefaultField(false), + inImplicitDefaultField(false), + inValidField(false), + hadDefaultField(false) +{ +} + +HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField, + bool inDefaultField, bool inImplicitDefaultField, + bool inValidField) + : handler(nullptr), + fieldIdx(0), + valid(valid), + implicit(implicit), + inField(inField), + inDefaultField(inDefaultField), + inImplicitDefaultField(inImplicitDefaultField), + inValidField(inValidField), + hadDefaultField(false) +{ +} + +HandlerInfo::~HandlerInfo() +{ + // Do nothing +} + +void HandlerInfo::fieldStart(bool isDefault, bool isImplicit, bool isValid) +{ + inField = true; + inDefaultField = isDefault || isImplicit; + inImplicitDefaultField = isImplicit; + inValidField = isValid; + hadDefaultField = hadDefaultField || inDefaultField; + fieldIdx++; +} + +void HandlerInfo::fieldEnd() +{ + inField = false; + inDefaultField = false; + inImplicitDefaultField = false; + inValidField = false; +} + +/** + * Stub instance of HandlerInfo containing no handler information. + */ +static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true}; + +/* Helper functions */ + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + * + * @param name is the name of the command for which no state transition is + * found. + * @param expected is a set containing the names of the expected commands. + */ +static LoggableException buildInvalidCommandException( + const std::string &name, const std::set<std::string> &expected) +{ + if (expected.empty()) { + return LoggableException{ + std::string{"No nested elements allowed, but got \""} + name + + std::string{"\""}}; + } else { + return LoggableException{ + std::string{"Expected "} + + (expected.size() == 1 ? std::string{"\""} + : std::string{"one of \""}) + + Utils::join(expected, "\", \"") + std::string{"\", but got \""} + + name + std::string{"\""}}; + } +} + +/* Class Stack */ + +Stack::Stack(ParserContext &ctx, + const std::multimap<std::string, const State *> &states) + : ctx(ctx), states(states) +{ + // If the scope instance is not empty we need to deduce the current parser + // state + if (!ctx.getScope().isEmpty()) { + deduceState(); + } +} + +Stack::~Stack() +{ + while (!stack.empty()) { + // Fetch the topmost stack element + HandlerInfo &info = currentInfo(); + + // It is an error if we're still in a field of an element while the + // Stack instance is destroyed. Log that + if (handlersValid()) { + if (info.inField && !info.implicit && + !info.inImplicitDefaultField) { + logger().error( + std::string("Reached end of stream, but command \"") + + info.handler->getName() + + "\" has not ended yet. Command was started here:", + info.handler->getLocation()); + } + } + + // Remove the command from the stack + endCurrentHandler(); + } +} + +void Stack::deduceState() +{ + // Assemble all states + std::vector<const State *> states; + for (const auto &e : this->states) { + states.push_back(e.second); + } + + // Fetch the type signature of the scope and derive all possible states, + // abort if no unique parser state was found + std::vector<const State *> possibleStates = + StateDeductor(ctx.getScope().getStackTypeSignature(), states).deduce(); + if (possibleStates.size() != 1U) { + throw LoggableException( + "Error while including file: Cannot deduce parser state."); + } + + // Switch to this state by creating a handler, but do not call its start + // function + const State &state = *possibleStates[0]; + HandlerConstructor ctor = + state.elementHandler ? state.elementHandler : EmptyHandler::create; + + std::shared_ptr<Handler> handler = + std::shared_ptr<Handler>{ctor({ctx, "", state, SourceLocation{}})}; + stack.emplace_back(handler); + + // Set the correct flags for this implicit handler + HandlerInfo &info = currentInfo(); + info.implicit = true; + info.fieldStart(true, false, true); +} + +std::set<std::string> Stack::expectedCommands() +{ + const State *currentState = &(this->currentState()); + std::set<std::string> res; + for (const auto &v : states) { + if (v.second->parents.count(currentState)) { + res.insert(v.first); + } + } + return res; +} + +const State &Stack::currentState() +{ + return stack.empty() ? States::None : stack.back().handler->getState(); +} + +std::string Stack::currentCommandName() +{ + return stack.empty() ? std::string{} : stack.back().handler->getName(); +} + +const State *Stack::findTargetState(const std::string &name) +{ + const State *currentState = &(this->currentState()); + auto range = states.equal_range(name); + for (auto it = range.first; it != range.second; it++) { + const StateSet &parents = it->second->parents; + if (parents.count(currentState) || parents.count(&States::All)) { + return it->second; + } + } + + return nullptr; +} + +const State *Stack::findTargetStateOrWildcard(const std::string &name) +{ + // Try to find the target state with the given name, if none is found, try + // find a matching "*" state. + State const *targetState = findTargetState(name); + if (targetState == nullptr) { + return findTargetState("*"); + } + return targetState; +} + +HandlerInfo &Stack::currentInfo() +{ + return stack.empty() ? EmptyHandlerInfo : stack.back(); +} +HandlerInfo &Stack::lastInfo() +{ + return stack.size() < 2U ? EmptyHandlerInfo : stack[stack.size() - 2]; +} + +void Stack::endCurrentHandler() +{ + if (!stack.empty()) { + // Fetch the handler info for the current top-level element + HandlerInfo &info = stack.back(); + + // Do not call any callback functions while the stack is marked as + // invalid or this is an elment marked as "implicit" + if (!info.implicit && handlersValid()) { + // Make sure the fieldEnd handler is called if the element still + // is in a field + if (info.inField) { + info.handler->fieldEnd(); + info.fieldEnd(); + } + + // Call the "end" function of the corresponding Handler instance + info.handler->end(); + } + + // Remove the element from the stack + stack.pop_back(); + } +} + +bool Stack::ensureHandlerIsInField() +{ + // If the current handler is not in a field (and actually has a handler) + // try to start a default field + HandlerInfo &info = currentInfo(); + if (!info.inField && info.handler != nullptr) { + // Abort if the element already had a default field + if (info.hadDefaultField) { + return false; + } + + // Try to start a new default field, abort if this did not work + bool isDefault = true; + if (!info.handler->fieldStart(isDefault, info.fieldIdx)) { + info.handler->fieldEnd(); + endCurrentHandler(); + return false; + } + + // Mark the field as started + info.fieldStart(true, true, true); + } + return true; +} + +bool Stack::handlersValid() +{ + for (auto it = stack.crbegin(); it != stack.crend(); it++) { + if (!it->valid) { + return false; + } + } + return true; +} + +Logger &Stack::logger() { return ctx.getLogger(); } + +void Stack::command(const Variant &name, const Variant::mapType &args) +{ + // Make sure the given identifier is valid (preventing "*" from being + // malicously passed to this function) + if (!Utils::isNamespacedIdentifier(name.asString())) { + throw LoggableException(std::string("Invalid identifier \"") + + name.asString() + std::string("\""), + name); + } + + while (true) { + // Try to find a target state for the given command, if none can be + // found and the current command does not have an open field, then try + // to create an empty default field, otherwise this is an exception + const State *targetState = findTargetStateOrWildcard(name.asString()); + if (targetState == nullptr) { + if (!currentInfo().inField) { + endCurrentHandler(); + continue; + } else { + throw buildInvalidCommandException(name.asString(), + expectedCommands()); + } + } + + // Make sure we're currently inside a field + if (!ensureHandlerIsInField()) { + endCurrentHandler(); + continue; + } + + // Fork the logger. We do not want any validation errors to skip + LoggerFork loggerFork = logger().fork(); + + // Instantiate the handler and push it onto the stack + HandlerConstructor ctor = targetState->elementHandler + ? targetState->elementHandler + : EmptyHandler::create; + std::shared_ptr<Handler> handler{ + ctor({ctx, name.asString(), *targetState, name.getLocation()})}; + stack.emplace_back(handler); + + // Fetch the HandlerInfo for the parent element and the current element + HandlerInfo &parentInfo = lastInfo(); + HandlerInfo &info = currentInfo(); + + // Call the "start" method of the handler, store the result of the start + // method as the validity of the handler -- do not call the start method + // if the stack is currently invalid (as this may cause further, + // unwanted errors) + bool validStack = handlersValid(); + info.valid = false; + if (validStack) { + // Canonicalize the arguments (if this has not already been done), + // allow additional arguments + Variant::mapType canonicalArgs = args; + targetState->arguments.validateMap(canonicalArgs, loggerFork, true); + + handler->setLogger(loggerFork); + try { + info.valid = handler->start(canonicalArgs); + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + handler->resetLogger(); + } + + // We started the command within an implicit default field and it is not + // valid -- remove both the new handler and the parent field from the + // stack + if (!info.valid && parentInfo.inImplicitDefaultField) { + endCurrentHandler(); + endCurrentHandler(); + continue; + } + + // If we ended up here, starting the command may or may not have worked, + // but after all, we cannot unroll the stack any further. Update the + // "valid" flag, commit any potential error messages and return. + info.valid = parentInfo.valid && info.valid; + loggerFork.commit(); + return; + } +} + +void Stack::data(const Variant &data) +{ + while (true) { + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException("No command here to receive data."); + } + + // Fetch the current command handler information + HandlerInfo &info = currentInfo(); + + // Make sure the current handler has an open field + if (!ensureHandlerIsInField()) { + endCurrentHandler(); + continue; + } + + // If this field should not get any data, log an error and do not call + // the "data" handler + if (!info.inValidField) { + logger().error("Did not expect any data here", data); + } + + if (handlersValid() && info.inValidField) { + // Fork the logger and set it as temporary logger for the "start" + // method. We only want to keep error messages if this was not a try + // to implicitly open a default field. + LoggerFork loggerFork = logger().fork(); + info.handler->setLogger(loggerFork); + + // Pass the data to the current Handler instance + bool valid = false; + try { + Variant dataCopy = data; + valid = info.handler->data(dataCopy); + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + + // Reset the logger instance as soon as possible + info.handler->resetLogger(); + + // If placing the data here failed and we're currently in an + // implicitly opened field, just unroll the stack to the next field + // and try again + if (!valid && info.inImplicitDefaultField) { + endCurrentHandler(); + continue; + } + + // Commit the content of the logger fork. Do not change the valid + // flag. + loggerFork.commit(); + } + + // There was no reason to unroll the stack any further, so continue + return; + } +} + +void Stack::fieldStart(bool isDefault) +{ + // Make sure the current handler stack is not empty + if (stack.empty()) { + throw LoggableException( + "No command for which a field could be started"); + } + + // Fetch the information attached to the current handler + HandlerInfo &info = currentInfo(); + if (info.inField) { + logger().error( + "Got field start, but there is no command for which to start the " + "field."); + return; + } + + // Copy the isDefault flag to a local variable, the fieldStart method will + // write into this variable + bool defaultField = isDefault; + + // Do not call the "fieldStart" function if we're in an invalid subtree + bool valid = false; + if (handlersValid()) { + try { + valid = info.handler->fieldStart(defaultField, info.fieldIdx); + } + catch (LoggableException ex) { + logger().log(ex); + } + if (!valid && !defaultField) { + logger().error( + std::string("Cannot start a new field here (index ") + + std::to_string(info.fieldIdx + 1) + + std::string("), field does not exist")); + } + } + + // Mark the field as started + info.fieldStart(defaultField, false, valid); +} + +void Stack::fieldEnd() +{ + // Make sure the current handler stack is not empty + if (stack.empty()) { + throw LoggableException("No command for which a field could be ended"); + } + + // Fetch the information attached to the current handler + HandlerInfo &info = currentInfo(); + if (!info.inField) { + logger().error( + "Got field end, but there is no command for which to end the " + "field."); + return; + } + + // Only continue if the current handler stack is in a valid state, do not + // call the fieldEnd function if something went wrong before + if (handlersValid()) { + try { + info.handler->fieldEnd(); + } + catch (LoggableException ex) { + logger().log(ex); + } + } + + // This command no longer is in a field + info.fieldEnd(); + + // As soon as this command had a default field, remove it from the stack + if (info.hadDefaultField) { + endCurrentHandler(); + } +} + +void Stack::annotationStart(const Variant &className, const Variant &args) +{ + // TODO +} + +void Stack::annotationEnd(const Variant &className, const Variant &elementName) +{ + // TODO +} + +void Stack::token(Variant token) +{ + // TODO +} +} +} + diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp new file mode 100644 index 0000000..76eefd9 --- /dev/null +++ b/src/core/parser/stack/Stack.hpp @@ -0,0 +1,341 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Stack.hpp + * + * Helper classes for document or description parsers. Contains the + * Stack class, which is an pushdown automaton responsible for + * accepting commands in the correct order and calling specified handlers. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_STACK_HPP_ +#define _OUSIA_PARSER_STACK_STACK_HPP_ + +#include <cstdint> + +#include <map> +#include <memory> +#include <set> +#include <vector> + +#include <core/common/Variant.hpp> +#include <core/parser/Parser.hpp> + +namespace ousia { + +// Forward declarations +class ParserContext; +class Logger; + +namespace parser_stack { + +// Forward declarations +class Handler; +class State; + +/** + * The HandlerInfo class is used internally by the stack to associate additional + * (mutable) data with a handler instance. + */ +class HandlerInfo { +public: + /** + * Pointer pointing at the actual handler instance. + */ + std::shared_ptr<Handler> handler; + + /** + * Next field index to be passed to the "fieldStart" function of the Handler + * class. + */ + size_t fieldIdx; + + /** + * Set to true if the handler is valid (which is the case if the "start" + * method has returned true). If the handler is invalid, no more calls are + * directed at it until it can be removed from the stack. + */ + bool valid : 1; + + /** + * Set to true if this is an implicit handler, that was created when the + * current stack state was deduced. + */ + bool implicit : 1; + + /** + * Set to true if the handler currently is in a field. + */ + bool inField : 1; + + /** + * Set to true if the handler currently is in the default field. + */ + bool inDefaultField : 1; + + /** + * Set to true if the handler currently is in an implicitly started default + * field. + */ + bool inImplicitDefaultField : 1; + + /** + * Set to false if this field is only opened pro-forma and does not accept + * any data. Otherwise set to true. + */ + bool inValidField : 1; + + /** + * Set to true, if the default field was already started. + */ + bool hadDefaultField : 1; + + /** + * Default constructor of the HandlerInfo class. + */ + HandlerInfo(); + /** + * Constructor of the HandlerInfo class, allows to set all flags manually. + */ + HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField, + bool inImplicitDefaultField, bool inValidField); + + /** + * Constructor of the HandlerInfo class, taking a shared_ptr to the handler + * to which additional information should be attached. + */ + HandlerInfo(std::shared_ptr<Handler> handler); + + /** + * Destructor of the HandlerInfo class (to allow Handler to be forward + * declared). + */ + ~HandlerInfo(); + + /** + * Updates the "field" flags according to a "fieldStart" event. + */ + void fieldStart(bool isDefault, bool isImplicit, bool isValid); + + /** + * Updates the "fields" flags according to a "fieldEnd" event. + */ + void fieldEnd(); +}; + +/** + * The Stack class is a pushdown automaton responsible for turning a command + * stream into a tree of Node instances. It does so by following a state + * transition graph and creating a set of Handler instances, which are placed + * on the stack. + */ +class Stack { +private: + /** + * Reference at the parser context. + */ + ParserContext &ctx; + + /** + * Map containing all registered command names and the corresponding + * state descriptors. + */ + const std::multimap<std::string, const State *> &states; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::vector<HandlerInfo> stack; + + /** + * Return the reference in the Logger instance stored within the context. + */ + Logger &logger(); + + /** + * Used internally to get all expected command names for the current state. + * This function is used to build error messages. + * + * @return a set of strings containing the names of the expected commands. + */ + std::set<std::string> expectedCommands(); + + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetState(const std::string &name); + + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state, also including the wildcard "*" state. + * Throws an exception if the given target state is not a valid identifier. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetStateOrWildcard(const std::string &name); + + /** + * Tries to reconstruct the parser state from the Scope instance of the + * ParserContext given in the constructor. This functionality is needed for + * including files,as the Parser of the included file needs to be brought to + * an equivalent state as the one in the including file. + */ + void deduceState(); + + /** + * Returns a reference at the current HandlerInfo instance (or a stub + * HandlerInfo instance if the stack is empty). + */ + HandlerInfo ¤tInfo(); + + /** + * Returns a reference at the last HandlerInfo instance (or a stub + * HandlerInfo instance if the stack has only one element). + */ + HandlerInfo &lastInfo(); + + /** + * Ends the current handler and removes the corresponding element from the + * stack. + */ + void endCurrentHandler(); + + /** + * Tries to start a default field for the current handler, if currently the + * handler is not inside a field and did not have a default field yet. + * + * @return true if the handler is inside a field, false if no field could + * be started. + */ + bool ensureHandlerIsInField(); + + /** + * Returns true if all handlers on the stack are currently valid, or false + * if at least one handler is invalid. + * + * @return true if all handlers on the stack are valid. + */ + bool handlersValid(); + +public: + /** + * Creates a new instance of the Stack class. + * + * @param ctx is the parser context the parser stack is working on. + * @param states is a map containing the command names and pointers at the + * corresponding State instances. + */ + Stack(ParserContext &ctx, + const std::multimap<std::string, const State *> &states); + + /** + * Destructor of the Stack class. + */ + ~Stack(); + + /** + * Returns the state the Stack instance currently is in. + * + * @return the state of the currently active Handler instance or STATE_NONE + * if no handler is on the stack. + */ + const State ¤tState(); + + /** + * Returns the command name that is currently being handled. + * + * @return the name of the command currently being handled by the active + * Handler instance or an empty string if no handler is currently active. + */ + std::string currentCommandName(); + + /** + * Function that should be called whenever a new command is reached. + * + * @param name is the name of the command (including the namespace + * separator ':') and its corresponding location. Must be a string variant. + * @param args is a map containing the arguments that were passed to the + * command. + */ + void command(const Variant &name, const Variant::mapType &args); + + /** + * Function that shuold be called whenever character data is found in the + * input stream. May only be called if the currently is a command on the + * stack. + * + * @param data is a string variant containing the data that has been found. + */ + void data(const Variant &data); + + /** + * Function that should be called whenever a new field starts. Fields of the + * same command may not be separated by calls to data or annotations. Doing + * so will result in a LoggableException. + * + * @param isDefault should be set to true if the started field explicitly + * is the default field. + */ + void fieldStart(bool isDefault); + + /** + * Function that should be called whenever a field ends. Calling this + * function if there is no field to end will result in a LoggableException. + */ + void fieldEnd(); + + /** + * Function that should be called whenever an annotation starts. + * + * @param name is the name of the annotation class. + * @param args is a map variant containing the arguments that were passed + * to the annotation. + */ + void annotationStart(const Variant &className, const Variant &args); + + /** + * Function that should be called whenever an annotation ends. + * + * @param name is the name of the annotation class that was ended. + * @param annotationName is the name of the annotation that was ended. + */ + void annotationEnd(const Variant &className, const Variant &elementName); + + /** + * Function that should be called whenever a previously registered token + * is found in the input stream. + * + * @param token is string variant containing the token that was encountered. + */ + void token(Variant token); +}; +} +} + +#endif /* _OUSIA_STACK_HPP_ */ + diff --git a/src/core/parser/ParserState.cpp b/src/core/parser/stack/State.cpp index f635d86..d72f533 100644 --- a/src/core/parser/ParserState.cpp +++ b/src/core/parser/stack/State.cpp @@ -16,88 +16,97 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "ParserState.hpp" +#include "State.hpp" namespace ousia { +namespace parser_stack { -/* Class ParserState */ +/* Class State */ -ParserState::ParserState() : elementHandler(nullptr) {} +State::State() : elementHandler(nullptr) {} -ParserState::ParserState(ParserStateSet parents, Arguments arguments, +State::State(StateSet parents, Arguments arguments, RttiSet createdNodeTypes, - HandlerConstructor elementHandler) + HandlerConstructor elementHandler, + bool supportsAnnotations) : parents(parents), arguments(arguments), createdNodeTypes(createdNodeTypes), - elementHandler(elementHandler) + elementHandler(elementHandler), + supportsAnnotations(supportsAnnotations) { } -ParserState::ParserState(const ParserStateBuilder &builder) - : ParserState(builder.build()) +State::State(const StateBuilder &builder) + : State(builder.build()) { } -/* Class ParserStateBuilder */ +/* Class StateBuilder */ -ParserStateBuilder &ParserStateBuilder::copy(const ParserState &state) +StateBuilder &StateBuilder::copy(const State &state) { this->state = state; return *this; } -ParserStateBuilder &ParserStateBuilder::parent(const ParserState *parent) +StateBuilder &StateBuilder::parent(const State *parent) { - state.parents = ParserStateSet{parent}; + state.parents = StateSet{parent}; return *this; } -ParserStateBuilder &ParserStateBuilder::parents(const ParserStateSet &parents) +StateBuilder &StateBuilder::parents(const StateSet &parents) { state.parents = parents; return *this; } -ParserStateBuilder &ParserStateBuilder::arguments(const Arguments &arguments) +StateBuilder &StateBuilder::arguments(const Arguments &arguments) { state.arguments = arguments; return *this; } -ParserStateBuilder &ParserStateBuilder::createdNodeType(const Rtti *type) +StateBuilder &StateBuilder::createdNodeType(const Rtti *type) { state.createdNodeTypes = RttiSet{type}; return *this; } -ParserStateBuilder &ParserStateBuilder::createdNodeTypes(const RttiSet &types) +StateBuilder &StateBuilder::createdNodeTypes(const RttiSet &types) { state.createdNodeTypes = types; return *this; } -ParserStateBuilder &ParserStateBuilder::elementHandler( +StateBuilder &StateBuilder::elementHandler( HandlerConstructor elementHandler) { state.elementHandler = elementHandler; return *this; } -const ParserState &ParserStateBuilder::build() const { return state; } +StateBuilder &StateBuilder::supportsAnnotations(bool supportsAnnotations) +{ + state.supportsAnnotations = supportsAnnotations; + return *this; +} -/* Class ParserStateDeductor */ +const State &StateBuilder::build() const { return state; } -ParserStateDeductor::ParserStateDeductor( +/* Class StateDeductor */ + +StateDeductor::StateDeductor( std::vector<const Rtti *> signature, - std::vector<const ParserState *> states) + std::vector<const State *> states) : tbl(signature.size()), signature(std::move(signature)), states(std::move(states)) { } -bool ParserStateDeductor::isActive(size_t d, const ParserState *s) +bool StateDeductor::isActive(size_t d, const State *s) { // Lookup the "active" state of (d, s), if it was not already set // (e.second is true) we'll have to calculate it @@ -123,7 +132,7 @@ bool ParserStateDeductor::isActive(size_t d, const ParserState *s) // Check whether any of the parent nodes were active -- either for // the previous element (if this one is generative) or for the // current element (assuming this node was not generative) - for (const ParserState *parent : s->parents) { + for (const State *parent : s->parents) { if ((isGenerative && isActive(d - 1, parent)) || isActive(d, parent)) { res = true; @@ -136,9 +145,9 @@ bool ParserStateDeductor::isActive(size_t d, const ParserState *s) return res; } -std::vector<const ParserState *> ParserStateDeductor::deduce() +std::vector<const State *> StateDeductor::deduce() { - std::vector<const ParserState *> res; + std::vector<const State *> res; if (!signature.empty()) { const size_t D = signature.size(); for (auto s : states) { @@ -153,9 +162,10 @@ std::vector<const ParserState *> ParserStateDeductor::deduce() /* Constant initializations */ -namespace ParserStates { -const ParserState All; -const ParserState None; +namespace States { +const State All; +const State None; +} } } diff --git a/src/core/parser/ParserState.hpp b/src/core/parser/stack/State.hpp index 6487fdd..4766235 100644 --- a/src/core/parser/ParserState.hpp +++ b/src/core/parser/stack/State.hpp @@ -17,10 +17,10 @@ */ /** - * @file ParserState.hpp + * @file State.hpp * - * Defines the ParserState class used within the ParserStack pushdown - * automaton and the ParserStateBuilder class for convenient construction of + * Defines the State class used within the ParserStack pushdown + * automaton and the StateBuilder class for convenient construction of * such classes. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) @@ -33,12 +33,14 @@ #include <core/common/Rtti.hpp> #include <core/common/Argument.hpp> +#include <core/common/Whitespace.hpp> namespace ousia { +namespace parser_stack { // Forward declarations -class ParserStateBuilder; -class ParserState; +class StateBuilder; +class State; class HandlerData; class Handler; using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); @@ -47,17 +49,17 @@ using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); * Set of pointers of parser states -- used for specifying a set of parent * states. */ -using ParserStateSet = std::unordered_set<const ParserState *>; +using StateSet = std::unordered_set<const State *>; /** - * Class used for the complete specification of a ParserState. Stores possible + * Class used for the complete specification of a State. Stores possible * parent states, state handlers and arguments to be passed to that state. */ -struct ParserState { +struct State { /** * Vector containing all possible parent states. */ - ParserStateSet parents; + StateSet parents; /** * Descriptor of the arguments that should be passed to the handler. @@ -66,8 +68,8 @@ struct ParserState { /** * Set containing the types of the nodes that may be created in this - * ParserState. This information is needed for Parsers to reconstruct the - * current ParserState from a given ParserScope when a file is included. + * State. This information is needed for Parsers to reconstruct the + * current State from a given ParserScope when a file is included. */ RttiSet createdNodeTypes; @@ -79,109 +81,119 @@ struct ParserState { HandlerConstructor elementHandler; /** + * Set to true if this handler does support annotations. This is almost + * always false (e.g. all description handlers), except for document + * element handlers. + */ + bool supportsAnnotations; + + /** * Default constructor, initializes the handlers with nullptr. */ - ParserState(); + State(); /** - * Constructor taking values for all fields. Use the ParserStateBuilder - * class for a more convenient construction of ParserState instances. + * Constructor taking values for all fields. Use the StateBuilder + * class for a more convenient construction of State instances. * * @param parents is a vector containing all possible parent states. * @param arguments is a descriptor of arguments that should be passed to * the handler. * @param createdNodeTypes is a set containing the types of the nodes tha - * may be created in this ParserState. This information is needed for - * Parsers to reconstruct the current ParserState from a given ParserScope + * may be created in this State. This information is needed for + * Parsers to reconstruct the current State from a given ParserScope * when a file is included. * @param elementHandler is a pointer at a function which creates a new * concrete Handler instance for the elements described by this state. May * be nullptr in which case no handler instance is created. + * @param supportsAnnotations specifies whether annotations are supported + * here at all. */ - ParserState(ParserStateSet parents, Arguments arguments = Arguments{}, + State(StateSet parents, Arguments arguments = Arguments{}, RttiSet createdNodeTypes = RttiSet{}, - HandlerConstructor elementHandler = nullptr); + HandlerConstructor elementHandler = nullptr, + bool supportsAnnotations = false); /** - * Creates this ParserState from the given ParserStateBuilder instance. + * Creates this State from the given StateBuilder instance. */ - ParserState(const ParserStateBuilder &builder); + State(const StateBuilder &builder); }; /** - * The ParserStateBuilder class is a class used for conveniently building new - * ParserState instances. + * The StateBuilder class is a class used for conveniently building new + * State instances. */ -class ParserStateBuilder { +class StateBuilder { private: /** - * ParserState instance that is currently being built by the - * ParserStateBuilder. + * State instance that is currently being built by the + * StateBuilder. */ - ParserState state; + State state; public: /** - * Copies the ParserState instance and uses it as internal state. Overrides - * all changes made by the ParserStateBuilder. + * Copies the State instance and uses it as internal state. Overrides + * all changes made by the StateBuilder. * * @param state is the state that should be copied. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder ©(const ParserState &state); + StateBuilder ©(const State &state); /** * Sets the possible parent states to the single given parent element. * - * @param parent is a pointer at the parent ParserState instance that should + * @param parent is a pointer at the parent State instance that should * be the possible parent state. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &parent(const ParserState *parent); + StateBuilder &parent(const State *parent); /** - * Sets the ParserState instances in the given ParserStateSet as the list of + * Sets the State instances in the given StateSet as the list of * supported parent states. * - * @param parents is a set of pointers at ParserState instances that should + * @param parents is a set of pointers at State instances that should * be the possible parent states. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &parents(const ParserStateSet &parents); + StateBuilder &parents(const StateSet &parents); /** * Sets the arguments that should be passed to the parser state handler to * those given as argument. * * @param arguments is the Arguments instance describing the Arguments that - * should be parsed to a Handler for this ParserState. - * @return a reference at this ParserStateBuilder instance for method + * should be parsed to a Handler for this State. + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &arguments(const Arguments &arguments); + StateBuilder &arguments(const Arguments &arguments); /** * Sets the Node types this state may produce to the given Rtti descriptor. * * @param type is the Rtti descriptor of the Type that may be produced by * this state. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &createdNodeType(const Rtti *type); + StateBuilder &createdNodeType(const Rtti *type); /** * Sets the Node types this state may produce to the given Rtti descriptors. * * @param types is a set of Rtti descriptors of the Types that may be * produced by this state. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &createdNodeTypes(const RttiSet &types); + StateBuilder &createdNodeTypes(const RttiSet &types); /** * Sets the constructor for the element handler. The constructor creates a @@ -191,31 +203,42 @@ public: * * @param elementHandler is the HandlerConstructor that should create a * new Handler instance. - * @return a reference at this ParserStateBuilder instance for method + * @return a reference at this StateBuilder instance for method * chaining. */ - ParserStateBuilder &elementHandler(HandlerConstructor elementHandler); + StateBuilder &elementHandler(HandlerConstructor elementHandler); /** - * Returns a reference at the internal ParserState instance that was built - * using the ParserStateBuilder. + * Sets the state of the "supportsAnnotations" flags (default value is + * false) * - * @return the built ParserState. + * @param supportsAnnotations should be set to true, if annotations are + * supported for the handlers associated with this document. + * @return a reference at this StateBuilder instance for method + * chaining. */ - const ParserState &build() const; + StateBuilder &supportsAnnotations(bool supportsAnnotations); + + /** + * Returns a reference at the internal State instance that was built + * using the StateBuilder. + * + * @return the built State. + */ + const State &build() const; }; /** - * Class used to deduce the ParserState a Parser is currently in based on the + * Class used to deduce the State a Parser is currently in based on the * types of the Nodes that currently are on the ParserStack. Uses dynamic * programming in order to solve this problem. */ -class ParserStateDeductor { +class StateDeductor { public: /** * Type containing the dynamic programming table. */ - using Table = std::vector<std::unordered_map<const ParserState *, bool>>; + using Table = std::vector<std::unordered_map<const State *, bool>>; private: /** @@ -231,7 +254,7 @@ private: /** * List of states that should be checked for being active. */ - const std::vector<const ParserState *> states; + const std::vector<const State *> states; /** * Used internally to check whether the given parser stack s may have been @@ -239,20 +262,20 @@ private: * * @param d is the signature element. * @param s is the parser state. - * @return true if the the given ParserState may have been active. + * @return true if the the given State may have been active. */ - bool isActive(size_t d, const ParserState *s); + bool isActive(size_t d, const State *s); public: /** - * Constructor of the ParserStateDeductor class. + * Constructor of the StateDeductor class. * * @param signature a Node type signature describing the types of the nodes * which currently reside on e.g. the ParserScope stack. * @param states is a list of states that should be checked. */ - ParserStateDeductor(std::vector<const Rtti *> signature, - std::vector<const ParserState *> states); + StateDeductor(std::vector<const Rtti *> signature, + std::vector<const State *> states); /** * Selects all active states from the given states. Only considers those @@ -260,23 +283,24 @@ public: * * @return a list of states that may actually have been active. */ - std::vector<const ParserState *> deduce(); + std::vector<const State *> deduce(); }; /** - * The ParserStates namespace contains all the global state constants used + * The States namespace contains all the global state constants used * in the ParserStack class. */ -namespace ParserStates { +namespace States { /** * State representing all states. */ -extern const ParserState All; +extern const State All; /** * State representing the initial state. */ -extern const ParserState None; +extern const State None; +} } } diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index 2cc7dfb..8fd9525 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -16,32 +16,46 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "TypesystemHandler.hpp" - #include <core/model/Typesystem.hpp> +#include <core/model/Domain.hpp> #include <core/parser/ParserScope.hpp> +#include <core/parser/ParserContext.hpp> + +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" namespace ousia { +namespace parser_stack { /* TypesystemHandler */ -void TypesystemHandler::start(Variant::mapType &args) +bool TypesystemHandler::start(Variant::mapType &args) { // Create the typesystem instance Rooted<Typesystem> typesystem = - project()->createTypesystem(args["name"].asString()); + context().getProject()->createTypesystem(args["name"].asString()); typesystem->setLocation(location()); + // If the typesystem is defined inside a domain, add a reference to the + // typesystem to the domain + Rooted<Domain> domain = scope().select<Domain>(); + if (domain != nullptr) { + domain->reference(typesystem); + } + // Push the typesystem onto the scope, set the POST_HEAD flag to true scope().push(typesystem); scope().setFlag(ParserFlag::POST_HEAD, false); + + return true; } void TypesystemHandler::end() { scope().pop(); } /* TypesystemEnumHandler */ -void TypesystemEnumHandler::start(Variant::mapType &args) +bool TypesystemEnumHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -52,33 +66,24 @@ void TypesystemEnumHandler::start(Variant::mapType &args) enumType->setLocation(location()); scope().push(enumType); + + return true; } void TypesystemEnumHandler::end() { scope().pop(); } /* TypesystemEnumEntryHandler */ -void TypesystemEnumEntryHandler::start(Variant::mapType &args) {} - -void TypesystemEnumEntryHandler::end() +void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData, + Variant::mapType &args) { Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>(); - enumType->addEntry(entry, logger()); -} - -void TypesystemEnumEntryHandler::data(const std::string &data, int field) -{ - if (field != 0) { - // TODO: This should be stored in the HandlerData - logger().error("Enum entry only has one field."); - return; - } - entry.append(data); + enumType->addEntry(fieldData.asString(), logger()); } /* TypesystemStructHandler */ -void TypesystemStructHandler::start(Variant::mapType &args) +bool TypesystemStructHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -103,13 +108,15 @@ void TypesystemStructHandler::start(Variant::mapType &args) }); } scope().push(structType); + + return true; } void TypesystemStructHandler::end() { scope().pop(); } /* TypesystemStructFieldHandler */ -void TypesystemStructFieldHandler::start(Variant::mapType &args) +bool TypesystemStructFieldHandler::start(Variant::mapType &args) { // Read the argument values const std::string &name = args["name"].asString(); @@ -142,13 +149,13 @@ void TypesystemStructFieldHandler::start(Variant::mapType &args) } }); } -} -void TypesystemStructFieldHandler::end() {} + return true; +} /* TypesystemConstantHandler */ -void TypesystemConstantHandler::start(Variant::mapType &args) +bool TypesystemConstantHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -169,7 +176,51 @@ void TypesystemConstantHandler::start(Variant::mapType &args) constant.cast<Constant>()->setType(type.cast<Type>(), logger); } }); + + return true; } -void TypesystemConstantHandler::end() {} +namespace States { +const State Typesystem = StateBuilder() + .parents({&None, &Domain}) + .createdNodeType(&RttiTypes::Typesystem) + .elementHandler(TypesystemHandler::create) + .arguments({Argument::String("name", "")}); + +const State TypesystemEnum = StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::EnumType) + .elementHandler(TypesystemEnumHandler::create) + .arguments({Argument::String("name")}); + +const State TypesystemEnumEntry = + StateBuilder() + .parent(&TypesystemEnum) + .elementHandler(TypesystemEnumEntryHandler::create) + .arguments({}); + +const State TypesystemStruct = + StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::StructType) + .elementHandler(TypesystemStructHandler::create) + .arguments({Argument::String("name"), Argument::String("parent", "")}); + +const State TypesystemStructField = + StateBuilder() + .parent(&TypesystemStruct) + .elementHandler(TypesystemStructFieldHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("default", Variant::fromObject(nullptr))}); + +const State TypesystemConstant = + StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::Constant) + .elementHandler(TypesystemConstantHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("value")}); +} } +} + diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp index 76a7bc9..85494f1 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -19,6 +19,9 @@ /** * @file TypesystemHandler.hpp * + * Contains the Handler classes used to parse Typesystem descriptions. The + * Handlers parse all the tags found below and including the "typesystem" tag. + * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -26,96 +29,180 @@ #define _OUSIA_TYPESYSTEM_HANDLER_HPP_ #include <core/common/Variant.hpp> -#include <core/parser/ParserStack.hpp> + +#include "Handler.hpp" namespace ousia { +namespace parser_stack { -class TypesystemHandler : public Handler { +/** + * Handles the occurance of the "typesystem" tag. Creates a new Typesystem + * instance and places it on the ParserScope. + */ +class TypesystemHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemHandler{handlerData}; } }; -class TypesystemEnumHandler : public Handler { +/** + * Handles the occurance of the "enum" tag. Creates a new EnumType instance and + * places it on the ParserScope. + */ +class TypesystemEnumHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemEnumHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemEnumHandler{handlerData}; } }; -class TypesystemEnumEntryHandler : public Handler { +/** + * Handles the occurance of the "entry" tag within an "enum" tag. Creates a new + * EnumType instance and places it on the ParserScope. + */ +class TypesystemEnumEntryHandler : public StaticFieldHandler { public: - using Handler::Handler; + using StaticFieldHandler::StaticFieldHandler; - std::string entry; - - void start(Variant::mapType &args) override; - - void end() override; - - void data(const std::string &data, int field) override; + void doHandle(const Variant &fieldData, Variant::mapType &args) override; + /** + * Creates a new instance of the TypesystemEnumEntryHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { - return new TypesystemEnumEntryHandler{handlerData}; + return new TypesystemEnumEntryHandler{handlerData, "name"}; } }; -class TypesystemStructHandler : public Handler { +/** + * Handles the occurance of the "struct" tag within a typesystem description. + * Creates a new StructType instance and places it on the ParserScope. + */ +class TypesystemStructHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemStructHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemStructHandler{handlerData}; } }; -class TypesystemStructFieldHandler : public Handler { +/** + * Handles the occurance of the "field" tag within a typesystem structure + * description. Places a new Attribute instance in the StructType instance + * that is currently at the top of the scope. + */ +class TypesystemStructFieldHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; - void end() override; + bool start(Variant::mapType &args) override; + /** + * Creates a new instance of the TypesystemStructFieldHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemStructFieldHandler{handlerData}; } }; -class TypesystemConstantHandler : public Handler { +/** + * Handles the occurance of the "constant" tag within a typesystem structure + * description. Places a new Constant instance in the current typesystem. + */ +class TypesystemConstantHandler : public StaticHandler { public: - using Handler::Handler; + using StaticHandler::StaticHandler; - void start(Variant::mapType &args) override; - - void end() override; + bool start(Variant::mapType &args) override; + /** + * Creates a new instance of the TypesystemConstantHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemConstantHandler{handlerData}; } }; + +namespace States { +/** + * State representing the "typesystem" tag. + */ +extern const State Typesystem; +/** + * State representing the "enum" tag within a typesystem. + */ +extern const State TypesystemEnum; +/** + * State representing the "entry" tag within an enum. + */ +extern const State TypesystemEnumEntry; +/** + * State representing the "struct" tag within a typesystem. + */ +extern const State TypesystemStruct; +/** + * State representing the "field" tag within a typesystem structure. + */ +extern const State TypesystemStructField; +/** + * State representing the "constant" tag within a typesystem. + */ +extern const State TypesystemConstant; +} +} } #endif diff --git a/src/formats/osdm/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp index 4a0430b..4a0430b 100644 --- a/src/formats/osdm/TokenTrie.cpp +++ b/src/core/parser/utils/TokenTrie.cpp diff --git a/src/formats/osdm/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp index 36c2ffa..36c2ffa 100644 --- a/src/formats/osdm/TokenTrie.hpp +++ b/src/core/parser/utils/TokenTrie.hpp diff --git a/src/formats/osdm/DynamicTokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index f2cfcd1..3c8177d 100644 --- a/src/formats/osdm/DynamicTokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -22,8 +22,9 @@ #include <core/common/CharReader.hpp> #include <core/common/Exceptions.hpp> #include <core/common/Utils.hpp> +#include <core/common/WhitespaceHandler.hpp> -#include "DynamicTokenizer.hpp" +#include "Tokenizer.hpp" namespace ousia { @@ -38,7 +39,7 @@ struct TokenMatch { /** * Token that was matched. */ - DynamicToken token; + Token token; /** * Current length of the data within the text handler. The text buffer needs @@ -102,8 +103,8 @@ public: * @param textLength is the text buffer length of the previous text token. * @param textEnd is the current end location of the previous text token. */ - TokenLookup(const TokenTrie::Node *node, size_t start, - size_t textLength, size_t textEnd) + TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength, + size_t textEnd) : node(node), start(start), textLength(textLength), textEnd(textEnd) { } @@ -116,10 +117,10 @@ public: * @param c is the character that should be appended to the current prefix. * @param lookups is a list to which new TokeLookup instances are added -- * which could potentially be expanded in the next iteration. - * @param match is the DynamicToken instance to which the matching token + * @param match is the Token instance to which the matching token * should be written. * @param tokens is a reference at the internal token list of the - * DynamicTokenizer. + * Tokenizer. * @param end is the end byte offset of the current character. * @param sourceId is the source if of this file. */ @@ -142,7 +143,7 @@ public: size_t len = str.size(); if (len > match.token.content.size()) { match.token = - DynamicToken{node->type, str, {sourceId, start, end}}; + Token{node->type, str, {sourceId, start, end}}; match.textLength = textLength; match.textEnd = textEnd; } @@ -155,203 +156,40 @@ public: } }; -/* Internal class TextHandlerBase */ - -/** - * Base class used for those classes that may be used as TextHandler in the - * DynamicTokenizer::next function. - */ -class TextHandlerBase { -public: - /** - * Start position of the extracted text. - */ - size_t textStart; - - /** - * End position of the extracted text. - */ - size_t textEnd; - - /** - * Buffer containing the extracted text. - */ - std::vector<char> textBuf; - - /** - * Constructor of the TextHandlerBase base class. Initializes the start and - * end position with zeros. - */ - TextHandlerBase() : textStart(0), textEnd(0) {} - - /** - * Transforms the given token into a text token containing the extracted - * text. - * - * @param token is the output token to which the text should be written. - * @param sourceId is the source id of the underlying file. - */ - void buildTextToken(TokenMatch &match, SourceId sourceId) - { - if (match.hasMatch()) { - match.token.content = - std::string{textBuf.data(), match.textLength}; - match.token.location = - SourceLocation{sourceId, textStart, match.textEnd}; - } else { - match.token.content = std::string{textBuf.data(), textBuf.size()}; - match.token.location = SourceLocation{sourceId, textStart, textEnd}; - } - match.token.type = TextToken; - } - - /** - * Returns true if this whitespace handler has found any text and a text - * token could be emitted. - * - * @return true if the internal data buffer is non-empty. - */ - bool hasText() { return !textBuf.empty(); } -}; - -/* Internal class PreservingTextHandler */ - -/** - * The PreservingTextHandler class preserves all characters unmodified, - * including whitepace characters. - */ -class PreservingTextHandler : public TextHandlerBase { -public: - using TextHandlerBase::TextHandlerBase; - - /** - * Appends the given character to the internal text buffer, does not - * eliminate whitespace. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - textBuf.push_back(c); - } -}; - -/* Internal class TrimmingTextHandler */ - /** - * The TrimmingTextHandler class trims all whitespace characters at the begin - * and the end of a text section but leaves all other characters unmodified, - * including whitepace characters. + * Transforms the given token into a text token containing the extracted + * text. + * + * @param handler is the WhitespaceHandler containing the collected data. + * @param token is the output token to which the text should be written. + * @param sourceId is the source id of the underlying file. */ -class TrimmingTextHandler : public TextHandlerBase { -public: - using TextHandlerBase::TextHandlerBase; - - /** - * Buffer used internally to temporarily store all whitespace characters. - * They are only added to the output buffer if another non-whitespace - * character is reached. - */ - std::vector<char> whitespaceBuf; - - /** - * Appends the given character to the internal text buffer, eliminates - * whitespace characters at the begin and end of the text. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - whitespaceBuf.push_back(c); - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (!whitespaceBuf.empty()) { - textBuf.insert(textBuf.end(), whitespaceBuf.begin(), - whitespaceBuf.end()); - whitespaceBuf.clear(); - } - textBuf.push_back(c); - } -}; - -/* Internal class CollapsingTextHandler */ - -/** - * The CollapsingTextHandler trims characters at the beginning and end of the - * text and reduced multiple whitespace characters to a single blank. - */ -class CollapsingTextHandler : public TextHandlerBase { -public: - using TextHandlerBase::TextHandlerBase; - - /** - * Flag set to true if a whitespace character was reached. - */ - bool hasWhitespace = false; - - /** - * Appends the given character to the internal text buffer, eliminates - * redundant whitespace characters. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - hasWhitespace = true; - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (hasWhitespace) { - textBuf.push_back(' '); - hasWhitespace = false; - } - textBuf.push_back(c); +static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match, + SourceId sourceId) +{ + if (match.hasMatch()) { + match.token.content = + std::string{handler.textBuf.data(), match.textLength}; + match.token.location = + SourceLocation{sourceId, handler.textStart, match.textEnd}; + } else { + match.token.content = handler.toString(); + match.token.location = + SourceLocation{sourceId, handler.textStart, handler.textEnd}; } -}; + match.token.type = TextToken; +} } -/* Class DynamicTokenizer */ +/* Class Tokenizer */ -DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode) +Tokenizer::Tokenizer(WhitespaceMode whitespaceMode) : whitespaceMode(whitespaceMode), nextTokenTypeId(0) { } template <typename TextHandler, bool read> -bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token) +bool Tokenizer::next(CharReader &reader, Token &token) { // If we're in the read mode, reset the char reader peek position to the // current read position @@ -409,9 +247,8 @@ bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token) } // If we found text, emit that text - if (textHandler.hasText() && - (!match.hasMatch() || match.textLength > 0)) { - textHandler.buildTextToken(match, sourceId); + if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { + buildTextToken(textHandler, match, sourceId); } // Move the read/peek cursor to the end of the token, abort if an error @@ -431,38 +268,38 @@ bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token) } token = match.token; } else { - token = DynamicToken{}; + token = Token{}; } return match.hasMatch(); } -bool DynamicTokenizer::read(CharReader &reader,DynamicToken &token) +bool Tokenizer::read(CharReader &reader, Token &token) { switch (whitespaceMode) { case WhitespaceMode::PRESERVE: - return next<PreservingTextHandler, true>(reader, token); + return next<PreservingWhitespaceHandler, true>(reader, token); case WhitespaceMode::TRIM: - return next<TrimmingTextHandler, true>(reader, token); + return next<TrimmingWhitespaceHandler, true>(reader, token); case WhitespaceMode::COLLAPSE: - return next<CollapsingTextHandler, true>(reader, token); + return next<CollapsingWhitespaceHandler, true>(reader, token); } return false; } -bool DynamicTokenizer::peek(CharReader &reader,DynamicToken &token) +bool Tokenizer::peek(CharReader &reader, Token &token) { switch (whitespaceMode) { case WhitespaceMode::PRESERVE: - return next<PreservingTextHandler, false>(reader, token); + return next<PreservingWhitespaceHandler, false>(reader, token); case WhitespaceMode::TRIM: - return next<TrimmingTextHandler, false>(reader, token); + return next<TrimmingWhitespaceHandler, false>(reader, token); case WhitespaceMode::COLLAPSE: - return next<CollapsingTextHandler, false>(reader, token); + return next<CollapsingWhitespaceHandler, false>(reader, token); } return false; } -TokenTypeId DynamicTokenizer::registerToken(const std::string &token) +TokenTypeId Tokenizer::registerToken(const std::string &token) { // Abort if an empty token should be registered if (token.empty()) { @@ -493,14 +330,14 @@ TokenTypeId DynamicTokenizer::registerToken(const std::string &token) // Try to register the token in the trie -- if this fails, remove it // from the tokens list if (!trie.registerToken(token, type)) { - tokens[type] = std::string(); + tokens[type] = std::string{}; nextTokenTypeId = type; return EmptyToken; } return type; } -bool DynamicTokenizer::unregisterToken(TokenTypeId type) +bool Tokenizer::unregisterToken(TokenTypeId type) { // Unregister the token from the trie, abort if an invalid type is given if (type < tokens.size() && trie.unregisterToken(tokens[type])) { @@ -511,7 +348,7 @@ bool DynamicTokenizer::unregisterToken(TokenTypeId type) return false; } -std::string DynamicTokenizer::getTokenString(TokenTypeId type) +std::string Tokenizer::getTokenString(TokenTypeId type) { if (type < tokens.size()) { return tokens[type]; @@ -519,26 +356,26 @@ std::string DynamicTokenizer::getTokenString(TokenTypeId type) return std::string{}; } -void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode) +void Tokenizer::setWhitespaceMode(WhitespaceMode mode) { whitespaceMode = mode; } -WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; } +WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; } /* Explicitly instantiate all possible instantiations of the "next" member function */ -template bool DynamicTokenizer::next<PreservingTextHandler, false>( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next<TrimmingTextHandler, false>( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next<CollapsingTextHandler, false>( - CharReader &reader,DynamicToken &token); -template bool DynamicTokenizer::next<PreservingTextHandler, true>( - CharReader &reader,DynamicToken &token); -template bool DynamicTokenizer::next<TrimmingTextHandler, true>( - CharReader &reader,DynamicToken &token); -template bool DynamicTokenizer::next<CollapsingTextHandler, true>( - CharReader &reader,DynamicToken &token); +template bool Tokenizer::next<PreservingWhitespaceHandler, false>( + CharReader &reader, Token &token); +template bool Tokenizer::next<TrimmingWhitespaceHandler, false>( + CharReader &reader, Token &token); +template bool Tokenizer::next<CollapsingWhitespaceHandler, false>( + CharReader &reader, Token &token); +template bool Tokenizer::next<PreservingWhitespaceHandler, true>( + CharReader &reader, Token &token); +template bool Tokenizer::next<TrimmingWhitespaceHandler, true>( + CharReader &reader, Token &token); +template bool Tokenizer::next<CollapsingWhitespaceHandler, true>( + CharReader &reader, Token &token); } diff --git a/src/formats/osdm/DynamicTokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp index 0cac2e8..6b4e116 100644 --- a/src/formats/osdm/DynamicTokenizer.hpp +++ b/src/core/parser/utils/Tokenizer.hpp @@ -17,7 +17,7 @@ */ /** - * @file DynamicTokenizer.hpp + * @file Tokenizer.hpp * * Tokenizer that can be reconfigured at runtime used for parsing the plain * text format. @@ -33,6 +33,7 @@ #include <vector> #include <core/common/Location.hpp> +#include <core/common/Whitespace.hpp> #include "TokenTrie.hpp" @@ -42,9 +43,9 @@ namespace ousia { class CharReader; /** - * The DynamicToken structure describes a token discovered by the Tokenizer. + * The Token structure describes a token discovered by the Tokenizer. */ -struct DynamicToken { +struct Token { /** * Id of the type of this token. */ @@ -63,28 +64,28 @@ struct DynamicToken { /** * Default constructor. */ - DynamicToken() : type(EmptyToken) {} + Token() : type(EmptyToken) {} /** - * Constructor of the DynamicToken struct. + * Constructor of the Token struct. * * @param id represents the token type. * @param content is the string content that has been extracted. * @param location is the location of the extracted string content in the * source file. */ - DynamicToken(TokenTypeId type, const std::string &content, + Token(TokenTypeId type, const std::string &content, SourceLocation location) : type(type), content(content), location(location) { } /** - * Constructor of the DynamicToken struct, only initializes the token type + * Constructor of the Token struct, only initializes the token type * * @param type is the id corresponding to the type of the token. */ - DynamicToken(TokenTypeId type) : type(type) {} + Token(TokenTypeId type) : type(type) {} /** * The getLocation function allows the tokens to be directly passed as @@ -96,35 +97,13 @@ struct DynamicToken { }; /** - * Enum specifying the whitespace handling of the DynamicTokenizer class when - * reading non-token text. - */ -enum class WhitespaceMode { - /** - * Preserves all whitespaces as they are found in the source file. - */ - PRESERVE, - - /** - * Trims whitespace at the beginning and the end of the found text. - */ - TRIM, - - /** - * Whitespaces are trimmed and collapsed, multiple whitespace characters - * are replaced by a single space character. - */ - COLLAPSE -}; - -/** - * The DynamicTokenizer is used to extract tokens and chunks of text from a + * The Tokenizer is used to extract tokens and chunks of text from a * CharReader. It allows to register and unregister tokens while parsing and * to modify the handling of whitespace characters. Note that the - * DynamicTokenizer always tries to extract the longest possible token from the + * Tokenizer always tries to extract the longest possible token from the * tokenizer. */ -class DynamicTokenizer { +class Tokenizer { private: /** * Internally used token trie. This object holds all registered tokens. @@ -161,15 +140,15 @@ private: * @return false if the end of the stream has been reached, true otherwise. */ template <typename TextHandler, bool read> - bool next(CharReader &reader, DynamicToken &token); + bool next(CharReader &reader, Token &token); public: /** - * Constructor of the DynamicTokenizer class. + * Constructor of the Tokenizer class. * * @param whitespaceMode specifies how whitespace should be handled. */ - DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); + Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * Registers the given string as a token. Returns a const pointer at a @@ -222,7 +201,7 @@ public: /** * Reads a new token from the CharReader and stores it in the given - * DynamicToken instance. + * Token instance. * * @param reader is the CharReader instance from which the data should be * read. @@ -231,7 +210,7 @@ public: * @return true if a token could be read, false if the end of the stream * has been reached. */ - bool read(CharReader &reader, DynamicToken &token); + bool read(CharReader &reader, Token &token); /** * The peek method does not advance the read position of the char reader, @@ -244,7 +223,7 @@ public: * @return true if a token could be read, false if the end of the stream * has been reached. */ - bool peek(CharReader &reader, DynamicToken &token); + bool peek(CharReader &reader, Token &token); }; } diff --git a/src/formats/osml/OsmlParser.cpp b/src/formats/osml/OsmlParser.cpp new file mode 100644 index 0000000..4973639 --- /dev/null +++ b/src/formats/osml/OsmlParser.cpp @@ -0,0 +1,57 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/parser/generic/ParserStateCallbacks.hpp> +#include <core/parser/generic/ParserStateStack.hpp> + +#include "OsdmParser.hpp" +#include "OsdmStreamParser.hpp" + +namespace ousia { + +namespace { + +/** + * The OsdmParserImplementation class contains the actual implementation of the + * parsing process and is created in the "doParse" function of the OsdmParser. + + */ +class OsdmParserImplementation : public ParserStateCallbacks { +private: + /** + * OsdmStreamParser instance. + */ + OsdmStreamParser parser; + + /** + * Instance of the ParserStateStack. + */ + ParserStateStack stack; + +public: + OsdmParserImplementation parser(reader, ctx) : parser(reader), stack(ctx, std::multimap) +}; +} + +void OsdmParser::doParse(CharReader &reader, ParserContext &ctx) +{ + OsdmParserImplementation parser(reader, ctx); + parser.parse(); +} + +} diff --git a/src/core/parser/generic/GenericParser.hpp b/src/formats/osml/OsmlParser.hpp index 4f29f94..37505b4 100644 --- a/src/core/parser/generic/GenericParser.hpp +++ b/src/formats/osml/OsmlParser.hpp @@ -17,33 +17,32 @@ */ /** - * @file GenericParser.hpp + * @file OsdmParser.hpp * - * The GenericParser class builds an abstraction layer that separates the - * underlying document format (e.g. osdm or osdmx) from the actual process of - * building the document model. It provides a set of genric functions that - * should be called by the inheriting concrete parser class, e.g. indicating a - * command with parameters, the start/end of a field or the start/end of an - * annotation. The GenericParser maintains an internal stack of - * ParserStateHandlers and relays the commands to the elements of this stack. + * Contains the parser of the osdm format, the standard plain-text format used + * by Ousía for documents. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_GENERIC_PARSER_HPP_ -#define _OUSIA_GENERIC_PARSER_HPP_ +#ifndef _OUSIA_OSDM_PARSER_HPP_ +#define _OUSIA_OSDM_PARSER_HPP_ -#include <core/parser/Parseer.hpp> +#include <core/parser/Parser.hpp> namespace ousia { -class GenericParser : public Parser { - - - +/** + * OsdmParser is a small wrapper implementing the Parser interface. The actual + * parsing is performed with the OsdmStreamParser in conjunction with the + * ParserStateStack. + */ +class OsdmParser : public Parser { +protected: + void doParse(CharReader &reader, ParserContext &ctx) override; }; } -#endif _OUSIA_GENERIC_PARSER_HPP_ +#endif /* _OUSIA_OSDM_PARSER_HPP_ */ diff --git a/src/formats/osdm/OsdmStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index 8cb8caf..0174fa4 100644 --- a/src/formats/osdm/OsdmStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -21,14 +21,14 @@ #include <core/common/Utils.hpp> #include <core/common/VariantReader.hpp> -#include "OsdmStreamParser.hpp" +#include "OsmlStreamParser.hpp" namespace ousia { /** * Plain format default tokenizer. */ -class PlainFormatTokens : public DynamicTokenizer { +class PlainFormatTokens : public Tokenizer { public: /** * Id of the backslash token. @@ -61,6 +61,21 @@ public: TokenTypeId FieldEnd; /** + * Id of the default field start token. + */ + TokenTypeId DefaultFieldStart; + + /** + * Id of the annotation start token. + */ + TokenTypeId AnnotationStart; + + /** + * Id of the annotation end token. + */ + TokenTypeId AnnotationEnd; + + /** * Registers the plain format tokens in the internal tokenizer. */ PlainFormatTokens() @@ -71,6 +86,9 @@ public: BlockCommentEnd = registerToken("}%"); FieldStart = registerToken("{"); FieldEnd = registerToken("}"); + DefaultFieldStart = registerToken("{!"); + AnnotationStart = registerToken("<\\"); + AnnotationEnd = registerToken("\\>"); } }; @@ -160,14 +178,14 @@ public: } }; -OsdmStreamParser::OsdmStreamParser(CharReader &reader, Logger &logger) +OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) : reader(reader), logger(logger), tokenizer(Tokens) { // Place an intial command representing the complete file on the stack - commands.push(Command{"", Variant::mapType{}, true, true, true}); + commands.push(Command{"", Variant::mapType{}, true, true, true, false}); } -Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep) +Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) { bool first = true; bool hasCharSiceNSSep = false; @@ -210,7 +228,7 @@ Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep) return res; } -OsdmStreamParser::State OsdmStreamParser::parseBeginCommand() +OsmlStreamParser::State OsmlStreamParser::parseBeginCommand() { // Expect a '{' after the command reader.consumeWhitespace(); @@ -251,7 +269,7 @@ OsdmStreamParser::State OsdmStreamParser::parseBeginCommand() return State::COMMAND; } -static bool checkStillInField(const OsdmStreamParser::Command &cmd, +static bool checkStillInField(const OsmlStreamParser::Command &cmd, const Variant &endName, Logger &logger) { if (cmd.inField && !cmd.inRangeField) { @@ -264,7 +282,7 @@ static bool checkStillInField(const OsdmStreamParser::Command &cmd, return false; } -OsdmStreamParser::State OsdmStreamParser::parseEndCommand() +OsmlStreamParser::State OsmlStreamParser::parseEndCommand() { // Expect a '{' after the command if (!reader.expect('{')) { @@ -327,7 +345,7 @@ OsdmStreamParser::State OsdmStreamParser::parseEndCommand() return cmd.inRangeField ? State::FIELD_END : State::NONE; } -Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName) +Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName) { // Parse the arguments using the universal VariantReader Variant commandArguments; @@ -353,7 +371,7 @@ Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName) return commandArguments; } -void OsdmStreamParser::pushCommand(Variant commandName, +void OsmlStreamParser::pushCommand(Variant commandName, Variant commandArguments, bool hasRange) { // Store the location on the stack @@ -365,10 +383,11 @@ void OsdmStreamParser::pushCommand(Variant commandName, commands.pop(); } commands.push(Command{std::move(commandName), std::move(commandArguments), - hasRange, false, false}); + hasRange, false, false, false}); } -OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start) +OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, + bool isAnnotation) { // Parse the commandName as a first identifier Variant commandName = parseIdentifier(start, true); @@ -382,6 +401,9 @@ OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start) Utils::split(commandName.asString(), ':'); const bool isBegin = commandNameComponents[0] == "begin"; const bool isEnd = commandNameComponents[0] == "end"; + + // Parse the begin or end command + State res = State::COMMAND; if (isBegin || isEnd) { if (commandNameComponents.size() > 1) { logger.error( @@ -390,35 +412,81 @@ OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start) commandName); } if (isBegin) { - return parseBeginCommand(); + res = parseBeginCommand(); } else if (isEnd) { - return parseEndCommand(); + res = parseEndCommand(); } + } else { + // Check whether the next character is a '#', indicating the start of + // the command name + Variant commandArgName; + start = reader.getOffset(); + if (reader.expect('#')) { + commandArgName = parseIdentifier(start); + if (commandArgName.asString().empty()) { + logger.error("Expected identifier after \"#\"", commandArgName); + } + } + + // Parse the arugments + Variant commandArguments = + parseCommandArguments(std::move(commandArgName)); + + // Push the command onto the command stack + pushCommand(std::move(commandName), std::move(commandArguments), false); } - // Check whether the next character is a '#', indicating the start of the - // command name - Variant commandArgName; - start = reader.getOffset(); - if (reader.expect('#')) { - commandArgName = parseIdentifier(start); - if (commandArgName.asString().empty()) { - logger.error("Expected identifier after \"#\"", commandArgName); + // Check whether a ">" character is the next character that is to be read. + // In that case the current command could be an annotation end command! + char c; + if (reader.fetch(c) && c == '>') { + // Ignore the character after a begin or end command + if (isBegin || isEnd) { + logger.warning( + "Ignoring annotation end character \">\" after special " + "commands \"begin\" or \"end\". Write \"\\>\" to end a " + "\"begin\"/\"end\" enclosed annotation.", + reader); + return res; } - } - // Parse the arugments - Variant commandArguments = parseCommandArguments(std::move(commandArgName)); + // If this should be an annoation, ignore the character + if (isAnnotation) { + logger.warning( + "Ignoring annotation end character \">\" after annotation " + "start command. Write \"\\>\" to end the annotation.", + reader); + } else { + // Make sure no arguments apart from the "name" argument are given + // to an annotation end + Variant::mapType &map = commands.top().arguments.asMap(); + if (!map.empty()) { + if (map.count("name") == 0 || map.size() > 1U) { + logger.error( + "An annotation end command may not have any arguments " + "other than \"name\""); + return res; + } + } - // Push the command onto the command stack - pushCommand(std::move(commandName), std::move(commandArguments), false); + // If we got here, this is a valid ANNOTATION_END command, issue it + reader.peek(c); + reader.consumePeek(); + return State::ANNOTATION_END; + } + } - return State::COMMAND; + // If we're starting an annotation, return the command as annotation start + // instead of command + if (isAnnotation && res == State::COMMAND) { + return State::ANNOTATION_START; + } + return res; } -void OsdmStreamParser::parseBlockComment() +void OsmlStreamParser::parseBlockComment() { - DynamicToken token; + Token token; size_t depth = 1; while (tokenizer.read(reader, token)) { if (token.type == Tokens.BlockCommentEnd) { @@ -436,7 +504,7 @@ void OsdmStreamParser::parseBlockComment() logger.error("File ended while being in a block comment", reader); } -void OsdmStreamParser::parseLineComment() +void OsmlStreamParser::parseLineComment() { char c; while (reader.read(c)) { @@ -446,7 +514,7 @@ void OsdmStreamParser::parseLineComment() } } -bool OsdmStreamParser::checkIssueData(DataHandler &handler) +bool OsmlStreamParser::checkIssueData(DataHandler &handler) { if (!handler.isEmpty()) { data = handler.toVariant(reader.getSourceId()); @@ -457,7 +525,7 @@ bool OsdmStreamParser::checkIssueData(DataHandler &handler) return false; } -bool OsdmStreamParser::checkIssueFieldStart() +bool OsmlStreamParser::checkIssueFieldStart() { // Fetch the current command, and check whether we're currently inside a // field of this command @@ -482,18 +550,41 @@ bool OsdmStreamParser::checkIssueFieldStart() return false; } -OsdmStreamParser::State OsdmStreamParser::parse() +bool OsmlStreamParser::closeField() +{ + // Try to end an open field of the current command -- if the current command + // is not inside an open field, end this command and try to close the next + // one + for (int i = 0; i < 2 && commands.size() > 1; i++) { + Command &cmd = commands.top(); + if (!cmd.inRangeField) { + if (cmd.inField) { + cmd.inField = false; + if (cmd.inDefaultField) { + commands.pop(); + } + return true; + } + commands.pop(); + } else { + return false; + } + } + return false; +} + +OsmlStreamParser::State OsmlStreamParser::parse() { // Handler for incomming data DataHandler handler; // Read tokens until the outer loop should be left - DynamicToken token; + Token token; while (tokenizer.peek(reader, token)) { const TokenTypeId type = token.type; // Special handling for Backslash and Text - if (type == Tokens.Backslash) { + if (type == Tokens.Backslash || type == Tokens.AnnotationStart) { // Before appending anything to the output data or starting a new // command, check whether FIELD_START has to be issued, as the // current command is a command with range @@ -519,7 +610,8 @@ OsdmStreamParser::State OsdmStreamParser::parse() } // Parse the actual command - State res = parseCommand(token.location.getStart()); + State res = parseCommand(token.location.getStart(), + type == Tokens.AnnotationStart); switch (res) { case State::ERROR: throw LoggableException( @@ -536,6 +628,14 @@ OsdmStreamParser::State OsdmStreamParser::parse() // to the data buffer, use the escape character start as start // location and the peek offset as end location reader.peek(c); // Peek the previously fetched character + + // If this was an annotation start token, add the parsed < to the + // output + if (type == Tokens.AnnotationStart) { + handler.append('<', token.location.getStart(), + token.location.getStart() + 1); + } + handler.append(c, token.location.getStart(), reader.getPeekOffset()); reader.consumePeek(); @@ -579,28 +679,37 @@ OsdmStreamParser::State OsdmStreamParser::parse() } logger.error( "Got field start token \"{\", but no command for which to " - "start the field. Did you mean \"\\{\"?", + "start the field. Write \"\\{\" to insert this sequence as " + "text.", token); } else if (token.type == Tokens.FieldEnd) { - // Try to end an open field of the current command -- if the current - // command is not inside an open field, end this command and try to - // close the next one - for (int i = 0; i < 2 && commands.size() > 1; i++) { - Command &cmd = commands.top(); - if (!cmd.inRangeField) { - if (cmd.inField) { - cmd.inField = false; - return State::FIELD_END; - } - commands.pop(); - } else { - break; - } + if (closeField()) { + return State::FIELD_END; } logger.error( - "Got field end token \"}\", but there is no field to end. Did " - "you mean \"\\}\"?", + "Got field end token \"}\", but there is no field to end. " + "Write \"\\}\" to insert this sequence as text.", token); + } else if (token.type == Tokens.DefaultFieldStart) { + // Try to start a default field the first time the token is reached + Command &topCmd = commands.top(); + if (!topCmd.inField) { + topCmd.inField = true; + topCmd.inDefaultField = true; + return State::FIELD_START; + } + logger.error( + "Got default field start token \"{!\", but no command for " + "which to start the field. Write \"\\{!\" to insert this " + "sequence as text", + token); + } else if (token.type == Tokens.AnnotationEnd) { + // We got a single annotation end token "\>" -- simply issue the + // ANNOTATION_END event + Variant annotationName = Variant::fromString(""); + annotationName.setLocation(token.location); + pushCommand(annotationName, Variant::mapType{}, false); + return State::ANNOTATION_END; } else { logger.error("Unexpected token \"" + token.content + "\"", token); } @@ -627,14 +736,19 @@ OsdmStreamParser::State OsdmStreamParser::parse() return State::END; } -const Variant &OsdmStreamParser::getCommandName() +const Variant &OsmlStreamParser::getCommandName() const { return commands.top().name; } -const Variant &OsdmStreamParser::getCommandArguments() +const Variant &OsmlStreamParser::getCommandArguments() const { return commands.top().arguments; } + +bool OsmlStreamParser::inDefaultField() const +{ + return commands.top().inRangeField || commands.top().inDefaultField; +} } diff --git a/src/formats/osdm/OsdmStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index 48d8fb7..dc3034c 100644 --- a/src/formats/osdm/OsdmStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -17,23 +17,22 @@ */ /** - * @file OsdmStreamParser.hpp + * @file OsmlStreamParser.hpp * - * Provides classes for low-level classes for reading the TeX-esque osdm + * Provides classes for low-level classes for reading the TeX-esque osml * format. The class provided here does not build any model objects and does not * implement the Parser interface. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_OSDM_STREAM_PARSER_HPP_ -#define _OUSIA_OSDM_STREAM_PARSER_HPP_ +#ifndef _OUSIA_OSML_STREAM_PARSER_HPP_ +#define _OUSIA_OSML_STREAM_PARSER_HPP_ #include <stack> #include <core/common/Variant.hpp> - -#include "DynamicTokenizer.hpp" +#include <core/parser/utils/Tokenizer.hpp> namespace ousia { @@ -43,7 +42,7 @@ class Logger; class DataHandler; /** - * The OsdmStreamParser class provides a low-level reader for the TeX-esque osdm + * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml * format. The parser is constructed around a "parse" function, which reads data * from the underlying CharReader until a new state is reached and indicates * this state in a return value. The calling code then has to pull corresponding @@ -53,10 +52,10 @@ class DataHandler; * fields, as this would lead to too many consecutive errors) a * LoggableException is thrown. */ -class OsdmStreamParser { +class OsmlStreamParser { public: /** - * Enum used to indicate which state the OsdmStreamParser class is in + * Enum used to indicate which state the OsmlStreamParser class is in * after calling the "parse" function. */ enum class State { @@ -140,23 +139,35 @@ public: /** * Set to true if this is a command with clear begin and end. */ - bool hasRange; + bool hasRange : 1; /** * Set to true if we are currently inside a field of this command. */ - bool inField; + bool inField : 1; /** * Set to true if we are currently in the range field of the command * (implies inField being set to true). */ - bool inRangeField; + bool inRangeField : 1; + + /** + * Set to true if we are currently in a field that has been especially + * marked as default field (using the "|") syntax. + */ + bool inDefaultField : 1; /** * Default constructor. */ - Command() : hasRange(false), inField(false), inRangeField(false) {} + Command() + : hasRange(false), + inField(false), + inRangeField(false), + inDefaultField() + { + } /** * Constructor of the Command class. @@ -169,16 +180,19 @@ public: * explicit range. * @param inField is set to true if we currently are inside a field * of this command. - * @param inRangeField is set to true if we currently inside the outer - * field of the command. + * @param inRangeField is set to true if we currently are inside the + * outer field of a ranged command. + * @param inDefaultField is set to true if we currently are in a + * specially marked default field. */ - Command(Variant name, Variant arguments, bool hasRange, bool inField, - bool inRangeField) + Command(Variant name, Variant arguments, bool hasRange, + bool inField, bool inRangeField, bool inDefaultField) : name(std::move(name)), arguments(std::move(arguments)), hasRange(hasRange), inField(inField), - inRangeField(inRangeField) + inRangeField(inRangeField), + inDefaultField(inDefaultField) { } }; @@ -198,7 +212,7 @@ private: /** * Tokenizer instance used to read individual tokens from the text. */ - DynamicTokenizer tokenizer; + Tokenizer tokenizer; /** * Stack containing the current commands. @@ -258,9 +272,11 @@ private: * * @param start is the start byte offset of the command (including the * backslash) + * @param isAnnotation if true, the command is not returned as command, but + * as annotation start. * @return true if a command was actuall parsed, false otherwise. */ - State parseCommand(size_t start); + State parseCommand(size_t start, bool isAnnotation); /** * Function used internally to parse a block comment. @@ -290,16 +306,26 @@ private: */ bool checkIssueFieldStart(); + /** + * Closes a currently open field. Note that the command will be removed from + * the internal command stack if the field that is being closed is a + * field marked as default field. + * + * @return true if the field could be closed, false if there was no field + * to close. + */ + bool closeField(); + public: /** - * Constructor of the OsdmStreamParser class. Attaches the new - * OsdmStreamParser to the given CharReader and Logger instances. + * Constructor of the OsmlStreamParser class. Attaches the new + * OsmlStreamParser to the given CharReader and Logger instances. * * @param reader is the reader instance from which incomming characters * should be read. * @param logger is the logger instance to which errors should be written. */ - OsdmStreamParser(CharReader &reader, Logger &logger); + OsmlStreamParser(CharReader &reader, Logger &logger); /** * Continues parsing. Returns one of the states defined in the State enum. @@ -318,7 +344,7 @@ public: * @return a reference at a variant containing the data parsed by the * "parse" function. */ - const Variant &getData() { return data; } + const Variant &getData() const { return data; } /** * Returns a reference at the internally stored command name. Only valid if @@ -327,7 +353,7 @@ public: * @return a reference at a variant containing name and location of the * parsed command. */ - const Variant &getCommandName(); + const Variant &getCommandName() const; /** * Returns a reference at the internally stored command name. Only valid if @@ -336,16 +362,24 @@ public: * @return a reference at a variant containing arguments given to the * command. */ - const Variant &getCommandArguments(); + const Variant &getCommandArguments() const; + + /** + * Returns true if the current field is the "default" field. This is true if + * the parser either is in the outer range of a range command or inside a + * field that has been especially marked as "default" field (using the "|" + * syntax). + */ + bool inDefaultField() const; /** * Returns a reference at the char reader. * * @return the last internal token location. */ - SourceLocation &getLocation() { return location; } + const SourceLocation &getLocation() const { return location; } }; } -#endif /* _OUSIA_OSDM_STREAM_PARSER_HPP_ */ +#endif /* _OUSIA_OSML_STREAM_PARSER_HPP_ */ diff --git a/src/formats/osxml/OsxmlAttributeLocator.cpp b/src/formats/osxml/OsxmlAttributeLocator.cpp new file mode 100644 index 0000000..e37446a --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.cpp @@ -0,0 +1,144 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/common/Location.hpp> +#include <core/common/CharReader.hpp> +#include <core/common/Utils.hpp> + +#include "OsxmlAttributeLocator.hpp" + +namespace ousia { + +/** + * Enum used internally in the statemachine of the xml argument parser. + */ +enum class XmlAttributeState { + IN_TAG_NAME, + SEARCH_ATTR, + IN_ATTR_NAME, + HAS_ATTR_NAME, + HAS_ATTR_EQUALS, + IN_ATTR_DATA +}; + +std::map<std::string, SourceLocation> OsxmlAttributeLocator::locate( + CharReader &reader, size_t offs) +{ + std::map<std::string, SourceLocation> res; + + // Fork the reader, we don't want to mess up the XML parsing process, do we? + CharReaderFork readerFork = reader.fork(); + + // Move the read cursor to the start location, abort if this does not work + if (offs != readerFork.seek(offs)) { + return res; + } + + // Now all we need to do is to implement one half of an XML parser. As this + // is inherently complicated we'll totaly fail at it. Don't care. All we + // want to get is those darn offsets for pretty error messages... (and we + // can assume the XML is valid as it was already read by expat) + XmlAttributeState state = XmlAttributeState::IN_TAG_NAME; + char c; + std::stringstream attrName; + while (readerFork.read(c)) { + // Abort at the end of the tag + if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) { + return res; + } + + // One state machine to rule them all, one state machine to find them, + // One state machine to bring them all and in the darkness bind them + // (the byte offsets) + switch (state) { + case XmlAttributeState::IN_TAG_NAME: + if (Utils::isWhitespace(c)) { + res.emplace("$tag", + SourceLocation{reader.getSourceId(), offs + 1, + readerFork.getOffset() - 1}); + state = XmlAttributeState::SEARCH_ATTR; + } + break; + case XmlAttributeState::SEARCH_ATTR: + if (!Utils::isWhitespace(c)) { + state = XmlAttributeState::IN_ATTR_NAME; + attrName << c; + } + break; + case XmlAttributeState::IN_ATTR_NAME: + if (Utils::isWhitespace(c)) { + state = XmlAttributeState::HAS_ATTR_NAME; + } else if (c == '=') { + state = XmlAttributeState::HAS_ATTR_EQUALS; + } else { + attrName << c; + } + break; + case XmlAttributeState::HAS_ATTR_NAME: + if (!Utils::isWhitespace(c)) { + if (c == '=') { + state = XmlAttributeState::HAS_ATTR_EQUALS; + break; + } + // Well, this is a strange XML file... We expected to + // see a '=' here! Try to continue with the + // "HAS_ATTR_EQUALS" state as this state will hopefully + // inlcude some error recovery + } else { + // Skip whitespace here + break; + } + // Fallthrough + case XmlAttributeState::HAS_ATTR_EQUALS: + if (!Utils::isWhitespace(c)) { + if (c == '"') { + // Here we are! We have found the beginning of an + // attribute. Let's quickly lock the current offset away + // in the result map + res.emplace(attrName.str(), + SourceLocation{reader.getSourceId(), + readerFork.getOffset()}); + state = XmlAttributeState::IN_ATTR_DATA; + } else { + // No, this XML file is not well formed. Assume we're in + // an attribute name once again + attrName.str(std::string{&c, 1}); + state = XmlAttributeState::IN_ATTR_NAME; + } + } + break; + case XmlAttributeState::IN_ATTR_DATA: + if (c == '"') { + // We're at the end of the attribute data, set the end + // location + auto it = res.find(attrName.str()); + if (it != res.end()) { + it->second.setEnd(readerFork.getOffset() - 1); + } + + // Reset the attribute name and restart the search + attrName.str(std::string{}); + state = XmlAttributeState::SEARCH_ATTR; + } + break; + } + } + return res; +} +} + diff --git a/src/formats/osxml/OsxmlAttributeLocator.hpp b/src/formats/osxml/OsxmlAttributeLocator.hpp new file mode 100644 index 0000000..f9a3437 --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.hpp @@ -0,0 +1,67 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file OsxmlAttributeLocator.hpp + * + * Contains a class used for locating the byte offsets of the attributes given + * in a XML tag. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ +#define _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ + +#include <map> + +namespace ousia { + +// Forward declarations +class CharReader; +class SourceLocation; + +/** + * Class containing one static function for locating the byte offsets of the + * attributes in a XML tag. This are not retrieved by our xml parser, so we have + * to do this manually. + */ +class OsxmlAttributeLocator { +public: + /** + * Function used to reconstruct the location of the attributes of a XML tag + * in the source code. This is necessary, as the xml parser only returns an + * offset to the begining of a tag and not to the position of the individual + * arguments. + * + * @param reader is the char reader from which the character data should be + * read. + * @param offs is a byte offset in the xml file pointing at the "<" + * character of the tag. + * @return a map from attribute keys to the corresponding location + * (including range) of the atribute. Also contains the location of the + * tagname in the form of the virtual attribute "$tag". + */ + static std::map<std::string, SourceLocation> locate(CharReader &reader, + size_t offs); +}; + +} + +#endif /* _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ */ + diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp new file mode 100644 index 0000000..7404960 --- /dev/null +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -0,0 +1,547 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <expat.h> + +#include <vector> + +#include <core/common/CharReader.hpp> +#include <core/common/Logger.hpp> +#include <core/common/Variant.hpp> +#include <core/common/VariantReader.hpp> +#include <core/common/Utils.hpp> +#include <core/common/WhitespaceHandler.hpp> + +#include "OsxmlAttributeLocator.hpp" +#include "OsxmlEventParser.hpp" + +namespace ousia { + +/* Class OsxmlEventParser */ + +/** + * Class containing data used by the internal functions. + */ +class OsxmlEventParserData { +public: + /** + * Contains the current depth of the parsing process. + */ + ssize_t depth; + + /** + * Set to a value larger or equal to zero if the parser is currently inside + * an annotation end tag -- the value represents the depth in which the + * tag was opened. + */ + ssize_t annotationEndTagDepth; + + /** + * Current character data buffer. + */ + std::vector<char> textBuf; + + /** + * Current whitespace buffer (for the trimming whitspace mode) + */ + std::vector<char> whitespaceBuf; + + /** + * Flag indicating whether a whitespace character was present (for the + * collapsing whitespace mode). + */ + bool hasWhitespace; + + /** + * Current character data start. + */ + size_t textStart; + + /** + * Current character data end. + */ + size_t textEnd; + + /** + * Default constructor. + */ + OsxmlEventParserData(); + + /** + * Increments the depth. + */ + void incrDepth(); + + /** + * Decrement the depth and reset the annotationEndTagDepth flag. + */ + void decrDepth(); + + /** + * Returns true if we're currently inside an end tag. + */ + bool inAnnotationEndTag(); + + /** + * Returns true if character data is available. + * + * @return true if character data is available. + */ + bool hasText(); + + /** + * Returns a Variant containing the character data and its location. + * + * @return a string variant containing the text data and the character + * location. + */ + Variant getText(SourceId sourceId); +}; + +/* Class GuardedExpatXmlParser */ + +/** + * Wrapper class around the XML_Parser pointer which safely frees it whenever + * the scope is left (e.g. because an exception was thrown). + */ +class GuardedExpatXmlParser { +private: + /** + * Internal pointer to the XML_Parser instance. + */ + XML_Parser parser; + +public: + /** + * Constructor of the GuardedExpatXmlParser class. Calls XML_ParserCreateNS + * from the expat library. Throws a parser exception if the XML parser + * cannot be initialized. + * + * @param encoding is the protocol-defined encoding passed to expat (or + * nullptr if expat should determine the encoding by itself). + */ + GuardedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) + { + parser = XML_ParserCreate(encoding); + if (!parser) { + throw LoggableException{ + "Internal error: Could not create expat XML parser!"}; + } + } + + /** + * Destuctor of the GuardedExpatXmlParser, frees the XML parser instance. + */ + ~GuardedExpatXmlParser() + { + if (parser) { + XML_ParserFree(parser); + parser = nullptr; + } + } + + /** + * Returns the XML_Parser pointer. + */ + XML_Parser operator&() { return parser; } +}; + +/** + * Name of the special outer tag used for allowing multiple top-level elements + * in an xml file. + */ +static const std::string TOP_LEVEL_TAG{"ousia"}; + +/** + * Prefix used to indicate the start of an annoation (note the trailing colon) + */ +static const std::string ANNOTATION_START_PREFIX{"a:start:"}; + +/** + * Prefix used to indicate the end of an annotation. + */ +static const std::string ANNOTATION_END_PREFIX{"a:end"}; + +/** + * Synchronizes the position of the xml parser with the default location of the + * logger instance. + * + * @param p is a pointer at the xml parser instance. + * @param len is the length of the string that should be refered to. + * @return the SourceLocation that has been set in the logger. + */ +static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0) +{ + // Fetch the OsxmlEventParser instance + OsxmlEventParser *parser = + static_cast<OsxmlEventParser *>(XML_GetUserData(p)); + + // Fetch the current location in the XML file and set the default location + // in the logger + size_t offs = XML_GetCurrentByteIndex(p); + SourceLocation loc = + SourceLocation{parser->getReader().getSourceId(), offs, offs + len}; + parser->getLogger().setDefaultLocation(loc); + + // Return the fetched location + return loc; +} + +/** + * Callback called by eXpat whenever a start handler is reached. + */ +static void xmlStartElementHandler(void *ref, const XML_Char *name, + const XML_Char **attrs) +{ + // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser + XML_Parser p = static_cast<XML_Parser>(ref); + OsxmlEventParser *parser = + static_cast<OsxmlEventParser *>(XML_GetUserData(p)); + + // If there is any text data in the buffer, issue that first + if (parser->getData().hasText()) { + parser->getEvents().data( + parser->getData().getText(parser->getReader().getSourceId())); + } + + // Read the argument locations -- this is only a stupid and slow hack, + // but it is necessary, as expat doesn't give use the byte offset of the + // arguments. + std::map<std::string, SourceLocation> attributeOffsets = + OsxmlAttributeLocator::locate(parser->getReader(), + XML_GetCurrentByteIndex(p)); + + // Update the logger position + SourceLocation loc = xmlSyncLoggerPosition(p); + + // Fetch the location of the name + SourceLocation nameLoc = loc; + auto it = attributeOffsets.find("$tag"); + if (it != attributeOffsets.end()) { + nameLoc = it->second; + } + // Increment the current depth + parser->getData().incrDepth(); + + // Make sure we're currently not inside an annotation end tag -- this would + // be highly illegal! + if (parser->getData().inAnnotationEndTag()) { + parser->getLogger().error( + "No tags allowed inside an annotation end tag", nameLoc); + return; + } + + // Assemble the arguments + Variant::mapType args; + const XML_Char **attr = attrs; + while (*attr) { + // Convert the C string to a std::string + const std::string key{*(attr++)}; + + // Search the location of the key + SourceLocation keyLoc; + auto it = attributeOffsets.find(key); + if (it != attributeOffsets.end()) { + keyLoc = it->second; + } + + // Parse the string, pass the location of the key + std::pair<bool, Variant> value = VariantReader::parseGenericString( + *(attr++), parser->getLogger(), keyLoc.getSourceId(), + keyLoc.getStart()); + + // Set the overall location of the parsed element to the attribute + // location + value.second.setLocation(keyLoc); + + // Store the keys in the map + args.emplace(key, value.second).second; + } + + // Fetch the name of the tag, check for special tags + std::string nameStr(name); + if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 1) { + // We're in the top-level and the magic tag is reached -- just + // ignore it and issue a warning for each argument that has been given + for (const auto &arg : args) { + parser->getLogger().warning(std::string("Ignoring attribute \"") + + arg.first + + std::string("\" for magic tag \"") + + TOP_LEVEL_TAG + std::string("\""), + arg.second); + } + } else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) { + // Assemble a name variant containing the name minus the prefix + Variant nameVar = + Variant::fromString(nameStr.substr(ANNOTATION_START_PREFIX.size())); + nameVar.setLocation(nameLoc); + + // Issue the "annotationStart" event + parser->getEvents().annotationStart(nameVar, args); + } else if (Utils::startsWith(nameStr, ANNOTATION_END_PREFIX)) { + // Assemble a name variant containing the name minus the prefix + nameStr = nameStr.substr(ANNOTATION_END_PREFIX.size()); + + // Discard a potentially leading colon + if (!nameStr.empty() && nameStr[0] == ':') { + nameStr = nameStr.substr(1); + } + + // Assemble the variant containing the name and its location + Variant nameVar = Variant::fromString(nameStr); + nameVar.setLocation(nameLoc); + + // Check whether a "name" attribute was given + Variant elementName; + for (const auto &arg : args) { + if (arg.first == "name") { + elementName = arg.second; + } else { + parser->getLogger().warning( + std::string("Ignoring attribute \"") + arg.first + + "\" in annotation end tag", + arg.second); + } + } + + // Set the annotationEndTagDepth to disallow any further tags to be + // opened inside the annotation end tag. + parser->getData().annotationEndTagDepth = parser->getData().depth; + + // Issue the "annotationEnd" event + parser->getEvents().annotationEnd(nameVar, args); + } else { + // Just issue a "commandStart" event in any other case + Variant nameVar = Variant::fromString(nameStr); + nameVar.setLocation(nameLoc); + parser->getEvents().command(nameVar, args); + } +} + +static void xmlEndElementHandler(void *ref, const XML_Char *name) +{ + // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser + XML_Parser p = static_cast<XML_Parser>(ref); + OsxmlEventParser *parser = + static_cast<OsxmlEventParser *>(XML_GetUserData(p)); + + // Synchronize the position of the logger with teh position + xmlSyncLoggerPosition(p); + + // Abort as long as we're in an annotation end tag + if (parser->getData().inAnnotationEndTag()) { + parser->getData().decrDepth(); + return; + } + + // Decrement the current depth + parser->getData().decrDepth(); + + // If there is any text data in the buffer, issue that first + if (parser->getData().hasText()) { + parser->getEvents().data( + parser->getData().getText(parser->getReader().getSourceId())); + } + + // Abort if the special ousia tag ends here + std::string nameStr{name}; + if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 0) { + return; + } + + // Issue the "fieldEnd" event + parser->getEvents().fieldEnd(); +} + +static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) +{ + // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser + XML_Parser p = static_cast<XML_Parser>(ref); + OsxmlEventParser *parser = + static_cast<OsxmlEventParser *>(XML_GetUserData(p)); + + // Abort as long as we're in an annotation end tag + if (parser->getData().inAnnotationEndTag()) { + return; + } + + // Convert the signed (smell the 90's C library here?) length to an usigned + // value + size_t ulen = len > 0 ? static_cast<size_t>(len) : 0; + + // Synchronize the logger position + SourceLocation loc = xmlSyncLoggerPosition(p, ulen); + + // Fetch some variables for convenience + const WhitespaceMode mode = parser->getWhitespaceMode(); + OsxmlEventParserData &data = parser->getData(); + std::vector<char> &textBuf = data.textBuf; + std::vector<char> &whitespaceBuf = data.whitespaceBuf; + bool &hasWhitespace = data.hasWhitespace; + size_t &textStart = data.textStart; + size_t &textEnd = data.textEnd; + + size_t pos = loc.getStart(); + for (size_t i = 0; i < ulen; i++, pos++) { + switch (mode) { + case WhitespaceMode::PRESERVE: + PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd); + break; + case WhitespaceMode::TRIM: + TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd, + whitespaceBuf); + break; + case WhitespaceMode::COLLAPSE: + CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd, + hasWhitespace); + break; + } + } +} + +/* Class OsxmlEvents */ + +OsxmlEvents::~OsxmlEvents() {} + +/* Class OsxmlEventParser */ + +OsxmlEventParserData::OsxmlEventParserData() + : depth(0), + annotationEndTagDepth(-1), + hasWhitespace(false), + textStart(0), + textEnd(0) +{ +} + +void OsxmlEventParserData::incrDepth() { depth++; } + +void OsxmlEventParserData::decrDepth() +{ + if (depth > 0) { + depth--; + } + if (depth < annotationEndTagDepth) { + annotationEndTagDepth = -1; + } +} + +bool OsxmlEventParserData::inAnnotationEndTag() +{ + return (annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth); +} + +bool OsxmlEventParserData::hasText() { return !textBuf.empty(); } + +Variant OsxmlEventParserData::getText(SourceId sourceId) +{ + // Create a variant containing the string data and the location + Variant var = + Variant::fromString(std::string{textBuf.data(), textBuf.size()}); + var.setLocation({sourceId, textStart, textEnd}); + + // Reset the text buffers + textBuf.clear(); + whitespaceBuf.clear(); + hasWhitespace = false; + textStart = 0; + textEnd = 0; + + // Return the variant + return var; +} + +/* Class OsxmlEventParser */ + +OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, + Logger &logger) + : reader(reader), + events(events), + logger(logger), + whitespaceMode(WhitespaceMode::TRIM), + data(new OsxmlEventParserData()) +{ +} + +OsxmlEventParser::~OsxmlEventParser() {} + +void OsxmlEventParser::parse() +{ + // Create the parser object + GuardedExpatXmlParser p{"UTF-8"}; + + // Reset the depth + data->depth = 0; + + // Pass the reference to this parser instance to the XML handler + XML_SetUserData(&p, this); + XML_UseParserAsHandlerArg(&p); + + // Set the callback functions + XML_SetStartElementHandler(&p, xmlStartElementHandler); + XML_SetEndElementHandler(&p, xmlEndElementHandler); + XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler); + + // Feed data into expat while there is data to process + constexpr size_t BUFFER_SIZE = 64 * 1024; + while (true) { + // Fetch a buffer from expat for the input data + char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE)); + if (!buf) { + throw OusiaException{"Internal error: XML parser out of memory!"}; + } + + // Read into the buffer + size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE); + + // Parse the data and handle any XML error as exception + if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { + throw LoggableException{ + "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))}, + xmlSyncLoggerPosition(&p)}; + } + + // Abort once there are no more bytes in the stream + if (bytesRead == 0) { + break; + } + } +} + +void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode) +{ + this->whitespaceMode = whitespaceMode; +} + +WhitespaceMode OsxmlEventParser::getWhitespaceMode() const +{ + return whitespaceMode; +} + +CharReader &OsxmlEventParser::getReader() const { return reader; } + +Logger &OsxmlEventParser::getLogger() const { return logger; } + +OsxmlEvents &OsxmlEventParser::getEvents() const { return events; } + +OsxmlEventParserData &OsxmlEventParser::getData() const { return *data; } +} + diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp new file mode 100644 index 0000000..e39245f --- /dev/null +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -0,0 +1,217 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file OsxmlEventParser.hpp + * + * The OsxmlEventParser class is responsible for parsing an XML file and calling + * the corresponding event handler functions if an XML item is found. Event + * handling is performed using a listener interface. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OSXML_EVENT_PARSER_HPP_ +#define _OSXML_EVENT_PARSER_HPP_ + +#include <memory> +#include <string> + +#include <core/common/Whitespace.hpp> + +namespace ousia { + +// Forward declarations +class Logger; +class Variant; +class OsxmlEventParserData; + +/** + * Interface which defines the callback functions which are called by the + * OsxmlEventParser whenever an event occurs. + */ +class OsxmlEvents { +public: + /** + * Virtual destructor. + */ + virtual ~OsxmlEvents(); + + /** + * Called whenever a command starts. Note that this implicitly always starts + * the default field of the command. + * + * @param name is a string variant containing name and location of the + * command. + * @param args is a map containing the arguments that were given to the + * command. + */ + virtual void command(const Variant &name, const Variant::mapType &args) = 0; + + /** + * Called whenever an annotation starts. Note that this implicitly always + * starts the default field of the annotation. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the annotation definition. + * @param args is a map variant containing the arguments that were given + * to the annotation definition. + */ + virtual void annotationStart(const Variant &className, + const Variant::mapType &args) = 0; + + /** + * Called whenever the range of an annotation ends. The callee must + * disambiguate the actual annotation that is finished here. + * + * @param className is a string variant containing the name of the + * annotation class that should end here. May be empty (or nullptr), if no + * elementName has been specified at the end of the annotation. + * @param elementName is the name of the annotation element that should be + * ended here. May be empty (or nullptr), if no elementName has been + * specified at the end of the annotation. + */ + virtual void annotationEnd(const Variant &className, + const Variant &elementName) = 0; + + /** + * Called whenever the default field which was implicitly started by + * commandStart or annotationStart ends. Note that this does not end the + * range of an annotation, but the default field of the annotation. To + * signal the end of the annotation this, the annotationEnd method will be + * invoked. + */ + virtual void fieldEnd() = 0; + + /** + * Called whenever data is found. Whitespace data is handled as specified + * and the data has been parsed to the specified variant type. This function + * is not called if the parsing failed, the parser prints an error message + * instead. + * + * @param data is the already parsed data that should be passed to the + * handler. + */ + virtual void data(const Variant &data) = 0; +}; + +/** + * The OsxmlEventParser class is a wrapper around eXpat which implements the + * specialities of the osxml formats class (like annotation ranges). It notifies + * a specified event handler whenever a command, annotation or data has been + * reached. + */ +class OsxmlEventParser { +private: + /** + * Reference at the internal CharReader instance. + */ + CharReader &reader; + + /** + * Set of callback functions to be called whenever an event is triggered. + */ + OsxmlEvents &events; + + /** + * Reference at the Logger object to which error messages or warnings should + * be logged. + */ + Logger &logger; + + /** + * Current whitespace mode. + */ + WhitespaceMode whitespaceMode; + + /** + * Data to be used by the internal functions. + */ + std::unique_ptr<OsxmlEventParserData> data; + +public: + /** + * Constructor fo the OsxmlEventParser. Takes a reference at the OsxmlEvents + * of which the callback functions are called. + * + * @param reader is a reference to the CharReader instance from which the + * XML should be read. + * @param events is a refence at an instance of the OsxmlEvents class. All + * events are forwarded to this class. + * @param logger is the Logger instance to which log messages should be + * written. + */ + OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger); + + /** + * Destructor of OsxmlEventParser (needed for unique_ptr to incomplete type) + */ + ~OsxmlEventParser(); + + /** + * Performs the actual parsing. Reads the XML using eXpat and calles the + * callbacks in the event listener instance whenever something interesting + * happens. + */ + void parse(); + + /** + * Sets the whitespace handling mode. + * + * @param whitespaceMode defines how whitespace in the data should be + * handled. + */ + void setWhitespaceMode(WhitespaceMode whitespaceMode); + + /** + * Returns the current whitespace handling mode. + * + * @return the currently set whitespace handling mode. + */ + WhitespaceMode getWhitespaceMode() const; + + /** + * Returns the internal CharReader reference. + * + * @return the CharReader reference. + */ + CharReader &getReader() const; + + /** + * Returns the internal Logger reference. + * + * @return the internal Logger reference. + */ + Logger &getLogger() const; + + /** + * Returns the internal OsxmlEvents reference. + * + * @return the internal OsxmlEvents reference. + */ + OsxmlEvents &getEvents() const; + + /** + * Returns a reference at the internal data. + */ + OsxmlEventParserData &getData() const; +}; +} + +#endif /* _OSXML_EVENT_PARSER_HPP_ */ + diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp new file mode 100644 index 0000000..c216855 --- /dev/null +++ b/src/formats/osxml/OsxmlParser.cpp @@ -0,0 +1,98 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/parser/stack/GenericParserStates.hpp> +#include <core/parser/stack/Stack.hpp> +#include <core/parser/ParserContext.hpp> + +#include "OsxmlEventParser.hpp" +#include "OsxmlParser.hpp" + +namespace ousia { + +using namespace parser_stack; + +/** + * Class containing the actual OsxmlParser implementation. + */ +class OsxmlParserImplementation : public OsxmlEvents { +private: + /** + * Actual xml parser -- converts the xml stream into a set of events. + */ + OsxmlEventParser parser; + + /** + * Pushdown automaton responsible for converting the xml events into an + * actual Node tree. + */ + Stack stack; + +public: + /** + * Constructor of the OsxmlParserImplementation class. + * + * @param reader is a reference to the CharReader instance from which the + * XML should be read. + * @param ctx is a reference to the ParserContext instance that should be + * used. + */ + OsxmlParserImplementation(CharReader &reader, ParserContext &ctx) + : parser(reader, *this, ctx.getLogger()), + stack(ctx, GenericParserStates) + { + } + + /** + * Starts the actual parsing process. + */ + void parse() { parser.parse(); } + + void command(const Variant &name, const Variant::mapType &args) override + { + stack.command(name, args); + stack.fieldStart(true); + } + + void annotationStart(const Variant &name, + const Variant::mapType &args) override + { + stack.annotationStart(name, args); + stack.fieldStart(true); + } + + void annotationEnd(const Variant &className, + const Variant &elementName) override + { + stack.annotationEnd(className, elementName); + } + + void fieldEnd() override { stack.fieldEnd(); } + + void data(const Variant &data) override { stack.data(data); } +}; + +/* Class OsxmlParser */ + +void OsxmlParser::doParse(CharReader &reader, ParserContext &ctx) +{ + OsxmlParserImplementation impl(reader, ctx); + impl.parse(); +} +} + diff --git a/src/plugins/xml/XmlParser.hpp b/src/formats/osxml/OsxmlParser.hpp index c8b6302..0fbf83c 100644 --- a/src/plugins/xml/XmlParser.hpp +++ b/src/formats/osxml/OsxmlParser.hpp @@ -17,7 +17,7 @@ */ /** - * @file XmlParser.hpp + * @file OsxmlParser.hpp * * Contains the parser responsible for reading Ousía XML Documents (extension * oxd) and Ousía XML Modules (extension oxm). @@ -25,18 +25,18 @@ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_XML_PARSER_HPP_ -#define _OUSIA_XML_PARSER_HPP_ +#ifndef _OUSIA_OSXML_PARSER_HPP_ +#define _OUSIA_OSXML_PARSER_HPP_ #include <core/parser/Parser.hpp> namespace ousia { /** - * The XmlParser class implements parsing the various types of Ousía XML - * documents using the expat stream XML parser. + * The OsxmlParser class implements parsing the various types of Ousía XML + * documents using the OsxmlEventParser and Stack classes. */ -class XmlParser : public Parser { +class OsxmlParser : public Parser { protected: /** * Parses the given input stream as XML file and returns the parsed @@ -51,5 +51,5 @@ protected: } -#endif /* _OUSIA_XML_PARSER_HPP_ */ +#endif /* _OUSIA_OSXML_PARSER_HPP_ */ diff --git a/src/core/CodeTokenizer.cpp b/src/plugins/css/CodeTokenizer.cpp index d65c514..d65c514 100644 --- a/src/core/CodeTokenizer.cpp +++ b/src/plugins/css/CodeTokenizer.cpp diff --git a/src/core/CodeTokenizer.hpp b/src/plugins/css/CodeTokenizer.hpp index 154f949..154f949 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/plugins/css/CodeTokenizer.hpp diff --git a/src/core/Tokenizer.cpp b/src/plugins/css/Tokenizer.cpp index ab4735a..ab4735a 100644 --- a/src/core/Tokenizer.cpp +++ b/src/plugins/css/Tokenizer.cpp diff --git a/src/core/Tokenizer.hpp b/src/plugins/css/Tokenizer.hpp index 50e458c..50e458c 100644 --- a/src/core/Tokenizer.hpp +++ b/src/plugins/css/Tokenizer.hpp diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp deleted file mode 100644 index 6dfad49..0000000 --- a/src/plugins/xml/XmlParser.cpp +++ /dev/null @@ -1,575 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <iostream> -#include <map> -#include <sstream> -#include <vector> - -#include <expat.h> - -#include <core/common/CharReader.hpp> -#include <core/common/Utils.hpp> -#include <core/common/VariantReader.hpp> -#include <core/parser/ParserScope.hpp> -#include <core/parser/ParserStack.hpp> -#include <core/parser/stack/DocumentHandler.hpp> -#include <core/parser/stack/DomainHandler.hpp> -#include <core/parser/stack/ImportIncludeHandler.hpp> -#include <core/parser/stack/TypesystemHandler.hpp> -#include <core/model/Document.hpp> -#include <core/model/Domain.hpp> -#include <core/model/Typesystem.hpp> - -#include "XmlParser.hpp" - -namespace ousia { - -namespace ParserStates { -/* Document states */ -static const ParserState Document = - ParserStateBuilder() - .parent(&None) - .createdNodeType(&RttiTypes::Document) - .elementHandler(DocumentHandler::create) - .arguments({Argument::String("name", "")}); - -static const ParserState DocumentChild = - ParserStateBuilder() - .parents({&Document, &DocumentChild}) - .createdNodeTypes({&RttiTypes::StructureNode, - &RttiTypes::AnnotationEntity, - &RttiTypes::DocumentField}) - .elementHandler(DocumentChildHandler::create); - -/* Domain states */ -static const ParserState Domain = ParserStateBuilder() - .parents({&None, &Document}) - .createdNodeType(&RttiTypes::Domain) - .elementHandler(DomainHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState DomainStruct = - ParserStateBuilder() - .parent(&Domain) - .createdNodeType(&RttiTypes::StructuredClass) - .elementHandler(DomainStructHandler::create) - .arguments({Argument::String("name"), - Argument::Cardinality("cardinality", Cardinality::any()), - Argument::Bool("isRoot", false), - Argument::Bool("transparent", false), - Argument::String("isa", "")}); - -static const ParserState DomainAnnotation = - ParserStateBuilder() - .parent(&Domain) - .createdNodeType(&RttiTypes::AnnotationClass) - .elementHandler(DomainAnnotationHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState DomainAttributes = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::StructType) - .elementHandler(DomainAttributesHandler::create) - .arguments({}); - -static const ParserState DomainAttribute = - ParserStateBuilder() - .parent(&DomainAttributes) - .elementHandler(TypesystemStructFieldHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("default", Variant::fromObject(nullptr))}); - -static const ParserState DomainField = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainFieldHandler::create) - .arguments({Argument::String("name", ""), - Argument::Bool("isSubtree", false), - Argument::Bool("optional", false)}); - -static const ParserState DomainFieldRef = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainFieldRefHandler::create) - .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); - -static const ParserState DomainStructPrimitive = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainPrimitiveHandler::create) - .arguments( - {Argument::String("name", ""), Argument::Bool("isSubtree", false), - Argument::Bool("optional", false), Argument::String("type")}); - -static const ParserState DomainStructChild = - ParserStateBuilder() - .parent(&DomainField) - .elementHandler(DomainChildHandler::create) - .arguments({Argument::String("ref")}); - -static const ParserState DomainStructParent = - ParserStateBuilder() - .parent(&DomainStruct) - .createdNodeType(&RttiTypes::DomainParent) - .elementHandler(DomainParentHandler::create) - .arguments({Argument::String("ref")}); - -static const ParserState DomainStructParentField = - ParserStateBuilder() - .parent(&DomainStructParent) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainParentFieldHandler::create) - .arguments({Argument::String("name", ""), - Argument::Bool("isSubtree", false), - Argument::Bool("optional", false)}); - -static const ParserState DomainStructParentFieldRef = - ParserStateBuilder() - .parent(&DomainStructParent) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainParentFieldRefHandler::create) - .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); - -/* Typesystem states */ -static const ParserState Typesystem = - ParserStateBuilder() - .parents({&None, &Domain}) - .createdNodeType(&RttiTypes::Typesystem) - .elementHandler(TypesystemHandler::create) - .arguments({Argument::String("name", "")}); - -static const ParserState TypesystemEnum = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::EnumType) - .elementHandler(TypesystemEnumHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState TypesystemEnumEntry = - ParserStateBuilder() - .parent(&TypesystemEnum) - .elementHandler(TypesystemEnumEntryHandler::create) - .arguments({}); - -static const ParserState TypesystemStruct = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::StructType) - .elementHandler(TypesystemStructHandler::create) - .arguments({Argument::String("name"), Argument::String("parent", "")}); - -static const ParserState TypesystemStructField = - ParserStateBuilder() - .parent(&TypesystemStruct) - .elementHandler(TypesystemStructFieldHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("default", Variant::fromObject(nullptr))}); - -static const ParserState TypesystemConstant = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::Constant) - .elementHandler(TypesystemConstantHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("value")}); - -/* Special states for import and include */ -static const ParserState Import = - ParserStateBuilder() - .parents({&Document, &Typesystem, &Domain}) - .elementHandler(ImportHandler::create) - .arguments({Argument::String("rel", ""), Argument::String("type", ""), - Argument::String("src", "")}); - -static const ParserState Include = - ParserStateBuilder() - .parent(&All) - .elementHandler(IncludeHandler::create) - .arguments({Argument::String("rel", ""), Argument::String("type", ""), - Argument::String("src", "")}); - -static const std::multimap<std::string, const ParserState *> XmlStates{ - {"document", &Document}, - {"*", &DocumentChild}, - {"domain", &Domain}, - {"struct", &DomainStruct}, - {"annotation", &DomainAnnotation}, - {"attributes", &DomainAttributes}, - {"attribute", &DomainAttribute}, - {"field", &DomainField}, - {"fieldRef", &DomainFieldRef}, - {"primitive", &DomainStructPrimitive}, - {"childRef", &DomainStructChild}, - {"parentRef", &DomainStructParent}, - {"field", &DomainStructParentField}, - {"fieldRef", &DomainStructParentFieldRef}, - {"typesystem", &Typesystem}, - {"enum", &TypesystemEnum}, - {"entry", &TypesystemEnumEntry}, - {"struct", &TypesystemStruct}, - {"field", &TypesystemStructField}, - {"constant", &TypesystemConstant}, - {"import", &Import}, - {"include", &Include}}; -} - -/** - * Structue containing the private data that is being passed to the - * XML-Handlers. - */ -struct XMLUserData { - /** - * Containing the depth of the current XML file - */ - size_t depth; - - /** - * Reference at the ParserStack instance. - */ - ParserStack *stack; - - /** - * Reference at the CharReader instance. - */ - CharReader *reader; - - /** - * Constructor of the XMLUserData struct. - * - * @param stack is a pointer at the ParserStack instance. - * @param reader is a pointer at the CharReader instance. - */ - XMLUserData(ParserStack *stack, CharReader *reader) - : depth(0), stack(stack), reader(reader) - { - } -}; - -/** - * Wrapper class around the XML_Parser pointer which safely frees it whenever - * the scope is left (e.g. because an exception was thrown). - */ -class ScopedExpatXmlParser { -private: - /** - * Internal pointer to the XML_Parser instance. - */ - XML_Parser parser; - -public: - /** - * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS - * from the expat library. Throws a parser exception if the XML parser - * cannot be initialized. - * - * @param encoding is the protocol-defined encoding passed to expat (or - * nullptr if expat should determine the encoding by itself). - */ - ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) - { - parser = XML_ParserCreate(encoding); - if (!parser) { - throw LoggableException{ - "Internal error: Could not create expat XML parser!"}; - } - } - - /** - * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance. - */ - ~ScopedExpatXmlParser() - { - if (parser) { - XML_ParserFree(parser); - parser = nullptr; - } - } - - /** - * Returns the XML_Parser pointer. - */ - XML_Parser operator&() { return parser; } -}; - -/* Adapter Expat -> ParserStack */ - -static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0) -{ - // Fetch the parser stack and the associated user data - XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); - ParserStack *stack = userData->stack; - - // Fetch the current location in the XML file - size_t offs = XML_GetCurrentByteIndex(p); - - // Build the source location and update the default location of the - // current - // logger instance - SourceLocation loc{stack->getContext().getSourceId(), offs, offs + len}; - stack->getContext().getLogger().setDefaultLocation(loc); - return loc; -} - -enum class XMLAttributeState { - IN_TAG_NAME, - SEARCH_ATTR, - IN_ATTR_NAME, - HAS_ATTR_NAME, - HAS_ATTR_EQUALS, - IN_ATTR_DATA -}; - -static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets( - CharReader &reader, SourceLocation location) -{ - std::map<std::string, SourceLocation> res; - - // Fork the reader, we don't want to mess up the XML parsing process, do we? - CharReaderFork readerFork = reader.fork(); - - // Move the read cursor to the start location, abort if this does not work - size_t offs = location.getStart(); - if (!location.isValid() || offs != readerFork.seek(offs)) { - return res; - } - - // Now all we need to do is to implement one half of an XML parser. As this - // is inherently complicated we'll totaly fail at it. Don't care. All we - // want to get is those darn offsets for pretty error messages... (and we - // can assume the XML is valid as it was already read by expat) - XMLAttributeState state = XMLAttributeState::IN_TAG_NAME; - char c; - std::stringstream attrName; - while (readerFork.read(c)) { - // Abort at the end of the tag - if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) { - return res; - } - - // One state machine to rule them all, one state machine to find them, - // One state machine to bring them all and in the darkness bind them - // (the byte offsets) - switch (state) { - case XMLAttributeState::IN_TAG_NAME: - if (Utils::isWhitespace(c)) { - state = XMLAttributeState::SEARCH_ATTR; - } - break; - case XMLAttributeState::SEARCH_ATTR: - if (!Utils::isWhitespace(c)) { - state = XMLAttributeState::IN_ATTR_NAME; - attrName << c; - } - break; - case XMLAttributeState::IN_ATTR_NAME: - if (Utils::isWhitespace(c)) { - state = XMLAttributeState::HAS_ATTR_NAME; - } else if (c == '=') { - state = XMLAttributeState::HAS_ATTR_EQUALS; - } else { - attrName << c; - } - break; - case XMLAttributeState::HAS_ATTR_NAME: - if (!Utils::isWhitespace(c)) { - if (c == '=') { - state = XMLAttributeState::HAS_ATTR_EQUALS; - break; - } - // Well, this is a strange XML file... We expected to - // see a '=' here! Try to continue with the - // "HAS_ATTR_EQUALS" state as this state will hopefully - // inlcude some error recovery - } else { - // Skip whitespace here - break; - } - // Fallthrough - case XMLAttributeState::HAS_ATTR_EQUALS: - if (!Utils::isWhitespace(c)) { - if (c == '"') { - // Here we are! We have found the beginning of an - // attribute. Let's quickly lock the current offset away - // in the result map - res.emplace(attrName.str(), - SourceLocation{reader.getSourceId(), - readerFork.getOffset()}); - attrName.str(std::string{}); - state = XMLAttributeState::IN_ATTR_DATA; - } else { - // No, this XML file is not well formed. Assume we're in - // an attribute name once again - attrName.str(std::string{&c, 1}); - state = XMLAttributeState::IN_ATTR_NAME; - } - } - break; - case XMLAttributeState::IN_ATTR_DATA: - if (c == '"') { - // We're at the end of the attribute data, start anew - state = XMLAttributeState::SEARCH_ATTR; - } - break; - } - } - return res; -} - -static void xmlStartElementHandler(void *p, const XML_Char *name, - const XML_Char **attrs) -{ - XML_Parser parser = static_cast<XML_Parser>(p); - XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); - ParserStack *stack = userData->stack; - - SourceLocation loc = syncLoggerPosition(parser); - - // Read the argument locations -- this is only a stupid and slow hack, - // but it is necessary, as expat doesn't give use the byte offset of the - // arguments. - std::map<std::string, SourceLocation> offs = - reconstructXMLAttributeOffsets(*userData->reader, loc); - - // Assemble the arguments - Variant::mapType args; - - const XML_Char **attr = attrs; - while (*attr) { - // Convert the C string to a std::string - const std::string key{*(attr++)}; - - // Search the location of the key - SourceLocation keyLoc; - auto it = offs.find(key); - if (it != offs.end()) { - keyLoc = it->second; - } - - // Parse the string, pass the location of the key - std::pair<bool, Variant> value = VariantReader::parseGenericString( - *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(), - keyLoc.getStart()); - args.emplace(key, value.second); - } - - // Call the start function - std::string nameStr(name); - if (nameStr != "ousia" || userData->depth > 0) { - stack->start(std::string(name), args, loc); - } - - // Increment the current depth - userData->depth++; -} - -static void xmlEndElementHandler(void *p, const XML_Char *name) -{ - XML_Parser parser = static_cast<XML_Parser>(p); - XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); - ParserStack *stack = userData->stack; - - syncLoggerPosition(parser); - - // Decrement the current depth - userData->depth--; - - // Call the end function - std::string nameStr(name); - if (nameStr != "ousia" || userData->depth > 0) { - stack->end(); - } -} - -static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len) -{ - XML_Parser parser = static_cast<XML_Parser>(p); - XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); - ParserStack *stack = userData->stack; - - size_t ulen = len > 0 ? static_cast<size_t>(len) : 0; - syncLoggerPosition(parser, ulen); - const std::string data = Utils::trim(std::string{s, ulen}); - if (!data.empty()) { - stack->data(data); - } -} - -/* Class XmlParser */ - -void XmlParser::doParse(CharReader &reader, ParserContext &ctx) -{ - // Create the parser object - ScopedExpatXmlParser p{"UTF-8"}; - - // Create the parser stack instance, if we're starting on a non-empty scope, - // try to deduce the parser state - ParserStack stack(ctx, ParserStates::XmlStates); - if (!ctx.getScope().isEmpty()) { - if (!stack.deduceState()) { - return; - } - } - - // Pass the reference to the ParserStack to the XML handler - XMLUserData data(&stack, &reader); - XML_SetUserData(&p, &data); - XML_UseParserAsHandlerArg(&p); - - // Set the callback functions - XML_SetStartElementHandler(&p, xmlStartElementHandler); - XML_SetEndElementHandler(&p, xmlEndElementHandler); - XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler); - - // Feed data into expat while there is data to process - constexpr size_t BUFFER_SIZE = 64 * 1024; - while (true) { - // Fetch a buffer from expat for the input data - char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE)); - if (!buf) { - throw LoggableException{ - "Internal error: XML parser out of memory!"}; - } - - // Read into the buffer - size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE); - - // Parse the data and handle any XML error - if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { - // Fetch the xml parser byte offset - size_t offs = XML_GetCurrentByteIndex(&p); - - // Throw a corresponding exception - XML_Error code = XML_GetErrorCode(&p); - std::string msg = std::string{XML_ErrorString(code)}; - throw LoggableException{"XML: " + msg, - SourceLocation{ctx.getSourceId(), offs}}; - } - - // Abort once there are no more bytes in the stream - if (bytesRead == 0) { - break; - } - } -} -} - diff --git a/test/core/RangeSetTest.cpp b/test/core/RangeSetTest.cpp index cbf8f59..446ee51 100644 --- a/test/core/RangeSetTest.cpp +++ b/test/core/RangeSetTest.cpp @@ -110,7 +110,7 @@ TEST(RangeSet, Merge) s.merge(Range<int>(40, 50)); s.merge(Range<int>(60, 70)); { - ASSERT_EQ(ranges.size(), 4); + ASSERT_EQ(ranges.size(), 4U); auto it = ranges.begin(); ASSERT_EQ((*it).start, 0); @@ -132,7 +132,7 @@ TEST(RangeSet, Merge) // Now insert an element which spans the second and third element s.merge(Range<int>(15, 55)); { - ASSERT_EQ(ranges.size(), 3); + ASSERT_EQ(ranges.size(), 3U); auto it = ranges.begin(); ASSERT_EQ((*it).start, 0); @@ -150,7 +150,7 @@ TEST(RangeSet, Merge) // Now insert an element which expands the first element s.merge(Range<int>(-10, 11)); { - ASSERT_EQ(ranges.size(), 3); + ASSERT_EQ(ranges.size(), 3U); auto it = ranges.begin(); ASSERT_EQ((*it).start, -10); @@ -168,7 +168,7 @@ TEST(RangeSet, Merge) // Now insert an element which merges the last two elements s.merge(Range<int>(13, 70)); { - ASSERT_EQ(ranges.size(), 2); + ASSERT_EQ(ranges.size(), 2U); auto it = ranges.begin(); ASSERT_EQ((*it).start, -10); @@ -182,7 +182,7 @@ TEST(RangeSet, Merge) // Now insert an element which merges the remaining elements s.merge(Range<int>(-9, 12)); { - ASSERT_EQ(ranges.size(), 1); + ASSERT_EQ(ranges.size(), 1U); auto it = ranges.begin(); ASSERT_EQ((*it).start, -10); diff --git a/test/core/StandaloneEnvironment.hpp b/test/core/StandaloneEnvironment.hpp index a9dcdce..790bad4 100644 --- a/test/core/StandaloneEnvironment.hpp +++ b/test/core/StandaloneEnvironment.hpp @@ -31,6 +31,10 @@ namespace ousia { +/** + * StandaloneEnvironment is a class used for quickly setting up an entire + * environment needed for running an Ousia instance. + */ struct StandaloneEnvironment { ConcreteLogger &logger; Manager manager; diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 917f45c..7801296 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -24,22 +24,40 @@ namespace ousia { TEST(Utils, isIdentifier) { - ASSERT_TRUE(Utils::isIdentifier("test")); - ASSERT_TRUE(Utils::isIdentifier("t0-_est")); - ASSERT_FALSE(Utils::isIdentifier("_t0-_EST")); - ASSERT_FALSE(Utils::isIdentifier("-t0-_EST")); - ASSERT_FALSE(Utils::isIdentifier("0t-_EST")); - ASSERT_FALSE(Utils::isIdentifier("invalid key")); + EXPECT_TRUE(Utils::isIdentifier("test")); + EXPECT_TRUE(Utils::isIdentifier("t0-_est")); + EXPECT_FALSE(Utils::isIdentifier("_t0-_EST")); + EXPECT_FALSE(Utils::isIdentifier("-t0-_EST")); + EXPECT_FALSE(Utils::isIdentifier("0t-_EST")); + EXPECT_FALSE(Utils::isIdentifier("_A")); + EXPECT_FALSE(Utils::isIdentifier("invalid key")); + EXPECT_FALSE(Utils::isIdentifier("")); } -TEST(Utils, trim) + +TEST(Utils, isNamespacedIdentifier) { - ASSERT_EQ("hello world", Utils::trim("\t hello world \n\r\t")); - ASSERT_EQ("hello world", Utils::trim("hello world \n\r\t")); - ASSERT_EQ("hello world", Utils::trim(" hello world")); - ASSERT_EQ("hello world", Utils::trim("hello world")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("test")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("_t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("-t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("0t-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("invalid key")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("_A")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("")); + EXPECT_FALSE(Utils::isNamespacedIdentifier(":")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("test:a")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:b")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("test:test")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:t0-_est")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("test:_A")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("test::a")); + EXPECT_FALSE(Utils::isNamespacedIdentifier(":test")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est:_t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est: b")); } + TEST(Utils, split) { ASSERT_EQ(std::vector<std::string>({"ab"}), Utils::split("ab", '.')); @@ -73,5 +91,23 @@ TEST(Utils, extractFileExtension) ASSERT_EQ("ext", Utils::extractFileExtension("foo.bar/test.EXT")); } +TEST(Utils, startsWith) +{ + ASSERT_TRUE(Utils::startsWith("foobar", "foo")); + ASSERT_TRUE(Utils::startsWith("foo", "foo")); + ASSERT_FALSE(Utils::startsWith("foo", "foobar")); + ASSERT_FALSE(Utils::startsWith("foobar", "bar")); + ASSERT_TRUE(Utils::startsWith("foo", "")); +} + +TEST(Utils, endsWith) +{ + ASSERT_FALSE(Utils::endsWith("foobar", "foo")); + ASSERT_TRUE(Utils::endsWith("foo", "foo")); + ASSERT_FALSE(Utils::endsWith("foo", "foobar")); + ASSERT_TRUE(Utils::endsWith("foobar", "bar")); + ASSERT_TRUE(Utils::endsWith("foo", "")); +} + } diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp index 8fcbdf2..4cb4331 100644 --- a/test/core/model/DomainTest.cpp +++ b/test/core/model/DomainTest.cpp @@ -242,7 +242,7 @@ TEST(Descriptor, getDefaultFields) A->createPrimitiveFieldDescriptor(sys->getStringType(), logger); // now we should find that. auto fields = A->getDefaultFields(); - ASSERT_EQ(1, fields.size()); + ASSERT_EQ(1U, fields.size()); ASSERT_EQ(A_prim_field, fields[0]); // remove that field from A and add it to another class. @@ -258,7 +258,7 @@ TEST(Descriptor, getDefaultFields) // but we should find it again if we set B as superclass of A. A->setSuperclass(B, logger); fields = A->getDefaultFields(); - ASSERT_EQ(1, fields.size()); + ASSERT_EQ(1U, fields.size()); ASSERT_EQ(A_prim_field, fields[0]); // and we should not be able to find it if we override the field. @@ -277,7 +277,7 @@ TEST(Descriptor, getDefaultFields) // now we should find that. fields = A->getDefaultFields(); - ASSERT_EQ(1, fields.size()); + ASSERT_EQ(1U, fields.size()); ASSERT_EQ(C_field, fields[0]); // add another transparent child class to A with a daughter class that has @@ -296,7 +296,7 @@ TEST(Descriptor, getDefaultFields) // now we should find both primitive fields, but the C field first. fields = A->getDefaultFields(); - ASSERT_EQ(2, fields.size()); + ASSERT_EQ(2U, fields.size()); ASSERT_EQ(C_field, fields[0]); ASSERT_EQ(F_field, fields[1]); } @@ -321,7 +321,7 @@ TEST(Descriptor, getPermittedChildren) * in between. */ NodeVector<StructuredClass> children = book->getPermittedChildren(); - ASSERT_EQ(3, children.size()); + ASSERT_EQ(3U, children.size()); ASSERT_EQ(section, children[0]); ASSERT_EQ(paragraph, children[1]); ASSERT_EQ(text, children[2]); @@ -331,7 +331,7 @@ TEST(Descriptor, getPermittedChildren) mgr, "Subclass", domain, Cardinality::any(), text, true, false)}; // And that should be in the result list as well now. children = book->getPermittedChildren(); - ASSERT_EQ(4, children.size()); + ASSERT_EQ(4U, children.size()); ASSERT_EQ(section, children[0]); ASSERT_EQ(paragraph, children[1]); ASSERT_EQ(text, children[2]); diff --git a/test/core/parser/ParserStackTest.cpp b/test/core/parser/ParserStackTest.cpp deleted file mode 100644 index 3a0decb..0000000 --- a/test/core/parser/ParserStackTest.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <iostream> - -#include <gtest/gtest.h> - -#include <core/parser/ParserStack.hpp> -#include <core/StandaloneEnvironment.hpp> - -namespace ousia { - -ConcreteLogger logger; - -static int startCount = 0; -static int endCount = 0; -static int dataCount = 0; - -class TestHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override { startCount++; } - - void end() override { endCount++; } - - void data(const std::string &data, int field) override { dataCount++; } - - static Handler *create(const HandlerData &data) - { - return new TestHandler(data); - } -}; - -namespace ParserStates { -static const ParserState Document = - ParserStateBuilder().parent(&None).elementHandler(TestHandler::create); -static const ParserState Body = ParserStateBuilder() - .parent(&Document) - .elementHandler(TestHandler::create); -static const ParserState Empty = - ParserStateBuilder().parent(&Document).elementHandler(TestHandler::create); -static const ParserState Special = - ParserStateBuilder().parent(&All).elementHandler(TestHandler::create); -static const ParserState Arguments = - ParserStateBuilder() - .parent(&None) - .elementHandler(TestHandler::create) - .arguments({Argument::Int("a"), Argument::String("b")}); -static const ParserState BodyChildren = - ParserStateBuilder() - .parent(&Body) - .elementHandler(TestHandler::create); - -static const std::multimap<std::string, const ParserState *> TestHandlers{ - {"document", &Document}, - {"body", &Body}, - {"empty", &Empty}, - {"special", &Special}, - {"arguments", &Arguments}, - {"*", &BodyChildren}}; -} - -TEST(ParserStack, simpleTest) -{ - StandaloneEnvironment env(logger); - ParserStack s{env.context, ParserStates::TestHandlers}; - - startCount = 0; - endCount = 0; - dataCount = 0; - - EXPECT_EQ("", s.currentCommandName()); - EXPECT_EQ(&ParserStates::None, &s.currentState()); - - s.start("document", {}); - s.data("test1"); - - EXPECT_EQ("document", s.currentCommandName()); - EXPECT_EQ(&ParserStates::Document, &s.currentState()); - EXPECT_EQ(1, startCount); - EXPECT_EQ(1, dataCount); - - s.start("body", {}); - s.data("test2"); - EXPECT_EQ("body", s.currentCommandName()); - EXPECT_EQ(&ParserStates::Body, &s.currentState()); - EXPECT_EQ(2, startCount); - EXPECT_EQ(2, dataCount); - - s.start("inner", {}); - EXPECT_EQ("inner", s.currentCommandName()); - EXPECT_EQ(&ParserStates::BodyChildren, &s.currentState()); - s.end(); - EXPECT_EQ(3, startCount); - EXPECT_EQ(1, endCount); - - s.end(); - EXPECT_EQ(2, endCount); - - EXPECT_EQ("document", s.currentCommandName()); - EXPECT_EQ(&ParserStates::Document, &s.currentState()); - - s.start("body", {}); - s.data("test3"); - EXPECT_EQ("body", s.currentCommandName()); - EXPECT_EQ(&ParserStates::Body, &s.currentState()); - s.end(); - EXPECT_EQ(4, startCount); - EXPECT_EQ(3, dataCount); - EXPECT_EQ(3, endCount); - - EXPECT_EQ("document", s.currentCommandName()); - EXPECT_EQ(&ParserStates::Document, &s.currentState()); - - s.end(); - EXPECT_EQ(4, endCount); - - EXPECT_EQ("", s.currentCommandName()); - EXPECT_EQ(&ParserStates::None, &s.currentState()); -} - -TEST(ParserStack, errorHandling) -{ - StandaloneEnvironment env(logger); - ParserStack s{env.context, ParserStates::TestHandlers}; - - EXPECT_THROW(s.start("body", {}), OusiaException); - s.start("document", {}); - EXPECT_THROW(s.start("document", {}), OusiaException); - s.start("empty", {}); - EXPECT_THROW(s.start("body", {}), OusiaException); - s.start("special", {}); - s.end(); - s.end(); - s.end(); - EXPECT_EQ(&ParserStates::None, &s.currentState()); - ASSERT_THROW(s.end(), OusiaException); - ASSERT_THROW(s.data("test", 1), OusiaException); -} - -TEST(ParserStack, validation) -{ - StandaloneEnvironment env(logger); - ParserStack s{env.context, ParserStates::TestHandlers}; - - logger.reset(); - s.start("arguments", {}); - EXPECT_TRUE(logger.hasError()); - s.end(); - - s.start("arguments", {{"a", 5}}); - EXPECT_TRUE(logger.hasError()); - s.end(); - - logger.reset(); - s.start("arguments", {{"a", 5}, {"b", "test"}}); - EXPECT_FALSE(logger.hasError()); - s.end(); -} -} - diff --git a/test/core/parser/ParserStateTest.cpp b/test/core/parser/ParserStateTest.cpp deleted file mode 100644 index 91d8dcd..0000000 --- a/test/core/parser/ParserStateTest.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <gtest/gtest.h> - -#include <core/common/Rtti.hpp> -#include <core/parser/ParserState.hpp> - -namespace ousia { - -static const Rtti t1; -static const Rtti t2; -static const Rtti t3; -static const Rtti t4; -static const Rtti t5; - -static const ParserState s1 = ParserStateBuilder().createdNodeType(&t1); -static const ParserState s2a = - ParserStateBuilder().parent(&s1).createdNodeType(&t2); -static const ParserState s2b = - ParserStateBuilder().parent(&s1).createdNodeType(&t2); -static const ParserState s3 = - ParserStateBuilder().parents({&s2a, &s1}).createdNodeType(&t3); -static const ParserState s4 = - ParserStateBuilder().parent(&s3).createdNodeType(&t4); -static const ParserState s5 = - ParserStateBuilder().parent(&s2b).createdNodeType(&t5); - -TEST(ParserStateDeductor, deduce) -{ - using Result = std::vector<const ParserState *>; - using Signature = std::vector<const Rtti *>; - std::vector<const ParserState *> states{&s1, &s2a, &s2b, &s3, &s4, &s5}; - - // Should not crash on empty signature - ASSERT_EQ(Result{}, ParserStateDeductor(Signature{}, states).deduce()); - - // Try repeating signature elements - ASSERT_EQ(Result({&s1}), - ParserStateDeductor(Signature({&t1}), states).deduce()); - ASSERT_EQ(Result({&s1}), - ParserStateDeductor(Signature({&t1, &t1}), states).deduce()); - ASSERT_EQ(Result({&s1}), - ParserStateDeductor(Signature({&t1, &t1, &t1}), states).deduce()); - - // Go to another state - ASSERT_EQ(Result({&s2a, &s2b}), - ParserStateDeductor(Signature({&t1, &t1, &t2}), states).deduce()); - ASSERT_EQ(Result({&s4}), - ParserStateDeductor(Signature({&t1, &t3, &t4}), states).deduce()); - - // Skip one state - ASSERT_EQ(Result({&s4}), - ParserStateDeductor(Signature({&t2, &t4}), states).deduce()); - - // Impossible signature - ASSERT_EQ(Result({}), - ParserStateDeductor(Signature({&t4, &t5}), states).deduce()); - -} -} - diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp new file mode 100644 index 0000000..321d471 --- /dev/null +++ b/test/core/parser/stack/StackTest.cpp @@ -0,0 +1,666 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <iostream> + +#include <gtest/gtest.h> + +#include <core/frontend/TerminalLogger.hpp> +#include <core/parser/stack/Handler.hpp> +#include <core/parser/stack/Stack.hpp> +#include <core/parser/stack/State.hpp> + +#include <core/StandaloneEnvironment.hpp> + +namespace ousia { +namespace parser_stack { + +// Build an instance of the StandaloneEnvironment used for this unit test +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; +static StandaloneEnvironment env(logger); + +namespace { + +struct Tracker { + int startCount; + int endCount; + int fieldStartCount; + int fieldEndCount; + int annotationStartCount; + int annotationEndCount; + int dataCount; + + Variant::mapType startArgs; + bool fieldStartIsDefault; + size_t fieldStartIdx; + Variant annotationStartClassName; + Variant::mapType annotationStartArgs; + Variant annotationEndClassName; + Variant annotationEndElementName; + Variant dataData; + + bool startResult; + bool fieldStartSetIsDefault; + bool fieldStartResult; + bool annotationStartResult; + bool annotationEndResult; + bool dataResult; + + Tracker() { reset(); } + + void reset() + { + startCount = 0; + endCount = 0; + fieldStartCount = 0; + fieldEndCount = 0; + annotationStartCount = 0; + annotationEndCount = 0; + dataCount = 0; + + startArgs = Variant::mapType{}; + fieldStartIsDefault = false; + fieldStartIdx = 0; + annotationStartClassName = Variant::fromString(std::string{}); + annotationStartArgs = Variant::mapType{}; + annotationEndClassName = Variant::fromString(std::string{}); + annotationEndElementName = Variant::fromString(std::string{}); + dataData = Variant::fromString(std::string{}); + + startResult = true; + fieldStartSetIsDefault = false; + fieldStartResult = true; + annotationStartResult = true; + annotationEndResult = true; + dataResult = true; + } + + void expect(int startCount, int endCount, int fieldStartCount, + int fieldEndCount, int annotationStartCount, + int annotationEndCount, int dataCount) + { + EXPECT_EQ(startCount, this->startCount); + EXPECT_EQ(endCount, this->endCount); + EXPECT_EQ(fieldStartCount, this->fieldStartCount); + EXPECT_EQ(fieldEndCount, this->fieldEndCount); + EXPECT_EQ(annotationStartCount, this->annotationStartCount); + EXPECT_EQ(annotationEndCount, this->annotationEndCount); + EXPECT_EQ(dataCount, this->dataCount); + } +}; + +static Tracker tracker; + +class TestHandler : public Handler { +private: + TestHandler(const HandlerData &handlerData) : Handler(handlerData) {} + +public: + bool start(Variant::mapType &args) override + { + tracker.startCount++; + tracker.startArgs = args; + if (!tracker.startResult) { + logger().error( + "The TestHandler was told not to allow a field start. So it " + "doesn't. The TestHandler always obeys its master."); + } + return tracker.startResult; + } + + void end() override { tracker.endCount++; } + + bool fieldStart(bool &isDefault, size_t fieldIdx) override + { + tracker.fieldStartCount++; + tracker.fieldStartIsDefault = isDefault; + tracker.fieldStartIdx = fieldIdx; + if (tracker.fieldStartSetIsDefault) { + isDefault = true; + } + return tracker.fieldStartResult; + } + + void fieldEnd() override { tracker.fieldEndCount++; } + + bool annotationStart(const Variant &className, + Variant::mapType &args) override + { + tracker.annotationStartCount++; + tracker.annotationStartClassName = className; + tracker.annotationStartArgs = args; + return tracker.annotationStartResult; + } + + bool annotationEnd(const Variant &className, + const Variant &elementName) override + { + tracker.annotationEndCount++; + tracker.annotationEndClassName = className; + tracker.annotationEndElementName = elementName; + return tracker.annotationEndResult; + } + + bool data(Variant &data) override + { + tracker.dataCount++; + tracker.dataData = data; + return tracker.dataResult; + } + + static Handler *create(const HandlerData &handlerData) + { + return new TestHandler(handlerData); + } +}; +} + +namespace States { +static const State Document = + StateBuilder().parent(&None).elementHandler(TestHandler::create); +static const State Body = + StateBuilder().parent(&Document).elementHandler(TestHandler::create); +static const State Empty = + StateBuilder().parent(&Document).elementHandler(TestHandler::create); +static const State Special = + StateBuilder().parent(&All).elementHandler(TestHandler::create); +static const State Arguments = + StateBuilder().parent(&None).elementHandler(TestHandler::create).arguments( + {Argument::Int("a"), Argument::String("b")}); +static const State BodyChildren = + StateBuilder().parent(&Body).elementHandler(TestHandler::create); +static const State Any = + StateBuilder().parents({&None, &Any}).elementHandler(TestHandler::create); + +static const std::multimap<std::string, const State *> TestHandlers{ + {"document", &Document}, + {"body", &Body}, + {"empty", &Empty}, + {"special", &Special}, + {"arguments", &Arguments}, + {"*", &BodyChildren}}; + +static const std::multimap<std::string, const State *> AnyHandlers{{"*", &Any}}; +} + +TEST(Stack, basicTest) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::TestHandlers}; + + EXPECT_EQ("", s.currentCommandName()); + EXPECT_EQ(&States::None, &s.currentState()); + + s.command("document", {}); + s.fieldStart(true); + s.data("test1"); + + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + + s.command("body", {}); + s.fieldStart(true); + s.data("test2"); + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + tracker.expect(2, 0, 2, 0, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.command("inner", {}); + s.fieldStart(true); + EXPECT_EQ("inner", s.currentCommandName()); + EXPECT_EQ(&States::BodyChildren, &s.currentState()); + + s.fieldEnd(); + tracker.expect(3, 1, 3, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldEnd(); + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + tracker.expect(3, 2, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.command("body", {}); + s.fieldStart(true); + s.data("test3"); + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + s.fieldEnd(); + tracker.expect(4, 3, 4, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + + s.fieldEnd(); + tracker.expect(4, 4, 4, 4, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + + EXPECT_EQ("", s.currentCommandName()); + EXPECT_EQ(&States::None, &s.currentState()); + } + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, errorInvalidCommands) +{ + Stack s{env.context, States::TestHandlers}; + tracker.reset(); + EXPECT_THROW(s.command("body", {}), LoggableException); + s.command("document", {}); + s.fieldStart(true); + EXPECT_THROW(s.command("document", {}), LoggableException); + s.command("empty", {}); + s.fieldStart(true); + EXPECT_THROW(s.command("body", {}), LoggableException); + s.command("special", {}); + s.fieldStart(true); + s.fieldEnd(); + s.fieldEnd(); + s.fieldEnd(); + EXPECT_EQ(&States::None, &s.currentState()); + ASSERT_THROW(s.fieldEnd(), LoggableException); + ASSERT_THROW(s.data("test"), LoggableException); +} + +TEST(Stack, validation) +{ + Stack s{env.context, States::TestHandlers}; + tracker.reset(); + logger.reset(); + + s.command("arguments", {}); + EXPECT_TRUE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); + + logger.reset(); + s.command("arguments", {{"a", 5}}); + EXPECT_TRUE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); + + logger.reset(); + s.command("arguments", {{"a", 5}, {"b", "test"}}); + EXPECT_FALSE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); +} + +TEST(Stack, invalidCommandName) +{ + Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + + s.command("a", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("a_", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(2, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("a_:b", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + ASSERT_THROW(s.command("_a", {}), LoggableException); + ASSERT_THROW(s.command("a:", {}), LoggableException); + ASSERT_THROW(s.command("a:_b", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, multipleFields) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {{"a", false}}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("a", s.currentCommandName()); + EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs); + + s.fieldStart(false); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_FALSE(tracker.fieldStartIsDefault); + EXPECT_EQ(0U, tracker.fieldStartIdx); + + s.data("test"); + tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test", tracker.dataData); + + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + tracker.expect(1, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_FALSE(tracker.fieldStartIsDefault); + EXPECT_EQ(1U, tracker.fieldStartIdx); + + s.data("test2"); + tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test2", tracker.dataData); + + s.fieldEnd(); + tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(true); + tracker.expect(1, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_TRUE(tracker.fieldStartIsDefault); + EXPECT_EQ(2U, tracker.fieldStartIdx); + + s.data("test3"); + tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test3", tracker.dataData); + + s.fieldEnd(); + tracker.expect(1, 1, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + } + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnNewCommand) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("b", {}); + tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnNewCommandWithExplicitDefaultField) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + s.command("b", {}); + tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(2, 1, 2, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + } + tracker.expect(2, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, noImplicitDefaultFieldOnIncompatibleCommand) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + tracker.fieldStartResult = false; + s.command("b", {}); + tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + s.fieldStart(true); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + s.fieldEnd(); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("", s.currentCommandName()); + + s.command("b", {}); + tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, noEndIfStartFails) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + tracker.startResult = false; + s.command("b", {}); + tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_TRUE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnData) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.data("test"); + tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, autoFieldEnd) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, autoImplicitFieldEnd) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + s.command("b", {}); + s.command("c", {}); + s.command("d", {}); + s.command("e", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(5, 1, 5, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(5, 5, 5, 5, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, invalidDefaultField) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, errorInvalidDefaultFieldData) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + s.fieldStart(true); + ASSERT_FALSE(logger.hasError()); + s.data("test"); + ASSERT_TRUE(logger.hasError()); + s.fieldEnd(); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorInvalidFieldData) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + s.data("test"); + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorFieldStartNoCommand) +{ + tracker.reset(); + logger.reset(); + + Stack s{env.context, States::AnyHandlers}; + ASSERT_THROW(s.fieldStart(false), LoggableException); + ASSERT_THROW(s.fieldStart(true), LoggableException); + tracker.expect(0, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorMutlipleFieldStarts) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorMutlipleFieldEnds) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + s.fieldEnd(); + ASSERT_FALSE(logger.hasError()); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorOpenField) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + ASSERT_FALSE(logger.hasError()); + } + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} +} +} + diff --git a/test/core/parser/stack/StateTest.cpp b/test/core/parser/stack/StateTest.cpp new file mode 100644 index 0000000..e503d30 --- /dev/null +++ b/test/core/parser/stack/StateTest.cpp @@ -0,0 +1,79 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <core/common/Rtti.hpp> +#include <core/parser/stack/State.hpp> + +namespace ousia { +namespace parser_stack { + +static const Rtti t1; +static const Rtti t2; +static const Rtti t3; +static const Rtti t4; +static const Rtti t5; + +static const State s1 = StateBuilder().createdNodeType(&t1); +static const State s2a = + StateBuilder().parent(&s1).createdNodeType(&t2); +static const State s2b = + StateBuilder().parent(&s1).createdNodeType(&t2); +static const State s3 = + StateBuilder().parents({&s2a, &s1}).createdNodeType(&t3); +static const State s4 = + StateBuilder().parent(&s3).createdNodeType(&t4); +static const State s5 = + StateBuilder().parent(&s2b).createdNodeType(&t5); + +TEST(StateDeductor, deduce) +{ + using Result = std::vector<const State *>; + using Signature = std::vector<const Rtti *>; + std::vector<const State *> states{&s1, &s2a, &s2b, &s3, &s4, &s5}; + + // Should not crash on empty signature + ASSERT_EQ(Result{}, StateDeductor(Signature{}, states).deduce()); + + // Try repeating signature elements + ASSERT_EQ(Result({&s1}), + StateDeductor(Signature({&t1}), states).deduce()); + ASSERT_EQ(Result({&s1}), + StateDeductor(Signature({&t1, &t1}), states).deduce()); + ASSERT_EQ(Result({&s1}), + StateDeductor(Signature({&t1, &t1, &t1}), states).deduce()); + + // Go to another state + ASSERT_EQ(Result({&s2a, &s2b}), + StateDeductor(Signature({&t1, &t1, &t2}), states).deduce()); + ASSERT_EQ(Result({&s4}), + StateDeductor(Signature({&t1, &t3, &t4}), states).deduce()); + + // Skip one state + ASSERT_EQ(Result({&s4}), + StateDeductor(Signature({&t2, &t4}), states).deduce()); + + // Impossible signature + ASSERT_EQ(Result({}), + StateDeductor(Signature({&t4, &t5}), states).deduce()); + +} +} +} + diff --git a/test/formats/osdm/TokenTrieTest.cpp b/test/core/parser/utils/TokenTrieTest.cpp index aacd6c0..087e6e6 100644 --- a/test/formats/osdm/TokenTrieTest.cpp +++ b/test/core/parser/utils/TokenTrieTest.cpp @@ -18,7 +18,7 @@ #include <gtest/gtest.h> -#include <formats/osdm/TokenTrie.hpp> +#include <core/parser/utils/TokenTrie.hpp> namespace ousia { diff --git a/test/formats/osdm/DynamicTokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index c1f8785..8565057 100644 --- a/test/formats/osdm/DynamicTokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -19,13 +19,13 @@ #include <gtest/gtest.h> #include <core/common/CharReader.hpp> -#include <formats/osdm/DynamicTokenizer.hpp> +#include <core/parser/utils/Tokenizer.hpp> namespace ousia { -TEST(DynamicTokenizer, tokenRegistration) +TEST(Tokenizer, tokenRegistration) { - DynamicTokenizer tokenizer; + Tokenizer tokenizer; ASSERT_EQ(EmptyToken, tokenizer.registerToken("")); @@ -50,15 +50,15 @@ TEST(DynamicTokenizer, tokenRegistration) ASSERT_EQ("d", tokenizer.getTokenString(1U)); } -TEST(DynamicTokenizer, textTokenPreserveWhitespace) +TEST(Tokenizer, textTokenPreserveWhitespace) { { CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer{WhitespaceMode::PRESERVE}; - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); ASSERT_EQ(" this \t is only a \n\n test text ", token.content); @@ -74,9 +74,9 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace) CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer{WhitespaceMode::PRESERVE}; - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); ASSERT_EQ("this \t is only a \n\n test text", token.content); @@ -89,15 +89,15 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace) } } -TEST(DynamicTokenizer, textTokenTrimWhitespace) +TEST(Tokenizer, textTokenTrimWhitespace) { { CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer{WhitespaceMode::TRIM}; - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); ASSERT_EQ("this \t is only a \n\n test text", token.content); @@ -113,9 +113,9 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace) CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer{WhitespaceMode::TRIM}; - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); ASSERT_EQ("this \t is only a \n\n test text", token.content); @@ -128,15 +128,15 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace) } } -TEST(DynamicTokenizer, textTokenCollapseWhitespace) +TEST(Tokenizer, textTokenCollapseWhitespace) { { CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); ASSERT_EQ("this is only a test text", token.content); @@ -152,9 +152,9 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace) CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); ASSERT_EQ("this is only a test text", token.content); @@ -167,16 +167,16 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace) } } -TEST(DynamicTokenizer, simpleReadToken) +TEST(Tokenizer, simpleReadToken) { CharReader reader{"test1:test2"}; - DynamicTokenizer tokenizer; + Tokenizer tokenizer; const TokenTypeId tid = tokenizer.registerToken(":"); ASSERT_EQ(0U, tid); { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); @@ -192,7 +192,7 @@ TEST(DynamicTokenizer, simpleReadToken) } { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(tid, token.type); @@ -208,7 +208,7 @@ TEST(DynamicTokenizer, simpleReadToken) } { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); @@ -223,16 +223,16 @@ TEST(DynamicTokenizer, simpleReadToken) } } -TEST(DynamicTokenizer, simplePeekToken) +TEST(Tokenizer, simplePeekToken) { CharReader reader{"test1:test2"}; - DynamicTokenizer tokenizer; + Tokenizer tokenizer; const TokenTypeId tid = tokenizer.registerToken(":"); ASSERT_EQ(0U, tid); { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.peek(reader, token)); ASSERT_EQ(TextToken, token.type); @@ -246,7 +246,7 @@ TEST(DynamicTokenizer, simplePeekToken) } { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.peek(reader, token)); ASSERT_EQ(tid, token.type); @@ -260,7 +260,7 @@ TEST(DynamicTokenizer, simplePeekToken) } { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.peek(reader, token)); ASSERT_EQ(TextToken, token.type); @@ -274,7 +274,7 @@ TEST(DynamicTokenizer, simplePeekToken) } { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); @@ -288,7 +288,7 @@ TEST(DynamicTokenizer, simplePeekToken) } { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(tid, token.type); @@ -302,7 +302,7 @@ TEST(DynamicTokenizer, simplePeekToken) } { - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); @@ -316,10 +316,10 @@ TEST(DynamicTokenizer, simplePeekToken) } } -TEST(DynamicTokenizer, ambiguousTokens) +TEST(Tokenizer, ambiguousTokens) { CharReader reader{"abc"}; - DynamicTokenizer tokenizer; + Tokenizer tokenizer; TokenTypeId t1 = tokenizer.registerToken("abd"); TokenTypeId t2 = tokenizer.registerToken("bc"); @@ -327,7 +327,7 @@ TEST(DynamicTokenizer, ambiguousTokens) ASSERT_EQ(0U, t1); ASSERT_EQ(1U, t2); - DynamicToken token; + Token token; ASSERT_TRUE(tokenizer.read(reader, token)); ASSERT_EQ(TextToken, token.type); @@ -349,18 +349,18 @@ TEST(DynamicTokenizer, ambiguousTokens) ASSERT_FALSE(tokenizer.read(reader, token)); } -TEST(DynamicTokenizer, commentTestWhitespacePreserve) +TEST(Tokenizer, commentTestWhitespacePreserve) { CharReader reader{"Test/Test /* Block Comment */", 0}; // 012345678901234567890123456789 // 0 1 2 - DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE); + Tokenizer tokenizer(WhitespaceMode::PRESERVE); const TokenTypeId t1 = tokenizer.registerToken("/"); const TokenTypeId t2 = tokenizer.registerToken("/*"); const TokenTypeId t3 = tokenizer.registerToken("*/"); - std::vector<DynamicToken> expected = { + std::vector<Token> expected = { {TextToken, "Test", SourceLocation{0, 0, 4}}, {t1, "/", SourceLocation{0, 4, 5}}, {TextToken, "Test ", SourceLocation{0, 5, 10}}, @@ -368,7 +368,7 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve) {TextToken, " Block Comment ", SourceLocation{0, 12, 27}}, {t3, "*/", SourceLocation{0, 27, 29}}}; - DynamicToken t; + Token t; for (auto &te : expected) { EXPECT_TRUE(tokenizer.read(reader, t)); EXPECT_EQ(te.type, t.type); @@ -380,18 +380,18 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve) ASSERT_FALSE(tokenizer.read(reader, t)); } -TEST(DynamicTokenizer, commentTestWhitespaceCollapse) +TEST(Tokenizer, commentTestWhitespaceCollapse) { CharReader reader{"Test/Test /* Block Comment */", 0}; // 012345678901234567890123456789 // 0 1 2 - DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE); + Tokenizer tokenizer(WhitespaceMode::COLLAPSE); const TokenTypeId t1 = tokenizer.registerToken("/"); const TokenTypeId t2 = tokenizer.registerToken("/*"); const TokenTypeId t3 = tokenizer.registerToken("*/"); - std::vector<DynamicToken> expected = { + std::vector<Token> expected = { {TextToken, "Test", SourceLocation{0, 0, 4}}, {t1, "/", SourceLocation{0, 4, 5}}, {TextToken, "Test", SourceLocation{0, 5, 9}}, @@ -399,7 +399,7 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse) {TextToken, "Block Comment", SourceLocation{0, 13, 26}}, {t3, "*/", SourceLocation{0, 27, 29}}}; - DynamicToken t; + Token t; for (auto &te : expected) { EXPECT_TRUE(tokenizer.read(reader, t)); EXPECT_EQ(te.type, t.type); @@ -410,6 +410,5 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse) } ASSERT_FALSE(tokenizer.read(reader, t)); } - } diff --git a/test/formats/osdm/OsdmStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 46f4cf6..d52fa5b 100644 --- a/test/formats/osdm/OsdmStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -23,95 +23,205 @@ #include <core/common/CharReader.hpp> #include <core/frontend/TerminalLogger.hpp> -#include <formats/osdm/OsdmStreamParser.hpp> +#include <formats/osml/OsmlStreamParser.hpp> namespace ousia { static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; -TEST(OsdmStreamParser, empty) +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertCommand(OsmlStreamParser &reader, const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertCommand(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertData(OsmlStreamParser &reader, const std::string &data, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); + EXPECT_EQ(data, reader.getData().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + EXPECT_EQ(defaultField, reader.inDefaultField()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertFieldEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertAnnotationStart(OsmlStreamParser &reader, + const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); + EXPECT_EQ(name, reader.getCommandName().asString()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertAnnotationStart(OsmlStreamParser &reader, + const std::string &name, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + assertAnnotationStart(reader, name, start, end); + EXPECT_EQ(args, reader.getCommandArguments()); +} + +static void assertAnnotationEnd(OsmlStreamParser &reader, + const std::string &name, + const std::string &elementName, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); + ASSERT_EQ(name, reader.getCommandName().asString()); + if (!elementName.empty()) { + ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); + ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name")); + + auto it = reader.getCommandArguments().asMap().find("name"); + ASSERT_EQ(elementName, it->second.asString()); + } + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +static void assertEnd(OsmlStreamParser &reader, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, reader.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, reader.getLocation().getEnd()); + } +} + +TEST(OsmlStreamParser, empty) { const char *testString = ""; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, oneCharacter) +TEST(OsmlStreamParser, oneCharacter) { const char *testString = "a"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); + OsmlStreamParser reader(charReader, logger); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); + assertData(reader, "a", 0, 1); } -TEST(OsdmStreamParser, whitespaceElimination) +TEST(OsmlStreamParser, whitespaceElimination) { const char *testString = " hello \t world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); + OsmlStreamParser reader(charReader, logger); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(14U, loc.getEnd()); + assertData(reader, "hello world", 1, 14); } -TEST(OsdmStreamParser, whitespaceEliminationWithLinebreak) +TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) { const char *testString = " hello \n world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); + OsmlStreamParser reader(charReader, logger); - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(14U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + assertData(reader, "hello world", 1, 14); } -TEST(OsdmStreamParser, escapeWhitespace) +TEST(OsmlStreamParser, escapeWhitespace) { const char *testString = " hello\\ \\ world "; // 012345 67 89012345 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("hello world", reader.getData().asString()); - - SourceLocation loc = reader.getData().getLocation(); - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(15U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + assertData(reader, "hello world", 1, 15); } static void testEscapeSpecialCharacter(const std::string &c) { CharReader charReader(std::string("\\") + c); - OsdmStreamParser reader(charReader, logger); - EXPECT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + OsmlStreamParser reader(charReader, logger); + EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); EXPECT_EQ(c, reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); @@ -119,32 +229,30 @@ static void testEscapeSpecialCharacter(const std::string &c) EXPECT_EQ(1U + c.size(), loc.getEnd()); } -TEST(OsdmStreamParser, escapeSpecialCharacters) +TEST(OsmlStreamParser, escapeSpecialCharacters) { testEscapeSpecialCharacter("\\"); testEscapeSpecialCharacter("{"); testEscapeSpecialCharacter("}"); - testEscapeSpecialCharacter("<"); - testEscapeSpecialCharacter(">"); } -TEST(OsdmStreamParser, simpleSingleLineComment) +TEST(OsmlStreamParser, simpleSingleLineComment) { const char *testString = "% This is a single line comment"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, singleLineComment) +TEST(OsmlStreamParser, singleLineComment) { const char *testString = "a% This is a single line comment\nb"; // 01234567890123456789012345678901 23 // 0 1 2 3 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("a", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(0U, loc.getStart()); @@ -152,25 +260,25 @@ TEST(OsdmStreamParser, singleLineComment) } { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("b", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(33U, loc.getStart()); ASSERT_EQ(34U, loc.getEnd()); } - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, multilineComment) +TEST(OsmlStreamParser, multilineComment) { const char *testString = "a%{ This is a\n\n multiline line comment}%b"; // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("a", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(0U, loc.getStart()); @@ -178,25 +286,25 @@ TEST(OsdmStreamParser, multilineComment) } { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("b", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, nestedMultilineComment) +TEST(OsmlStreamParser, nestedMultilineComment) { const char *testString = "a%{%{Another\n\n}%multiline line comment}%b"; // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("a", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(0U, loc.getStart()); @@ -204,23 +312,23 @@ TEST(OsdmStreamParser, nestedMultilineComment) } { - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); ASSERT_EQ("b", reader.getData().asString()); SourceLocation loc = reader.getData().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, simpleCommand) +TEST(OsmlStreamParser, simpleCommand) { const char *testString = "\\test"; // 0 12345 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); Variant commandName = reader.getCommandName(); ASSERT_EQ("test", commandName.asString()); @@ -230,16 +338,16 @@ TEST(OsdmStreamParser, simpleCommand) ASSERT_EQ(5U, loc.getEnd()); ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, simpleCommandWithName) +TEST(OsmlStreamParser, simpleCommandWithName) { const char *testString = "\\test#bla"; // 0 12345678 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); Variant commandName = reader.getCommandName(); ASSERT_EQ("test", commandName.asString()); @@ -257,17 +365,17 @@ TEST(OsdmStreamParser, simpleCommandWithName) ASSERT_EQ(5U, loc.getStart()); ASSERT_EQ(9U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, simpleCommandWithArguments) +TEST(OsmlStreamParser, simpleCommandWithArguments) { const char *testString = "\\test[a=1,b=2,c=\"test\"]"; // 0 123456789012345 678901 2 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); Variant commandName = reader.getCommandName(); ASSERT_EQ("test", commandName.asString()); @@ -297,17 +405,17 @@ TEST(OsdmStreamParser, simpleCommandWithArguments) ASSERT_EQ(16U, loc.getStart()); ASSERT_EQ(22U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -TEST(OsdmStreamParser, simpleCommandWithArgumentsAndName) +TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) { const char *testString = "\\test#bla[a=1,b=2,c=\"test\"]"; // 0 1234567890123456789 01234 56 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser reader(charReader, logger); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); Variant commandName = reader.getCommandName(); ASSERT_EQ("test", commandName.asString()); @@ -343,126 +451,46 @@ TEST(OsdmStreamParser, simpleCommandWithArgumentsAndName) ASSERT_EQ(5U, loc.getStart()); ASSERT_EQ(9U, loc.getEnd()); - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); } -static void assertCommand(OsdmStreamParser &reader, const std::string &name, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse()); - EXPECT_EQ(name, reader.getCommandName().asString()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertCommand(OsdmStreamParser &reader, const std::string &name, - const Variant::mapType &args, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - assertCommand(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); -} - -static void assertData(OsdmStreamParser &reader, const std::string &data, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(data, reader.getData().asString()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getData().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getData().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertFieldStart(OsdmStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::FIELD_START, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertFieldEnd(OsdmStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::FIELD_END, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -static void assertEnd(OsdmStreamParser &reader, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsdmStreamParser::State::END, reader.parse()); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); - } -} - -TEST(OsdmStreamParser, fields) +TEST(OsmlStreamParser, fields) { const char *testString = "\\test{a}{b}{c}"; // 01234567890123 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); assertData(reader, "b", 9, 10); assertFieldEnd(reader, 10, 11); - assertFieldStart(reader, 11, 12); + assertFieldStart(reader, false, 11, 12); assertData(reader, "c", 12, 13); assertFieldEnd(reader, 13, 14); assertEnd(reader, 14, 14); } -TEST(OsdmStreamParser, dataOutsideField) +TEST(OsmlStreamParser, dataOutsideField) { const char *testString = "\\test{a}{b} c"; // 0123456789012 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); assertData(reader, "b", 9, 10); assertFieldEnd(reader, 10, 11); @@ -470,24 +498,24 @@ TEST(OsdmStreamParser, dataOutsideField) assertEnd(reader, 13, 13); } -TEST(OsdmStreamParser, nestedCommand) +TEST(OsmlStreamParser, nestedCommand) { const char *testString = "\\test{a}{\\test2{b} c} d"; // 012345678 90123456789012 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertData(reader, "a", 6, 7); assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, 8, 9); + assertFieldStart(reader, false, 8, 9); { assertCommand(reader, "test2", 9, 15); - assertFieldStart(reader, 15, 16); + assertFieldStart(reader, false, 15, 16); assertData(reader, "b", 16, 17); assertFieldEnd(reader, 17, 18); } @@ -497,19 +525,19 @@ TEST(OsdmStreamParser, nestedCommand) assertEnd(reader, 23, 23); } -TEST(OsdmStreamParser, nestedCommandImmediateEnd) +TEST(OsmlStreamParser, nestedCommandImmediateEnd) { const char *testString = "\\test{\\test2{b}} d"; // 012345 678901234567 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); { assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertData(reader, "b", 13, 14); assertFieldEnd(reader, 14, 15); } @@ -518,27 +546,27 @@ TEST(OsdmStreamParser, nestedCommandImmediateEnd) assertEnd(reader, 18, 18); } -TEST(OsdmStreamParser, nestedCommandNoData) +TEST(OsmlStreamParser, nestedCommandNoData) { const char *testString = "\\test{\\test2}"; // 012345 6789012 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); assertFieldEnd(reader, 12, 13); assertEnd(reader, 13, 13); } -TEST(OsdmStreamParser, multipleCommands) +TEST(OsmlStreamParser, multipleCommands) { const char *testString = "\\a \\b \\c \\d"; // 012 345 678 90 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); assertCommand(reader, "b", 3, 5); @@ -547,33 +575,33 @@ TEST(OsdmStreamParser, multipleCommands) assertEnd(reader, 11, 11); } -TEST(OsdmStreamParser, fieldsWithSpaces) +TEST(OsmlStreamParser, fieldsWithSpaces) { const char *testString = "\\a {\\b \\c} \n\n {\\d}"; // 0123 456 789012 3 456 789 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, 3, 4); + assertFieldStart(reader, false, 3, 4); assertCommand(reader, "b", 4, 6); assertCommand(reader, "c", 7, 9); assertFieldEnd(reader, 9, 10); - assertFieldStart(reader, 16, 17); + assertFieldStart(reader, false, 16, 17); assertCommand(reader, "d", 17, 19); assertFieldEnd(reader, 19, 20); assertEnd(reader, 20, 20); } -TEST(OsdmStreamParser, errorNoFieldToStart) +TEST(OsmlStreamParser, errorNoFieldToStart) { const char *testString = "\\a b {"; // 012345 // 0 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "a", 0, 2); @@ -583,14 +611,14 @@ TEST(OsdmStreamParser, errorNoFieldToStart) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorNoFieldToEnd) +TEST(OsmlStreamParser, errorNoFieldToEnd) { const char *testString = "\\a b }"; // 012345 // 0 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "a", 0, 2); @@ -600,20 +628,20 @@ TEST(OsdmStreamParser, errorNoFieldToEnd) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorNoFieldEndNested) +TEST(OsmlStreamParser, errorNoFieldEndNested) { const char *testString = "\\test{\\test2{}}}"; // 012345 6789012345 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertFieldEnd(reader, 13, 14); assertFieldEnd(reader, 14, 15); ASSERT_FALSE(logger.hasError()); @@ -621,20 +649,20 @@ TEST(OsdmStreamParser, errorNoFieldEndNested) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorNoFieldEndNestedData) +TEST(OsmlStreamParser, errorNoFieldEndNestedData) { const char *testString = "\\test{\\test2{}}a}"; // 012345 67890123456 // 0 1 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, 5, 6); + assertFieldStart(reader, false, 5, 6); assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, false, 12, 13); assertFieldEnd(reader, 13, 14); assertFieldEnd(reader, 14, 15); assertData(reader, "a", 15, 16); @@ -643,53 +671,53 @@ TEST(OsdmStreamParser, errorNoFieldEndNestedData) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, beginEnd) +TEST(OsmlStreamParser, beginEnd) { const char *testString = "\\begin{book}\\end{book}"; // 012345678901 2345678901 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertFieldEnd(reader, 17, 21); assertEnd(reader, 22, 22); } -TEST(OsdmStreamParser, beginEndWithName) +TEST(OsmlStreamParser, beginEndWithName) { const char *testString = "\\begin{book#a}\\end{book}"; // 01234567890123 4567890123 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", {{"name", "a"}}, 7, 11); - assertFieldStart(reader, 14, 15); + assertFieldStart(reader, true, 14, 15); assertFieldEnd(reader, 19, 23); assertEnd(reader, 24, 24); } -TEST(OsdmStreamParser, beginEndWithNameAndArgs) +TEST(OsmlStreamParser, beginEndWithNameAndArgs) { const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; // 0123456789012345678901234 56789 01 2345678901 // 0 1 2 3 4 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, 32, 33); + assertFieldStart(reader, true, 32, 33); assertFieldEnd(reader, 37, 41); assertEnd(reader, 42, 42); } -TEST(OsdmStreamParser, beginEndWithNameAndArgsMultipleFields) +TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) { const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; @@ -697,67 +725,100 @@ TEST(OsdmStreamParser, beginEndWithNameAndArgsMultipleFields) // 0 1 2 3 4 5 6 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, 32, 33); + assertFieldStart(reader, false, 32, 33); assertData(reader, "a", 33, 34); assertCommand(reader, "test", Variant::mapType{}, 35, 40); assertFieldEnd(reader, 40, 41); - assertFieldStart(reader, 41, 42); + assertFieldStart(reader, false, 41, 42); assertData(reader, "b", 42, 43); assertCommand(reader, "test", Variant::mapType{}, 44, 49); - assertFieldStart(reader, 49, 50); + assertFieldStart(reader, false, 49, 50); assertFieldEnd(reader, 50, 51); assertFieldEnd(reader, 51, 52); - assertFieldStart(reader, 52, 53); + assertFieldStart(reader, true, 52, 53); assertFieldEnd(reader, 57, 61); assertEnd(reader, 62, 62); } -TEST(OsdmStreamParser, beginEndWithData) +TEST(OsmlStreamParser, beginEndWithData) { const char *testString = "\\begin{book}a\\end{book}"; // 0123456789012 3456789012 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertData(reader, "a", 12, 13); assertFieldEnd(reader, 18, 22); assertEnd(reader, 23, 23); } -TEST(OsdmStreamParser, beginEndWithCommand) +TEST(OsmlStreamParser, beginEndNested) +{ + const char *testString = + "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; + // 012345678901234 5678901234567890 123456 7890123 4567890 + // 0 1 2 3 4 5 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, false, 9, 10); + assertData(reader, "b", 10, 11); + assertFieldEnd(reader, 11, 12); + assertFieldStart(reader, true, 13, 14); + assertData(reader, "c", 13, 14); + assertCommand(reader, "d", 22, 23); + assertFieldStart(reader, false, 24, 25); + assertData(reader, "e", 25, 26); + assertFieldEnd(reader, 26, 27); + assertFieldStart(reader, false, 27, 28); + assertData(reader, "f", 28, 29); + assertFieldEnd(reader, 29, 30); + assertFieldStart(reader, true, 31, 32); + assertCommand(reader, "g", 31, 33); + assertFieldStart(reader, false, 33, 34); + assertData(reader, "h", 34, 35); + assertFieldEnd(reader, 35, 36); + assertFieldEnd(reader, 42, 43); + assertFieldEnd(reader, 49, 50); + assertEnd(reader, 51, 51); +} + +TEST(OsmlStreamParser, beginEndWithCommand) { const char *testString = "\\begin{book}\\a{test}\\end{book}"; // 012345678901 23456789 0123456789 // 0 1 2 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, 12, 13); + assertFieldStart(reader, true, 12, 13); assertCommand(reader, "a", 12, 14); - assertFieldStart(reader, 14, 15); + assertFieldStart(reader, false, 14, 15); assertData(reader, "test", 15, 19); assertFieldEnd(reader, 19, 20); assertFieldEnd(reader, 25, 29); assertEnd(reader, 30, 30); } -TEST(OsdmStreamParser, errorBeginNoBraceOpen) +TEST(OsmlStreamParser, errorBeginNoBraceOpen) { const char *testString = "\\begin a"; // 01234567 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -765,12 +826,12 @@ TEST(OsdmStreamParser, errorBeginNoBraceOpen) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorBeginNoIdentifier) +TEST(OsmlStreamParser, errorBeginNoIdentifier) { const char *testString = "\\begin{!"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -778,12 +839,12 @@ TEST(OsdmStreamParser, errorBeginNoIdentifier) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorBeginNoBraceClose) +TEST(OsmlStreamParser, errorBeginNoBraceClose) { const char *testString = "\\begin{a"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -791,12 +852,12 @@ TEST(OsdmStreamParser, errorBeginNoBraceClose) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorBeginNoName) +TEST(OsmlStreamParser, errorBeginNoName) { const char *testString = "\\begin{a#}"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -808,13 +869,13 @@ TEST(OsdmStreamParser, errorBeginNoName) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorEndNoBraceOpen) +TEST(OsmlStreamParser, errorEndNoBraceOpen) { const char *testString = "\\end a"; // 012345 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -822,12 +883,12 @@ TEST(OsdmStreamParser, errorEndNoBraceOpen) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorEndNoIdentifier) +TEST(OsmlStreamParser, errorEndNoIdentifier) { const char *testString = "\\end{!"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -835,12 +896,12 @@ TEST(OsdmStreamParser, errorEndNoIdentifier) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorEndNoBraceClose) +TEST(OsmlStreamParser, errorEndNoBraceClose) { const char *testString = "\\end{a"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -848,12 +909,12 @@ TEST(OsdmStreamParser, errorEndNoBraceClose) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorEndNoBegin) +TEST(OsmlStreamParser, errorEndNoBegin) { const char *testString = "\\end{a}"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -861,91 +922,91 @@ TEST(OsdmStreamParser, errorEndNoBegin) ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, errorBeginEndMismatch) +TEST(OsmlStreamParser, errorBeginEndMismatch) { const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; // 0123456789 012345678901234 5678901 // 0 1 2 3 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, 10, 11); + assertFieldStart(reader, true, 10, 11); assertCommand(reader, "b", 17, 18); - assertFieldStart(reader, 20, 24); + assertFieldStart(reader, true, 20, 24); assertData(reader, "test", 20, 24); ASSERT_FALSE(logger.hasError()); ASSERT_THROW(reader.parse(), LoggableException); ASSERT_TRUE(logger.hasError()); } -TEST(OsdmStreamParser, commandWithNSSep) +TEST(OsmlStreamParser, commandWithNSSep) { const char *testString = "\\test1:test2"; // 012345678901 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test1:test2", 0, 12); assertEnd(reader, 12, 12); } -TEST(OsdmStreamParser, beginEndWithNSSep) +TEST(OsmlStreamParser, beginEndWithNSSep) { const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; // 0123456789012345678 90123456789012345 // 0 1 2 3 CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); assertCommand(reader, "test1:test2", 7, 18); - assertFieldStart(reader, 19, 20); + assertFieldStart(reader, true, 19, 20); assertFieldEnd(reader, 24, 35); assertEnd(reader, 36, 36); } -TEST(OsdmStreamParser, errorBeginNSSep) +TEST(OsmlStreamParser, errorBeginNSSep) { const char *testString = "\\begin:test{blub}\\end{blub}"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); assertCommand(reader, "blub"); ASSERT_TRUE(logger.hasError()); - assertFieldStart(reader); + assertFieldStart(reader, true); assertFieldEnd(reader); assertEnd(reader); } -TEST(OsdmStreamParser, errorEndNSSep) +TEST(OsmlStreamParser, errorEndNSSep) { const char *testString = "\\begin{blub}\\end:test{blub}"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); assertCommand(reader, "blub"); - assertFieldStart(reader); + assertFieldStart(reader, true); ASSERT_FALSE(logger.hasError()); assertFieldEnd(reader); ASSERT_TRUE(logger.hasError()); assertEnd(reader); } -TEST(OsdmStreamParser, errorEmptyNs) +TEST(OsmlStreamParser, errorEmptyNs) { const char *testString = "\\test:"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -955,12 +1016,12 @@ TEST(OsdmStreamParser, errorEmptyNs) assertEnd(reader); } -TEST(OsdmStreamParser, errorRepeatedNs) +TEST(OsmlStreamParser, errorRepeatedNs) { const char *testString = "\\test::"; CharReader charReader(testString); - OsdmStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -969,5 +1030,232 @@ TEST(OsdmStreamParser, errorRepeatedNs) assertData(reader, "::"); assertEnd(reader); } + +TEST(OsmlStreamParser, explicitDefaultField) +{ + const char *testString = "\\a{!b}c"; + // 01234567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertData(reader, "b", 4, 5); + assertFieldEnd(reader, 5, 6); + assertData(reader, "c", 6, 7); + assertEnd(reader, 7, 7); +} + +TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) +{ + const char *testString = "\\a{!\\b}c"; + // 0123 4567 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + assertData(reader, "c", 7, 8); + assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) +{ + const char *testString = "\\a{!\\b}{c}"; + // 0123 456789 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "c", 8, 9); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader, 10, 10); +} + +TEST(OsmlStreamParser, annotationStart) +{ + const char *testString = "<\\a"; + // 0 12 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertEnd(reader, 3, 3); +} + +TEST(OsmlStreamParser, annotationStartWithName) +{ + const char *testString = "<\\annotationWithName#aName"; + // 0 1234567890123456789012345 + // 0 1 2 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart(reader, "annotationWithName", + Variant::mapType{{"name", "aName"}}, 0, 20); + assertEnd(reader, 26, 26); +} + +TEST(OsmlStreamParser, annotationStartWithArguments) +{ + const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; + // 0 1234567890123456789012345678901234 + // 0 1 2 3 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart( + reader, "annotationWithName", + Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); + assertEnd(reader, 35, 35); +} + +TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) +{ + const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; + // 0 123456789012345678901234567 89012345 67 + // 0 1 2 3 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart( + reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, + 10); + assertFieldStart(reader, true, 26, 27); + assertData(reader, "a", 26, 27); + assertFieldEnd(reader, 33, 35); + assertAnnotationEnd(reader, "", "", 36, 38); + assertEnd(reader, 38, 38); +} + +TEST(OsmlStreamParser, annotationEnd) +{ + const char *testString = "\\a>"; + // 012 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "", 0, 2); + assertEnd(reader, 3, 3); +} + +TEST(OsmlStreamParser, annotationEndWithName) +{ + const char *testString = "\\a#name>"; + // 01234567 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 8, 8); +} + +TEST(OsmlStreamParser, annotationEndWithNameAsArgs) +{ + const char *testString = "\\a[name=name]>"; + // 01234567890123 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 14, 14); +} + +TEST(OsmlStreamParser, errorAnnotationEndWithArguments) +{ + const char *testString = "\\a[foo=bar]>"; + // 012345678901 + // 0 1 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ">", 11, 12); + assertEnd(reader, 12, 12); +} + +TEST(OsmlStreamParser, closingAnnotation) +{ + const char *testString = "<\\a>"; + // 0 123 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertData(reader, ">", 3, 4); + assertEnd(reader, 4, 4); +} + +TEST(OsmlStreamParser, annotationWithFields) +{ + const char *testString = "a <\\b{c}{d}{!e} f \\> g"; + // 012 345678901234567 8901 + // 0 1 2 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertData(reader, "a", 0, 1); + assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); + assertFieldStart(reader, false, 5, 6); + assertData(reader, "c", 6, 7); + assertFieldEnd(reader, 7, 8); + assertFieldStart(reader, false, 8, 9); + assertData(reader, "d", 9, 10); + assertFieldEnd(reader, 10, 11); + assertFieldStart(reader, true, 11, 13); + assertData(reader, "e", 13, 14); + assertFieldEnd(reader, 14, 15); + assertData(reader, "f", 16, 17); + assertAnnotationEnd(reader, "", "", 18, 20); + assertData(reader, "g", 21, 22); + assertEnd(reader, 22, 22); +} + +TEST(OsmlStreamParser, annotationStartEscape) +{ + const char *testString = "<\\%test"; + // 0 123456 + // 0 + + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertData(reader, "<%test", 0, 7); + assertEnd(reader, 7, 7); +} } diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp new file mode 100644 index 0000000..3293370 --- /dev/null +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -0,0 +1,217 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <core/frontend/TerminalLogger.hpp> +#include <core/common/CharReader.hpp> +#include <core/common/Variant.hpp> + +#include <formats/osxml/OsxmlEventParser.hpp> + +namespace ousia { + +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; + +namespace { +enum class OsxmlEvent { + COMMAND, + ANNOTATION_START, + ANNOTATION_END, + FIELD_END, + DATA +}; + +class TestOsxmlEventListener : public OsxmlEvents { +public: + std::vector<std::pair<OsxmlEvent, Variant>> events; + + void command(const Variant &name, const Variant::mapType &args) override + { + events.emplace_back(OsxmlEvent::COMMAND, + Variant::arrayType{name, args}); + } + + void annotationStart(const Variant &className, + const Variant::mapType &args) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_START, + Variant::arrayType{className, args}); + } + + void annotationEnd(const Variant &className, + const Variant &elementName) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_END, + Variant::arrayType{className, elementName}); + } + + void fieldEnd() override + { + events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); + } + + void data(const Variant &data) override + { + events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); + } +}; + +static std::vector<std::pair<OsxmlEvent, Variant>> parseXml( + const char *testString, + WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) +{ + TestOsxmlEventListener listener; + CharReader reader(testString); + OsxmlEventParser parser(reader, listener, logger); + parser.setWhitespaceMode(whitespaceMode); + parser.parse(); + return listener.events; +} +} + +TEST(OsxmlEventParser, simpleCommandWithArgs) +{ + const char *testString = "<a name=\"test\" a=\"1\" b=\"2\" c=\"blub\"/>"; + // 01234567 89012 3456 78 9012 34 5678 90123 456 + // 0 1 2 3 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, + Variant::arrayType{ + "a", Variant::mapType{ + {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); + + // Check the locations (I'll do this one time and then just assume it works) + ASSERT_EQ(1U, events[0].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(2U, events[0].second.asArray()[0].getLocation().getEnd()); + ASSERT_EQ( + 9U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getStart()); + ASSERT_EQ( + 13U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getEnd()); + ASSERT_EQ( + 18U, + events[0].second.asArray()[1].asMap()["a"].getLocation().getStart()); + ASSERT_EQ( + 19U, events[0].second.asArray()[1].asMap()["a"].getLocation().getEnd()); + ASSERT_EQ( + 24U, + events[0].second.asArray()[1].asMap()["b"].getLocation().getStart()); + ASSERT_EQ( + 25U, events[0].second.asArray()[1].asMap()["b"].getLocation().getEnd()); + ASSERT_EQ( + 30U, + events[0].second.asArray()[1].asMap()["c"].getLocation().getStart()); + ASSERT_EQ( + 34U, events[0].second.asArray()[1].asMap()["c"].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, magicTopLevelTag) +{ + const char *testString = "<ousia><a/><b/></ousia>"; + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, magicTopLevelTagInside) +{ + const char *testString = "<a><ousia/></a>"; + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::COMMAND, + Variant::arrayType{{"ousia", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +{ + const char *testString = "<a> hello \n world </a>"; + // 012345678901 234567890123 + // 0 1 2 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::PRESERVE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataTrimWhitespace) +{ + const char *testString = "<a> hello \n world </a>"; + // 012345678901 234567890123 + // 0 1 2 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::TRIM); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) +{ + const char *testString = "<a> hello \n world </a>"; + // 012345678901 234567890123 + // 0 1 2 + + std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{ + {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::COLLAPSE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} +} + diff --git a/test/plugins/xml/XmlParserTest.cpp b/test/formats/osxml/OsxmlParserTest.cpp index fdae779..fe8ed34 100644 --- a/test/plugins/xml/XmlParserTest.cpp +++ b/test/formats/osxml/OsxmlParserTest.cpp @@ -30,7 +30,7 @@ #include <core/StandaloneEnvironment.hpp> #include <plugins/filesystem/FileLocator.hpp> -#include <plugins/xml/XmlParser.hpp> +#include <formats/osxml/OsxmlParser.hpp> namespace ousia { @@ -41,7 +41,7 @@ extern const Rtti Typesystem; } struct XmlStandaloneEnvironment : public StandaloneEnvironment { - XmlParser xmlParser; + OsxmlParser parser; FileLocator fileLocator; XmlStandaloneEnvironment(ConcreteLogger &logger) @@ -52,21 +52,21 @@ struct XmlStandaloneEnvironment : public StandaloneEnvironment { registry.registerDefaultExtensions(); registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"}, - {&RttiTypes::Node}, &xmlParser); + {&RttiTypes::Node}, &parser); registry.registerResourceLocator(&fileLocator); } }; static TerminalLogger logger(std::cerr, true); -TEST(XmlParser, mismatchedTag) +TEST(OsxmlParser, mismatchedTag) { XmlStandaloneEnvironment env(logger); env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document}); ASSERT_TRUE(logger.hasError()); } -TEST(XmlParser, generic) +TEST(OsxmlParser, generic) { XmlStandaloneEnvironment env(logger); env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node}); @@ -186,7 +186,7 @@ static void checkFieldDescriptor( Handle<Type> primitiveType = nullptr, bool optional = false) { auto res = desc->resolve(&RttiTypes::FieldDescriptor, name); - ASSERT_EQ(1, res.size()); + ASSERT_EQ(1U, res.size()); checkFieldDescriptor(res[0].node, name, parent, children, type, primitiveType, optional); } @@ -201,7 +201,7 @@ static void checkFieldDescriptor( optional); } -TEST(XmlParser, domainParsing) +TEST(OsxmlParser, domainParsing) { XmlStandaloneEnvironment env(logger); Rooted<Node> book_domain_node = @@ -332,10 +332,10 @@ static void checkText(Handle<Node> p, Handle<Node> expectedParent, { checkStructuredEntity(p, expectedParent, doc, "paragraph"); Rooted<StructuredEntity> par = p.cast<StructuredEntity>(); - ASSERT_EQ(1, par->getField().size()); + ASSERT_EQ(1U, par->getField().size()); checkStructuredEntity(par->getField()[0], par, doc, "text"); Rooted<StructuredEntity> text = par->getField()[0].cast<StructuredEntity>(); - ASSERT_EQ(1, text->getField().size()); + ASSERT_EQ(1U, text->getField().size()); Handle<StructureNode> d = text->getField()[0]; ASSERT_FALSE(d == nullptr); @@ -345,7 +345,7 @@ static void checkText(Handle<Node> p, Handle<Node> expectedParent, ASSERT_EQ(expected, prim->getContent()); } -TEST(XmlParser, documentParsing) +TEST(OsxmlParser, documentParsing) { XmlStandaloneEnvironment env(logger); Rooted<Node> book_document_node = @@ -357,7 +357,7 @@ TEST(XmlParser, documentParsing) checkStructuredEntity(doc->getRoot(), doc, doc, "book"); { Rooted<StructuredEntity> book = doc->getRoot(); - ASSERT_EQ(2, book->getField().size()); + ASSERT_EQ(2U, book->getField().size()); checkText(book->getField()[0], book, doc, "This might be some introductory text or a dedication."); checkStructuredEntity(book->getField()[1], book, doc, "chapter", @@ -365,7 +365,7 @@ TEST(XmlParser, documentParsing) { Rooted<StructuredEntity> chapter = book->getField()[1].cast<StructuredEntity>(); - ASSERT_EQ(3, chapter->getField().size()); + ASSERT_EQ(3U, chapter->getField().size()); checkText(chapter->getField()[0], chapter, doc, "Here we might have an introduction to the chapter."); checkStructuredEntity(chapter->getField()[1], chapter, doc, @@ -374,7 +374,7 @@ TEST(XmlParser, documentParsing) { Rooted<StructuredEntity> section = chapter->getField()[1].cast<StructuredEntity>(); - ASSERT_EQ(1, section->getField().size()); + ASSERT_EQ(1U, section->getField().size()); checkText(section->getField()[0], section, doc, "Here we might find the actual section content."); } @@ -384,7 +384,7 @@ TEST(XmlParser, documentParsing) { Rooted<StructuredEntity> section = chapter->getField()[2].cast<StructuredEntity>(); - ASSERT_EQ(1, section->getField().size()); + ASSERT_EQ(1U, section->getField().size()); checkText(section->getField()[0], section, doc, "Here we might find the actual section content."); } diff --git a/test/core/CodeTokenizerTest.cpp b/test/plugins/css/CodeTokenizerTest.cpp index 2d4d5a7..2d4d5a7 100644 --- a/test/core/CodeTokenizerTest.cpp +++ b/test/plugins/css/CodeTokenizerTest.cpp diff --git a/test/core/TokenizerTest.cpp b/test/plugins/css/TokenizerTest.cpp index c53f93d..c53f93d 100644 --- a/test/core/TokenizerTest.cpp +++ b/test/plugins/css/TokenizerTest.cpp |