diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-14 23:42:05 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-14 23:42:05 +0100 |
commit | efe60ac3c3a8725ac71329c0bb19fa9d9c58f399 (patch) | |
tree | 97802b0eec92d51be02b94c939c1285feeaf3b4f /src/plugins | |
parent | c1776468bc3daab431d0e2b51589dd12df595227 (diff) |
Moved specific file format parsers to formats/ folder, moved old tokenizer to css code (this is the only place where it is actually used)
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/css/CodeTokenizer.cpp | 169 | ||||
-rw-r--r-- | src/plugins/css/CodeTokenizer.hpp | 136 | ||||
-rw-r--r-- | src/plugins/css/Tokenizer.cpp | 204 | ||||
-rw-r--r-- | src/plugins/css/Tokenizer.hpp | 227 | ||||
-rw-r--r-- | src/plugins/xml/XmlParser.cpp | 1435 | ||||
-rw-r--r-- | src/plugins/xml/XmlParser.hpp | 55 |
6 files changed, 736 insertions, 1490 deletions
diff --git a/src/plugins/css/CodeTokenizer.cpp b/src/plugins/css/CodeTokenizer.cpp new file mode 100644 index 0000000..d65c514 --- /dev/null +++ b/src/plugins/css/CodeTokenizer.cpp @@ -0,0 +1,169 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <cassert> + +#include "CodeTokenizer.hpp" + +namespace ousia { + +Token CodeTokenizer::constructToken(const Token &t) +{ + std::string content = buf.str(); + buf.str(std::string()); + return Token{ + returnTokenId, content, + SourceLocation{t.location.getSourceId(), startToken.location.getStart(), + t.location.getEnd()}}; +} + +void CodeTokenizer::buffer(const Token &t) { buf << t.content; } + +bool CodeTokenizer::doPrepare(const Token &t, std::deque<Token> &peeked) +{ + auto it = descriptors.find(t.tokenId); + CodeTokenMode mode = CodeTokenMode::NONE; + if (it != descriptors.end()) { + mode = it->second.mode; + } + + switch (state) { + case CodeTokenizerState::NORMAL: + switch (mode) { + case CodeTokenMode::STRING_START_END: + state = CodeTokenizerState::IN_STRING; + break; + case CodeTokenMode::BLOCK_COMMENT_START: + state = CodeTokenizerState::IN_BLOCK_COMMENT; + break; + case CodeTokenMode::LINE_COMMENT: + state = CodeTokenizerState::IN_LINE_COMMENT; + break; + case CodeTokenMode::LINEBREAK: + if (!ignoreLinebreaks) { + peeked.push_back( + {it->second.id, t.content, t.location}); + } + return !ignoreLinebreaks; + default: + bool empty = true; + if (t.tokenId == TOKEN_TEXT) { + int begin = -1; + for (size_t c = 0; c < t.content.length(); c++) { + bool isWhitespace = + t.content[c] == ' ' || t.content[c] == '\t'; + if (begin < 0) { + // if we have not yet set our beginning, + // we wait for the first + // non-whitespace-character to set it. + if (!isWhitespace) { + begin = c; + } + } else { + // if we have set our beginning, we wait for the + // first whitespace character, which marks the + // end of the current word. + if (isWhitespace) { + peeked.push_back(Token{ + TOKEN_TEXT, + t.content.substr(begin, (int)c - begin), + SourceLocation{ + t.location.getSourceId(), + t.location.getStart() + begin, + t.location.getStart() + c}}); + begin = -1; + empty = false; + } + } + } + if (begin >= 0) { + peeked.push_back(Token{ + TOKEN_TEXT, t.content.substr(begin), + SourceLocation{t.location.getSourceId(), + t.location.getStart() + begin, + t.location.getEnd()}}); + empty = false; + } + } else { + empty = false; + peeked.push_back(t); + } + return !empty; + } + startToken = t; + returnTokenId = it->second.id; + return false; + case CodeTokenizerState::IN_LINE_COMMENT: + switch (mode) { + case CodeTokenMode::LINEBREAK: + state = CodeTokenizerState::NORMAL; + if (!ignoreComments) { + peeked.push_back(constructToken(t)); + } + return !ignoreComments; + default: + if (!ignoreComments) { + buffer(t); + } + return false; + } + case CodeTokenizerState::IN_BLOCK_COMMENT: + switch (mode) { + case CodeTokenMode::BLOCK_COMMENT_END: + state = CodeTokenizerState::NORMAL; + if (!ignoreComments) { + peeked.push_back(constructToken(t)); + } + return !ignoreComments; + default: + if (!ignoreComments) { + buffer(t); + } + return false; + } + case CodeTokenizerState::IN_STRING: + switch (mode) { + case CodeTokenMode::ESCAPE: + if (escaped) { + buffer(t); + } + escaped = !escaped; + return false; + case CodeTokenMode::STRING_START_END: + if (escaped) { + buffer(t); + escaped = false; + return false; + } else { + peeked.push_back(constructToken(t)); + state = CodeTokenizerState::NORMAL; + return true; + } + default: + if (escaped) { + // TODO: handle escaped characters? + escaped = false; + } + buffer(t); + return false; + } + } + assert(false); + return false; +} +} diff --git a/src/plugins/css/CodeTokenizer.hpp b/src/plugins/css/CodeTokenizer.hpp new file mode 100644 index 0000000..154f949 --- /dev/null +++ b/src/plugins/css/CodeTokenizer.hpp @@ -0,0 +1,136 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file CodeTokenizer.hpp + + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ +#ifndef _OUSIA_CODE_TOKENIZER_HPP_ +#define _OUSIA_CODE_TOKENIZER_HPP_ + +#include <map> +#include <sstream> + +#include <core/common/CharReader.hpp> +#include "Tokenizer.hpp" + +namespace ousia { + +/* + * This enum contains all special Token the CodeTokenizer supports, namely: + * + * 1.) An ambigous Tokens - in post programming languages single-quotes ' or + * double-quotes " - to delimit string tokens. + * 2.) A start token for line comments, which would e.g. be // in Java. + * 3.) A start token for a block comment + * 4.) An end token for a block comment. + * 5.) A linebreak token + * 6.) The escape token, which would e.g. be \ in java. + */ +enum class CodeTokenMode { + STRING_START_END, + LINE_COMMENT, + BLOCK_COMMENT_START, + BLOCK_COMMENT_END, + LINEBREAK, + ESCAPE, + NONE +}; + +/** + * A CodeTokenDescriptor defines the id the user likes to have returned for + * a Token of the mode specified, e.g. if you want to get the id 4 for a + * String Token the corresponding CodeTokenDescriptor would be inizialized + * with CodeTokenDescriptor myDesc {CodeTokenMode::STRING_START_END, 4}; + */ +struct CodeTokenDescriptor { + CodeTokenMode mode; + int id; + + CodeTokenDescriptor(CodeTokenMode mode, int id) : mode(mode), id(id) {} +}; + +/** + * The CodeTokenizer is a finite state machine with the states NORMAL, being + * IN_BLOCK_COMMENT, being IN_LINE_COMMENT or being IN_STRING. + */ +enum class CodeTokenizerState { + NORMAL, + IN_BLOCK_COMMENT, + IN_LINE_COMMENT, + IN_STRING +}; + +/** + * The purpose of a CodeTokenizer is to make it easier to parse classical + * programming Code. It adds the following features to a regular Tokenizer: + * 1.) String tokens (e.g. "string" in Java Code) instead of 3 separate tokens + * for the opening delimiter, the text and the closing delimiter. + * 2.) Escaping in String tokens. + * 3.) Comment Tokens (for line comments as well as block comments) + */ +class CodeTokenizer : public Tokenizer { +private: + std::map<int, CodeTokenDescriptor> descriptors; + CodeTokenizerState state; + std::stringstream buf; + Token startToken; + int returnTokenId; + bool escaped = false; + + Token constructToken(const Token &t); + void buffer(const Token &t); + +protected: + bool doPrepare(const Token &t, std::deque<Token> &peeked) override; + +public: + /** + * If you do not want comment tokens to be returned you can set this to + * true. + */ + bool ignoreComments = false; + /** + * If you do not want linebreaks to be returned you can set this to true. + */ + bool ignoreLinebreaks = false; + + /** + * + * @param input a CharReader containing the input for this tokenizer, as + * with a regular tokenizer. + * @param root a TokenTreeNode representing the root of the TokenTree. + * Please note that you have to specify all tokenIDs here that you use + * in the descriptors map. + * @param descriptors a map mapping tokenIDs to CodeTokenDescriptors. + * In this way you can specify the meaning of certain Tokens. Say you + * specified the Token "//" with the id 1 in the TokenTree. Then you could + * add the entry "1" with the Mode "LINE_COMMENT" to the descriptors map + * and this CodeTokenizer would recognize the token "//" as starting a + * line comment. + */ + CodeTokenizer(CharReader &input, const TokenTreeNode &root, + std::map<int, CodeTokenDescriptor> descriptors) + : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) + { + } +}; +} + +#endif diff --git a/src/plugins/css/Tokenizer.cpp b/src/plugins/css/Tokenizer.cpp new file mode 100644 index 0000000..ab4735a --- /dev/null +++ b/src/plugins/css/Tokenizer.cpp @@ -0,0 +1,204 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <sstream> + +#include "Tokenizer.hpp" + +namespace ousia { + +static std::map<char, TokenTreeNode> buildChildren( + const std::map<std::string, int> &inputs) +{ + std::map<char, TokenTreeNode> children; + std::map<char, std::map<std::string, int>> nexts; + + for (auto &e : inputs) { + const std::string &s = e.first; + const int id = e.second; + if (s.empty()) { + continue; + } + char start = s[0]; + const std::string suffix = s.substr(1); + if (nexts.find(start) != nexts.end()) { + nexts[start].insert(std::make_pair(suffix, id)); + } else { + nexts.insert(std::make_pair( + start, std::map<std::string, int>{{suffix, id}})); + } + } + + for (auto &n : nexts) { + children.insert(std::make_pair(n.first, TokenTreeNode{n.second})); + } + + return children; +} + +static int buildId(const std::map<std::string, int> &inputs) +{ + int tokenId = TOKEN_NONE; + for (auto &e : inputs) { + if (e.first.empty()) { + if (tokenId != TOKEN_NONE) { + throw TokenizerException{std::string{"Ambigous token found: "} + + std::to_string(e.second)}; + } else { + tokenId = e.second; + } + } + } + return tokenId; +} + +TokenTreeNode::TokenTreeNode(const std::map<std::string, int> &inputs) + : children(buildChildren(inputs)), tokenId(buildId(inputs)) +{ +} + +Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root) + : input(input), root(root) +{ +} + +bool Tokenizer::prepare() +{ + std::stringstream buffer; + char c; + SourcePosition start = input.getOffset(); + bool bufEmpty = true; + while (input.peek(c)) { + if (root.children.find(c) != root.children.end()) { + // if there might be a special token, keep peeking forward + // until we find the token (or we don't). + TokenTreeNode const *n = &root; + std::stringstream tBuf; + int match = TOKEN_NONE; + while (true) { + tBuf << c; + n = &(n->children.at(c)); + if (n->tokenId != TOKEN_NONE) { + match = n->tokenId; + // from here on we found a token. If we have something + // in our buffer already, we end the search now. + if (!bufEmpty) { + break; + } else { + // if we want to return this token ( = we have nothing + // in our buffer yet) we look greedily for the longest + // possible token we can construct. + input.consumePeek(); + } + } + if (!input.peek(c)) { + // if we are at the end we break off the search. + break; + } + if (n->children.find(c) == n->children.end()) { + // if we do not find a possible continuation anymore, + // break off the search. + break; + } + } + //reset the peek pointer to the last valid position. + input.resetPeek(); + // check if we did indeed find a special token. + if (match != TOKEN_NONE) { + if (bufEmpty) { + // if we did not have text before, construct that token. + if (doPrepare( + Token{match, tBuf.str(), input.getLocation(start)}, + peeked)) { + return true; + } else { + start = input.getOffset(); + continue; + } + } else { + // otherwise we return the text before the token. + if (doPrepare(Token{TOKEN_TEXT, buffer.str(), input.getLocation(start)}, + peeked)) { + return true; + } else{ + //we need to clear the buffer here. After all the token + //corresponding to this buffer segment is already + //constructed. + buffer.str(std::string()); + bufEmpty = true; + start = input.getOffset(); + continue; + } + } + } else{ + //if we found nothing, read at least one character. + input.peek(c); + } + } + buffer << c; + bufEmpty = false; + input.consumePeek(); + } + if (!bufEmpty) { + return doPrepare(Token{TOKEN_TEXT, buffer.str(), input.getLocation(start)}, + peeked); + } + return false; +} + +bool Tokenizer::doPrepare(const Token &t, std::deque<Token> &peeked) +{ + peeked.push_back(t); + return true; +} + +bool Tokenizer::next(Token &t) +{ + if (peeked.empty()) { + if (!prepare()) { + return false; + } + } + t = peeked.front(); + peeked.pop_front(); + resetPeek(); + return true; +} + +bool Tokenizer::peek(Token &t) +{ + if (peekCursor >= peeked.size()) { + if (!prepare()) { + return false; + } + } + t = peeked[peekCursor]; + peekCursor++; + return true; +} + +void Tokenizer::resetPeek() { peekCursor = 0; } + +void Tokenizer::consumePeek() +{ + while (peekCursor > 0) { + peeked.pop_front(); + peekCursor--; + } +} +} diff --git a/src/plugins/css/Tokenizer.hpp b/src/plugins/css/Tokenizer.hpp new file mode 100644 index 0000000..50e458c --- /dev/null +++ b/src/plugins/css/Tokenizer.hpp @@ -0,0 +1,227 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_TOKENIZER_HPP_ +#define _OUSIA_TOKENIZER_HPP_ + +#include <cstdint> +#include <deque> +#include <istream> +#include <map> + +#include <core/common/CharReader.hpp> + +namespace ousia { + +/** + * This exception is currently only thrown if errors are made during the + * initialization of the Tokenizer. Have a closer look at the documentation + * of the TokenTreeNode constructor for more information. + */ +class TokenizerException : public std::exception { +public: + const std::string msg; + + TokenizerException(const std::string &msg) : msg(msg){}; + + virtual const char *what() const noexcept override { return msg.c_str(); } +}; + +/** + * The Tokenizer internally uses a TokenTree to be efficiently able to identify + * the longest consecutive token in the text. This is equivalent to a prefix + * trie. + * + * The TokenTree is a construct that structures all special tokens this + * Tokenizer recognizes. Consider the Tokens "aab", "a" and "aac". Then + * the TokenTree would look like this: + * + * a + * | \ + * a $ + * | \ + * b c + * | | + * $ $ + * + * Every node in the TokenTree is a valid end state that has a $ attached to it. + * During the search algorithm the Tokenizer goes through the tree and stores + * the last valid position. If a character follows that does not lead to a new + * node in the TokenTree the search ends (and starts again at this character). + * The token corresponding to the last valid position is returned. + * + * This allows us to uniquely identify the matching token given a certain + * input text. Note that this is a greedy matching approach that does not + * work if you're using truly ambiguous tokens (that have the same text). + * + * It is also not allowed that tokens have common middle parts but varying + * pre- and suffixes. Consider the example of two tokens "abd" and "bc" and + * the input string "abc". In that case we start looking for "abd" at the + * start, won't find it, wenn we hit "c" and start the scanning process + * anew. Thus the "bc" token is not found. + * + * For most (well-behaved) tokenization schemes this is not the case, + * though. + */ +class TokenTreeNode { +public: + const std::map<char, TokenTreeNode> children; + const int tokenId; + + /** + * The TokenTreeNode constructor builds a TokenTree from the given token + * specifications. The node returned by this constructor then is the root of + * said TokenTree. + * @param inputs Specifications of tokens in map form. Each specification + * is a tuple of the text that should be matched and some unique ID (>= 0) + * that is returned to you if that Token is found in the text. + * An example for such a map would be + * { + * { "#" , 1}, + * { "##", 2}, + * { "/" , 3} + * } + * Note that IDs below zero are reserved for system Ids, mainly TOKEN_NONE + * (-1) and TOKEN_TEXT (-2). + */ + TokenTreeNode(const std::map<std::string, int> &inputs); +}; + +/** + * This is a reserved constant for the empty token. + */ +static const int TOKEN_NONE = -1; +/** + * This is a reserved constant for every part of the input text that is not a + * specified token. + */ +static const int TOKEN_TEXT = -2; + +/** + * A token for us is identified by an integer tokenID (either one of the + * constants TOKEN_NONE or TOKEN_TEXT or one of the user-defined constants). + * Additionally we return the matched text (which should only be really + * interesting in case of TOKEN_TEXT tokens) and the position in the input text. + */ +struct Token { + int tokenId; + std::string content; + SourceLocation location; + + Token(int tokenId, std::string content, SourceLocation location) + : tokenId(tokenId), + content(content), + location(location) + { + } + + Token() : tokenId(TOKEN_NONE) {} +}; + +/** + * A Tokenizer has the purpose of subdividing an input text into tokens. In our + * definition here we distinguish between two kinds of tokens: + * 1.) User-specified tokens that match a fixed text. + * 2.) Any other text between those tokens. + * The user might want to specify the tokens '#{' and '#}' for example, because + * they have some meaning in her code. The user sets the IDs to 1 and 2. + * Given the input text + * "some text #{ special command #} some text" + * the tokenizer would return the tokens: + * 1.) "some text " with the id TOKEN_TEXT (-2). + * 2.) "#{" with the id 1. + * 3.) " special command " with the id TOKEN_TEXT (-2). + * 4.) "#}" with the id 2. + * 5.) " some text" with the id TOKEN_TEXT (-2). + * This makes the subsequent parsing of files of a specific type easier. + * Note that in case of tokens with that are prefixes of other tokens the + * longest possible match is returned. + */ +class Tokenizer { +private: + CharReader &input; + const TokenTreeNode &root; + std::deque<Token> peeked; + unsigned int peekCursor = 0; + + bool prepare(); + +protected: + /** + * This method is an interface to build multiple tokens from a single one in + * derived classes. This might be interesting if you want to implement + * further logic on text tokens or similar applications. + * + * @param t a Token the "basic" tokenizer found. + * @param peeked a reference to the deque containing all temporary Tokens. + * You are supposed to append your tokens there. In the trivial case you just + * put the given Token on top of the deque. + * @return false if no token was appended to the deque (meaning that you want + * to ignore the given token explicitly) and true in all other cases. + */ + virtual bool doPrepare(const Token &t, std::deque<Token> &peeked); + +public: + /** + * @param input The input of a Tokenizer is given in the form of a + * CharReader. Please refer to the respective documentation. + * @param root This is meant to be the root of a TokenTree giving the + * specification of user-defined tokens this Tokenizer should recognize. + * The Tokenizer promises to not change the TokenTree such that you can + * re-use the same specification for multiple inputs. + * Please refer to the TokenTreeNode documentation for more information. + */ + Tokenizer(CharReader &input, const TokenTreeNode &root); + + /** + * The next method consumes one Token from the input stream and gives + * it to the user (stored in the input argument). + * + * @param t a Token reference that is set to the next found token. + * @return true if a next token was found and false if the input is at its + * end. + */ + bool next(Token &t); + /** + * The peek method does not consume the next Token but buffers it and + * shows it to the user (stored in the input argument). + * + * @param t a Token reference that is set to the next found token. + * @return true if a next token was found and false if the input is at its + * end. + */ + bool peek(Token &t); + + /** + * Resets the peek pointer to the current position in the stream (to the + * beginning of the buffer). + */ + void resetPeek(); + + /** + * Clears the peek buffer, such that all peeked Tokens are consumed. + */ + void consumePeek(); + + const CharReader &getInput() const { return input; } + + CharReader &getInput() { return input; } +}; +} + +#endif diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp deleted file mode 100644 index c46d9de..0000000 --- a/src/plugins/xml/XmlParser.cpp +++ /dev/null @@ -1,1435 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <iostream> -#include <map> -#include <sstream> -#include <vector> - -#include <expat.h> - -#include <core/common/CharReader.hpp> -#include <core/common/RttiBuilder.hpp> -#include <core/common/Utils.hpp> -#include <core/common/VariantReader.hpp> -#include <core/parser/ParserStack.hpp> -#include <core/parser/ParserScope.hpp> -#include <core/model/Document.hpp> -#include <core/model/Domain.hpp> -#include <core/model/Project.hpp> -#include <core/model/RootNode.hpp> -#include <core/model/Typesystem.hpp> - -#include "XmlParser.hpp" - -namespace ousia { - -/* HeadNode Helper class */ - -namespace { -class HeadNode : public Node { -public: - using Node::Node; -}; -} - -namespace RttiTypes { -static Rtti HeadNode = RttiBuilder<ousia::HeadNode>("HeadNode"); -} - -/* Element Handler Classes */ - -class DocumentHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - Rooted<Document> document = - project()->createDocument(args["name"].asString()); - document->setLocation(location()); - scope().push(document); - scope().setFlag(ParserFlag::POST_HEAD, false); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new DocumentHandler{handlerData}; - } -}; - -class DocumentField : public Node { -public: - DocumentField(Manager &mgr, std::string name, Handle<Node> parent) - : Node(mgr, name, parent) - { - } -}; - -namespace RttiTypes { -const Rtti DocumentField = - RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node); -} - -class DocumentChildHandler : public Handler { -public: - using Handler::Handler; - - void preamble(Handle<Node> parentNode, std::string &fieldName, - DocumentEntity *&parent, bool &inField) - { - // check if the parent in the structure tree was an explicit field - // reference. - inField = parentNode->isa(&RttiTypes::DocumentField); - if (inField) { - fieldName = parentNode->getName(); - parentNode = scope().selectOrThrow( - {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity}); - } else { - // if it wasn't an explicit reference, we use the default field. - fieldName = DEFAULT_FIELD_NAME; - } - // reference the parent entity explicitly. - parent = nullptr; - if (parentNode->isa(&RttiTypes::StructuredEntity)) { - parent = static_cast<DocumentEntity *>( - parentNode.cast<StructuredEntity>().get()); - } else if (parentNode->isa(&RttiTypes::AnnotationEntity)) { - parent = static_cast<DocumentEntity *>( - parentNode.cast<AnnotationEntity>().get()); - } - } - - void start(Variant::mapType &args) override - { - scope().setFlag(ParserFlag::POST_HEAD, true); - Rooted<Node> parentNode = scope().selectOrThrow( - {&RttiTypes::Document, &RttiTypes::StructuredEntity, - &RttiTypes::AnnotationEntity, &RttiTypes::DocumentField}); - - std::string fieldName; - DocumentEntity *parent; - bool inField; - - preamble(parentNode, fieldName, parent, inField); - - // try to find a FieldDescriptor for the given tag if we are not in a - // field already. - // TODO: Consider fields of transparent classes - if (!inField && parent != nullptr && - parent->getDescriptor()->hasField(name())) { - Rooted<DocumentField> field{new DocumentField( - parentNode->getManager(), fieldName, parentNode)}; - field->setLocation(location()); - scope().push(field); - return; - } - - // Otherwise create a new StructuredEntity - // TODO: Consider Anchors and AnnotationEntities - Rooted<StructuredClass> strct = scope().resolve<StructuredClass>( - Utils::split(name(), ':'), logger()); - if (strct == nullptr) { - // if we could not resolve the name, throw an exception. - throw LoggableException( - std::string("\"") + name() + "\" could not be resolved.", - location()); - } - - std::string name; - auto it = args.find("name"); - if (it != args.end()) { - name = it->second.asString(); - args.erase(it); - } - - Rooted<StructuredEntity> entity; - if (parentNode->isa(&RttiTypes::Document)) { - entity = parentNode.cast<Document>()->createRootStructuredEntity( - strct, args, name); - } else { - // calculate a path if transparent entities are needed in between. - auto path = parent->getDescriptor()->pathTo(strct); - if (path.empty()) { - throw LoggableException( - std::string("An instance of \"") + strct->getName() + - "\" is not allowed as child of an instance of \"" + - parent->getDescriptor()->getName() + "\"", - location()); - } - - // create all transparent entities until the last field. - for (size_t p = 1; p < path.size() - 1; p = p + 2) { - parent = static_cast<DocumentEntity *>( - parent->createChildStructuredEntity( - path[p].cast<StructuredClass>(), - Variant::mapType{}, path[p - 1]->getName(), - "").get()); - } - entity = parent->createChildStructuredEntity(strct, args, fieldName, - name); - } - entity->setLocation(location()); - scope().push(entity); - } - - void end() override { scope().pop(); } - - void data(const std::string &data, int fieldIdx) override - { - Rooted<Node> parentNode = scope().selectOrThrow( - {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity, - &RttiTypes::DocumentField}); - - std::string fieldName; - DocumentEntity *parent; - bool inField; - - preamble(parentNode, fieldName, parent, inField); - - // retrieve the correct FieldDescriptor. - // TODO: Consider fields of transparent classes - Rooted<Descriptor> desc = parent->getDescriptor(); - Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName); - if (field == nullptr) { - logger().error( - std::string("Can't handle data because no field with name \"") + - fieldName + "\" exists in descriptor\"" + desc->getName() + - "\".", - location()); - return; - } - if (!field->isPrimitive()) { - logger().error(std::string("Can't handle data because field \"") + - fieldName + "\" of descriptor \"" + - desc->getName() + "\" is not primitive!", - location()); - return; - } - - // try to parse the content. - auto res = VariantReader::parseGenericString( - data, logger(), location().getSourceId(), location().getStart()); - if (!res.first) { - return; - } - // try to convert it to the correct type. - if (!field->getPrimitiveType()->build(res.second, logger())) { - return; - } - // add it as primitive content. - parent->createChildDocumentPrimitive(res.second, fieldName); - } - - static Handler *create(const HandlerData &handlerData) - { - return new DocumentChildHandler{handlerData}; - } -}; - -class TypesystemHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - // Create the typesystem instance - Rooted<Typesystem> typesystem = - project()->createTypesystem(args["name"].asString()); - typesystem->setLocation(location()); - - // Push the typesystem onto the scope, set the POST_HEAD flag to true - scope().push(typesystem); - scope().setFlag(ParserFlag::POST_HEAD, false); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new TypesystemHandler{handlerData}; - } -}; - -class TypesystemEnumHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - scope().setFlag(ParserFlag::POST_HEAD, true); - - // Fetch the current typesystem and create the enum node - Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>(); - Rooted<EnumType> enumType = - typesystem->createEnumType(args["name"].asString()); - enumType->setLocation(location()); - - scope().push(enumType); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new TypesystemEnumHandler{handlerData}; - } -}; - -class TypesystemEnumEntryHandler : public Handler { -public: - using Handler::Handler; - - std::string entry; - - void start(Variant::mapType &args) override {} - - void end() override - { - Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>(); - enumType->addEntry(entry, logger()); - } - - void data(const std::string &data, int field) override - { - if (field != 0) { - // TODO: This should be stored in the HandlerData - logger().error("Enum entry only has one field."); - return; - } - entry.append(data); - } - - static Handler *create(const HandlerData &handlerData) - { - return new TypesystemEnumEntryHandler{handlerData}; - } -}; - -class TypesystemStructHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - scope().setFlag(ParserFlag::POST_HEAD, true); - - // Fetch the arguments used for creating this type - const std::string &name = args["name"].asString(); - const std::string &parent = args["parent"].asString(); - - // Fetch the current typesystem and create the struct node - Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>(); - Rooted<StructType> structType = typesystem->createStructType(name); - structType->setLocation(location()); - - // Try to resolve the parent type and set it as parent structure - if (!parent.empty()) { - scope().resolve<StructType>( - parent, structType, logger(), - [](Handle<Node> parent, Handle<Node> structType, - Logger &logger) { - if (parent != nullptr) { - structType.cast<StructType>()->setParentStructure( - parent.cast<StructType>(), logger); - } - }); - } - scope().push(structType); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new TypesystemStructHandler{handlerData}; - } -}; - -class TypesystemStructFieldHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - // Read the argument values - const std::string &name = args["name"].asString(); - const std::string &type = args["type"].asString(); - const Variant &defaultValue = args["default"]; - const bool optional = - !(defaultValue.isObject() && defaultValue.asObject() == nullptr); - - Rooted<StructType> structType = scope().selectOrThrow<StructType>(); - Rooted<Attribute> attribute = - structType->createAttribute(name, defaultValue, optional, logger()); - attribute->setLocation(location()); - - // Try to resolve the type and default value - if (optional) { - scope().resolveTypeWithValue( - type, attribute, attribute->getDefaultValue(), logger(), - [](Handle<Node> type, Handle<Node> attribute, Logger &logger) { - if (type != nullptr) { - attribute.cast<Attribute>()->setType(type.cast<Type>(), - logger); - } - }); - } else { - scope().resolveType( - type, attribute, logger(), - [](Handle<Node> type, Handle<Node> attribute, Logger &logger) { - if (type != nullptr) { - attribute.cast<Attribute>()->setType(type.cast<Type>(), - logger); - } - }); - } - } - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new TypesystemStructFieldHandler{handlerData}; - } -}; - -class TypesystemConstantHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - scope().setFlag(ParserFlag::POST_HEAD, true); - - // Read the argument values - const std::string &name = args["name"].asString(); - const std::string &type = args["type"].asString(); - const Variant &value = args["value"]; - - Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>(); - Rooted<Constant> constant = typesystem->createConstant(name, value); - constant->setLocation(location()); - - // Try to resolve the type - scope().resolveTypeWithValue( - type, constant, constant->getValue(), logger(), - [](Handle<Node> type, Handle<Node> constant, Logger &logger) { - if (type != nullptr) { - constant.cast<Constant>()->setType(type.cast<Type>(), - logger); - } - }); - } - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new TypesystemConstantHandler{handlerData}; - } -}; - -/* - * Domain Handlers - */ - -class DomainHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - Rooted<Domain> domain = - project()->createDomain(args["name"].asString()); - domain->setLocation(location()); - - scope().push(domain); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new DomainHandler{handlerData}; - } -}; - -class DomainStructHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - scope().setFlag(ParserFlag::POST_HEAD, true); - - Rooted<Domain> domain = scope().selectOrThrow<Domain>(); - - Rooted<StructuredClass> structuredClass = domain->createStructuredClass( - args["name"].asString(), args["cardinality"].asCardinality(), - nullptr, args["transparent"].asBool(), args["isRoot"].asBool()); - structuredClass->setLocation(location()); - - const std::string &isa = args["isa"].asString(); - if (!isa.empty()) { - scope().resolve<StructuredClass>( - isa, structuredClass, logger(), - [](Handle<Node> superclass, Handle<Node> structuredClass, - Logger &logger) { - if (superclass != nullptr) { - structuredClass.cast<StructuredClass>()->setSuperclass( - superclass.cast<StructuredClass>(), logger); - } - }); - } - - scope().push(structuredClass); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new DomainStructHandler{handlerData}; - } -}; - -class DomainAnnotationHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - scope().setFlag(ParserFlag::POST_HEAD, true); - - Rooted<Domain> domain = scope().selectOrThrow<Domain>(); - - Rooted<AnnotationClass> annotationClass = - domain->createAnnotationClass(args["name"].asString()); - annotationClass->setLocation(location()); - - scope().push(annotationClass); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new DomainAnnotationHandler{handlerData}; - } -}; - -class DomainAttributesHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - // Fetch the current typesystem and create the struct node - Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); - - Rooted<StructType> attrDesc = parent->getAttributesDescriptor(); - attrDesc->setLocation(location()); - - scope().push(attrDesc); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new DomainAttributesHandler{handlerData}; - } -}; - -class DomainFieldHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - FieldDescriptor::FieldType type; - if (args["isSubtree"].asBool()) { - type = FieldDescriptor::FieldType::SUBTREE; - } else { - type = FieldDescriptor::FieldType::TREE; - } - - Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); - - Rooted<FieldDescriptor> field = parent->createFieldDescriptor( - type, args["name"].asString(), args["optional"].asBool()); - field->setLocation(location()); - - scope().push(field); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new DomainFieldHandler{handlerData}; - } -}; - -class DomainFieldRefHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); - - const std::string &name = args["name"].asString(); - scope().resolve<FieldDescriptor>( - name, parent, logger(), - [](Handle<Node> field, Handle<Node> parent, Logger &logger) { - if (field != nullptr) { - parent.cast<StructuredClass>()->addFieldDescriptor( - field.cast<FieldDescriptor>()); - } - }); - } - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new DomainFieldRefHandler{handlerData}; - } -}; - -class DomainPrimitiveHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>(); - - Rooted<FieldDescriptor> field = parent->createPrimitiveFieldDescriptor( - nullptr, args["name"].asString(), args["optional"].asBool()); - field->setLocation(location()); - - const std::string &type = args["type"].asString(); - scope().resolve<Type>( - type, field, logger(), - [](Handle<Node> type, Handle<Node> field, Logger &logger) { - if (type != nullptr) { - field.cast<FieldDescriptor>()->setPrimitiveType( - type.cast<Type>()); - } - }); - - scope().push(field); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new DomainPrimitiveHandler{handlerData}; - } -}; - -class DomainChildHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - Rooted<FieldDescriptor> field = - scope().selectOrThrow<FieldDescriptor>(); - - const std::string &ref = args["ref"].asString(); - scope().resolve<StructuredClass>( - ref, field, logger(), - [](Handle<Node> child, Handle<Node> field, Logger &logger) { - if (child != nullptr) { - field.cast<FieldDescriptor>()->addChild( - child.cast<StructuredClass>()); - } - }); - } - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new DomainChildHandler{handlerData}; - } -}; - -class DomainParent : public Node { -public: - DomainParent(Manager &mgr, std::string name, Handle<Node> parent) - : Node(mgr, name, parent) - { - } -}; - -namespace RttiTypes { -const Rtti DomainParent = - RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node); -} - -class DomainParentHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - Rooted<StructuredClass> strct = - scope().selectOrThrow<StructuredClass>(); - - Rooted<DomainParent> parent{new DomainParent( - strct->getManager(), args["name"].asString(), strct)}; - parent->setLocation(location()); - scope().push(parent); - } - - void end() override { scope().pop(); } - - static Handler *create(const HandlerData &handlerData) - { - return new DomainParentHandler{handlerData}; - } -}; - -class DomainParentFieldHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - Rooted<DomainParent> parentNameNode = - scope().selectOrThrow<DomainParent>(); - FieldDescriptor::FieldType type; - if (args["isSubtree"].asBool()) { - type = FieldDescriptor::FieldType::SUBTREE; - } else { - type = FieldDescriptor::FieldType::TREE; - } - - const std::string &name = args["name"].asString(); - const bool optional = args["optional"].asBool(); - Rooted<StructuredClass> strct = - parentNameNode->getParent().cast<StructuredClass>(); - - // resolve the parent, create the declared field and add the declared - // StructuredClass as child to it. - scope().resolve<Descriptor>( - parentNameNode->getName(), strct, logger(), - [type, name, optional](Handle<Node> parent, Handle<Node> strct, - Logger &logger) { - if (parent != nullptr) { - Rooted<FieldDescriptor> field = - parent.cast<Descriptor>()->createFieldDescriptor( - type, name, optional); - field->addChild(strct.cast<StructuredClass>()); - } - }); - } - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new DomainParentFieldHandler{handlerData}; - } -}; - -class DomainParentFieldRefHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override - { - Rooted<DomainParent> parentNameNode = - scope().selectOrThrow<DomainParent>(); - - const std::string &name = args["name"].asString(); - Rooted<StructuredClass> strct = - parentNameNode->getParent().cast<StructuredClass>(); - auto loc = location(); - - // resolve the parent, get the referenced field and add the declared - // StructuredClass as child to it. - scope().resolve<Descriptor>(parentNameNode->getName(), strct, logger(), - [name, loc](Handle<Node> parent, - Handle<Node> strct, - Logger &logger) { - if (parent != nullptr) { - auto res = parent.cast<Descriptor>()->resolve( - &RttiTypes::FieldDescriptor, name); - if (res.size() != 1) { - logger.error( - std::string("Could not find referenced field ") + name, - loc); - return; - } - Rooted<FieldDescriptor> field = - res[0].node.cast<FieldDescriptor>(); - field->addChild(strct.cast<StructuredClass>()); - } - }); - } - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new DomainParentFieldRefHandler{handlerData}; - } -}; - -/* - * Import and Include Handler - */ - -class ImportIncludeHandler : public Handler { -public: - using Handler::Handler; - - bool srcInArgs = false; - std::string rel; - std::string type; - std::string src; - - void start(Variant::mapType &args) override - { - rel = args["rel"].asString(); - type = args["type"].asString(); - src = args["src"].asString(); - srcInArgs = !src.empty(); - } - - void data(const std::string &data, int field) override - { - if (srcInArgs) { - logger().error("\"src\" attribute has already been set"); - return; - } - if (field != 0) { - logger().error("Command has only one field."); - return; - } - src.append(data); - } -}; - -class ImportHandler : public ImportIncludeHandler { -public: - using ImportIncludeHandler::ImportIncludeHandler; - - void start(Variant::mapType &args) override - { - ImportIncludeHandler::start(args); - - // Make sure imports are still possible - if (scope().getFlag(ParserFlag::POST_HEAD)) { - logger().error("Imports must be listed before other commands.", - location()); - return; - } - } - - void end() override - { - // Fetch the last node and check whether an import is valid at this - // position - Rooted<Node> leaf = scope().getLeaf(); - if (leaf == nullptr || !leaf->isa(&RttiTypes::RootNode)) { - logger().error( - "Import not supported here, must be inside a document, domain " - "or typesystem command.", - location()); - return; - } - Rooted<RootNode> leafRootNode = leaf.cast<RootNode>(); - - // Perform the actual import, register the imported node within the leaf - // node - Rooted<Node> imported = - context().import(src, type, rel, leafRootNode->getReferenceTypes()); - if (imported != nullptr) { - leafRootNode->reference(imported); - } - } - - static Handler *create(const HandlerData &handlerData) - { - return new ImportHandler{handlerData}; - } -}; - -class IncludeHandler : public ImportIncludeHandler { -public: - using ImportIncludeHandler::ImportIncludeHandler; - - void start(Variant::mapType &args) override - { - ImportIncludeHandler::start(args); - } - - void end() override - { - context().include(src, type, rel, {&RttiTypes::Node}); - } - - static Handler *create(const HandlerData &handlerData) - { - return new IncludeHandler{handlerData}; - } -}; - -namespace ParserStates { -/* Document states */ -static const ParserState Document = - ParserStateBuilder() - .parent(&None) - .createdNodeType(&RttiTypes::Document) - .elementHandler(DocumentHandler::create) - .arguments({Argument::String("name", "")}); - -static const ParserState DocumentChild = - ParserStateBuilder() - .parents({&Document, &DocumentChild}) - .createdNodeTypes({&RttiTypes::StructureNode, - &RttiTypes::AnnotationEntity, - &RttiTypes::DocumentField}) - .elementHandler(DocumentChildHandler::create); - -/* Domain states */ -static const ParserState Domain = ParserStateBuilder() - .parents({&None, &Document}) - .createdNodeType(&RttiTypes::Domain) - .elementHandler(DomainHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState DomainStruct = - ParserStateBuilder() - .parent(&Domain) - .createdNodeType(&RttiTypes::StructuredClass) - .elementHandler(DomainStructHandler::create) - .arguments({Argument::String("name"), - Argument::Cardinality("cardinality", Cardinality::any()), - Argument::Bool("isRoot", false), - Argument::Bool("transparent", false), - Argument::String("isa", "")}); - -static const ParserState DomainAnnotation = - ParserStateBuilder() - .parent(&Domain) - .createdNodeType(&RttiTypes::AnnotationClass) - .elementHandler(DomainAnnotationHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState DomainAttributes = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::StructType) - .elementHandler(DomainAttributesHandler::create) - .arguments({}); - -static const ParserState DomainAttribute = - ParserStateBuilder() - .parent(&DomainAttributes) - .elementHandler(TypesystemStructFieldHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("default", Variant::fromObject(nullptr))}); - -static const ParserState DomainField = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainFieldHandler::create) - .arguments({Argument::String("name", DEFAULT_FIELD_NAME), - Argument::Bool("isSubtree", false), - Argument::Bool("optional", false)}); - -static const ParserState DomainFieldRef = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainFieldRefHandler::create) - .arguments({Argument::String("name", DEFAULT_FIELD_NAME)}); - -static const ParserState DomainStructPrimitive = - ParserStateBuilder() - .parents({&DomainStruct, &DomainAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainPrimitiveHandler::create) - .arguments({Argument::String("name", DEFAULT_FIELD_NAME), - Argument::Bool("optional", false), - Argument::String("type")}); - -static const ParserState DomainStructChild = - ParserStateBuilder() - .parent(&DomainField) - .elementHandler(DomainChildHandler::create) - .arguments({Argument::String("ref")}); - -static const ParserState DomainStructParent = - ParserStateBuilder() - .parent(&DomainStruct) - .createdNodeType(&RttiTypes::DomainParent) - .elementHandler(DomainParentHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState DomainStructParentField = - ParserStateBuilder() - .parent(&DomainStructParent) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainParentFieldHandler::create) - .arguments({Argument::String("name", DEFAULT_FIELD_NAME), - Argument::Bool("isSubtree", false), - Argument::Bool("optional", false)}); - -static const ParserState DomainStructParentFieldRef = - ParserStateBuilder() - .parent(&DomainStructParent) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(DomainParentFieldRefHandler::create) - .arguments({Argument::String("name", DEFAULT_FIELD_NAME)}); - -/* Typesystem states */ -static const ParserState Typesystem = - ParserStateBuilder() - .parents({&None, &Domain}) - .createdNodeType(&RttiTypes::Typesystem) - .elementHandler(TypesystemHandler::create) - .arguments({Argument::String("name", "")}); - -static const ParserState TypesystemEnum = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::EnumType) - .elementHandler(TypesystemEnumHandler::create) - .arguments({Argument::String("name")}); - -static const ParserState TypesystemEnumEntry = - ParserStateBuilder() - .parent(&TypesystemEnum) - .elementHandler(TypesystemEnumEntryHandler::create) - .arguments({}); - -static const ParserState TypesystemStruct = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::StructType) - .elementHandler(TypesystemStructHandler::create) - .arguments({Argument::String("name"), Argument::String("parent", "")}); - -static const ParserState TypesystemStructField = - ParserStateBuilder() - .parent(&TypesystemStruct) - .elementHandler(TypesystemStructFieldHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("default", Variant::fromObject(nullptr))}); - -static const ParserState TypesystemConstant = - ParserStateBuilder() - .parent(&Typesystem) - .createdNodeType(&RttiTypes::Constant) - .elementHandler(TypesystemConstantHandler::create) - .arguments({Argument::String("name"), Argument::String("type"), - Argument::Any("value")}); - -/* Special states for import and include */ -static const ParserState Import = - ParserStateBuilder() - .parents({&Document, &Typesystem, &Domain}) - .elementHandler(ImportHandler::create) - .arguments({Argument::String("rel", ""), Argument::String("type", ""), - Argument::String("src", "")}); - -static const ParserState Include = - ParserStateBuilder() - .parent(&All) - .elementHandler(IncludeHandler::create) - .arguments({Argument::String("rel", ""), Argument::String("type", ""), - Argument::String("src", "")}); - -static const std::multimap<std::string, const ParserState *> XmlStates{ - {"document", &Document}, - {"*", &DocumentChild}, - {"domain", &Domain}, - {"struct", &DomainStruct}, - {"annotation", &DomainAnnotation}, - {"attributes", &DomainAttributes}, - {"attribute", &DomainAttribute}, - {"field", &DomainField}, - {"fieldRef", &DomainFieldRef}, - {"primitive", &DomainStructPrimitive}, - {"child", &DomainStructChild}, - {"parent", &DomainStructParent}, - {"field", &DomainStructParentField}, - {"fieldRef", &DomainStructParentFieldRef}, - {"typesystem", &Typesystem}, - {"enum", &TypesystemEnum}, - {"entry", &TypesystemEnumEntry}, - {"struct", &TypesystemStruct}, - {"field", &TypesystemStructField}, - {"constant", &TypesystemConstant}, - {"import", &Import}, - {"include", &Include}}; -} - -/** - * Structue containing the private data that is being passed to the - * XML-Handlers. - */ -struct XMLUserData { - /** - * Containing the depth of the current XML file - */ - size_t depth; - - /** - * Reference at the ParserStack instance. - */ - ParserStack *stack; - - /** - * Reference at the CharReader instance. - */ - CharReader *reader; - - /** - * Constructor of the XMLUserData struct. - * - * @param stack is a pointer at the ParserStack instance. - * @param reader is a pointer at the CharReader instance. - */ - XMLUserData(ParserStack *stack, CharReader *reader) - : depth(0), stack(stack), reader(reader) - { - } -}; - -/** - * Wrapper class around the XML_Parser pointer which safely frees it whenever - * the scope is left (e.g. because an exception was thrown). - */ -class ScopedExpatXmlParser { -private: - /** - * Internal pointer to the XML_Parser instance. - */ - XML_Parser parser; - -public: - /** - * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS - * from the expat library. Throws a parser exception if the XML parser - * cannot be initialized. - * - * @param encoding is the protocol-defined encoding passed to expat (or - * nullptr if expat should determine the encoding by itself). - */ - ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) - { - parser = XML_ParserCreate(encoding); - if (!parser) { - throw LoggableException{ - "Internal error: Could not create expat XML parser!"}; - } - } - - /** - * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance. - */ - ~ScopedExpatXmlParser() - { - if (parser) { - XML_ParserFree(parser); - parser = nullptr; - } - } - - /** - * Returns the XML_Parser pointer. - */ - XML_Parser operator&() { return parser; } -}; - -/* Adapter Expat -> ParserStack */ - -static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0) -{ - // Fetch the parser stack and the associated user data - XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); - ParserStack *stack = userData->stack; - - // Fetch the current location in the XML file - size_t offs = XML_GetCurrentByteIndex(p); - - // Build the source location and update the default location of the - // current - // logger instance - SourceLocation loc{stack->getContext().getSourceId(), offs, offs + len}; - stack->getContext().getLogger().setDefaultLocation(loc); - return loc; -} - -enum class XMLAttributeState { - IN_TAG_NAME, - SEARCH_ATTR, - IN_ATTR_NAME, - HAS_ATTR_NAME, - HAS_ATTR_EQUALS, - IN_ATTR_DATA -}; - -static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets( - CharReader &reader, SourceLocation location) -{ - std::map<std::string, SourceLocation> res; - - // Fork the reader, we don't want to mess up the XML parsing process, do we? - CharReaderFork readerFork = reader.fork(); - - // Move the read cursor to the start location, abort if this does not work - size_t offs = location.getStart(); - if (!location.isValid() || offs != readerFork.seek(offs)) { - return res; - } - - // Now all we need to do is to implement one half of an XML parser. As this - // is inherently complicated we'll totaly fail at it. Don't care. All we - // want to get is those darn offsets for pretty error messages... (and we - // can assume the XML is valid as it was already read by expat) - XMLAttributeState state = XMLAttributeState::IN_TAG_NAME; - char c; - std::stringstream attrName; - while (readerFork.read(c)) { - // Abort at the end of the tag - if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) { - return res; - } - - // One state machine to rule them all, one state machine to find them, - // One state machine to bring them all and in the darkness bind them - // (the byte offsets) - switch (state) { - case XMLAttributeState::IN_TAG_NAME: - if (Utils::isWhitespace(c)) { - state = XMLAttributeState::SEARCH_ATTR; - } - break; - case XMLAttributeState::SEARCH_ATTR: - if (!Utils::isWhitespace(c)) { - state = XMLAttributeState::IN_ATTR_NAME; - attrName << c; - } - break; - case XMLAttributeState::IN_ATTR_NAME: - if (Utils::isWhitespace(c)) { - state = XMLAttributeState::HAS_ATTR_NAME; - } else if (c == '=') { - state = XMLAttributeState::HAS_ATTR_EQUALS; - } else { - attrName << c; - } - break; - case XMLAttributeState::HAS_ATTR_NAME: - if (!Utils::isWhitespace(c)) { - if (c == '=') { - state = XMLAttributeState::HAS_ATTR_EQUALS; - break; - } - // Well, this is a strange XML file... We expected to - // see a '=' here! Try to continue with the - // "HAS_ATTR_EQUALS" state as this state will hopefully - // inlcude some error recovery - } else { - // Skip whitespace here - break; - } - // Fallthrough - case XMLAttributeState::HAS_ATTR_EQUALS: - if (!Utils::isWhitespace(c)) { - if (c == '"') { - // Here we are! We have found the beginning of an - // attribute. Let's quickly lock the current offset away - // in the result map - res.emplace(attrName.str(), - SourceLocation{reader.getSourceId(), - readerFork.getOffset()}); - attrName.str(std::string{}); - state = XMLAttributeState::IN_ATTR_DATA; - } else { - // No, this XML file is not well formed. Assume we're in - // an attribute name once again - attrName.str(std::string{&c, 1}); - state = XMLAttributeState::IN_ATTR_NAME; - } - } - break; - case XMLAttributeState::IN_ATTR_DATA: - if (c == '"') { - // We're at the end of the attribute data, start anew - state = XMLAttributeState::SEARCH_ATTR; - } - break; - } - } - return res; -} - -static void xmlStartElementHandler(void *p, const XML_Char *name, - const XML_Char **attrs) -{ - XML_Parser parser = static_cast<XML_Parser>(p); - XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); - ParserStack *stack = userData->stack; - - SourceLocation loc = syncLoggerPosition(parser); - - // Read the argument locations -- this is only a stupid and slow hack, - // but it is necessary, as expat doesn't give use the byte offset of the - // arguments. - std::map<std::string, SourceLocation> offs = - reconstructXMLAttributeOffsets(*userData->reader, loc); - - // Assemble the arguments - Variant::mapType args; - - const XML_Char **attr = attrs; - while (*attr) { - // Convert the C string to a std::string - const std::string key{*(attr++)}; - - // Search the location of the key - SourceLocation keyLoc; - auto it = offs.find(key); - if (it != offs.end()) { - keyLoc = it->second; - } - - // Parse the string, pass the location of the key - std::pair<bool, Variant> value = VariantReader::parseGenericString( - *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(), - keyLoc.getStart()); - args.emplace(key, value.second); - } - - // Call the start function - std::string nameStr(name); - if (nameStr != "ousia" || userData->depth > 0) { - stack->start(std::string(name), args, loc); - } - - // Increment the current depth - userData->depth++; -} - -static void xmlEndElementHandler(void *p, const XML_Char *name) -{ - XML_Parser parser = static_cast<XML_Parser>(p); - XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); - ParserStack *stack = userData->stack; - - syncLoggerPosition(parser); - - // Decrement the current depth - userData->depth--; - - // Call the end function - std::string nameStr(name); - if (nameStr != "ousia" || userData->depth > 0) { - stack->end(); - } -} - -static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len) -{ - XML_Parser parser = static_cast<XML_Parser>(p); - XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p)); - ParserStack *stack = userData->stack; - - size_t ulen = len > 0 ? static_cast<size_t>(len) : 0; - syncLoggerPosition(parser, ulen); - const std::string data = Utils::trim(std::string{s, ulen}); - if (!data.empty()) { - stack->data(data); - } -} - -/* Class XmlParser */ - -void XmlParser::doParse(CharReader &reader, ParserContext &ctx) -{ - // Create the parser object - ScopedExpatXmlParser p{"UTF-8"}; - - // Create the parser stack instance, if we're starting on a non-empty scope, - // try to deduce the parser state - ParserStack stack(ctx, ParserStates::XmlStates); - if (!ctx.getScope().isEmpty()) { - if (!stack.deduceState()) { - return; - } - } - - // Pass the reference to the ParserStack to the XML handler - XMLUserData data(&stack, &reader); - XML_SetUserData(&p, &data); - XML_UseParserAsHandlerArg(&p); - - // Set the callback functions - XML_SetStartElementHandler(&p, xmlStartElementHandler); - XML_SetEndElementHandler(&p, xmlEndElementHandler); - XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler); - - // Feed data into expat while there is data to process - constexpr size_t BUFFER_SIZE = 64 * 1024; - while (true) { - // Fetch a buffer from expat for the input data - char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE)); - if (!buf) { - throw LoggableException{ - "Internal error: XML parser out of memory!"}; - } - - // Read into the buffer - size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE); - - // Parse the data and handle any XML error - if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { - // Fetch the xml parser byte offset - size_t offs = XML_GetCurrentByteIndex(&p); - - // Throw a corresponding exception - XML_Error code = XML_GetErrorCode(&p); - std::string msg = std::string{XML_ErrorString(code)}; - throw LoggableException{"XML: " + msg, - SourceLocation{ctx.getSourceId(), offs}}; - } - - // Abort once there are no more bytes in the stream - if (bytesRead == 0) { - break; - } - } -} -} - diff --git a/src/plugins/xml/XmlParser.hpp b/src/plugins/xml/XmlParser.hpp deleted file mode 100644 index c8b6302..0000000 --- a/src/plugins/xml/XmlParser.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file XmlParser.hpp - * - * Contains the parser responsible for reading Ousía XML Documents (extension - * oxd) and Ousía XML Modules (extension oxm). - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_XML_PARSER_HPP_ -#define _OUSIA_XML_PARSER_HPP_ - -#include <core/parser/Parser.hpp> - -namespace ousia { - -/** - * The XmlParser class implements parsing the various types of Ousía XML - * documents using the expat stream XML parser. - */ -class XmlParser : public Parser { -protected: - /** - * Parses the given input stream as XML file and returns the parsed - * top-level node. - * - * @param reader is the CharReader from which the input should be read. - * @param ctx is a reference to the ParserContext instance that should be - * used. - */ - void doParse(CharReader &reader, ParserContext &ctx) override; -}; - -} - -#endif /* _OUSIA_XML_PARSER_HPP_ */ - |