From efe60ac3c3a8725ac71329c0bb19fa9d9c58f399 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:42:05 +0100 Subject: Moved specific file format parsers to formats/ folder, moved old tokenizer to css code (this is the only place where it is actually used) --- src/core/CodeTokenizer.cpp | 169 ------------- src/core/CodeTokenizer.hpp | 136 ---------- src/core/Tokenizer.cpp | 204 --------------- src/core/Tokenizer.hpp | 227 ----------------- src/core/parser/ParserStack.cpp | 216 ---------------- src/core/parser/ParserStack.hpp | 361 --------------------------- src/core/parser/ParserState.cpp | 161 ------------ src/core/parser/ParserState.hpp | 284 --------------------- src/core/parser/generic/ParserState.cpp | 161 ++++++++++++ src/core/parser/generic/ParserState.hpp | 284 +++++++++++++++++++++ src/core/parser/generic/ParserStateStack.cpp | 216 ++++++++++++++++ src/core/parser/generic/ParserStateStack.hpp | 361 +++++++++++++++++++++++++++ 12 files changed, 1022 insertions(+), 1758 deletions(-) delete mode 100644 src/core/CodeTokenizer.cpp delete mode 100644 src/core/CodeTokenizer.hpp delete mode 100644 src/core/Tokenizer.cpp delete mode 100644 src/core/Tokenizer.hpp delete mode 100644 src/core/parser/ParserStack.cpp delete mode 100644 src/core/parser/ParserStack.hpp delete mode 100644 src/core/parser/ParserState.cpp delete mode 100644 src/core/parser/ParserState.hpp create mode 100644 src/core/parser/generic/ParserState.cpp create mode 100644 src/core/parser/generic/ParserState.hpp create mode 100644 src/core/parser/generic/ParserStateStack.cpp create mode 100644 src/core/parser/generic/ParserStateStack.hpp (limited to 'src/core') diff --git a/src/core/CodeTokenizer.cpp b/src/core/CodeTokenizer.cpp deleted file mode 100644 index d65c514..0000000 --- a/src/core/CodeTokenizer.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - 
it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include "CodeTokenizer.hpp" - -namespace ousia { - -Token CodeTokenizer::constructToken(const Token &t) -{ - std::string content = buf.str(); - buf.str(std::string()); - return Token{ - returnTokenId, content, - SourceLocation{t.location.getSourceId(), startToken.location.getStart(), - t.location.getEnd()}}; -} - -void CodeTokenizer::buffer(const Token &t) { buf << t.content; } - -bool CodeTokenizer::doPrepare(const Token &t, std::deque &peeked) -{ - auto it = descriptors.find(t.tokenId); - CodeTokenMode mode = CodeTokenMode::NONE; - if (it != descriptors.end()) { - mode = it->second.mode; - } - - switch (state) { - case CodeTokenizerState::NORMAL: - switch (mode) { - case CodeTokenMode::STRING_START_END: - state = CodeTokenizerState::IN_STRING; - break; - case CodeTokenMode::BLOCK_COMMENT_START: - state = CodeTokenizerState::IN_BLOCK_COMMENT; - break; - case CodeTokenMode::LINE_COMMENT: - state = CodeTokenizerState::IN_LINE_COMMENT; - break; - case CodeTokenMode::LINEBREAK: - if (!ignoreLinebreaks) { - peeked.push_back( - {it->second.id, t.content, t.location}); - } - return !ignoreLinebreaks; - default: - bool empty = true; - if (t.tokenId == TOKEN_TEXT) { - int begin = -1; - for (size_t c = 0; c < t.content.length(); c++) { - bool isWhitespace = - t.content[c] == ' ' || t.content[c] == '\t'; - if (begin < 0) { - // if we have not yet set our beginning, - // we wait for the first - // non-whitespace-character to set it. 
- if (!isWhitespace) { - begin = c; - } - } else { - // if we have set our beginning, we wait for the - // first whitespace character, which marks the - // end of the current word. - if (isWhitespace) { - peeked.push_back(Token{ - TOKEN_TEXT, - t.content.substr(begin, (int)c - begin), - SourceLocation{ - t.location.getSourceId(), - t.location.getStart() + begin, - t.location.getStart() + c}}); - begin = -1; - empty = false; - } - } - } - if (begin >= 0) { - peeked.push_back(Token{ - TOKEN_TEXT, t.content.substr(begin), - SourceLocation{t.location.getSourceId(), - t.location.getStart() + begin, - t.location.getEnd()}}); - empty = false; - } - } else { - empty = false; - peeked.push_back(t); - } - return !empty; - } - startToken = t; - returnTokenId = it->second.id; - return false; - case CodeTokenizerState::IN_LINE_COMMENT: - switch (mode) { - case CodeTokenMode::LINEBREAK: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_BLOCK_COMMENT: - switch (mode) { - case CodeTokenMode::BLOCK_COMMENT_END: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_STRING: - switch (mode) { - case CodeTokenMode::ESCAPE: - if (escaped) { - buffer(t); - } - escaped = !escaped; - return false; - case CodeTokenMode::STRING_START_END: - if (escaped) { - buffer(t); - escaped = false; - return false; - } else { - peeked.push_back(constructToken(t)); - state = CodeTokenizerState::NORMAL; - return true; - } - default: - if (escaped) { - // TODO: handle escaped characters? 
- escaped = false; - } - buffer(t); - return false; - } - } - assert(false); - return false; -} -} diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp deleted file mode 100644 index 154f949..0000000 --- a/src/core/CodeTokenizer.hpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file CodeTokenizer.hpp - - * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) - */ -#ifndef _OUSIA_CODE_TOKENIZER_HPP_ -#define _OUSIA_CODE_TOKENIZER_HPP_ - -#include -#include - -#include -#include "Tokenizer.hpp" - -namespace ousia { - -/* - * This enum contains all special Token the CodeTokenizer supports, namely: - * - * 1.) An ambigous Tokens - in post programming languages single-quotes ' or - * double-quotes " - to delimit string tokens. - * 2.) A start token for line comments, which would e.g. be // in Java. - * 3.) A start token for a block comment - * 4.) An end token for a block comment. - * 5.) A linebreak token - * 6.) The escape token, which would e.g. be \ in java. - */ -enum class CodeTokenMode { - STRING_START_END, - LINE_COMMENT, - BLOCK_COMMENT_START, - BLOCK_COMMENT_END, - LINEBREAK, - ESCAPE, - NONE -}; - -/** - * A CodeTokenDescriptor defines the id the user likes to have returned for - * a Token of the mode specified, e.g. 
if you want to get the id 4 for a - * String Token the corresponding CodeTokenDescriptor would be inizialized - * with CodeTokenDescriptor myDesc {CodeTokenMode::STRING_START_END, 4}; - */ -struct CodeTokenDescriptor { - CodeTokenMode mode; - int id; - - CodeTokenDescriptor(CodeTokenMode mode, int id) : mode(mode), id(id) {} -}; - -/** - * The CodeTokenizer is a finite state machine with the states NORMAL, being - * IN_BLOCK_COMMENT, being IN_LINE_COMMENT or being IN_STRING. - */ -enum class CodeTokenizerState { - NORMAL, - IN_BLOCK_COMMENT, - IN_LINE_COMMENT, - IN_STRING -}; - -/** - * The purpose of a CodeTokenizer is to make it easier to parse classical - * programming Code. It adds the following features to a regular Tokenizer: - * 1.) String tokens (e.g. "string" in Java Code) instead of 3 separate tokens - * for the opening delimiter, the text and the closing delimiter. - * 2.) Escaping in String tokens. - * 3.) Comment Tokens (for line comments as well as block comments) - */ -class CodeTokenizer : public Tokenizer { -private: - std::map descriptors; - CodeTokenizerState state; - std::stringstream buf; - Token startToken; - int returnTokenId; - bool escaped = false; - - Token constructToken(const Token &t); - void buffer(const Token &t); - -protected: - bool doPrepare(const Token &t, std::deque &peeked) override; - -public: - /** - * If you do not want comment tokens to be returned you can set this to - * true. - */ - bool ignoreComments = false; - /** - * If you do not want linebreaks to be returned you can set this to true. - */ - bool ignoreLinebreaks = false; - - /** - * - * @param input a CharReader containing the input for this tokenizer, as - * with a regular tokenizer. - * @param root a TokenTreeNode representing the root of the TokenTree. - * Please note that you have to specify all tokenIDs here that you use - * in the descriptors map. - * @param descriptors a map mapping tokenIDs to CodeTokenDescriptors. 
- * In this way you can specify the meaning of certain Tokens. Say you - * specified the Token "//" with the id 1 in the TokenTree. Then you could - * add the entry "1" with the Mode "LINE_COMMENT" to the descriptors map - * and this CodeTokenizer would recognize the token "//" as starting a - * line comment. - */ - CodeTokenizer(CharReader &input, const TokenTreeNode &root, - std::map descriptors) - : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) - { - } -}; -} - -#endif diff --git a/src/core/Tokenizer.cpp b/src/core/Tokenizer.cpp deleted file mode 100644 index ab4735a..0000000 --- a/src/core/Tokenizer.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include "Tokenizer.hpp" - -namespace ousia { - -static std::map buildChildren( - const std::map &inputs) -{ - std::map children; - std::map> nexts; - - for (auto &e : inputs) { - const std::string &s = e.first; - const int id = e.second; - if (s.empty()) { - continue; - } - char start = s[0]; - const std::string suffix = s.substr(1); - if (nexts.find(start) != nexts.end()) { - nexts[start].insert(std::make_pair(suffix, id)); - } else { - nexts.insert(std::make_pair( - start, std::map{{suffix, id}})); - } - } - - for (auto &n : nexts) { - children.insert(std::make_pair(n.first, TokenTreeNode{n.second})); - } - - return children; -} - -static int buildId(const std::map &inputs) -{ - int tokenId = TOKEN_NONE; - for (auto &e : inputs) { - if (e.first.empty()) { - if (tokenId != TOKEN_NONE) { - throw TokenizerException{std::string{"Ambigous token found: "} + - std::to_string(e.second)}; - } else { - tokenId = e.second; - } - } - } - return tokenId; -} - -TokenTreeNode::TokenTreeNode(const std::map &inputs) - : children(buildChildren(inputs)), tokenId(buildId(inputs)) -{ -} - -Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root) - : input(input), root(root) -{ -} - -bool Tokenizer::prepare() -{ - std::stringstream buffer; - char c; - SourcePosition start = input.getOffset(); - bool bufEmpty = true; - while (input.peek(c)) { - if (root.children.find(c) != root.children.end()) { - // if there might be a special token, keep peeking forward - // until we find the token (or we don't). - TokenTreeNode const *n = &root; - std::stringstream tBuf; - int match = TOKEN_NONE; - while (true) { - tBuf << c; - n = &(n->children.at(c)); - if (n->tokenId != TOKEN_NONE) { - match = n->tokenId; - // from here on we found a token. If we have something - // in our buffer already, we end the search now. 
- if (!bufEmpty) { - break; - } else { - // if we want to return this token ( = we have nothing - // in our buffer yet) we look greedily for the longest - // possible token we can construct. - input.consumePeek(); - } - } - if (!input.peek(c)) { - // if we are at the end we break off the search. - break; - } - if (n->children.find(c) == n->children.end()) { - // if we do not find a possible continuation anymore, - // break off the search. - break; - } - } - //reset the peek pointer to the last valid position. - input.resetPeek(); - // check if we did indeed find a special token. - if (match != TOKEN_NONE) { - if (bufEmpty) { - // if we did not have text before, construct that token. - if (doPrepare( - Token{match, tBuf.str(), input.getLocation(start)}, - peeked)) { - return true; - } else { - start = input.getOffset(); - continue; - } - } else { - // otherwise we return the text before the token. - if (doPrepare(Token{TOKEN_TEXT, buffer.str(), input.getLocation(start)}, - peeked)) { - return true; - } else{ - //we need to clear the buffer here. After all the token - //corresponding to this buffer segment is already - //constructed. - buffer.str(std::string()); - bufEmpty = true; - start = input.getOffset(); - continue; - } - } - } else{ - //if we found nothing, read at least one character. 
- input.peek(c); - } - } - buffer << c; - bufEmpty = false; - input.consumePeek(); - } - if (!bufEmpty) { - return doPrepare(Token{TOKEN_TEXT, buffer.str(), input.getLocation(start)}, - peeked); - } - return false; -} - -bool Tokenizer::doPrepare(const Token &t, std::deque &peeked) -{ - peeked.push_back(t); - return true; -} - -bool Tokenizer::next(Token &t) -{ - if (peeked.empty()) { - if (!prepare()) { - return false; - } - } - t = peeked.front(); - peeked.pop_front(); - resetPeek(); - return true; -} - -bool Tokenizer::peek(Token &t) -{ - if (peekCursor >= peeked.size()) { - if (!prepare()) { - return false; - } - } - t = peeked[peekCursor]; - peekCursor++; - return true; -} - -void Tokenizer::resetPeek() { peekCursor = 0; } - -void Tokenizer::consumePeek() -{ - while (peekCursor > 0) { - peeked.pop_front(); - peekCursor--; - } -} -} diff --git a/src/core/Tokenizer.hpp b/src/core/Tokenizer.hpp deleted file mode 100644 index 50e458c..0000000 --- a/src/core/Tokenizer.hpp +++ /dev/null @@ -1,227 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_TOKENIZER_HPP_ -#define _OUSIA_TOKENIZER_HPP_ - -#include -#include -#include -#include - -#include - -namespace ousia { - -/** - * This exception is currently only thrown if errors are made during the - * initialization of the Tokenizer. 
Have a closer look at the documentation - * of the TokenTreeNode constructor for more information. - */ -class TokenizerException : public std::exception { -public: - const std::string msg; - - TokenizerException(const std::string &msg) : msg(msg){}; - - virtual const char *what() const noexcept override { return msg.c_str(); } -}; - -/** - * The Tokenizer internally uses a TokenTree to be efficiently able to identify - * the longest consecutive token in the text. This is equivalent to a prefix - * trie. - * - * The TokenTree is a construct that structures all special tokens this - * Tokenizer recognizes. Consider the Tokens "aab", "a" and "aac". Then - * the TokenTree would look like this: - * - * a - * | \ - * a $ - * | \ - * b c - * | | - * $ $ - * - * Every node in the TokenTree is a valid end state that has a $ attached to it. - * During the search algorithm the Tokenizer goes through the tree and stores - * the last valid position. If a character follows that does not lead to a new - * node in the TokenTree the search ends (and starts again at this character). - * The token corresponding to the last valid position is returned. - * - * This allows us to uniquely identify the matching token given a certain - * input text. Note that this is a greedy matching approach that does not - * work if you're using truly ambiguous tokens (that have the same text). - * - * It is also not allowed that tokens have common middle parts but varying - * pre- and suffixes. Consider the example of two tokens "abd" and "bc" and - * the input string "abc". In that case we start looking for "abd" at the - * start, won't find it, wenn we hit "c" and start the scanning process - * anew. Thus the "bc" token is not found. - * - * For most (well-behaved) tokenization schemes this is not the case, - * though. 
- */ -class TokenTreeNode { -public: - const std::map children; - const int tokenId; - - /** - * The TokenTreeNode constructor builds a TokenTree from the given token - * specifications. The node returned by this constructor then is the root of - * said TokenTree. - * @param inputs Specifications of tokens in map form. Each specification - * is a tuple of the text that should be matched and some unique ID (>= 0) - * that is returned to you if that Token is found in the text. - * An example for such a map would be - * { - * { "#" , 1}, - * { "##", 2}, - * { "/" , 3} - * } - * Note that IDs below zero are reserved for system Ids, mainly TOKEN_NONE - * (-1) and TOKEN_TEXT (-2). - */ - TokenTreeNode(const std::map &inputs); -}; - -/** - * This is a reserved constant for the empty token. - */ -static const int TOKEN_NONE = -1; -/** - * This is a reserved constant for every part of the input text that is not a - * specified token. - */ -static const int TOKEN_TEXT = -2; - -/** - * A token for us is identified by an integer tokenID (either one of the - * constants TOKEN_NONE or TOKEN_TEXT or one of the user-defined constants). - * Additionally we return the matched text (which should only be really - * interesting in case of TOKEN_TEXT tokens) and the position in the input text. - */ -struct Token { - int tokenId; - std::string content; - SourceLocation location; - - Token(int tokenId, std::string content, SourceLocation location) - : tokenId(tokenId), - content(content), - location(location) - { - } - - Token() : tokenId(TOKEN_NONE) {} -}; - -/** - * A Tokenizer has the purpose of subdividing an input text into tokens. In our - * definition here we distinguish between two kinds of tokens: - * 1.) User-specified tokens that match a fixed text. - * 2.) Any other text between those tokens. - * The user might want to specify the tokens '#{' and '#}' for example, because - * they have some meaning in her code. The user sets the IDs to 1 and 2. 
- * Given the input text - * "some text #{ special command #} some text" - * the tokenizer would return the tokens: - * 1.) "some text " with the id TOKEN_TEXT (-2). - * 2.) "#{" with the id 1. - * 3.) " special command " with the id TOKEN_TEXT (-2). - * 4.) "#}" with the id 2. - * 5.) " some text" with the id TOKEN_TEXT (-2). - * This makes the subsequent parsing of files of a specific type easier. - * Note that in case of tokens with that are prefixes of other tokens the - * longest possible match is returned. - */ -class Tokenizer { -private: - CharReader &input; - const TokenTreeNode &root; - std::deque peeked; - unsigned int peekCursor = 0; - - bool prepare(); - -protected: - /** - * This method is an interface to build multiple tokens from a single one in - * derived classes. This might be interesting if you want to implement - * further logic on text tokens or similar applications. - * - * @param t a Token the "basic" tokenizer found. - * @param peeked a reference to the deque containing all temporary Tokens. - * You are supposed to append your tokens there. In the trivial case you just - * put the given Token on top of the deque. - * @return false if no token was appended to the deque (meaning that you want - * to ignore the given token explicitly) and true in all other cases. - */ - virtual bool doPrepare(const Token &t, std::deque &peeked); - -public: - /** - * @param input The input of a Tokenizer is given in the form of a - * CharReader. Please refer to the respective documentation. - * @param root This is meant to be the root of a TokenTree giving the - * specification of user-defined tokens this Tokenizer should recognize. - * The Tokenizer promises to not change the TokenTree such that you can - * re-use the same specification for multiple inputs. - * Please refer to the TokenTreeNode documentation for more information. 
- */ - Tokenizer(CharReader &input, const TokenTreeNode &root); - - /** - * The next method consumes one Token from the input stream and gives - * it to the user (stored in the input argument). - * - * @param t a Token reference that is set to the next found token. - * @return true if a next token was found and false if the input is at its - * end. - */ - bool next(Token &t); - /** - * The peek method does not consume the next Token but buffers it and - * shows it to the user (stored in the input argument). - * - * @param t a Token reference that is set to the next found token. - * @return true if a next token was found and false if the input is at its - * end. - */ - bool peek(Token &t); - - /** - * Resets the peek pointer to the current position in the stream (to the - * beginning of the buffer). - */ - void resetPeek(); - - /** - * Clears the peek buffer, such that all peeked Tokens are consumed. - */ - void consumePeek(); - - const CharReader &getInput() const { return input; } - - CharReader &getInput() { return input; } -}; -} - -#endif diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp deleted file mode 100644 index 1265851..0000000 --- a/src/core/parser/ParserStack.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include -#include -#include - -#include "ParserScope.hpp" -#include "ParserStack.hpp" - -namespace ousia { - -/* A default handler */ - -/** - * The DefaultHandler class is used in case no element handler is specified in - * the ParserState descriptor. - */ -class DefaultHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override {} - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new DefaultHandler{handlerData}; - } -}; - -/* Class Handler */ - -void Handler::data(const std::string &data, int field) -{ - if (Utils::hasNonWhitepaceChar(data)) { - logger().error("Expected command but found character data."); - } -} - -/* Class ParserStack */ - -/** - * Returns an Exception that should be thrown when a currently invalid command - * is thrown. - */ -static LoggableException InvalidCommand(const std::string &name, - const std::set &expected) -{ - if (expected.empty()) { - return LoggableException{ - std::string{"No nested elements allowed, but got \""} + name + - std::string{"\""}}; - } else { - return LoggableException{ - std::string{"Expected "} + - (expected.size() == 1 ? 
std::string{"\""} - : std::string{"one of \""}) + - Utils::join(expected, "\", \"") + std::string{"\", but got \""} + - name + std::string{"\""}}; - } -} - -ParserStack::ParserStack( - ParserContext &ctx, - const std::multimap &states) - : ctx(ctx), states(states) -{ -} - -bool ParserStack::deduceState() -{ - // Assemble all states - std::vector states; - for (const auto &e : this->states) { - states.push_back(e.second); - } - - // Fetch the type signature of the scope and derive all possible states, - // abort if no unique parser state was found - std::vector possibleStates = - ParserStateDeductor(ctx.getScope().getStackTypeSignature(), states) - .deduce(); - if (possibleStates.size() != 1) { - ctx.getLogger().error( - "Error while including file: Cannot deduce parser state."); - return false; - } - - // Switch to this state by creating a dummy handler - const ParserState *state = possibleStates[0]; - Handler *handler = - DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}}); - stack.emplace(handler); - return true; -} - -std::set ParserStack::expectedCommands() -{ - const ParserState *currentState = &(this->currentState()); - std::set res; - for (const auto &v : states) { - if (v.second->parents.count(currentState)) { - res.insert(v.first); - } - } - return res; -} - -const ParserState &ParserStack::currentState() -{ - return stack.empty() ? ParserStates::None : stack.top()->state(); -} - -std::string ParserStack::currentCommandName() -{ - return stack.empty() ? 
std::string{} : stack.top()->name(); -} - -const ParserState *ParserStack::findTargetState(const std::string &name) -{ - const ParserState *currentState = &(this->currentState()); - auto range = states.equal_range(name); - for (auto it = range.first; it != range.second; it++) { - const ParserStateSet &parents = it->second->parents; - if (parents.count(currentState) || parents.count(&ParserStates::All)) { - return it->second; - } - } - - return nullptr; -} - -void ParserStack::start(const std::string &name, Variant::mapType &args, - const SourceLocation &location) -{ - ParserState const *targetState = findTargetState(name); -// TODO: Andreas, please improve this. -// if (!Utils::isIdentifier(name)) { -// throw LoggableException(std::string("Invalid identifier \"") + name + -// std::string("\"")); -// } - - if (targetState == nullptr) { - targetState = findTargetState("*"); - } - if (targetState == nullptr) { - throw InvalidCommand(name, expectedCommands()); - } - - // Fetch the associated constructor - HandlerConstructor ctor = targetState->elementHandler - ? 
targetState->elementHandler - : DefaultHandler::create; - - // Canonicalize the arguments, allow additional arguments - targetState->arguments.validateMap(args, ctx.getLogger(), true); - - // Instantiate the handler and call its start function - Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); - handler->start(args); - stack.emplace(handler); -} - -void ParserStack::start(std::string name, const Variant::mapType &args, - const SourceLocation &location) -{ - Variant::mapType argsCopy(args); - start(name, argsCopy); -} - -void ParserStack::end() -{ - // Check whether the current command could be ended - if (stack.empty()) { - throw LoggableException{"No command to end."}; - } - - // Remove the current HandlerInstance from the stack - std::shared_ptr inst{stack.top()}; - stack.pop(); - - // Call the end function of the last Handler - inst->end(); -} - -void ParserStack::data(const std::string &data, int field) -{ - // Check whether there is any command the data can be sent to - if (stack.empty()) { - throw LoggableException{"No command to receive data."}; - } - - // Pass the data to the current Handler instance - stack.top()->data(data, field); -} -} - diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp deleted file mode 100644 index efc4e4a..0000000 --- a/src/core/parser/ParserStack.hpp +++ /dev/null @@ -1,361 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file ParserStack.hpp - * - * Helper classes for document or description parsers. Contains the ParserStack - * class, which is an pushdown automaton responsible for accepting commands in - * the correct order and calling specified handlers. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PARSER_STACK_HPP_ -#define _OUSIA_PARSER_STACK_HPP_ - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "Parser.hpp" -#include "ParserContext.hpp" -#include "ParserState.hpp" - -namespace ousia { - -/** - * Struct collecting all the data that is being passed to a Handler instance. - */ -struct HandlerData { - /** - * Reference to the ParserContext instance that should be used to resolve - * references to nodes in the Graph. - */ - ParserContext &ctx; - - /** - * Contains the name of the tag that is being handled. - */ - const std::string name; - - /** - * Contains the current state of the state machine. - */ - const ParserState &state; - - /** - * Contains the state of the state machine when the parent node was handled. - */ - const ParserState &parentState; - - /** - * Current source code location. - */ - const SourceLocation location; - - /** - * Constructor of the HandlerData class. - * - * @param ctx is the parser context the handler should be executed in. - * @param name is the name of the string. - * @param state is the state this handler was called for. - * @param parentState is the state of the parent command. - * @param location is the location at which the handler is created. 
- */ - HandlerData(ParserContext &ctx, std::string name, const ParserState &state, - const ParserState &parentState, const SourceLocation location) - : ctx(ctx), - name(std::move(name)), - state(state), - parentState(parentState), - location(location){}; -}; - -/** - * The handler class provides a context for handling an XML tag. It has to be - * overridden and registered in the StateStack class to form handlers for - * concrete XML tags. - */ -class Handler { -private: - /** - * Structure containing the internal handler data. - */ - const HandlerData handlerData; - -public: - /** - * Constructor of the Handler class. - * - * @param data is a structure containing all data being passed to the - * handler. - */ - Handler(const HandlerData &handlerData) : handlerData(handlerData){}; - - /** - * Virtual destructor. - */ - virtual ~Handler(){}; - - /** - * Returns a reference at the ParserContext. - * - * @return a reference at the ParserContext. - */ - ParserContext &context() { return handlerData.ctx; } - - /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. - */ - const std::string &name() { return handlerData.name; } - - /** - * Returns a reference at the ParserScope instance. - * - * @return a reference at the ParserScope instance. - */ - ParserScope &scope() { return handlerData.ctx.getScope(); } - - /** - * Returns a reference at the Manager instance which manages all nodes. - * - * @return a referance at the Manager instance. - */ - Manager &manager() { return handlerData.ctx.getManager(); } - - /** - * Returns a reference at the Logger instance used for logging error - * messages. - * - * @return a reference at the Logger instance. - */ - Logger &logger() { return handlerData.ctx.getLogger(); } - - /** - * Returns a reference at the Project Node, representing the project into - * which the file is currently being parsed. - * - * @return a referance at the Project Node. 
- */ - Rooted project() { return handlerData.ctx.getProject(); } - - /** - * Reference at the ParserState descriptor for which this Handler was - * created. - * - * @return a const reference at the constructing ParserState descriptor. - */ - const ParserState &state() { return handlerData.state; } - - /** - * Reference at the ParserState descriptor of the parent state of the state - * for which this Handler was created. Set to ParserStates::None if there - * is no parent state. - * - * @return a const reference at the parent state of the constructing - * ParserState descriptor. - */ - const ParserState &parentState() { return handlerData.parentState; } - - /** - * Returns the current location in the source file. - * - * @return the current location in the source file. - */ - SourceLocation location() { return handlerData.location; } - - /** - * Called when the command that was specified in the constructor is - * instanciated. - * - * @param args is a map from strings to variants (argument name and value). - */ - virtual void start(Variant::mapType &args) = 0; - - /** - * Called whenever the command for which this handler is defined ends. - */ - virtual void end() = 0; - - /** - * Called whenever raw data (int the form of a string) is available for the - * Handler instance. In the default handler an exception is raised if the - * received data contains non-whitespace characters. - * - * @param data is a pointer at the character data that is available for the - * Handler instance. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). - */ - virtual void data(const std::string &data, int field); -}; - -/** - * HandlerConstructor is a function pointer type used to create concrete - * instances of the Handler class. - * - * @param handlerData is the data that should be passed to the new handler - * instance. - * @return a newly created handler instance. 
- */ -using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); - -/** - * The ParserStack class is a pushdown automaton responsible for turning a - * command stream into a tree of Node instances. - */ -class ParserStack { -private: - /** - * Reference at the parser context. - */ - ParserContext &ctx; - - /** - * Map containing all registered command names and the corresponding - * state descriptors. - */ - const std::multimap &states; - - /** - * Internal stack used for managing the currently active Handler instances. - */ - std::stack> stack; - - /** - * Used internally to get all expected command names for the current state. - * This function is used to build error messages. - * - * @return a set of strings containing the names of the expected commands. - */ - std::set expectedCommands(); - - /** - * Returns the targetState for a command with the given name that can be - * reached from for the current state. - * - * @param name is the name of the requested command. - * @return nullptr if no target state was found, a pointer at the target - *state - * otherwise. - */ - const ParserState *findTargetState(const std::string &name); - -public: - /** - * Creates a new instance of the ParserStack class. - * - * @param ctx is the parser context the parser stack is working on. - * @param states is a map containing the command names and pointers at the - * corresponding ParserState instances. - */ - ParserStack(ParserContext &ctx, - const std::multimap &states); - - /** - * Tries to reconstruct the parser state from the Scope instance of the - * ParserContext given in the constructor. This functionality is needed for - * including files,as the Parser of the included file needs to be brought to - + an equivalent state as the one in the including file. - * - * @param scope is the ParserScope instance from which the ParserState - * should be reconstructed. - * @param logger is the logger instance to which error messages should be - * written. 
- * @return true if the operation was sucessful, false otherwise. - */ - bool deduceState(); - - /** - * Returns the state the ParserStack instance currently is in. - * - * @return the state of the currently active Handler instance or STATE_NONE - * if no handler is on the stack. - */ - const ParserState ¤tState(); - - /** - * Returns the command name that is currently being handled. - * - * @return the name of the command currently being handled by the active - * Handler instance or an empty string if no handler is currently active. - */ - std::string currentCommandName(); - - /** - * Function that should be called whenever a new command starts. - * - * @param name is the name of the command. - * @param args is a map from strings to variants (argument name and value). - * Note that the passed map will be modified. - * @param location is the location in the source file at which the command - * starts. - */ - void start(const std::string &name, Variant::mapType &args, - const SourceLocation &location = SourceLocation{}); - - /** - * Function that should be called whenever a new command starts. - * - * @param name is the name of the command. - * @param args is a map from strings to variants (argument name and value). - * @param location is the location in the source file at which the command - * starts. - */ - void start(std::string name, - const Variant::mapType &args = Variant::mapType{}, - const SourceLocation &location = SourceLocation{}); - - /** - * Function called whenever a command ends. - */ - void end(); - - /** - * Function that should be called whenever data is available for the - * command. - * - * @param data is the data that should be passed to the handler. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). - */ - void data(const std::string &data, int field = 0); - - /** - * Returns a reference to the parser context the parser stack is currently - * working on. 
- * - * @return a reference to the parser context. - */ - ParserContext &getContext() { return ctx; } -}; -} - -#endif /* _OUSIA_PARSER_STACK_HPP_ */ - diff --git a/src/core/parser/ParserState.cpp b/src/core/parser/ParserState.cpp deleted file mode 100644 index f635d86..0000000 --- a/src/core/parser/ParserState.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "ParserState.hpp" - -namespace ousia { - -/* Class ParserState */ - -ParserState::ParserState() : elementHandler(nullptr) {} - -ParserState::ParserState(ParserStateSet parents, Arguments arguments, - RttiSet createdNodeTypes, - HandlerConstructor elementHandler) - : parents(parents), - arguments(arguments), - createdNodeTypes(createdNodeTypes), - elementHandler(elementHandler) -{ -} - -ParserState::ParserState(const ParserStateBuilder &builder) - : ParserState(builder.build()) -{ -} - -/* Class ParserStateBuilder */ - -ParserStateBuilder &ParserStateBuilder::copy(const ParserState &state) -{ - this->state = state; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::parent(const ParserState *parent) -{ - state.parents = ParserStateSet{parent}; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::parents(const ParserStateSet &parents) -{ - state.parents = parents; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::arguments(const Arguments &arguments) -{ - state.arguments = arguments; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::createdNodeType(const Rtti *type) -{ - state.createdNodeTypes = RttiSet{type}; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::createdNodeTypes(const RttiSet &types) -{ - state.createdNodeTypes = types; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::elementHandler( - HandlerConstructor elementHandler) -{ - state.elementHandler = elementHandler; - return *this; -} - -const ParserState &ParserStateBuilder::build() const { return state; } - -/* Class ParserStateDeductor */ - -ParserStateDeductor::ParserStateDeductor( - std::vector signature, - std::vector states) - : tbl(signature.size()), - signature(std::move(signature)), - states(std::move(states)) -{ -} - -bool ParserStateDeductor::isActive(size_t d, const ParserState *s) -{ - // Lookup the "active" state of (d, s), if it was not already set - // (e.second is true) we'll have to calculate it - auto e 
= tbl[d].emplace(s, false); - bool &res = e.first->second; - if (!e.second) { - return res; - } - - // Check whether this node is generative (may have produced the Node - // described by the current Signature element) - bool isGenerative = signature[d]->isOneOf(s->createdNodeTypes); - - if (isGenerative && d == 0) { - // End of recursion -- the last signature element is reached and the - // node was generative - res = true; - } else { - // Try repetition of this node - if (isGenerative && isActive(d - 1, s)) { - res = true; - } else { - // Check whether any of the parent nodes were active -- either for - // the previous element (if this one is generative) or for the - // current element (assuming this node was not generative) - for (const ParserState *parent : s->parents) { - if ((isGenerative && isActive(d - 1, parent)) || - isActive(d, parent)) { - res = true; - break; - } - } - } - } - - return res; -} - -std::vector ParserStateDeductor::deduce() -{ - std::vector res; - if (!signature.empty()) { - const size_t D = signature.size(); - for (auto s : states) { - if (signature[D - 1]->isOneOf(s->createdNodeTypes) && - isActive(D - 1, s)) { - res.push_back(s); - } - } - } - return res; -} - -/* Constant initializations */ - -namespace ParserStates { -const ParserState All; -const ParserState None; -} -} - diff --git a/src/core/parser/ParserState.hpp b/src/core/parser/ParserState.hpp deleted file mode 100644 index 6487fdd..0000000 --- a/src/core/parser/ParserState.hpp +++ /dev/null @@ -1,284 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file ParserState.hpp - * - * Defines the ParserState class used within the ParserStack pushdown - * automaton and the ParserStateBuilder class for convenient construction of - * such classes. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PARSER_STATE_HPP_ -#define _OUSIA_PARSER_STATE_HPP_ - -#include - -#include -#include - -namespace ousia { - -// Forward declarations -class ParserStateBuilder; -class ParserState; -class HandlerData; -class Handler; -using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); - -/** - * Set of pointers of parser states -- used for specifying a set of parent - * states. - */ -using ParserStateSet = std::unordered_set; - -/** - * Class used for the complete specification of a ParserState. Stores possible - * parent states, state handlers and arguments to be passed to that state. - */ -struct ParserState { - /** - * Vector containing all possible parent states. - */ - ParserStateSet parents; - - /** - * Descriptor of the arguments that should be passed to the handler. - */ - Arguments arguments; - - /** - * Set containing the types of the nodes that may be created in this - * ParserState. This information is needed for Parsers to reconstruct the - * current ParserState from a given ParserScope when a file is included. - */ - RttiSet createdNodeTypes; - - /** - * Pointer at a function which creates a new concrete Handler instance for - * the elements described by this state. May be nullptr in which case no - * handler instance is created. 
- */ - HandlerConstructor elementHandler; - - /** - * Default constructor, initializes the handlers with nullptr. - */ - ParserState(); - - /** - * Constructor taking values for all fields. Use the ParserStateBuilder - * class for a more convenient construction of ParserState instances. - * - * @param parents is a vector containing all possible parent states. - * @param arguments is a descriptor of arguments that should be passed to - * the handler. - * @param createdNodeTypes is a set containing the types of the nodes tha - * may be created in this ParserState. This information is needed for - * Parsers to reconstruct the current ParserState from a given ParserScope - * when a file is included. - * @param elementHandler is a pointer at a function which creates a new - * concrete Handler instance for the elements described by this state. May - * be nullptr in which case no handler instance is created. - */ - ParserState(ParserStateSet parents, Arguments arguments = Arguments{}, - RttiSet createdNodeTypes = RttiSet{}, - HandlerConstructor elementHandler = nullptr); - - /** - * Creates this ParserState from the given ParserStateBuilder instance. - */ - ParserState(const ParserStateBuilder &builder); -}; - -/** - * The ParserStateBuilder class is a class used for conveniently building new - * ParserState instances. - */ -class ParserStateBuilder { -private: - /** - * ParserState instance that is currently being built by the - * ParserStateBuilder. - */ - ParserState state; - -public: - /** - * Copies the ParserState instance and uses it as internal state. Overrides - * all changes made by the ParserStateBuilder. - * - * @param state is the state that should be copied. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder ©(const ParserState &state); - - /** - * Sets the possible parent states to the single given parent element. 
- * - * @param parent is a pointer at the parent ParserState instance that should - * be the possible parent state. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &parent(const ParserState *parent); - - /** - * Sets the ParserState instances in the given ParserStateSet as the list of - * supported parent states. - * - * @param parents is a set of pointers at ParserState instances that should - * be the possible parent states. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &parents(const ParserStateSet &parents); - - /** - * Sets the arguments that should be passed to the parser state handler to - * those given as argument. - * - * @param arguments is the Arguments instance describing the Arguments that - * should be parsed to a Handler for this ParserState. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &arguments(const Arguments &arguments); - - /** - * Sets the Node types this state may produce to the given Rtti descriptor. - * - * @param type is the Rtti descriptor of the Type that may be produced by - * this state. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &createdNodeType(const Rtti *type); - - /** - * Sets the Node types this state may produce to the given Rtti descriptors. - * - * @param types is a set of Rtti descriptors of the Types that may be - * produced by this state. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &createdNodeTypes(const RttiSet &types); - - /** - * Sets the constructor for the element handler. The constructor creates a - * new concrete Handler instance for the elements described by this state. - * May be nullptr in which case no handler instance is created (this is - * the default value). 
- * - * @param elementHandler is the HandlerConstructor that should create a - * new Handler instance. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &elementHandler(HandlerConstructor elementHandler); - - /** - * Returns a reference at the internal ParserState instance that was built - * using the ParserStateBuilder. - * - * @return the built ParserState. - */ - const ParserState &build() const; -}; - -/** - * Class used to deduce the ParserState a Parser is currently in based on the - * types of the Nodes that currently are on the ParserStack. Uses dynamic - * programming in order to solve this problem. - */ -class ParserStateDeductor { -public: - /** - * Type containing the dynamic programming table. - */ - using Table = std::vector>; - -private: - /** - * Dynamic programming table. - */ - Table tbl; - - /** - * Signature given in the constructor. - */ - const std::vector signature; - - /** - * List of states that should be checked for being active. - */ - const std::vector states; - - /** - * Used internally to check whether the given parser stack s may have been - * active for signature element d. - * - * @param d is the signature element. - * @param s is the parser state. - * @return true if the the given ParserState may have been active. - */ - bool isActive(size_t d, const ParserState *s); - -public: - /** - * Constructor of the ParserStateDeductor class. - * - * @param signature a Node type signature describing the types of the nodes - * which currently reside on e.g. the ParserScope stack. - * @param states is a list of states that should be checked. - */ - ParserStateDeductor(std::vector signature, - std::vector states); - - /** - * Selects all active states from the given states. Only considers those - * states that may have produced the last signature element. - * - * @return a list of states that may actually have been active. 
- */ - std::vector deduce(); -}; - -/** - * The ParserStates namespace contains all the global state constants used - * in the ParserStack class. - */ -namespace ParserStates { -/** - * State representing all states. - */ -extern const ParserState All; - -/** - * State representing the initial state. - */ -extern const ParserState None; -} -} - -#endif /* _OUSIA_PARSER_STATE_HPP_ */ - diff --git a/src/core/parser/generic/ParserState.cpp b/src/core/parser/generic/ParserState.cpp new file mode 100644 index 0000000..f635d86 --- /dev/null +++ b/src/core/parser/generic/ParserState.cpp @@ -0,0 +1,161 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "ParserState.hpp" + +namespace ousia { + +/* Class ParserState */ + +ParserState::ParserState() : elementHandler(nullptr) {} + +ParserState::ParserState(ParserStateSet parents, Arguments arguments, + RttiSet createdNodeTypes, + HandlerConstructor elementHandler) + : parents(parents), + arguments(arguments), + createdNodeTypes(createdNodeTypes), + elementHandler(elementHandler) +{ +} + +ParserState::ParserState(const ParserStateBuilder &builder) + : ParserState(builder.build()) +{ +} + +/* Class ParserStateBuilder */ + +ParserStateBuilder &ParserStateBuilder::copy(const ParserState &state) +{ + this->state = state; + return *this; +} + +ParserStateBuilder &ParserStateBuilder::parent(const ParserState *parent) +{ + state.parents = ParserStateSet{parent}; + return *this; +} + +ParserStateBuilder &ParserStateBuilder::parents(const ParserStateSet &parents) +{ + state.parents = parents; + return *this; +} + +ParserStateBuilder &ParserStateBuilder::arguments(const Arguments &arguments) +{ + state.arguments = arguments; + return *this; +} + +ParserStateBuilder &ParserStateBuilder::createdNodeType(const Rtti *type) +{ + state.createdNodeTypes = RttiSet{type}; + return *this; +} + +ParserStateBuilder &ParserStateBuilder::createdNodeTypes(const RttiSet &types) +{ + state.createdNodeTypes = types; + return *this; +} + +ParserStateBuilder &ParserStateBuilder::elementHandler( + HandlerConstructor elementHandler) +{ + state.elementHandler = elementHandler; + return *this; +} + +const ParserState &ParserStateBuilder::build() const { return state; } + +/* Class ParserStateDeductor */ + +ParserStateDeductor::ParserStateDeductor( + std::vector signature, + std::vector states) + : tbl(signature.size()), + signature(std::move(signature)), + states(std::move(states)) +{ +} + +bool ParserStateDeductor::isActive(size_t d, const ParserState *s) +{ + // Lookup the "active" state of (d, s), if it was not already set + // (e.second is true) we'll have to calculate it + auto e 
= tbl[d].emplace(s, false); + bool &res = e.first->second; + if (!e.second) { + return res; + } + + // Check whether this node is generative (may have produced the Node + // described by the current Signature element) + bool isGenerative = signature[d]->isOneOf(s->createdNodeTypes); + + if (isGenerative && d == 0) { + // End of recursion -- the last signature element is reached and the + // node was generative + res = true; + } else { + // Try repetition of this node + if (isGenerative && isActive(d - 1, s)) { + res = true; + } else { + // Check whether any of the parent nodes were active -- either for + // the previous element (if this one is generative) or for the + // current element (assuming this node was not generative) + for (const ParserState *parent : s->parents) { + if ((isGenerative && isActive(d - 1, parent)) || + isActive(d, parent)) { + res = true; + break; + } + } + } + } + + return res; +} + +std::vector ParserStateDeductor::deduce() +{ + std::vector res; + if (!signature.empty()) { + const size_t D = signature.size(); + for (auto s : states) { + if (signature[D - 1]->isOneOf(s->createdNodeTypes) && + isActive(D - 1, s)) { + res.push_back(s); + } + } + } + return res; +} + +/* Constant initializations */ + +namespace ParserStates { +const ParserState All; +const ParserState None; +} +} + diff --git a/src/core/parser/generic/ParserState.hpp b/src/core/parser/generic/ParserState.hpp new file mode 100644 index 0000000..6487fdd --- /dev/null +++ b/src/core/parser/generic/ParserState.hpp @@ -0,0 +1,284 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file ParserState.hpp + * + * Defines the ParserState class used within the ParserStack pushdown + * automaton and the ParserStateBuilder class for convenient construction of + * such classes. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STATE_HPP_ +#define _OUSIA_PARSER_STATE_HPP_ + +#include + +#include +#include + +namespace ousia { + +// Forward declarations +class ParserStateBuilder; +class ParserState; +class HandlerData; +class Handler; +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * Set of pointers of parser states -- used for specifying a set of parent + * states. + */ +using ParserStateSet = std::unordered_set; + +/** + * Class used for the complete specification of a ParserState. Stores possible + * parent states, state handlers and arguments to be passed to that state. + */ +struct ParserState { + /** + * Vector containing all possible parent states. + */ + ParserStateSet parents; + + /** + * Descriptor of the arguments that should be passed to the handler. + */ + Arguments arguments; + + /** + * Set containing the types of the nodes that may be created in this + * ParserState. This information is needed for Parsers to reconstruct the + * current ParserState from a given ParserScope when a file is included. + */ + RttiSet createdNodeTypes; + + /** + * Pointer at a function which creates a new concrete Handler instance for + * the elements described by this state. May be nullptr in which case no + * handler instance is created. 
+ */ + HandlerConstructor elementHandler; + + /** + * Default constructor, initializes the handlers with nullptr. + */ + ParserState(); + + /** + * Constructor taking values for all fields. Use the ParserStateBuilder + * class for a more convenient construction of ParserState instances. + * + * @param parents is a vector containing all possible parent states. + * @param arguments is a descriptor of arguments that should be passed to + * the handler. + * @param createdNodeTypes is a set containing the types of the nodes tha + * may be created in this ParserState. This information is needed for + * Parsers to reconstruct the current ParserState from a given ParserScope + * when a file is included. + * @param elementHandler is a pointer at a function which creates a new + * concrete Handler instance for the elements described by this state. May + * be nullptr in which case no handler instance is created. + */ + ParserState(ParserStateSet parents, Arguments arguments = Arguments{}, + RttiSet createdNodeTypes = RttiSet{}, + HandlerConstructor elementHandler = nullptr); + + /** + * Creates this ParserState from the given ParserStateBuilder instance. + */ + ParserState(const ParserStateBuilder &builder); +}; + +/** + * The ParserStateBuilder class is a class used for conveniently building new + * ParserState instances. + */ +class ParserStateBuilder { +private: + /** + * ParserState instance that is currently being built by the + * ParserStateBuilder. + */ + ParserState state; + +public: + /** + * Copies the ParserState instance and uses it as internal state. Overrides + * all changes made by the ParserStateBuilder. + * + * @param state is the state that should be copied. + * @return a reference at this ParserStateBuilder instance for method + * chaining. + */ + ParserStateBuilder ©(const ParserState &state); + + /** + * Sets the possible parent states to the single given parent element. 
+ * + * @param parent is a pointer at the parent ParserState instance that should + * be the possible parent state. + * @return a reference at this ParserStateBuilder instance for method + * chaining. + */ + ParserStateBuilder &parent(const ParserState *parent); + + /** + * Sets the ParserState instances in the given ParserStateSet as the list of + * supported parent states. + * + * @param parents is a set of pointers at ParserState instances that should + * be the possible parent states. + * @return a reference at this ParserStateBuilder instance for method + * chaining. + */ + ParserStateBuilder &parents(const ParserStateSet &parents); + + /** + * Sets the arguments that should be passed to the parser state handler to + * those given as argument. + * + * @param arguments is the Arguments instance describing the Arguments that + * should be parsed to a Handler for this ParserState. + * @return a reference at this ParserStateBuilder instance for method + * chaining. + */ + ParserStateBuilder &arguments(const Arguments &arguments); + + /** + * Sets the Node types this state may produce to the given Rtti descriptor. + * + * @param type is the Rtti descriptor of the Type that may be produced by + * this state. + * @return a reference at this ParserStateBuilder instance for method + * chaining. + */ + ParserStateBuilder &createdNodeType(const Rtti *type); + + /** + * Sets the Node types this state may produce to the given Rtti descriptors. + * + * @param types is a set of Rtti descriptors of the Types that may be + * produced by this state. + * @return a reference at this ParserStateBuilder instance for method + * chaining. + */ + ParserStateBuilder &createdNodeTypes(const RttiSet &types); + + /** + * Sets the constructor for the element handler. The constructor creates a + * new concrete Handler instance for the elements described by this state. + * May be nullptr in which case no handler instance is created (this is + * the default value). 
+ * + * @param elementHandler is the HandlerConstructor that should create a + * new Handler instance. + * @return a reference at this ParserStateBuilder instance for method + * chaining. + */ + ParserStateBuilder &elementHandler(HandlerConstructor elementHandler); + + /** + * Returns a reference at the internal ParserState instance that was built + * using the ParserStateBuilder. + * + * @return the built ParserState. + */ + const ParserState &build() const; +}; + +/** + * Class used to deduce the ParserState a Parser is currently in based on the + * types of the Nodes that currently are on the ParserStack. Uses dynamic + * programming in order to solve this problem. + */ +class ParserStateDeductor { +public: + /** + * Type containing the dynamic programming table. + */ + using Table = std::vector>; + +private: + /** + * Dynamic programming table. + */ + Table tbl; + + /** + * Signature given in the constructor. + */ + const std::vector signature; + + /** + * List of states that should be checked for being active. + */ + const std::vector states; + + /** + * Used internally to check whether the given parser stack s may have been + * active for signature element d. + * + * @param d is the signature element. + * @param s is the parser state. + * @return true if the the given ParserState may have been active. + */ + bool isActive(size_t d, const ParserState *s); + +public: + /** + * Constructor of the ParserStateDeductor class. + * + * @param signature a Node type signature describing the types of the nodes + * which currently reside on e.g. the ParserScope stack. + * @param states is a list of states that should be checked. + */ + ParserStateDeductor(std::vector signature, + std::vector states); + + /** + * Selects all active states from the given states. Only considers those + * states that may have produced the last signature element. + * + * @return a list of states that may actually have been active. 
+ */ + std::vector deduce(); +}; + +/** + * The ParserStates namespace contains all the global state constants used + * in the ParserStack class. + */ +namespace ParserStates { +/** + * State representing all states. + */ +extern const ParserState All; + +/** + * State representing the initial state. + */ +extern const ParserState None; +} +} + +#endif /* _OUSIA_PARSER_STATE_HPP_ */ + diff --git a/src/core/parser/generic/ParserStateStack.cpp b/src/core/parser/generic/ParserStateStack.cpp new file mode 100644 index 0000000..1265851 --- /dev/null +++ b/src/core/parser/generic/ParserStateStack.cpp @@ -0,0 +1,216 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include +#include +#include + +#include "ParserScope.hpp" +#include "ParserStack.hpp" + +namespace ousia { + +/* A default handler */ + +/** + * The DefaultHandler class is used in case no element handler is specified in + * the ParserState descriptor. 
+ */ +class DefaultHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override {} + + void end() override {} + + static Handler *create(const HandlerData &handlerData) + { + return new DefaultHandler{handlerData}; + } +}; + +/* Class Handler */ + +void Handler::data(const std::string &data, int field) +{ + if (Utils::hasNonWhitepaceChar(data)) { + logger().error("Expected command but found character data."); + } +} + +/* Class ParserStack */ + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + */ +static LoggableException InvalidCommand(const std::string &name, + const std::set &expected) +{ + if (expected.empty()) { + return LoggableException{ + std::string{"No nested elements allowed, but got \""} + name + + std::string{"\""}}; + } else { + return LoggableException{ + std::string{"Expected "} + + (expected.size() == 1 ? std::string{"\""} + : std::string{"one of \""}) + + Utils::join(expected, "\", \"") + std::string{"\", but got \""} + + name + std::string{"\""}}; + } +} + +ParserStack::ParserStack( + ParserContext &ctx, + const std::multimap &states) + : ctx(ctx), states(states) +{ +} + +bool ParserStack::deduceState() +{ + // Assemble all states + std::vector states; + for (const auto &e : this->states) { + states.push_back(e.second); + } + + // Fetch the type signature of the scope and derive all possible states, + // abort if no unique parser state was found + std::vector possibleStates = + ParserStateDeductor(ctx.getScope().getStackTypeSignature(), states) + .deduce(); + if (possibleStates.size() != 1) { + ctx.getLogger().error( + "Error while including file: Cannot deduce parser state."); + return false; + } + + // Switch to this state by creating a dummy handler + const ParserState *state = possibleStates[0]; + Handler *handler = + DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}}); + stack.emplace(handler); + return true; +} + +std::set 
ParserStack::expectedCommands() +{ + const ParserState *currentState = &(this->currentState()); + std::set res; + for (const auto &v : states) { + if (v.second->parents.count(currentState)) { + res.insert(v.first); + } + } + return res; +} + +const ParserState &ParserStack::currentState() +{ + return stack.empty() ? ParserStates::None : stack.top()->state(); +} + +std::string ParserStack::currentCommandName() +{ + return stack.empty() ? std::string{} : stack.top()->name(); +} + +const ParserState *ParserStack::findTargetState(const std::string &name) +{ + const ParserState *currentState = &(this->currentState()); + auto range = states.equal_range(name); + for (auto it = range.first; it != range.second; it++) { + const ParserStateSet &parents = it->second->parents; + if (parents.count(currentState) || parents.count(&ParserStates::All)) { + return it->second; + } + } + + return nullptr; +} + +void ParserStack::start(const std::string &name, Variant::mapType &args, + const SourceLocation &location) +{ + ParserState const *targetState = findTargetState(name); +// TODO: Andreas, please improve this. +// if (!Utils::isIdentifier(name)) { +// throw LoggableException(std::string("Invalid identifier \"") + name + +// std::string("\"")); +// } + + if (targetState == nullptr) { + targetState = findTargetState("*"); + } + if (targetState == nullptr) { + throw InvalidCommand(name, expectedCommands()); + } + + // Fetch the associated constructor + HandlerConstructor ctor = targetState->elementHandler + ? 
targetState->elementHandler + : DefaultHandler::create; + + // Canonicalize the arguments, allow additional arguments + targetState->arguments.validateMap(args, ctx.getLogger(), true); + + // Instantiate the handler and call its start function + Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); + handler->start(args); + stack.emplace(handler); +} + +void ParserStack::start(std::string name, const Variant::mapType &args, + const SourceLocation &location) +{ + Variant::mapType argsCopy(args); + start(name, argsCopy); +} + +void ParserStack::end() +{ + // Check whether the current command could be ended + if (stack.empty()) { + throw LoggableException{"No command to end."}; + } + + // Remove the current HandlerInstance from the stack + std::shared_ptr inst{stack.top()}; + stack.pop(); + + // Call the end function of the last Handler + inst->end(); +} + +void ParserStack::data(const std::string &data, int field) +{ + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException{"No command to receive data."}; + } + + // Pass the data to the current Handler instance + stack.top()->data(data, field); +} +} + diff --git a/src/core/parser/generic/ParserStateStack.hpp b/src/core/parser/generic/ParserStateStack.hpp new file mode 100644 index 0000000..efc4e4a --- /dev/null +++ b/src/core/parser/generic/ParserStateStack.hpp @@ -0,0 +1,361 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file ParserStack.hpp + * + * Helper classes for document or description parsers. Contains the ParserStack + * class, which is an pushdown automaton responsible for accepting commands in + * the correct order and calling specified handlers. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_HPP_ +#define _OUSIA_PARSER_STACK_HPP_ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "Parser.hpp" +#include "ParserContext.hpp" +#include "ParserState.hpp" + +namespace ousia { + +/** + * Struct collecting all the data that is being passed to a Handler instance. + */ +struct HandlerData { + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + ParserContext &ctx; + + /** + * Contains the name of the tag that is being handled. + */ + const std::string name; + + /** + * Contains the current state of the state machine. + */ + const ParserState &state; + + /** + * Contains the state of the state machine when the parent node was handled. + */ + const ParserState &parentState; + + /** + * Current source code location. + */ + const SourceLocation location; + + /** + * Constructor of the HandlerData class. + * + * @param ctx is the parser context the handler should be executed in. + * @param name is the name of the string. + * @param state is the state this handler was called for. + * @param parentState is the state of the parent command. + * @param location is the location at which the handler is created. 
+ */ + HandlerData(ParserContext &ctx, std::string name, const ParserState &state, + const ParserState &parentState, const SourceLocation location) + : ctx(ctx), + name(std::move(name)), + state(state), + parentState(parentState), + location(location){}; +}; + +/** + * The handler class provides a context for handling an XML tag. It has to be + * overridden and registered in the StateStack class to form handlers for + * concrete XML tags. + */ +class Handler { +private: + /** + * Structure containing the internal handler data. + */ + const HandlerData handlerData; + +public: + /** + * Constructor of the Handler class. + * + * @param data is a structure containing all data being passed to the + * handler. + */ + Handler(const HandlerData &handlerData) : handlerData(handlerData){}; + + /** + * Virtual destructor. + */ + virtual ~Handler(){}; + + /** + * Returns a reference at the ParserContext. + * + * @return a reference at the ParserContext. + */ + ParserContext &context() { return handlerData.ctx; } + + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. + */ + const std::string &name() { return handlerData.name; } + + /** + * Returns a reference at the ParserScope instance. + * + * @return a reference at the ParserScope instance. + */ + ParserScope &scope() { return handlerData.ctx.getScope(); } + + /** + * Returns a reference at the Manager instance which manages all nodes. + * + * @return a referance at the Manager instance. + */ + Manager &manager() { return handlerData.ctx.getManager(); } + + /** + * Returns a reference at the Logger instance used for logging error + * messages. + * + * @return a reference at the Logger instance. + */ + Logger &logger() { return handlerData.ctx.getLogger(); } + + /** + * Returns a reference at the Project Node, representing the project into + * which the file is currently being parsed. + * + * @return a referance at the Project Node. 
+ */ + Rooted project() { return handlerData.ctx.getProject(); } + + /** + * Reference at the ParserState descriptor for which this Handler was + * created. + * + * @return a const reference at the constructing ParserState descriptor. + */ + const ParserState &state() { return handlerData.state; } + + /** + * Reference at the ParserState descriptor of the parent state of the state + * for which this Handler was created. Set to ParserStates::None if there + * is no parent state. + * + * @return a const reference at the parent state of the constructing + * ParserState descriptor. + */ + const ParserState &parentState() { return handlerData.parentState; } + + /** + * Returns the current location in the source file. + * + * @return the current location in the source file. + */ + SourceLocation location() { return handlerData.location; } + + /** + * Called when the command that was specified in the constructor is + * instanciated. + * + * @param args is a map from strings to variants (argument name and value). + */ + virtual void start(Variant::mapType &args) = 0; + + /** + * Called whenever the command for which this handler is defined ends. + */ + virtual void end() = 0; + + /** + * Called whenever raw data (int the form of a string) is available for the + * Handler instance. In the default handler an exception is raised if the + * received data contains non-whitespace characters. + * + * @param data is a pointer at the character data that is available for the + * Handler instance. + * @param field is the field number (the interpretation of this value + * depends on the format that is being parsed). + */ + virtual void data(const std::string &data, int field); +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + * + * @param handlerData is the data that should be passed to the new handler + * instance. + * @return a newly created handler instance. 
+ */ +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * The ParserStack class is a pushdown automaton responsible for turning a + * command stream into a tree of Node instances. + */ +class ParserStack { +private: + /** + * Reference at the parser context. + */ + ParserContext &ctx; + + /** + * Map containing all registered command names and the corresponding + * state descriptors. + */ + const std::multimap &states; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::stack> stack; + + /** + * Used internally to get all expected command names for the current state. + * This function is used to build error messages. + * + * @return a set of strings containing the names of the expected commands. + */ + std::set expectedCommands(); + + /** + * Returns the targetState for a command with the given name that can be + * reached from for the current state. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + *state + * otherwise. + */ + const ParserState *findTargetState(const std::string &name); + +public: + /** + * Creates a new instance of the ParserStack class. + * + * @param ctx is the parser context the parser stack is working on. + * @param states is a map containing the command names and pointers at the + * corresponding ParserState instances. + */ + ParserStack(ParserContext &ctx, + const std::multimap &states); + + /** + * Tries to reconstruct the parser state from the Scope instance of the + * ParserContext given in the constructor. This functionality is needed for + * including files,as the Parser of the included file needs to be brought to + + an equivalent state as the one in the including file. + * + * @param scope is the ParserScope instance from which the ParserState + * should be reconstructed. + * @param logger is the logger instance to which error messages should be + * written. 
+ * @return true if the operation was sucessful, false otherwise. + */ + bool deduceState(); + + /** + * Returns the state the ParserStack instance currently is in. + * + * @return the state of the currently active Handler instance or STATE_NONE + * if no handler is on the stack. + */ + const ParserState ¤tState(); + + /** + * Returns the command name that is currently being handled. + * + * @return the name of the command currently being handled by the active + * Handler instance or an empty string if no handler is currently active. + */ + std::string currentCommandName(); + + /** + * Function that should be called whenever a new command starts. + * + * @param name is the name of the command. + * @param args is a map from strings to variants (argument name and value). + * Note that the passed map will be modified. + * @param location is the location in the source file at which the command + * starts. + */ + void start(const std::string &name, Variant::mapType &args, + const SourceLocation &location = SourceLocation{}); + + /** + * Function that should be called whenever a new command starts. + * + * @param name is the name of the command. + * @param args is a map from strings to variants (argument name and value). + * @param location is the location in the source file at which the command + * starts. + */ + void start(std::string name, + const Variant::mapType &args = Variant::mapType{}, + const SourceLocation &location = SourceLocation{}); + + /** + * Function called whenever a command ends. + */ + void end(); + + /** + * Function that should be called whenever data is available for the + * command. + * + * @param data is the data that should be passed to the handler. + * @param field is the field number (the interpretation of this value + * depends on the format that is being parsed). + */ + void data(const std::string &data, int field = 0); + + /** + * Returns a reference to the parser context the parser stack is currently + * working on. 
+ * + * @return a reference to the parser context. + */ + ParserContext &getContext() { return ctx; } +}; +} + +#endif /* _OUSIA_PARSER_STACK_HPP_ */ + -- cgit v1.2.3 From ce4fd84a714d80859aa01bbca32a81302b93c4d7 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:43:32 +0100 Subject: Moved code for handling whitespaces to own header, including the "WhitespaceMode" enum --- src/core/common/Utils.cpp | 7 -- src/core/common/Utils.hpp | 57 +-------- src/core/common/Whitespace.cpp | 38 ++++++ src/core/common/Whitespace.hpp | 120 ++++++++++++++++++ src/core/common/WhitespaceHandler.hpp | 223 ++++++++++++++++++++++++++++++++++ test/core/common/UtilsTest.cpp | 8 -- test/core/common/Whitespace.cpp | 41 +++++++ 7 files changed, 428 insertions(+), 66 deletions(-) create mode 100644 src/core/common/Whitespace.cpp create mode 100644 src/core/common/Whitespace.hpp create mode 100644 src/core/common/WhitespaceHandler.hpp create mode 100644 test/core/common/Whitespace.cpp (limited to 'src/core') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 563fe2a..4005143 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -18,19 +18,12 @@ #include #include -#include #include #include "Utils.hpp" namespace ousia { -std::string Utils::trim(const std::string &s) -{ - std::pair bounds = trim(s, Utils::isWhitespace); - return s.substr(bounds.first, bounds.second - bounds.first); -} - bool Utils::isIdentifier(const std::string &name) { bool first = true; diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 2c8a5b3..af7a773 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -78,12 +78,17 @@ public: */ static bool isIdentifier(const std::string &name); + /** + * Returns true if the given character is a linebreak character. + */ + static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); } + /** * Returns true if the given character is a whitespace character. 
*/ static bool isWhitespace(const char c) { - return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'); + return (c == ' ') || (c == '\t') || isLinebreak(c); } /** @@ -94,56 +99,6 @@ public: */ static bool hasNonWhitepaceChar(const std::string &s); - /** - * Returns true if the given character is a whitespace character. - */ - static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); } - - /** - * Removes whitespace at the beginning and the end of the given string. - * - * @param s is the string that should be trimmed. - * @return a trimmed copy of s. - */ - static std::string trim(const std::string &s); - - /** - * Trims the given string or vector of chars by returning the start and end - * index. - * - * @param s is the container that should be trimmed. - * @param f is a function that returns true for values that should be - * removed. - * @return start and end index. Note that "end" points at the character - * beyond the end, thus "end" minus "start" - */ - template - static std::pair trim(const T &s, Filter f) - { - size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { - if (!f(s[i])) { - start = i; - break; - } - } - - size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { - if (!f(s[i])) { - end = i + 1; - break; - } - } - - if (end < start) { - start = 0; - end = 0; - } - - return std::pair{start, end}; - } - /** * Turns the elements of a collection into a string separated by the * given delimiter. diff --git a/src/core/common/Whitespace.cpp b/src/core/common/Whitespace.cpp new file mode 100644 index 0000000..4d7c01a --- /dev/null +++ b/src/core/common/Whitespace.cpp @@ -0,0 +1,38 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "Whitespace.hpp" +#include "WhitespaceHandler.hpp" + +namespace ousia { + +std::string Utils::trim(const std::string &s) +{ + std::pair bounds = trim(s, Utils::isWhitespace); + return s.substr(bounds.first, bounds.second - bounds.first); +} + +std::string Utils::collapse(const std::string &s) +{ + CollapsingWhitespaceHandler h; + appendToWhitespaceHandler(h, s, 0); + return h.toString(); +} + +} + diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp new file mode 100644 index 0000000..1e9f36a --- /dev/null +++ b/src/core/common/Whitespace.hpp @@ -0,0 +1,120 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Whitespace.hpp + * + * Contains the WhitespaceMode enum used in various places, as well es functions + * for trimming and collapsing whitespaces. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_WHITESPACE_HPP_ +#define _OUSIA_WHITESPACE_HPP_ + +#include +#include + +namespace ousia { + +/** + * Enum specifying the whitespace handling mode of the tokenizer and the + * parsers. + */ +enum class WhitespaceMode { + /** + * Preserves all whitespaces as they are found in the source file. + */ + PRESERVE, + + /** + * Trims whitespace at the beginning and the end of the found text. + */ + TRIM, + + /** + * Whitespaces are trimmed and collapsed, multiple whitespace characters + * are replaced by a single space character. + */ + COLLAPSE +}; + +/** + * Collection of functions for trimming or collapsing whitespace. + */ +class Whitespace { + /** + * Removes whitespace at the beginning and the end of the given string. + * + * @param s is the string that should be trimmed. + * @return a trimmed copy of s. + */ + static std::string trim(const std::string &s); + + /** + * Trims the given string or vector of chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template + static std::pair trim(const T &s, Filter f) + { + size_t start = 0; + for (size_t i = 0; i < s.size(); i++) { + if (!f(s[i])) { + start = i; + break; + } + } + + size_t end = 0; + for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { + if (!f(s[i])) { + end = i + 1; + break; + } + } + + if (end < start) { + start = 0; + end = 0; + } + + return std::pair{start, end}; + } + + /** + * Collapses the whitespaces in the given string (trims the string and + * replaces all whitespace characters by a single one). + * + * @param s is the string in which the whitespace should be collapsed. + * @return a copy of s with collapsed whitespace. 
+ */ + static std::string collapse(const std::string &s); +}; + +} + +#endif /* _OUSIA_WHITESPACE_HPP_ */ + diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp new file mode 100644 index 0000000..1935c24 --- /dev/null +++ b/src/core/common/WhitespaceHandler.hpp @@ -0,0 +1,223 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file WhitespaceHandler.hpp + * + * Contains the WhitespaceHandler classes which are used in multiple places to + * trim, compact or preserve whitespaces while at the same time maintaining the + * position information associated with the input strings. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_ +#define _OUSIA_WHITESPACE_HANDLER_HPP_ + +#include +#include + +#include "WhitespaceHandler.hpp" + +namespace ousia { + +/** + * WhitespaceHandler is a based class that can be used to collect text on a + * character-by-character basis. Note that this class and its descendants are + * hoped to be inlined by the compiler (and used in conjunction with templates), + * thus they are fully defined inside this header. + */ +class WhitespaceHandler { +public: + /** + * Start position of the extracted text. + */ + size_t textStart; + + /** + * End position of the extracted text. 
+ */ + size_t textEnd; + + /** + * Buffer containing the extracted text. + */ + std::vector textBuf; + + /** + * Constructor of the TextHandlerBase base class. Initializes the start and + * end position with zeros. + */ + WhitespaceHandler() : textStart(0), textEnd(0) {} + + /** + * Returns true if this whitespace handler has found any text and a text + * token could be emitted. + * + * @return true if the internal data buffer is non-empty. + */ + bool hasText() { return !textBuf.empty(); } + + /** + * Returns the content of the WhitespaceHandler as string. + */ + std::string toString() + { + return std::string(textBuf.data(), textBuf.size()); + } +}; + +/** + * The PreservingWhitespaceHandler class preserves all characters unmodified, + * including whitepace characters. + */ +class PreservingWhitespaceHandler : public WhitespaceHandler { +public: + /** + * Appends the given character to the internal text buffer, does not + * eliminate whitespace. + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + */ + void append(char c, size_t start, size_t end) + { + if (textBuf.empty()) { + textStart = start; + } + textEnd = end; + textBuf.push_back(c); + } +}; + +/** + * The TrimmingTextHandler class trims all whitespace characters at the begin + * and the end of a text section but leaves all other characters unmodified, + * including whitepace characters. + */ +class TrimmingWhitespaceHandler : public WhitespaceHandler { +public: + /** + * Buffer used internally to temporarily store all whitespace characters. + * They are only added to the output buffer if another non-whitespace + * character is reached. + */ + std::vector whitespaceBuf; + + /** + * Appends the given character to the internal text buffer, eliminates + * whitespace characters at the begin and end of the text. 
+ * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + */ + void append(char c, size_t start, size_t end) + { + // Handle whitespace characters + if (Utils::isWhitespace(c)) { + if (!textBuf.empty()) { + whitespaceBuf.push_back(c); + } + return; + } + + // Set the start and end offset correctly + if (textBuf.empty()) { + textStart = start; + } + textEnd = end; + + // Store the character + if (!whitespaceBuf.empty()) { + textBuf.insert(textBuf.end(), whitespaceBuf.begin(), + whitespaceBuf.end()); + whitespaceBuf.clear(); + } + textBuf.push_back(c); + } +}; + +/** + * The CollapsingTextHandler trims characters at the beginning and end of the + * text and reduced multiple whitespace characters to a single blank. + */ +class CollapsingWhitespaceHandler : public WhitespaceHandler { +public: + /** + * Flag set to true if a whitespace character was reached. + */ + bool hasWhitespace = false; + + /** + * Appends the given character to the internal text buffer, eliminates + * redundant whitespace characters. + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + */ + void append(char c, size_t start, size_t end) + { + // Handle whitespace characters + if (Utils::isWhitespace(c)) { + if (!textBuf.empty()) { + hasWhitespace = true; + } + return; + } + + // Set the start and end offset correctly + if (textBuf.empty()) { + textStart = start; + } + textEnd = end; + + // Store the character + if (hasWhitespace) { + textBuf.push_back(' '); + hasWhitespace = false; + } + textBuf.push_back(c); + } +}; + +/** + * Function that can be used to append the given buffer (e.g. a string or a + * vector) to the whitespace handler. 
+ * + * @tparam WhitespaceHandler is one of the WhitespaceHandler classes. + * @tparam Buffer is an iterable type. + * @param handler is the handler to which the characters of the Buffer should be + * appended. + * @param buf is the buffer from which the characters should be read. + * @param start is the start byte offset. Each character is counted as one byte. + */ +template +inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, + size_t start) +{ + for (auto elem : buf) { + handler.append(elem, start++); + } +} +} + +#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */ + diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 917f45c..6b8a916 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -32,14 +32,6 @@ TEST(Utils, isIdentifier) ASSERT_FALSE(Utils::isIdentifier("invalid key")); } -TEST(Utils, trim) -{ - ASSERT_EQ("hello world", Utils::trim("\t hello world \n\r\t")); - ASSERT_EQ("hello world", Utils::trim("hello world \n\r\t")); - ASSERT_EQ("hello world", Utils::trim(" hello world")); - ASSERT_EQ("hello world", Utils::trim("hello world")); -} - TEST(Utils, split) { ASSERT_EQ(std::vector({"ab"}), Utils::split("ab", '.')); diff --git a/test/core/common/Whitespace.cpp b/test/core/common/Whitespace.cpp new file mode 100644 index 0000000..d6df8b7 --- /dev/null +++ b/test/core/common/Whitespace.cpp @@ -0,0 +1,41 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include + +namespace ousia { + +TEST(Whitespace, trim) +{ + ASSERT_EQ("hello world", Whitespace::trim("\t hello world \n\r\t")); + ASSERT_EQ("hello world", Whitespace::trim("hello world \n\r\t")); + ASSERT_EQ("hello world", Whitespace::trim(" hello world")); + ASSERT_EQ("hello world", Whitespace::trim("hello world")); +} + +TEST(Whitespace, collapse) +{ + ASSERT("hello world", Whitespace::collapse(" hello \n\t\r world \n\r\t")); + ASSERT("hello world", Whitespace::collapse("hello \n\t\r world \n\r\t")); + ASSERT("hello world", Whitespace::collapse("hello \n\t\r world")); + ASSERT("hello world", Whitespace::collapse("hello world")); +} +} + -- cgit v1.2.3 From c04111da83612e942e3b6c0d624812c37228006a Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:44:16 +0100 Subject: Introduced "GenericParser" class for common code shared between parsers --- src/core/parser/generic/GenericParser.hpp | 93 ++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 2 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/generic/GenericParser.hpp b/src/core/parser/generic/GenericParser.hpp index 4f29f94..53cb982 100644 --- a/src/core/parser/generic/GenericParser.hpp +++ b/src/core/parser/generic/GenericParser.hpp @@ -33,14 +33,103 @@ #ifndef _OUSIA_GENERIC_PARSER_HPP_ #define _OUSIA_GENERIC_PARSER_HPP_ -#include +#include + +#include "ParserStateStack.hpp" +#include "ParserStateHandler.hpp" +#include "ParserState.hpp" namespace ousia { -class GenericParser : public Parser { +/** + * The abstract GenericParser class is merely a convenience class for Parsers + * which use the ParserStateStack class. It maintains a ParserStateStack + * instance and provides functions which directly forward the given data to the + * ParserStateStack. 
It also implements the ParserStateCallbacks interface which + * is used by ParserStateHandlers to influence the parsing process (such as + * setting the whitespace mode or registering new entities). + */ +class GenericParser : public Parser, public ParserStateCallbacks { + +private: + /** + * Internal ParserStateStack instance. + */ + ParserStateStack stack; + +protected: + /** + * Forwards the "command" event to the ParserStateStack instance. + * + * @param name is the name of the command (including the namespace + * separator ':') and its corresponding location. Must be a string variant. + * @param args is a map variant containing the arguments that were passed to + * the command. + */ + void command(Variant name, Variant args) + { + stack.command(std::move(name), std::move(args)); + } + + /** + * Forwards the "fieldStart" event to the ParserStateStack instance. + */ + void fieldStart() + { + stack.fieldStart(); + } + + /** + * Forwards the "fieldEnd" event to the ParserStateStack instance. + */ + void fieldEnd() + { + stack.fieldEnd(); + } + + /** + * Forwards the "data" event to the ParserStateStack instance. + * + * @param data is a variant of any type containing the data that was parsed + * as data. + */ + void data(Variant data) + { + stack.data(std::move(data)); + } + /** + * Forwards the "annotationStart" event to the ParserStateStack instance. + * + * @param name is the name of the annotation class. + * @param args is a map variant containing the arguments that were passed + * to the annotation. + */ + void annotationStart(Variant name, Variant args) + { + stack.annotationStart(std::move(name), std::move(args)); + } + /** + * Forwards the "annotationEnd" event to the ParserStateStack instance. + * + * @param name is the name of the annotation class that was ended. + * @param annotationName is the name of the annotation that was ended. 
+ */ + void annotationEnd(Variant name, Variant annotationName) + { + stack.annotationEnd(std::move(name), std::move(annotationName)); + } + /** + * Forwards the "token" call to the ParserStateStack instance. + * + * @param token is string variant containing the token that was encountered. + */ + void token(Variant token) + { + stack.token(std::move(token)); + } }; } -- cgit v1.2.3 From f5fe01547dd5f72f0332cb3553fbaf4ad5d8e196 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:45:13 +0100 Subject: Header for Callbacks that can be directed from a ParserHandler to the parser in order to control the parsing process --- src/core/parser/generic/ParserStateCallbacks.cpp | 24 ++++++ src/core/parser/generic/ParserStateCallbacks.hpp | 96 ++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 src/core/parser/generic/ParserStateCallbacks.cpp create mode 100644 src/core/parser/generic/ParserStateCallbacks.hpp (limited to 'src/core') diff --git a/src/core/parser/generic/ParserStateCallbacks.cpp b/src/core/parser/generic/ParserStateCallbacks.cpp new file mode 100644 index 0000000..e684ee4 --- /dev/null +++ b/src/core/parser/generic/ParserStateCallbacks.cpp @@ -0,0 +1,24 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +namespace ousia { + +} + diff --git a/src/core/parser/generic/ParserStateCallbacks.hpp b/src/core/parser/generic/ParserStateCallbacks.hpp new file mode 100644 index 0000000..c2d7cb3 --- /dev/null +++ b/src/core/parser/generic/ParserStateCallbacks.hpp @@ -0,0 +1,96 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file ParserStateCallbacks.hpp + * + * Contains an interface defining the callbacks that can be directed from a + * ParserStateHandler to the ParserStateStack, and from the ParserStateStack to + * the actual parser. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STATE_CALLBACKS_HPP_ +#define _OUSIA_PARSER_STATE_CALLBACKS_HPP_ + +#include + +#include + +namespace ousia { + +/** + * Interface defining a set of callback functions that can be directed from a + * ParserStateHandler to the ParserStateStack and from the ParserStateStack + * to the actual parser. + */ +class ParserStateCallbacks { +public: + /** + * Sets the whitespace mode that specifies how (string data) should be + * processed. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; + + /** + * Sets the type as which the variant data should be parsed. 
+ * + * @param type is one of the VariantType constants, specifying with which + * type the data that is passed to the ParserStateHandler in the "data" + * function should be handled. + */ + virtual void setDataType(VariantType type) = 0; + + /** + * Checks whether the given token is supported by the parser. The parser + * returns true, if the token is supported, false if this token cannot be + * registered. Note that parsers that do not support the registration of + * tokens at all should always return "true". + * + * @param token is the token that should be checked for support. + * @return true if the token is generally supported (or the parser does not + * support registering tokens at all), false if the token is not supported, + * because e.g. it is a reserved token or it interferes with other tokens. + */ + virtual bool supportsToken(const std::string &token) = 0; + + /** + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + virtual void registerToken(const std::string &token) = 0; + + /** + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. 
+ */ + virtual void unregisterToken(const std::string &token) = 0; +}; + +} + +#endif /* _OUSIA_PARSER_STATE_CALLBACKS_HPP_ */ + -- cgit v1.2.3 From 22c61026bf39c32a71cc3fbe76e1454c89a27c17 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:45:47 +0100 Subject: Moved code for the "Handler" class to own unit --- src/core/parser/generic/ParserStateHandler.cpp | 77 +++++++++ src/core/parser/generic/ParserStateHandler.hpp | 230 +++++++++++++++++++++++++ 2 files changed, 307 insertions(+) create mode 100644 src/core/parser/generic/ParserStateHandler.cpp create mode 100644 src/core/parser/generic/ParserStateHandler.hpp (limited to 'src/core') diff --git a/src/core/parser/generic/ParserStateHandler.cpp b/src/core/parser/generic/ParserStateHandler.cpp new file mode 100644 index 0000000..96b9217 --- /dev/null +++ b/src/core/parser/generic/ParserStateHandler.cpp @@ -0,0 +1,77 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +#include "ParserStateHandler.hpp" + +namespace ousia { + +/* Class ParserStatedata */ + +ParserStatedata::ParserStatedata(ParserContext &ctx, std::string name, + const ParserState &state, + const ParserState &parentState, + const SourceLocation location) + : ctx(ctx), + name(std::move(name)), + state(state), + parentState(parentState), + location(location){}; + +/* Class ParserStateHandler */ + +ParserStateHandler::ParserStateHandler(const ParserStatedata &data) : data(data) +{ +} + +ParserContext &ParserStateHandler::context() { return data.ctx; } + +const std::string &ParserStateHandler::name() { return data.name; } + +ParserScope &ParserStateHandler::scope() { return data.ctx.getScope(); } + +Manager &ParserStateHandler::manager() { return data.ctx.getManager(); } + +Logger &ParserStateHandler::logger() { return data.ctx.getLogger(); } + +Rooted ParserStateHandler::project() { return data.ctx.getProject(); } + +const ParserState &ParserStateHandler::state() { return data.state; } + +SourceLocation ParserStateHandler::location() { return data.location; } + +void ParserStateHandler::data(const std::string &data, int field) +{ + if (Utils::hasNonWhitepaceChar(data)) { + logger().error("Expected command but found character data."); + } +} + +/* Class DefaultParserStateHandler */ + +void DefaultParserStateHandler::start(Variant::mapType &args) {} + +void DefaultParserStateHandler::end() {} + +ParserStateHandler *DefaultParserStateHandler::create(const data &data) +{ + return new DefaultHandler{data}; +} +} + diff --git a/src/core/parser/generic/ParserStateHandler.hpp b/src/core/parser/generic/ParserStateHandler.hpp new file mode 100644 index 0000000..35ad6eb --- /dev/null +++ b/src/core/parser/generic/ParserStateHandler.hpp @@ -0,0 +1,230 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by 
+ the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef _OUSIA_PARSER_STATE_HANDLER_HPP_ +#define _OUSIA_PARSER_STATE_HANDLER_HPP_ + +#include +#include + +#include + +namespace ousia { + +// Forward declarations +class ParserContext; +class ParserState; +class ParserStateCallbacks; + +/** + * Class collecting all the data that is being passed to a ParserStateHandler + * instance. + */ +class ParserStateHandlerData { +public: + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + ParserContext &ctx; + + /** + * Contains the name of the tag that is being handled. + */ + const std::string name; + + /** + * Contains the current state of the state machine. + */ + const ParserState &state; + + /** + * Contains the state of the state machine when the parent node was handled. + */ + const ParserState &parentState; + + /** + * Current source code location. + */ + const SourceLocation location; + + /** + * Constructor of the HandlerData class. + * + * @param ctx is the parser context the handler should be executed in. + * @param name is the name of the string. + * @param state is the state this handler was called for. + * @param parentState is the state of the parent command. + * @param location is the location at which the handler is created. + */ + ParserStateHandlerData(ParserContext &ctx, std::string name, + const ParserState &state, + const ParserState &parentState, + const SourceLocation location); +}; + +/** + * The handler class provides a context for handling an XML tag. 
It has to be + * overridden and registered in the StateStack class to form handlers for + * concrete XML tags. + */ +class ParserStateHandler { +private: + /** + * Structure containing the internal handler data. + */ + const ParserStateHandlerData data; + +protected: + /** + * Constructor of the Handler class. + * + * @param data is a structure containing all data being passed to the + * handler. + */ + ParserStateHandler(const ParserStateHandlerData &data){}; + +public: + /** + * Virtual destructor. + */ + virtual ~Handler(){}; + + /** + * Returns a reference at the ParserContext. + * + * @return a reference at the ParserContext. + */ + ParserContext &context() { return handlerData.ctx; } + + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. + */ + const std::string &name() { return handlerData.name; } + + /** + * Returns a reference at the ParserScope instance. + * + * @return a reference at the ParserScope instance. + */ + ParserScope &scope() { return handlerData.ctx.getScope(); } + + /** + * Returns a reference at the Manager instance which manages all nodes. + * + * @return a reference at the Manager instance. + */ + Manager &manager() { return handlerData.ctx.getManager(); } + + /** + * Returns a reference at the Logger instance used for logging error + * messages. + * + * @return a reference at the Logger instance. + */ + Logger &logger() { return handlerData.ctx.getLogger(); } + + /** + * Returns a reference at the Project Node, representing the project into + * which the file is currently being parsed. + * + * @return a reference at the Project Node. + */ + Rooted project() { return handlerData.ctx.getProject(); } + + /** + * Reference at the ParserState descriptor for which this Handler was + * created. + * + * @return a const reference at the constructing ParserState descriptor. 
+ */ + const ParserState &state() { return handlerData.state; } + + /** + * Reference at the ParserState descriptor of the parent state of the state + * for which this Handler was created. Set to ParserStates::None if there + * is no parent state. + * + * @return a const reference at the parent state of the constructing + * ParserState descriptor. + */ + const ParserState &parentState() { return handlerData.parentState; } + + /** + * Returns the current location in the source file. + * + * @return the current location in the source file. + */ + SourceLocation location() { return handlerData.location; } + + /** + * Called when the command that was specified in the constructor is + * instantiated. + * + * @param args is a map from strings to variants (argument name and value). + */ + virtual void start(Variant::mapType &args) = 0; + + /** + * Called whenever the command for which this handler is defined ends. + */ + virtual void end() = 0; + + /** + * Called whenever raw data (in the form of a string) is available for the + * Handler instance. In the default handler an exception is raised if the + * received data contains non-whitespace characters. + * + * @param data is a pointer at the character data that is available for the + * Handler instance. + * @param field is the field number (the interpretation of this value + * depends on the format that is being parsed). + */ + virtual void data(const std::string &data, int field); +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + * + * @param handlerData is the data that should be passed to the new handler + * instance. + * @return a newly created handler instance. + */ +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * The DefaultHandler class is used in case no element handler is specified in + * the ParserState descriptor. 
+ */ +class DefaultParserStateHandler : public ParserStateHandler { +public: + using ParserStateHandler::ParserStateHandler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData); +}; +} + +#endif /* _OUSIA_PARSER_STATE_HANDLER_HPP_ */ + -- cgit v1.2.3 From 6df10a61562fa4be7c1c13e7457341571fa6139d Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:46:10 +0100 Subject: Renamed ParserStateStack to ParserStack --- src/core/parser/generic/ParserStateStack.cpp | 53 ++----- src/core/parser/generic/ParserStateStack.hpp | 206 ++------------------------- 2 files changed, 26 insertions(+), 233 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/generic/ParserStateStack.cpp b/src/core/parser/generic/ParserStateStack.cpp index 1265851..8c32f17 100644 --- a/src/core/parser/generic/ParserStateStack.cpp +++ b/src/core/parser/generic/ParserStateStack.cpp @@ -23,40 +23,11 @@ #include #include "ParserScope.hpp" -#include "ParserStack.hpp" +#include "ParserStateStack.hpp" namespace ousia { -/* A default handler */ - -/** - * The DefaultHandler class is used in case no element handler is specified in - * the ParserState descriptor. 
- */ -class DefaultHandler : public Handler { -public: - using Handler::Handler; - - void start(Variant::mapType &args) override {} - - void end() override {} - - static Handler *create(const HandlerData &handlerData) - { - return new DefaultHandler{handlerData}; - } -}; - -/* Class Handler */ - -void Handler::data(const std::string &data, int field) -{ - if (Utils::hasNonWhitepaceChar(data)) { - logger().error("Expected command but found character data."); - } -} - -/* Class ParserStack */ +/* Class ParserStateStack */ /** * Returns an Exception that should be thrown when a currently invalid command @@ -79,14 +50,14 @@ static LoggableException InvalidCommand(const std::string &name, } } -ParserStack::ParserStack( +ParserStateStack::ParserStateStack( ParserContext &ctx, const std::multimap &states) : ctx(ctx), states(states) { } -bool ParserStack::deduceState() +bool ParserStateStack::deduceState() { // Assemble all states std::vector states; @@ -113,7 +84,7 @@ bool ParserStack::deduceState() return true; } -std::set ParserStack::expectedCommands() +std::set ParserStateStack::expectedCommands() { const ParserState *currentState = &(this->currentState()); std::set res; @@ -125,17 +96,17 @@ std::set ParserStack::expectedCommands() return res; } -const ParserState &ParserStack::currentState() +const ParserState &ParserStateStack::currentState() { return stack.empty() ? ParserStates::None : stack.top()->state(); } -std::string ParserStack::currentCommandName() +std::string ParserStateStack::currentCommandName() { return stack.empty() ? 
std::string{} : stack.top()->name(); } -const ParserState *ParserStack::findTargetState(const std::string &name) +const ParserState *ParserStateStack::findTargetState(const std::string &name) { const ParserState *currentState = &(this->currentState()); auto range = states.equal_range(name); @@ -149,7 +120,7 @@ const ParserState *ParserStack::findTargetState(const std::string &name) return nullptr; } -void ParserStack::start(const std::string &name, Variant::mapType &args, +void ParserStateStack::start(const std::string &name, Variant::mapType &args, const SourceLocation &location) { ParserState const *targetState = findTargetState(name); @@ -180,14 +151,14 @@ void ParserStack::start(const std::string &name, Variant::mapType &args, stack.emplace(handler); } -void ParserStack::start(std::string name, const Variant::mapType &args, +void ParserStateStack::start(std::string name, const Variant::mapType &args, const SourceLocation &location) { Variant::mapType argsCopy(args); start(name, argsCopy); } -void ParserStack::end() +void ParserStateStack::end() { // Check whether the current command could be ended if (stack.empty()) { @@ -202,7 +173,7 @@ void ParserStack::end() inst->end(); } -void ParserStack::data(const std::string &data, int field) +void ParserStateStack::data(const std::string &data, int field) { // Check whether there is any command the data can be sent to if (stack.empty()) { diff --git a/src/core/parser/generic/ParserStateStack.hpp b/src/core/parser/generic/ParserStateStack.hpp index efc4e4a..68c4026 100644 --- a/src/core/parser/generic/ParserStateStack.hpp +++ b/src/core/parser/generic/ParserStateStack.hpp @@ -17,17 +17,17 @@ */ /** - * @file ParserStack.hpp + * @file ParserStateStack.hpp * - * Helper classes for document or description parsers. Contains the ParserStack - * class, which is an pushdown automaton responsible for accepting commands in - * the correct order and calling specified handlers. 
+ * Helper classes for document or description parsers. Contains the + * ParserStateStack class, which is an pushdown automaton responsible for + * accepting commands in the correct order and calling specified handlers. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_PARSER_STACK_HPP_ -#define _OUSIA_PARSER_STACK_HPP_ +#ifndef _OUSIA_PARSER_STATE_STACK_HPP_ +#define _OUSIA_PARSER_STATE_STACK_HPP_ #include @@ -48,189 +48,10 @@ namespace ousia { /** - * Struct collecting all the data that is being passed to a Handler instance. - */ -struct HandlerData { - /** - * Reference to the ParserContext instance that should be used to resolve - * references to nodes in the Graph. - */ - ParserContext &ctx; - - /** - * Contains the name of the tag that is being handled. - */ - const std::string name; - - /** - * Contains the current state of the state machine. - */ - const ParserState &state; - - /** - * Contains the state of the state machine when the parent node was handled. - */ - const ParserState &parentState; - - /** - * Current source code location. - */ - const SourceLocation location; - - /** - * Constructor of the HandlerData class. - * - * @param ctx is the parser context the handler should be executed in. - * @param name is the name of the string. - * @param state is the state this handler was called for. - * @param parentState is the state of the parent command. - * @param location is the location at which the handler is created. - */ - HandlerData(ParserContext &ctx, std::string name, const ParserState &state, - const ParserState &parentState, const SourceLocation location) - : ctx(ctx), - name(std::move(name)), - state(state), - parentState(parentState), - location(location){}; -}; - -/** - * The handler class provides a context for handling an XML tag. It has to be - * overridden and registered in the StateStack class to form handlers for - * concrete XML tags. 
- */ -class Handler { -private: - /** - * Structure containing the internal handler data. - */ - const HandlerData handlerData; - -public: - /** - * Constructor of the Handler class. - * - * @param data is a structure containing all data being passed to the - * handler. - */ - Handler(const HandlerData &handlerData) : handlerData(handlerData){}; - - /** - * Virtual destructor. - */ - virtual ~Handler(){}; - - /** - * Returns a reference at the ParserContext. - * - * @return a reference at the ParserContext. - */ - ParserContext &context() { return handlerData.ctx; } - - /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. - */ - const std::string &name() { return handlerData.name; } - - /** - * Returns a reference at the ParserScope instance. - * - * @return a reference at the ParserScope instance. - */ - ParserScope &scope() { return handlerData.ctx.getScope(); } - - /** - * Returns a reference at the Manager instance which manages all nodes. - * - * @return a referance at the Manager instance. - */ - Manager &manager() { return handlerData.ctx.getManager(); } - - /** - * Returns a reference at the Logger instance used for logging error - * messages. - * - * @return a reference at the Logger instance. - */ - Logger &logger() { return handlerData.ctx.getLogger(); } - - /** - * Returns a reference at the Project Node, representing the project into - * which the file is currently being parsed. - * - * @return a referance at the Project Node. - */ - Rooted project() { return handlerData.ctx.getProject(); } - - /** - * Reference at the ParserState descriptor for which this Handler was - * created. - * - * @return a const reference at the constructing ParserState descriptor. - */ - const ParserState &state() { return handlerData.state; } - - /** - * Reference at the ParserState descriptor of the parent state of the state - * for which this Handler was created. 
Set to ParserStates::None if there - * is no parent state. - * - * @return a const reference at the parent state of the constructing - * ParserState descriptor. - */ - const ParserState &parentState() { return handlerData.parentState; } - - /** - * Returns the current location in the source file. - * - * @return the current location in the source file. - */ - SourceLocation location() { return handlerData.location; } - - /** - * Called when the command that was specified in the constructor is - * instanciated. - * - * @param args is a map from strings to variants (argument name and value). - */ - virtual void start(Variant::mapType &args) = 0; - - /** - * Called whenever the command for which this handler is defined ends. - */ - virtual void end() = 0; - - /** - * Called whenever raw data (int the form of a string) is available for the - * Handler instance. In the default handler an exception is raised if the - * received data contains non-whitespace characters. - * - * @param data is a pointer at the character data that is available for the - * Handler instance. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). - */ - virtual void data(const std::string &data, int field); -}; - -/** - * HandlerConstructor is a function pointer type used to create concrete - * instances of the Handler class. - * - * @param handlerData is the data that should be passed to the new handler - * instance. - * @return a newly created handler instance. - */ -using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); - -/** - * The ParserStack class is a pushdown automaton responsible for turning a + * The ParserStateStack class is a pushdown automaton responsible for turning a * command stream into a tree of Node instances. */ -class ParserStack { +class ParserStateStack { private: /** * Reference at the parser context. 
@@ -269,14 +90,15 @@ private: public: /** - * Creates a new instance of the ParserStack class. + * Creates a new instance of the ParserStateStack class. * * @param ctx is the parser context the parser stack is working on. * @param states is a map containing the command names and pointers at the * corresponding ParserState instances. */ - ParserStack(ParserContext &ctx, - const std::multimap &states); + ParserStateStack( + ParserContext &ctx, + const std::multimap &states); /** * Tries to reconstruct the parser state from the Scope instance of the @@ -293,7 +115,7 @@ public: bool deduceState(); /** - * Returns the state the ParserStack instance currently is in. + * Returns the state the ParserStateStack instance currently is in. * * @return the state of the currently active Handler instance or STATE_NONE * if no handler is on the stack. @@ -357,5 +179,5 @@ public: }; } -#endif /* _OUSIA_PARSER_STACK_HPP_ */ +#endif /* _OUSIA_PARSER_STATE_STACK_HPP_ */ -- cgit v1.2.3 From 65bbbd778f6e0a3668c859b0e22cced7075a726d Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:47:11 +0100 Subject: Moved DynamicTokenizer and TokenTrie to parser/utils --- src/core/parser/utils/TokenTrie.cpp | 119 +++++++++ src/core/parser/utils/TokenTrie.hpp | 150 +++++++++++ src/core/parser/utils/Tokenizer.cpp | 381 ++++++++++++++++++++++++++ src/core/parser/utils/Tokenizer.hpp | 231 ++++++++++++++++ src/formats/osdm/DynamicTokenizer.cpp | 381 -------------------------- src/formats/osdm/DynamicTokenizer.hpp | 231 ---------------- src/formats/osdm/TokenTrie.cpp | 119 --------- src/formats/osdm/TokenTrie.hpp | 150 ----------- test/core/parser/utils/TokenTrieTest.cpp | 92 +++++++ test/core/parser/utils/TokenizerTest.cpp | 415 +++++++++++++++++++++++++++++ test/formats/osdm/DynamicTokenizerTest.cpp | 415 ----------------------------- test/formats/osdm/TokenTrieTest.cpp | 92 ------- 12 files changed, 1388 insertions(+), 1388 deletions(-) create mode 100644 
src/core/parser/utils/TokenTrie.cpp create mode 100644 src/core/parser/utils/TokenTrie.hpp create mode 100644 src/core/parser/utils/Tokenizer.cpp create mode 100644 src/core/parser/utils/Tokenizer.hpp delete mode 100644 src/formats/osdm/DynamicTokenizer.cpp delete mode 100644 src/formats/osdm/DynamicTokenizer.hpp delete mode 100644 src/formats/osdm/TokenTrie.cpp delete mode 100644 src/formats/osdm/TokenTrie.hpp create mode 100644 test/core/parser/utils/TokenTrieTest.cpp create mode 100644 test/core/parser/utils/TokenizerTest.cpp delete mode 100644 test/formats/osdm/DynamicTokenizerTest.cpp delete mode 100644 test/formats/osdm/TokenTrieTest.cpp (limited to 'src/core') diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp new file mode 100644 index 0000000..4a0430b --- /dev/null +++ b/src/core/parser/utils/TokenTrie.cpp @@ -0,0 +1,119 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "TokenTrie.hpp" + +namespace ousia { + +/* Class DynamicTokenTree::Node */ + +TokenTrie::Node::Node() : type(EmptyToken) {} + +/* Class DynamicTokenTree */ + +bool TokenTrie::registerToken(const std::string &token, + TokenTypeId type) noexcept +{ + // Abort if the token is empty -- this would taint the root node + if (token.empty()) { + return false; + } + + // Iterate over each character in the given string and insert them as + // (new) nodes + Node *node = &root; + for (size_t i = 0; i < token.size(); i++) { + // Insert a new node if this one does not exist + const char c = token[i]; + auto it = node->children.find(c); + if (it == node->children.end()) { + it = node->children.emplace(c, std::make_shared()).first; + } + node = it->second.get(); + } + + // If the resulting node already has a type set, we're screwed. + if (node->type != EmptyToken) { + return false; + } + + // Otherwise just set the type to the given type. + node->type = type; + return true; +} + +bool TokenTrie::unregisterToken(const std::string &token) noexcept +{ + // We cannot remove empty tokens as we need to access the fist character + // upfront + if (token.empty()) { + return false; + } + + // First pass -- search the node in the path that can be deleted + Node *subtreeRoot = &root; + char subtreeKey = token[0]; + Node *node = &root; + for (size_t i = 0; i < token.size(); i++) { + // Go to the next node, abort if the tree ends unexpectedly + auto it = node->children.find(token[i]); + if (it == node->children.end()) { + return false; + } + + // Reset the subtree handler if this node has another type + node = it->second.get(); + if ((node->type != EmptyToken || node->children.size() > 1) && + (i + 1 != token.size())) { + subtreeRoot = node; + subtreeKey = token[i + 1]; + } + } + + // If the node type is already EmptyToken, we cannot do anything here + if (node->type == EmptyToken) { + return false; + } + + // If the target node has children, we cannot delete the subtree. 
Set the + // type to EmptyToken instead + if (!node->children.empty()) { + node->type = EmptyToken; + return true; + } + + // If we end up here, we can safely delete the complete subtree + subtreeRoot->children.erase(subtreeKey); + return true; +} + +TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept +{ + Node const *node = &root; + for (size_t i = 0; i < token.size(); i++) { + const char c = token[i]; + auto it = node->children.find(c); + if (it == node->children.end()) { + return EmptyToken; + } + node = it->second.get(); + } + return node->type; +} +} + diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp new file mode 100644 index 0000000..36c2ffa --- /dev/null +++ b/src/core/parser/utils/TokenTrie.hpp @@ -0,0 +1,150 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file TokenTrie.hpp + * + * Class representing a token trie that can be updated dynamically. + * + * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de) + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_TOKEN_TRIE_HPP_ +#define _OUSIA_TOKEN_TRIE_HPP_ + +#include +#include +#include +#include + +namespace ousia { + +/** + * The TokenTypeId is used to give each token type a unique id. + */ +using TokenTypeId = uint32_t; + +/** + * Token which is not a token. 
+ */ +constexpr TokenTypeId EmptyToken = std::numeric_limits::max(); + +/** + * Token which represents a text token. + */ +constexpr TokenTypeId TextToken = std::numeric_limits::max() - 1; + +/** + * The Tokenizer internally uses a TokenTrie to be efficiently able to identify + * the longest consecutive token in the text. This is equivalent to a prefix + * trie. + * + * A token trie is a construct that structures all special tokens a Tokenizer + * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and + * three. Then the token tree would look like this: + * + * \code{*.txt} + * ~ (0) + * / \ + * a (2) b (0) + * | | + * a (0) a (0) + * | | + * b (1) c (0) + * \endcode + * + * Where the number indicates the corresponding token descriptor identifier. + */ +class TokenTrie { +public: + /** + * Structure used to build the node tree. + */ + struct Node { + /** + * Type used for the child map. + */ + using ChildMap = std::unordered_map>; + + /** + * Map from single characters at the corresponding child nodes. + */ + ChildMap children; + + /** + * Reference at the corresponding token descriptor. Set to nullptr if + * no token is attached to this node. + */ + TokenTypeId type; + + /** + * Default constructor, initializes the descriptor with nullptr. + */ + Node(); + }; + +private: + /** + * Root node of the internal token tree. + */ + Node root; + +public: + /** + * Registers a token containing the given string. Returns false if the + * token already exists, true otherwise. + * + * @param token is the character sequence that should be registered as + * token. + * @param type is the descriptor that should be set for this token. + * @return true if the operation is successful, false otherwise. + */ + bool registerToken(const std::string &token, TokenTypeId type) noexcept; + + /** + * Unregisters the token from the token tree. Returns true if the token was + * unregistered successfully, false otherwise. 
+ * + * @param token is the character sequence that should be unregistered. + * @return true if the operation was successful, false otherwise. + */ + bool unregisterToken(const std::string &token) noexcept; + + /** + * Returns true, if the given token exists within the TokenTree. This + * function is mostly thought for debugging and unit testing. + * + * @param token is the character sequence that should be searched. + * @return the attached token descriptor or nullptr if the given token is + * not found. + */ + TokenTypeId hasToken(const std::string &token) const noexcept; + + /** + * Returns a reference at the root node to be used for traversing the token + * tree. + * + * @return a reference at the root node. + */ + const Node *getRoot() const noexcept { return &root; } +}; +} + +#endif /* _OUSIA_TOKEN_TRIE_HPP_ */ + diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp new file mode 100644 index 0000000..1fac25a --- /dev/null +++ b/src/core/parser/utils/Tokenizer.cpp @@ -0,0 +1,381 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include + +#include +#include +#include +#include + +#include "DynamicTokenizer.hpp" + +namespace ousia { + +namespace { + +/* Internal class TokenMatch */ + +/** + * Contains information about a matching token. + */ +struct TokenMatch { + /** + * Token that was matched. 
+ */ + DynamicToken token; + + /** + * Current length of the data within the text handler. The text buffer needs + * to be trimmed to this length if this token matches. + */ + size_t textLength; + + /** + * End location of the current text handler. This location needs to be used + * for the text token that is emitted before the actual token. + */ + size_t textEnd; + + /** + * Constructor of the TokenMatch class. + */ + TokenMatch() : textLength(0), textEnd(0) {} + + /** + * Returns true if this TokenMatch instance actually represents a match. + */ + bool hasMatch() { return token.type != EmptyToken; } +}; + +/* Internal class TokenLookup */ + +/** + * The TokenLookup class is used to represent a thread in a running token + * lookup. + */ +class TokenLookup { +private: + /** + * Current node within the token trie. + */ + TokenTrie::Node const *node; + + /** + * Start offset within the source file. + */ + size_t start; + + /** + * Current length of the data within the text handler. The text buffer needs + * to be trimmed to this length if this token matches. + */ + size_t textLength; + + /** + * End location of the current text handler. This location needs to be used + * for the text token that is emitted before the actual token. + */ + size_t textEnd; + +public: + /** + * Constructor of the TokenLookup class. + * + * @param node is the current node. + * @param start is the start position. + * @param textLength is the text buffer length of the previous text token. + * @param textEnd is the current end location of the previous text token. + */ + TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength, + size_t textEnd) + : node(node), start(start), textLength(textLength), textEnd(textEnd) + { + } + + /** + * Tries to extend the current path in the token trie with the given + * character. If a complete token is matched, stores this match in the + * tokens list (in case it is longer than any previous token). 
+ * + * @param c is the character that should be appended to the current prefix. + * @param lookups is a list to which new TokeLookup instances are added -- + * which could potentially be expanded in the next iteration. + * @param match is the DynamicToken instance to which the matching token + * should be written. + * @param tokens is a reference at the internal token list of the + * DynamicTokenizer. + * @param end is the end byte offset of the current character. + * @param sourceId is the source if of this file. + */ + void advance(char c, std::vector &lookups, TokenMatch &match, + const std::vector &tokens, SourceOffset end, + SourceId sourceId) + { + // Check whether we can continue the current token path with the given + // character without visiting an already visited node + auto it = node->children.find(c); + if (it == node->children.end()) { + return; + } + + // Check whether the new node represents a complete token a whether it + // is longer than the current token. If yes, replace the current token. + node = it->second.get(); + if (node->type != EmptyToken) { + const std::string &str = tokens[node->type]; + size_t len = str.size(); + if (len > match.token.content.size()) { + match.token = + DynamicToken{node->type, str, {sourceId, start, end}}; + match.textLength = textLength; + match.textEnd = textEnd; + } + } + + // If this state can possibly be advanced, store it in the states list. + if (!node->children.empty()) { + lookups.emplace_back(*this); + } + } +}; + +/** + * Transforms the given token into a text token containing the extracted + * text. + * + * @param handler is the WhitespaceHandler containing the collected data. + * @param token is the output token to which the text should be written. + * @param sourceId is the source id of the underlying file. 
+ */ +static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match, + SourceId sourceId) +{ + if (match.hasMatch()) { + match.token.content = + std::string{handler.textBuf.data(), match.textLength}; + match.token.location = + SourceLocation{sourceId, handler.textStart, match.textEnd}; + } else { + match.token.content = handler.toString(); + match.token.location = + SourceLocation{sourceId, handler.textStart, handler.textEnd}; + } + match.token.type = TextToken; +} +} + +/* Class DynamicTokenizer */ + +DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode) + : whitespaceMode(whitespaceMode), nextTokenTypeId(0) +{ +} + +template +bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token) +{ + // If we're in the read mode, reset the char reader peek position to the + // current read position + if (read) { + reader.resetPeek(); + } + + // Prepare the lookups in the token trie + const TokenTrie::Node *root = trie.getRoot(); + TokenMatch match; + std::vector lookups; + std::vector nextLookups; + + // Instantiate the text handler + TextHandler textHandler; + + // Peek characters from the reader and try to advance the current token tree + // cursor + char c; + size_t charStart = reader.getPeekOffset(); + const SourceId sourceId = reader.getSourceId(); + while (reader.peek(c)) { + const size_t charEnd = reader.getPeekOffset(); + const size_t textLength = textHandler.textBuf.size(); + const size_t textEnd = textHandler.textEnd; + + // If we do not have a match yet, start a new lookup from the root + if (!match.hasMatch()) { + TokenLookup{root, charStart, textLength, textEnd}.advance( + c, nextLookups, match, tokens, charEnd, sourceId); + } + + // Try to advance all other lookups with the new character + for (TokenLookup &lookup : lookups) { + lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId); + } + + // We have found a token and there are no more states to advance or the + // text handler has found something -- abort to 
return the new token + if (match.hasMatch()) { + if ((nextLookups.empty() || textHandler.hasText())) { + break; + } + } else { + // Record all incomming characters + textHandler.append(c, charStart, charEnd); + } + + // Swap the lookups and the nextLookups list + lookups = std::move(nextLookups); + nextLookups.clear(); + + // Advance the offset + charStart = charEnd; + } + + // If we found text, emit that text + if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { + buildTextToken(textHandler, match, sourceId); + } + + // Move the read/peek cursor to the end of the token, abort if an error + // happens while doing so + if (match.hasMatch()) { + // Make sure we have a valid location + if (match.token.location.getEnd() == InvalidSourceOffset) { + throw OusiaException{"Token end position offset out of range"}; + } + + // Seek to the end of the current token + const size_t end = match.token.location.getEnd(); + if (read) { + reader.seek(end); + } else { + reader.seekPeekCursor(end); + } + token = match.token; + } else { + token = DynamicToken{}; + } + return match.hasMatch(); +} + +bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token) +{ + switch (whitespaceMode) { + case WhitespaceMode::PRESERVE: + return next(reader, token); + case WhitespaceMode::TRIM: + return next(reader, token); + case WhitespaceMode::COLLAPSE: + return next(reader, token); + } + return false; +} + +bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token) +{ + switch (whitespaceMode) { + case WhitespaceMode::PRESERVE: + return next(reader, token); + case WhitespaceMode::TRIM: + return next(reader, token); + case WhitespaceMode::COLLAPSE: + return next(reader, token); + } + return false; +} + +TokenTypeId DynamicTokenizer::registerToken(const std::string &token) +{ + // Abort if an empty token should be registered + if (token.empty()) { + return EmptyToken; + } + + // Search for a new slot in the tokens list + TokenTypeId type = EmptyToken; + for 
(size_t i = nextTokenTypeId; i < tokens.size(); i++) { + if (tokens[i].empty()) { + tokens[i] = token; + type = i; + break; + } + } + + // No existing slot was found, add a new one -- make sure we do not + // override the special token type handles + if (type == EmptyToken) { + type = tokens.size(); + if (type == TextToken || type == EmptyToken) { + throw OusiaException{"Token type ids depleted!"}; + } + tokens.emplace_back(token); + } + nextTokenTypeId = type + 1; + + // Try to register the token in the trie -- if this fails, remove it + // from the tokens list + if (!trie.registerToken(token, type)) { + tokens[type] = std::string{}; + nextTokenTypeId = type; + return EmptyToken; + } + return type; +} + +bool DynamicTokenizer::unregisterToken(TokenTypeId type) +{ + // Unregister the token from the trie, abort if an invalid type is given + if (type < tokens.size() && trie.unregisterToken(tokens[type])) { + tokens[type] = std::string{}; + nextTokenTypeId = type; + return true; + } + return false; +} + +std::string DynamicTokenizer::getTokenString(TokenTypeId type) +{ + if (type < tokens.size()) { + return tokens[type]; + } + return std::string{}; +} + +void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode) +{ + whitespaceMode = mode; +} + +WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; } + +/* Explicitly instantiate all possible instantiations of the "next" member + function */ +template bool DynamicTokenizer::next( + CharReader &reader, DynamicToken &token); +template bool DynamicTokenizer::next( + CharReader &reader, DynamicToken &token); +template bool DynamicTokenizer::next( + CharReader &reader, DynamicToken &token); +template bool DynamicTokenizer::next( + CharReader &reader, DynamicToken &token); +template bool DynamicTokenizer::next( + CharReader &reader, DynamicToken &token); +template bool DynamicTokenizer::next( + CharReader &reader, DynamicToken &token); +} + diff --git a/src/core/parser/utils/Tokenizer.hpp 
b/src/core/parser/utils/Tokenizer.hpp new file mode 100644 index 0000000..3e5aeb3 --- /dev/null +++ b/src/core/parser/utils/Tokenizer.hpp @@ -0,0 +1,231 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file DynamicTokenizer.hpp + * + * Tokenizer that can be reconfigured at runtime used for parsing the plain + * text format. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_ +#define _OUSIA_DYNAMIC_TOKENIZER_HPP_ + +#include +#include +#include + +#include +#include + +#include "TokenTrie.hpp" + +namespace ousia { + +// Forward declarations +class CharReader; + +/** + * The DynamicToken structure describes a token discovered by the Tokenizer. + */ +struct DynamicToken { + /** + * Id of the type of this token. + */ + TokenTypeId type; + + /** + * String that was matched. + */ + std::string content; + + /** + * Location from which the string was extracted. + */ + SourceLocation location; + + /** + * Default constructor. + */ + DynamicToken() : type(EmptyToken) {} + + /** + * Constructor of the DynamicToken struct. + * + * @param id represents the token type. + * @param content is the string content that has been extracted. + * @param location is the location of the extracted string content in the + * source file. 
+ */ + DynamicToken(TokenTypeId type, const std::string &content, + SourceLocation location) + : type(type), content(content), location(location) + { + } + + /** + * Constructor of the DynamicToken struct, only initializes the token type + * + * @param type is the id corresponding to the type of the token. + */ + DynamicToken(TokenTypeId type) : type(type) {} + + /** + * The getLocation function allows the tokens to be directly passed as + * parameter to Logger or LoggableException instances. + * + * @return a reference at the location field + */ + const SourceLocation &getLocation() const { return location; } +}; + +/** + * The DynamicTokenizer is used to extract tokens and chunks of text from a + * CharReader. It allows to register and unregister tokens while parsing and + * to modify the handling of whitespace characters. Note that the + * DynamicTokenizer always tries to extract the longest possible token from the + * tokenizer. + */ +class DynamicTokenizer { +private: + /** + * Internally used token trie. This object holds all registered tokens. + */ + TokenTrie trie; + + /** + * Flag defining whether whitespaces should be preserved or not. + */ + WhitespaceMode whitespaceMode; + + /** + * Vector containing all registered token types. + */ + std::vector tokens; + + /** + * Next index in the tokens list where to search for a new token id. + */ + size_t nextTokenTypeId; + + /** + * Templated function used internally to read the current token. The + * function is templated in order to force code generation for all six + * combiations of whitespace modes and reading/peeking. + * + * @tparam TextHandler is the type to be used for the textHandler instance. + * @tparam read specifies whether the function should start from and advance + * the read pointer of the char reader. + * @param reader is the CharReader instance from which the data should be + * read. + * @param token is the token structure into which the token information + * should be written. 
+ * @return false if the end of the stream has been reached, true otherwise. + */ + template + bool next(CharReader &reader, DynamicToken &token); + +public: + /** + * Constructor of the DynamicTokenizer class. + * + * @param whitespaceMode specifies how whitespace should be handled. + */ + DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); + + /** + * Registers the given string as a token. Returns a const pointer at a + * TokenDescriptor that will be used to reference the newly created token. + * + * @param token is the token string that should be registered. + * @return a unique identifier for the registered token or EmptyToken if + * an error occured. + */ + TokenTypeId registerToken(const std::string &token); + + /** + * Unregisters the token belonging to the given TokenTypeId. + * + * @param type is the token type that should be unregistered. The + *TokenTypeId + * must have been returned by registerToken. + * @return true if the operation was successful, false otherwise (e.g. + * because the given TokenDescriptor was already unregistered). + */ + bool unregisterToken(TokenTypeId type); + + /** + * Returns the token that was registered under the given TokenTypeId id or + *an + * empty string if an invalid TokenTypeId id is given. + * + * @param type is the TokenTypeId id for which the corresponding token + *string + * should be returned. + * @return the registered token string or an empty string if the given type + * was invalid. + */ + std::string getTokenString(TokenTypeId type); + + /** + * Sets the whitespace mode. + * + * @param whitespaceMode defines how whitespace should be treated in text + * tokens. + */ + void setWhitespaceMode(WhitespaceMode mode); + + /** + * Returns the current value of the whitespace mode. + * + * @return the whitespace mode. + */ + WhitespaceMode getWhitespaceMode(); + + /** + * Reads a new token from the CharReader and stores it in the given + * DynamicToken instance. 
+ * + * @param reader is the CharReader instance from which the data should be + * read. + * @param token is a reference at the token instance into which the Token + * information should be written. + * @return true if a token could be read, false if the end of the stream + * has been reached. + */ + bool read(CharReader &reader, DynamicToken &token); + + /** + * The peek method does not advance the read position of the char reader, + * but reads the next token from the current char reader peek position. + * + * @param reader is the CharReader instance from which the data should be + * read. + * @param token is a reference at the token instance into which the Token + * information should be written. + * @return true if a token could be read, false if the end of the stream + * has been reached. + */ + bool peek(CharReader &reader, DynamicToken &token); +}; +} + +#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */ + diff --git a/src/formats/osdm/DynamicTokenizer.cpp b/src/formats/osdm/DynamicTokenizer.cpp deleted file mode 100644 index 1fac25a..0000000 --- a/src/formats/osdm/DynamicTokenizer.cpp +++ /dev/null @@ -1,381 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include - -#include -#include -#include -#include - -#include "DynamicTokenizer.hpp" - -namespace ousia { - -namespace { - -/* Internal class TokenMatch */ - -/** - * Contains information about a matching token. - */ -struct TokenMatch { - /** - * Token that was matched. - */ - DynamicToken token; - - /** - * Current length of the data within the text handler. The text buffer needs - * to be trimmed to this length if this token matches. - */ - size_t textLength; - - /** - * End location of the current text handler. This location needs to be used - * for the text token that is emitted before the actual token. - */ - size_t textEnd; - - /** - * Constructor of the TokenMatch class. - */ - TokenMatch() : textLength(0), textEnd(0) {} - - /** - * Returns true if this TokenMatch instance actually represents a match. - */ - bool hasMatch() { return token.type != EmptyToken; } -}; - -/* Internal class TokenLookup */ - -/** - * The TokenLookup class is used to represent a thread in a running token - * lookup. - */ -class TokenLookup { -private: - /** - * Current node within the token trie. - */ - TokenTrie::Node const *node; - - /** - * Start offset within the source file. - */ - size_t start; - - /** - * Current length of the data within the text handler. The text buffer needs - * to be trimmed to this length if this token matches. - */ - size_t textLength; - - /** - * End location of the current text handler. This location needs to be used - * for the text token that is emitted before the actual token. - */ - size_t textEnd; - -public: - /** - * Constructor of the TokenLookup class. - * - * @param node is the current node. - * @param start is the start position. - * @param textLength is the text buffer length of the previous text token. - * @param textEnd is the current end location of the previous text token. 
- */ - TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength, - size_t textEnd) - : node(node), start(start), textLength(textLength), textEnd(textEnd) - { - } - - /** - * Tries to extend the current path in the token trie with the given - * character. If a complete token is matched, stores this match in the - * tokens list (in case it is longer than any previous token). - * - * @param c is the character that should be appended to the current prefix. - * @param lookups is a list to which new TokeLookup instances are added -- - * which could potentially be expanded in the next iteration. - * @param match is the DynamicToken instance to which the matching token - * should be written. - * @param tokens is a reference at the internal token list of the - * DynamicTokenizer. - * @param end is the end byte offset of the current character. - * @param sourceId is the source if of this file. - */ - void advance(char c, std::vector &lookups, TokenMatch &match, - const std::vector &tokens, SourceOffset end, - SourceId sourceId) - { - // Check whether we can continue the current token path with the given - // character without visiting an already visited node - auto it = node->children.find(c); - if (it == node->children.end()) { - return; - } - - // Check whether the new node represents a complete token a whether it - // is longer than the current token. If yes, replace the current token. - node = it->second.get(); - if (node->type != EmptyToken) { - const std::string &str = tokens[node->type]; - size_t len = str.size(); - if (len > match.token.content.size()) { - match.token = - DynamicToken{node->type, str, {sourceId, start, end}}; - match.textLength = textLength; - match.textEnd = textEnd; - } - } - - // If this state can possibly be advanced, store it in the states list. - if (!node->children.empty()) { - lookups.emplace_back(*this); - } - } -}; - -/** - * Transforms the given token into a text token containing the extracted - * text. 
- * - * @param handler is the WhitespaceHandler containing the collected data. - * @param token is the output token to which the text should be written. - * @param sourceId is the source id of the underlying file. - */ -static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match, - SourceId sourceId) -{ - if (match.hasMatch()) { - match.token.content = - std::string{handler.textBuf.data(), match.textLength}; - match.token.location = - SourceLocation{sourceId, handler.textStart, match.textEnd}; - } else { - match.token.content = handler.toString(); - match.token.location = - SourceLocation{sourceId, handler.textStart, handler.textEnd}; - } - match.token.type = TextToken; -} -} - -/* Class DynamicTokenizer */ - -DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode) - : whitespaceMode(whitespaceMode), nextTokenTypeId(0) -{ -} - -template -bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token) -{ - // If we're in the read mode, reset the char reader peek position to the - // current read position - if (read) { - reader.resetPeek(); - } - - // Prepare the lookups in the token trie - const TokenTrie::Node *root = trie.getRoot(); - TokenMatch match; - std::vector lookups; - std::vector nextLookups; - - // Instantiate the text handler - TextHandler textHandler; - - // Peek characters from the reader and try to advance the current token tree - // cursor - char c; - size_t charStart = reader.getPeekOffset(); - const SourceId sourceId = reader.getSourceId(); - while (reader.peek(c)) { - const size_t charEnd = reader.getPeekOffset(); - const size_t textLength = textHandler.textBuf.size(); - const size_t textEnd = textHandler.textEnd; - - // If we do not have a match yet, start a new lookup from the root - if (!match.hasMatch()) { - TokenLookup{root, charStart, textLength, textEnd}.advance( - c, nextLookups, match, tokens, charEnd, sourceId); - } - - // Try to advance all other lookups with the new character - for (TokenLookup 
&lookup : lookups) { - lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId); - } - - // We have found a token and there are no more states to advance or the - // text handler has found something -- abort to return the new token - if (match.hasMatch()) { - if ((nextLookups.empty() || textHandler.hasText())) { - break; - } - } else { - // Record all incomming characters - textHandler.append(c, charStart, charEnd); - } - - // Swap the lookups and the nextLookups list - lookups = std::move(nextLookups); - nextLookups.clear(); - - // Advance the offset - charStart = charEnd; - } - - // If we found text, emit that text - if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { - buildTextToken(textHandler, match, sourceId); - } - - // Move the read/peek cursor to the end of the token, abort if an error - // happens while doing so - if (match.hasMatch()) { - // Make sure we have a valid location - if (match.token.location.getEnd() == InvalidSourceOffset) { - throw OusiaException{"Token end position offset out of range"}; - } - - // Seek to the end of the current token - const size_t end = match.token.location.getEnd(); - if (read) { - reader.seek(end); - } else { - reader.seekPeekCursor(end); - } - token = match.token; - } else { - token = DynamicToken{}; - } - return match.hasMatch(); -} - -bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token) -{ - switch (whitespaceMode) { - case WhitespaceMode::PRESERVE: - return next(reader, token); - case WhitespaceMode::TRIM: - return next(reader, token); - case WhitespaceMode::COLLAPSE: - return next(reader, token); - } - return false; -} - -bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token) -{ - switch (whitespaceMode) { - case WhitespaceMode::PRESERVE: - return next(reader, token); - case WhitespaceMode::TRIM: - return next(reader, token); - case WhitespaceMode::COLLAPSE: - return next(reader, token); - } - return false; -} - -TokenTypeId 
DynamicTokenizer::registerToken(const std::string &token) -{ - // Abort if an empty token should be registered - if (token.empty()) { - return EmptyToken; - } - - // Search for a new slot in the tokens list - TokenTypeId type = EmptyToken; - for (size_t i = nextTokenTypeId; i < tokens.size(); i++) { - if (tokens[i].empty()) { - tokens[i] = token; - type = i; - break; - } - } - - // No existing slot was found, add a new one -- make sure we do not - // override the special token type handles - if (type == EmptyToken) { - type = tokens.size(); - if (type == TextToken || type == EmptyToken) { - throw OusiaException{"Token type ids depleted!"}; - } - tokens.emplace_back(token); - } - nextTokenTypeId = type + 1; - - // Try to register the token in the trie -- if this fails, remove it - // from the tokens list - if (!trie.registerToken(token, type)) { - tokens[type] = std::string{}; - nextTokenTypeId = type; - return EmptyToken; - } - return type; -} - -bool DynamicTokenizer::unregisterToken(TokenTypeId type) -{ - // Unregister the token from the trie, abort if an invalid type is given - if (type < tokens.size() && trie.unregisterToken(tokens[type])) { - tokens[type] = std::string{}; - nextTokenTypeId = type; - return true; - } - return false; -} - -std::string DynamicTokenizer::getTokenString(TokenTypeId type) -{ - if (type < tokens.size()) { - return tokens[type]; - } - return std::string{}; -} - -void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode) -{ - whitespaceMode = mode; -} - -WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; } - -/* Explicitly instantiate all possible instantiations of the "next" member - function */ -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, 
DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -} - diff --git a/src/formats/osdm/DynamicTokenizer.hpp b/src/formats/osdm/DynamicTokenizer.hpp deleted file mode 100644 index 3e5aeb3..0000000 --- a/src/formats/osdm/DynamicTokenizer.hpp +++ /dev/null @@ -1,231 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file DynamicTokenizer.hpp - * - * Tokenizer that can be reconfigured at runtime used for parsing the plain - * text format. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_ -#define _OUSIA_DYNAMIC_TOKENIZER_HPP_ - -#include -#include -#include - -#include -#include - -#include "TokenTrie.hpp" - -namespace ousia { - -// Forward declarations -class CharReader; - -/** - * The DynamicToken structure describes a token discovered by the Tokenizer. - */ -struct DynamicToken { - /** - * Id of the type of this token. - */ - TokenTypeId type; - - /** - * String that was matched. - */ - std::string content; - - /** - * Location from which the string was extracted. - */ - SourceLocation location; - - /** - * Default constructor. - */ - DynamicToken() : type(EmptyToken) {} - - /** - * Constructor of the DynamicToken struct. 
- * - * @param id represents the token type. - * @param content is the string content that has been extracted. - * @param location is the location of the extracted string content in the - * source file. - */ - DynamicToken(TokenTypeId type, const std::string &content, - SourceLocation location) - : type(type), content(content), location(location) - { - } - - /** - * Constructor of the DynamicToken struct, only initializes the token type - * - * @param type is the id corresponding to the type of the token. - */ - DynamicToken(TokenTypeId type) : type(type) {} - - /** - * The getLocation function allows the tokens to be directly passed as - * parameter to Logger or LoggableException instances. - * - * @return a reference at the location field - */ - const SourceLocation &getLocation() const { return location; } -}; - -/** - * The DynamicTokenizer is used to extract tokens and chunks of text from a - * CharReader. It allows to register and unregister tokens while parsing and - * to modify the handling of whitespace characters. Note that the - * DynamicTokenizer always tries to extract the longest possible token from the - * tokenizer. - */ -class DynamicTokenizer { -private: - /** - * Internally used token trie. This object holds all registered tokens. - */ - TokenTrie trie; - - /** - * Flag defining whether whitespaces should be preserved or not. - */ - WhitespaceMode whitespaceMode; - - /** - * Vector containing all registered token types. - */ - std::vector tokens; - - /** - * Next index in the tokens list where to search for a new token id. - */ - size_t nextTokenTypeId; - - /** - * Templated function used internally to read the current token. The - * function is templated in order to force code generation for all six - * combiations of whitespace modes and reading/peeking. - * - * @tparam TextHandler is the type to be used for the textHandler instance. 
- * @tparam read specifies whether the function should start from and advance - * the read pointer of the char reader. - * @param reader is the CharReader instance from which the data should be - * read. - * @param token is the token structure into which the token information - * should be written. - * @return false if the end of the stream has been reached, true otherwise. - */ - template - bool next(CharReader &reader, DynamicToken &token); - -public: - /** - * Constructor of the DynamicTokenizer class. - * - * @param whitespaceMode specifies how whitespace should be handled. - */ - DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); - - /** - * Registers the given string as a token. Returns a const pointer at a - * TokenDescriptor that will be used to reference the newly created token. - * - * @param token is the token string that should be registered. - * @return a unique identifier for the registered token or EmptyToken if - * an error occured. - */ - TokenTypeId registerToken(const std::string &token); - - /** - * Unregisters the token belonging to the given TokenTypeId. - * - * @param type is the token type that should be unregistered. The - *TokenTypeId - * must have been returned by registerToken. - * @return true if the operation was successful, false otherwise (e.g. - * because the given TokenDescriptor was already unregistered). - */ - bool unregisterToken(TokenTypeId type); - - /** - * Returns the token that was registered under the given TokenTypeId id or - *an - * empty string if an invalid TokenTypeId id is given. - * - * @param type is the TokenTypeId id for which the corresponding token - *string - * should be returned. - * @return the registered token string or an empty string if the given type - * was invalid. - */ - std::string getTokenString(TokenTypeId type); - - /** - * Sets the whitespace mode. - * - * @param whitespaceMode defines how whitespace should be treated in text - * tokens. 
- */ - void setWhitespaceMode(WhitespaceMode mode); - - /** - * Returns the current value of the whitespace mode. - * - * @return the whitespace mode. - */ - WhitespaceMode getWhitespaceMode(); - - /** - * Reads a new token from the CharReader and stores it in the given - * DynamicToken instance. - * - * @param reader is the CharReader instance from which the data should be - * read. - * @param token is a reference at the token instance into which the Token - * information should be written. - * @return true if a token could be read, false if the end of the stream - * has been reached. - */ - bool read(CharReader &reader, DynamicToken &token); - - /** - * The peek method does not advance the read position of the char reader, - * but reads the next token from the current char reader peek position. - * - * @param reader is the CharReader instance from which the data should be - * read. - * @param token is a reference at the token instance into which the Token - * information should be written. - * @return true if a token could be read, false if the end of the stream - * has been reached. - */ - bool peek(CharReader &reader, DynamicToken &token); -}; -} - -#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */ - diff --git a/src/formats/osdm/TokenTrie.cpp b/src/formats/osdm/TokenTrie.cpp deleted file mode 100644 index 4a0430b..0000000 --- a/src/formats/osdm/TokenTrie.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "TokenTrie.hpp" - -namespace ousia { - -/* Class DynamicTokenTree::Node */ - -TokenTrie::Node::Node() : type(EmptyToken) {} - -/* Class DynamicTokenTree */ - -bool TokenTrie::registerToken(const std::string &token, - TokenTypeId type) noexcept -{ - // Abort if the token is empty -- this would taint the root node - if (token.empty()) { - return false; - } - - // Iterate over each character in the given string and insert them as - // (new) nodes - Node *node = &root; - for (size_t i = 0; i < token.size(); i++) { - // Insert a new node if this one does not exist - const char c = token[i]; - auto it = node->children.find(c); - if (it == node->children.end()) { - it = node->children.emplace(c, std::make_shared()).first; - } - node = it->second.get(); - } - - // If the resulting node already has a type set, we're screwed. - if (node->type != EmptyToken) { - return false; - } - - // Otherwise just set the type to the given type. 
- node->type = type; - return true; -} - -bool TokenTrie::unregisterToken(const std::string &token) noexcept -{ - // We cannot remove empty tokens as we need to access the fist character - // upfront - if (token.empty()) { - return false; - } - - // First pass -- search the node in the path that can be deleted - Node *subtreeRoot = &root; - char subtreeKey = token[0]; - Node *node = &root; - for (size_t i = 0; i < token.size(); i++) { - // Go to the next node, abort if the tree ends unexpectedly - auto it = node->children.find(token[i]); - if (it == node->children.end()) { - return false; - } - - // Reset the subtree handler if this node has another type - node = it->second.get(); - if ((node->type != EmptyToken || node->children.size() > 1) && - (i + 1 != token.size())) { - subtreeRoot = node; - subtreeKey = token[i + 1]; - } - } - - // If the node type is already EmptyToken, we cannot do anything here - if (node->type == EmptyToken) { - return false; - } - - // If the target node has children, we cannot delete the subtree. 
Set the - // type to EmptyToken instead - if (!node->children.empty()) { - node->type = EmptyToken; - return true; - } - - // If we end up here, we can safely delete the complete subtree - subtreeRoot->children.erase(subtreeKey); - return true; -} - -TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept -{ - Node const *node = &root; - for (size_t i = 0; i < token.size(); i++) { - const char c = token[i]; - auto it = node->children.find(c); - if (it == node->children.end()) { - return EmptyToken; - } - node = it->second.get(); - } - return node->type; -} -} - diff --git a/src/formats/osdm/TokenTrie.hpp b/src/formats/osdm/TokenTrie.hpp deleted file mode 100644 index 36c2ffa..0000000 --- a/src/formats/osdm/TokenTrie.hpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file TokenTrie.hpp - * - * Class representing a token trie that can be updated dynamically. - * - * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de) - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_TOKEN_TRIE_HPP_ -#define _OUSIA_TOKEN_TRIE_HPP_ - -#include -#include -#include -#include - -namespace ousia { - -/** - * The TokenTypeId is used to give each token type a unique id. - */ -using TokenTypeId = uint32_t; - -/** - * Token which is not a token. 
- */ -constexpr TokenTypeId EmptyToken = std::numeric_limits::max(); - -/** - * Token which represents a text token. - */ -constexpr TokenTypeId TextToken = std::numeric_limits::max() - 1; - -/** - * The Tokenizer internally uses a TokenTrie to be efficiently able to identify - * the longest consecutive token in the text. This is equivalent to a prefix - * trie. - * - * A token trie is a construct that structures all special tokens a Tokenizer - * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and - * three. Then the token tree would look like this: - * - * \code{*.txt} - * ~ (0) - * / \ - * a (2) b (0) - * | | - * a (0) a (0) - * | | - * b (1) c (0) - * \endcode - * - * Where the number indicates the corresponding token descriptor identifier. - */ -class TokenTrie { -public: - /** - * Structure used to build the node tree. - */ - struct Node { - /** - * Type used for the child map. - */ - using ChildMap = std::unordered_map>; - - /** - * Map from single characters at the corresponding child nodes. - */ - ChildMap children; - - /** - * Reference at the corresponding token descriptor. Set to nullptr if - * no token is attached to this node. - */ - TokenTypeId type; - - /** - * Default constructor, initializes the descriptor with nullptr. - */ - Node(); - }; - -private: - /** - * Root node of the internal token tree. - */ - Node root; - -public: - /** - * Registers a token containing the given string. Returns false if the - * token already exists, true otherwise. - * - * @param token is the character sequence that should be registered as - * token. - * @param type is the descriptor that should be set for this token. - * @return true if the operation is successful, false otherwise. - */ - bool registerToken(const std::string &token, TokenTypeId type) noexcept; - - /** - * Unregisters the token from the token tree. Returns true if the token was - * unregistered successfully, false otherwise. 
- * - * @param token is the character sequence that should be unregistered. - * @return true if the operation was successful, false otherwise. - */ - bool unregisterToken(const std::string &token) noexcept; - - /** - * Returns true, if the given token exists within the TokenTree. This - * function is mostly thought for debugging and unit testing. - * - * @param token is the character sequence that should be searched. - * @return the attached token descriptor or nullptr if the given token is - * not found. - */ - TokenTypeId hasToken(const std::string &token) const noexcept; - - /** - * Returns a reference at the root node to be used for traversing the token - * tree. - * - * @return a reference at the root node. - */ - const Node *getRoot() const noexcept { return &root; } -}; -} - -#endif /* _OUSIA_TOKEN_TRIE_HPP_ */ - diff --git a/test/core/parser/utils/TokenTrieTest.cpp b/test/core/parser/utils/TokenTrieTest.cpp new file mode 100644 index 0000000..aacd6c0 --- /dev/null +++ b/test/core/parser/utils/TokenTrieTest.cpp @@ -0,0 +1,92 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +#include + +namespace ousia { + +static const TokenTypeId t1 = 0; +static const TokenTypeId t2 = 1; +static const TokenTypeId t3 = 2; +static const TokenTypeId t4 = 3; + +TEST(TokenTrie, registerToken) +{ + TokenTrie tree; + + ASSERT_TRUE(tree.registerToken("a", t1)); + ASSERT_TRUE(tree.registerToken("ab", t2)); + ASSERT_TRUE(tree.registerToken("b", t3)); + ASSERT_TRUE(tree.registerToken("hello", t4)); + + ASSERT_FALSE(tree.registerToken("", t1)); + ASSERT_FALSE(tree.registerToken("a", t4)); + ASSERT_FALSE(tree.registerToken("ab", t4)); + ASSERT_FALSE(tree.registerToken("b", t4)); + ASSERT_FALSE(tree.registerToken("hello", t4)); + + ASSERT_EQ(t1, tree.hasToken("a")); + ASSERT_EQ(t2, tree.hasToken("ab")); + ASSERT_EQ(t3, tree.hasToken("b")); + ASSERT_EQ(t4, tree.hasToken("hello")); + ASSERT_EQ(EmptyToken, tree.hasToken("")); + ASSERT_EQ(EmptyToken, tree.hasToken("abc")); +} + +TEST(TokenTrie, unregisterToken) +{ + TokenTrie tree; + + ASSERT_TRUE(tree.registerToken("a", t1)); + ASSERT_FALSE(tree.registerToken("a", t4)); + + ASSERT_TRUE(tree.registerToken("ab", t2)); + ASSERT_FALSE(tree.registerToken("ab", t4)); + + ASSERT_TRUE(tree.registerToken("b", t3)); + ASSERT_FALSE(tree.registerToken("b", t4)); + + ASSERT_EQ(t1, tree.hasToken("a")); + ASSERT_EQ(t2, tree.hasToken("ab")); + ASSERT_EQ(t3, tree.hasToken("b")); + + ASSERT_TRUE(tree.unregisterToken("a")); + ASSERT_FALSE(tree.unregisterToken("a")); + + ASSERT_EQ(EmptyToken, tree.hasToken("a")); + ASSERT_EQ(t2, tree.hasToken("ab")); + ASSERT_EQ(t3, tree.hasToken("b")); + + ASSERT_TRUE(tree.unregisterToken("b")); + ASSERT_FALSE(tree.unregisterToken("b")); + + ASSERT_EQ(EmptyToken, tree.hasToken("a")); + ASSERT_EQ(t2, tree.hasToken("ab")); + ASSERT_EQ(EmptyToken, tree.hasToken("b")); + + ASSERT_TRUE(tree.unregisterToken("ab")); + ASSERT_FALSE(tree.unregisterToken("ab")); + + ASSERT_EQ(EmptyToken, tree.hasToken("a")); + ASSERT_EQ(EmptyToken, tree.hasToken("ab")); + ASSERT_EQ(EmptyToken, 
tree.hasToken("b")); +} +} + diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp new file mode 100644 index 0000000..c1f8785 --- /dev/null +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -0,0 +1,415 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include +#include + +namespace ousia { + +TEST(DynamicTokenizer, tokenRegistration) +{ + DynamicTokenizer tokenizer; + + ASSERT_EQ(EmptyToken, tokenizer.registerToken("")); + + ASSERT_EQ(0U, tokenizer.registerToken("a")); + ASSERT_EQ(EmptyToken, tokenizer.registerToken("a")); + ASSERT_EQ("a", tokenizer.getTokenString(0U)); + + ASSERT_EQ(1U, tokenizer.registerToken("b")); + ASSERT_EQ(EmptyToken, tokenizer.registerToken("b")); + ASSERT_EQ("b", tokenizer.getTokenString(1U)); + + ASSERT_EQ(2U, tokenizer.registerToken("c")); + ASSERT_EQ(EmptyToken, tokenizer.registerToken("c")); + ASSERT_EQ("c", tokenizer.getTokenString(2U)); + + ASSERT_TRUE(tokenizer.unregisterToken(1U)); + ASSERT_FALSE(tokenizer.unregisterToken(1U)); + ASSERT_EQ("", tokenizer.getTokenString(1U)); + + ASSERT_EQ(1U, tokenizer.registerToken("d")); + ASSERT_EQ(EmptyToken, tokenizer.registerToken("d")); + ASSERT_EQ("d", tokenizer.getTokenString(1U)); +} + +TEST(DynamicTokenizer, textTokenPreserveWhitespace) +{ + { + CharReader reader{" this \t is only a \n\n test text "}; + // 012345 
6789012345678 9 0123456789012345 + // 0 1 2 3 + DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; + + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(" this \t is only a \n\n test text ", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(36U, loc.getEnd()); + + ASSERT_FALSE(tokenizer.read(reader, token)); + } + + { + CharReader reader{"this \t is only a \n\n test text"}; + // 01234 5678901234567 8 9012345678901 + // 0 1 2 3 + DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; + + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("this \t is only a \n\n test text", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(32U, loc.getEnd()); + + ASSERT_FALSE(tokenizer.read(reader, token)); + } +} + +TEST(DynamicTokenizer, textTokenTrimWhitespace) +{ + { + CharReader reader{" this \t is only a \n\n test text "}; + // 012345 6789012345678 9 0123456789012345 + // 0 1 2 3 + DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; + + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("this \t is only a \n\n test text", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(33U, loc.getEnd()); + + ASSERT_FALSE(tokenizer.read(reader, token)); + } + + { + CharReader reader{"this \t is only a \n\n test text"}; + // 01234 5678901234567 8 9012345678901 + // 0 1 2 3 + DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; + + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("this \t is only a \n\n test text", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(32U, loc.getEnd()); + + ASSERT_FALSE(tokenizer.read(reader, token)); + } +} + 
+TEST(DynamicTokenizer, textTokenCollapseWhitespace) +{ + { + CharReader reader{" this \t is only a \n\n test text "}; + // 012345 6789012345678 9 0123456789012345 + // 0 1 2 3 + DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; + + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("this is only a test text", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(33U, loc.getEnd()); + + ASSERT_FALSE(tokenizer.read(reader, token)); + } + + { + CharReader reader{"this \t is only a \n\n test text"}; + // 01234 5678901234567 8 9012345678901 + // 0 1 2 3 + DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; + + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("this is only a test text", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(32U, loc.getEnd()); + + ASSERT_FALSE(tokenizer.read(reader, token)); + } +} + +TEST(DynamicTokenizer, simpleReadToken) +{ + CharReader reader{"test1:test2"}; + DynamicTokenizer tokenizer; + + const TokenTypeId tid = tokenizer.registerToken(":"); + ASSERT_EQ(0U, tid); + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("test1", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + + char c; + ASSERT_TRUE(reader.peek(c)); + ASSERT_EQ(':', c); + } + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + + ASSERT_EQ(tid, token.type); + ASSERT_EQ(":", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(5U, loc.getStart()); + ASSERT_EQ(6U, loc.getEnd()); + + char c; + ASSERT_TRUE(reader.peek(c)); + ASSERT_EQ('t', c); + } + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + + ASSERT_EQ(TextToken, token.type); + 
ASSERT_EQ("test2", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(6U, loc.getStart()); + ASSERT_EQ(11U, loc.getEnd()); + + char c; + ASSERT_FALSE(reader.peek(c)); + } +} + +TEST(DynamicTokenizer, simplePeekToken) +{ + CharReader reader{"test1:test2"}; + DynamicTokenizer tokenizer; + + const TokenTypeId tid = tokenizer.registerToken(":"); + ASSERT_EQ(0U, tid); + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.peek(reader, token)); + + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("test1", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + ASSERT_EQ(0U, reader.getOffset()); + ASSERT_EQ(5U, reader.getPeekOffset()); + } + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.peek(reader, token)); + + ASSERT_EQ(tid, token.type); + ASSERT_EQ(":", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(5U, loc.getStart()); + ASSERT_EQ(6U, loc.getEnd()); + ASSERT_EQ(0U, reader.getOffset()); + ASSERT_EQ(6U, reader.getPeekOffset()); + } + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.peek(reader, token)); + + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("test2", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(6U, loc.getStart()); + ASSERT_EQ(11U, loc.getEnd()); + ASSERT_EQ(0U, reader.getOffset()); + ASSERT_EQ(11U, reader.getPeekOffset()); + } + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("test1", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(5U, loc.getEnd()); + ASSERT_EQ(5U, reader.getOffset()); + ASSERT_EQ(5U, reader.getPeekOffset()); + } + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + + ASSERT_EQ(tid, token.type); + ASSERT_EQ(":", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(5U, loc.getStart()); + ASSERT_EQ(6U, loc.getEnd()); + ASSERT_EQ(6U, 
reader.getOffset()); + ASSERT_EQ(6U, reader.getPeekOffset()); + } + + { + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("test2", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(6U, loc.getStart()); + ASSERT_EQ(11U, loc.getEnd()); + ASSERT_EQ(11U, reader.getOffset()); + ASSERT_EQ(11U, reader.getPeekOffset()); + } +} + +TEST(DynamicTokenizer, ambiguousTokens) +{ + CharReader reader{"abc"}; + DynamicTokenizer tokenizer; + + TokenTypeId t1 = tokenizer.registerToken("abd"); + TokenTypeId t2 = tokenizer.registerToken("bc"); + + ASSERT_EQ(0U, t1); + ASSERT_EQ(1U, t2); + + DynamicToken token; + ASSERT_TRUE(tokenizer.read(reader, token)); + + ASSERT_EQ(TextToken, token.type); + ASSERT_EQ("a", token.content); + + SourceLocation loc = token.location; + ASSERT_EQ(0U, loc.getStart()); + ASSERT_EQ(1U, loc.getEnd()); + + ASSERT_TRUE(tokenizer.read(reader, token)); + + ASSERT_EQ(t2, token.type); + ASSERT_EQ("bc", token.content); + + loc = token.location; + ASSERT_EQ(1U, loc.getStart()); + ASSERT_EQ(3U, loc.getEnd()); + + ASSERT_FALSE(tokenizer.read(reader, token)); +} + +TEST(DynamicTokenizer, commentTestWhitespacePreserve) +{ + CharReader reader{"Test/Test /* Block Comment */", 0}; + // 012345678901234567890123456789 + // 0 1 2 + DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE); + + const TokenTypeId t1 = tokenizer.registerToken("/"); + const TokenTypeId t2 = tokenizer.registerToken("/*"); + const TokenTypeId t3 = tokenizer.registerToken("*/"); + + std::vector expected = { + {TextToken, "Test", SourceLocation{0, 0, 4}}, + {t1, "/", SourceLocation{0, 4, 5}}, + {TextToken, "Test ", SourceLocation{0, 5, 10}}, + {t2, "/*", SourceLocation{0, 10, 12}}, + {TextToken, " Block Comment ", SourceLocation{0, 12, 27}}, + {t3, "*/", SourceLocation{0, 27, 29}}}; + + DynamicToken t; + for (auto &te : expected) { + EXPECT_TRUE(tokenizer.read(reader, t)); + EXPECT_EQ(te.type, t.type); + 
EXPECT_EQ(te.content, t.content); + EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); + EXPECT_EQ(te.location.getStart(), t.location.getStart()); + EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); + } + ASSERT_FALSE(tokenizer.read(reader, t)); +} + +TEST(DynamicTokenizer, commentTestWhitespaceCollapse) +{ + CharReader reader{"Test/Test /* Block Comment */", 0}; + // 012345678901234567890123456789 + // 0 1 2 + DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE); + + const TokenTypeId t1 = tokenizer.registerToken("/"); + const TokenTypeId t2 = tokenizer.registerToken("/*"); + const TokenTypeId t3 = tokenizer.registerToken("*/"); + + std::vector expected = { + {TextToken, "Test", SourceLocation{0, 0, 4}}, + {t1, "/", SourceLocation{0, 4, 5}}, + {TextToken, "Test", SourceLocation{0, 5, 9}}, + {t2, "/*", SourceLocation{0, 10, 12}}, + {TextToken, "Block Comment", SourceLocation{0, 13, 26}}, + {t3, "*/", SourceLocation{0, 27, 29}}}; + + DynamicToken t; + for (auto &te : expected) { + EXPECT_TRUE(tokenizer.read(reader, t)); + EXPECT_EQ(te.type, t.type); + EXPECT_EQ(te.content, t.content); + EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); + EXPECT_EQ(te.location.getStart(), t.location.getStart()); + EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); + } + ASSERT_FALSE(tokenizer.read(reader, t)); +} + +} + diff --git a/test/formats/osdm/DynamicTokenizerTest.cpp b/test/formats/osdm/DynamicTokenizerTest.cpp deleted file mode 100644 index c1f8785..0000000 --- a/test/formats/osdm/DynamicTokenizerTest.cpp +++ /dev/null @@ -1,415 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include -#include - -namespace ousia { - -TEST(DynamicTokenizer, tokenRegistration) -{ - DynamicTokenizer tokenizer; - - ASSERT_EQ(EmptyToken, tokenizer.registerToken("")); - - ASSERT_EQ(0U, tokenizer.registerToken("a")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("a")); - ASSERT_EQ("a", tokenizer.getTokenString(0U)); - - ASSERT_EQ(1U, tokenizer.registerToken("b")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("b")); - ASSERT_EQ("b", tokenizer.getTokenString(1U)); - - ASSERT_EQ(2U, tokenizer.registerToken("c")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("c")); - ASSERT_EQ("c", tokenizer.getTokenString(2U)); - - ASSERT_TRUE(tokenizer.unregisterToken(1U)); - ASSERT_FALSE(tokenizer.unregisterToken(1U)); - ASSERT_EQ("", tokenizer.getTokenString(1U)); - - ASSERT_EQ(1U, tokenizer.registerToken("d")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("d")); - ASSERT_EQ("d", tokenizer.getTokenString(1U)); -} - -TEST(DynamicTokenizer, textTokenPreserveWhitespace) -{ - { - CharReader reader{" this \t is only a \n\n test text "}; - // 012345 6789012345678 9 0123456789012345 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ(" this \t is only a \n\n test text ", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(36U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } - - { - CharReader reader{"this \t is only a \n\n test text"}; - // 01234 5678901234567 8 9012345678901 - // 0 1 2 3 - 
DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this \t is only a \n\n test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } -} - -TEST(DynamicTokenizer, textTokenTrimWhitespace) -{ - { - CharReader reader{" this \t is only a \n\n test text "}; - // 012345 6789012345678 9 0123456789012345 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this \t is only a \n\n test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } - - { - CharReader reader{"this \t is only a \n\n test text"}; - // 01234 5678901234567 8 9012345678901 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::TRIM}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this \t is only a \n\n test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } -} - -TEST(DynamicTokenizer, textTokenCollapseWhitespace) -{ - { - CharReader reader{" this \t is only a \n\n test text "}; - // 012345 6789012345678 9 0123456789012345 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this is only a test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } 
- - { - CharReader reader{"this \t is only a \n\n test text"}; - // 01234 5678901234567 8 9012345678901 - // 0 1 2 3 - DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE}; - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("this is only a test text", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); - } -} - -TEST(DynamicTokenizer, simpleReadToken) -{ - CharReader reader{"test1:test2"}; - DynamicTokenizer tokenizer; - - const TokenTypeId tid = tokenizer.registerToken(":"); - ASSERT_EQ(0U, tid); - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - - char c; - ASSERT_TRUE(reader.peek(c)); - ASSERT_EQ(':', c); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(tid, token.type); - ASSERT_EQ(":", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(6U, loc.getEnd()); - - char c; - ASSERT_TRUE(reader.peek(c)); - ASSERT_EQ('t', c); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); - - char c; - ASSERT_FALSE(reader.peek(c)); - } -} - -TEST(DynamicTokenizer, simplePeekToken) -{ - CharReader reader{"test1:test2"}; - DynamicTokenizer tokenizer; - - const TokenTypeId tid = tokenizer.registerToken(":"); - ASSERT_EQ(0U, tid); - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = 
token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - ASSERT_EQ(0U, reader.getOffset()); - ASSERT_EQ(5U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(tid, token.type); - ASSERT_EQ(":", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(6U, loc.getEnd()); - ASSERT_EQ(0U, reader.getOffset()); - ASSERT_EQ(6U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); - ASSERT_EQ(0U, reader.getOffset()); - ASSERT_EQ(11U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - ASSERT_EQ(5U, reader.getOffset()); - ASSERT_EQ(5U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(tid, token.type); - ASSERT_EQ(":", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(6U, loc.getEnd()); - ASSERT_EQ(6U, reader.getOffset()); - ASSERT_EQ(6U, reader.getPeekOffset()); - } - - { - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); - ASSERT_EQ(11U, reader.getOffset()); - ASSERT_EQ(11U, reader.getPeekOffset()); - } -} - -TEST(DynamicTokenizer, ambiguousTokens) -{ - CharReader reader{"abc"}; - DynamicTokenizer tokenizer; - - TokenTypeId t1 = tokenizer.registerToken("abd"); - 
TokenTypeId t2 = tokenizer.registerToken("bc"); - - ASSERT_EQ(0U, t1); - ASSERT_EQ(1U, t2); - - DynamicToken token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(TextToken, token.type); - ASSERT_EQ("a", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(t2, token.type); - ASSERT_EQ("bc", token.content); - - loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(3U, loc.getEnd()); - - ASSERT_FALSE(tokenizer.read(reader, token)); -} - -TEST(DynamicTokenizer, commentTestWhitespacePreserve) -{ - CharReader reader{"Test/Test /* Block Comment */", 0}; - // 012345678901234567890123456789 - // 0 1 2 - DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE); - - const TokenTypeId t1 = tokenizer.registerToken("/"); - const TokenTypeId t2 = tokenizer.registerToken("/*"); - const TokenTypeId t3 = tokenizer.registerToken("*/"); - - std::vector expected = { - {TextToken, "Test", SourceLocation{0, 0, 4}}, - {t1, "/", SourceLocation{0, 4, 5}}, - {TextToken, "Test ", SourceLocation{0, 5, 10}}, - {t2, "/*", SourceLocation{0, 10, 12}}, - {TextToken, " Block Comment ", SourceLocation{0, 12, 27}}, - {t3, "*/", SourceLocation{0, 27, 29}}}; - - DynamicToken t; - for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.type, t.type); - EXPECT_EQ(te.content, t.content); - EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); - EXPECT_EQ(te.location.getStart(), t.location.getStart()); - EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); - } - ASSERT_FALSE(tokenizer.read(reader, t)); -} - -TEST(DynamicTokenizer, commentTestWhitespaceCollapse) -{ - CharReader reader{"Test/Test /* Block Comment */", 0}; - // 012345678901234567890123456789 - // 0 1 2 - DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE); - - const TokenTypeId t1 = tokenizer.registerToken("/"); - const TokenTypeId t2 = 
tokenizer.registerToken("/*"); - const TokenTypeId t3 = tokenizer.registerToken("*/"); - - std::vector expected = { - {TextToken, "Test", SourceLocation{0, 0, 4}}, - {t1, "/", SourceLocation{0, 4, 5}}, - {TextToken, "Test", SourceLocation{0, 5, 9}}, - {t2, "/*", SourceLocation{0, 10, 12}}, - {TextToken, "Block Comment", SourceLocation{0, 13, 26}}, - {t3, "*/", SourceLocation{0, 27, 29}}}; - - DynamicToken t; - for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.type, t.type); - EXPECT_EQ(te.content, t.content); - EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); - EXPECT_EQ(te.location.getStart(), t.location.getStart()); - EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); - } - ASSERT_FALSE(tokenizer.read(reader, t)); -} - -} - diff --git a/test/formats/osdm/TokenTrieTest.cpp b/test/formats/osdm/TokenTrieTest.cpp deleted file mode 100644 index aacd6c0..0000000 --- a/test/formats/osdm/TokenTrieTest.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include - -namespace ousia { - -static const TokenTypeId t1 = 0; -static const TokenTypeId t2 = 1; -static const TokenTypeId t3 = 2; -static const TokenTypeId t4 = 3; - -TEST(TokenTrie, registerToken) -{ - TokenTrie tree; - - ASSERT_TRUE(tree.registerToken("a", t1)); - ASSERT_TRUE(tree.registerToken("ab", t2)); - ASSERT_TRUE(tree.registerToken("b", t3)); - ASSERT_TRUE(tree.registerToken("hello", t4)); - - ASSERT_FALSE(tree.registerToken("", t1)); - ASSERT_FALSE(tree.registerToken("a", t4)); - ASSERT_FALSE(tree.registerToken("ab", t4)); - ASSERT_FALSE(tree.registerToken("b", t4)); - ASSERT_FALSE(tree.registerToken("hello", t4)); - - ASSERT_EQ(t1, tree.hasToken("a")); - ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(t3, tree.hasToken("b")); - ASSERT_EQ(t4, tree.hasToken("hello")); - ASSERT_EQ(EmptyToken, tree.hasToken("")); - ASSERT_EQ(EmptyToken, tree.hasToken("abc")); -} - -TEST(TokenTrie, unregisterToken) -{ - TokenTrie tree; - - ASSERT_TRUE(tree.registerToken("a", t1)); - ASSERT_FALSE(tree.registerToken("a", t4)); - - ASSERT_TRUE(tree.registerToken("ab", t2)); - ASSERT_FALSE(tree.registerToken("ab", t4)); - - ASSERT_TRUE(tree.registerToken("b", t3)); - ASSERT_FALSE(tree.registerToken("b", t4)); - - ASSERT_EQ(t1, tree.hasToken("a")); - ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(t3, tree.hasToken("b")); - - ASSERT_TRUE(tree.unregisterToken("a")); - ASSERT_FALSE(tree.unregisterToken("a")); - - ASSERT_EQ(EmptyToken, tree.hasToken("a")); - ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(t3, tree.hasToken("b")); - - ASSERT_TRUE(tree.unregisterToken("b")); - ASSERT_FALSE(tree.unregisterToken("b")); - - ASSERT_EQ(EmptyToken, tree.hasToken("a")); - ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(EmptyToken, tree.hasToken("b")); - - ASSERT_TRUE(tree.unregisterToken("ab")); - ASSERT_FALSE(tree.unregisterToken("ab")); - - ASSERT_EQ(EmptyToken, tree.hasToken("a")); - ASSERT_EQ(EmptyToken, tree.hasToken("ab")); - ASSERT_EQ(EmptyToken, 
tree.hasToken("b")); -} -} - -- cgit v1.2.3 From 919552bad0f3f4db20419d3d3771c724c2ab997f Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:47:25 +0100 Subject: Removed Whitespace file again --- src/core/common/Whitespace.cpp | 38 -------------------------------------- test/core/common/Whitespace.cpp | 41 ----------------------------------------- 2 files changed, 79 deletions(-) delete mode 100644 src/core/common/Whitespace.cpp delete mode 100644 test/core/common/Whitespace.cpp (limited to 'src/core') diff --git a/src/core/common/Whitespace.cpp b/src/core/common/Whitespace.cpp deleted file mode 100644 index 4d7c01a..0000000 --- a/src/core/common/Whitespace.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "Whitespace.hpp" -#include "WhitespaceHandler.hpp" - -namespace ousia { - -std::string Utils::trim(const std::string &s) -{ - std::pair bounds = trim(s, Utils::isWhitespace); - return s.substr(bounds.first, bounds.second - bounds.first); -} - -std::string Utils::collapse(const std::string &s) -{ - CollapsingWhitespaceHandler h; - appendToWhitespaceHandler(h, s, 0); - return h.toString(); -} - -} - diff --git a/test/core/common/Whitespace.cpp b/test/core/common/Whitespace.cpp deleted file mode 100644 index d6df8b7..0000000 --- a/test/core/common/Whitespace.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include - -namespace ousia { - -TEST(Whitespace, trim) -{ - ASSERT_EQ("hello world", Whitespace::trim("\t hello world \n\r\t")); - ASSERT_EQ("hello world", Whitespace::trim("hello world \n\r\t")); - ASSERT_EQ("hello world", Whitespace::trim(" hello world")); - ASSERT_EQ("hello world", Whitespace::trim("hello world")); -} - -TEST(Whitespace, collapse) -{ - ASSERT("hello world", Whitespace::collapse(" hello \n\t\r world \n\r\t")); - ASSERT("hello world", Whitespace::collapse("hello \n\t\r world \n\r\t")); - ASSERT("hello world", Whitespace::collapse("hello \n\t\r world")); - ASSERT("hello world", Whitespace::collapse("hello world")); -} -} - -- cgit v1.2.3 From 295783320ea3855a14123f9cea163f8f5f689e07 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:50:11 +0100 Subject: Moved some of the whitespace functionality back to Utils --- src/core/common/Utils.cpp | 25 ++++++++++++ src/core/common/Utils.hpp | 72 +++++++++++++++++++++++++++++++++++ src/core/common/Whitespace.hpp | 62 +----------------------------- src/core/common/WhitespaceHandler.hpp | 7 ++-- test/core/common/UtilsTest.cpp | 17 +++++++++ 5 files changed, 119 insertions(+), 64 deletions(-) (limited to 'src/core') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 4005143..3739c61 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -21,6 +21,7 @@ #include #include "Utils.hpp" +#include "WhitespaceHandler.hpp" namespace ousia { @@ -87,5 +88,29 @@ std::string Utils::extractFileExtension(const std::string &filename) } return std::string{}; } + +std::string Utils::trim(const std::string &s) +{ + std::pair bounds = trim(s, Utils::isWhitespace); + return s.substr(bounds.first, bounds.second - bounds.first); +} + +std::string Utils::collapse(const std::string &s) +{ + CollapsingWhitespaceHandler h; + appendToWhitespaceHandler(h, s, 0); + return h.toString(); +} + +bool Utils::startsWith(const std::string &s, const std::string 
&prefix) +{ + return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix; +} + +bool Utils::endsWith(const std::string &s, const std::string &suffix) +{ + return suffix.size() <= s.size() && + s.substr(s.size() - suffix.size(), suffix.size()) == suffix; +} } diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index af7a773..16a9136 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -99,6 +99,60 @@ public: */ static bool hasNonWhitepaceChar(const std::string &s); + /** + * Removes whitespace at the beginning and the end of the given string. + * + * @param s is the string that should be trimmed. + * @return a trimmed copy of s. + */ + static std::string trim(const std::string &s); + + /** + * Trims the given string or vector of chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template + static std::pair trim(const T &s, Filter f) + { + size_t start = 0; + for (size_t i = 0; i < s.size(); i++) { + if (!f(s[i])) { + start = i; + break; + } + } + + size_t end = 0; + for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { + if (!f(s[i])) { + end = i + 1; + break; + } + } + + if (end < start) { + start = 0; + end = 0; + } + + return std::pair{start, end}; + } + + /** + * Collapses the whitespaces in the given string (trims the string and + * replaces all whitespace characters by a single one). + * + * @param s is the string in which the whitespace should be collapsed. + * @return a copy of s with collapsed whitespace. + */ + static std::string collapse(const std::string &s); + /** * Turns the elements of a collection into a string separated by the * given delimiter. 
@@ -159,6 +213,24 @@ public: */ static std::string extractFileExtension(const std::string &filename); + /** + * Checks whether the given string starts with the given prefix. + * + * @param s is the string. + * @param prefix is the string which should be checked for being a prefix of + * s. + */ + static bool startsWith(const std::string &s, const std::string &prefix); + + /** + * Checks whether the given string ends with the given suffix. + * + * @param s is the string. + * @param suffix is the string which should be checked for being a suffix of + * s. + */ + static bool endsWith(const std::string &s, const std::string &suffix); + /** * Hash functional to be used for enum classes. * See http://stackoverflow.com/a/24847480/2188211 diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp index 1e9f36a..72a2291 100644 --- a/src/core/common/Whitespace.hpp +++ b/src/core/common/Whitespace.hpp @@ -19,8 +19,7 @@ /** * @file Whitespace.hpp * - * Contains the WhitespaceMode enum used in various places, as well es functions - * for trimming and collapsing whitespaces. + * Contains the WhitespaceMode enum used in various places. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -55,65 +54,6 @@ enum class WhitespaceMode { COLLAPSE }; -/** - * Collection of functions for trimming or collapsing whitespace. - */ -class Whitespace { - /** - * Removes whitespace at the beginning and the end of the given string. - * - * @param s is the string that should be trimmed. - * @return a trimmed copy of s. - */ - static std::string trim(const std::string &s); - - /** - * Trims the given string or vector of chars by returning the start and end - * index. - * - * @param s is the container that should be trimmed. - * @param f is a function that returns true for values that should be - * removed. - * @return start and end index. 
Note that "end" points at the character - * beyond the end, thus "end" minus "start" - */ - template - static std::pair trim(const T &s, Filter f) - { - size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { - if (!f(s[i])) { - start = i; - break; - } - } - - size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { - if (!f(s[i])) { - end = i + 1; - break; - } - } - - if (end < start) { - start = 0; - end = 0; - } - - return std::pair{start, end}; - } - - /** - * Collapses the whitespaces in the given string (trims the string and - * replaces all whitespace characters by a single one). - * - * @param s is the string in which the whitespace should be collapsed. - * @return a copy of s with collapsed whitespace. - */ - static std::string collapse(const std::string &s); -}; - } #endif /* _OUSIA_WHITESPACE_HPP_ */ diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp index 1935c24..79e0518 100644 --- a/src/core/common/WhitespaceHandler.hpp +++ b/src/core/common/WhitespaceHandler.hpp @@ -32,7 +32,7 @@ #include #include -#include "WhitespaceHandler.hpp" +#include "Utils.hpp" namespace ousia { @@ -76,7 +76,7 @@ public: /** * Returns the content of the WhitespaceHandler as string. 
*/ - std::string toString() + std::string toString() const { return std::string(textBuf.data(), textBuf.size()); } @@ -214,7 +214,8 @@ inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, size_t start) { for (auto elem : buf) { - handler.append(elem, start++); + handler.append(elem, start, start + 1); + start++; } } } diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 6b8a916..a4bf4b2 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -65,5 +65,22 @@ TEST(Utils, extractFileExtension) ASSERT_EQ("ext", Utils::extractFileExtension("foo.bar/test.EXT")); } +TEST(Utils, startsWith) +{ + ASSERT_TRUE(Utils::startsWith("foobar", "foo")); + ASSERT_TRUE(Utils::startsWith("foo", "foo")); + ASSERT_FALSE(Utils::startsWith("foo", "foobar")); + ASSERT_FALSE(Utils::startsWith("foobar", "bar")); + ASSERT_TRUE(Utils::startsWith("foo", "")); +} + +TEST(Utils, endsWith) +{ + ASSERT_FALSE(Utils::endsWith("foobar", "foo")); + ASSERT_TRUE(Utils::endsWith("foo", "foo")); + ASSERT_FALSE(Utils::endsWith("foo", "foobar")); + ASSERT_TRUE(Utils::endsWith("foobar", "bar")); + ASSERT_TRUE(Utils::endsWith("foo", "")); +} } -- cgit v1.2.3 From 7e5deb666c9087eedd786adaf38c6118ca91a09c Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:50:43 +0100 Subject: Removed GenericParser. All needed functionality will be present in the Stack class. 
--- src/core/parser/generic/GenericParser.cpp | 0 src/core/parser/generic/GenericParser.hpp | 138 ------------------------------ 2 files changed, 138 deletions(-) delete mode 100644 src/core/parser/generic/GenericParser.cpp delete mode 100644 src/core/parser/generic/GenericParser.hpp (limited to 'src/core') diff --git a/src/core/parser/generic/GenericParser.cpp b/src/core/parser/generic/GenericParser.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/src/core/parser/generic/GenericParser.hpp b/src/core/parser/generic/GenericParser.hpp deleted file mode 100644 index 53cb982..0000000 --- a/src/core/parser/generic/GenericParser.hpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file GenericParser.hpp - * - * The GenericParser class builds an abstraction layer that separates the - * underlying document format (e.g. osdm or osdmx) from the actual process of - * building the document model. It provides a set of genric functions that - * should be called by the inheriting concrete parser class, e.g. indicating a - * command with parameters, the start/end of a field or the start/end of an - * annotation. The GenericParser maintains an internal stack of - * ParserStateHandlers and relays the commands to the elements of this stack. 
- * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_GENERIC_PARSER_HPP_ -#define _OUSIA_GENERIC_PARSER_HPP_ - -#include - -#include "ParserStateStack.hpp" -#include "ParserStateHandler.hpp" -#include "ParserState.hpp" - -namespace ousia { - -/** - * The abstract GenericParser class is merely a convenience class for Parsers - * which use the ParserStateStack class. It maintains a ParserStateStack - * instance and provides functions which directly forward the given data to the - * ParserStateStack. It also implements the ParserStateCallbacks inteface which - * is used by ParserStateHandlers to influence the parsing process (such as - * setting the whitespace mode or registering new entities). - */ -class GenericParser : public Parser, public ParserStateCallbacks { - -private: - /** - * Internal ParserStateStack instance. - */ - ParserStateStack stack; - -protected: - /** - * Forwards the "command" event to the ParserStateStack instance. - * - * @param name is the name of the command (including the namespace - * separator ':') and its corresponding location. Must be a string variant. - * @param args is a map variant containing the arguments that were passed to - * the command. - */ - void command(Variant name, Variant args) - { - stack.command(std::move(name), std::move(args)); - } - - /** - * Forwards the "fieldStart" event to the ParserStateStack instance. - */ - void fieldStart() - { - stack.fieldStart(); - } - - /** - * Forwards the "fieldEnd" event to the ParserStateStack instance. - */ - void fieldEnd() - { - stack.fieldEnd(); - } - - /** - * Forwards the "data" event to the ParserStateStack instance. - * - * @param data is a variant of any type containing the data that was parsed - * as data. - */ - void data(Variant data) - { - stack.data(std::move(data)); - } - - /** - * Forwards the "annotationStart" event to the ParserStateStack instance. - * - * @param name is the name of the annotation class. 
- * @param args is a map variant containing the arguments that were passed - * to the annotation. - */ - void annotationStart(Variant name, Variant args) - { - stack.annotationStart(std::move(name), std::move(args)); - } - - /** - * Forwards the "annotationEnd" event to the ParserStateStack instance. - * - * @param name is the name of the annotation class that was ended. - * @param annotationName is the name of the annotation that was ended. - */ - void annotationEnd(Variant name, Variant annotationName) - { - stack.annotationEnd(std::move(name), std::move(annotationName)); - } - - /** - * Forwards the "token" call to the ParserStateStack instance. - * - * @param token is string variant containing the token that was encountered. - */ - void token(Variant token) - { - stack.token(std::move(token)); - } -}; - -} - -#endif _OUSIA_GENERIC_PARSER_HPP_ - -- cgit v1.2.3 From 35dbde2dbf65d4e35e64b0ffa3d43f6f96a7ef9a Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:51:41 +0100 Subject: Splitted ParserStateCallbacks into two classes, one for communication between the Handler and the Stack and one for communication between the Stack and the Parser --- src/core/parser/generic/ParserStateCallbacks.cpp | 2 ++ src/core/parser/generic/ParserStateCallbacks.hpp | 44 +++++++++++++++--------- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/generic/ParserStateCallbacks.cpp b/src/core/parser/generic/ParserStateCallbacks.cpp index e684ee4..50bac57 100644 --- a/src/core/parser/generic/ParserStateCallbacks.cpp +++ b/src/core/parser/generic/ParserStateCallbacks.cpp @@ -20,5 +20,7 @@ namespace ousia { +/* Class ParserStateCallbacks */ + } diff --git a/src/core/parser/generic/ParserStateCallbacks.hpp b/src/core/parser/generic/ParserStateCallbacks.hpp index c2d7cb3..7ec5264 100644 --- a/src/core/parser/generic/ParserStateCallbacks.hpp +++ b/src/core/parser/generic/ParserStateCallbacks.hpp @@ -36,14 +36,18 @@ 
namespace ousia { /** - * Interface defining a set of callback functions that can be directed from a - * ParserStateHandler to the ParserStateStack and form the ParserStateStack - * to the actual parser. + * Interface defining a set of callback functions that act as a basis for the + * ParserStateStackCallbacks and the ParserCallbacks. */ class ParserStateCallbacks { public: /** - * Sets the whitespace mode that specifies how (string data) should be + * Virtual descructor. + */ + virtual ~ParserStateCallbacks() {}; + + /** + * Sets the whitespace mode that specifies how string data should be * processed. * * @param whitespaceMode specifies one of the three WhitespaceMode constants @@ -60,19 +64,6 @@ public: */ virtual void setDataType(VariantType type) = 0; - /** - * Checks whether the given token is supported by the parser. The parser - * returns true, if the token is supported, false if this token cannot be - * registered. Note that parsers that do not support the registration of - * tokens at all should always return "true". - * - * @param token is the token that should be checked for support. - * @return true if the token is generally supported (or the parser does not - * support registering tokens at all), false if the token is not supported, - * because e.g. it is a reserved token or it interferes with other tokens. - */ - virtual bool supportsToken(const std::string &token) = 0; - /** * Registers the given token as token that should be reported to the handler * using the "token" function. @@ -90,6 +81,25 @@ public: virtual void unregisterToken(const std::string &token) = 0; }; +/** + * Interface defining the callback functions that can be passed from a + * ParserStateStack to the underlying parser. + */ +class ParserCallbacks : public ParserStateCallbacks { + /** + * Checks whether the given token is supported by the parser. The parser + * returns true, if the token is supported, false if this token cannot be + * registered. 
Note that parsers that do not support the registration of + * tokens at all should always return "true". + * + * @param token is the token that should be checked for support. + * @return true if the token is generally supported (or the parser does not + * support registering tokens at all), false if the token is not supported, + * because e.g. it is a reserved token or it interferes with other tokens. + */ + virtual bool supportsToken(const std::string &token) = 0; +} + } #endif /* _OUSIA_PARSER_STATE_CALLBACKS_HPP_ */ -- cgit v1.2.3 From 8c9820889e0e23b3d6a349e3f85d3700f1462e3c Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:52:08 +0100 Subject: Implemented callback functions in ParserStateHandler --- src/core/parser/generic/ParserStateHandler.cpp | 31 ++++++++- src/core/parser/generic/ParserStateHandler.hpp | 91 ++++++++++++++++++++------ 2 files changed, 100 insertions(+), 22 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/generic/ParserStateHandler.cpp b/src/core/parser/generic/ParserStateHandler.cpp index 96b9217..64e2bfa 100644 --- a/src/core/parser/generic/ParserStateHandler.cpp +++ b/src/core/parser/generic/ParserStateHandler.cpp @@ -24,11 +24,13 @@ namespace ousia { /* Class ParserStatedata */ -ParserStatedata::ParserStatedata(ParserContext &ctx, std::string name, - const ParserState &state, +ParserStatedata::ParserStatedata(ParserContext &ctx, + ParserStateCallbacks &callbacks, + std::string name, const ParserState &state, const ParserState &parentState, const SourceLocation location) : ctx(ctx), + callbacks(callbacks), name(std::move(name)), state(state), parentState(parentState), @@ -56,6 +58,31 @@ const ParserState &ParserStateHandler::state() { return data.state; } SourceLocation ParserStateHandler::location() { return data.location; } +void ParserStateHandler::setWhitespaceMode(WhitespaceMode whitespaceMode) +{ + data.callbacks.setWhitespaceMode(whitespaceMode); +} + +void 
ParserStateHandler::setDataType(VariantType type) +{ + data.callbacks.setDataType(type); +} + +bool ParserStateHandler::supportsToken(const std::string &token) +{ + return data.callbacks.supportsToken(token); +} + +void ParserStateHandler::registerToken(const std::string &token) +{ + data.callbacks.registerToken(token); +} + +void ParserStateHandler::unregisterToken(const std::string &token) +{ + data.callbacks.unregisterToken(token); +} + void ParserStateHandler::data(const std::string &data, int field) { if (Utils::hasNonWhitepaceChar(data)) { diff --git a/src/core/parser/generic/ParserStateHandler.hpp b/src/core/parser/generic/ParserStateHandler.hpp index 35ad6eb..f3c836e 100644 --- a/src/core/parser/generic/ParserStateHandler.hpp +++ b/src/core/parser/generic/ParserStateHandler.hpp @@ -44,7 +44,14 @@ public: ParserContext &ctx; /** - * Contains the name of the tag that is being handled. + * Reference at an instance of the ParserStateCallbacks class, used for + * modifying the behaviour of the parser (like registering tokens, setting + * the data type or changing the whitespace handling mode). + */ + ParserStateCallbacks &callbacks; + + /** + * Contains the name of the command that is being handled. */ const std::string name; @@ -67,15 +74,17 @@ public: * Constructor of the HandlerData class. * * @param ctx is the parser context the handler should be executed in. + * @param callbacks is an instance of ParserStateCallbacks used to notify + * the parser about certain state changes. * @param name is the name of the string. * @param state is the state this handler was called for. * @param parentState is the state of the parent command. * @param location is the location at which the handler is created. 
*/ - ParserStateHandlerData(ParserContext &ctx, std::string name, - const ParserState &state, + ParserStateHandlerData(ParserContext &ctx, ParserStateCallbacks &callbacks, + std::string name, const ParserState &state, const ParserState &parentState, - const SourceLocation location); + const SourceLocation &location); }; /** @@ -110,28 +119,28 @@ public: * * @return a reference at the ParserContext. */ - ParserContext &context() { return handlerData.ctx; } + ParserContext &context(); /** * Returns the command name for which the handler was created. * * @return a const reference at the command name. */ - const std::string &name() { return handlerData.name; } + const std::string &name(); /** * Returns a reference at the ParserScope instance. * * @return a reference at the ParserScope instance. */ - ParserScope &scope() { return handlerData.ctx.getScope(); } + ParserScope &scope(); /** * Returns a reference at the Manager instance which manages all nodes. * * @return a referance at the Manager instance. */ - Manager &manager() { return handlerData.ctx.getManager(); } + Manager &manager(); /** * Returns a reference at the Logger instance used for logging error @@ -139,7 +148,7 @@ public: * * @return a reference at the Logger instance. */ - Logger &logger() { return handlerData.ctx.getLogger(); } + Logger &logger(); /** * Returns a reference at the Project Node, representing the project into @@ -147,7 +156,7 @@ public: * * @return a referance at the Project Node. */ - Rooted project() { return handlerData.ctx.getProject(); } + Rooted project(); /** * Reference at the ParserState descriptor for which this Handler was @@ -155,24 +164,66 @@ public: * * @return a const reference at the constructing ParserState descriptor. */ - const ParserState &state() { return handlerData.state; } + const ParserState &state(); /** - * Reference at the ParserState descriptor of the parent state of the state - * for which this Handler was created. 
Set to ParserStates::None if there - * is no parent state. + * Returns the current location in the source file. * - * @return a const reference at the parent state of the constructing - * ParserState descriptor. + * @return the current location in the source file. */ - const ParserState &parentState() { return handlerData.parentState; } + SourceLocation location(); /** - * Returns the current location in the source file. + * Calls the corresponding function in the ParserStateCallbacks instance. + * Sets the whitespace mode that specifies how string data should be + * processed. * - * @return the current location in the source file. + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + void setWhitespaceMode(WhitespaceMode whitespaceMode); + + /** + * Calls the corresponding function in the ParserStateCallbacks instance. + * Sets the type as which the variant data should be parsed. + * + * @param type is one of the VariantType constants, specifying with which + * type the data that is passed to the ParserStateHandler in the "data" + * function should be handled. + */ + void setDataType(VariantType type); + + /** + * Calls the corresponding function in the ParserStateCallbacks instance. + * Checks whether the given token is supported by the parser. The parser + * returns true, if the token is supported, false if this token cannot be + * registered. Note that parsers that do not support the registration of + * tokens at all should always return "true". + * + * @param token is the token that should be checked for support. + * @return true if the token is generally supported (or the parser does not + * support registering tokens at all), false if the token is not supported, + * because e.g. it is a reserved token or it interferes with other tokens. + */ + bool supportsToken(const std::string &token); + + /** + * Calls the corresponding function in the ParserStateCallbacks instance. 
+ * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + void registerToken(const std::string &token); + + /** + * Calls the corresponding function in the ParserStateCallbacks instance. + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. */ - SourceLocation location() { return handlerData.location; } + void unregisterToken(const std::string &token); /** * Called when the command that was specified in the constructor is -- cgit v1.2.3 From 0b93aa3ff50756fbb1d93c7134fe2cc7f093fa75 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:53:10 +0100 Subject: Redefined public interface of ParserStateStack --- src/core/parser/generic/ParserStateStack.hpp | 64 ++++++++++++++++------------ 1 file changed, 36 insertions(+), 28 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/generic/ParserStateStack.hpp b/src/core/parser/generic/ParserStateStack.hpp index 68c4026..b106475 100644 --- a/src/core/parser/generic/ParserStateStack.hpp +++ b/src/core/parser/generic/ParserStateStack.hpp @@ -131,51 +131,59 @@ public: std::string currentCommandName(); /** - * Function that should be called whenever a new command starts. + * Function that should be called whenever a new command is reached. * - * @param name is the name of the command. - * @param args is a map from strings to variants (argument name and value). - * Note that the passed map will be modified. - * @param location is the location in the source file at which the command - * starts. + * @param name is the name of the command (including the namespace + * separator ':') and its corresponding location. Must be a string variant. + * @param args is a map variant containing the arguments that were passed to + * the command. 
*/ - void start(const std::string &name, Variant::mapType &args, - const SourceLocation &location = SourceLocation{}); + void command(Variant name, Variant args); /** - * Function that should be called whenever a new command starts. + * Function that should be called whenever a new field starts. Fields of the + * same command may not be separated by calls to + */ + void fieldStart(); + + /** + * Function that should be called whenever a field ends. + */ + void fieldEnd(); + + /** + * Function that shuold be called whenever character data is found in the + * input stream. * - * @param name is the name of the command. - * @param args is a map from strings to variants (argument name and value). - * @param location is the location in the source file at which the command - * starts. + * @param data is a variant of any type containing the data that was parsed + * as data. */ - void start(std::string name, - const Variant::mapType &args = Variant::mapType{}, - const SourceLocation &location = SourceLocation{}); + void data(Variant data); /** - * Function called whenever a command ends. + * Function that should be called whenever an annotation starts. + * + * @param name is the name of the annotation class. + * @param args is a map variant containing the arguments that were passed + * to the annotation. */ - void end(); + void annotationStart(Variant name, Variant args); /** - * Function that should be called whenever data is available for the - * command. + * Function that should be called whenever an annotation ends. * - * @param data is the data that should be passed to the handler. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). + * @param name is the name of the annotation class that was ended. + * @param annotationName is the name of the annotation that was ended. 
*/ - void data(const std::string &data, int field = 0); + void annotationEnd(Variant name, Variant annotationName); /** - * Returns a reference to the parser context the parser stack is currently - * working on. + * Function that should be called whenever a previously registered token + * is found in the input stream. * - * @return a reference to the parser context. + * @param token is string variant containing the token that was encountered. */ - ParserContext &getContext() { return ctx; } + void token(Variant token); }; } -- cgit v1.2.3 From 9f9e51974e782c4eb6f393ca3d4c3382df093bf1 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:58:55 +0100 Subject: Moved Tokenizer to core/parser/utils and adapted name --- src/core/parser/utils/Tokenizer.cpp | 56 ++++++++++++++++++------------------- src/core/parser/utils/Tokenizer.hpp | 34 +++++++++++----------- 2 files changed, 45 insertions(+), 45 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index 1fac25a..3c8177d 100644 --- a/src/core/parser/utils/Tokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -24,7 +24,7 @@ #include #include -#include "DynamicTokenizer.hpp" +#include "Tokenizer.hpp" namespace ousia { @@ -39,7 +39,7 @@ struct TokenMatch { /** * Token that was matched. */ - DynamicToken token; + Token token; /** * Current length of the data within the text handler. The text buffer needs @@ -117,10 +117,10 @@ public: * @param c is the character that should be appended to the current prefix. * @param lookups is a list to which new TokeLookup instances are added -- * which could potentially be expanded in the next iteration. - * @param match is the DynamicToken instance to which the matching token + * @param match is the Token instance to which the matching token * should be written. * @param tokens is a reference at the internal token list of the - * DynamicTokenizer. + * Tokenizer. 
* @param end is the end byte offset of the current character. * @param sourceId is the source if of this file. */ @@ -143,7 +143,7 @@ public: size_t len = str.size(); if (len > match.token.content.size()) { match.token = - DynamicToken{node->type, str, {sourceId, start, end}}; + Token{node->type, str, {sourceId, start, end}}; match.textLength = textLength; match.textEnd = textEnd; } @@ -181,15 +181,15 @@ static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match, } } -/* Class DynamicTokenizer */ +/* Class Tokenizer */ -DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode) +Tokenizer::Tokenizer(WhitespaceMode whitespaceMode) : whitespaceMode(whitespaceMode), nextTokenTypeId(0) { } template -bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token) +bool Tokenizer::next(CharReader &reader, Token &token) { // If we're in the read mode, reset the char reader peek position to the // current read position @@ -268,12 +268,12 @@ bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token) } token = match.token; } else { - token = DynamicToken{}; + token = Token{}; } return match.hasMatch(); } -bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token) +bool Tokenizer::read(CharReader &reader, Token &token) { switch (whitespaceMode) { case WhitespaceMode::PRESERVE: @@ -286,7 +286,7 @@ bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token) return false; } -bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token) +bool Tokenizer::peek(CharReader &reader, Token &token) { switch (whitespaceMode) { case WhitespaceMode::PRESERVE: @@ -299,7 +299,7 @@ bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token) return false; } -TokenTypeId DynamicTokenizer::registerToken(const std::string &token) +TokenTypeId Tokenizer::registerToken(const std::string &token) { // Abort if an empty token should be registered if (token.empty()) { @@ -337,7 +337,7 @@ TokenTypeId 
DynamicTokenizer::registerToken(const std::string &token) return type; } -bool DynamicTokenizer::unregisterToken(TokenTypeId type) +bool Tokenizer::unregisterToken(TokenTypeId type) { // Unregister the token from the trie, abort if an invalid type is given if (type < tokens.size() && trie.unregisterToken(tokens[type])) { @@ -348,7 +348,7 @@ bool DynamicTokenizer::unregisterToken(TokenTypeId type) return false; } -std::string DynamicTokenizer::getTokenString(TokenTypeId type) +std::string Tokenizer::getTokenString(TokenTypeId type) { if (type < tokens.size()) { return tokens[type]; @@ -356,26 +356,26 @@ std::string DynamicTokenizer::getTokenString(TokenTypeId type) return std::string{}; } -void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode) +void Tokenizer::setWhitespaceMode(WhitespaceMode mode) { whitespaceMode = mode; } -WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; } +WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; } /* Explicitly instantiate all possible instantiations of the "next" member function */ -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); -template bool DynamicTokenizer::next( - CharReader &reader, DynamicToken &token); +template bool Tokenizer::next( + CharReader &reader, Token &token); +template bool Tokenizer::next( + CharReader &reader, Token &token); +template bool Tokenizer::next( + CharReader &reader, Token &token); +template bool Tokenizer::next( + CharReader &reader, Token &token); +template bool Tokenizer::next( + CharReader &reader, Token &token); +template bool Tokenizer::next( + CharReader &reader, Token &token); } 
diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp index 3e5aeb3..6b4e116 100644 --- a/src/core/parser/utils/Tokenizer.hpp +++ b/src/core/parser/utils/Tokenizer.hpp @@ -17,7 +17,7 @@ */ /** - * @file DynamicTokenizer.hpp + * @file Tokenizer.hpp * * Tokenizer that can be reconfigured at runtime used for parsing the plain * text format. @@ -43,9 +43,9 @@ namespace ousia { class CharReader; /** - * The DynamicToken structure describes a token discovered by the Tokenizer. + * The Token structure describes a token discovered by the Tokenizer. */ -struct DynamicToken { +struct Token { /** * Id of the type of this token. */ @@ -64,28 +64,28 @@ struct DynamicToken { /** * Default constructor. */ - DynamicToken() : type(EmptyToken) {} + Token() : type(EmptyToken) {} /** - * Constructor of the DynamicToken struct. + * Constructor of the Token struct. * * @param id represents the token type. * @param content is the string content that has been extracted. * @param location is the location of the extracted string content in the * source file. */ - DynamicToken(TokenTypeId type, const std::string &content, + Token(TokenTypeId type, const std::string &content, SourceLocation location) : type(type), content(content), location(location) { } /** - * Constructor of the DynamicToken struct, only initializes the token type + * Constructor of the Token struct, only initializes the token type * * @param type is the id corresponding to the type of the token. */ - DynamicToken(TokenTypeId type) : type(type) {} + Token(TokenTypeId type) : type(type) {} /** * The getLocation function allows the tokens to be directly passed as @@ -97,13 +97,13 @@ struct DynamicToken { }; /** - * The DynamicTokenizer is used to extract tokens and chunks of text from a + * The Tokenizer is used to extract tokens and chunks of text from a * CharReader. It allows to register and unregister tokens while parsing and * to modify the handling of whitespace characters. 
Note that the - * DynamicTokenizer always tries to extract the longest possible token from the + * Tokenizer always tries to extract the longest possible token from the * tokenizer. */ -class DynamicTokenizer { +class Tokenizer { private: /** * Internally used token trie. This object holds all registered tokens. @@ -140,15 +140,15 @@ private: * @return false if the end of the stream has been reached, true otherwise. */ template - bool next(CharReader &reader, DynamicToken &token); + bool next(CharReader &reader, Token &token); public: /** - * Constructor of the DynamicTokenizer class. + * Constructor of the Tokenizer class. * * @param whitespaceMode specifies how whitespace should be handled. */ - DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); + Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * Registers the given string as a token. Returns a const pointer at a @@ -201,7 +201,7 @@ public: /** * Reads a new token from the CharReader and stores it in the given - * DynamicToken instance. + * Token instance. * * @param reader is the CharReader instance from which the data should be * read. @@ -210,7 +210,7 @@ public: * @return true if a token could be read, false if the end of the stream * has been reached. */ - bool read(CharReader &reader, DynamicToken &token); + bool read(CharReader &reader, Token &token); /** * The peek method does not advance the read position of the char reader, @@ -223,7 +223,7 @@ public: * @return true if a token could be read, false if the end of the stream * has been reached. 
*/ - bool peek(CharReader &reader, DynamicToken &token); + bool peek(CharReader &reader, Token &token); }; } -- cgit v1.2.3 From 2659b4595d809cbd69a77e5ff7e2fc08d225f065 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:02:54 +0100 Subject: Tidied OsxmlEventParser up, implemented correct whitespace handling, started to write unit tests for the osxml parser --- CMakeLists.txt | 93 +++--- src/core/common/Utils.hpp | 21 +- src/core/common/WhitespaceHandler.hpp | 60 ++++ src/formats/osxml/OsxmlAttributeLocator.cpp | 144 ++++++++++ src/formats/osxml/OsxmlAttributeLocator.hpp | 67 +++++ src/formats/osxml/OsxmlEventParser.cpp | 425 +++++++++++++++------------- src/formats/osxml/OsxmlEventParser.hpp | 44 +-- test/formats/osml/OsmlStreamParserTest.cpp | 1 + test/formats/osxml/OsxmlEventParserTest.cpp | 222 +++++++++++++++ 9 files changed, 811 insertions(+), 266 deletions(-) create mode 100644 src/formats/osxml/OsxmlAttributeLocator.cpp create mode 100644 src/formats/osxml/OsxmlAttributeLocator.hpp create mode 100644 test/formats/osxml/OsxmlEventParserTest.cpp (limited to 'src/core') diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e3b90f..bdc9541 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,9 +147,9 @@ ADD_LIBRARY(ousia_core src/core/model/RootNode src/core/model/Style src/core/model/Typesystem -# src/core/parser/Parser -# src/core/parser/ParserContext -# src/core/parser/ParserScope + src/core/parser/Parser + src/core/parser/ParserContext + src/core/parser/ParserScope # src/core/parser/generic/ParserState # src/core/parser/generic/ParserStateCallbacks # src/core/parser/generic/ParserStateHandler @@ -183,36 +183,37 @@ TARGET_LINK_LIBRARIES(ousia_osml ousia_core ) -#ADD_LIBRARY(ousia_osxml -# src/formats/osxml/osxmlParser -#) +ADD_LIBRARY(ousia_osxml + src/formats/osxml/OsxmlAttributeLocator + src/formats/osxml/OsxmlEventParser +) -#TARGET_LINK_LIBRARIES(ousia_osxml -# ousia_core -# ${EXPAT_LIBRARIES} -#) 
+TARGET_LINK_LIBRARIES(ousia_osxml + ousia_core + ${EXPAT_LIBRARIES} +) # Resource locators -#ADD_LIBRARY(ousia_filesystem -# src/plugins/filesystem/FileLocator -# src/plugins/filesystem/SpecialPaths -#) +ADD_LIBRARY(ousia_filesystem + src/plugins/filesystem/FileLocator + src/plugins/filesystem/SpecialPaths +) -#TARGET_LINK_LIBRARIES(ousia_filesystem -# ousia_core -# ${Boost_LIBRARIES} -#) +TARGET_LINK_LIBRARIES(ousia_filesystem + ousia_core + ${Boost_LIBRARIES} +) # Output libraries -#ADD_LIBRARY(ousia_html -# src/plugins/html/DemoOutput -#) +ADD_LIBRARY(ousia_html + src/plugins/html/DemoOutput +) -#TARGET_LINK_LIBRARIES(ousia_html -# ousia_core -#) +TARGET_LINK_LIBRARIES(ousia_html + ousia_core +) #ADD_LIBRARY(ousia_mozjs # src/plugins/mozjs/MozJsScriptEngine @@ -247,7 +248,7 @@ IF(TEST) ADD_EXECUTABLE(ousia_test_core test/core/RangeSetTest -# test/core/RegistryTest + test/core/RegistryTest test/core/XMLTest test/core/common/ArgumentTest test/core/common/CharReaderTest @@ -272,7 +273,7 @@ IF(TEST) test/core/model/NodeTest test/core/model/StyleTest test/core/model/TypesystemTest -# test/core/parser/ParserScopeTest + test/core/parser/ParserScopeTest # test/core/parser/ParserStackTest # test/core/parser/ParserStateTest test/core/parser/utils/TokenizerTest @@ -311,15 +312,15 @@ IF(TEST) # ousia_css # ) -# ADD_EXECUTABLE(ousia_test_html -# test/plugins/html/DemoOutputTest -# ) + ADD_EXECUTABLE(ousia_test_html + test/plugins/html/DemoOutputTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_html -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_html -# ) + TARGET_LINK_LIBRARIES(ousia_test_html + ${GTEST_LIBRARIES} + ousia_core + ousia_html + ) ADD_EXECUTABLE(ousia_test_osml test/formats/osml/OsmlStreamParserTest @@ -331,16 +332,16 @@ IF(TEST) ousia_osml ) -# ADD_EXECUTABLE(ousia_test_osxml -# test/plugins/xml/XmlParserTest -# ) + ADD_EXECUTABLE(ousia_test_osxml + test/formats/osxml/OsxmlEventParserTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_osxml -# ${GTEST_LIBRARIES} -# 
ousia_core -# ousia_osml -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_osxml + ${GTEST_LIBRARIES} + ousia_core + ousia_osxml + ousia_filesystem + ) # ADD_EXECUTABLE(ousia_test_mozjs # test/plugins/mozjs/MozJsScriptEngineTest @@ -354,11 +355,11 @@ IF(TEST) # Register the unit tests ADD_TEST(ousia_test_core ousia_test_core) -# ADD_TEST(ousia_test_filesystem ousia_test_filesystem) + ADD_TEST(ousia_test_filesystem ousia_test_filesystem) # ADD_TEST(ousia_test_css ousia_test_css) -# ADD_TEST(ousia_test_html ousia_test_html) + ADD_TEST(ousia_test_html ousia_test_html) ADD_TEST(ousia_test_osml ousia_test_osml) -# ADD_TEST(ousia_test_osxml ousia_test_osxml) + ADD_TEST(ousia_test_osxml ousia_test_osxml) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) ENDIF() diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 16a9136..8361973 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -119,9 +119,26 @@ public: */ template static std::pair trim(const T &s, Filter f) + { + return trim(s, s.size(), f); + } + + /** + * Trims the given string or vector of chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param len is the number of elements in the container. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. 
Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template + static std::pair trim(const T &s, size_t len, Filter f) { size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { + for (size_t i = 0; i < len; i++) { if (!f(s[i])) { start = i; break; @@ -129,7 +146,7 @@ public: } size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { + for (ssize_t i = len - 1; i >= static_cast(start); i--) { if (!f(s[i])) { end = i + 1; break; diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp index 79e0518..ed52ea3 100644 --- a/src/core/common/WhitespaceHandler.hpp +++ b/src/core/common/WhitespaceHandler.hpp @@ -97,6 +97,25 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd); + } + + /** + * Static version of PreservingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd) { if (textBuf.empty()) { textStart = start; @@ -129,6 +148,27 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); + } + + /** + * Static version of TrimmingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. 
+ * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param whitespaceBuf is a reference at the buffer for storing whitespace + * characters. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd, std::vector &whitespaceBuf) { // Handle whitespace characters if (Utils::isWhitespace(c)) { @@ -174,6 +214,26 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); + } + + /** + * Static version of CollapsingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param hasWhitespace is a reference at the "hasWhitespace" flag. 
+ */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd, bool &hasWhitespace) { // Handle whitespace characters if (Utils::isWhitespace(c)) { diff --git a/src/formats/osxml/OsxmlAttributeLocator.cpp b/src/formats/osxml/OsxmlAttributeLocator.cpp new file mode 100644 index 0000000..e37446a --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.cpp @@ -0,0 +1,144 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include + +#include "OsxmlAttributeLocator.hpp" + +namespace ousia { + +/** + * Enum used internally in the statemachine of the xml argument parser. + */ +enum class XmlAttributeState { + IN_TAG_NAME, + SEARCH_ATTR, + IN_ATTR_NAME, + HAS_ATTR_NAME, + HAS_ATTR_EQUALS, + IN_ATTR_DATA +}; + +std::map OsxmlAttributeLocator::locate( + CharReader &reader, size_t offs) +{ + std::map res; + + // Fork the reader, we don't want to mess up the XML parsing process, do we? + CharReaderFork readerFork = reader.fork(); + + // Move the read cursor to the start location, abort if this does not work + if (offs != readerFork.seek(offs)) { + return res; + } + + // Now all we need to do is to implement one half of an XML parser. As this + // is inherently complicated we'll totaly fail at it. Don't care. 
All we + // want to get is those darn offsets for pretty error messages... (and we + // can assume the XML is valid as it was already read by expat) + XmlAttributeState state = XmlAttributeState::IN_TAG_NAME; + char c; + std::stringstream attrName; + while (readerFork.read(c)) { + // Abort at the end of the tag + if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) { + return res; + } + + // One state machine to rule them all, one state machine to find them, + // One state machine to bring them all and in the darkness bind them + // (the byte offsets) + switch (state) { + case XmlAttributeState::IN_TAG_NAME: + if (Utils::isWhitespace(c)) { + res.emplace("$tag", + SourceLocation{reader.getSourceId(), offs + 1, + readerFork.getOffset() - 1}); + state = XmlAttributeState::SEARCH_ATTR; + } + break; + case XmlAttributeState::SEARCH_ATTR: + if (!Utils::isWhitespace(c)) { + state = XmlAttributeState::IN_ATTR_NAME; + attrName << c; + } + break; + case XmlAttributeState::IN_ATTR_NAME: + if (Utils::isWhitespace(c)) { + state = XmlAttributeState::HAS_ATTR_NAME; + } else if (c == '=') { + state = XmlAttributeState::HAS_ATTR_EQUALS; + } else { + attrName << c; + } + break; + case XmlAttributeState::HAS_ATTR_NAME: + if (!Utils::isWhitespace(c)) { + if (c == '=') { + state = XmlAttributeState::HAS_ATTR_EQUALS; + break; + } + // Well, this is a strange XML file... We expected to + // see a '=' here! Try to continue with the + // "HAS_ATTR_EQUALS" state as this state will hopefully + // inlcude some error recovery + } else { + // Skip whitespace here + break; + } + // Fallthrough + case XmlAttributeState::HAS_ATTR_EQUALS: + if (!Utils::isWhitespace(c)) { + if (c == '"') { + // Here we are! We have found the beginning of an + // attribute. 
Let's quickly lock the current offset away + // in the result map + res.emplace(attrName.str(), + SourceLocation{reader.getSourceId(), + readerFork.getOffset()}); + state = XmlAttributeState::IN_ATTR_DATA; + } else { + // No, this XML file is not well formed. Assume we're in + // an attribute name once again + attrName.str(std::string{&c, 1}); + state = XmlAttributeState::IN_ATTR_NAME; + } + } + break; + case XmlAttributeState::IN_ATTR_DATA: + if (c == '"') { + // We're at the end of the attribute data, set the end + // location + auto it = res.find(attrName.str()); + if (it != res.end()) { + it->second.setEnd(readerFork.getOffset() - 1); + } + + // Reset the attribute name and restart the search + attrName.str(std::string{}); + state = XmlAttributeState::SEARCH_ATTR; + } + break; + } + } + return res; +} +} + diff --git a/src/formats/osxml/OsxmlAttributeLocator.hpp b/src/formats/osxml/OsxmlAttributeLocator.hpp new file mode 100644 index 0000000..f9a3437 --- /dev/null +++ b/src/formats/osxml/OsxmlAttributeLocator.hpp @@ -0,0 +1,67 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file OsxmlAttributeLocator.hpp + * + * Contains a class used for locating the byte offsets of the attributes given + * in a XML tag. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ +#define _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ + +#include + +namespace ousia { + +// Forward declarations +class CharReader; +class SourceLocation; + +/** + * Class containing one static function for locating the byte offsets of the + * attributes in a XML tag. This are not retrieved by our xml parser, so we have + * to do this manually. + */ +class OsxmlAttributeLocator { +public: + /** + * Function used to reconstruct the location of the attributes of a XML tag + * in the source code. This is necessary, as the xml parser only returns an + * offset to the begining of a tag and not to the position of the individual + * arguments. + * + * @param reader is the char reader from which the character data should be + * read. + * @param offs is a byte offset in the xml file pointing at the "<" + * character of the tag. + * @return a map from attribute keys to the corresponding location + * (including range) of the atribute. Also contains the location of the + * tagname in the form of the virtual attribute "$tag". + */ + static std::map locate(CharReader &reader, + size_t offs); +}; + +} + +#endif /* _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ */ + diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index 2ef170e..b4aff77 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -18,14 +18,22 @@ #include +#include + +#include #include #include +#include #include +#include +#include "OsxmlAttributeLocator.hpp" #include "OsxmlEventParser.hpp" namespace ousia { +/* Class OsxmlEventParser */ + /** * Class containing data used by the internal functions. */ @@ -43,41 +51,75 @@ public: */ ssize_t annotationEndTagDepth; + /** + * Current character data buffer. 
+ */ + std::vector textBuf; + + /** + * Current whitespace buffer (for the trimming whitspace mode) + */ + std::vector whitespaceBuf; + + /** + * Flag indicating whether a whitespace character was present (for the + * collapsing whitespace mode). + */ + bool hasWhitespace; + + /** + * Current character data start. + */ + size_t textStart; + + /** + * Current character data end. + */ + size_t textEnd; + /** * Default constructor. */ - OsxmlEventParserData() : depth(0), annotationEndTagDepth(-1) {} + OsxmlEventParserData(); /** * Increments the depth. */ - void incrDepth() { depth++; } + void incrDepth(); /** * Decrement the depth and reset the annotationEndTagDepth flag. */ - void decrDepth() - { - if (depth > 0) { - depth--; - } - if (depth < annotationEndTagDepth) { - annotationEndTagDepth = -1; - } - } + void decrDepth(); /** * Returns true if we're currently inside an end tag. */ - bool inAnnotationEndTag() { depth >= annotationEndTagDepth; } + bool inAnnotationEndTag(); + + /** + * Returns true if character data is available. + * + * @return true if character data is available. + */ + bool hasText(); + + /** + * Returns a Variant containing the character data and its location. + * + * @return a string variant containing the text data and the character + * location. + */ + Variant getText(SourceId sourceId); }; -namespace { +/* Class GuardedExpatXmlParser */ + /** * Wrapper class around the XML_Parser pointer which safely frees it whenever * the scope is left (e.g. because an exception was thrown). */ -class ScopedExpatXmlParser { +class GuardedExpatXmlParser { private: /** * Internal pointer to the XML_Parser instance. @@ -86,14 +128,14 @@ private: public: /** - * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS + * Constructor of the GuardedExpatXmlParser class. Calls XML_ParserCreateNS * from the expat library. Throws a parser exception if the XML parser * cannot be initialized. 
* * @param encoding is the protocol-defined encoding passed to expat (or * nullptr if expat should determine the encoding by itself). */ - ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) + GuardedExpatXmlParser(const XML_Char *encoding) : parser(nullptr) { parser = XML_ParserCreate(encoding); if (!parser) { @@ -103,9 +145,9 @@ public: } /** - * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance. + * Destuctor of the GuardedExpatXmlParser, frees the XML parser instance. */ - ~ScopedExpatXmlParser() + ~GuardedExpatXmlParser() { if (parser) { XML_ParserFree(parser); @@ -120,134 +162,20 @@ public: }; /** - * Enum used internally in the statemachine of the micro-xml argument parser. + * Name of the special outer tag used for allowing multiple top-level elements + * in an xml file. */ -enum class XmlAttributeState { - IN_TAG_NAME, - SEARCH_ATTR, - IN_ATTR_NAME, - HAS_ATTR_NAME, - HAS_ATTR_EQUALS, - IN_ATTR_DATA -}; +static const std::string TOP_LEVEL_TAG{"ousia"}; /** - * Function used to reconstruct the location of the attributes of a XML tag in - * the source code. This is necessary, as the xml parser only returns an offset - * to the begining of a tag and not to the position of the individual arguments. - * - * @param reader is the char reader from which the character data should be - * read. - * @param offs is a byte offset in the xml file pointing at the "<" character of - * the tag. - * @return a map from attribute keys to the corresponding location (including - * range) of the atribute. Also contains the location of the tagname in the - * form of the virtual attribute "$tag". + * Prefix used to indicate the start of an annoation (note the trailing colon) */ -static std::map xmlReconstructAttributeOffsets( - CharReader &reader, size_t offs) -{ - std::map res; - - // Fork the reader, we don't want to mess up the XML parsing process, do we? 
- CharReaderFork readerFork = reader.fork(); - - // Move the read cursor to the start location, abort if this does not work - if (!location.isValid() || offs != readerFork.seek(offs)) { - return res; - } - - // Now all we need to do is to implement one half of an XML parser. As this - // is inherently complicated we'll totaly fail at it. Don't care. All we - // want to get is those darn offsets for pretty error messages... (and we - // can assume the XML is valid as it was already read by expat) - XmlAttributeState state = XmlAttributeState::IN_TAG_NAME; - char c; - std::stringstream attrName; - while (readerFork.read(c)) { - // Abort at the end of the tag - if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) { - return res; - } +static const std::string ANNOTATION_START_PREFIX{"a:start:"}; - // One state machine to rule them all, one state machine to find them, - // One state machine to bring them all and in the darkness bind them - // (the byte offsets) - switch (state) { - case XmlAttributeState::IN_TAG_NAME: - if (Utils::isWhitespace(c)) { - res.emplace("$tag", - SourceLocation{reader.getSourceId(), offs + 1, - readerFork.getOffset() - 1}); - state = XmlAttributeState::SEARCH_ATTR; - } - break; - case XmlAttributeState::SEARCH_ATTR: - if (!Utils::isWhitespace(c)) { - state = XmlAttributeState::IN_ATTR_NAME; - attrName << c; - } - break; - case XmlAttributeState::IN_ATTR_NAME: - if (Utils::isWhitespace(c)) { - state = XmlAttributeState::HAS_ATTR_NAME; - } else if (c == '=') { - state = XmlAttributeState::HAS_ATTR_EQUALS; - } else { - attrName << c; - } - break; - case XmlAttributeState::HAS_ATTR_NAME: - if (!Utils::isWhitespace(c)) { - if (c == '=') { - state = XmlAttributeState::HAS_ATTR_EQUALS; - break; - } - // Well, this is a strange XML file... We expected to - // see a '=' here! 
Try to continue with the - // "HAS_ATTR_EQUALS" state as this state will hopefully - // inlcude some error recovery - } else { - // Skip whitespace here - break; - } - // Fallthrough - case XmlAttributeState::HAS_ATTR_EQUALS: - if (!Utils::isWhitespace(c)) { - if (c == '"') { - // Here we are! We have found the beginning of an - // attribute. Let's quickly lock the current offset away - // in the result map - res.emplace(attrName.str(), - SourceLocation{reader.getSourceId(), - readerFork.getOffset()}); - state = XmlAttributeState::IN_ATTR_DATA; - } else { - // No, this XML file is not well formed. Assume we're in - // an attribute name once again - attrName.str(std::string{&c, 1}); - state = XmlAttributeState::IN_ATTR_NAME; - } - } - break; - case XmlAttributeState::IN_ATTR_DATA: - if (c == '"') { - // We're at the end of the attribute data, set the end - // location - auto it = res.find(attrName.str()); - if (it != res.end()) { - it->second.setEnd(readerFork.getOffset() - 1); - } - - // Reset the attribute name and restart the search - attrName.str(std::string{}); - state = XmlAttributeState::SEARCH_ATTR; - } - break; - } - } - return res; -} +/** + * Prefix used to indicate the end of an annotation. + */ +static const std::string ANNOTATION_END_PREFIX{"a:end"}; /** * Synchronizes the position of the xml parser with the default location of the @@ -268,22 +196,12 @@ static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0) size_t offs = XML_GetCurrentByteIndex(p); SourceLocation loc = SourceLocation{parser->getReader().getSourceId(), offs, offs + len}; - parser->getLogger().setDefaultLocation(location); + parser->getLogger().setDefaultLocation(loc); // Return the fetched location return loc; } -/** - * Prefix used to indicate the start of an annoation, - */ -static const std::string ANNOTATION_START_PREFIX{"a:start:"}; - -/** - * Prefix used to indicate the end of an annotation. 
- */ -static const std::string ANNOTATION_END_PREFIX{"a:end"}; - /** * Callback called by eXpat whenever a start handler is reached. */ @@ -292,14 +210,21 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); + + // If there is any text data in the buffer, issue that first + if (parser->getData().hasText()) { + parser->getEvents().data( + parser->getData().getText(parser->getReader().getSourceId())); + } // Read the argument locations -- this is only a stupid and slow hack, // but it is necessary, as expat doesn't give use the byte offset of the // arguments. std::map attributeOffsets = - xmlReconstructXMLAttributeOffsets(*userData->reader, - XML_GetCurrentByteIndex(p)); + OsxmlAttributeLocator::locate(parser->getReader(), + XML_GetCurrentByteIndex(p)); // Update the logger position SourceLocation loc = xmlSyncLoggerPosition(p); @@ -316,7 +241,8 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // Make sure we're currently not inside an annotation end tag -- this would // be highly illegal! 
if (parser->getData().inAnnotationEndTag()) { - logger.error("No tags allowed inside an annotation end tag", nameLoc); + parser->getLogger().error( + "No tags allowed inside an annotation end tag", nameLoc); return; } @@ -336,36 +262,33 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // Parse the string, pass the location of the key std::pair value = VariantReader::parseGenericString( - *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(), + *(attr++), parser->getLogger(), keyLoc.getSourceId(), keyLoc.getStart()); // Set the overall location of the parsed element to the attribute // location - value.second->setLocation(keyLoc); - - // Store the - if (!args.emplace(key, value.second).second) { - parser->getLogger().warning( - std::string("Attribute \"") + key + - "\" defined multiple times, only using first definition", - keyLoc); - } + value.second.setLocation(keyLoc); + + // Store the keys in the map + args.emplace(key, value.second).second; } // Fetch the name of the tag, check for special tags std::string nameStr(name); - if (nameStr == "ousia" && parser->getData().depth == 1) { - // We're in the top-level and the magic "ousia" tag is reached -- just + if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 1) { + // We're in the top-level and the magic tag is reached -- just // ignore it and issue a warning for each argument that has been given for (const auto &arg : args) { - parser->getLogger().warning( - std::string("Ignoring attribute \"") + arg.first + - std::string("\" for magic tag \"ousia\""), - arg.second); + parser->getLogger().warning(std::string("Ignoring attribute \"") + + arg.first + + std::string("\" for magic tag \"") + + TOP_LEVEL_TAG + std::string("\""), + arg.second); } } else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) { // Assemble a name variant containing the name minus the prefix - Variant nameVar = nameStr.substr(ANNOTATION_START_PREFIX.size()); + Variant nameVar = + 
Variant::fromString(nameStr.substr(ANNOTATION_START_PREFIX.size())); nameVar.setLocation(nameLoc); // Issue the "annotationStart" event @@ -410,25 +333,34 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, } } -static void xmlEndElementHandler(void *p, const XML_Char *name) +static void xmlEndElementHandler(void *ref, const XML_Char *name) { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); // Synchronize the position of the logger with teh position - xmlSyncLoggerPosition(parser); - - // Decrement the current depth - parser->getData().decrDepth(); + xmlSyncLoggerPosition(p); // Abort as long as we're in an annotation end tag if (parser->getData().inAnnotationEndTag()) { + parser->getData().decrDepth(); return; } + // Decrement the current depth + parser->getData().decrDepth(); + + // If there is any text data in the buffer, issue that first + if (parser->getData().hasText()) { + parser->getEvents().data( + parser->getData().getText(parser->getReader().getSourceId())); + } + // Abort if the special ousia tag ends here - if (nameStr == "ousia" && parser->getData().depth == 0) { + std::string nameStr{name}; + if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 0) { return; } @@ -436,20 +368,105 @@ static void xmlEndElementHandler(void *p, const XML_Char *name) parser->getEvents().fieldEnd(); } -static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len) +static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) { // Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser XML_Parser p = static_cast(ref); - OsxmlEventParser *parser = static_cast(XML_GetUserData(p)); - - // TODO -/* size_t ulen = len > 0 ? 
static_cast(len) : 0; - syncLoggerPosition(parser, ulen); - const std::string data = Utils::trim(std::string{s, ulen}); - if (!data.empty()) { - stack->data(data); - }*/ + OsxmlEventParser *parser = + static_cast(XML_GetUserData(p)); + + // Abort as long as we're in an annotation end tag + if (parser->getData().inAnnotationEndTag()) { + return; + } + + // Convert the signed (smell the 90's C library here?) length to an usigned + // value + size_t ulen = len > 0 ? static_cast(len) : 0; + + // Synchronize the logger position + SourceLocation loc = xmlSyncLoggerPosition(p, ulen); + + // Fetch some variables for convenience + const WhitespaceMode mode = parser->getWhitespaceMode(); + OsxmlEventParserData &data = parser->getData(); + std::vector &textBuf = data.textBuf; + std::vector &whitespaceBuf = data.whitespaceBuf; + bool &hasWhitespace = data.hasWhitespace; + size_t &textStart = data.textStart; + size_t &textEnd = data.textEnd; + + size_t pos = loc.getStart(); + for (size_t i = 0; i < ulen; i++, pos++) { + switch (mode) { + case WhitespaceMode::PRESERVE: + PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd); + break; + case WhitespaceMode::TRIM: + TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd, + whitespaceBuf); + break; + case WhitespaceMode::COLLAPSE: + CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, + textStart, textEnd, + hasWhitespace); + break; + } + } +} + +/* Class OsxmlEvents */ + +OsxmlEvents::~OsxmlEvents() {} + +/* Class OsxmlEventParser */ + +OsxmlEventParserData::OsxmlEventParserData() + : depth(0), + annotationEndTagDepth(-1), + hasWhitespace(false), + textStart(0), + textEnd(0) +{ +} + +void OsxmlEventParserData::incrDepth() { depth++; } + +void OsxmlEventParserData::decrDepth() +{ + if (depth > 0) { + depth--; + } + if (depth < annotationEndTagDepth) { + annotationEndTagDepth = -1; + } +} + +bool OsxmlEventParserData::inAnnotationEndTag() +{ + return 
(annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth); } + +bool OsxmlEventParserData::hasText() { return !textBuf.empty(); } + +Variant OsxmlEventParserData::getText(SourceId sourceId) +{ + // Create a variant containing the string data and the location + Variant var = + Variant::fromString(std::string{textBuf.data(), textBuf.size()}); + var.setLocation({sourceId, textStart, textEnd}); + + // Reset the text buffers + textBuf.clear(); + whitespaceBuf.clear(); + hasWhitespace = false; + textStart = 0; + textEnd = 0; + + // Return the variant + return var; } /* Class OsxmlEventParser */ @@ -459,21 +476,22 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, : reader(reader), events(events), logger(logger), - whitespaceMode(WhitespaceMode::COLLAPSE), + whitespaceMode(WhitespaceMode::TRIM), data(new OsxmlEventParserData()) { } -void OsxmlEventParser::parse(CharReader &reader) +OsxmlEventParser::~OsxmlEventParser() {} + +void OsxmlEventParser::parse() { // Create the parser object - ScopedExpatXmlParser p{"UTF-8"}; + GuardedExpatXmlParser p{"UTF-8"}; // Reset the depth - depth = 0; + data->depth = 0; - // Pass the reference to the ParserStack to the XML handler - XMLUserData data(&stack, &reader); + // Pass the reference to this parser instance to the XML handler XML_SetUserData(&p, this); XML_UseParserAsHandlerArg(&p); @@ -498,7 +516,7 @@ void OsxmlEventParser::parse(CharReader &reader) if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { throw LoggableException{ "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))}, - xmlSyncLoggerPosition(p)}; + xmlSyncLoggerPosition(&p)}; } // Abort once there are no more bytes in the stream @@ -513,12 +531,17 @@ void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode) this->whitespaceMode = whitespaceMode; } -CharReader &OsxmlEventParser::getCharReader() { return charReader; } +WhitespaceMode OsxmlEventParser::getWhitespaceMode() const +{ + return whitespaceMode; +} + 
+CharReader &OsxmlEventParser::getReader() const { return reader; } -Logger &OsxmlEventParser::getLogger() { return logger; } +Logger &OsxmlEventParser::getLogger() const { return logger; } -OsxmlEvents &OsxmlEventParser::getEvents() { return events; } +OsxmlEvents &OsxmlEventParser::getEvents() const { return events; } -OsxmlEventParserData &OsxmlEventParser::getData() { return *data; } +OsxmlEventParserData &OsxmlEventParser::getData() const { return *data; } } diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index 5319ca6..aa20ea9 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -42,7 +42,7 @@ class Variant; class OsxmlEventParserData; /** - * Interface which defines the callback functions which are called by the + * Interface which defines the callback functions which are called by the * OsxmlEventParser whenever an event occurs. */ class OsxmlEvents { @@ -50,13 +50,13 @@ public: /** * Virtual destructor. */ - virtual ~OsxmlEvents() {} + virtual ~OsxmlEvents(); /** * Called whenever a command starts. Note that this implicitly always starts * the default field of the command. * - * @param name is a string variant containing name and location of the + * @param name is a string variant containing name and location of the * command. * @param args is a map variant containing the arguments that were given * to the command. @@ -67,12 +67,12 @@ public: * Called whenever an annotation starts. Note that this implicitly always * starts the default field of the annotation. * - * @param name is a string variant containing the name of the annotation + * @param name is a string variant containing the name of the annotation * class and the location of the annotation definition. * @param args is a map variant containing the arguments that were given * to the annotation definition. 
*/ - virtual void annotationStart(Variant name, Variant args); + virtual void annotationStart(Variant name, Variant args) = 0; /** * Called whenever the range of an annotation ends. The callee must @@ -85,12 +85,12 @@ public: * ended here. May be empty (or nullptr), if no elementName has been * specified at the end of the annotation. */ - virtual void annotationEnd(Variant name, Variant elementName); + virtual void annotationEnd(Variant name, Variant elementName) = 0; /** - * Called whenever the default field which was implicitly started by + * Called whenever the default field which was implicitly started by * commandStart or annotationStart ends. Note that this does not end the - * range of an annotation, but the default field of the annotation. To + * range of an annotation, but the default field of the annotation. To * signal the end of the annotation this, the annotationEnd method will be * invoked. */ @@ -102,11 +102,10 @@ public: * is not called if the parsing failed, the parser prints an error message * instead. * - * @param data is the already parsed data that should be passed to the + * @param data is the already parsed data that should be passed to the * handler. */ virtual void data(Variant data) = 0; - }; /** @@ -148,7 +147,7 @@ public: * Constructor fo the OsxmlEventParser. Takes a reference at the OsxmlEvents * of which the callback functions are called. * - * @param reader is a reference to the CharReader instance from which the + * @param reader is a reference to the CharReader instance from which the * XML should be read. * @param events is a refence at an instance of the OsxmlEvents class. All * events are forwarded to this class. @@ -157,6 +156,11 @@ public: */ OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger); + /** + * Destructor of OsxmlEventParser (needed for unique_ptr to incomplete type) + */ + ~OsxmlEventParser(); + /** * Performs the actual parsing. 
Reads the XML using eXpat and calles the * callbacks in the event listener instance whenever something interesting @@ -167,38 +171,44 @@ public: /** * Sets the whitespace handling mode. * - * @param whitespaceMode defines how whitespace in the data should be + * @param whitespaceMode defines how whitespace in the data should be * handled. */ void setWhitespaceMode(WhitespaceMode whitespaceMode); + /** + * Returns the current whitespace handling mode. + * + * @return the currently set whitespace handling mode. + */ + WhitespaceMode getWhitespaceMode() const; + /** * Returns the internal CharReader reference. * * @return the CharReader reference. */ - CharReader &getCharReader(); + CharReader &getReader() const; /** * Returns the internal Logger reference. * * @return the internal Logger reference. */ - Logger &getLogger(); + Logger &getLogger() const; /** * Returns the internal OsxmlEvents reference. * * @return the internal OsxmlEvents reference. */ - OsxmlEvents &getEvents(); + OsxmlEvents &getEvents() const; /** * Returns a reference at the internal data. 
*/ - OsxmlEventParserData &getData(); + OsxmlEventParserData &getData() const; }; - } #endif /* _OSXML_EVENT_PARSER_HPP_ */ diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index e5eff05..b944af8 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -28,6 +28,7 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); +//static ConcreteLogger logger; TEST(OsmlStreamParser, empty) { diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp new file mode 100644 index 0000000..06c800f --- /dev/null +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -0,0 +1,222 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +#include +#include +#include + +#include + +namespace ousia { + +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; + +namespace { +enum class OsxmlEvent { + COMMAND_START, + ANNOTATION_START, + ANNOTATION_END, + FIELD_END, + DATA +}; + +class TestOsxmlEventListener : public OsxmlEvents { +public: + std::vector> events; + + void commandStart(Variant name, Variant args) override + { + events.emplace_back(OsxmlEvent::COMMAND_START, + Variant::arrayType{name, args}); + } + + void annotationStart(Variant name, Variant args) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_START, + Variant::arrayType{name, args}); + } + + void annotationEnd(Variant name, Variant elementName) override + { + events.emplace_back(OsxmlEvent::ANNOTATION_END, + Variant::arrayType{name, elementName}); + } + + void fieldEnd() override + { + events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); + } + + void data(Variant data) override + { + events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); + } +}; + +static std::vector> parseXml( + const char *testString, + WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) +{ + TestOsxmlEventListener listener; + CharReader reader(testString); + OsxmlEventParser parser(reader, listener, logger); + parser.setWhitespaceMode(whitespaceMode); + parser.parse(); + return listener.events; +} +} + +TEST(OsxmlEventParser, simpleCommandWithArgs) +{ + const char *testString = ""; + // 01234567 89012 3456 78 9012 34 5678 90123 456 + // 0 1 2 3 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{ + "a", Variant::mapType{ + {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); + + // Check the locations (I'll do this one time and then just assume it works) + ASSERT_EQ(1U, events[0].second.asArray()[0].getLocation().getStart()); + 
ASSERT_EQ(2U, events[0].second.asArray()[0].getLocation().getEnd()); + ASSERT_EQ( + 9U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getStart()); + ASSERT_EQ( + 13U, + events[0].second.asArray()[1].asMap()["name"].getLocation().getEnd()); + ASSERT_EQ( + 18U, + events[0].second.asArray()[1].asMap()["a"].getLocation().getStart()); + ASSERT_EQ( + 19U, events[0].second.asArray()[1].asMap()["a"].getLocation().getEnd()); + ASSERT_EQ( + 24U, + events[0].second.asArray()[1].asMap()["b"].getLocation().getStart()); + ASSERT_EQ( + 25U, events[0].second.asArray()[1].asMap()["b"].getLocation().getEnd()); + ASSERT_EQ( + 30U, + events[0].second.asArray()[1].asMap()["c"].getLocation().getStart()); + ASSERT_EQ( + 34U, events[0].second.asArray()[1].asMap()["c"].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, magicTopLevelTag) +{ + const char *testString = ""; + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"b", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, magicTopLevelTagInside) +{ + const char *testString = ""; + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"ousia", Variant::mapType{}}}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString); + ASSERT_EQ(expectedEvents, events); +} + +TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, 
Variant::arrayType{" hello \n world "}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::PRESERVE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataTrimWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::TRIM); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) +{ + const char *testString = " hello \n world "; + // 012345678901 234567890123 + // 0 1 2 + + std::vector> expectedEvents{ + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, + {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + + auto events = parseXml(testString, WhitespaceMode::COLLAPSE); + ASSERT_EQ(expectedEvents, events); + + // Check the location of the text + ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); + ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); +} + +} + -- cgit v1.2.3 From cc281d91def921b7bbf5d3d4a0fce53afc5a317b Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:07:58 +0100 Subject: Renamed parser/generic to parser/stack and made filenames much shorter --- src/core/parser/generic/ParserState.cpp | 161 ------------ 
src/core/parser/generic/ParserState.hpp | 284 --------------------- src/core/parser/generic/ParserStateCallbacks.cpp | 26 -- src/core/parser/generic/ParserStateCallbacks.hpp | 106 -------- src/core/parser/generic/ParserStateHandler.cpp | 104 -------- src/core/parser/generic/ParserStateHandler.hpp | 281 --------------------- src/core/parser/generic/ParserStateStack.cpp | 187 -------------- src/core/parser/generic/ParserStateStack.hpp | 191 -------------- src/core/parser/stack/Callbacks.cpp | 23 ++ src/core/parser/stack/Callbacks.hpp | 99 ++++++++ src/core/parser/stack/Handler.cpp | 90 +++++++ src/core/parser/stack/Handler.hpp | 302 ++++++++++++++++++++++ src/core/parser/stack/Stack.cpp | 188 ++++++++++++++ src/core/parser/stack/Stack.hpp | 191 ++++++++++++++ src/core/parser/stack/State.cpp | 171 +++++++++++++ src/core/parser/stack/State.hpp | 307 +++++++++++++++++++++++ test/core/parser/ParserStateTest.cpp | 77 ------ test/core/parser/stack/StateTest.cpp | 79 ++++++ 18 files changed, 1450 insertions(+), 1417 deletions(-) delete mode 100644 src/core/parser/generic/ParserState.cpp delete mode 100644 src/core/parser/generic/ParserState.hpp delete mode 100644 src/core/parser/generic/ParserStateCallbacks.cpp delete mode 100644 src/core/parser/generic/ParserStateCallbacks.hpp delete mode 100644 src/core/parser/generic/ParserStateHandler.cpp delete mode 100644 src/core/parser/generic/ParserStateHandler.hpp delete mode 100644 src/core/parser/generic/ParserStateStack.cpp delete mode 100644 src/core/parser/generic/ParserStateStack.hpp create mode 100644 src/core/parser/stack/Callbacks.cpp create mode 100644 src/core/parser/stack/Callbacks.hpp create mode 100644 src/core/parser/stack/Handler.cpp create mode 100644 src/core/parser/stack/Handler.hpp create mode 100644 src/core/parser/stack/Stack.cpp create mode 100644 src/core/parser/stack/Stack.hpp create mode 100644 src/core/parser/stack/State.cpp create mode 100644 src/core/parser/stack/State.hpp delete mode 100644 
test/core/parser/ParserStateTest.cpp create mode 100644 test/core/parser/stack/StateTest.cpp (limited to 'src/core') diff --git a/src/core/parser/generic/ParserState.cpp b/src/core/parser/generic/ParserState.cpp deleted file mode 100644 index f635d86..0000000 --- a/src/core/parser/generic/ParserState.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "ParserState.hpp" - -namespace ousia { - -/* Class ParserState */ - -ParserState::ParserState() : elementHandler(nullptr) {} - -ParserState::ParserState(ParserStateSet parents, Arguments arguments, - RttiSet createdNodeTypes, - HandlerConstructor elementHandler) - : parents(parents), - arguments(arguments), - createdNodeTypes(createdNodeTypes), - elementHandler(elementHandler) -{ -} - -ParserState::ParserState(const ParserStateBuilder &builder) - : ParserState(builder.build()) -{ -} - -/* Class ParserStateBuilder */ - -ParserStateBuilder &ParserStateBuilder::copy(const ParserState &state) -{ - this->state = state; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::parent(const ParserState *parent) -{ - state.parents = ParserStateSet{parent}; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::parents(const ParserStateSet &parents) -{ - state.parents = parents; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::arguments(const Arguments &arguments) -{ - state.arguments = arguments; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::createdNodeType(const Rtti *type) -{ - state.createdNodeTypes = RttiSet{type}; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::createdNodeTypes(const RttiSet &types) -{ - state.createdNodeTypes = types; - return *this; -} - -ParserStateBuilder &ParserStateBuilder::elementHandler( - HandlerConstructor elementHandler) -{ - state.elementHandler = elementHandler; - return *this; -} - -const ParserState &ParserStateBuilder::build() const { return state; } - -/* Class ParserStateDeductor */ - -ParserStateDeductor::ParserStateDeductor( - std::vector signature, - std::vector states) - : tbl(signature.size()), - signature(std::move(signature)), - states(std::move(states)) -{ -} - -bool ParserStateDeductor::isActive(size_t d, const ParserState *s) -{ - // Lookup the "active" state of (d, s), if it was not already set - // (e.second is true) we'll have to calculate it - auto e 
= tbl[d].emplace(s, false); - bool &res = e.first->second; - if (!e.second) { - return res; - } - - // Check whether this node is generative (may have produced the Node - // described by the current Signature element) - bool isGenerative = signature[d]->isOneOf(s->createdNodeTypes); - - if (isGenerative && d == 0) { - // End of recursion -- the last signature element is reached and the - // node was generative - res = true; - } else { - // Try repetition of this node - if (isGenerative && isActive(d - 1, s)) { - res = true; - } else { - // Check whether any of the parent nodes were active -- either for - // the previous element (if this one is generative) or for the - // current element (assuming this node was not generative) - for (const ParserState *parent : s->parents) { - if ((isGenerative && isActive(d - 1, parent)) || - isActive(d, parent)) { - res = true; - break; - } - } - } - } - - return res; -} - -std::vector ParserStateDeductor::deduce() -{ - std::vector res; - if (!signature.empty()) { - const size_t D = signature.size(); - for (auto s : states) { - if (signature[D - 1]->isOneOf(s->createdNodeTypes) && - isActive(D - 1, s)) { - res.push_back(s); - } - } - } - return res; -} - -/* Constant initializations */ - -namespace ParserStates { -const ParserState All; -const ParserState None; -} -} - diff --git a/src/core/parser/generic/ParserState.hpp b/src/core/parser/generic/ParserState.hpp deleted file mode 100644 index 6487fdd..0000000 --- a/src/core/parser/generic/ParserState.hpp +++ /dev/null @@ -1,284 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file ParserState.hpp - * - * Defines the ParserState class used within the ParserStack pushdown - * automaton and the ParserStateBuilder class for convenient construction of - * such classes. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PARSER_STATE_HPP_ -#define _OUSIA_PARSER_STATE_HPP_ - -#include - -#include -#include - -namespace ousia { - -// Forward declarations -class ParserStateBuilder; -class ParserState; -class HandlerData; -class Handler; -using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); - -/** - * Set of pointers of parser states -- used for specifying a set of parent - * states. - */ -using ParserStateSet = std::unordered_set; - -/** - * Class used for the complete specification of a ParserState. Stores possible - * parent states, state handlers and arguments to be passed to that state. - */ -struct ParserState { - /** - * Vector containing all possible parent states. - */ - ParserStateSet parents; - - /** - * Descriptor of the arguments that should be passed to the handler. - */ - Arguments arguments; - - /** - * Set containing the types of the nodes that may be created in this - * ParserState. This information is needed for Parsers to reconstruct the - * current ParserState from a given ParserScope when a file is included. - */ - RttiSet createdNodeTypes; - - /** - * Pointer at a function which creates a new concrete Handler instance for - * the elements described by this state. May be nullptr in which case no - * handler instance is created. 
- */ - HandlerConstructor elementHandler; - - /** - * Default constructor, initializes the handlers with nullptr. - */ - ParserState(); - - /** - * Constructor taking values for all fields. Use the ParserStateBuilder - * class for a more convenient construction of ParserState instances. - * - * @param parents is a vector containing all possible parent states. - * @param arguments is a descriptor of arguments that should be passed to - * the handler. - * @param createdNodeTypes is a set containing the types of the nodes tha - * may be created in this ParserState. This information is needed for - * Parsers to reconstruct the current ParserState from a given ParserScope - * when a file is included. - * @param elementHandler is a pointer at a function which creates a new - * concrete Handler instance for the elements described by this state. May - * be nullptr in which case no handler instance is created. - */ - ParserState(ParserStateSet parents, Arguments arguments = Arguments{}, - RttiSet createdNodeTypes = RttiSet{}, - HandlerConstructor elementHandler = nullptr); - - /** - * Creates this ParserState from the given ParserStateBuilder instance. - */ - ParserState(const ParserStateBuilder &builder); -}; - -/** - * The ParserStateBuilder class is a class used for conveniently building new - * ParserState instances. - */ -class ParserStateBuilder { -private: - /** - * ParserState instance that is currently being built by the - * ParserStateBuilder. - */ - ParserState state; - -public: - /** - * Copies the ParserState instance and uses it as internal state. Overrides - * all changes made by the ParserStateBuilder. - * - * @param state is the state that should be copied. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder ©(const ParserState &state); - - /** - * Sets the possible parent states to the single given parent element. 
- * - * @param parent is a pointer at the parent ParserState instance that should - * be the possible parent state. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &parent(const ParserState *parent); - - /** - * Sets the ParserState instances in the given ParserStateSet as the list of - * supported parent states. - * - * @param parents is a set of pointers at ParserState instances that should - * be the possible parent states. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &parents(const ParserStateSet &parents); - - /** - * Sets the arguments that should be passed to the parser state handler to - * those given as argument. - * - * @param arguments is the Arguments instance describing the Arguments that - * should be parsed to a Handler for this ParserState. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &arguments(const Arguments &arguments); - - /** - * Sets the Node types this state may produce to the given Rtti descriptor. - * - * @param type is the Rtti descriptor of the Type that may be produced by - * this state. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &createdNodeType(const Rtti *type); - - /** - * Sets the Node types this state may produce to the given Rtti descriptors. - * - * @param types is a set of Rtti descriptors of the Types that may be - * produced by this state. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &createdNodeTypes(const RttiSet &types); - - /** - * Sets the constructor for the element handler. The constructor creates a - * new concrete Handler instance for the elements described by this state. - * May be nullptr in which case no handler instance is created (this is - * the default value). 
- * - * @param elementHandler is the HandlerConstructor that should create a - * new Handler instance. - * @return a reference at this ParserStateBuilder instance for method - * chaining. - */ - ParserStateBuilder &elementHandler(HandlerConstructor elementHandler); - - /** - * Returns a reference at the internal ParserState instance that was built - * using the ParserStateBuilder. - * - * @return the built ParserState. - */ - const ParserState &build() const; -}; - -/** - * Class used to deduce the ParserState a Parser is currently in based on the - * types of the Nodes that currently are on the ParserStack. Uses dynamic - * programming in order to solve this problem. - */ -class ParserStateDeductor { -public: - /** - * Type containing the dynamic programming table. - */ - using Table = std::vector>; - -private: - /** - * Dynamic programming table. - */ - Table tbl; - - /** - * Signature given in the constructor. - */ - const std::vector signature; - - /** - * List of states that should be checked for being active. - */ - const std::vector states; - - /** - * Used internally to check whether the given parser stack s may have been - * active for signature element d. - * - * @param d is the signature element. - * @param s is the parser state. - * @return true if the the given ParserState may have been active. - */ - bool isActive(size_t d, const ParserState *s); - -public: - /** - * Constructor of the ParserStateDeductor class. - * - * @param signature a Node type signature describing the types of the nodes - * which currently reside on e.g. the ParserScope stack. - * @param states is a list of states that should be checked. - */ - ParserStateDeductor(std::vector signature, - std::vector states); - - /** - * Selects all active states from the given states. Only considers those - * states that may have produced the last signature element. - * - * @return a list of states that may actually have been active. 
- */ - std::vector deduce(); -}; - -/** - * The ParserStates namespace contains all the global state constants used - * in the ParserStack class. - */ -namespace ParserStates { -/** - * State representing all states. - */ -extern const ParserState All; - -/** - * State representing the initial state. - */ -extern const ParserState None; -} -} - -#endif /* _OUSIA_PARSER_STATE_HPP_ */ - diff --git a/src/core/parser/generic/ParserStateCallbacks.cpp b/src/core/parser/generic/ParserStateCallbacks.cpp deleted file mode 100644 index 50bac57..0000000 --- a/src/core/parser/generic/ParserStateCallbacks.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -namespace ousia { - -/* Class ParserStateCallbacks */ - -} - diff --git a/src/core/parser/generic/ParserStateCallbacks.hpp b/src/core/parser/generic/ParserStateCallbacks.hpp deleted file mode 100644 index 7ec5264..0000000 --- a/src/core/parser/generic/ParserStateCallbacks.hpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file ParserStateCallbacks.hpp - * - * Contains an interface defining the callbacks that can be directed from a - * ParserStateHandler to the ParserStateStack, and from the ParserStateStack to - * the actual parser. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PARSER_STATE_CALLBACKS_HPP_ -#define _OUSIA_PARSER_STATE_CALLBACKS_HPP_ - -#include - -#include - -namespace ousia { - -/** - * Interface defining a set of callback functions that act as a basis for the - * ParserStateStackCallbacks and the ParserCallbacks. - */ -class ParserStateCallbacks { -public: - /** - * Virtual descructor. - */ - virtual ~ParserStateCallbacks() {}; - - /** - * Sets the whitespace mode that specifies how string data should be - * processed. - * - * @param whitespaceMode specifies one of the three WhitespaceMode constants - * PRESERVE, TRIM or COLLAPSE. - */ - virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; - - /** - * Sets the type as which the variant data should be parsed. - * - * @param type is one of the VariantType constants, specifying with which - * type the data that is passed to the ParserStateHandler in the "data" - * function should be handled. - */ - virtual void setDataType(VariantType type) = 0; - - /** - * Registers the given token as token that should be reported to the handler - * using the "token" function. - * - * @param token is the token string that should be reported. 
- */ - virtual void registerToken(const std::string &token) = 0; - - /** - * Unregisters the given token, it will no longer be reported to the handler - * using the "token" function. - * - * @param token is the token string that should be unregistered. - */ - virtual void unregisterToken(const std::string &token) = 0; -}; - -/** - * Interface defining the callback functions that can be passed from a - * ParserStateStack to the underlying parser. - */ -class ParserCallbacks : public ParserStateCallbacks { - /** - * Checks whether the given token is supported by the parser. The parser - * returns true, if the token is supported, false if this token cannot be - * registered. Note that parsers that do not support the registration of - * tokens at all should always return "true". - * - * @param token is the token that should be checked for support. - * @return true if the token is generally supported (or the parser does not - * support registering tokens at all), false if the token is not supported, - * because e.g. it is a reserved token or it interferes with other tokens. - */ - virtual bool supportsToken(const std::string &token) = 0; -} - -} - -#endif /* _OUSIA_PARSER_STATE_CALLBACKS_HPP_ */ - diff --git a/src/core/parser/generic/ParserStateHandler.cpp b/src/core/parser/generic/ParserStateHandler.cpp deleted file mode 100644 index 64e2bfa..0000000 --- a/src/core/parser/generic/ParserStateHandler.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include "ParserStateHandler.hpp" - -namespace ousia { - -/* Class ParserStatedata */ - -ParserStatedata::ParserStatedata(ParserContext &ctx, - ParserStateCallbacks &callbacks, - std::string name, const ParserState &state, - const ParserState &parentState, - const SourceLocation location) - : ctx(ctx), - callbacks(callbacks), - name(std::move(name)), - state(state), - parentState(parentState), - location(location){}; - -/* Class ParserStateHandler */ - -ParserStateHandler::ParserStateHandler(const ParserStatedata &data) : data(data) -{ -} - -ParserContext &ParserStateHandler::context() { return data.ctx; } - -const std::string &ParserStateHandler::name() { return data.name; } - -ParserScope &ParserStateHandler::scope() { return data.ctx.getScope(); } - -Manager &ParserStateHandler::manager() { return data.ctx.getManager(); } - -Logger &ParserStateHandler::logger() { return data.ctx.getLogger(); } - -Rooted ParserStateHandler::project() { return data.ctx.getProject(); } - -const ParserState &ParserStateHandler::state() { return data.state; } - -SourceLocation ParserStateHandler::location() { return data.location; } - -void ParserStateHandler::setWhitespaceMode(WhitespaceMode whitespaceMode) -{ - data.callbacks.setWhitespaceMode(whitespaceMode); -} - -void ParserStateHandler::setDataType(VariantType type) -{ - data.callbacks.setDataType(type); -} - -bool ParserStateHandler::supportsToken(const std::string &token) -{ - return data.callbacks.supportsToken(token); -} - -void ParserStateHandler::registerToken(const std::string &token) -{ - data.callbacks.registerToken(token); -} - -void ParserStateHandler::unregisterToken(const std::string &token) -{ - data.callbacks.unregisterToken(token); -} - -void ParserStateHandler::data(const std::string &data, int field) -{ - if 
(Utils::hasNonWhitepaceChar(data)) { - logger().error("Expected command but found character data."); - } -} - -/* Class DefaultParserStateHandler */ - -void DefaultParserStateHandler::start(Variant::mapType &args) {} - -void DefaultParserStateHandler::end() {} - -ParserStateHandler *DefaultParserStateHandler::create(const data &data) -{ - return new DefaultHandler{data}; -} -} - diff --git a/src/core/parser/generic/ParserStateHandler.hpp b/src/core/parser/generic/ParserStateHandler.hpp deleted file mode 100644 index f3c836e..0000000 --- a/src/core/parser/generic/ParserStateHandler.hpp +++ /dev/null @@ -1,281 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_PARSER_STATE_HANDLER_HPP_ -#define _OUSIA_PARSER_STATE_HANDLER_HPP_ - -#include -#include - -#include - -namespace ousia { - -// Forward declarations -class ParserContext; -class ParserState; -class ParserStateCallbacks; - -/** - * Class collecting all the data that is being passed to a ParserStateHandler - * instance. - */ -class ParserStateHandlerData { -public: - /** - * Reference to the ParserContext instance that should be used to resolve - * references to nodes in the Graph. 
- */ - ParserContext &ctx; - - /** - * Reference at an instance of the ParserStateCallbacks class, used for - * modifying the behaviour of the parser (like registering tokens, setting - * the data type or changing the whitespace handling mode). - */ - ParserStateCallbacks &callbacks; - - /** - * Contains the name of the command that is being handled. - */ - const std::string name; - - /** - * Contains the current state of the state machine. - */ - const ParserState &state; - - /** - * Contains the state of the state machine when the parent node was handled. - */ - const ParserState &parentState; - - /** - * Current source code location. - */ - const SourceLocation location; - - /** - * Constructor of the HandlerData class. - * - * @param ctx is the parser context the handler should be executed in. - * @param callbacks is an instance of ParserStateCallbacks used to notify - * the parser about certain state changes. - * @param name is the name of the string. - * @param state is the state this handler was called for. - * @param parentState is the state of the parent command. - * @param location is the location at which the handler is created. - */ - ParserStateHandlerData(ParserContext &ctx, ParserStateCallbacks &callbacks, - std::string name, const ParserState &state, - const ParserState &parentState, - const SourceLocation &location); -}; - -/** - * The handler class provides a context for handling an XML tag. It has to be - * overridden and registered in the StateStack class to form handlers for - * concrete XML tags. - */ -class ParserStateHandler { -private: - /** - * Structure containing the internal handler data. - */ - const ParserStateHandlerData data; - -protected: - /** - * Constructor of the Handler class. - * - * @param data is a structure containing all data being passed to the - * handler. - */ - ParserStateHandler(const ParserStateHandlerData &data){}; - -public: - /** - * Virtual destructor. 
- */ - virtual ~Handler(){}; - - /** - * Returns a reference at the ParserContext. - * - * @return a reference at the ParserContext. - */ - ParserContext &context(); - - /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. - */ - const std::string &name(); - - /** - * Returns a reference at the ParserScope instance. - * - * @return a reference at the ParserScope instance. - */ - ParserScope &scope(); - - /** - * Returns a reference at the Manager instance which manages all nodes. - * - * @return a referance at the Manager instance. - */ - Manager &manager(); - - /** - * Returns a reference at the Logger instance used for logging error - * messages. - * - * @return a reference at the Logger instance. - */ - Logger &logger(); - - /** - * Returns a reference at the Project Node, representing the project into - * which the file is currently being parsed. - * - * @return a referance at the Project Node. - */ - Rooted project(); - - /** - * Reference at the ParserState descriptor for which this Handler was - * created. - * - * @return a const reference at the constructing ParserState descriptor. - */ - const ParserState &state(); - - /** - * Returns the current location in the source file. - * - * @return the current location in the source file. - */ - SourceLocation location(); - - /** - * Calls the corresponding function in the ParserStateCallbacks instance. - * Sets the whitespace mode that specifies how string data should be - * processed. - * - * @param whitespaceMode specifies one of the three WhitespaceMode constants - * PRESERVE, TRIM or COLLAPSE. - */ - void setWhitespaceMode(WhitespaceMode whitespaceMode); - - /** - * Calls the corresponding function in the ParserStateCallbacks instance. - * Sets the type as which the variant data should be parsed. 
- * - * @param type is one of the VariantType constants, specifying with which - * type the data that is passed to the ParserStateHandler in the "data" - * function should be handled. - */ - void setDataType(VariantType type); - - /** - * Calls the corresponding function in the ParserStateCallbacks instance. - * Checks whether the given token is supported by the parser. The parser - * returns true, if the token is supported, false if this token cannot be - * registered. Note that parsers that do not support the registration of - * tokens at all should always return "true". - * - * @param token is the token that should be checked for support. - * @return true if the token is generally supported (or the parser does not - * support registering tokens at all), false if the token is not supported, - * because e.g. it is a reserved token or it interferes with other tokens. - */ - bool supportsToken(const std::string &token); - - /** - * Calls the corresponding function in the ParserStateCallbacks instance. - * Registers the given token as token that should be reported to the handler - * using the "token" function. - * - * @param token is the token string that should be reported. - */ - void registerToken(const std::string &token); - - /** - * Calls the corresponding function in the ParserStateCallbacks instance. - * Unregisters the given token, it will no longer be reported to the handler - * using the "token" function. - * - * @param token is the token string that should be unregistered. - */ - void unregisterToken(const std::string &token); - - /** - * Called when the command that was specified in the constructor is - * instanciated. - * - * @param args is a map from strings to variants (argument name and value). - */ - virtual void start(Variant::mapType &args) = 0; - - /** - * Called whenever the command for which this handler is defined ends. 
- */ - virtual void end() = 0; - - /** - * Called whenever raw data (int the form of a string) is available for the - * Handler instance. In the default handler an exception is raised if the - * received data contains non-whitespace characters. - * - * @param data is a pointer at the character data that is available for the - * Handler instance. - * @param field is the field number (the interpretation of this value - * depends on the format that is being parsed). - */ - virtual void data(const std::string &data, int field); -}; - -/** - * HandlerConstructor is a function pointer type used to create concrete - * instances of the Handler class. - * - * @param handlerData is the data that should be passed to the new handler - * instance. - * @return a newly created handler instance. - */ -using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); - -/** - * The DefaultHandler class is used in case no element handler is specified in - * the ParserState descriptor. - */ -class DefaultParserStateHandler : public ParserStateHandler { -public: - using ParserStateHandler::ParserStateHandler; - - void start(Variant::mapType &args) override; - - void end() override; - - static Handler *create(const HandlerData &handlerData); -}; -} - -#endif /* _OUSIA_PARSER_STATE_HANDLER_HPP_ */ - diff --git a/src/core/parser/generic/ParserStateStack.cpp b/src/core/parser/generic/ParserStateStack.cpp deleted file mode 100644 index 8c32f17..0000000 --- a/src/core/parser/generic/ParserStateStack.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include -#include -#include - -#include "ParserScope.hpp" -#include "ParserStateStack.hpp" - -namespace ousia { - -/* Class ParserStateStack */ - -/** - * Returns an Exception that should be thrown when a currently invalid command - * is thrown. - */ -static LoggableException InvalidCommand(const std::string &name, - const std::set &expected) -{ - if (expected.empty()) { - return LoggableException{ - std::string{"No nested elements allowed, but got \""} + name + - std::string{"\""}}; - } else { - return LoggableException{ - std::string{"Expected "} + - (expected.size() == 1 ? std::string{"\""} - : std::string{"one of \""}) + - Utils::join(expected, "\", \"") + std::string{"\", but got \""} + - name + std::string{"\""}}; - } -} - -ParserStateStack::ParserStateStack( - ParserContext &ctx, - const std::multimap &states) - : ctx(ctx), states(states) -{ -} - -bool ParserStateStack::deduceState() -{ - // Assemble all states - std::vector states; - for (const auto &e : this->states) { - states.push_back(e.second); - } - - // Fetch the type signature of the scope and derive all possible states, - // abort if no unique parser state was found - std::vector possibleStates = - ParserStateDeductor(ctx.getScope().getStackTypeSignature(), states) - .deduce(); - if (possibleStates.size() != 1) { - ctx.getLogger().error( - "Error while including file: Cannot deduce parser state."); - return false; - } - - // Switch to this state by creating a dummy handler - const ParserState *state = possibleStates[0]; - Handler *handler = - DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}}); - 
stack.emplace(handler); - return true; -} - -std::set ParserStateStack::expectedCommands() -{ - const ParserState *currentState = &(this->currentState()); - std::set res; - for (const auto &v : states) { - if (v.second->parents.count(currentState)) { - res.insert(v.first); - } - } - return res; -} - -const ParserState &ParserStateStack::currentState() -{ - return stack.empty() ? ParserStates::None : stack.top()->state(); -} - -std::string ParserStateStack::currentCommandName() -{ - return stack.empty() ? std::string{} : stack.top()->name(); -} - -const ParserState *ParserStateStack::findTargetState(const std::string &name) -{ - const ParserState *currentState = &(this->currentState()); - auto range = states.equal_range(name); - for (auto it = range.first; it != range.second; it++) { - const ParserStateSet &parents = it->second->parents; - if (parents.count(currentState) || parents.count(&ParserStates::All)) { - return it->second; - } - } - - return nullptr; -} - -void ParserStateStack::start(const std::string &name, Variant::mapType &args, - const SourceLocation &location) -{ - ParserState const *targetState = findTargetState(name); -// TODO: Andreas, please improve this. -// if (!Utils::isIdentifier(name)) { -// throw LoggableException(std::string("Invalid identifier \"") + name + -// std::string("\"")); -// } - - if (targetState == nullptr) { - targetState = findTargetState("*"); - } - if (targetState == nullptr) { - throw InvalidCommand(name, expectedCommands()); - } - - // Fetch the associated constructor - HandlerConstructor ctor = targetState->elementHandler - ? 
targetState->elementHandler - : DefaultHandler::create; - - // Canonicalize the arguments, allow additional arguments - targetState->arguments.validateMap(args, ctx.getLogger(), true); - - // Instantiate the handler and call its start function - Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); - handler->start(args); - stack.emplace(handler); -} - -void ParserStateStack::start(std::string name, const Variant::mapType &args, - const SourceLocation &location) -{ - Variant::mapType argsCopy(args); - start(name, argsCopy); -} - -void ParserStateStack::end() -{ - // Check whether the current command could be ended - if (stack.empty()) { - throw LoggableException{"No command to end."}; - } - - // Remove the current HandlerInstance from the stack - std::shared_ptr inst{stack.top()}; - stack.pop(); - - // Call the end function of the last Handler - inst->end(); -} - -void ParserStateStack::data(const std::string &data, int field) -{ - // Check whether there is any command the data can be sent to - if (stack.empty()) { - throw LoggableException{"No command to receive data."}; - } - - // Pass the data to the current Handler instance - stack.top()->data(data, field); -} -} - diff --git a/src/core/parser/generic/ParserStateStack.hpp b/src/core/parser/generic/ParserStateStack.hpp deleted file mode 100644 index b106475..0000000 --- a/src/core/parser/generic/ParserStateStack.hpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file ParserStateStack.hpp - * - * Helper classes for document or description parsers. Contains the - * ParserStateStack class, which is an pushdown automaton responsible for - * accepting commands in the correct order and calling specified handlers. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_PARSER_STATE_STACK_HPP_ -#define _OUSIA_PARSER_STATE_STACK_HPP_ - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "Parser.hpp" -#include "ParserContext.hpp" -#include "ParserState.hpp" - -namespace ousia { - -/** - * The ParserStateStack class is a pushdown automaton responsible for turning a - * command stream into a tree of Node instances. - */ -class ParserStateStack { -private: - /** - * Reference at the parser context. - */ - ParserContext &ctx; - - /** - * Map containing all registered command names and the corresponding - * state descriptors. - */ - const std::multimap<std::string, const ParserState *> &states; - - /** - * Internal stack used for managing the currently active Handler instances. - */ - std::stack<std::shared_ptr<Handler>> stack; - - /** - * Used internally to get all expected command names for the current state. - * This function is used to build error messages. - * - * @return a set of strings containing the names of the expected commands. - */ - std::set<std::string> expectedCommands(); - - /** - * Returns the targetState for a command with the given name that can be - * reached from for the current state. - * - * @param name is the name of the requested command. - * @return nullptr if no target state was found, a pointer at the target - *state - * otherwise. - */ - const ParserState *findTargetState(const std::string &name); - -public: - /** - * Creates a new instance of the ParserStateStack class.
- * - * @param ctx is the parser context the parser stack is working on. - * @param states is a map containing the command names and pointers at the - * corresponding ParserState instances. - */ - ParserStateStack( - ParserContext &ctx, - const std::multimap<std::string, const ParserState *> &states); - - /** - * Tries to reconstruct the parser state from the Scope instance of the - * ParserContext given in the constructor. This functionality is needed for - * including files,as the Parser of the included file needs to be brought to - + an equivalent state as the one in the including file. - * - * @param scope is the ParserScope instance from which the ParserState - * should be reconstructed. - * @param logger is the logger instance to which error messages should be - * written. - * @return true if the operation was sucessful, false otherwise. - */ - bool deduceState(); - - /** - * Returns the state the ParserStateStack instance currently is in. - * - * @return the state of the currently active Handler instance or STATE_NONE - * if no handler is on the stack. - */ - const ParserState &currentState(); - - /** - * Returns the command name that is currently being handled. - * - * @return the name of the command currently being handled by the active - * Handler instance or an empty string if no handler is currently active. - */ - std::string currentCommandName(); - - /** - * Function that should be called whenever a new command is reached. - * - * @param name is the name of the command (including the namespace - * separator ':') and its corresponding location. Must be a string variant. - * @param args is a map variant containing the arguments that were passed to - * the command. - */ - void command(Variant name, Variant args); - - /** - * Function that should be called whenever a new field starts. Fields of the - * same command may not be separated by calls to - */ - void fieldStart(); - - /** - * Function that should be called whenever a field ends.
- */ - void fieldEnd(); - - /** - * Function that shuold be called whenever character data is found in the - * input stream. - * - * @param data is a variant of any type containing the data that was parsed - * as data. - */ - void data(Variant data); - - /** - * Function that should be called whenever an annotation starts. - * - * @param name is the name of the annotation class. - * @param args is a map variant containing the arguments that were passed - * to the annotation. - */ - void annotationStart(Variant name, Variant args); - - /** - * Function that should be called whenever an annotation ends. - * - * @param name is the name of the annotation class that was ended. - * @param annotationName is the name of the annotation that was ended. - */ - void annotationEnd(Variant name, Variant annotationName); - - /** - * Function that should be called whenever a previously registered token - * is found in the input stream. - * - * @param token is string variant containing the token that was encountered. - */ - void token(Variant token); -}; -} - -#endif /* _OUSIA_PARSER_STATE_STACK_HPP_ */ - diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp new file mode 100644 index 0000000..6ebc549 --- /dev/null +++ b/src/core/parser/stack/Callbacks.cpp @@ -0,0 +1,23 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "Callbacks.hpp" + +namespace ousia { +} + diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp new file mode 100644 index 0000000..bb56e44 --- /dev/null +++ b/src/core/parser/stack/Callbacks.hpp @@ -0,0 +1,99 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Callbacks.hpp + * + * Contains an interface defining the callbacks that can be directed from a + * StateHandler to the StateStack, and from the StateStack to + * the actual parser. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STATE_CALLBACKS_HPP_ +#define _OUSIA_PARSER_STATE_CALLBACKS_HPP_ + +#include + +#include + +namespace ousia { +namespace parser_stack { + +/** + * Interface defining a set of callback functions that act as a basis for the + * StateStackCallbacks and the ParserCallbacks. + */ +class Callbacks { +public: + /** + * Virtual descructor. + */ + virtual ~Callbacks() {}; + + /** + * Sets the whitespace mode that specifies how string data should be + * processed. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. 
+ */ + virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; + + /** + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + virtual void registerToken(const std::string &token) = 0; + + /** + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. + */ + virtual void unregisterToken(const std::string &token) = 0; +}; + +/** + * Interface defining the callback functions that can be passed from a + * StateStack to the underlying parser. + */ +class ParserCallbacks : public Callbacks { + /** + * Checks whether the given token is supported by the parser. The parser + * returns true, if the token is supported, false if this token cannot be + * registered. Note that parsers that do not support the registration of + * tokens at all should always return "true". + * + * @param token is the token that should be checked for support. + * @return true if the token is generally supported (or the parser does not + * support registering tokens at all), false if the token is not supported, + * because e.g. it is a reserved token or it interferes with other tokens. + */ + virtual bool supportsToken(const std::string &token) = 0; +}; + +} +} + +#endif /* _OUSIA_PARSER_STATE_CALLBACKS_HPP_ */ + diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp new file mode 100644 index 0000000..66af2a4 --- /dev/null +++ b/src/core/parser/stack/Handler.cpp @@ -0,0 +1,90 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "Callbacks.hpp" +#include "Handler.hpp" +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class HandlerData */ + +HandlerData::HandlerData(ParserContext &ctx, Callbacks &callbacks, + std::string name, const State &state, + const SourceLocation &location) + : ctx(ctx), + callbacks(callbacks), + name(std::move(name)), + state(state), + location(location) +{ +} + +/* Class Handler */ + +Handler::Handler(const HandlerData &internalData) : internalData(internalData) +{ +} + +Handler::~Handler() {} + +ParserContext &Handler::context() { return internalData.ctx; } + +const std::string &Handler::name() { return internalData.name; } + +ParserScope &Handler::scope() { return internalData.ctx.getScope(); } + +Manager &Handler::manager() { return internalData.ctx.getManager(); } + +Logger &Handler::logger() { return internalData.ctx.getLogger(); } + +const State &Handler::state() { return internalData.state; } + +SourceLocation Handler::location() { return internalData.location; } + +void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) +{ + internalData.callbacks.setWhitespaceMode(whitespaceMode); +} + +void Handler::registerToken(const std::string &token) +{ + internalData.callbacks.registerToken(token); +} + +void Handler::unregisterToken(const std::string &token) +{ + internalData.callbacks.unregisterToken(token); +} + +/* Class DefaultHandler */ + +/*void DefaultHandler::start(Variant::mapType &args) {} + +void DefaultHandler::end() {} + +Handler *DefaultHandler::create(const data &data) +{ + return new DefaultHandler{data}; +}*/ +} +} + diff --git 
a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp new file mode 100644 index 0000000..0701343 --- /dev/null +++ b/src/core/parser/stack/Handler.hpp @@ -0,0 +1,302 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef _OUSIA_PARSER_STATE_HANDLER_HPP_ +#define _OUSIA_PARSER_STATE_HANDLER_HPP_ + +#include +#include + +#include +#include + +namespace ousia { + +// Forward declarations +class ParserContext; +class Callbacks; +class Logger; +class Project; + +namespace parser_stack { + +// More forward declarations +class State; + +/** + * Class collecting all the data that is being passed to a Handler + * instance. + */ +class HandlerData { +public: + /** + * Reference to the ParserContext instance that should be used to resolve + * references to nodes in the Graph. + */ + ParserContext &ctx; + + /** + * Reference at an instance of the Callbacks class, used for + * modifying the behaviour of the parser (like registering tokens, setting + * the data type or changing the whitespace handling mode). + */ + Callbacks &callbacks; + + /** + * Contains the name of the command that is being handled. + */ + std::string name; + + /** + * Contains the current state of the state machine. + */ + const State &state; + + /** + * Current source code location. + */ + SourceLocation location; + + /** + * Constructor of the HandlerData class. 
+ * + * @param ctx is the parser context the handler should be executed in. + * @param callbacks is an instance of Callbacks used to notify + * the parser about certain state changes. + * @param name is the name of the string. + * @param state is the state this handler was called for. + * @param location is the location at which the handler is created. + */ + HandlerData(ParserContext &ctx, Callbacks &callbacks, std::string name, + const State &state, const SourceLocation &location); +}; + +/** + * The Handler class provides a context for handling a generic stack element. + * It has to beoverridden and registered in the StateStack class to form + * handlers for concrete XML tags. + */ +class Handler { +private: + /** + * Structure containing the internal handler data. + */ + const HandlerData internalData; + +protected: + /** + * Constructor of the Handler class. + * + * @param data is a structure containing all data being passed to the + * handler. + */ + Handler(const HandlerData &internalData); + + /** + * Returns a reference at the ParserContext. + * + * @return a reference at the ParserContext. + */ + ParserContext &context(); + + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. + */ + const std::string &name(); + + /** + * Returns a reference at the ParserScope instance. + * + * @return a reference at the ParserScope instance. + */ + ParserScope &scope(); + + /** + * Returns a reference at the Manager instance which manages all nodes. + * + * @return a referance at the Manager instance. + */ + Manager &manager(); + + /** + * Returns a reference at the Logger instance used for logging error + * messages. + * + * @return a reference at the Logger instance. + */ + Logger &logger(); + + /** + * Reference at the State descriptor for which this Handler was created. + * + * @return a const reference at the constructing State descriptor. 
+ */ + const State &state(); + + /** + * Returns the current location in the source file. + * + * @return the current location in the source file. + */ + SourceLocation location(); + +public: + /** + * Virtual destructor. + */ + virtual ~Handler(); + + /** + * Calls the corresponding function in the Callbacks instance. Sets the + * whitespace mode that specifies how string data should be processed. The + * calls to this function are placed on a stack by the underlying Stack + * class. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + void setWhitespaceMode(WhitespaceMode whitespaceMode); + + /** + * Calls the corresponding function in the Callbacks instance. + * Registers the given token as token that should be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be reported. + */ + void registerToken(const std::string &token); + + /** + * Calls the corresponding function in the Callbacks instance. + * Unregisters the given token, it will no longer be reported to the handler + * using the "token" function. + * + * @param token is the token string that should be unregistered. + */ + void unregisterToken(const std::string &token); + + /** + * Called when the command that was specified in the constructor is + * instanciated. + * + * @param args is a map from strings to variants (argument name and value). + * @return true if the handler was successful in starting the element it + * represents, false otherwise. + */ + virtual bool start(Variant::mapType &args) = 0; + + /** + * Called before the command for which this handler is defined ends (is + * forever removed from the stack). + */ + virtual void end() = 0; + + /** + * Called when a new field starts, while the handler is active. This + * function should return true if the field is supported, false otherwise. 
+ * No error should be logged if the field cannot be started, the caller will + * take care of that (since it is always valid to start a default field, + * even though the corresponding structure does not have a field, as long as + * no data is fed into the field). + * + * @param isDefaultField is set to true if the field that is being started + * is the default/tree field. The handler should set the value of this + * variable to true if the referenced field is indeed the default field. + * @param isImplicit is set to true if the field is implicitly being started + * by the stack (this field always implies isDefaultField being set to + * true). + * @param fieldIndex is the numerical index of the field. + */ + virtual bool fieldStart(bool &isDefaultField, bool isImplicit, + size_t fieldIndex) = 0; + + /** + * Called when a previously opened field ends, while the handler is active. + * Note that a "fieldStart" and "fieldEnd" are always called alternately. + */ + virtual void fieldEnd() = 0; + + /** + * Called whenever an annotation starts while this handler is active. The + * function should return true if starting the annotation was successful, + * false otherwise. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the name in the source code. + * @param args is a map from strings to variants (argument name and value). + * @return true if the mentioned annotation could be started here, false + * if an error occurred. + */ + virtual bool annotationStart(Variant className, Variant::mapType &args) = 0; + + /** + * Called whenever an annotation ends while this handler is active. The + * function should return true if ending the annotation was successful, + * false otherwise. + * + * @param className is a string variant containing the name of the + * annotation class and the location of the class name in the source code. 
+ * @param elementName is a string variant containing the name of the + * annotation class and the location of the element name in the source code. + * @return true if the mentioned annotation could be started here, false if + * an error occurred. + */ + virtual bool annotationEnd(Variant className, Variant elementName) = 0; + + /** + * Called whenever raw data (int the form of a string) is available for the + * Handler instance. + * + * @param data is a string variant containing the character data and its + * location. + */ + virtual void data(Variant data) = 0; +}; + +/** + * HandlerConstructor is a function pointer type used to create concrete + * instances of the Handler class. + * + * @param handlerData is the data that should be passed to the new handler + * instance. + * @return a newly created handler instance. + */ +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * The DefaultHandler class is used in case no element handler is specified in + * the State descriptor. + */ +/*class EmptyHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData); +};*/ + +} +} + +#endif /* _OUSIA_PARSER_STATE_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp new file mode 100644 index 0000000..1d83a68 --- /dev/null +++ b/src/core/parser/stack/Stack.cpp @@ -0,0 +1,188 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include +#include +#include + +#include "Stack.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class StateStack */ + +/** + * Returns an Exception that should be thrown when a currently invalid command + * is thrown. + */ +static LoggableException InvalidCommand(const std::string &name, + const std::set &expected) +{ + if (expected.empty()) { + return LoggableException{ + std::string{"No nested elements allowed, but got \""} + name + + std::string{"\""}}; + } else { + return LoggableException{ + std::string{"Expected "} + + (expected.size() == 1 ? std::string{"\""} + : std::string{"one of \""}) + + Utils::join(expected, "\", \"") + std::string{"\", but got \""} + + name + std::string{"\""}}; + } +} + +StateStack::StateStack( + ParserContext &ctx, + const std::multimap &states) + : ctx(ctx), states(states) +{ +} + +bool StateStack::deduceState() +{ + // Assemble all states + std::vector states; + for (const auto &e : this->states) { + states.push_back(e.second); + } + + // Fetch the type signature of the scope and derive all possible states, + // abort if no unique parser state was found + std::vector possibleStates = + StateDeductor(ctx.getScope().getStackTypeSignature(), states) + .deduce(); + if (possibleStates.size() != 1) { + ctx.getLogger().error( + "Error while including file: Cannot deduce parser state."); + return false; + } + + // Switch to this state by creating a dummy handler + const State *state = possibleStates[0]; + Handler *handler = + DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}}); + stack.emplace(handler); + return true; +} + +std::set 
StateStack::expectedCommands() +{ + const State *currentState = &(this->currentState()); + std::set res; + for (const auto &v : states) { + if (v.second->parents.count(currentState)) { + res.insert(v.first); + } + } + return res; +} + +const State &StateStack::currentState() +{ + return stack.empty() ? States::None : stack.top()->state(); +} + +std::string StateStack::currentCommandName() +{ + return stack.empty() ? std::string{} : stack.top()->name(); +} + +const State *StateStack::findTargetState(const std::string &name) +{ + const State *currentState = &(this->currentState()); + auto range = states.equal_range(name); + for (auto it = range.first; it != range.second; it++) { + const StateSet &parents = it->second->parents; + if (parents.count(currentState) || parents.count(&States::All)) { + return it->second; + } + } + + return nullptr; +} + +void StateStack::start(const std::string &name, Variant::mapType &args, + const SourceLocation &location) +{ + State const *targetState = findTargetState(name); +// TODO: Andreas, please improve this. +// if (!Utils::isIdentifier(name)) { +// throw LoggableException(std::string("Invalid identifier \"") + name + +// std::string("\"")); +// } + + if (targetState == nullptr) { + targetState = findTargetState("*"); + } + if (targetState == nullptr) { + throw InvalidCommand(name, expectedCommands()); + } + + // Fetch the associated constructor + HandlerConstructor ctor = targetState->elementHandler + ? 
targetState->elementHandler + : DefaultHandler::create; + + // Canonicalize the arguments, allow additional arguments + targetState->arguments.validateMap(args, ctx.getLogger(), true); + + // Instantiate the handler and call its start function + Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); + handler->start(args); + stack.emplace(handler); +} + +void StateStack::start(std::string name, const Variant::mapType &args, + const SourceLocation &location) +{ + Variant::mapType argsCopy(args); + start(name, argsCopy); +} + +void StateStack::end() +{ + // Check whether the current command could be ended + if (stack.empty()) { + throw LoggableException{"No command to end."}; + } + + // Remove the current HandlerInstance from the stack + std::shared_ptr inst{stack.top()}; + stack.pop(); + + // Call the end function of the last Handler + inst->end(); +} + +void StateStack::data(const std::string &data, int field) +{ + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException{"No command to receive data."}; + } + + // Pass the data to the current Handler instance + stack.top()->data(data, field); +} +} +} + diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp new file mode 100644 index 0000000..b106475 --- /dev/null +++ b/src/core/parser/stack/Stack.hpp @@ -0,0 +1,191 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file ParserStateStack.hpp + * + * Helper classes for document or description parsers. Contains the + * ParserStateStack class, which is an pushdown automaton responsible for + * accepting commands in the correct order and calling specified handlers. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STATE_STACK_HPP_ +#define _OUSIA_PARSER_STATE_STACK_HPP_ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "Parser.hpp" +#include "ParserContext.hpp" +#include "ParserState.hpp" + +namespace ousia { + +/** + * The ParserStateStack class is a pushdown automaton responsible for turning a + * command stream into a tree of Node instances. + */ +class ParserStateStack { +private: + /** + * Reference at the parser context. + */ + ParserContext &ctx; + + /** + * Map containing all registered command names and the corresponding + * state descriptors. + */ + const std::multimap &states; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::stack> stack; + + /** + * Used internally to get all expected command names for the current state. + * This function is used to build error messages. + * + * @return a set of strings containing the names of the expected commands. + */ + std::set expectedCommands(); + + /** + * Returns the targetState for a command with the given name that can be + * reached from for the current state. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + *state + * otherwise. + */ + const ParserState *findTargetState(const std::string &name); + +public: + /** + * Creates a new instance of the ParserStateStack class. + * + * @param ctx is the parser context the parser stack is working on. 
 + * @param states is a map containing the command names and pointers at the + * corresponding ParserState instances. + */ + ParserStateStack( + ParserContext &ctx, + const std::multimap &states); + + /** + * Tries to reconstruct the parser state from the Scope instance of the + * ParserContext given in the constructor. This functionality is needed for + * including files, as the Parser of the included file needs to be brought to + + an equivalent state as the one in the including file. + * + * @param scope is the ParserScope instance from which the ParserState + * should be reconstructed. + * @param logger is the logger instance to which error messages should be + * written. + * @return true if the operation was successful, false otherwise. + */ + bool deduceState(); + + /** + * Returns the state the ParserStateStack instance currently is in. + * + * @return the state of the currently active Handler instance or STATE_NONE + * if no handler is on the stack. + */ + const ParserState &currentState(); + + /** + * Returns the command name that is currently being handled. + * + * @return the name of the command currently being handled by the active + * Handler instance or an empty string if no handler is currently active. + */ + std::string currentCommandName(); + + /** + * Function that should be called whenever a new command is reached. + * + * @param name is the name of the command (including the namespace + * separator ':') and its corresponding location. Must be a string variant. + * @param args is a map variant containing the arguments that were passed to + * the command. + */ + void command(Variant name, Variant args); + + /** + * Function that should be called whenever a new field starts. Fields of the + * same command may not be separated by calls to + */ + void fieldStart(); + + /** + * Function that should be called whenever a field ends. + */ + void fieldEnd(); + + /** + * Function that should be called whenever character data is found in the + * input stream. 
+ * + * @param data is a variant of any type containing the data that was parsed + * as data. + */ + void data(Variant data); + + /** + * Function that should be called whenever an annotation starts. + * + * @param name is the name of the annotation class. + * @param args is a map variant containing the arguments that were passed + * to the annotation. + */ + void annotationStart(Variant name, Variant args); + + /** + * Function that should be called whenever an annotation ends. + * + * @param name is the name of the annotation class that was ended. + * @param annotationName is the name of the annotation that was ended. + */ + void annotationEnd(Variant name, Variant annotationName); + + /** + * Function that should be called whenever a previously registered token + * is found in the input stream. + * + * @param token is string variant containing the token that was encountered. + */ + void token(Variant token); +}; +} + +#endif /* _OUSIA_PARSER_STATE_STACK_HPP_ */ + diff --git a/src/core/parser/stack/State.cpp b/src/core/parser/stack/State.cpp new file mode 100644 index 0000000..d72f533 --- /dev/null +++ b/src/core/parser/stack/State.cpp @@ -0,0 +1,171 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "State.hpp" + +namespace ousia { +namespace parser_stack { + +/* Class State */ + +State::State() : elementHandler(nullptr), supportsAnnotations(false) {} + +State::State(StateSet parents, Arguments arguments, + RttiSet createdNodeTypes, + HandlerConstructor elementHandler, + bool supportsAnnotations) + : parents(parents), + arguments(arguments), + createdNodeTypes(createdNodeTypes), + elementHandler(elementHandler), + supportsAnnotations(supportsAnnotations) +{ +} + +State::State(const StateBuilder &builder) + : State(builder.build()) +{ +} + +/* Class StateBuilder */ + +StateBuilder &StateBuilder::copy(const State &state) +{ + this->state = state; + return *this; +} + +StateBuilder &StateBuilder::parent(const State *parent) +{ + state.parents = StateSet{parent}; + return *this; +} + +StateBuilder &StateBuilder::parents(const StateSet &parents) +{ + state.parents = parents; + return *this; +} + +StateBuilder &StateBuilder::arguments(const Arguments &arguments) +{ + state.arguments = arguments; + return *this; +} + +StateBuilder &StateBuilder::createdNodeType(const Rtti *type) +{ + state.createdNodeTypes = RttiSet{type}; + return *this; +} + +StateBuilder &StateBuilder::createdNodeTypes(const RttiSet &types) +{ + state.createdNodeTypes = types; + return *this; +} + +StateBuilder &StateBuilder::elementHandler( + HandlerConstructor elementHandler) +{ + state.elementHandler = elementHandler; + return *this; +} + +StateBuilder &StateBuilder::supportsAnnotations(bool supportsAnnotations) +{ + state.supportsAnnotations = supportsAnnotations; + return *this; +} + +const State &StateBuilder::build() const { return state; } + +/* Class StateDeductor */ + +StateDeductor::StateDeductor( + std::vector signature, + std::vector states) + : tbl(signature.size()), + signature(std::move(signature)), + states(std::move(states)) +{ +} + +bool StateDeductor::isActive(size_t d, const State *s) +{ + // Lookup the "active" state of (d, s), if it was not already set + // (e.second is true) we'll have 
to calculate it + auto e = tbl[d].emplace(s, false); + bool &res = e.first->second; + if (!e.second) { + return res; + } + + // Check whether this node is generative (may have produced the Node + // described by the current Signature element) + bool isGenerative = signature[d]->isOneOf(s->createdNodeTypes); + + if (isGenerative && d == 0) { + // End of recursion -- the last signature element is reached and the + // node was generative + res = true; + } else { + // Try repetition of this node + if (isGenerative && isActive(d - 1, s)) { + res = true; + } else { + // Check whether any of the parent nodes were active -- either for + // the previous element (if this one is generative) or for the + // current element (assuming this node was not generative) + for (const State *parent : s->parents) { + if ((isGenerative && isActive(d - 1, parent)) || + isActive(d, parent)) { + res = true; + break; + } + } + } + } + + return res; +} + +std::vector StateDeductor::deduce() +{ + std::vector res; + if (!signature.empty()) { + const size_t D = signature.size(); + for (auto s : states) { + if (signature[D - 1]->isOneOf(s->createdNodeTypes) && + isActive(D - 1, s)) { + res.push_back(s); + } + } + } + return res; +} + +/* Constant initializations */ + +namespace States { +const State All; +const State None; +} +} +} + diff --git a/src/core/parser/stack/State.hpp b/src/core/parser/stack/State.hpp new file mode 100644 index 0000000..ea326ec --- /dev/null +++ b/src/core/parser/stack/State.hpp @@ -0,0 +1,307 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file State.hpp + * + * Defines the State class used within the ParserStack pushdown + * automaton and the StateBuilder class for convenient construction of + * such classes. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STATE_HPP_ +#define _OUSIA_PARSER_STATE_HPP_ + +#include + +#include +#include + +namespace ousia { +namespace parser_stack { + +// Forward declarations +class StateBuilder; +class State; +class HandlerData; +class Handler; +using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); + +/** + * Set of pointers of parser states -- used for specifying a set of parent + * states. + */ +using StateSet = std::unordered_set; + +/** + * Class used for the complete specification of a State. Stores possible + * parent states, state handlers and arguments to be passed to that state. + */ +struct State { + /** + * Vector containing all possible parent states. + */ + StateSet parents; + + /** + * Descriptor of the arguments that should be passed to the handler. + */ + Arguments arguments; + + /** + * Set containing the types of the nodes that may be created in this + * State. This information is needed for Parsers to reconstruct the + * current State from a given ParserScope when a file is included. + */ + RttiSet createdNodeTypes; + + /** + * Pointer at a function which creates a new concrete Handler instance for + * the elements described by this state. May be nullptr in which case no + * handler instance is created. + */ + HandlerConstructor elementHandler; + + /** + * Set to true if this handler does support annotations. 
This is almost + * always false (e.g. all description handlers), except for document + * element handlers. + */ + bool supportsAnnotations; + + /** + * Default constructor, initializes the handlers with nullptr. + */ + State(); + + /** + * Constructor taking values for all fields. Use the StateBuilder + * class for a more convenient construction of State instances. + * + * @param parents is a vector containing all possible parent states. + * @param arguments is a descriptor of arguments that should be passed to + * the handler. + * @param createdNodeTypes is a set containing the types of the nodes tha + * may be created in this State. This information is needed for + * Parsers to reconstruct the current State from a given ParserScope + * when a file is included. + * @param elementHandler is a pointer at a function which creates a new + * concrete Handler instance for the elements described by this state. May + * be nullptr in which case no handler instance is created. + * @param supportsAnnotations specifies whether annotations are supported + * here at all. + */ + State(StateSet parents, Arguments arguments = Arguments{}, + RttiSet createdNodeTypes = RttiSet{}, + HandlerConstructor elementHandler = nullptr, + bool supportsAnnotations = false); + + /** + * Creates this State from the given StateBuilder instance. + */ + State(const StateBuilder &builder); +}; + +/** + * The StateBuilder class is a class used for conveniently building new + * State instances. + */ +class StateBuilder { +private: + /** + * State instance that is currently being built by the + * StateBuilder. + */ + State state; + +public: + /** + * Copies the State instance and uses it as internal state. Overrides + * all changes made by the StateBuilder. + * + * @param state is the state that should be copied. + * @return a reference at this StateBuilder instance for method + * chaining. 
 + */ + StateBuilder &copy(const State &state); + + /** + * Sets the possible parent states to the single given parent element. + * + * @param parent is a pointer at the parent State instance that should + * be the possible parent state. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &parent(const State *parent); + + /** + * Sets the State instances in the given StateSet as the list of + * supported parent states. + * + * @param parents is a set of pointers at State instances that should + * be the possible parent states. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &parents(const StateSet &parents); + + /** + * Sets the arguments that should be passed to the parser state handler to + * those given as argument. + * + * @param arguments is the Arguments instance describing the Arguments that + * should be passed to a Handler for this State. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &arguments(const Arguments &arguments); + + /** + * Sets the Node types this state may produce to the given Rtti descriptor. + * + * @param type is the Rtti descriptor of the Type that may be produced by + * this state. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &createdNodeType(const Rtti *type); + + /** + * Sets the Node types this state may produce to the given Rtti descriptors. + * + * @param types is a set of Rtti descriptors of the Types that may be + * produced by this state. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &createdNodeTypes(const RttiSet &types); + + /** + * Sets the constructor for the element handler. The constructor creates a + * new concrete Handler instance for the elements described by this state. + * May be nullptr in which case no handler instance is created (this is + * the default value). 
+ * + * @param elementHandler is the HandlerConstructor that should create a + * new Handler instance. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &elementHandler(HandlerConstructor elementHandler); + + /** + * Sets the state of the "supportsAnnotations" flags (default value is + * false) + * + * @param supportsAnnotations should be set to true, if annotations are + * supported for the handlers associated with this document. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &supportsAnnotations(bool supportsAnnotations); + + /** + * Returns a reference at the internal State instance that was built + * using the StateBuilder. + * + * @return the built State. + */ + const State &build() const; +}; + +/** + * Class used to deduce the State a Parser is currently in based on the + * types of the Nodes that currently are on the ParserStack. Uses dynamic + * programming in order to solve this problem. + */ +class StateDeductor { +public: + /** + * Type containing the dynamic programming table. + */ + using Table = std::vector>; + +private: + /** + * Dynamic programming table. + */ + Table tbl; + + /** + * Signature given in the constructor. + */ + const std::vector signature; + + /** + * List of states that should be checked for being active. + */ + const std::vector states; + + /** + * Used internally to check whether the given parser stack s may have been + * active for signature element d. + * + * @param d is the signature element. + * @param s is the parser state. + * @return true if the the given State may have been active. + */ + bool isActive(size_t d, const State *s); + +public: + /** + * Constructor of the StateDeductor class. + * + * @param signature a Node type signature describing the types of the nodes + * which currently reside on e.g. the ParserScope stack. + * @param states is a list of states that should be checked. 
+ */ + StateDeductor(std::vector signature, + std::vector states); + + /** + * Selects all active states from the given states. Only considers those + * states that may have produced the last signature element. + * + * @return a list of states that may actually have been active. + */ + std::vector deduce(); +}; + +/** + * The States namespace contains all the global state constants used + * in the ParserStack class. + */ +namespace States { +/** + * State representing all states. + */ +extern const State All; + +/** + * State representing the initial state. + */ +extern const State None; +} +} +} + +#endif /* _OUSIA_PARSER_STATE_HPP_ */ + diff --git a/test/core/parser/ParserStateTest.cpp b/test/core/parser/ParserStateTest.cpp deleted file mode 100644 index 91d8dcd..0000000 --- a/test/core/parser/ParserStateTest.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include -#include - -namespace ousia { - -static const Rtti t1; -static const Rtti t2; -static const Rtti t3; -static const Rtti t4; -static const Rtti t5; - -static const ParserState s1 = ParserStateBuilder().createdNodeType(&t1); -static const ParserState s2a = - ParserStateBuilder().parent(&s1).createdNodeType(&t2); -static const ParserState s2b = - ParserStateBuilder().parent(&s1).createdNodeType(&t2); -static const ParserState s3 = - ParserStateBuilder().parents({&s2a, &s1}).createdNodeType(&t3); -static const ParserState s4 = - ParserStateBuilder().parent(&s3).createdNodeType(&t4); -static const ParserState s5 = - ParserStateBuilder().parent(&s2b).createdNodeType(&t5); - -TEST(ParserStateDeductor, deduce) -{ - using Result = std::vector; - using Signature = std::vector; - std::vector states{&s1, &s2a, &s2b, &s3, &s4, &s5}; - - // Should not crash on empty signature - ASSERT_EQ(Result{}, ParserStateDeductor(Signature{}, states).deduce()); - - // Try repeating signature elements - ASSERT_EQ(Result({&s1}), - ParserStateDeductor(Signature({&t1}), states).deduce()); - ASSERT_EQ(Result({&s1}), - ParserStateDeductor(Signature({&t1, &t1}), states).deduce()); - ASSERT_EQ(Result({&s1}), - ParserStateDeductor(Signature({&t1, &t1, &t1}), states).deduce()); - - // Go to another state - ASSERT_EQ(Result({&s2a, &s2b}), - ParserStateDeductor(Signature({&t1, &t1, &t2}), states).deduce()); - ASSERT_EQ(Result({&s4}), - ParserStateDeductor(Signature({&t1, &t3, &t4}), states).deduce()); - - // Skip one state - ASSERT_EQ(Result({&s4}), - ParserStateDeductor(Signature({&t2, &t4}), states).deduce()); - - // Impossible signature - ASSERT_EQ(Result({}), - ParserStateDeductor(Signature({&t4, &t5}), states).deduce()); - -} -} - diff --git a/test/core/parser/stack/StateTest.cpp b/test/core/parser/stack/StateTest.cpp new file mode 100644 index 0000000..e503d30 --- /dev/null +++ b/test/core/parser/stack/StateTest.cpp @@ -0,0 +1,79 @@ +/* + Ousía + Copyright (C) 2014, 
2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include +#include + +namespace ousia { +namespace parser_stack { + +static const Rtti t1; +static const Rtti t2; +static const Rtti t3; +static const Rtti t4; +static const Rtti t5; + +static const State s1 = StateBuilder().createdNodeType(&t1); +static const State s2a = + StateBuilder().parent(&s1).createdNodeType(&t2); +static const State s2b = + StateBuilder().parent(&s1).createdNodeType(&t2); +static const State s3 = + StateBuilder().parents({&s2a, &s1}).createdNodeType(&t3); +static const State s4 = + StateBuilder().parent(&s3).createdNodeType(&t4); +static const State s5 = + StateBuilder().parent(&s2b).createdNodeType(&t5); + +TEST(StateDeductor, deduce) +{ + using Result = std::vector; + using Signature = std::vector; + std::vector states{&s1, &s2a, &s2b, &s3, &s4, &s5}; + + // Should not crash on empty signature + ASSERT_EQ(Result{}, StateDeductor(Signature{}, states).deduce()); + + // Try repeating signature elements + ASSERT_EQ(Result({&s1}), + StateDeductor(Signature({&t1}), states).deduce()); + ASSERT_EQ(Result({&s1}), + StateDeductor(Signature({&t1, &t1}), states).deduce()); + ASSERT_EQ(Result({&s1}), + StateDeductor(Signature({&t1, &t1, &t1}), states).deduce()); + + // Go to another state + ASSERT_EQ(Result({&s2a, &s2b}), + StateDeductor(Signature({&t1, &t1, &t2}), states).deduce()); + 
ASSERT_EQ(Result({&s4}), + StateDeductor(Signature({&t1, &t3, &t4}), states).deduce()); + + // Skip one state + ASSERT_EQ(Result({&s4}), + StateDeductor(Signature({&t2, &t4}), states).deduce()); + + // Impossible signature + ASSERT_EQ(Result({}), + StateDeductor(Signature({&t4, &t5}), states).deduce()); + +} +} +} + -- cgit v1.2.3 From b04364cdbc2144661a28f78e0aa4e5e337254c50 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:10:16 +0100 Subject: Added isNamespacedIdentifier method to Utils --- src/core/common/Utils.cpp | 15 +++++++++++++++ src/core/common/Utils.hpp | 21 ++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'src/core') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 3739c61..fc8ee00 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -40,6 +40,21 @@ bool Utils::isIdentifier(const std::string &name) return true; } +bool Utils::isNamespaceIdentifier(const std::string &name) +{ + bool first = true; + for (char c : name) { + if (first && !isIdentifierStartCharacter(c)) { + return false; + } + if (!first && (!isIdentifierCharacter(c) || c == ':')) { + return false; + } + first = (c == ':'); + } + return true; +} + bool Utils::hasNonWhitepaceChar(const std::string &s) { for (char c : s) { diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 8361973..b5cd178 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -74,10 +74,29 @@ public: } /** - * Returns true if the given character is in [A-Za-z][A-Za-z0-9_-]* + * Returns true if the given string is in + * \code{.txt} + * [A-Za-z][A-Za-z0-9_-]* + * \endCode + * + * @param name is the string that should be tested. + * @return true if the string matches the regular expression given above, + * false otherwise. 
*/ static bool isIdentifier(const std::string &name); + /** + * Returns true if the given string is in + * \code{.txt} + * ([A-Za-z][A-Za-z0-9_-]*)(:[A-Za-z][A-Za-z0-9_-]*)* + * \endCode + * + * @param name is the string that should be tested. + * @return true if the string matches the regular expression given above, + * false otherwise. + */ + static bool isNamespacedIdentifier(const std::string &name); + /** * Returns true if the given character is a linebreak character. */ -- cgit v1.2.3 From 343900991ee5e2558d45187fe0129a144a5e013a Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:11:11 +0100 Subject: Implemented StaticFieldHandler and EmptyHandler default handler classes --- src/core/parser/stack/Handler.cpp | 184 +++++++++++++++++++++++++++++++++----- src/core/parser/stack/Handler.hpp | 169 +++++++++++++++++++++++++--------- 2 files changed, 288 insertions(+), 65 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 66af2a4..73084cd 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -16,6 +16,8 @@ along with this program. If not, see . 
*/ +#include +#include #include #include "Callbacks.hpp" @@ -40,51 +42,191 @@ HandlerData::HandlerData(ParserContext &ctx, Callbacks &callbacks, /* Class Handler */ -Handler::Handler(const HandlerData &internalData) : internalData(internalData) -{ -} +Handler::Handler(const HandlerData &handlerData) : handlerData(handlerData) {} Handler::~Handler() {} -ParserContext &Handler::context() { return internalData.ctx; } - -const std::string &Handler::name() { return internalData.name; } - -ParserScope &Handler::scope() { return internalData.ctx.getScope(); } +ParserContext &Handler::context() { return handlerData.ctx; } -Manager &Handler::manager() { return internalData.ctx.getManager(); } +ParserScope &Handler::scope() { return handlerData.ctx.getScope(); } -Logger &Handler::logger() { return internalData.ctx.getLogger(); } +Manager &Handler::manager() { return handlerData.ctx.getManager(); } -const State &Handler::state() { return internalData.state; } +Logger &Handler::logger() { return handlerData.ctx.getLogger(); } -SourceLocation Handler::location() { return internalData.location; } +SourceLocation Handler::location() { return handlerData.location; } void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) { - internalData.callbacks.setWhitespaceMode(whitespaceMode); + handlerData.callbacks.setWhitespaceMode(whitespaceMode); } void Handler::registerToken(const std::string &token) { - internalData.callbacks.registerToken(token); + handlerData.callbacks.registerToken(token); } void Handler::unregisterToken(const std::string &token) { - internalData.callbacks.unregisterToken(token); + handlerData.callbacks.unregisterToken(token); +} + +const std::string &Handler::getName() const { return handlerData.name; } + +const State &Handler::getState() const { return handlerData.state; } + +/* Class EmptyHandler */ + +bool EmptyHandler::start(const Variant::mapType &args) +{ + // Just accept anything + return true; +} + +void EmptyHandler::end() +{ + // Do nothing if a 
command ends +} + +bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex) +{ + // Accept any field + return true; +} + +void EmptyHandler::fieldEnd() +{ + // Do not handle fields +} + +bool EmptyHandler::annotationStart(const Variant &className, + const Variant::mapType &args) +{ + // Accept any data + return true; +} + +bool EmptyHandler::annotationEnd(const Variant &className, + const Variant &elementName) +{ + // Accept any annotation + return true; +} + +bool EmptyHandler::data(const Variant &data) +{ + // Support any data + return true; +} + +/* Class StaticHandler */ + +bool StaticHandler::start(const Variant::mapType &args) +{ + // Do nothing in the default implementation, accept anything + return true; } -/* Class DefaultHandler */ +void StaticHandler::end() +{ + // Do nothing here +} -/*void DefaultHandler::start(Variant::mapType &args) {} +bool StaticHandler::fieldStart(bool &isDefault, size_t fieldIdx) +{ + // Return true if either the default field is requested or the field index + // is zero. 
This simulates that there is exactly one field (a default field) + if (fieldIdx == 0) { + isDefault = true; + return true; + } + return false; +} -void DefaultHandler::end() {} +void StaticHandler::fieldEnd() +{ + // Do nothing here +} + +bool StaticHandler::annotationStart(const Variant &className, + const Variant::mapType &args) +{ + // No annotations supported + return false; +} + +bool StaticHandler::annotationEnd(const Variant &className, + const Variant &elementName) +{ + // No annotations supported + return false; +} -Handler *DefaultHandler::create(const data &data) +bool StaticHandler::data(const Variant &data) { - return new DefaultHandler{data}; -}*/ + // No data supported + return false; +} + +/* Class StaticFieldHandler */ + +StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData, + const std::string &argName) + : StaticHandler(handlerData), argName(argName), handled(false) +{ +} + +bool StaticFieldHandler::start(const Variant::mapType &args) +{ + if (!argName.empty()) { + auto it = args.find(argName); + if (it != args.end()) { + handled = true; + doHandle(it->second, args); + return true; + } + } + + this->args = args; + return true; +} + +void StaticFieldHandler::end() +{ + if (!handled) { + if (!argName.empty()) { + logger().error(std::string("Required argument \"") + argName + + std::string("\" is missing."), + location()); + } else { + logger().error("Command requires data, but no data given", + location()); + } + } +} + +bool StaticFieldHandler::data(const Variant &data) +{ + // Call the doHandle function if this has not been done before + if (!handled) { + handled = true; + doHandle(data, args); + return true; + } + + // The doHandle function was already called, print an error message + logger().error( + std::string("Found data, but the corresponding argument \"") + argName + + std::string("\" was already specified"), + data); + + // Print the location at which the attribute was originally specified + auto it = args.find(argName); 
+ if (it != args.end()) { + logger().note(std::string("Attribute was specified here:"), it->second); + } + return false; +} } } diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 0701343..8c3d8c4 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -16,10 +16,9 @@ along with this program. If not, see . */ -#ifndef _OUSIA_PARSER_STATE_HANDLER_HPP_ -#define _OUSIA_PARSER_STATE_HANDLER_HPP_ +#ifndef _OUSIA_PARSER_STACK_HANDLER_HPP_ +#define _OUSIA_PARSER_STACK_HANDLER_HPP_ -#include #include #include @@ -29,13 +28,12 @@ namespace ousia { // Forward declarations class ParserContext; -class Callbacks; class Logger; -class Project; namespace parser_stack { // More forward declarations +class Callbacks; class State; /** @@ -96,7 +94,7 @@ private: /** * Structure containing the internal handler data. */ - const HandlerData internalData; + const HandlerData handlerData; protected: /** @@ -105,7 +103,7 @@ protected: * @param data is a structure containing all data being passed to the * handler. */ - Handler(const HandlerData &internalData); + Handler(const HandlerData &handlerData); /** * Returns a reference at the ParserContext. @@ -114,13 +112,6 @@ protected: */ ParserContext &context(); - /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. - */ - const std::string &name(); - /** * Returns a reference at the ParserScope instance. * @@ -143,13 +134,6 @@ protected: */ Logger &logger(); - /** - * Reference at the State descriptor for which this Handler was created. - * - * @return a const reference at the constructing State descriptor. - */ - const State &state(); - /** * Returns the current location in the source file. * @@ -192,6 +176,20 @@ public: */ void unregisterToken(const std::string &token); + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. 
+ */ + const std::string &getName() const; + + /** + * Reference at the State descriptor for which this Handler was created. + * + * @return a const reference at the constructing State descriptor. + */ + const State &getState() const; + /** * Called when the command that was specified in the constructor is * instanciated. @@ -200,7 +198,7 @@ public: * @return true if the handler was successful in starting the element it * represents, false otherwise. */ - virtual bool start(Variant::mapType &args) = 0; + virtual bool start(const Variant::mapType &args) = 0; /** * Called before the command for which this handler is defined ends (is @@ -216,16 +214,12 @@ public: * even though the corresponding structure does not have a field, as long as * no data is fed into the field). * - * @param isDefaultField is set to true if the field that is being started - * is the default/tree field. The handler should set the value of this - * variable to true if the referenced field is indeed the default field. - * @param isImplicit is set to true if the field is implicitly being started - * by the stack (this field always implies isDefaultField being set to - * true). - * @param fieldIndex is the numerical index of the field. + * @param isDefault is set to true if the field that is being started is the + * default/tree field. The handler should set the value of this variable to + * true if the referenced field is indeed the default field. + * @param fieldIdx is the numerical index of the field. */ - virtual bool fieldStart(bool &isDefaultField, bool isImplicit, - size_t fieldIndex) = 0; + virtual bool fieldStart(bool &isDefault, size_t fieldIdx) = 0; /** * Called when a previously opened field ends, while the handler is active. @@ -244,10 +238,11 @@ public: * @return true if the mentioned annotation could be started here, false * if an error occurred. 
*/ - virtual bool annotationStart(Variant className, Variant::mapType &args) = 0; + virtual bool annotationStart(const Variant &className, + const Variant::mapType &args) = 0; /** - * Called whenever an annotation ends while this handler is active. The + * Called whenever an annotation ends while this handler is active. The * function should return true if ending the annotation was successful, * false otherwise. * @@ -258,16 +253,19 @@ public: * @return true if the mentioned annotation could be started here, false if * an error occurred. */ - virtual bool annotationEnd(Variant className, Variant elementName) = 0; + virtual bool annotationEnd(const Variant &className, + const Variant &elementName) = 0; /** * Called whenever raw data (int the form of a string) is available for the - * Handler instance. + * Handler instance. Should return true if the data could be handled, false + * otherwise. * * @param data is a string variant containing the character data and its * location. + * @return true if the data could be handled, false otherwise. */ - virtual void data(Variant data) = 0; + virtual bool data(const Variant &data) = 0; }; /** @@ -281,22 +279,105 @@ public: using HandlerConstructor = Handler *(*)(const HandlerData &handlerData); /** - * The DefaultHandler class is used in case no element handler is specified in - * the State descriptor. + * The EmptyHandler class is used in case no element handler is specified in + * the State descriptor. It just accepts all data and does nothing. 
*/ -/*class EmptyHandler : public Handler { -public: +class EmptyHandler : public Handler { +protected: using Handler::Handler; - void start(Variant::mapType &args) override; - +public: + bool start(const Variant::mapType &args) override; void end() override; + bool fieldStart(bool &isDefault, size_t fieldIdx) override; + void fieldEnd() override; + bool annotationStart(const Variant &className, + const Variant::mapType &args) override; + bool annotationEnd(const Variant &className, + const Variant &elementName) override; + bool data(const Variant &data) override; + /** + * Creates an instance of the EmptyHandler class. + */ static Handler *create(const HandlerData &handlerData); -};*/ +}; +/** + * The StaticHandler class is used to handle predefined commands which do + * neither support annotations, nor multiple fields. Child classes can decide + * whether a single data field should be used. + */ +class StaticHandler : public Handler { +protected: + using Handler::Handler; + +public: + bool start(const Variant::mapType &args) override; + void end() override; + bool fieldStart(bool &isDefault, size_t fieldIdx) override; + void fieldEnd() override; + bool annotationStart(const Variant &className, + const Variant::mapType &args) override; + bool annotationEnd(const Variant &className, + const Variant &elementName) override; + bool data(const Variant &data) override; +}; + +/** + * The StaticFieldHandler class is used to handle predefined commands which do + * neither support annotations, nor multiple fields. Additionally, it captures a + * data entry from a single default field. + */ +class StaticFieldHandler : public StaticHandler { +private: + /** + * Set to the name of the data argument that should be used instead of the + * data field, if no data field is given. + */ + std::string argName; + + /** + * Set to true, once the "doHandle" function has been called. + */ + bool handled; + + /** + * Map containing the arguments given in the start function. 
+ */ + Variant::mapType args; + +protected: + /** + * Constructor of the StaticFieldHandler class. + * + * @param handlerData is a structure containing the internal data that + * should be stored inside the handler. + * @param argName is the name of the data argument that -- if present -- should be used + * instead of the data field. If empty, data is not captured from the + * arguments. If both, data in the data field and the argument, are given, + * this results in an error. + */ + StaticFieldHandler(const HandlerData &handlerData, + const std::string &argName); + + /** + * Function that should be overridden in order to handle the field data and + * the other arguments. This function is not called if no data was given. + * + * @param fieldData is the captured field data. + * @param args are the arguments that were given in the "start" function. + */ + virtual void doHandle(const Variant &fieldData, + const Variant::mapType &args) = 0; + +public: + bool start(const Variant::mapType &args) override; + void end() override; + bool data(const Variant &data) override; +}; } } -#endif /* _OUSIA_PARSER_STATE_HANDLER_HPP_ */ +#endif /* _OUSIA_PARSER_STACK_HANDLER_HPP_ */ -- cgit v1.2.3 From c5fde12cbac6907da4e267492206b2df3dad01f8 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:11:39 +0100 Subject: Renamed header guard --- src/core/parser/stack/Callbacks.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index bb56e44..9c61000 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -26,8 +26,8 @@ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_PARSER_STATE_CALLBACKS_HPP_ -#define _OUSIA_PARSER_STATE_CALLBACKS_HPP_ +#ifndef _OUSIA_PARSER_STACK_CALLBACKS_HPP_ +#define _OUSIA_PARSER_STACK_CALLBACKS_HPP_ #include @@ -95,5 +95,5 @@ class ParserCallbacks : public Callbacks { } } 
-#endif /* _OUSIA_PARSER_STATE_CALLBACKS_HPP_ */ +#endif /* _OUSIA_PARSER_STACK_CALLBACKS_HPP_ */ -- cgit v1.2.3 From 26766a588d988e635112878aba71c69c8f057c16 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:12:04 +0100 Subject: Renamed StateStack to Stack --- src/core/parser/stack/Stack.cpp | 151 ++++++++++++++++++++++--------- src/core/parser/stack/Stack.hpp | 195 ++++++++++++++++++++++++++++++---------- 2 files changed, 256 insertions(+), 90 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 1d83a68..b0df39b 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -21,20 +21,67 @@ #include #include #include +#include +#include "Handler.hpp" #include "Stack.hpp" +#include "State.hpp" namespace ousia { namespace parser_stack { -/* Class StateStack */ +/* Class HandlerInfo */ + +HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {} + +HandlerInfo::HandlerInfo(std::shared_ptr handler) + : handler(handler), + fieldIdx(0), + inField(false), + inDefaultField(false), + inImplicitDefaultField(false), + hasDefaultField(false) +{ +} + +HandlerInfo::~HandlerInfo() +{ + // Do nothing +} + +void HandlerInfo::fieldStart(bool isDefault, bool isImplicit, bool isValid) +{ + inField = true; + inDefaultField = isDefault || isImplicit; + inImplicitDefaultField = isImplicit; + inValidField = isValid; + hasDefaultField = hasDefaultField || inDefaultField; + fieldIdx++; +} + +void HandlerInfo::fieldEnd() +{ + inField = false; + inDefaultField = false; + inImplicitDefaultField = false; + inValidField = false; + if (fieldIdx > 0) { + fieldIdx--; + } +} + +/* Helper functions */ /** * Returns an Exception that should be thrown when a currently invalid command * is thrown. + * + * @param name is the name of the command for which no state transition is + * found. + * @param expected is a set containing the names of the expected commands. 
*/ -static LoggableException InvalidCommand(const std::string &name, - const std::set &expected) +static LoggableException buildInvalidCommandException( + const std::string &name, const std::set &expected) { if (expected.empty()) { return LoggableException{ @@ -50,14 +97,22 @@ static LoggableException InvalidCommand(const std::string &name, } } -StateStack::StateStack( - ParserContext &ctx, - const std::multimap &states) +/* Class Stack */ + +Stack::Stack(ParserContext &ctx, + const std::multimap &states) : ctx(ctx), states(states) { + // If the scope instance is not empty we need to deduce the current parser + // state + if (!ctx.getScope().isEmpty()) { + deduceState(); + } } -bool StateStack::deduceState() +Stack::~Stack() {} + +bool Stack::deduceState() { // Assemble all states std::vector states; @@ -68,23 +123,28 @@ bool StateStack::deduceState() // Fetch the type signature of the scope and derive all possible states, // abort if no unique parser state was found std::vector possibleStates = - StateDeductor(ctx.getScope().getStackTypeSignature(), states) - .deduce(); - if (possibleStates.size() != 1) { - ctx.getLogger().error( - "Error while including file: Cannot deduce parser state."); - return false; + StateDeductor(ctx.getScope().getStackTypeSignature(), states).deduce(); + if (possibleStates.size() != 1U) { + throw LoggableException{ + "Error while including file: Cannot deduce parser state."}; } // Switch to this state by creating a dummy handler const State *state = possibleStates[0]; - Handler *handler = - DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}}); - stack.emplace(handler); + stack.emplace(std::shared_ptr{EmptyHandler::create({ctx, "", *state, *state, SourceLocation{}})}); +} + +bool Stack::handlersValid() +{ + for (auto it = stack.crbegin(); it != stack.crend(); it++) { + if (!it->valid) { + return false; + } + } return true; } -std::set StateStack::expectedCommands() +std::set Stack::expectedCommands() { const State 
*currentState = &(this->currentState()); std::set res; @@ -96,17 +156,17 @@ std::set StateStack::expectedCommands() return res; } -const State &StateStack::currentState() +const State &Stack::currentState() { return stack.empty() ? States::None : stack.top()->state(); } -std::string StateStack::currentCommandName() +std::string Stack::currentCommandName() { return stack.empty() ? std::string{} : stack.top()->name(); } -const State *StateStack::findTargetState(const std::string &name) +const State *Stack::findTargetState(const std::string &name) { const State *currentState = &(this->currentState()); auto range = states.equal_range(name); @@ -120,21 +180,26 @@ const State *StateStack::findTargetState(const std::string &name) return nullptr; } -void StateStack::start(const std::string &name, Variant::mapType &args, - const SourceLocation &location) +void Stack::command(const Variant &name, const Variant::mapType &args) { - State const *targetState = findTargetState(name); -// TODO: Andreas, please improve this. 
-// if (!Utils::isIdentifier(name)) { -// throw LoggableException(std::string("Invalid identifier \"") + name + -// std::string("\"")); -// } + // Make sure the given identifier is valid + if (!Utils::isNamespacedIdentifier(name.asString())) { + throw LoggableException(std::string("Invalid identifier \"") + + name.asString() + std::string("\""), name); + } + + // Try to find a target state for the given command + State const *targetState = findTargetState(name.asString()); + // No target state is found, try to find a wildcard handler for the current + // state if (targetState == nullptr) { targetState = findTargetState("*"); } + + // No handler has been found at all, if (targetState == nullptr) { - throw InvalidCommand(name, expectedCommands()); + throw buildInvalidCommandException(name.asString(), expectedCommands()); } // Fetch the associated constructor @@ -145,20 +210,24 @@ void StateStack::start(const std::string &name, Variant::mapType &args, // Canonicalize the arguments, allow additional arguments targetState->arguments.validateMap(args, ctx.getLogger(), true); - // Instantiate the handler and call its start function - Handler *handler = ctor({ctx, name, *targetState, currentState(), location}); - handler->start(args); - stack.emplace(handler); -} + // Instantiate the handler and push it onto the stack + Handler *handler = + ctor({ctx, name.asString(), *targetState, currentState(), name.getLocation()}); + stack.emplace_back(std::shared_ptr{handler}); -void StateStack::start(std::string name, const Variant::mapType &args, - const SourceLocation &location) -{ - Variant::mapType argsCopy(args); - start(name, argsCopy); + // Call the "start" method of the handler, store the result of the start + // method as the validity of the handler -- do not call the start method + // if the stack is currently invalid (as this may cause further, unwanted + // errors) + try { + stack.back().valid = handlersValid() && handler->start(args); + } catch (LoggableException ex) { + 
stack.back().valid = false; + logger.log(ex, ) + } } -void StateStack::end() +void Stack::end() { // Check whether the current command could be ended if (stack.empty()) { @@ -173,7 +242,7 @@ void StateStack::end() inst->end(); } -void StateStack::data(const std::string &data, int field) +void Stack::data(const std::string &data, int field) { // Check whether there is any command the data can be sent to if (stack.empty()) { diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index b106475..294f7ec 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -17,41 +17,126 @@ */ /** - * @file ParserStateStack.hpp + * @file Stack.hpp * * Helper classes for document or description parsers. Contains the - * ParserStateStack class, which is an pushdown automaton responsible for + * Stack class, which is an pushdown automaton responsible for * accepting commands in the correct order and calling specified handlers. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_PARSER_STATE_STACK_HPP_ -#define _OUSIA_PARSER_STATE_STACK_HPP_ +#ifndef _OUSIA_PARSER_STACK_STACK_HPP_ +#define _OUSIA_PARSER_STACK_STACK_HPP_ #include #include #include #include -#include #include #include -#include -#include - -#include "Parser.hpp" -#include "ParserContext.hpp" -#include "ParserState.hpp" +#include namespace ousia { +// Forward declarations +class ParserContext; + +namespace parser_stack { + +// Forward declarations +class Handler; +class State; + /** - * The ParserStateStack class is a pushdown automaton responsible for turning a - * command stream into a tree of Node instances. + * The HandlerInfo class is used internally by the stack to associate additional + * (mutable) data with a handler instance. */ -class ParserStateStack { +class HandlerInfo { +public: + /** + * Pointer pointing at the actual handler instance. 
+ */ + std::shared_ptr<Handler> handler; + + /** + * Next field index to be passed to the "fieldStart" function of the Handler + * class. + */ + size_t fieldIdx; + + /** + * Set to true if the handler is valid (which is the case if the "start" + * method has returned true). If the handler is invalid, no more calls are + * directed at it until it can be removed from the stack. + */ + bool valid : 1; + + /** + * Set to true if the handler currently is in a field. + */ + bool inField : 1; + + /** + * Set to true if the handler currently is in the default field. + */ + bool inDefaultField : 1; + + /** + * Set to true if the handler currently is in an implicitly started default + * field. + */ + bool inImplicitDefaultField : 1; + + /** + * Set to false if this field is only opened pro-forma and does not accept + * any data. Otherwise set to true. + */ + bool inValidField : 1; + + /** + * Set to true, if the default field was already started. + */ + bool hasDefaultField : 1; + + /** + * Default constructor of the HandlerInfo class. + */ + HandlerInfo(); + + /** + * Constructor of the HandlerInfo class, taking a shared_ptr to the handler + * to which additional information should be attached. + */ + HandlerInfo(std::shared_ptr<Handler> handler); + + /** + * Destructor of the HandlerInfo class (to allow Handler to be forward + * declared). + */ + ~HandlerInfo(); + + /** + * Updates the "field" flags according to a "fieldStart" event. + */ + void fieldStart(bool isDefault, bool isImplicit, bool isValid); + + /** + * Updates the "fields" flags according to a "fieldEnd" event. + */ + void fieldEnd(); +}; + + +/** + * The Stack class is a pushdown automaton responsible for turning a command + * stream into a tree of Node instances. It does so by following a state + * transition graph and creating a set of Handler instances, which are placed + * on the stack. + */ +class Stack { private: /** * Reference at the parser context. 
@@ -62,12 +147,12 @@ private: * Map containing all registered command names and the corresponding * state descriptors. */ - const std::multimap &states; + const std::multimap &states; /** * Internal stack used for managing the currently active Handler instances. */ - std::stack> stack; + std::vector stack; /** * Used internally to get all expected command names for the current state. @@ -83,44 +168,50 @@ private: * * @param name is the name of the requested command. * @return nullptr if no target state was found, a pointer at the target - *state - * otherwise. + * state otherwise. */ - const ParserState *findTargetState(const std::string &name); + const State *findTargetState(const std::string &name); + + /** + * Tries to reconstruct the parser state from the Scope instance of the + * ParserContext given in the constructor. This functionality is needed for + * including files,as the Parser of the included file needs to be brought to + * an equivalent state as the one in the including file. + */ + void deduceState(); + + /** + * Returns true if all handlers on the stack are currently valid, or false + * if at least one handler is invalid. + * + * @return true if all handlers on the stack are valid. + */ + bool handlersValid(); public: /** - * Creates a new instance of the ParserStateStack class. + * Creates a new instance of the Stack class. * * @param ctx is the parser context the parser stack is working on. * @param states is a map containing the command names and pointers at the - * corresponding ParserState instances. + * corresponding State instances. */ - ParserStateStack( + Stack( ParserContext &ctx, - const std::multimap &states); + const std::multimap &states); /** - * Tries to reconstruct the parser state from the Scope instance of the - * ParserContext given in the constructor. This functionality is needed for - * including files,as the Parser of the included file needs to be brought to - + an equivalent state as the one in the including file. 
- * - * @param scope is the ParserScope instance from which the ParserState - * should be reconstructed. - * @param logger is the logger instance to which error messages should be - * written. - * @return true if the operation was sucessful, false otherwise. + * Destructor of the Stack class. */ - bool deduceState(); + ~Stack(); /** - * Returns the state the ParserStateStack instance currently is in. + * Returns the state the Stack instance currently is in. * * @return the state of the currently active Handler instance or STATE_NONE * if no handler is on the stack. */ - const ParserState &currentState(); + const State &currentState(); /** * Returns the command name that is currently being handled. @@ -135,30 +226,35 @@ public: * * @param name is the name of the command (including the namespace * separator ':') and its corresponding location. Must be a string variant. - * @param args is a map variant containing the arguments that were passed to - * the command. + * @param args is a map containing the arguments that were passed to the + * command. */ - void command(Variant name, Variant args); + void command(const Variant &name, const Variant::mapType &args); /** * Function that should be called whenever a new field starts. Fields of the - * same command may not be separated by calls to + * same command may not be separated by calls to data or annotations. Doing + * so will result in a LoggableException. + * + * @param isDefault should be set to true if the started field explicitly + * is the default field. */ - void fieldStart(); + void fieldStart(bool isDefault); /** - * Function that should be called whenever a field ends. + * Function that should be called whenever a field ends. Calling this + * function if there is no field to end will result in a LoggableException. */ void fieldEnd(); /** * Function that should be called whenever character data is found in the - * input stream. + * input stream. May only be called if there currently is a command on the + * stack. 
* - * @param data is a variant of any type containing the data that was parsed - * as data. + * @param data is a string variant containing the data that has been found. */ - void data(Variant data); + void data(const Variant &data); /** * Function that should be called whenever an annotation starts. @@ -167,7 +263,7 @@ public: * @param args is a map variant containing the arguments that were passed * to the annotation. */ - void annotationStart(Variant name, Variant args); + void annotationStart(const Variant &className, const Variant &args); /** * Function that should be called whenever an annotation ends. @@ -175,7 +271,7 @@ public: * @param name is the name of the annotation class that was ended. * @param annotationName is the name of the annotation that was ended. */ - void annotationEnd(Variant name, Variant annotationName); + void annotationEnd(const Variant &className, const Variant &elementName); /** * Function that should be called whenever a previously registered token @@ -186,6 +282,7 @@ public: void token(Variant token); }; } +} -#endif /* _OUSIA_PARSER_STATE_STACK_HPP_ */ +#endif /* _OUSIA_STACK_HPP_ */ -- cgit v1.2.3 From 9acab70815a0f62bdaf2c7f01e588066b818d330 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 22:45:19 +0100 Subject: Fixed isIdentifier and isNamespacedIdentifier, added and used isIdentifierOrEmpty for use in Node --- src/core/common/Utils.cpp | 13 +++++++++---- src/core/common/Utils.hpp | 5 +++++ src/core/model/Node.cpp | 2 +- test/core/common/UtilsTest.cpp | 39 +++++++++++++++++++++++++++++++++------ 4 files changed, 48 insertions(+), 11 deletions(-) (limited to 'src/core') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index fc8ee00..f8b53c6 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -37,22 +37,27 @@ bool Utils::isIdentifier(const std::string &name) } first = false; } - return true; + return !first; } -bool Utils::isNamespaceIdentifier(const std::string &name) 
+bool Utils::isIdentifierOrEmpty(const std::string &name) +{ + return name.empty() || isIdentifier(name); +} + +bool Utils::isNamespacedIdentifier(const std::string &name) { bool first = true; for (char c : name) { if (first && !isIdentifierStartCharacter(c)) { return false; } - if (!first && (!isIdentifierCharacter(c) || c == ':')) { + if (!first && (!isIdentifierCharacter(c) && c != ':')) { return false; } first = (c == ':'); } - return true; + return !first; } bool Utils::hasNonWhitepaceChar(const std::string &s) diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index b5cd178..b5a54fc 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -85,6 +85,11 @@ public: */ static bool isIdentifier(const std::string &name); + /** + * Returns true if the given string is an identifier or an empty string. + */ + static bool isIdentifierOrEmpty(const std::string &name); + /** * Returns true if the given string is in * \code{.txt} diff --git a/src/core/model/Node.cpp b/src/core/model/Node.cpp index 39ee2e4..ce15cad 100644 --- a/src/core/model/Node.cpp +++ b/src/core/model/Node.cpp @@ -448,7 +448,7 @@ bool Node::doValidate(Logger &logger) const { return true; } bool Node::validateName(Logger &logger) const { - if (!Utils::isIdentifier(name)) { + if (!Utils::isIdentifierOrEmpty(name)) { logger.error(type()->name + std::string(" name \"") + name + std::string("\" is not a valid identifier"), this); diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index a4bf4b2..7801296 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -24,14 +24,40 @@ namespace ousia { TEST(Utils, isIdentifier) { - ASSERT_TRUE(Utils::isIdentifier("test")); - ASSERT_TRUE(Utils::isIdentifier("t0-_est")); - ASSERT_FALSE(Utils::isIdentifier("_t0-_EST")); - ASSERT_FALSE(Utils::isIdentifier("-t0-_EST")); - ASSERT_FALSE(Utils::isIdentifier("0t-_EST")); - ASSERT_FALSE(Utils::isIdentifier("invalid key")); + 
EXPECT_TRUE(Utils::isIdentifier("test")); + EXPECT_TRUE(Utils::isIdentifier("t0-_est")); + EXPECT_FALSE(Utils::isIdentifier("_t0-_EST")); + EXPECT_FALSE(Utils::isIdentifier("-t0-_EST")); + EXPECT_FALSE(Utils::isIdentifier("0t-_EST")); + EXPECT_FALSE(Utils::isIdentifier("_A")); + EXPECT_FALSE(Utils::isIdentifier("invalid key")); + EXPECT_FALSE(Utils::isIdentifier("")); } + +TEST(Utils, isNamespacedIdentifier) +{ + EXPECT_TRUE(Utils::isNamespacedIdentifier("test")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("_t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("-t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("0t-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("invalid key")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("_A")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("")); + EXPECT_FALSE(Utils::isNamespacedIdentifier(":")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("test:a")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:b")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("test:test")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:t0-_est")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("test:_A")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("test::a")); + EXPECT_FALSE(Utils::isNamespacedIdentifier(":test")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est:_t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est: b")); +} + + TEST(Utils, split) { ASSERT_EQ(std::vector({"ab"}), Utils::split("ab", '.')); @@ -82,5 +108,6 @@ TEST(Utils, endsWith) ASSERT_TRUE(Utils::endsWith("foobar", "bar")); ASSERT_TRUE(Utils::endsWith("foo", "")); } + } -- cgit v1.2.3 From 22c9d5b5504c81902ccbfae386cf69351d7d0209 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:14:12 +0100 Subject: Commented out Callbacks in Handler, this is not implemented yet --- src/core/parser/stack/Handler.cpp | 40 ++++++++++++++++++++++++++++---------- 
src/core/parser/stack/Handler.hpp | 41 ++++++++++++++++++++++++++++++++++----- 2 files changed, 66 insertions(+), 15 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 73084cd..54dfe3e 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -29,12 +29,12 @@ namespace parser_stack { /* Class HandlerData */ -HandlerData::HandlerData(ParserContext &ctx, Callbacks &callbacks, - std::string name, const State &state, +HandlerData::HandlerData(ParserContext &ctx, /*Callbacks &callbacks,*/ + const std::string &name, const State &state, const SourceLocation &location) : ctx(ctx), - callbacks(callbacks), - name(std::move(name)), + /*callbacks(callbacks),*/ + name(name), state(state), location(location) { @@ -42,7 +42,10 @@ HandlerData::HandlerData(ParserContext &ctx, Callbacks &callbacks, /* Class Handler */ -Handler::Handler(const HandlerData &handlerData) : handlerData(handlerData) {} +Handler::Handler(const HandlerData &handlerData) + : handlerData(handlerData), internalLogger(nullptr) +{ +} Handler::~Handler() {} @@ -52,29 +55,41 @@ ParserScope &Handler::scope() { return handlerData.ctx.getScope(); } Manager &Handler::manager() { return handlerData.ctx.getManager(); } -Logger &Handler::logger() { return handlerData.ctx.getLogger(); } +Logger &Handler::logger() +{ + if (internalLogger != nullptr) { + return *internalLogger; + } + return handlerData.ctx.getLogger(); +} -SourceLocation Handler::location() { return handlerData.location; } +const SourceLocation &Handler::location() const { return handlerData.location; } void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) { - handlerData.callbacks.setWhitespaceMode(whitespaceMode); + /*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/ } void Handler::registerToken(const std::string &token) { - handlerData.callbacks.registerToken(token); + /*handlerData.callbacks.registerToken(token);*/ } void 
Handler::unregisterToken(const std::string &token) { - handlerData.callbacks.unregisterToken(token); + /*handlerData.callbacks.unregisterToken(token);*/ } const std::string &Handler::getName() const { return handlerData.name; } const State &Handler::getState() const { return handlerData.state; } +void Handler::setLogger(Logger &logger) { internalLogger = &logger; } + +void Handler::resetLogger() { internalLogger = nullptr; } + +const SourceLocation &Handler::getLocation() const { return location(); } + /* Class EmptyHandler */ bool EmptyHandler::start(const Variant::mapType &args) @@ -119,6 +134,11 @@ bool EmptyHandler::data(const Variant &data) return true; } +Handler *EmptyHandler::create(const HandlerData &handlerData) +{ + return new EmptyHandler(handlerData); +} + /* Class StaticHandler */ bool StaticHandler::start(const Variant::mapType &args) diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 8c3d8c4..eeaf555 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -23,10 +23,12 @@ #include #include +#include namespace ousia { // Forward declarations +class ParserScope; class ParserContext; class Logger; @@ -53,7 +55,7 @@ public: * modifying the behaviour of the parser (like registering tokens, setting * the data type or changing the whitespace handling mode). */ - Callbacks &callbacks; + // Callbacks &callbacks; /** * Contains the name of the command that is being handled. @@ -80,7 +82,8 @@ public: * @param state is the state this handler was called for. * @param location is the location at which the handler is created. */ - HandlerData(ParserContext &ctx, Callbacks &callbacks, std::string name, + HandlerData(ParserContext &ctx, + /*Callbacks &callbacks,*/ const std::string &name, const State &state, const SourceLocation &location); }; @@ -96,6 +99,12 @@ private: */ const HandlerData handlerData; + /** + * Reference at the current logger. 
If not nullptr, this will override the + * logger from the ParserContext specified in the handlerData. + */ + Logger *internalLogger; + protected: /** * Constructor of the Handler class. @@ -135,11 +144,12 @@ protected: Logger &logger(); /** - * Returns the current location in the source file. + * Returns the location of the element in the source file, for which this + * Handler was created. * - * @return the current location in the source file. + * @return the location of the Handler in the source file. */ - SourceLocation location(); + const SourceLocation &location() const; public: /** @@ -190,6 +200,27 @@ public: */ const State &getState() const; + /** + * Sets the internal logger to the given logger instance. + * + * @param logger is the Logger instance to which the logger should be set. + */ + void setLogger(Logger &logger); + + /** + * Resets the logger instance to the logger instance provided in the + * ParserContext. + */ + void resetLogger(); + + /** + * Returns the location of the element in the source file, for which this + * Handler was created. + * + * @return the location of the Handler in the source file. + */ + const SourceLocation &getLocation() const; + /** * Called when the command that was specified in the constructor is * instanciated. 
-- cgit v1.2.3 From 0a8a012850bb7c730ccac4c91c7aca5c88cbedc9 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:14:58 +0100 Subject: Implemented most of the desired behaviour of the Stack class, added unit tests --- src/core/parser/stack/Stack.cpp | 437 ++++++++++++++++++++---- src/core/parser/stack/Stack.hpp | 85 ++++- test/core/parser/stack/StackTest.cpp | 639 +++++++++++++++++++++++++++++++++++ 3 files changed, 1075 insertions(+), 86 deletions(-) create mode 100644 test/core/parser/stack/StackTest.cpp (limited to 'src/core') diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index b0df39b..d84a19c 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -18,6 +18,7 @@ #include +#include #include #include #include @@ -37,10 +38,28 @@ HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {} HandlerInfo::HandlerInfo(std::shared_ptr handler) : handler(handler), fieldIdx(0), + valid(true), + implicit(false), inField(false), inDefaultField(false), inImplicitDefaultField(false), - hasDefaultField(false) + inValidField(false), + hadDefaultField(false) +{ +} + +HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField, + bool inDefaultField, bool inImplicitDefaultField, + bool inValidField) + : handler(nullptr), + fieldIdx(0), + valid(valid), + implicit(implicit), + inField(inField), + inDefaultField(inDefaultField), + inImplicitDefaultField(inImplicitDefaultField), + inValidField(inValidField), + hadDefaultField(false) { } @@ -55,7 +74,7 @@ void HandlerInfo::fieldStart(bool isDefault, bool isImplicit, bool isValid) inDefaultField = isDefault || isImplicit; inImplicitDefaultField = isImplicit; inValidField = isValid; - hasDefaultField = hasDefaultField || inDefaultField; + hadDefaultField = hadDefaultField || inDefaultField; fieldIdx++; } @@ -65,11 +84,13 @@ void HandlerInfo::fieldEnd() inDefaultField = false; inImplicitDefaultField = false; inValidField = false; - if (fieldIdx > 0) { - 
fieldIdx--; - } } +/** + * Stub instance of HandlerInfo containing no handler information. + */ +static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true}; + /* Helper functions */ /** @@ -110,9 +131,31 @@ Stack::Stack(ParserContext &ctx, } } -Stack::~Stack() {} +Stack::~Stack() +{ + while (!stack.empty()) { + // Fetch the topmost stack element + HandlerInfo &info = currentInfo(); + + // It is an error if we're still in a field of an element while the + // Stack instance is destroyed. Log that + if (handlersValid()) { + if (info.inField && !info.implicit && + !info.inImplicitDefaultField) { + logger().error( + std::string("Reached end of stream, but command \"") + + info.handler->getName() + + "\" has not ended yet. Command was started here:", + info.handler->getLocation()); + } + } -bool Stack::deduceState() + // Remove the command from the stack + endCurrentHandler(); + } +} + +void Stack::deduceState() { // Assemble all states std::vector states; @@ -125,23 +168,24 @@ bool Stack::deduceState() std::vector possibleStates = StateDeductor(ctx.getScope().getStackTypeSignature(), states).deduce(); if (possibleStates.size() != 1U) { - throw LoggableException{ - "Error while including file: Cannot deduce parser state."}; + throw LoggableException( + "Error while including file: Cannot deduce parser state."); } - // Switch to this state by creating a dummy handler - const State *state = possibleStates[0]; - stack.emplace(std::shared_ptr{EmptyHandler::create({ctx, "", *state, *state, SourceLocation{}})}); -} + // Switch to this state by creating a handler, but do not call its start + // function + const State &state = *possibleStates[0]; + HandlerConstructor ctor = + state.elementHandler ? 
state.elementHandler : EmptyHandler::create; -bool Stack::handlersValid() -{ - for (auto it = stack.crbegin(); it != stack.crend(); it++) { - if (!it->valid) { - return false; - } - } - return true; + std::shared_ptr handler = + std::shared_ptr{ctor({ctx, "", state, SourceLocation{}})}; + stack.emplace_back(handler); + + // Set the correct flags for this implicit handler + HandlerInfo &info = currentInfo(); + info.implicit = true; + info.fieldStart(true, false, true); } std::set Stack::expectedCommands() @@ -158,12 +202,12 @@ std::set Stack::expectedCommands() const State &Stack::currentState() { - return stack.empty() ? States::None : stack.top()->state(); + return stack.empty() ? States::None : stack.back().handler->getState(); } std::string Stack::currentCommandName() { - return stack.empty() ? std::string{} : stack.top()->name(); + return stack.empty() ? std::string{} : stack.back().handler->getName(); } const State *Stack::findTargetState(const std::string &name) @@ -180,77 +224,330 @@ const State *Stack::findTargetState(const std::string &name) return nullptr; } +const State *Stack::findTargetStateOrWildcard(const std::string &name) +{ + // Try to find the target state with the given name, if none is found, try + // find a matching "*" state. + State const *targetState = findTargetState(name); + if (targetState == nullptr) { + return findTargetState("*"); + } + return targetState; +} + +HandlerInfo &Stack::currentInfo() +{ + return stack.empty() ? EmptyHandlerInfo : stack.back(); +} +HandlerInfo &Stack::lastInfo() +{ + return stack.size() < 2U ? 
EmptyHandlerInfo : stack[stack.size() - 2]; +} + +void Stack::endCurrentHandler() +{ + if (!stack.empty()) { + // Fetch the handler info for the current top-level element + HandlerInfo &info = stack.back(); + + // Do not call any callback functions while the stack is marked as + // invalid or this is an elment marked as "implicit" + if (!info.implicit && handlersValid()) { + // Make sure the fieldEnd handler is called if the element still + // is in a field + if (info.inField) { + info.handler->fieldEnd(); + info.fieldEnd(); + } + + // Call the "end" function of the corresponding Handler instance + info.handler->end(); + } + + // Remove the element from the stack + stack.pop_back(); + } +} + +bool Stack::ensureHandlerIsInField() +{ + // If the current handler is not in a field (and actually has a handler) + // try to start a default field + HandlerInfo &info = currentInfo(); + if (!info.inField && info.handler != nullptr) { + // Abort if the element already had a default field + if (info.hadDefaultField) { + return false; + } + + // Try to start a new default field, abort if this did not work + bool isDefault = true; + if (!info.handler->fieldStart(isDefault, info.fieldIdx)) { + info.handler->fieldEnd(); + endCurrentHandler(); + return false; + } + + // Mark the field as started + info.fieldStart(true, true, true); + } + return true; +} + +bool Stack::handlersValid() +{ + for (auto it = stack.crbegin(); it != stack.crend(); it++) { + if (!it->valid) { + return false; + } + } + return true; +} + +Logger &Stack::logger() { return ctx.getLogger(); } + void Stack::command(const Variant &name, const Variant::mapType &args) { - // Make sure the given identifier is valid + // Make sure the given identifier is valid (preventing "*" from being + // malicously passed to this function) if (!Utils::isNamespacedIdentifier(name.asString())) { throw LoggableException(std::string("Invalid identifier \"") + - name.asString() + std::string("\""), name); + name.asString() + 
std::string("\""), + name); } - // Try to find a target state for the given command - State const *targetState = findTargetState(name.asString()); + State const *lastTargetState = nullptr; + Variant::mapType canonicalArgs; + while (true) { + // Try to find a target state for the given command, if none can be + // found and the current command does not have an open field, then try + // to create an empty default field, otherwise this is an exception + const State *targetState = findTargetStateOrWildcard(name.asString()); + if (targetState == nullptr) { + if (!currentInfo().inField) { + endCurrentHandler(); + continue; + } else { + throw buildInvalidCommandException(name.asString(), + expectedCommands()); + } + } + + // Make sure we're currently inside a field + if (!ensureHandlerIsInField()) { + endCurrentHandler(); + continue; + } - // No target state is found, try to find a wildcard handler for the current - // state - if (targetState == nullptr) { - targetState = findTargetState("*"); - } + // Fork the logger. We do not want any validation errors to skip + LoggerFork loggerFork = logger().fork(); - // No handler has been found at all, - if (targetState == nullptr) { - throw buildInvalidCommandException(name.asString(), expectedCommands()); + // Canonicalize the arguments (if this has not already been done), allow + // additional arguments + if (lastTargetState != targetState) { + canonicalArgs = args; + targetState->arguments.validateMap(canonicalArgs, loggerFork, true); + lastTargetState = targetState; + } + + // Instantiate the handler and push it onto the stack + HandlerConstructor ctor = targetState->elementHandler + ? 
targetState->elementHandler + : EmptyHandler::create; + std::shared_ptr handler{ + ctor({ctx, name.asString(), *targetState, name.getLocation()})}; + stack.emplace_back(handler); + + // Fetch the HandlerInfo for the parent element and the current element + HandlerInfo &parentInfo = lastInfo(); + HandlerInfo &info = currentInfo(); + + // Call the "start" method of the handler, store the result of the start + // method as the validity of the handler -- do not call the start method + // if the stack is currently invalid (as this may cause further, + // unwanted errors) + bool validStack = handlersValid(); + info.valid = false; + if (validStack) { + handler->setLogger(loggerFork); + try { + info.valid = handler->start(canonicalArgs); + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + handler->resetLogger(); + } + + // We started the command within an implicit default field and it is not + // valid -- remove both the new handler and the parent field from the + // stack + if (!info.valid && parentInfo.inImplicitDefaultField) { + endCurrentHandler(); + endCurrentHandler(); + continue; + } + + // If we ended up here, starting the command may or may not have worked, + // but after all, we cannot unroll the stack any further. Update the + // "valid" flag, commit any potential error messages and return. 
+ info.valid = parentInfo.valid && info.valid; + loggerFork.commit(); + return; } +} + +void Stack::data(const Variant &data) +{ + while (true) { + // Check whether there is any command the data can be sent to + if (stack.empty()) { + throw LoggableException("No command here to receive data."); + } + + // Fetch the current command handler information + HandlerInfo &info = currentInfo(); + + // Make sure the current handler has an open field + if (!ensureHandlerIsInField()) { + endCurrentHandler(); + continue; + } + + // If this field should not get any data, log an error and do not call + // the "data" handler + if (!info.inValidField) { + logger().error("Did not expect any data here", data); + } + + if (handlersValid() && info.inValidField) { + // Fork the logger and set it as temporary logger for the "start" + // method. We only want to keep error messages if this was not a try + // to implicitly open a default field. + LoggerFork loggerFork = logger().fork(); + info.handler->setLogger(loggerFork); + + // Pass the data to the current Handler instance + bool valid = false; + try { + valid = info.handler->data(data); + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + + // Reset the logger instance as soon as possible + info.handler->resetLogger(); + + // If placing the data here failed and we're currently in an + // implicitly opened field, just unroll the stack to the next field + // and try again + if (!valid && info.inImplicitDefaultField) { + endCurrentHandler(); + continue; + } + + // Commit the content of the logger fork. Do not change the valid + // flag. + loggerFork.commit(); + } - // Fetch the associated constructor - HandlerConstructor ctor = targetState->elementHandler - ? 
targetState->elementHandler - : DefaultHandler::create; - - // Canonicalize the arguments, allow additional arguments - targetState->arguments.validateMap(args, ctx.getLogger(), true); - - // Instantiate the handler and push it onto the stack - Handler *handler = - ctor({ctx, name.asString(), *targetState, currentState(), name.getLocation()}); - stack.emplace_back(std::shared_ptr{handler}); - - // Call the "start" method of the handler, store the result of the start - // method as the validity of the handler -- do not call the start method - // if the stack is currently invalid (as this may cause further, unwanted - // errors) - try { - stack.back().valid = handlersValid() && handler->start(args); - } catch (LoggableException ex) { - stack.back().valid = false; - logger.log(ex, ) + // There was no reason to unroll the stack any further, so continue + return; } } -void Stack::end() +void Stack::fieldStart(bool isDefault) { - // Check whether the current command could be ended + // Make sure the current handler stack is not empty if (stack.empty()) { - throw LoggableException{"No command to end."}; + throw LoggableException( + "No command for which a field could be started"); } - // Remove the current HandlerInstance from the stack - std::shared_ptr inst{stack.top()}; - stack.pop(); + // Fetch the information attached to the current handler + HandlerInfo &info = currentInfo(); + if (info.inField) { + logger().error( + "Got field start, but there is no command for which to start the " + "field."); + return; + } + + // Copy the isDefault flag to a local variable, the fieldStart method will + // write into this variable + bool defaultField = isDefault; + + // Do not call the "fieldStart" function if we're in an invalid subtree + bool valid = false; + if (handlersValid()) { + try { + valid = info.handler->fieldStart(defaultField, info.fieldIdx); + } + catch (LoggableException ex) { + logger().log(ex); + } + if (!valid && !defaultField) { + logger().error( + 
std::string("Cannot start a new field here (index ") + + std::to_string(info.fieldIdx + 1) + + std::string("), field does not exist")); + } + } - // Call the end function of the last Handler - inst->end(); + // Mark the field as started + info.fieldStart(defaultField, false, valid); } -void Stack::data(const std::string &data, int field) +void Stack::fieldEnd() { - // Check whether there is any command the data can be sent to + // Make sure the current handler stack is not empty if (stack.empty()) { - throw LoggableException{"No command to receive data."}; + throw LoggableException("No command for which a field could be ended"); } - // Pass the data to the current Handler instance - stack.top()->data(data, field); + // Fetch the information attached to the current handler + HandlerInfo &info = currentInfo(); + if (!info.inField) { + logger().error( + "Got field end, but there is no command for which to end the " + "field."); + return; + } + + // Only continue if the current handler stack is in a valid state, do not + // call the fieldEnd function if something went wrong before + if (handlersValid()) { + try { + info.handler->fieldEnd(); + } + catch (LoggableException ex) { + logger().log(ex); + } + } + + // This command no longer is in a field + info.fieldEnd(); + + // As soon as this command had a default field, remove it from the stack + if (info.hadDefaultField) { + endCurrentHandler(); + } +} + +void Stack::annotationStart(const Variant &className, const Variant &args) +{ + // TODO +} + +void Stack::annotationEnd(const Variant &className, const Variant &elementName) +{ + // TODO +} + +void Stack::token(Variant token) +{ + // TODO } } } diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index 294f7ec..76eefd9 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -43,6 +43,7 @@ namespace ousia { // Forward declarations class ParserContext; +class Logger; namespace parser_stack { @@ -75,7 +76,13 @@ 
public: bool valid : 1; /** - * Set to true if the handler currently is in a filed. + * Set to true if this is an implicit handler, that was created when the + * current stack state was deduced. + */ + bool implicit : 1; + + /** + * Set to true if the handler currently is in a field. */ bool inField : 1; @@ -99,12 +106,17 @@ public: /** * Set to true, if the default field was already started. */ - bool hasDefaultField : 1; + bool hadDefaultField : 1; /** * Default constructor of the HandlerInfo class. */ HandlerInfo(); + /** + * Constructor of the HandlerInfo class, allows to set all flags manually. + */ + HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField, + bool inImplicitDefaultField, bool inValidField); /** * Constructor of the HandlerInfo class, taking a shared_ptr to the handler @@ -129,7 +141,6 @@ public: void fieldEnd(); }; - /** * The Stack class is a pushdown automaton responsible for turning a command * stream into a tree of Node instances. It does so by following a state @@ -154,6 +165,11 @@ private: */ std::vector stack; + /** + * Return the reference in the Logger instance stored within the context. + */ + Logger &logger(); + /** * Used internally to get all expected command names for the current state. * This function is used to build error messages. @@ -164,7 +180,7 @@ private: /** * Returns the targetState for a command with the given name that can be - * reached from for the current state. + * reached from the current state. * * @param name is the name of the requested command. * @return nullptr if no target state was found, a pointer at the target @@ -172,6 +188,17 @@ private: */ const State *findTargetState(const std::string &name); + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state, also including the wildcard "*" state. + * Throws an exception if the given target state is not a valid identifier. + * + * @param name is the name of the requested command. 
+ * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetStateOrWildcard(const std::string &name); + /** * Tries to reconstruct the parser state from the Scope instance of the * ParserContext given in the constructor. This functionality is needed for @@ -180,6 +207,33 @@ private: */ void deduceState(); + /** + * Returns a reference at the current HandlerInfo instance (or a stub + * HandlerInfo instance if the stack is empty). + */ + HandlerInfo &currentInfo(); + + /** + * Returns a reference at the last HandlerInfo instance (or a stub + * HandlerInfo instance if the stack has only one element). + */ + HandlerInfo &lastInfo(); + + /** + * Ends the current handler and removes the corresponding element from the + * stack. + */ + void endCurrentHandler(); + + /** + * Tries to start a default field for the current handler, if currently the + * handler is not inside a field and did not have a default field yet. + * + * @return true if the handler is inside a field, false if no field could + * be started. + */ + bool ensureHandlerIsInField(); + /** * Returns true if all handlers on the stack are currently valid, or false * if at least one handler is invalid. @@ -196,9 +250,8 @@ public: * @param states is a map containing the command names and pointers at the * corresponding State instances. */ - Stack( - ParserContext &ctx, - const std::multimap &states); + Stack(ParserContext &ctx, + const std::multimap &states); /** * Destructor of the Stack class. @@ -231,6 +284,15 @@ public: */ void command(const Variant &name, const Variant::mapType &args); + /** + * Function that shuold be called whenever character data is found in the + * input stream. May only be called if the currently is a command on the + * stack. + * + * @param data is a string variant containing the data that has been found. + */ + void data(const Variant &data); + /** * Function that should be called whenever a new field starts. 
Fields of the * same command may not be separated by calls to data or annotations. Doing @@ -247,15 +309,6 @@ public: */ void fieldEnd(); - /** - * Function that shuold be called whenever character data is found in the - * input stream. May only be called if the currently is a command on the - * stack. - * - * @param data is a string variant containing the data that has been found. - */ - void data(const Variant &data); - /** * Function that should be called whenever an annotation starts. * diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp new file mode 100644 index 0000000..7cc8bc5 --- /dev/null +++ b/test/core/parser/stack/StackTest.cpp @@ -0,0 +1,639 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +#include + +#include +#include +#include +#include + +#include + +namespace ousia { +namespace parser_stack { + +// Build an instance of the StandaloneEnvironment used for this unit test +static TerminalLogger logger(std::cerr, true); +// static ConcreteLogger logger; +static StandaloneEnvironment env(logger); + +namespace { + +struct Tracker { + int startCount; + int endCount; + int fieldStartCount; + int fieldEndCount; + int annotationStartCount; + int annotationEndCount; + int dataCount; + + Variant::mapType startArgs; + bool fieldStartIsDefault; + size_t fieldStartIdx; + Variant annotationStartClassName; + Variant::mapType annotationStartArgs; + Variant annotationEndClassName; + Variant annotationEndElementName; + Variant dataData; + + bool startResult; + bool fieldStartSetIsDefault; + bool fieldStartResult; + bool annotationStartResult; + bool annotationEndResult; + bool dataResult; + + Tracker() { reset(); } + + void reset() + { + startCount = 0; + endCount = 0; + fieldStartCount = 0; + fieldEndCount = 0; + annotationStartCount = 0; + annotationEndCount = 0; + dataCount = 0; + + startArgs = Variant::mapType{}; + fieldStartIsDefault = false; + fieldStartIdx = 0; + annotationStartClassName = Variant::fromString(std::string{}); + annotationStartArgs = Variant::mapType{}; + annotationEndClassName = Variant::fromString(std::string{}); + annotationEndElementName = Variant::fromString(std::string{}); + dataData = Variant::fromString(std::string{}); + + startResult = true; + fieldStartSetIsDefault = false; + fieldStartResult = true; + annotationStartResult = true; + annotationEndResult = true; + dataResult = true; + } + + void expect(int startCount, int endCount, int fieldStartCount, + int fieldEndCount, int annotationStartCount, + int annotationEndCount, int dataCount) + { + EXPECT_EQ(startCount, this->startCount); + EXPECT_EQ(endCount, this->endCount); + EXPECT_EQ(fieldStartCount, this->fieldStartCount); + EXPECT_EQ(fieldEndCount, 
this->fieldEndCount); + EXPECT_EQ(annotationStartCount, this->annotationStartCount); + EXPECT_EQ(annotationEndCount, this->annotationEndCount); + EXPECT_EQ(dataCount, this->dataCount); + } +}; + +static Tracker tracker; + +class TestHandler : public Handler { +private: + TestHandler(const HandlerData &handlerData) : Handler(handlerData) {} + +public: + bool start(const Variant::mapType &args) + { + tracker.startCount++; + tracker.startArgs = args; + return tracker.startResult; + } + + void end() { tracker.endCount++; } + + bool fieldStart(bool &isDefault, size_t fieldIdx) + { + tracker.fieldStartCount++; + tracker.fieldStartIsDefault = isDefault; + tracker.fieldStartIdx = fieldIdx; + if (tracker.fieldStartSetIsDefault) { + isDefault = true; + } + return tracker.fieldStartResult; + } + + void fieldEnd() { tracker.fieldEndCount++; } + + bool annotationStart(const Variant &className, const Variant::mapType &args) + { + tracker.annotationStartCount++; + tracker.annotationStartClassName = className; + tracker.annotationStartArgs = args; + return tracker.annotationStartResult; + } + + bool annotationEnd(const Variant &className, const Variant &elementName) + { + tracker.annotationEndCount++; + tracker.annotationEndClassName = className; + tracker.annotationEndElementName = elementName; + return tracker.annotationEndResult; + } + + bool data(const Variant &data) + { + tracker.dataCount++; + tracker.dataData = data; + return tracker.dataResult; + } + + static Handler *create(const HandlerData &handlerData) + { + return new TestHandler(handlerData); + } +}; +} + +namespace States { +static const State Document = + StateBuilder().parent(&None).elementHandler(TestHandler::create); +static const State Body = + StateBuilder().parent(&Document).elementHandler(TestHandler::create); +static const State Empty = + StateBuilder().parent(&Document).elementHandler(TestHandler::create); +static const State Special = + StateBuilder().parent(&All).elementHandler(TestHandler::create); 
+static const State Arguments = + StateBuilder().parent(&None).elementHandler(TestHandler::create).arguments( + {Argument::Int("a"), Argument::String("b")}); +static const State BodyChildren = + StateBuilder().parent(&Body).elementHandler(TestHandler::create); +static const State Any = + StateBuilder().parents({&None, &Any}).elementHandler(TestHandler::create); + +static const std::multimap TestHandlers{ + {"document", &Document}, + {"body", &Body}, + {"empty", &Empty}, + {"special", &Special}, + {"arguments", &Arguments}, + {"*", &BodyChildren}}; + +static const std::multimap AnyHandlers{{"*", &Any}}; +} + +TEST(Stack, basicTest) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::TestHandlers}; + + EXPECT_EQ("", s.currentCommandName()); + EXPECT_EQ(&States::None, &s.currentState()); + + s.command("document", {}); + s.fieldStart(true); + s.data("test1"); + + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + + s.command("body", {}); + s.fieldStart(true); + s.data("test2"); + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + tracker.expect(2, 0, 2, 0, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.command("inner", {}); + s.fieldStart(true); + EXPECT_EQ("inner", s.currentCommandName()); + EXPECT_EQ(&States::BodyChildren, &s.currentState()); + + s.fieldEnd(); + tracker.expect(3, 1, 3, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldEnd(); + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + tracker.expect(3, 2, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.command("body", {}); + s.fieldStart(true); + s.data("test3"); + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + s.fieldEnd(); + tracker.expect(4, 3, 4, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + + 
EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + + s.fieldEnd(); + tracker.expect(4, 4, 4, 4, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + + EXPECT_EQ("", s.currentCommandName()); + EXPECT_EQ(&States::None, &s.currentState()); + } + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, errorInvalidCommands) +{ + Stack s{env.context, States::TestHandlers}; + tracker.reset(); + EXPECT_THROW(s.command("body", {}), LoggableException); + s.command("document", {}); + s.fieldStart(true); + EXPECT_THROW(s.command("document", {}), LoggableException); + s.command("empty", {}); + s.fieldStart(true); + EXPECT_THROW(s.command("body", {}), LoggableException); + s.command("special", {}); + s.fieldStart(true); + s.fieldEnd(); + s.fieldEnd(); + s.fieldEnd(); + EXPECT_EQ(&States::None, &s.currentState()); + ASSERT_THROW(s.fieldEnd(), LoggableException); + ASSERT_THROW(s.data("test"), LoggableException); +} + +TEST(Stack, validation) +{ + Stack s{env.context, States::TestHandlers}; + tracker.reset(); + logger.reset(); + + s.command("arguments", {}); + EXPECT_TRUE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); + + logger.reset(); + s.command("arguments", {{"a", 5}}); + EXPECT_TRUE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); + + logger.reset(); + s.command("arguments", {{"a", 5}, {"b", "test"}}); + EXPECT_FALSE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); +} + +TEST(Stack, invalidCommandName) +{ + Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + + s.command("a", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("a_", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(2, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("a_:b", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + 
ASSERT_THROW(s.command("_a", {}), LoggableException); + ASSERT_THROW(s.command("a:", {}), LoggableException); + ASSERT_THROW(s.command("a:_b", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, multipleFields) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {{"a", false}}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("a", s.currentCommandName()); + EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs); + + s.fieldStart(false); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_FALSE(tracker.fieldStartIsDefault); + EXPECT_EQ(0U, tracker.fieldStartIdx); + + s.data("test"); + tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test", tracker.dataData); + + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + tracker.expect(1, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_FALSE(tracker.fieldStartIsDefault); + EXPECT_EQ(1U, tracker.fieldStartIdx); + + s.data("test2"); + tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test2", tracker.dataData); + + s.fieldEnd(); + tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(true); + tracker.expect(1, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_TRUE(tracker.fieldStartIsDefault); + EXPECT_EQ(2U, tracker.fieldStartIdx); + + s.data("test3"); + tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test3", tracker.dataData); + + s.fieldEnd(); + tracker.expect(1, 1, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + } + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnNewCommand) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", 
{}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("b", {}); + tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnNewCommandWithExplicitDefaultField) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + s.command("b", {}); + tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(2, 1, 2, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + } + tracker.expect(2, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, noImplicitDefaultFieldOnIncompatibleCommand) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + tracker.fieldStartResult = false; + s.command("b", {}); + tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + s.fieldStart(true); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); 
+ s.fieldEnd(); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("", s.currentCommandName()); + + s.command("b", {}); + tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, implicitDefaultFieldOnData) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.data("test"); + tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, autoFieldEnd) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, autoImplicitFieldEnd) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + s.command("b", {}); + s.command("c", {}); + s.command("d", {}); + s.command("e", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(5, 1, 5, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(5, 5, 5, 5, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, invalidDefaultField) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + 
ASSERT_FALSE(logger.hasError()); +} + +TEST(Stack, errorInvalidDefaultFieldData) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + s.fieldStart(true); + ASSERT_FALSE(logger.hasError()); + s.data("test"); + ASSERT_TRUE(logger.hasError()); + s.fieldEnd(); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorInvalidFieldData) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + s.data("test"); + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorFieldStartNoCommand) +{ + tracker.reset(); + logger.reset(); + + Stack s{env.context, States::AnyHandlers}; + ASSERT_THROW(s.fieldStart(false), LoggableException); + ASSERT_THROW(s.fieldStart(true), LoggableException); + tracker.expect(0, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorMutlipleFieldStarts) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorMutlipleFieldEnds) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, 
States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + s.fieldEnd(); + ASSERT_FALSE(logger.hasError()); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} + +TEST(Stack, errorOpenField) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + ASSERT_FALSE(logger.hasError()); + } + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc +} +} +} + -- cgit v1.2.3 From f93bb53648da893cb0ad5a7f91c168106cca4ce0 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:16:03 +0100 Subject: Added missing header inclusion --- src/core/parser/stack/State.hpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/core') diff --git a/src/core/parser/stack/State.hpp b/src/core/parser/stack/State.hpp index ea326ec..4766235 100644 --- a/src/core/parser/stack/State.hpp +++ b/src/core/parser/stack/State.hpp @@ -33,6 +33,7 @@ #include #include +#include namespace ousia { namespace parser_stack { -- cgit v1.2.3 From 551b7be64f207845cb05b8ec593f9bf2d7f0c940 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:25:38 +0100 Subject: Included handler classes from master --- CMakeLists.txt | 4 + src/core/parser/stack/DocumentHandler.cpp | 252 ++++++++++++++++++++++ src/core/parser/stack/DocumentHandler.hpp | 88 ++++++++ src/core/parser/stack/DomainHandler.cpp | 276 +++++++++++++++++++++++++ src/core/parser/stack/DomainHandler.hpp | 200 ++++++++++++++++++ src/core/parser/stack/Handler.cpp | 2 +- src/core/parser/stack/ImportIncludeHandler.cpp | 96 
+++++++++ src/core/parser/stack/ImportIncludeHandler.hpp | 90 ++++++++ src/core/parser/stack/TypesystemHandler.cpp | 175 ++++++++++++++++ src/core/parser/stack/TypesystemHandler.hpp | 121 +++++++++++ 10 files changed, 1303 insertions(+), 1 deletion(-) create mode 100644 src/core/parser/stack/DocumentHandler.cpp create mode 100644 src/core/parser/stack/DocumentHandler.hpp create mode 100644 src/core/parser/stack/DomainHandler.cpp create mode 100644 src/core/parser/stack/DomainHandler.hpp create mode 100644 src/core/parser/stack/ImportIncludeHandler.cpp create mode 100644 src/core/parser/stack/ImportIncludeHandler.hpp create mode 100644 src/core/parser/stack/TypesystemHandler.cpp create mode 100644 src/core/parser/stack/TypesystemHandler.hpp (limited to 'src/core') diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f9a8d3..4a3db32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -163,9 +163,13 @@ ADD_LIBRARY(ousia_core src/core/parser/ParserContext src/core/parser/ParserScope src/core/parser/stack/Callbacks + src/core/parser/stack/DocumentHandler + src/core/parser/stack/DomainHandler src/core/parser/stack/Handler + src/core/parser/stack/ImportIncludeHandler src/core/parser/stack/State src/core/parser/stack/Stack + src/core/parser/stack/TypesystemHandler src/core/parser/utils/Tokenizer src/core/parser/utils/TokenTrie src/core/resource/Resource diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp new file mode 100644 index 0000000..ba7430d --- /dev/null +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -0,0 +1,252 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "DocumentHandler.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +namespace ousia { + +/* DocumentHandler */ + +void DocumentHandler::start(Variant::mapType &args) +{ + Rooted document = + project()->createDocument(args["name"].asString()); + document->setLocation(location()); + scope().push(document); + scope().setFlag(ParserFlag::POST_HEAD, false); +} + +void DocumentHandler::end() { scope().pop(); } + +/* DocumentChildHandler */ + +void DocumentChildHandler::preamble(Handle parentNode, + std::string &fieldName, + DocumentEntity *&parent, bool &inField) +{ + // check if the parent in the structure tree was an explicit field + // reference. + inField = parentNode->isa(&RttiTypes::DocumentField); + if (inField) { + fieldName = parentNode->getName(); + parentNode = scope().selectOrThrow( + {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity}); + } else { + // if it wasn't an explicit reference, we use the default field. + fieldName = DEFAULT_FIELD_NAME; + } + // reference the parent entity explicitly. 
+ parent = nullptr; + if (parentNode->isa(&RttiTypes::StructuredEntity)) { + parent = static_cast( + parentNode.cast().get()); + } else if (parentNode->isa(&RttiTypes::AnnotationEntity)) { + parent = static_cast( + parentNode.cast().get()); + } +} + +void DocumentChildHandler::createPath(const NodeVector &path, + DocumentEntity *&parent) +{ + size_t S = path.size(); + for (size_t p = 1; p < S; p = p + 2) { + parent = static_cast( + parent->createChildStructuredEntity( + path[p].cast(), Variant::mapType{}, + path[p - 1]->getName(), "").get()); + } +} + +void DocumentChildHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + Rooted parentNode = scope().selectOrThrow( + {&RttiTypes::Document, &RttiTypes::StructuredEntity, + &RttiTypes::AnnotationEntity, &RttiTypes::DocumentField}); + + std::string fieldName; + DocumentEntity *parent; + bool inField; + + preamble(parentNode, fieldName, parent, inField); + + // try to find a FieldDescriptor for the given tag if we are not in a + // field already. This does _not_ try to construct transparent paths + // in between. + if (!inField && parent != nullptr && + parent->getDescriptor()->hasField(name())) { + Rooted field{ + new DocumentField(parentNode->getManager(), fieldName, parentNode)}; + field->setLocation(location()); + scope().push(field); + return; + } + + // Otherwise create a new StructuredEntity + // TODO: Consider Anchors and AnnotationEntities + Rooted strct = + scope().resolve(Utils::split(name(), ':'), logger()); + if (strct == nullptr) { + // if we could not resolve the name, throw an exception. 
+ throw LoggableException( + std::string("\"") + name() + "\" could not be resolved.", + location()); + } + + std::string name; + auto it = args.find("name"); + if (it != args.end()) { + name = it->second.asString(); + args.erase(it); + } + + Rooted entity; + if (parentNode->isa(&RttiTypes::Document)) { + entity = parentNode.cast()->createRootStructuredEntity( + strct, args, name); + } else { + // calculate a path if transparent entities are needed in between. + auto path = parent->getDescriptor()->pathTo(strct, logger()); + if (path.empty()) { + throw LoggableException( + std::string("An instance of \"") + strct->getName() + + "\" is not allowed as child of an instance of \"" + + parent->getDescriptor()->getName() + "\"", + location()); + } + + // create all transparent entities until the last field. + createPath(path, parent); + entity = + parent->createChildStructuredEntity(strct, args, fieldName, name); + } + entity->setLocation(location()); + scope().push(entity); +} + +void DocumentChildHandler::end() { scope().pop(); } + +std::pair DocumentChildHandler::convertData( + Handle field, Logger &logger, const std::string &data) +{ + // if the content is supposed to be of type string, we can finish + // directly. + auto vts = field->getPrimitiveType()->getVariantTypes(); + if (std::find(vts.begin(), vts.end(), VariantType::STRING) != vts.end()) { + return std::make_pair(true, Variant::fromString(data)); + } + + // then try to parse the content using the type specification. 
+ auto res = field->getPrimitiveType()->read( + data, logger, location().getSourceId(), location().getStart()); + return res; +} + +void DocumentChildHandler::data(const std::string &data, int fieldIdx) +{ + Rooted parentNode = scope().selectOrThrow( + {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity, + &RttiTypes::DocumentField}); + + std::string fieldName; + DocumentEntity *parent; + bool inField; + + preamble(parentNode, fieldName, parent, inField); + + Rooted desc = parent->getDescriptor(); + /* + * We distinguish two cases here: One for fields that are given. + */ + if (fieldName != DEFAULT_FIELD_NAME) { + // retrieve the actual FieldDescriptor + Rooted field = desc->getFieldDescriptor(fieldName); + if (field == nullptr) { + logger().error( + std::string("Can't handle data because no field with name \"") + + fieldName + "\" exists in descriptor\"" + desc->getName() + + "\".", + location()); + return; + } + // if it is not primitive at all, we can't parse the content. + if (!field->isPrimitive()) { + logger().error(std::string("Can't handle data because field \"") + + fieldName + "\" of descriptor \"" + + desc->getName() + "\" is not primitive!", + location()); + return; + } + // then try to parse the content using the type specification. + auto res = convertData(field, logger(), data); + // add it as primitive content. + if (res.first) { + parent->createChildDocumentPrimitive(res.second, fieldName); + } + } else { + /* + * The second case is for primitive fields. Here we search through + * all FieldDescriptors that allow primitive content at this point + * and could be constructed via transparent intermediate entities. + * We then try to parse the data using the type specified by the + * respective field. If that does not work we proceed to the next + * possible field. + */ + // retrieve all fields. 
+ NodeVector fields = desc->getDefaultFields(); + std::vector forks; + for (auto field : fields) { + // then try to parse the content using the type specification. + forks.emplace_back(logger().fork()); + auto res = convertData(field, forks.back(), data); + if (res.first) { + forks.back().commit(); + // if that worked, construct the necessary path. + auto pathRes = desc->pathTo(field, logger()); + assert(pathRes.second); + NodeVector path = pathRes.first; + createPath(path, parent); + // then create the primitive element. + parent->createChildDocumentPrimitive(res.second, fieldName); + return; + } + } + logger().error("Could not read data with any of the possible fields:"); + for (size_t f = 0; f < fields.size(); f++) { + logger().note(Utils::join(fields[f]->path(), ".") + ":", + SourceLocation{}, MessageMode::NO_CONTEXT); + forks[f].commit(); + } + } +} + +namespace RttiTypes { +const Rtti DocumentField = + RttiBuilder("DocumentField").parent(&Node); +} +} diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp new file mode 100644 index 0000000..475fe69 --- /dev/null +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -0,0 +1,88 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +/** + * @file DocumentHandler.hpp + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_DOCUMENT_HANDLER_HPP_ +#define _OUSIA_DOCUMENT_HANDLER_HPP_ + +#include + +#include "Handler.hpp" + +namespace ousia { + +// Forward declarations +class Rtti; +class DocumentEntity; +class FieldDescriptor; + +class DocumentHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DocumentHandler{handlerData}; + } +}; + +class DocumentField : public Node { +public: + using Node::Node; +}; + +class DocumentChildHandler : public StaticHandler { +private: + void preamble(Handle parentNode, std::string &fieldName, + DocumentEntity *&parent, bool &inField); + + void createPath(const NodeVector &path, DocumentEntity *&parent); + + std::pair convertData(Handle field, + Logger &logger, + const std::string &data); + +public: + using Handler::Handler; + + bool start(Variant::mapType &args) override; + + void end() override; + + bool data(const Variant &data) override; + + static Handler *create(const HandlerData &handlerData) + { + return new DocumentChildHandler{handlerData}; + } +}; + +namespace RttiTypes { +extern const Rtti DocumentField; +} +} +#endif diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp new file mode 100644 index 0000000..6571717 --- /dev/null +++ b/src/core/parser/stack/DomainHandler.cpp @@ -0,0 +1,276 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "DomainHandler.hpp" + +#include +#include +#include + +namespace ousia { + +/* DomainHandler */ + +void DomainHandler::start(Variant::mapType &args) +{ + Rooted domain = project()->createDomain(args["name"].asString()); + domain->setLocation(location()); + + scope().push(domain); +} + +void DomainHandler::end() { scope().pop(); } + +/* DomainStructHandler */ + +void DomainStructHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + Rooted domain = scope().selectOrThrow(); + + Rooted structuredClass = domain->createStructuredClass( + args["name"].asString(), args["cardinality"].asCardinality(), nullptr, + args["transparent"].asBool(), args["isRoot"].asBool()); + structuredClass->setLocation(location()); + + const std::string &isa = args["isa"].asString(); + if (!isa.empty()) { + scope().resolve( + isa, structuredClass, logger(), + [](Handle superclass, Handle structuredClass, + Logger &logger) { + if (superclass != nullptr) { + structuredClass.cast()->setSuperclass( + superclass.cast(), logger); + } + }); + } + + scope().push(structuredClass); +} + +void DomainStructHandler::end() { scope().pop(); } + +/* DomainAnnotationHandler */ +void DomainAnnotationHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + Rooted domain = scope().selectOrThrow(); + + Rooted annotationClass = + domain->createAnnotationClass(args["name"].asString()); + annotationClass->setLocation(location()); + + scope().push(annotationClass); +} + +void DomainAnnotationHandler::end() { scope().pop(); } + +/* DomainAttributesHandler */ + +void 
DomainAttributesHandler::start(Variant::mapType &args) +{ + // Fetch the current typesystem and create the struct node + Rooted parent = scope().selectOrThrow(); + + Rooted attrDesc = parent->getAttributesDescriptor(); + attrDesc->setLocation(location()); + + scope().push(attrDesc); +} + +void DomainAttributesHandler::end() { scope().pop(); } + +/* DomainFieldHandler */ + +void DomainFieldHandler::start(Variant::mapType &args) +{ + FieldDescriptor::FieldType type; + if (args["isSubtree"].asBool()) { + type = FieldDescriptor::FieldType::SUBTREE; + } else { + type = FieldDescriptor::FieldType::TREE; + } + + Rooted parent = scope().selectOrThrow(); + + Rooted field = parent->createFieldDescriptor( + logger(), type, args["name"].asString(), args["optional"].asBool()); + field->setLocation(location()); + + scope().push(field); +} + +void DomainFieldHandler::end() { scope().pop(); } + +/* DomainFieldRefHandler */ + +void DomainFieldRefHandler::start(Variant::mapType &args) +{ + Rooted parent = scope().selectOrThrow(); + + const std::string &name = args["ref"].asString(); + scope().resolveFieldDescriptor( + name, parent, logger(), + [](Handle field, Handle parent, Logger &logger) { + if (field != nullptr) { + parent.cast()->addFieldDescriptor( + field.cast(), logger); + } + }); +} + +void DomainFieldRefHandler::end() {} + +/* DomainPrimitiveHandler */ + +void DomainPrimitiveHandler::start(Variant::mapType &args) +{ + Rooted parent = scope().selectOrThrow(); + + FieldDescriptor::FieldType fieldType; + if (args["isSubtree"].asBool()) { + fieldType = FieldDescriptor::FieldType::SUBTREE; + } else { + fieldType = FieldDescriptor::FieldType::TREE; + } + + Rooted field = parent->createPrimitiveFieldDescriptor( + new UnknownType(manager()), logger(), fieldType, + args["name"].asString(), args["optional"].asBool()); + field->setLocation(location()); + + const std::string &type = args["type"].asString(); + scope().resolve(type, field, logger(), + [](Handle type, Handle field, + 
Logger &logger) { + if (type != nullptr) { + field.cast()->setPrimitiveType(type.cast()); + } + }); + + scope().push(field); +} + +void DomainPrimitiveHandler::end() { scope().pop(); } + +/* DomainChildHandler */ + +void DomainChildHandler::start(Variant::mapType &args) +{ + Rooted field = scope().selectOrThrow(); + + const std::string &ref = args["ref"].asString(); + scope().resolve( + ref, field, logger(), + [](Handle child, Handle field, Logger &logger) { + if (child != nullptr) { + field.cast()->addChild( + child.cast()); + } + }); +} + +void DomainChildHandler::end() {} + +/* DomainParentHandler */ + +void DomainParentHandler::start(Variant::mapType &args) +{ + Rooted strct = scope().selectOrThrow(); + + Rooted parent{ + new DomainParent(strct->getManager(), args["ref"].asString(), strct)}; + parent->setLocation(location()); + scope().push(parent); +} + +void DomainParentHandler::end() { scope().pop(); } + +/* DomainParentFieldHandler */ +void DomainParentFieldHandler::start(Variant::mapType &args) +{ + Rooted parentNameNode = scope().selectOrThrow(); + FieldDescriptor::FieldType type; + if (args["isSubtree"].asBool()) { + type = FieldDescriptor::FieldType::SUBTREE; + } else { + type = FieldDescriptor::FieldType::TREE; + } + + const std::string &name = args["name"].asString(); + const bool optional = args["optional"].asBool(); + Rooted strct = + parentNameNode->getParent().cast(); + + // resolve the parent, create the declared field and add the declared + // StructuredClass as child to it. 
+ scope().resolve( + parentNameNode->getName(), strct, logger(), + [type, name, optional](Handle parent, Handle strct, + Logger &logger) { + if (parent != nullptr) { + Rooted field = + parent.cast()->createFieldDescriptor( + logger, type, name, optional); + field->addChild(strct.cast()); + } + }); +} + +void DomainParentFieldHandler::end() {} + +/* DomainParentFieldRefHandler */ + +void DomainParentFieldRefHandler::start(Variant::mapType &args) +{ + Rooted parentNameNode = scope().selectOrThrow(); + + const std::string &name = args["ref"].asString(); + Rooted strct = + parentNameNode->getParent().cast(); + auto loc = location(); + + // resolve the parent, get the referenced field and add the declared + // StructuredClass as child to it. + scope().resolve( + parentNameNode->getName(), strct, logger(), + [name, loc](Handle parent, Handle strct, Logger &logger) { + if (parent != nullptr) { + Rooted field = + parent.cast()->getFieldDescriptor(name); + if (field == nullptr) { + logger.error( + std::string("Could not find referenced field ") + name, + loc); + return; + } + field->addChild(strct.cast()); + } + }); +} + +void DomainParentFieldRefHandler::end() {} + +namespace RttiTypes { +const Rtti DomainParent = + RttiBuilder("DomainParent").parent(&Node); +} +} diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp new file mode 100644 index 0000000..5e8ea60 --- /dev/null +++ b/src/core/parser/stack/DomainHandler.hpp @@ -0,0 +1,200 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file DomainHandler.hpp + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_DOMAIN_HANDLER_HPP_ +#define _OUSIA_DOMAIN_HANDLER_HPP_ + +#include + +#include "Handler.hpp" + +namespace ousia { + +// Forward declarations +class Rtti; + +class DomainHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainHandler{handlerData}; + } +}; + +class DomainStructHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainStructHandler{handlerData}; + } +}; + +class DomainAnnotationHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainAnnotationHandler{handlerData}; + } +}; + +class DomainAttributesHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainAttributesHandler{handlerData}; + } +}; + +class DomainFieldHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static 
Handler *create(const HandlerData &handlerData) + { + return new DomainFieldHandler{handlerData}; + } +}; + +class DomainFieldRefHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainFieldRefHandler{handlerData}; + } +}; + +class DomainPrimitiveHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainPrimitiveHandler{handlerData}; + } +}; + +class DomainChildHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainChildHandler{handlerData}; + } +}; + +class DomainParent : public Node { +public: + using Node::Node; +}; + +namespace RttiTypes { +extern const Rtti DomainParent; +} + +class DomainParentHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainParentHandler{handlerData}; + } +}; + +class DomainParentFieldHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new DomainParentFieldHandler{handlerData}; + } +}; + +class DomainParentFieldRefHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new 
DomainParentFieldRefHandler{handlerData}; + } +}; +} +#endif diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 54dfe3e..a608f7f 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -184,7 +184,7 @@ bool StaticHandler::annotationEnd(const Variant &className, bool StaticHandler::data(const Variant &data) { - // No data supported + logger().error("Did not expect any data here", data); return false; } diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp new file mode 100644 index 0000000..94ee82d --- /dev/null +++ b/src/core/parser/stack/ImportIncludeHandler.cpp @@ -0,0 +1,96 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "ImportIncludeHandler.hpp" + +#include + +namespace ousia { + +/* ImportIncludeHandler */ + +void ImportIncludeHandler::start(Variant::mapType &args) +{ + rel = args["rel"].asString(); + type = args["type"].asString(); + src = args["src"].asString(); + srcInArgs = !src.empty(); +} + +void ImportIncludeHandler::data(const std::string &data, int field) +{ + if (srcInArgs) { + logger().error("\"src\" attribute has already been set"); + return; + } + if (field != 0) { + logger().error("Command has only one field."); + return; + } + src.append(data); +} + +/* ImportHandler */ + +void ImportHandler::start(Variant::mapType &args) +{ + ImportIncludeHandler::start(args); + + // Make sure imports are still possible + if (scope().getFlag(ParserFlag::POST_HEAD)) { + logger().error("Imports must be listed before other commands.", + location()); + return; + } +} + +void ImportHandler::end() +{ + // Fetch the last node and check whether an import is valid at this + // position + Rooted leaf = scope().getLeaf(); + if (leaf == nullptr || !leaf->isa(&RttiTypes::RootNode)) { + logger().error( + "Import not supported here, must be inside a document, domain " + "or typesystem command.", + location()); + return; + } + Rooted leafRootNode = leaf.cast(); + + // Perform the actual import, register the imported node within the leaf + // node + Rooted imported = + context().import(src, type, rel, leafRootNode->getReferenceTypes()); + if (imported != nullptr) { + leafRootNode->reference(imported); + } +} + +/* IncludeHandler */ + +void IncludeHandler::start(Variant::mapType &args) +{ + ImportIncludeHandler::start(args); +} + +void IncludeHandler::end() +{ + context().include(src, type, rel, {&RttiTypes::Node}); +} +} diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp new file mode 100644 index 0000000..f9abe55 --- /dev/null +++ b/src/core/parser/stack/ImportIncludeHandler.hpp @@ -0,0 +1,90 @@ +/* + Ousía + Copyright (C) 
2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file ImportIncludeHandler.hpp + * + * Contains the conceptually similar handlers for the "include" and "import" + * commands. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_ +#define _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_ + +#include +#include + +namespace ousia { + +/** + * The ImportHandler is responsible for handling the "import" command. An import + * creates a reference to a specified file. The specified file is parsed (if + * this has not already been done) outside of the context of the current file. + * If the specified resource has already been parsed, a reference to the already + * parsed file is inserted. Imports are only possible before no other content + * has been parsed. + */ +class ImportHandler : public StaticFieldHandler { +public: + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + const Variant::mapType &args) override; + + /** + * Creates a new instance of the ImportHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. 
+ */ + static Handler *create(const HandlerData &handlerData) + { + return new ImportHandler{handlerData}; + } +}; + +/** + * The IncludeHandler is responsible for handling the "include" command. The + * included file is parsed in the context of the current file and will change + * the content that is currently being parsed. Includes are possible at (almost) + * any position in the source file. + */ +class IncludeHandler : public StaticFieldHandler { +public: + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + const Variant::mapType &args) override; + + /** + * Creates a new instance of the IncludeHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ + static Handler *create(const HandlerData &handlerData) + { + return new IncludeHandler{handlerData}; + } +}; +} +#endif diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp new file mode 100644 index 0000000..2cc7dfb --- /dev/null +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -0,0 +1,175 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "TypesystemHandler.hpp" + +#include +#include + +namespace ousia { + +/* TypesystemHandler */ + +void TypesystemHandler::start(Variant::mapType &args) +{ + // Create the typesystem instance + Rooted typesystem = + project()->createTypesystem(args["name"].asString()); + typesystem->setLocation(location()); + + // Push the typesystem onto the scope, reset the POST_HEAD flag to false + scope().push(typesystem); + scope().setFlag(ParserFlag::POST_HEAD, false); +} + +void TypesystemHandler::end() { scope().pop(); } + +/* TypesystemEnumHandler */ + +void TypesystemEnumHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + // Fetch the current typesystem and create the enum node + Rooted typesystem = scope().selectOrThrow(); + Rooted enumType = + typesystem->createEnumType(args["name"].asString()); + enumType->setLocation(location()); + + scope().push(enumType); +} + +void TypesystemEnumHandler::end() { scope().pop(); } + +/* TypesystemEnumEntryHandler */ + +void TypesystemEnumEntryHandler::start(Variant::mapType &args) {} + +void TypesystemEnumEntryHandler::end() +{ + Rooted enumType = scope().selectOrThrow(); + enumType->addEntry(entry, logger()); +} + +void TypesystemEnumEntryHandler::data(const std::string &data, int field) +{ + if (field != 0) { + // TODO: This should be stored in the HandlerData + logger().error("Enum entry only has one field."); + return; + } + entry.append(data); +} + +/* TypesystemStructHandler */ + +void TypesystemStructHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + // Fetch the arguments used for creating this type + const std::string &name = args["name"].asString(); + const std::string &parent = args["parent"].asString(); + + // Fetch the current typesystem and create the struct node + Rooted typesystem = scope().selectOrThrow(); + Rooted structType = typesystem->createStructType(name); + structType->setLocation(location()); + + // Try to resolve
the parent type and set it as parent structure + if (!parent.empty()) { + scope().resolve( + parent, structType, logger(), + [](Handle parent, Handle structType, Logger &logger) { + if (parent != nullptr) { + structType.cast()->setParentStructure( + parent.cast(), logger); + } + }); + } + scope().push(structType); +} + +void TypesystemStructHandler::end() { scope().pop(); } + +/* TypesystemStructFieldHandler */ + +void TypesystemStructFieldHandler::start(Variant::mapType &args) +{ + // Read the argument values + const std::string &name = args["name"].asString(); + const std::string &type = args["type"].asString(); + const Variant &defaultValue = args["default"]; + const bool optional = + !(defaultValue.isObject() && defaultValue.asObject() == nullptr); + + Rooted structType = scope().selectOrThrow(); + Rooted attribute = + structType->createAttribute(name, defaultValue, optional, logger()); + attribute->setLocation(location()); + + // Try to resolve the type and default value + if (optional) { + scope().resolveTypeWithValue( + type, attribute, attribute->getDefaultValue(), logger(), + [](Handle type, Handle attribute, Logger &logger) { + if (type != nullptr) { + attribute.cast()->setType(type.cast(), + logger); + } + }); + } else { + scope().resolveType(type, attribute, logger(), + [](Handle type, Handle attribute, + Logger &logger) { + if (type != nullptr) { + attribute.cast()->setType(type.cast(), logger); + } + }); + } +} + +void TypesystemStructFieldHandler::end() {} + +/* TypesystemConstantHandler */ + +void TypesystemConstantHandler::start(Variant::mapType &args) +{ + scope().setFlag(ParserFlag::POST_HEAD, true); + + // Read the argument values + const std::string &name = args["name"].asString(); + const std::string &type = args["type"].asString(); + const Variant &value = args["value"]; + + Rooted typesystem = scope().selectOrThrow(); + Rooted constant = typesystem->createConstant(name, value); + constant->setLocation(location()); + + // Try to resolve the 
type + scope().resolveTypeWithValue( + type, constant, constant->getValue(), logger(), + [](Handle type, Handle constant, Logger &logger) { + if (type != nullptr) { + constant.cast()->setType(type.cast(), logger); + } + }); +} + +void TypesystemConstantHandler::end() {} +} diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp new file mode 100644 index 0000000..76a7bc9 --- /dev/null +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -0,0 +1,121 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +/** + * @file TypesystemHandler.hpp + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_TYPESYSTEM_HANDLER_HPP_ +#define _OUSIA_TYPESYSTEM_HANDLER_HPP_ + +#include +#include + +namespace ousia { + +class TypesystemHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemHandler{handlerData}; + } +}; + +class TypesystemEnumHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemEnumHandler{handlerData}; + } +}; + +class TypesystemEnumEntryHandler : public Handler { +public: + using Handler::Handler; + + std::string entry; + + void start(Variant::mapType &args) override; + + void end() override; + + void data(const std::string &data, int field) override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemEnumEntryHandler{handlerData}; + } +}; + +class TypesystemStructHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemStructHandler{handlerData}; + } +}; + +class TypesystemStructFieldHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new TypesystemStructFieldHandler{handlerData}; + } +}; + +class TypesystemConstantHandler : public Handler { +public: + using Handler::Handler; + + void start(Variant::mapType &args) override; + + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new 
TypesystemConstantHandler{handlerData}; + } +}; +} +#endif -- cgit v1.2.3 From a0f181e667f3d3b299784809cd610a4782e5be92 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 01:18:00 +0100 Subject: Fixed compilation of VariantReader and Typesystem on GCC 4.9 --- src/core/common/VariantReader.cpp | 2 +- src/core/common/VariantReader.hpp | 2 +- src/core/model/Typesystem.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/core') diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp index 3f02226..fb93ad0 100644 --- a/src/core/common/VariantReader.cpp +++ b/src/core/common/VariantReader.cpp @@ -495,7 +495,7 @@ std::pair VariantReader::parseBool(CharReader &reader, bool val = false; CharReaderFork readerFork = reader.fork(); LoggerFork loggerFork = logger.fork(); - auto res = parseToken(readerFork, loggerFork, {}); + auto res = parseToken(readerFork, loggerFork, std::unordered_set{}); if (res.first) { bool valid = false; if (res.second == "true") { diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp index 1232f6e..44132a0 100644 --- a/src/core/common/VariantReader.hpp +++ b/src/core/common/VariantReader.hpp @@ -322,7 +322,7 @@ public: */ static std::pair parseTyped( VariantType type, CharReader &reader, Logger &logger, - const std::unordered_set &delims = {}); + const std::unordered_set &delims = std::unordered_set{}); /** * Tries to parse an instance of the given type from the given string. The * called method is one of the parse methods defined here and adheres to the diff --git a/src/core/model/Typesystem.hpp b/src/core/model/Typesystem.hpp index 9f9470e..9d22c0b 100644 --- a/src/core/model/Typesystem.hpp +++ b/src/core/model/Typesystem.hpp @@ -179,7 +179,7 @@ public: * These characters are not included in the result. May not be nullptr. 
*/ std::pair read(CharReader &reader, Logger &logger, - const std::unordered_set &delims = {}); + const std::unordered_set &delims = std::unordered_set{}); /** * Tries to parse an instance of this type from the given string. -- cgit v1.2.3 From f44dac145adabb580cc36b31079ae963bf59b096 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 14:54:35 +0100 Subject: Added "markAsMagic" function, autoformat --- src/core/common/Variant.hpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'src/core') diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp index 6eae7e1..ddd17d7 100644 --- a/src/core/common/Variant.hpp +++ b/src/core/common/Variant.hpp @@ -883,6 +883,21 @@ public: return asObj(VariantType::FUNCTION); } + /** + * If the value of the variant already is a string, the markAsMagic function + * marks this string as a "magic" value (a variant which might also be an + * identifier). Throws an exception if the variant is not a string or magic + * value. + */ + void markAsMagic() + { + if (getType() == VariantType::STRING) { + meta.setType(VariantType::MAGIC); + return; + } + throw TypeException{getType(), VariantType::STRING}; + } + /** * Returns the value of the Variant as boolean, performs type conversion. * @@ -1146,10 +1161,7 @@ public: * * @return true if the */ - bool hasLocation() const - { - return meta.hasLocation(); - } + bool hasLocation() const { return meta.hasLocation(); } /** * Unpacks and returns the stored source location. Note that the returned @@ -1158,10 +1170,7 @@ public: * * @return the stored SourceLocation. */ - SourceLocation getLocation() const - { - return meta.getLocation(); - } + SourceLocation getLocation() const { return meta.getLocation(); } /** * Packs the given source location and stores it in the metadata.
Not all -- cgit v1.2.3 From ac61fe02b333c4928fb79e309c3aa065117aea7e Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 14:55:49 +0100 Subject: Removed Typesystem read code again -- using ParseGenericString instead. It occurred to me, that this was the way this problem was once meant to be solved. --- src/core/model/Typesystem.cpp | 60 ----------------- src/core/model/Typesystem.hpp | 151 +++++++----------------------------------- 2 files changed, 24 insertions(+), 187 deletions(-) (limited to 'src/core') diff --git a/src/core/model/Typesystem.cpp b/src/core/model/Typesystem.cpp index b34687e..fb99f87 100644 --- a/src/core/model/Typesystem.cpp +++ b/src/core/model/Typesystem.cpp @@ -21,7 +21,6 @@ #include #include #include -#include namespace ousia { @@ -68,65 +67,6 @@ bool Type::build(Variant &data, Logger &logger) const return build(data, logger, NullMagicCallback); } -std::pair Type::read(CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - // try all variant types of this type and use the first successful one. - Variant v; - bool success = false; - for (auto t : getVariantTypes()) { - auto res = VariantReader::parseTyped(t, reader, logger, delims); - if (res.first) { - v = res.second; - success = true; - break; - } - } - - if (!success) { - return std::make_pair(false, Variant{}); - } - if (!build(v, logger)) { - return std::make_pair(false, Variant{}); - } - return std::make_pair(true, v); -} - -std::pair Type::read(const std::string &str, Logger &logger, - SourceId sourceId, size_t offs) -{ - // try all variant types of this type and use the first successful one.
- Variant v; - bool success = false; - std::vector forks; - auto vts = getVariantTypes(); - for (auto vt : vts) { - forks.emplace_back(logger.fork()); - auto res = - VariantReader::parseTyped(vt, str, forks.back(), sourceId, offs); - if (res.first) { - v = res.second; - success = true; - forks.back().commit(); - break; - } - } - - if (!success) { - logger.error("Could not read data with any of the possible types:"); - for (size_t t = 0; t < forks.size(); t++) { - logger.note(std::string(Variant::getTypeName(vts[t])) + ":", - SourceLocation{}, MessageMode::NO_CONTEXT); - forks[t].commit(); - } - return std::make_pair(false, Variant{}); - } - if (!build(v, logger)) { - return std::make_pair(false, Variant{}); - } - return std::make_pair(true, v); -} - bool Type::doCheckIsa(Handle type) const { return false; } bool Type::checkIsa(Handle type) const diff --git a/src/core/model/Typesystem.hpp b/src/core/model/Typesystem.hpp index 9d22c0b..53fb0df 100644 --- a/src/core/model/Typesystem.hpp +++ b/src/core/model/Typesystem.hpp @@ -59,7 +59,27 @@ class SystemTypesystem; */ class Type : public Node { public: - enum class MagicCallbackResult { NOT_FOUND, FOUND_INVALID, FOUND_VALID }; + /** + * Enum describing the result of the MagicCallback. + */ + enum class MagicCallbackResult { + /** + * A magic value with the given name could not be resolved. + */ + NOT_FOUND, + + /** + * A magic value with the given name could be resolved, but is of the + * wrong type. + */ + FOUND_INVALID, + + /** + * A magic value with the given name could be resolved and is of the + * correct type. + */ + FOUND_VALID + }; /** * Callback function called when a variant with "magic" value is reached. @@ -70,7 +90,9 @@ public: * to which the value of the looked up constant should be written. * @param type is a const pointer at the type. TODO: Replace this with a * "ConstHandle". - * @return true if a constant was found, false otherwise. 
+ * @return a MagicCallbackResult describing whether the magic value could + * not be resolved, could be resolved but is of the wrong type or could be + * resolved and is of the correct type. */ using MagicCallback = std::function; @@ -168,32 +190,6 @@ public: */ bool build(Variant &data, Logger &logger) const; - /** - * Tries to parse an instance of this type from the given stream. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned - * at the end of the type instance (or the delimiting character). - * @param delims is a set of characters which will terminate the typed - * instance if the according parser uses delimiting characters. - * These characters are not included in the result. May not be nullptr. - */ - std::pair read(CharReader &reader, Logger &logger, - const std::unordered_set &delims = std::unordered_set{}); - - /** - * Tries to parse an instance of this type from the given string. - * - * @param str is the string from which the value should be read. - * @param sourceId is an optional descriptor of the source file from which - * the element is being read. - * @param offs is the by offset in the source file at which the string - * starts. - */ - std::pair read(const std::string &str, Logger &logger, - SourceId sourceId = InvalidSourceId, - size_t offs = 0); - /** * Returns true if and only if the given Variant adheres to this Type. In * essence this just calls the build method on a copy of the input Variant. @@ -230,23 +226,6 @@ public: { return this->getParent().cast(); } - - /** - * Returns the VariantTypes whose instances are proper input for building an - * instance of this type. - * More specifically: Every returned VariantType T should be such that: - * If a string s can be parsed according to T to a Variant v then the call - * build(v, logger) should only fail (return false) if the variant content - * does not adhere to the specific type specification. 
But it should be a - * properly typed input for build. - * The order of the types returned by this function determines the order in - * which a parser should try to interpret an input string s. - * - * @return the VariantTypes that arethe basis for parsing an instance of - *this - * type. - */ - virtual std::vector getVariantTypes() const = 0; }; /** @@ -287,16 +266,6 @@ public: * @return a variant containing an empty string. */ Variant create() const override { return Variant{""}; } - - /** - * Returns the String VariantType. - * - * @return the String VariantType. - */ - std::vector getVariantTypes() const override - { - return {VariantType::STRING}; - } }; /** @@ -336,16 +305,6 @@ public: * @return the integer value zero. */ Variant create() const override { return Variant{0}; } - - /** - * Returns the Int VariantType. - * - * @return the Int VariantType. - */ - std::vector getVariantTypes() const override - { - return {VariantType::INT}; - } }; /** @@ -385,16 +344,6 @@ public: * @return the double value zero. */ Variant create() const override { return Variant{0.0}; } - - /** - * Returns the Double VariantType. - * - * @return the Double VariantType. - */ - std::vector getVariantTypes() const override - { - return {VariantType::DOUBLE}; - } }; /** @@ -434,16 +383,6 @@ public: * @return a Variant with the boolean value false. */ Variant create() const override { return Variant{false}; } - - /** - * Returns the bool VariantType. - * - * @return the bool VariantType. - */ - std::vector getVariantTypes() const override - { - return {VariantType::BOOL}; - } }; /** @@ -560,16 +499,6 @@ public: * name. Throws a LoggableException if the string does not exist. */ Ordinal valueOf(const std::string &name) const; - - /** - * Returns the int and string VariantTypes. - * - * @return the int and string VariantTypes. 
- */ - std::vector getVariantTypes() const override - { - return {VariantType::INT, VariantType::STRING}; - } }; /** @@ -1005,15 +934,6 @@ public: * @return true if the requested attribute name exists, false otherwise. */ bool hasAttribute(const std::string &name) const; - /** - * Returns the array and map VariantTypes. - * - * @return the array and map VariantTypes. - */ - std::vector getVariantTypes() const override - { - return {VariantType::MAP}; - } }; /** @@ -1079,15 +999,6 @@ public: * @return Rooted reference pointing at the innerType. */ Rooted getInnerType() { return innerType; } - /** - * Returns the array VariantType. - * - * @return the array VariantType. - */ - std::vector getVariantTypes() const override - { - return {VariantType::ARRAY}; - } }; /** @@ -1126,20 +1037,6 @@ public: * @return a Variant instance with nullptr value. */ Variant create() const override; - /** - * Returns all parseable VariantTypes (bool, int, double, array, map, - *cardinality, object, string). - * - * @return all parseable VariantTypes (bool, int, double, array, map, - *cardinality, object, string). 
- */ - std::vector getVariantTypes() const override - { - return {VariantType::BOOL, VariantType::INT, - VariantType::DOUBLE, VariantType::ARRAY, - VariantType::MAP, VariantType::CARDINALITY, - VariantType::OBJECT, VariantType::STRING}; - } }; /** -- cgit v1.2.3 From 2aced1289813b3dae9c79f3c6878788343e3d997 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 14:56:29 +0100 Subject: Removed unneeded owner parameter from ParserScope --- src/core/parser/ParserScope.cpp | 5 ++--- src/core/parser/ParserScope.hpp | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/ParserScope.cpp b/src/core/parser/ParserScope.cpp index 3929abf..ce3dc94 100644 --- a/src/core/parser/ParserScope.cpp +++ b/src/core/parser/ParserScope.cpp @@ -351,8 +351,7 @@ bool ParserScope::resolveType(const std::string &name, Handle owner, return resolveType(Utils::split(name, '.'), owner, logger, resultCallback); } -bool ParserScope::resolveValue(Variant &data, Handle type, - Handle owner, Logger &logger) +bool ParserScope::resolveValue(Variant &data, Handle type, Logger &logger) { return type->build( data, logger, @@ -408,7 +407,7 @@ bool ParserScope::resolveTypeWithValue(const std::vector &path, [=](Handle resolved, Handle owner, Logger &logger) mutable { if (resolved != nullptr) { Rooted type = resolved.cast(); - scope.resolveValue(*valuePtr, type, owner, logger); + scope.resolveValue(*valuePtr, type, logger); } // Call the result callback with the type diff --git a/src/core/parser/ParserScope.hpp b/src/core/parser/ParserScope.hpp index 58fc037..185b845 100644 --- a/src/core/parser/ParserScope.hpp +++ b/src/core/parser/ParserScope.hpp @@ -702,13 +702,11 @@ public: * (even in inner structures). The data will be passed to the "build" * function of the given type. * @param type is the Typesystem type the data should be interpreted with. - * @param owner is the node for which the resolution takes place. 
* @param logger is the logger instance into which resolution problems * should be logged. * @return true if the value was successfully built. */ - bool resolveValue(Variant &data, Handle type, Handle owner, - Logger &logger); + bool resolveValue(Variant &data, Handle type, Logger &logger); /** * Resolves a type and makes sure the corresponding value is of the correct -- cgit v1.2.3 From 69ebaddbeaea1aa651a0f0babbf9283240d9c07b Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 14:58:46 +0100 Subject: Slightly adapted Handler instances to new Handler, once again passing non-const references to data and start, using "parseGenericString" in DocumentHandler for resolving non-string values, added unit test for testing whether "end()" is not called if "start()" fails. --- src/core/parser/stack/DocumentHandler.cpp | 141 +++++++++++++++---------- src/core/parser/stack/DocumentHandler.hpp | 96 ++++++++++++++--- src/core/parser/stack/DomainHandler.cpp | 51 +++++---- src/core/parser/stack/DomainHandler.hpp | 28 +++-- src/core/parser/stack/Handler.cpp | 20 ++-- src/core/parser/stack/Handler.hpp | 31 +++--- src/core/parser/stack/ImportIncludeHandler.cpp | 54 ++-------- src/core/parser/stack/ImportIncludeHandler.hpp | 13 ++- src/core/parser/stack/Stack.cpp | 18 ++-- src/core/parser/stack/TypesystemHandler.cpp | 48 ++++----- src/core/parser/stack/TypesystemHandler.hpp | 131 +++++++++++++++++------ test/core/parser/stack/StackTest.cpp | 41 +++++-- 12 files changed, 422 insertions(+), 250 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index ba7430d..b28f0fb 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -22,22 +22,28 @@ #include #include +#include #include #include +#include #include #include +#include namespace ousia { +namespace parser_stack { /* DocumentHandler */ -void DocumentHandler::start(Variant::mapType 
&args) +bool DocumentHandler::start(Variant::mapType &args) { Rooted document = - project()->createDocument(args["name"].asString()); + context().getProject()->createDocument(args["name"].asString()); document->setLocation(location()); scope().push(document); scope().setFlag(ParserFlag::POST_HEAD, false); + + return true; } void DocumentHandler::end() { scope().pop(); } @@ -48,7 +54,7 @@ void DocumentChildHandler::preamble(Handle parentNode, std::string &fieldName, DocumentEntity *&parent, bool &inField) { - // check if the parent in the structure tree was an explicit field + // Check if the parent in the structure tree was an explicit field // reference. inField = parentNode->isa(&RttiTypes::DocumentField); if (inField) { @@ -56,10 +62,11 @@ void DocumentChildHandler::preamble(Handle parentNode, parentNode = scope().selectOrThrow( {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity}); } else { - // if it wasn't an explicit reference, we use the default field. + // If it wasn't an explicit reference, we use the default field. fieldName = DEFAULT_FIELD_NAME; } - // reference the parent entity explicitly. + + // Reference the parent entity explicitly. parent = nullptr; if (parentNode->isa(&RttiTypes::StructuredEntity)) { parent = static_cast( @@ -73,6 +80,8 @@ void DocumentChildHandler::preamble(Handle parentNode, void DocumentChildHandler::createPath(const NodeVector &path, DocumentEntity *&parent) { + // TODO (@benjamin): These should be pushed onto the scope and popped once + // the scope is left. Otherwise stuff may not be correctly resolved.
size_t S = path.size(); for (size_t p = 1; p < S; p = p + 2) { parent = static_cast( @@ -82,7 +91,7 @@ void DocumentChildHandler::createPath(const NodeVector &path, } } -void DocumentChildHandler::start(Variant::mapType &args) +bool DocumentChildHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); Rooted parentNode = scope().selectOrThrow( @@ -95,7 +104,7 @@ void DocumentChildHandler::start(Variant::mapType &args) preamble(parentNode, fieldName, parent, inField); - // try to find a FieldDescriptor for the given tag if we are not in a + // Try to find a FieldDescriptor for the given tag if we are not in a // field already. This does _not_ try to construct transparent paths // in between. if (!inField && parent != nullptr && @@ -104,7 +113,7 @@ void DocumentChildHandler::start(Variant::mapType &args) new DocumentField(parentNode->getManager(), fieldName, parentNode)}; field->setLocation(location()); scope().push(field); - return; + return true; } // Otherwise create a new StructuredEntity @@ -147,27 +156,39 @@ void DocumentChildHandler::start(Variant::mapType &args) } entity->setLocation(location()); scope().push(entity); + return true; } void DocumentChildHandler::end() { scope().pop(); } -std::pair DocumentChildHandler::convertData( - Handle field, Logger &logger, const std::string &data) +bool DocumentChildHandler::convertData(Handle field, + Variant &data, Logger &logger) { - // if the content is supposed to be of type string, we can finish - // directly. 
- auto vts = field->getPrimitiveType()->getVariantTypes(); - if (std::find(vts.begin(), vts.end(), VariantType::STRING) != vts.end()) { - return std::make_pair(true, Variant::fromString(data)); + bool valid = true; + Rooted type = field->getPrimitiveType(); + + // If the content is supposed to be of type string, we only need to check + // for "magic" values -- otherwise just call the "parseGenericString" + // function on the string data + if (type->isa(&RttiTypes::StringType)) { + const std::string &str = data.asString(); + // TODO: Referencing constants with "." separator should also work + if (Utils::isIdentifier(str)) { + data.markAsMagic(); + } + } else { + // Parse the string as generic string, assign the result + auto res = VariantReader::parseGenericString( + data.asString(), logger, data.getLocation().getSourceId(), + data.getLocation().getStart()); + data = res.second; } - // then try to parse the content using the type specification. - auto res = field->getPrimitiveType()->read( - data, logger, location().getSourceId(), location().getStart()); - return res; + // Now try to resolve the value for the primitive type + return valid && scope().resolveValue(data, type, logger); } -void DocumentChildHandler::data(const std::string &data, int fieldIdx) +bool DocumentChildHandler::data(Variant &data) { Rooted parentNode = scope().selectOrThrow( {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity, @@ -180,11 +201,10 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx) preamble(parentNode, fieldName, parent, inField); Rooted desc = parent->getDescriptor(); - /* - * We distinguish two cases here: One for fields that are given. - */ + + // We distinguish two cases here: One for fields that are given. 
if (fieldName != DEFAULT_FIELD_NAME) { - // retrieve the actual FieldDescriptor + // Retrieve the actual FieldDescriptor Rooted field = desc->getFieldDescriptor(fieldName); if (field == nullptr) { logger().error( @@ -192,49 +212,57 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx) fieldName + "\" exists in descriptor\"" + desc->getName() + "\".", location()); - return; + return false; } - // if it is not primitive at all, we can't parse the content. + // If it is not primitive at all, we can't parse the content. if (!field->isPrimitive()) { logger().error(std::string("Can't handle data because field \"") + fieldName + "\" of descriptor \"" + desc->getName() + "\" is not primitive!", location()); - return; + return false; } - // then try to parse the content using the type specification. - auto res = convertData(field, logger(), data); - // add it as primitive content. - if (res.first) { - parent->createChildDocumentPrimitive(res.second, fieldName); + + // Try to convert the data variable to the correct format, abort if this + // does not work + if (!convertData(field, data, logger())) { + return false; } + + // Add it as primitive content + parent->createChildDocumentPrimitive(data, fieldName); + return true; } else { - /* - * The second case is for primitive fields. Here we search through - * all FieldDescriptors that allow primitive content at this point - * and could be constructed via transparent intermediate entities. - * We then try to parse the data using the type specified by the - * respective field. If that does not work we proceed to the next - * possible field. - */ - // retrieve all fields. + // The second case is for primitive fields. Here we search through + // all FieldDescriptors that allow primitive content at this point + // and could be constructed via transparent intermediate entities. + // We then try to parse the data using the type specified by the + // respective field. 
If that does not work we proceed to the next + // possible field. NodeVector fields = desc->getDefaultFields(); std::vector forks; for (auto field : fields) { - // then try to parse the content using the type specification. + // Then try to parse the content using the type specification forks.emplace_back(logger().fork()); - auto res = convertData(field, forks.back(), data); - if (res.first) { - forks.back().commit(); - // if that worked, construct the necessary path. - auto pathRes = desc->pathTo(field, logger()); - assert(pathRes.second); - NodeVector path = pathRes.first; - createPath(path, parent); - // then create the primitive element. - parent->createChildDocumentPrimitive(res.second, fieldName); - return; + + // Try to convert the data variable to the correct format, abort if + // this does not work + if (!convertData(field, data, forks.back())) { + return false; } + + // Show possible warnings that were emitted by this type conversion + forks.back().commit(); + + // If that worked, construct the necessary path + auto pathRes = desc->pathTo(field, logger()); + assert(pathRes.second); + NodeVector path = pathRes.first; + createPath(path, parent); + + // Then create the primitive element + parent->createChildDocumentPrimitive(data, fieldName); + return true; } logger().error("Could not read data with any of the possible fields:"); for (size_t f = 0; f < fields.size(); f++) { @@ -242,11 +270,14 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx) SourceLocation{}, MessageMode::NO_CONTEXT); forks[f].commit(); } + return false; } + return true; +} } namespace RttiTypes { -const Rtti DocumentField = - RttiBuilder("DocumentField").parent(&Node); +const Rtti DocumentField = RttiBuilder( + "DocumentField").parent(&Node); } } diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 475fe69..7dc4c86 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp 
@@ -19,13 +19,19 @@ /** * @file DocumentHandler.hpp * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + * Contains the Handler instances used for parsing actual documents. This file + * declares to classes: The Document handler which parses the "document" command + * that introduces a new document and the "DocumentChildHandler" which parses + * the actual user defined tags. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ -#ifndef _OUSIA_DOCUMENT_HANDLER_HPP_ -#define _OUSIA_DOCUMENT_HANDLER_HPP_ +#ifndef _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ +#define _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ #include +#include #include "Handler.hpp" @@ -36,53 +42,117 @@ class Rtti; class DocumentEntity; class FieldDescriptor; +namespace parser_stack { +/** + * The DocumentHandler class parses the "document" tag that is used to introduce + * a new document. Note that this tag is not mandatory in osml files -- if the + * first command is not a typesystem, domain or any other declarative command, + * the DocumentHandler will be implicitly called. + */ class DocumentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; + /** + * Creates a new instance of the ImportHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new DocumentHandler{handlerData}; } }; +/** + * Temporary Node that is being pushed onto the ParserScope in order to indicate + * the field the parser is currently in. The name of the Node is stored in the + * "name" field of the parent Node class. + */ class DocumentField : public Node { public: using Node::Node; }; +/** + * The DocumentChildHandler class performs the actual parsing of the user + * defined elements in an Ousía document. 
+ */ class DocumentChildHandler : public StaticHandler { private: + /** + * Code shared by both the start() and the end() method. Checks whether the + * parser currently is in a field and returns the name of this field. + * + * @param parentNode is the next possible parent node (a document, + * a structured entity, an annotation entity or a field). + * @param fieldName is an output parameter to which the name of the current + * field is written (or unchanged if we're not in a field). + * @param parent is an output parameter to which the parent document entity + * will be written. + * @param inField is set to true if we actually are in a field. + */ void preamble(Handle parentNode, std::string &fieldName, DocumentEntity *&parent, bool &inField); + /** + * Constructs all structured entites along the given path and inserts them + * into the document graph. + * + * @param path is a path containing an alternating series of structured + * classes and fields. + * @pram parent is the root entity from which the process should be started. + */ void createPath(const NodeVector &path, DocumentEntity *&parent); - std::pair convertData(Handle field, - Logger &logger, - const std::string &data); + /** + * Tries to convert the given data to the type that is specified in the + * given primitive field. + * + * @param field is the primitive field for which the data is intended. + * @param data is the is the data that should be converted, the result is + * written into this argument as output variable. + * @param logger is the Logger instance to which error messages should be + * written. Needed to allow the convertData function to write to a forked + * Logger instance. + * @return true if the operation was successful, false otherwise. 
+ */ + bool convertData(Handle field, Variant &data, + Logger &logger); public: - using Handler::Handler; + using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; - - bool data(const Variant &data) override; - + bool data(Variant &data) override; + + /** + * Creates a new instance of the DocumentChildHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new DocumentChildHandler{handlerData}; } }; +} namespace RttiTypes { +/** + * RttiType for the internally used DocumentField class. + */ extern const Rtti DocumentField; } } -#endif + +#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index 6571717..cb12543 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -20,25 +20,30 @@ #include #include +#include #include +#include namespace ousia { +namespace parser_stack { /* DomainHandler */ -void DomainHandler::start(Variant::mapType &args) +bool DomainHandler::start(Variant::mapType &args) { - Rooted domain = project()->createDomain(args["name"].asString()); + Rooted domain = + context().getProject()->createDomain(args["name"].asString()); domain->setLocation(location()); scope().push(domain); + return true; } void DomainHandler::end() { scope().pop(); } /* DomainStructHandler */ -void DomainStructHandler::start(Variant::mapType &args) +bool DomainStructHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -63,12 +68,13 @@ void DomainStructHandler::start(Variant::mapType &args) } scope().push(structuredClass); + return true; } void DomainStructHandler::end() { scope().pop(); } /* DomainAnnotationHandler */ -void 
DomainAnnotationHandler::start(Variant::mapType &args) +bool DomainAnnotationHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -79,13 +85,14 @@ void DomainAnnotationHandler::start(Variant::mapType &args) annotationClass->setLocation(location()); scope().push(annotationClass); + return true; } void DomainAnnotationHandler::end() { scope().pop(); } /* DomainAttributesHandler */ -void DomainAttributesHandler::start(Variant::mapType &args) +bool DomainAttributesHandler::start(Variant::mapType &args) { // Fetch the current typesystem and create the struct node Rooted parent = scope().selectOrThrow(); @@ -94,13 +101,14 @@ void DomainAttributesHandler::start(Variant::mapType &args) attrDesc->setLocation(location()); scope().push(attrDesc); + return true; } void DomainAttributesHandler::end() { scope().pop(); } /* DomainFieldHandler */ -void DomainFieldHandler::start(Variant::mapType &args) +bool DomainFieldHandler::start(Variant::mapType &args) { FieldDescriptor::FieldType type; if (args["isSubtree"].asBool()) { @@ -116,13 +124,14 @@ void DomainFieldHandler::start(Variant::mapType &args) field->setLocation(location()); scope().push(field); + return true; } void DomainFieldHandler::end() { scope().pop(); } /* DomainFieldRefHandler */ -void DomainFieldRefHandler::start(Variant::mapType &args) +bool DomainFieldRefHandler::start(Variant::mapType &args) { Rooted parent = scope().selectOrThrow(); @@ -135,13 +144,14 @@ void DomainFieldRefHandler::start(Variant::mapType &args) field.cast(), logger); } }); + return true; } void DomainFieldRefHandler::end() {} /* DomainPrimitiveHandler */ -void DomainPrimitiveHandler::start(Variant::mapType &args) +bool DomainPrimitiveHandler::start(Variant::mapType &args) { Rooted parent = scope().selectOrThrow(); @@ -167,13 +177,14 @@ void DomainPrimitiveHandler::start(Variant::mapType &args) }); scope().push(field); + return true; } void DomainPrimitiveHandler::end() { scope().pop(); } /* 
DomainChildHandler */ -void DomainChildHandler::start(Variant::mapType &args) +bool DomainChildHandler::start(Variant::mapType &args) { Rooted field = scope().selectOrThrow(); @@ -186,13 +197,12 @@ void DomainChildHandler::start(Variant::mapType &args) child.cast()); } }); + return true; } -void DomainChildHandler::end() {} - /* DomainParentHandler */ -void DomainParentHandler::start(Variant::mapType &args) +bool DomainParentHandler::start(Variant::mapType &args) { Rooted strct = scope().selectOrThrow(); @@ -200,12 +210,14 @@ void DomainParentHandler::start(Variant::mapType &args) new DomainParent(strct->getManager(), args["ref"].asString(), strct)}; parent->setLocation(location()); scope().push(parent); + return true; } void DomainParentHandler::end() { scope().pop(); } /* DomainParentFieldHandler */ -void DomainParentFieldHandler::start(Variant::mapType &args) + +bool DomainParentFieldHandler::start(Variant::mapType &args) { Rooted parentNameNode = scope().selectOrThrow(); FieldDescriptor::FieldType type; @@ -233,13 +245,12 @@ void DomainParentFieldHandler::start(Variant::mapType &args) field->addChild(strct.cast()); } }); + return true; } -void DomainParentFieldHandler::end() {} - /* DomainParentFieldRefHandler */ -void DomainParentFieldRefHandler::start(Variant::mapType &args) +bool DomainParentFieldRefHandler::start(Variant::mapType &args) { Rooted parentNameNode = scope().selectOrThrow(); @@ -265,12 +276,12 @@ void DomainParentFieldRefHandler::start(Variant::mapType &args) field->addChild(strct.cast()); } }); + return true; +} } - -void DomainParentFieldRefHandler::end() {} namespace RttiTypes { -const Rtti DomainParent = - RttiBuilder("DomainParent").parent(&Node); +const Rtti DomainParent = RttiBuilder( + "DomainParent").parent(&Node); } } diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp index 5e8ea60..917d65d 100644 --- a/src/core/parser/stack/DomainHandler.hpp +++ b/src/core/parser/stack/DomainHandler.hpp @@ 
-19,17 +19,24 @@ /** * @file DomainHandler.hpp * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + * Contains the Handler classes used for parsing Domain descriptors. This + * includes the "domain" tag and all describing tags below the "domain" tag. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ #ifndef _OUSIA_DOMAIN_HANDLER_HPP_ #define _OUSIA_DOMAIN_HANDLER_HPP_ #include +#include #include "Handler.hpp" namespace ousia { +namespace parser_stack { + +// TODO: Documentation // Forward declarations class Rtti; @@ -39,7 +46,6 @@ public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; static Handler *create(const HandlerData &handlerData) @@ -53,7 +59,6 @@ public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; static Handler *create(const HandlerData &handlerData) @@ -67,7 +72,6 @@ public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; static Handler *create(const HandlerData &handlerData) @@ -81,7 +85,6 @@ public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; static Handler *create(const HandlerData &handlerData) @@ -95,7 +98,6 @@ public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; static Handler *create(const HandlerData &handlerData) @@ -109,7 +111,6 @@ public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; static Handler *create(const HandlerData &handlerData) @@ -123,7 +124,6 @@ public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; static Handler *create(const HandlerData &handlerData) @@ -138,8 +138,6 @@ public: bool start(Variant::mapType &args) override; - void end() override; - static Handler *create(const HandlerData 
&handlerData) { return new DomainChildHandler{handlerData}; @@ -160,7 +158,6 @@ public: using StaticHandler::StaticHandler; bool start(Variant::mapType &args) override; - void end() override; static Handler *create(const HandlerData &handlerData) @@ -175,8 +172,6 @@ public: bool start(Variant::mapType &args) override; - void end() override; - static Handler *create(const HandlerData &handlerData) { return new DomainParentFieldHandler{handlerData}; @@ -189,12 +184,15 @@ public: bool start(Variant::mapType &args) override; - void end() override; - static Handler *create(const HandlerData &handlerData) { return new DomainParentFieldRefHandler{handlerData}; } }; } + +namespace RttiTypes { +extern const Rtti DomainParent; +} +} #endif diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index a608f7f..86000c4 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -65,6 +65,8 @@ Logger &Handler::logger() const SourceLocation &Handler::location() const { return handlerData.location; } +const std::string &Handler::name() const { return handlerData.name; } + void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) { /*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/ @@ -80,7 +82,7 @@ void Handler::unregisterToken(const std::string &token) /*handlerData.callbacks.unregisterToken(token);*/ } -const std::string &Handler::getName() const { return handlerData.name; } +const std::string &Handler::getName() const { return name(); } const State &Handler::getState() const { return handlerData.state; } @@ -92,7 +94,7 @@ const SourceLocation &Handler::getLocation() const { return location(); } /* Class EmptyHandler */ -bool EmptyHandler::start(const Variant::mapType &args) +bool EmptyHandler::start(Variant::mapType &args) { // Just accept anything return true; @@ -115,7 +117,7 @@ void EmptyHandler::fieldEnd() } bool EmptyHandler::annotationStart(const Variant &className, - const Variant::mapType &args) + 
Variant::mapType &args) { // Accept any data return true; @@ -128,7 +130,7 @@ bool EmptyHandler::annotationEnd(const Variant &className, return true; } -bool EmptyHandler::data(const Variant &data) +bool EmptyHandler::data(Variant &data) { // Support any data return true; @@ -141,7 +143,7 @@ Handler *EmptyHandler::create(const HandlerData &handlerData) /* Class StaticHandler */ -bool StaticHandler::start(const Variant::mapType &args) +bool StaticHandler::start(Variant::mapType &args) { // Do nothing in the default implementation, accept anything return true; @@ -169,7 +171,7 @@ void StaticHandler::fieldEnd() } bool StaticHandler::annotationStart(const Variant &className, - const Variant::mapType &args) + Variant::mapType &args) { // No annotations supported return false; @@ -182,7 +184,7 @@ bool StaticHandler::annotationEnd(const Variant &className, return false; } -bool StaticHandler::data(const Variant &data) +bool StaticHandler::data(Variant &data) { logger().error("Did not expect any data here", data); return false; @@ -196,7 +198,7 @@ StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData, { } -bool StaticFieldHandler::start(const Variant::mapType &args) +bool StaticFieldHandler::start(Variant::mapType &args) { if (!argName.empty()) { auto it = args.find(argName); @@ -225,7 +227,7 @@ void StaticFieldHandler::end() } } -bool StaticFieldHandler::data(const Variant &data) +bool StaticFieldHandler::data(Variant &data) { // Call the doHandle function if this has not been done before if (!handled) { diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index eeaf555..7cda7a4 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -151,6 +151,13 @@ protected: */ const SourceLocation &location() const; + /** + * Returns the command name for which the handler was created. + * + * @return a const reference at the command name. 
+ */ + const std::string &name() const; + public: /** * Virtual destructor. @@ -229,7 +236,7 @@ public: * @return true if the handler was successful in starting the element it * represents, false otherwise. */ - virtual bool start(const Variant::mapType &args) = 0; + virtual bool start(Variant::mapType &args) = 0; /** * Called before the command for which this handler is defined ends (is @@ -270,7 +277,7 @@ public: * if an error occurred. */ virtual bool annotationStart(const Variant &className, - const Variant::mapType &args) = 0; + Variant::mapType &args) = 0; /** * Called whenever an annotation ends while this handler is active. The @@ -296,7 +303,7 @@ public: * location. * @return true if the data could be handled, false otherwise. */ - virtual bool data(const Variant &data) = 0; + virtual bool data(Variant &data) = 0; }; /** @@ -318,15 +325,15 @@ protected: using Handler::Handler; public: - bool start(const Variant::mapType &args) override; + bool start(Variant::mapType &args) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; void fieldEnd() override; bool annotationStart(const Variant &className, - const Variant::mapType &args) override; + Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(const Variant &data) override; + bool data(Variant &data) override; /** * Creates an instance of the EmptyHandler class. 
@@ -344,15 +351,15 @@ protected: using Handler::Handler; public: - bool start(const Variant::mapType &args) override; + bool start(Variant::mapType &args) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; void fieldEnd() override; bool annotationStart(const Variant &className, - const Variant::mapType &args) override; + Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(const Variant &data) override; + bool data(Variant &data) override; }; /** @@ -400,12 +407,12 @@ protected: * @param args are the arguments that were given in the "start" function. */ virtual void doHandle(const Variant &fieldData, - const Variant::mapType &args) = 0; + Variant::mapType &args) = 0; public: - bool start(const Variant::mapType &args) override; + bool start(Variant::mapType &args) override; void end() override; - bool data(const Variant &data) override; + bool data(Variant &data) override; }; } } diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp index 94ee82d..797dd8d 100644 --- a/src/core/parser/stack/ImportIncludeHandler.cpp +++ b/src/core/parser/stack/ImportIncludeHandler.cpp @@ -18,48 +18,16 @@ #include "ImportIncludeHandler.hpp" +#include #include +#include namespace ousia { - -/* ImportIncludeHandler */ - -void ImportIncludeHandler::start(Variant::mapType &args) -{ - rel = args["rel"].asString(); - type = args["type"].asString(); - src = args["src"].asString(); - srcInArgs = !src.empty(); -} - -void ImportIncludeHandler::data(const std::string &data, int field) -{ - if (srcInArgs) { - logger().error("\"src\" attribute has already been set"); - return; - } - if (field != 0) { - logger().error("Command has only one field."); - return; - } - src.append(data); -} +namespace parser_stack { /* ImportHandler */ -void ImportHandler::start(Variant::mapType &args) -{ - ImportIncludeHandler::start(args); - - // Make 
sure imports are still possible - if (scope().getFlag(ParserFlag::POST_HEAD)) { - logger().error("Imports must be listed before other commands.", - location()); - return; - } -} - -void ImportHandler::end() +void ImportHandler::doHandle(const Variant &fieldData, Variant::mapType &args) { // Fetch the last node and check whether an import is valid at this // position @@ -75,8 +43,9 @@ void ImportHandler::end() // Perform the actual import, register the imported node within the leaf // node - Rooted imported = - context().import(src, type, rel, leafRootNode->getReferenceTypes()); + Rooted imported = context().import( + fieldData.asString(), args["type"].asString(), args["rel"].asString(), + leafRootNode->getReferenceTypes()); if (imported != nullptr) { leafRootNode->reference(imported); } @@ -84,13 +53,10 @@ void ImportHandler::end() /* IncludeHandler */ -void IncludeHandler::start(Variant::mapType &args) +void IncludeHandler::doHandle(const Variant &fieldData, Variant::mapType &args) { - ImportIncludeHandler::start(args); + context().include(fieldData.asString(), args["type"].asString(), + args["rel"].asString(), {&RttiTypes::Node}); } - -void IncludeHandler::end() -{ - context().include(src, type, rel, {&RttiTypes::Node}); } } diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp index f9abe55..8f3d3d0 100644 --- a/src/core/parser/stack/ImportIncludeHandler.hpp +++ b/src/core/parser/stack/ImportIncludeHandler.hpp @@ -29,9 +29,11 @@ #define _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_ #include -#include + +#include "Handler.hpp" namespace ousia { +namespace parser_stack { /** * The ImportHandler is responsible for handling the "import" command. An import @@ -46,7 +48,7 @@ public: using StaticFieldHandler::StaticFieldHandler; void doHandle(const Variant &fieldData, - const Variant::mapType &args) override; + Variant::mapType &args) override; /** * Creates a new instance of the ImportHandler. 
@@ -57,7 +59,7 @@ public: */ static Handler *create(const HandlerData &handlerData) { - return new ImportHandler{handlerData}; + return new ImportHandler{handlerData, "src"}; } }; @@ -72,7 +74,7 @@ public: using StaticFieldHandler::StaticFieldHandler; void doHandle(const Variant &fieldData, - const Variant::mapType &args) override; + Variant::mapType &args) override; /** * Creates a new instance of the IncludeHandler. @@ -83,8 +85,9 @@ public: */ static Handler *create(const HandlerData &handlerData) { - return new IncludeHandler{handlerData}; + return new IncludeHandler{handlerData, "src"}; } }; } +} #endif diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index d84a19c..47f7d2c 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -316,8 +316,6 @@ void Stack::command(const Variant &name, const Variant::mapType &args) name); } - State const *lastTargetState = nullptr; - Variant::mapType canonicalArgs; while (true) { // Try to find a target state for the given command, if none can be // found and the current command does not have an open field, then try @@ -342,14 +340,6 @@ void Stack::command(const Variant &name, const Variant::mapType &args) // Fork the logger. We do not want any validation errors to skip LoggerFork loggerFork = logger().fork(); - // Canonicalize the arguments (if this has not already been done), allow - // additional arguments - if (lastTargetState != targetState) { - canonicalArgs = args; - targetState->arguments.validateMap(canonicalArgs, loggerFork, true); - lastTargetState = targetState; - } - // Instantiate the handler and push it onto the stack HandlerConstructor ctor = targetState->elementHandler ? 
targetState->elementHandler @@ -369,6 +359,11 @@ void Stack::command(const Variant &name, const Variant::mapType &args) bool validStack = handlersValid(); info.valid = false; if (validStack) { + // Canonicalize the arguments (if this has not already been done), + // allow additional arguments + Variant::mapType canonicalArgs = args; + targetState->arguments.validateMap(canonicalArgs, loggerFork, true); + handler->setLogger(loggerFork); try { info.valid = handler->start(canonicalArgs); @@ -430,7 +425,8 @@ void Stack::data(const Variant &data) // Pass the data to the current Handler instance bool valid = false; try { - valid = info.handler->data(data); + Variant dataCopy = data; + valid = info.handler->data(dataCopy); } catch (LoggableException ex) { loggerFork.log(ex); diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index 2cc7dfb..34f64f9 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -20,28 +20,33 @@ #include #include +#include + namespace ousia { +namespace parser_stack { /* TypesystemHandler */ -void TypesystemHandler::start(Variant::mapType &args) +bool TypesystemHandler::start(Variant::mapType &args) { // Create the typesystem instance Rooted typesystem = - project()->createTypesystem(args["name"].asString()); + context().getProject()->createTypesystem(args["name"].asString()); typesystem->setLocation(location()); // Push the typesystem onto the scope, set the POST_HEAD flag to true scope().push(typesystem); scope().setFlag(ParserFlag::POST_HEAD, false); + + return true; } void TypesystemHandler::end() { scope().pop(); } /* TypesystemEnumHandler */ -void TypesystemEnumHandler::start(Variant::mapType &args) +bool TypesystemEnumHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -52,33 +57,24 @@ void TypesystemEnumHandler::start(Variant::mapType &args) enumType->setLocation(location()); scope().push(enumType); 
+ + return true; } void TypesystemEnumHandler::end() { scope().pop(); } /* TypesystemEnumEntryHandler */ -void TypesystemEnumEntryHandler::start(Variant::mapType &args) {} - -void TypesystemEnumEntryHandler::end() +void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData, + Variant::mapType &args) { Rooted enumType = scope().selectOrThrow(); - enumType->addEntry(entry, logger()); -} - -void TypesystemEnumEntryHandler::data(const std::string &data, int field) -{ - if (field != 0) { - // TODO: This should be stored in the HandlerData - logger().error("Enum entry only has one field."); - return; - } - entry.append(data); + enumType->addEntry(fieldData.asString(), logger()); } /* TypesystemStructHandler */ -void TypesystemStructHandler::start(Variant::mapType &args) +bool TypesystemStructHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -103,13 +99,15 @@ void TypesystemStructHandler::start(Variant::mapType &args) }); } scope().push(structType); + + return true; } void TypesystemStructHandler::end() { scope().pop(); } /* TypesystemStructFieldHandler */ -void TypesystemStructFieldHandler::start(Variant::mapType &args) +bool TypesystemStructFieldHandler::start(Variant::mapType &args) { // Read the argument values const std::string &name = args["name"].asString(); @@ -142,13 +140,13 @@ void TypesystemStructFieldHandler::start(Variant::mapType &args) } }); } -} -void TypesystemStructFieldHandler::end() {} + return true; +} /* TypesystemConstantHandler */ -void TypesystemConstantHandler::start(Variant::mapType &args) +bool TypesystemConstantHandler::start(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -169,7 +167,9 @@ void TypesystemConstantHandler::start(Variant::mapType &args) constant.cast()->setType(type.cast(), logger); } }); -} -void TypesystemConstantHandler::end() {} + return true; } +} +} + diff --git a/src/core/parser/stack/TypesystemHandler.hpp 
b/src/core/parser/stack/TypesystemHandler.hpp index 76a7bc9..55277a1 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -19,6 +19,9 @@ /** * @file TypesystemHandler.hpp * + * Contains the Handler classes used to parse Typesystem descriptions. The + * Handlers parse all the tags found below and including the "typesystem" tag. + * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -26,96 +29,154 @@ #define _OUSIA_TYPESYSTEM_HANDLER_HPP_ #include -#include + +#include "Handler.hpp" namespace ousia { +namespace parser_stack { -class TypesystemHandler : public Handler { +/** + * Handles the occurrence of the "typesystem" tag. Creates a new Typesystem + * instance and places it on the ParserScope. + */ +class TypesystemHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemHandler{handlerData}; } }; -class TypesystemEnumHandler : public Handler { +/** + * Handles the occurrence of the "enum" tag. Creates a new EnumType instance and + * places it on the ParserScope. + */ +class TypesystemEnumHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemEnumHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. 
access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemEnumHandler{handlerData}; } }; -class TypesystemEnumEntryHandler : public Handler { +/** + * Handles the occurrence of the "entry" tag within an "enum" tag. Adds a new + * entry to the EnumType instance that is currently on the ParserScope. + */ +class TypesystemEnumEntryHandler : public StaticFieldHandler { public: - using Handler::Handler; - - std::string entry; - - void start(Variant::mapType &args) override; - - void end() override; - - void data(const std::string &data, int field) override; - + using StaticFieldHandler::StaticFieldHandler; + + void doHandle(const Variant &fieldData, + Variant::mapType &args) override; + + /** + * Creates a new instance of the TypesystemEnumEntryHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { - return new TypesystemEnumEntryHandler{handlerData}; + return new TypesystemEnumEntryHandler{handlerData, "name"}; } }; -class TypesystemStructHandler : public Handler { +/** + * Handles the occurrence of the "struct" tag within a typesystem description. + * Creates a new StructType instance and places it on the ParserScope. + */ +class TypesystemStructHandler : public StaticHandler { public: - using Handler::Handler; - - void start(Variant::mapType &args) override; + using StaticHandler::StaticHandler; + bool start(Variant::mapType &args) override; void end() override; + /** + * Creates a new instance of the TypesystemStructHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. 
+ */ static Handler *create(const HandlerData &handlerData) { return new TypesystemStructHandler{handlerData}; } }; -class TypesystemStructFieldHandler : public Handler { +/** + * Handles the occurrence of the "field" tag within a typesystem structure + * description. Places a new Attribute instance in the StructType instance + * that is currently at the top of the scope. + */ +class TypesystemStructFieldHandler : public StaticHandler { public: - using Handler::Handler; + using StaticHandler::StaticHandler; - void start(Variant::mapType &args) override; - - void end() override; + bool start(Variant::mapType &args) override; + /** + * Creates a new instance of the TypesystemStructFieldHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. + */ static Handler *create(const HandlerData &handlerData) { return new TypesystemStructFieldHandler{handlerData}; } }; -class TypesystemConstantHandler : public Handler { +/** + * Handles the occurrence of the "constant" tag within a typesystem structure + * description. Places a new Constant instance in the current typesystem. + */ +class TypesystemConstantHandler : public StaticHandler { public: - using Handler::Handler; + using StaticHandler::StaticHandler; - void start(Variant::mapType &args) override; - - void end() override; + bool start(Variant::mapType &args) override; + /** + * Creates a new instance of the TypesystemConstantHandler. + * + * @param handlerData is the data that is passed to the constructor of the + * Handler base class and used there to e.g. access the ParserContext and + * the Callbacks instance. 
+ */ static Handler *create(const HandlerData &handlerData) { return new TypesystemConstantHandler{handlerData}; } }; } +} #endif diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index 7cc8bc5..321d471 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -112,16 +112,21 @@ private: TestHandler(const HandlerData &handlerData) : Handler(handlerData) {} public: - bool start(const Variant::mapType &args) + bool start(Variant::mapType &args) override { tracker.startCount++; tracker.startArgs = args; + if (!tracker.startResult) { + logger().error( + "The TestHandler was told not to allow a field start. So it " + "doesn't. The TestHandler always obeys its master."); + } return tracker.startResult; } - void end() { tracker.endCount++; } + void end() override { tracker.endCount++; } - bool fieldStart(bool &isDefault, size_t fieldIdx) + bool fieldStart(bool &isDefault, size_t fieldIdx) override { tracker.fieldStartCount++; tracker.fieldStartIsDefault = isDefault; @@ -132,9 +137,10 @@ public: return tracker.fieldStartResult; } - void fieldEnd() { tracker.fieldEndCount++; } + void fieldEnd() override { tracker.fieldEndCount++; } - bool annotationStart(const Variant &className, const Variant::mapType &args) + bool annotationStart(const Variant &className, + Variant::mapType &args) override { tracker.annotationStartCount++; tracker.annotationStartClassName = className; @@ -142,7 +148,8 @@ public: return tracker.annotationStartResult; } - bool annotationEnd(const Variant &className, const Variant &elementName) + bool annotationEnd(const Variant &className, + const Variant &elementName) override { tracker.annotationEndCount++; tracker.annotationEndClassName = className; @@ -150,7 +157,7 @@ public: return tracker.annotationEndResult; } - bool data(const Variant &data) + bool data(Variant &data) override { tracker.dataCount++; tracker.dataData = data; @@ -458,6 +465,26 @@ TEST(Stack, 
noImplicitDefaultFieldIfDefaultFieldGiven) ASSERT_FALSE(logger.hasError()); } +TEST(Stack, noEndIfStartFails) +{ + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + tracker.startResult = false; + s.command("b", {}); + tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_TRUE(logger.hasError()); +} + TEST(Stack, implicitDefaultFieldOnData) { tracker.reset(); -- cgit v1.2.3 From 7907e9407f499354e0c3a0a402217b760fab9ad7 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 20:55:30 +0100 Subject: Displaying error messages at the location of the argument --- src/core/common/Argument.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/core') diff --git a/src/core/common/Argument.cpp b/src/core/common/Argument.cpp index bfe74a4..b10fad3 100644 --- a/src/core/common/Argument.cpp +++ b/src/core/common/Argument.cpp @@ -302,10 +302,10 @@ bool Arguments::validateMap(Variant::mapType &map, Logger &logger, } else { if (ignoreUnknown) { logger.note(std::string("Ignoring argument \"") + e.first + - std::string("\"")); + std::string("\""), e.second); } else { logger.error(std::string("Unknown argument \"") + e.first + - std::string("\"")); + std::string("\""), e.second); ok = false; } } -- cgit v1.2.3 From e2e32eef55406519c744002a404e7e5ca66b29a1 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 20:57:27 +0100 Subject: Declaring States in the Handler classes --- CMakeLists.txt | 1 + src/core/parser/stack/DocumentHandler.cpp | 21 +++++- src/core/parser/stack/DocumentHandler.hpp | 14 ++++ src/core/parser/stack/DomainHandler.cpp | 100 ++++++++++++++++++++++++- src/core/parser/stack/DomainHandler.hpp | 73 
++++++++++++++++-- src/core/parser/stack/GenericParserStates.cpp | 53 +++++++++++++ src/core/parser/stack/GenericParserStates.hpp | 49 ++++++++++++ src/core/parser/stack/ImportIncludeHandler.cpp | 24 +++++- src/core/parser/stack/ImportIncludeHandler.hpp | 13 ++++ src/core/parser/stack/TypesystemHandler.cpp | 48 +++++++++++- src/core/parser/stack/TypesystemHandler.hpp | 30 +++++++- 11 files changed, 409 insertions(+), 17 deletions(-) create mode 100644 src/core/parser/stack/GenericParserStates.cpp create mode 100644 src/core/parser/stack/GenericParserStates.hpp (limited to 'src/core') diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a3db32..2106cf0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,6 +165,7 @@ ADD_LIBRARY(ousia_core src/core/parser/stack/Callbacks src/core/parser/stack/DocumentHandler src/core/parser/stack/DomainHandler + src/core/parser/stack/GenericParserStates src/core/parser/stack/Handler src/core/parser/stack/ImportIncludeHandler src/core/parser/stack/State diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index b28f0fb..9fedabb 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -16,8 +16,6 @@ along with this program. If not, see . 
*/ -#include "DocumentHandler.hpp" - #include #include @@ -30,6 +28,9 @@ #include #include +#include "DocumentHandler.hpp" +#include "State.hpp" + namespace ousia { namespace parser_stack { @@ -274,6 +275,22 @@ bool DocumentChildHandler::data(Variant &data) } return true; } + +namespace States { +const State Document = StateBuilder() + .parent(&None) + .createdNodeType(&RttiTypes::Document) + .elementHandler(DocumentHandler::create) + .arguments({Argument::String("name", "")}); + +const State DocumentChild = + StateBuilder() + .parents({&Document, &DocumentChild}) + .createdNodeTypes({&RttiTypes::StructureNode, + &RttiTypes::AnnotationEntity, + &RttiTypes::DocumentField}) + .elementHandler(DocumentChildHandler::create); +} } namespace RttiTypes { diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 7dc4c86..b339b96 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -144,6 +144,19 @@ public: return new DocumentChildHandler{handlerData}; } }; + +namespace States { +/** + * State constant representing the "document" tag. + */ +extern const State Document; + +/** + * State constant representing any user-defined element within a document. + */ +extern const State DocumentChild; +} + } namespace RttiTypes { @@ -152,6 +165,7 @@ namespace RttiTypes { */ extern const Rtti DocumentField; } + } #endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index cb12543..24a6f1a 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -16,14 +16,18 @@ along with this program. If not, see . 
*/ -#include "DomainHandler.hpp" - #include +#include #include #include #include #include +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" + namespace ousia { namespace parser_stack { @@ -278,6 +282,98 @@ bool DomainParentFieldRefHandler::start(Variant::mapType &args) }); return true; } + +namespace States { +const State Domain = StateBuilder() + .parents({&None, &Document}) + .createdNodeType(&RttiTypes::Domain) + .elementHandler(DomainHandler::create) + .arguments({Argument::String("name")}); + +const State DomainStruct = + StateBuilder() + .parent(&Domain) + .createdNodeType(&RttiTypes::StructuredClass) + .elementHandler(DomainStructHandler::create) + .arguments({Argument::String("name"), + Argument::Cardinality("cardinality", Cardinality::any()), + Argument::Bool("isRoot", false), + Argument::Bool("transparent", false), + Argument::String("isa", "")}); + +const State DomainAnnotation = + StateBuilder() + .parent(&Domain) + .createdNodeType(&RttiTypes::AnnotationClass) + .elementHandler(DomainAnnotationHandler::create) + .arguments({Argument::String("name")}); + +const State DomainAttributes = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::StructType) + .elementHandler(DomainAttributesHandler::create) + .arguments({}); + +const State DomainAttribute = + StateBuilder() + .parent(&DomainAttributes) + .elementHandler(TypesystemStructFieldHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("default", Variant::fromObject(nullptr))}); + +const State DomainField = StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("isSubtree", false), + Argument::Bool("optional", false)}); + +const State DomainFieldRef = + StateBuilder() + 
.parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainFieldRefHandler::create) + .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); + +const State DomainStructPrimitive = + StateBuilder() + .parents({&DomainStruct, &DomainAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainPrimitiveHandler::create) + .arguments( + {Argument::String("name", ""), Argument::Bool("isSubtree", false), + Argument::Bool("optional", false), Argument::String("type")}); + +const State DomainStructChild = StateBuilder() + .parent(&DomainField) + .elementHandler(DomainChildHandler::create) + .arguments({Argument::String("ref")}); + +const State DomainStructParent = + StateBuilder() + .parent(&DomainStruct) + .createdNodeType(&RttiTypes::DomainParent) + .elementHandler(DomainParentHandler::create) + .arguments({Argument::String("ref")}); + +const State DomainStructParentField = + StateBuilder() + .parent(&DomainStructParent) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainParentFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("isSubtree", false), + Argument::Bool("optional", false)}); + +const State DomainStructParentFieldRef = + StateBuilder() + .parent(&DomainStructParent) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(DomainParentFieldRefHandler::create) + .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); +} } namespace RttiTypes { diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp index 917d65d..76172d6 100644 --- a/src/core/parser/stack/DomainHandler.hpp +++ b/src/core/parser/stack/DomainHandler.hpp @@ -34,13 +34,14 @@ #include "Handler.hpp" namespace ousia { -namespace parser_stack { - -// TODO: Documentation // Forward declarations class Rtti; +namespace parser_stack { + +// TODO: Documentation + class DomainHandler : public StaticHandler { public: using 
StaticHandler::StaticHandler; @@ -149,10 +150,6 @@ public: using Node::Node; }; -namespace RttiTypes { -extern const Rtti DomainParent; -} - class DomainParentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; @@ -189,6 +186,68 @@ public: return new DomainParentFieldRefHandler{handlerData}; } }; + +namespace States { +/** + * State representing a "domain" struct. + */ +extern const State Domain; + +/** + * State representing a "struct" tag within a domain description. + */ +extern const State DomainStruct; + +/** + * State representing an "annotation" tag within a domain description. + */ +extern const State DomainAnnotation; + +/** + * State representing an "attributes" tag within a structure or annotation. + */ +extern const State DomainAttributes; + +/** + * State representing an "attribute" tag within the "attributes". + */ +extern const State DomainAttribute; + +/** + * State representing a "field" tag within a structure or annotation. + */ +extern const State DomainField; + +/** + * State representing a "fieldref" tag within a structure or annotation. + */ +extern const State DomainFieldRef; + +/** + * State representing a "primitive" tag within a structure or annotation. + */ +extern const State DomainStructPrimitive; + +/** + * State representing a "child" tag within a structure or annotation. + */ +extern const State DomainStructChild; + +/** + * State representing a "parent" tag within a structure or annotation. + */ +extern const State DomainStructParent; + +/** + * State representing a "field" tag within a "parent" tag. + */ +extern const State DomainStructParentField; + +/** + * State representing a "fieldRef" tag within a "parent" tag. 
+ */ +extern const State DomainStructParentFieldRef; +} } namespace RttiTypes { diff --git a/src/core/parser/stack/GenericParserStates.cpp b/src/core/parser/stack/GenericParserStates.cpp new file mode 100644 index 0000000..69a6e0e --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.cpp @@ -0,0 +1,53 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "DocumentHandler.hpp" +#include "DomainHandler.hpp" +#include "GenericParserStates.hpp" +#include "ImportIncludeHandler.hpp" +#include "TypesystemHandler.hpp" + +namespace ousia { +namespace parser_stack { + +const std::multimap GenericParserStates{ + {"document", &States::Document}, + {"*", &States::DocumentChild}, + {"domain", &States::Domain}, + {"struct", &States::DomainStruct}, + {"annotation", &States::DomainAnnotation}, + {"attributes", &States::DomainAttributes}, + {"attribute", &States::DomainAttribute}, + {"field", &States::DomainField}, + {"fieldRef", &States::DomainFieldRef}, + {"primitive", &States::DomainStructPrimitive}, + {"childRef", &States::DomainStructChild}, + {"parentRef", &States::DomainStructParent}, + {"field", &States::DomainStructParentField}, + {"fieldRef", &States::DomainStructParentFieldRef}, + {"typesystem", &States::Typesystem}, + {"enum", &States::TypesystemEnum}, + {"entry", &States::TypesystemEnumEntry}, + {"struct", &States::TypesystemStruct}, 
+ {"field", &States::TypesystemStructField}, + {"constant", &States::TypesystemConstant}, + {"import", &States::Import}, + {"include", &States::Include}}; +} +} + diff --git a/src/core/parser/stack/GenericParserStates.hpp b/src/core/parser/stack/GenericParserStates.hpp new file mode 100644 index 0000000..552eee5 --- /dev/null +++ b/src/core/parser/stack/GenericParserStates.hpp @@ -0,0 +1,49 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file GenericParserStates.hpp + * + * Contains a multimap which maps between tag/command names to the corresponding + * state descriptors. This multimap is used to initialize the push down + * automaton residing inside the "Stack" class. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ +#define _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ + +#include +#include + +namespace ousia { +namespace parser_stack { + +// Forward declarations +class State; + +/** + * Map between tagnames and references to the corresponding State instances. 
+ */ +extern const std::multimap GenericParserStates; +} +} + +#endif /* _OUSIA_PARSER_STACK_GENERIC_PARSER_STATES_HPP_ */ + diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp index 797dd8d..d1ea97d 100644 --- a/src/core/parser/stack/ImportIncludeHandler.cpp +++ b/src/core/parser/stack/ImportIncludeHandler.cpp @@ -16,12 +16,16 @@ along with this program. If not, see . */ -#include "ImportIncludeHandler.hpp" - #include #include #include +#include "DomainHandler.hpp" +#include "DocumentHandler.hpp" +#include "ImportIncludeHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" + namespace ousia { namespace parser_stack { @@ -58,5 +62,21 @@ void IncludeHandler::doHandle(const Variant &fieldData, Variant::mapType &args) context().include(fieldData.asString(), args["type"].asString(), args["rel"].asString(), {&RttiTypes::Node}); } + +namespace States { +const State Import = + StateBuilder() + .parents({&Document, &Typesystem, &Domain}) + .elementHandler(ImportHandler::create) + .arguments({Argument::String("rel", ""), Argument::String("type", ""), + Argument::String("src", "")}); + +const State Include = + StateBuilder() + .parent(&All) + .elementHandler(IncludeHandler::create) + .arguments({Argument::String("rel", ""), Argument::String("type", ""), + Argument::String("src", "")}); +} } } diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp index 8f3d3d0..6168639 100644 --- a/src/core/parser/stack/ImportIncludeHandler.hpp +++ b/src/core/parser/stack/ImportIncludeHandler.hpp @@ -88,6 +88,19 @@ public: return new IncludeHandler{handlerData, "src"}; } }; + +namespace States { +/** + * State representing the "import" command. + */ +extern const State Import; + +/** + * State representing the "include" command. 
+ */ +extern const State Include; +} + } } #endif diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index 34f64f9..d053699 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -16,12 +16,14 @@ along with this program. If not, see . */ -#include "TypesystemHandler.hpp" - #include +#include #include #include +#include "DomainHandler.hpp" +#include "State.hpp" +#include "TypesystemHandler.hpp" namespace ousia { namespace parser_stack { @@ -170,6 +172,48 @@ bool TypesystemConstantHandler::start(Variant::mapType &args) return true; } + +namespace States { +const State Typesystem = StateBuilder() + .parents({&None, &Domain}) + .createdNodeType(&RttiTypes::Typesystem) + .elementHandler(TypesystemHandler::create) + .arguments({Argument::String("name", "")}); + +const State TypesystemEnum = StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::EnumType) + .elementHandler(TypesystemEnumHandler::create) + .arguments({Argument::String("name")}); + +const State TypesystemEnumEntry = + StateBuilder() + .parent(&TypesystemEnum) + .elementHandler(TypesystemEnumEntryHandler::create) + .arguments({}); + +const State TypesystemStruct = + StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::StructType) + .elementHandler(TypesystemStructHandler::create) + .arguments({Argument::String("name"), Argument::String("parent", "")}); + +const State TypesystemStructField = + StateBuilder() + .parent(&TypesystemStruct) + .elementHandler(TypesystemStructFieldHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("default", Variant::fromObject(nullptr))}); + +const State TypesystemConstant = + StateBuilder() + .parent(&Typesystem) + .createdNodeType(&RttiTypes::Constant) + .elementHandler(TypesystemConstantHandler::create) + .arguments({Argument::String("name"), Argument::String("type"), + Argument::Any("value")}); 
+} } } diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp index 55277a1..85494f1 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -91,8 +91,7 @@ class TypesystemEnumEntryHandler : public StaticFieldHandler { public: using StaticFieldHandler::StaticFieldHandler; - void doHandle(const Variant &fieldData, - Variant::mapType &args) override; + void doHandle(const Variant &fieldData, Variant::mapType &args) override; /** * Creates a new instance of the TypesystemEnumEntryHandler. @@ -177,6 +176,33 @@ public: return new TypesystemConstantHandler{handlerData}; } }; + +namespace States { +/** + * State representing the "typesystem" tag. + */ +extern const State Typesystem; +/** + * State representing the "enum" tag within a typesystem. + */ +extern const State TypesystemEnum; +/** + * State representing the "entry" tag within an enum. + */ +extern const State TypesystemEnumEntry; +/** + * State representing the "struct" tag within a typesystem. + */ +extern const State TypesystemStruct; +/** + * State representing the "field" tag within a typesystem structure. + */ +extern const State TypesystemStructField; +/** + * State representing the "constant" tag within a typesystem. 
+ */ +extern const State TypesystemConstant; +} } } #endif -- cgit v1.2.3 From d0afc14f19509ba0d870c10d989151827c9bff57 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 20:58:52 +0100 Subject: Domains an Typesystems can also be declared inline --- src/core/parser/stack/DomainHandler.cpp | 10 ++++++++++ src/core/parser/stack/TypesystemHandler.cpp | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'src/core') diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index 24a6f1a..a2c8eec 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -35,11 +35,21 @@ namespace parser_stack { bool DomainHandler::start(Variant::mapType &args) { + // Create the Domain node Rooted domain = context().getProject()->createDomain(args["name"].asString()); domain->setLocation(location()); + // If the domain is defined inside a document, add the reference to the + // document + Rooted document = scope().select(); + if (document != nullptr) { + document->reference(domain); + } + + // Push the typesystem onto the scope, set the POST_HEAD flag to true scope().push(domain); + scope().setFlag(ParserFlag::POST_HEAD, false); return true; } diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index d053699..8fd9525 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -37,6 +37,13 @@ bool TypesystemHandler::start(Variant::mapType &args) context().getProject()->createTypesystem(args["name"].asString()); typesystem->setLocation(location()); + // If the typesystem is defined inside a domain, add a reference to the + // typesystem to the domain + Rooted domain = scope().select(); + if (domain != nullptr) { + domain->reference(typesystem); + } + // Push the typesystem onto the scope, set the POST_HEAD flag to true scope().push(typesystem); scope().setFlag(ParserFlag::POST_HEAD, 
false); -- cgit v1.2.3 From 40f4666c43211d9071a827ad8a2524688e7f678f Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 20:59:12 +0100 Subject: Fixed StaticFieldHandler using empty strings --- src/core/parser/stack/Handler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/core') diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 86000c4..bf5d4ea 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -202,7 +202,7 @@ bool StaticFieldHandler::start(Variant::mapType &args) { if (!argName.empty()) { auto it = args.find(argName); - if (it != args.end()) { + if (it != args.end() && !it->second.toString().empty()) { handled = true; doHandle(it->second, args); return true; -- cgit v1.2.3 From b09d5434def76b67e9f87869eeaec2f9266f9535 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 21:33:13 +0100 Subject: Removed no longer available getVariantTypes --- src/core/model/Typesystem.hpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'src/core') diff --git a/src/core/model/Typesystem.hpp b/src/core/model/Typesystem.hpp index 39f777f..8079578 100644 --- a/src/core/model/Typesystem.hpp +++ b/src/core/model/Typesystem.hpp @@ -422,16 +422,6 @@ public: * @return a Variant with the cardinality value "any". */ Variant create() const override { return Variant{Cardinality::any()}; } - - /** - * Returns the cardinality VariantType. - * - * @return the cardinality VariantType. 
- */ - std::vector getVariantTypes() const override - { - return {VariantType::CARDINALITY}; - } }; /** @@ -1473,4 +1463,4 @@ extern const Rtti SystemTypesystem; } } -#endif /* _OUSIA_MODEL_TYPESYSTEM_HPP_ */ \ No newline at end of file +#endif /* _OUSIA_MODEL_TYPESYSTEM_HPP_ */ -- cgit v1.2.3 From 4f2872d9968aec93bebff90d1238347c8a364949 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 21:33:35 +0100 Subject: Removed createPath declaration in DocumentHandler --- src/core/parser/stack/DocumentHandler.hpp | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'src/core') diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index b339b96..2c474f9 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -100,16 +100,6 @@ private: void preamble(Handle parentNode, std::string &fieldName, DocumentEntity *&parent, bool &inField); - /** - * Constructs all structured entites along the given path and inserts them - * into the document graph. - * - * @param path is a path containing an alternating series of structured - * classes and fields. - * @pram parent is the root entity from which the process should be started. - */ - void createPath(const NodeVector &path, DocumentEntity *&parent); - /** * Tries to convert the given data to the type that is specified in the * given primitive field. -- cgit v1.2.3