From 5d6ee07995c7f59e66e0df558c8ebe7d2a8d1f68 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:52:13 +0100 Subject: refactored SyntaxDescriptor to Token.hpp and added TokenDescriptor class. --- CMakeLists.txt | 1 + src/core/common/Token.cpp | 14 --- src/core/common/Token.hpp | 67 +----------- src/core/model/Syntax.cpp | 58 +++++++++++ src/core/model/Syntax.hpp | 196 +++++++++++++++++++++++++++++++++++ src/core/parser/stack/Callbacks.hpp | 3 +- src/core/parser/stack/Handler.cpp | 2 +- src/core/parser/stack/Handler.hpp | 3 +- src/core/parser/stack/TokenStack.cpp | 4 +- src/core/parser/stack/TokenStack.hpp | 5 +- 10 files changed, 266 insertions(+), 87 deletions(-) create mode 100644 src/core/model/Syntax.cpp create mode 100644 src/core/model/Syntax.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b206458..13de9ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,6 +176,7 @@ ADD_LIBRARY(ousia_core src/core/model/Project src/core/model/RootNode src/core/model/Style + src/core/model/Syntax src/core/model/Typesystem src/core/parser/Parser src/core/parser/ParserContext diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp index e454ae4..17ce03e 100644 --- a/src/core/common/Token.cpp +++ b/src/core/common/Token.cpp @@ -20,19 +20,5 @@ namespace ousia { -/* Class TokenSyntaxDescriptor */ - -void TokenSyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const -{ - if (start != Tokens::Empty) { - set.insert(start); - } - if (end != Tokens::Empty) { - set.insert(end); - } - if (shortForm != Tokens::Empty) { - set.insert(shortForm); - } -} } diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp index f89a0ce..f37151f 100644 --- a/src/core/common/Token.hpp +++ b/src/core/common/Token.hpp @@ -173,71 +173,6 @@ struct Token { const SourceLocation &getLocation() const { return location; } }; -/** - * Class describing the user defined syntax for a single field or annotation. - */ -struct TokenSyntaxDescriptor { - /** - * Possible start token or Tokens::Empty if no token is set. - */ - TokenId start; - - /** - * Possible end token or Tokens::Empty if no token is set. - */ - TokenId end; - - /** - * Possible representation token or Tokens::Empty if no token is set. - */ - TokenId shortForm; - - /** - * Flag specifying whether this TokenSyntaxDescriptor describes an - * annotation. - */ - bool isAnnotation; - - /** - * Default constructor, sets all token ids to Tokens::Empty and isAnnotation - * to false. - */ - TokenSyntaxDescriptor() - : start(Tokens::Empty), - end(Tokens::Empty), - shortForm(Tokens::Empty), - isAnnotation(false) - { - } - - /** - * Member initializer constructor. - * - * @param start is a possible start token. - * @param end is a possible end token. - * @param shortForm is a possible short form token. - * @param isAnnotation is set to true if this syntax descriptor describes an - * annotation. - */ - TokenSyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, - bool isAnnotation) - : start(start), - end(end), - shortForm(shortForm), - isAnnotation(isAnnotation) - { - } - - /** - * Inserts all tokens referenced in this TokenSyntaxDescriptor into the - * given TokenSet. Skips token ids set to Tokens::Empty. - * - * @param set is the TokenSet instance into which the Tokens should be - * inserted. - */ - void insertIntoTokenSet(TokenSet &set) const; -}; } -#endif /* _OUSIA_TOKENS_HPP_ */ - +#endif /* _OUSIA_TOKENS_HPP_ */ \ No newline at end of file diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp new file mode 100644 index 0000000..9dbaccc --- /dev/null +++ b/src/core/model/Syntax.cpp @@ -0,0 +1,58 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "Syntax.hpp" + +#include "Domain.hpp" + +namespace ousia { + +/* Class TokenSyntaxDescriptor */ + +bool SyntaxDescriptor::isAnnotation() const +{ + return descriptor->isa(&RttiTypes::AnnotationClass); +} +bool SyntaxDescriptor::isFieldDescriptor() const +{ + return descriptor->isa(&RttiTypes::FieldDescriptor); +} +bool SyntaxDescriptor::isStruct() const +{ + return descriptor->isa(&RttiTypes::StructuredClass); +} + +void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ + if (start != Tokens::Empty) { + set.insert(start); + } + if (end != Tokens::Empty) { + set.insert(end); + } + if (shortForm != Tokens::Empty) { + set.insert(shortForm); + } +} + +bool SyntaxDescriptor::isEmpty() const +{ + return start == Tokens::Empty && end == Tokens::Empty && + shortForm == Tokens::Empty; +} +} \ No newline at end of file diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp new file mode 100644 index 0000000..4da3408 --- /dev/null +++ b/src/core/model/Syntax.hpp @@ -0,0 +1,196 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Syntax.hpp + * + * This header contains the Descriptor classes for user definable syntax for + * Document entities or fields. These classes are referenced in Ontology.hpp. + */ + +#ifndef _OUSIA_MODEL_SYNTAX_HPP_ +#define _OUSIA_MODEL_SYNTAX_HPP_ + +#include +#include "Node.hpp" + +namespace ousia { + +/** + * Class to describe a single token that shall be used as user-defined syntax. + */ +struct TokenDescriptor { + /** + * The string content of this token, if it is not a special one. + */ + std::string token; + /** + * A flag to be set true if this TokenDescriptor uses a special token. + */ + bool special; + /** + * An id to uniquely identify this token. + */ + TokenId id; + + /** + * Constructor for non-special tokens. The special flag is set to false and + * the id to Tokens::Empty. + * + * @param token The string content of this token, if it is not a special + * one. + */ + TokenDescriptor(std::string token = std::string()) + : token(std::move(token)), special(false), id(Tokens::Empty) + { + } + + /** + * Constructor for special tokens. The token is set to an empty string and + * the special flag to true. + * + * @param id the id of the special token. + */ + TokenDescriptor(TokenId id) : special(true), id(id) {} + + /** + * Returns true if and only if neither a string nor an ID is given. + * + * @return true if and only if neither a string nor an ID is given. + */ + bool isEmpty() const { return token.empty() && id == Tokens::Empty; } +}; + +/** + * Class describing the user defined syntax for a StructuredClass, + * AnnotationClass or FieldDescriptor. + * + * This class is used during parsing of a Document. It is used to describe + * the tokens relevant for one Descriptor that could be created at this point + * during parsing. + */ +struct SyntaxDescriptor { + /** + * Possible start token or Tokens::Empty if no token is set. + */ + TokenId start; + + /** + * Possible end token or Tokens::Empty if no token is set. + */ + TokenId end; + + /** + * Possible representation token or Tokens::Empty if no token is set. + */ + TokenId shortForm; + + /* + * The Descriptor this SyntaxDescriptor belongs to. As this may be + * a FieldDescriptor as well as a class Descriptor (StructuredClass or + * AnnotationClass) we can only use the class Node as inner argument here. + */ + Rooted descriptor; + /* + * Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + ssize_t depth; + + /** + * Default constructor, sets all token ids to Tokens::Empty and the + * descriptor handle to nullptr. + */ + SyntaxDescriptor() + : start(Tokens::Empty), + end(Tokens::Empty), + shortForm(Tokens::Empty), + descriptor(nullptr), + depth(-1) + { + } + + /** + * Member initializer constructor. + * + * @param start is a possible start token. + * @param end is a possible end token. + * @param shortForm is a possible short form token. + * @param descriptor The Descriptor this SyntaxDescriptor belongs to. + * @param depth Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + Handle descriptor, ssize_t depth) + : start(start), + end(end), + shortForm(shortForm), + descriptor(descriptor), + depth(depth) + { + } + + /** + * Inserts all tokens referenced in this SyntaxDescriptor into the + * given TokenSet. Skips token ids set to Tokens::Empty. + * + * @param set is the TokenSet instance into which the Tokens should be + * inserted. + */ + void insertIntoTokenSet(TokenSet &set) const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + * + * @return true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + */ + bool isAnnotation() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + */ + bool isStruct() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + */ + bool isFieldDescriptor() const; + + /** + * Returns true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + * + * @return true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + */ + bool isEmpty() const; +}; +} +#endif \ No newline at end of file diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index d7b2547..e471881 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -34,6 +34,7 @@ #include #include +#include namespace ousia { @@ -96,7 +97,7 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Removes the previously pushed list of tokens from the stack. diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 734976a..12df0fd 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -74,7 +74,7 @@ Variant Handler::readData() return handlerData.callbacks.readData(); } -void Handler::pushTokens(const std::vector &tokens) +void Handler::pushTokens(const std::vector &tokens) { handlerData.callbacks.pushTokens(tokens); } diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 848d395..19660d0 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace ousia { @@ -200,7 +201,7 @@ protected: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Calls the corresponding function in the HandlerCallbacks instance. diff --git a/src/core/parser/stack/TokenStack.cpp b/src/core/parser/stack/TokenStack.cpp index 6afeaed..ac1d94e 100644 --- a/src/core/parser/stack/TokenStack.cpp +++ b/src/core/parser/stack/TokenStack.cpp @@ -21,7 +21,7 @@ namespace ousia { namespace parser_stack { -void TokenStack::pushTokens(const std::vector &tokens) +void TokenStack::pushTokens(const std::vector &tokens) { stack.push_back(tokens); } @@ -35,7 +35,7 @@ TokenSet TokenStack::tokens() const } TokenSet res; - for (const TokenSyntaxDescriptor &descr : stack.back()) { + for (const SyntaxDescriptor &descr : stack.back()) { descr.insertIntoTokenSet(res); } return res; diff --git a/src/core/parser/stack/TokenStack.hpp b/src/core/parser/stack/TokenStack.hpp index 9669f50..af734bb 100644 --- a/src/core/parser/stack/TokenStack.hpp +++ b/src/core/parser/stack/TokenStack.hpp @@ -32,6 +32,7 @@ #include #include +#include namespace ousia { namespace parser_stack { @@ -52,7 +53,7 @@ private: * Stack containing vectors of TokenSyntaxDescriptor instances as given by * the user. */ - std::vector> stack; + std::vector> stack; /** * Constructor of the TokenStack class. @@ -86,7 +87,7 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Removes the previously pushed list of tokens from the stack. -- cgit v1.2.3 From 522580cfdfc9e6dc3448240448c29533e68f240f Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:52:34 +0100 Subject: added check for witespace characters in Utils::isUserDefinedToken --- src/core/common/Utils.cpp | 15 +++++++++++---- src/core/common/Utils.hpp | 1 + test/core/common/UtilsTest.cpp | 2 ++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 219b437..a87ff6d 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -124,7 +124,8 @@ bool Utils::isUserDefinedToken(const std::string &token) // Make sure the token meets is neither empty, nor starts or ends with an // alphanumeric character const size_t len = token.size(); - if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) { + if (len == 0 || isAlphanumeric(token[0]) || + isAlphanumeric(token[len - 1])) { return false; } @@ -134,13 +135,19 @@ bool Utils::isUserDefinedToken(const std::string &token) return false; } + // Make sure the token does not contain any whitespaces. + for (char c : token) { + if (isWhitespace(c)) { + return false; + } + } + // Make sure the token contains other characters but { and } - for (char c: token) { + for (char c : token) { if (c != '{' && c != '}') { return true; } } return false; } -} - +} \ No newline at end of file diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 25a4de5..d9e26da 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -117,6 +117,7 @@ public: *
  • '%', '%{', '}%'
  • * * + *
  • The token does not contain any whitespaces.
  • * */ static bool isUserDefinedToken(const std::string &token); diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 54890ee..2aaa430 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -148,6 +148,7 @@ TEST(Utils, isUserDefinedToken) EXPECT_TRUE(Utils::isUserDefinedToken("`")); EXPECT_TRUE(Utils::isUserDefinedToken("<")); EXPECT_TRUE(Utils::isUserDefinedToken(">")); + EXPECT_TRUE(Utils::isUserDefinedToken("<+>")); EXPECT_FALSE(Utils::isUserDefinedToken("a:")); EXPECT_FALSE(Utils::isUserDefinedToken("a:a")); EXPECT_FALSE(Utils::isUserDefinedToken(":a")); @@ -158,6 +159,7 @@ TEST(Utils, isUserDefinedToken) EXPECT_FALSE(Utils::isUserDefinedToken("<\\")); EXPECT_FALSE(Utils::isUserDefinedToken("\\>")); EXPECT_FALSE(Utils::isUserDefinedToken("{!")); + EXPECT_FALSE(Utils::isUserDefinedToken("< + >")); } } -- cgit v1.2.3 From ee943c5e9b60cf577ff236a694df180db89b0972 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:53:20 +0100 Subject: integrated syntax tokens in Domain. --- src/core/model/Domain.cpp | 193 +++++++++++++++++++++++--- src/core/model/Domain.hpp | 297 +++++++++++++++++++++++++++++++++++++---- test/core/model/DomainTest.cpp | 165 ++++++++++++++++++++++- 3 files changed, 607 insertions(+), 48 deletions(-) diff --git a/src/core/model/Domain.cpp b/src/core/model/Domain.cpp index 8255401..587a382 100644 --- a/src/core/model/Domain.cpp +++ b/src/core/model/Domain.cpp @@ -20,8 +20,9 @@ #include #include -#include #include +#include +#include #include "Domain.hpp" @@ -169,52 +170,60 @@ static NodeVector pathTo(const Node *start, Logger &logger, return shortest; } +struct CollectState { + Node *n; + size_t depth; + + CollectState(Node *n, size_t depth) : n(n), depth(depth) {} +}; + template static NodeVector collect(const Node *start, F match) { // result NodeVector res; // queue for breadth-first search of graph. - std::queue> q; + std::queue q; // put the initial node on the stack. - q.push(const_cast(start)); + q.push(CollectState(const_cast(start), 0)); // set of visited nodes. std::unordered_set visited; while (!q.empty()) { - Rooted n = q.front(); + CollectState state = q.front(); q.pop(); // do not proceed if this node was already visited. - if (!visited.insert(n.get()).second) { + if (!visited.insert(state.n).second) { continue; } - if (n->isa(&RttiTypes::StructuredClass)) { - Rooted strct = n.cast(); + if (state.n->isa(&RttiTypes::Descriptor)) { + Rooted strct{static_cast(state.n)}; // look through all fields. NodeVector fields = strct->getFieldDescriptors(); for (auto fd : fields) { // note matches. - if (match(fd)) { + if (match(fd, state.depth)) { res.push_back(fd); } // only continue in the TREE field. if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) { - q.push(fd); + q.push(CollectState(fd.get(), state.depth)); } } } else { // otherwise this is a FieldDescriptor. - Rooted field = n.cast(); + Rooted field{ + static_cast(state.n)}; // and we proceed by visiting all permitted children. for (auto c : field->getChildrenWithSubclasses()) { // note matches. - if (match(c)) { + if (match(c, state.depth)) { res.push_back(c); } // We only continue our search via transparent children. if (c->isTransparent()) { - q.push(c); + q.push(CollectState(c.get(), state.depth + 1)); } } } @@ -222,28 +231,59 @@ static NodeVector collect(const Node *start, F match) return res; } +static std::vector collectPermittedTokens( + const Node *start, Handle domain) +{ + // gather SyntaxDescriptors for structure children first. + std::vector res; + collect(start, [&res](Handle n, size_t depth) { + SyntaxDescriptor stx; + if (n->isa(&RttiTypes::FieldDescriptor)) { + stx = n.cast()->getSyntaxDescriptor(depth); + } else { + stx = n.cast()->getSyntaxDescriptor(depth); + } + // do not add trivial SyntaxDescriptors. + if (!stx.isEmpty()) { + res.push_back(stx); + } + return false; + }); + // gather SyntaxDescriptors for AnnotationClasses. + for (auto a : domain->getAnnotationClasses()) { + SyntaxDescriptor stx = a->getSyntaxDescriptor(); + if (!stx.isEmpty()) { + res.push_back(stx); + } + } + return res; +} + /* Class FieldDescriptor */ FieldDescriptor::FieldDescriptor(Manager &mgr, Handle primitiveType, Handle parent, FieldType fieldType, - std::string name, bool optional) + std::string name, bool optional, + WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), primitiveType(acquire(primitiveType)), optional(optional), - primitive(true) + primitive(true), + whitespaceMode(whitespaceMode) { } FieldDescriptor::FieldDescriptor(Manager &mgr, Handle parent, FieldType fieldType, std::string name, - bool optional) + bool optional, WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), optional(optional), - primitive(false) + primitive(false), + whitespaceMode(whitespaceMode) { } @@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const } else { valid = valid & validateName(logger); } + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } // check consistency of FieldType with the rest of the FieldDescriptor. if (primitive) { @@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const } static void gatherSubclasses( - std::unordered_set& visited, + std::unordered_set &visited, NodeVector &res, Handle strct) { // this check is to prevent cycles. @@ -334,7 +393,7 @@ static void gatherSubclasses( } for (auto sub : strct->getSubclasses()) { // this check is to prevent cycles. - if(visited.count(sub.get())){ + if (visited.count(sub.get())) { continue; } res.push_back(sub); @@ -381,7 +440,7 @@ NodeVector FieldDescriptor::pathTo(Handle field, NodeVector FieldDescriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector nodes = collect(this, [](Handle n) { + NodeVector nodes = collect(this, [](Handle n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -396,6 +455,16 @@ NodeVector FieldDescriptor::getDefaultFields() const return res; } +std::vector FieldDescriptor::getPermittedTokens() const +{ + if (getParent() == nullptr || + getParent().cast()->getParent() == nullptr) { + return std::vector(); + } + return collectPermittedTokens( + this, getParent().cast()->getParent().cast()); +} + /* Class Descriptor */ void Descriptor::doResolve(ResolutionState &state) @@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const } valid = valid & attributesDescriptor->validate(logger); } + + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } + // check that only one FieldDescriptor is of type TREE. auto fds = Descriptor::getFieldDescriptors(); bool hasTREE = false; @@ -483,7 +571,7 @@ std::pair, bool> Descriptor::pathTo( NodeVector Descriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector nodes = collect(this, [](Handle n) { + NodeVector nodes = collect(this, [](Handle n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -501,7 +589,7 @@ NodeVector Descriptor::getDefaultFields() const NodeVector Descriptor::getPermittedChildren() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector nodes = collect(this, [](Handle n) { + NodeVector nodes = collect(this, [](Handle n, size_t depth) { return n->isa(&RttiTypes::StructuredClass); }); NodeVector res; @@ -669,6 +757,14 @@ std::pair, bool> Descriptor::createFieldDescriptor( return std::make_pair(fd, sorted); } +std::vector Descriptor::getPermittedTokens() const +{ + if (getParent() == nullptr) { + return std::vector(); + } + return collectPermittedTokens(this, getParent().cast()); +} + /* Class StructuredClass */ StructuredClass::StructuredClass(Manager &mgr, std::string name, @@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const logger.error(cardinality.toString() + " is not a cardinality!", *this); valid = false; } + + // check short token. + if (!shortToken.special && !shortToken.token.empty() && + !Utils::isUserDefinedToken(shortToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom short form token: " + + shortToken.token, + *this); + valid = false; + } // check the validity of this superclass. if (superclass != nullptr) { valid = valid & superclass->validate(logger); @@ -961,6 +1067,51 @@ Rooted Domain::createAnnotationClass(std::string name) new AnnotationClass(getManager(), std::move(name), this)}; } +static void gatherTokenDescriptors( + Handle desc, std::vector &res, + std::unordered_set &visited) +{ + // add the TokenDescriptors for the Descriptor itself. + if (!desc->getStartToken().isEmpty()) { + res.push_back(desc->getStartTokenPointer()); + } + if (!desc->getEndToken().isEmpty()) { + res.push_back(desc->getEndTokenPointer()); + } + // add the TokenDescriptors for its FieldDescriptors. + for (auto fd : desc->getFieldDescriptors()) { + if (!visited.insert(fd.get()).second) { + continue; + } + if (!fd->getStartToken().isEmpty()) { + res.push_back(fd->getStartTokenPointer()); + } + if (!fd->getEndToken().isEmpty()) { + res.push_back(fd->getEndTokenPointer()); + } + } +} + +std::vector Domain::getAllTokenDescriptors() const +{ + std::vector res; + // note all fields that are already visited because FieldReferences might + // lead to doubled fields. + std::unordered_set visited; + // add the TokenDescriptors for the StructuredClasses (and their fields). + for (auto s : structuredClasses) { + if (!s->getShortToken().isEmpty()) { + res.push_back(s->getShortTokenPointer()); + } + gatherTokenDescriptors(s, res, visited); + } + // add the TokenDescriptors for the AnnotationClasses (and their fields). + for (auto a : annotationClasses) { + gatherTokenDescriptors(a, res, visited); + } + return res; +} + /* Type registrations */ namespace RttiTypes { diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 7e10d91..e984ed9 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -167,11 +167,13 @@ #ifndef _OUSIA_MODEL_DOMAIN_HPP_ #define _OUSIA_MODEL_DOMAIN_HPP_ +#include #include #include #include "Node.hpp" #include "RootNode.hpp" +#include "Syntax.hpp" #include "Typesystem.hpp" namespace ousia { @@ -225,6 +227,9 @@ private: Owned primitiveType; bool optional; bool primitive; + TokenDescriptor startToken; + TokenDescriptor endToken; + WhitespaceMode whitespaceMode; protected: bool doValidate(Logger &logger) const override; @@ -233,39 +238,46 @@ public: /** * This is the constructor for primitive fields. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param primitiveType is a handle to some Type in some Typesystem of which - * one instance is allowed to fill this field. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param primitiveType is a handle to some Type in some Typesystem of + *which + * one instance is allowed to fill this field. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle primitiveType, Handle parent, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * This is the constructor for non-primitive fields. You have to provide * children here later on. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param fieldType is the FieldType of this FieldDescriptor, either - * TREE for the main or default structure or SUBTREE - * for supporting structures. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param fieldType is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle parent = nullptr, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * Returns a const reference to the NodeVector of StructuredClasses whose @@ -437,6 +449,109 @@ public: * children of an instance of this Descriptor. */ NodeVector getDefaultFields() const; + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * Note that this does not invalidate the FieldDescriptor. So use with + * care. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @return the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode getWhitespaceMode() const { return whitespaceMode; } + + /** + * Sets the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @param wm the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode setWhitespaceMode(WhitespaceMode wm) + { + return whitespaceMode = wm; + } + + /** + * Returns the SyntaxDescriptor for this FieldDescriptor. + * + * @return the SyntaxDescriptor for this FieldDescriptor. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this FieldDescriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this FieldDescriptor + */ + std::vector getPermittedTokens() const; }; /** @@ -460,7 +575,10 @@ public: * * \endcode * - * key="value" inside the A-node would be an attribute, while value + * key="value" inside the A-node would be an attribute, while + * \code{.xml} + * value + * \endcode * would be a primitive field. While equivalent in XML the semantics are * different: An attribute describes indeed attributes, features of one single * node whereas a primitive field describes the _content_ of a node. @@ -472,6 +590,8 @@ class Descriptor : public Node { private: Owned attributesDescriptor; NodeVector fieldDescriptors; + TokenDescriptor startToken; + TokenDescriptor endToken; bool addAndSortFieldDescriptor(Handle fd, Logger &logger); @@ -720,6 +840,85 @@ public: * of an instance of this Descriptor in the structure tree. */ NodeVector getPermittedChildren() const; + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the SyntaxDescriptor for this Descriptor. + * + * @return the SyntaxDescriptor for this Descriptor. + */ + virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this Descriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this Descriptor. + */ + std::vector getPermittedTokens() const; }; /* * TODO: We should discuss Cardinalities one more time. Is it smart to define @@ -806,6 +1005,7 @@ private: NodeVector subclasses; bool transparent; bool root; + TokenDescriptor shortToken; /** * Helper method for getFieldDescriptors. @@ -963,6 +1163,50 @@ public: invalidate(); root = std::move(r); } + + /** + * Returns a pointer to the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a pointer to the short TokenDescriptor. + */ + TokenDescriptor *getShortTokenPointer() { return &shortToken; } + + /** + * Returns a copy of the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a copy of the short TokenDescriptor. + */ + TokenDescriptor getShortToken() const { return shortToken; } + + /** + * Sets the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @param s the new short TokenDescriptor. + */ + void setShortToken(TokenDescriptor s) + { + invalidate(); + shortToken = s; + } + + /** + * Returns the SyntaxDescriptor for this StructuredClass. + * + * @return the SyntaxDescriptor for this StructuredClass. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override + { + SyntaxDescriptor stx{getStartToken().id, getEndToken().id, + shortToken.id, const_cast(this), + depth}; + return stx; + } }; /** @@ -1188,6 +1432,13 @@ public: { domains.insert(domains.end(), ds.begin(), ds.end()); } + + /** + * Returns all TokenDescriptors of classes and fields in this Ontology. + * + * @return all TokenDescriptors of classes and fields in this Ontology. + */ + std::vector getAllTokenDescriptors() const; }; namespace RttiTypes { @@ -1200,4 +1451,4 @@ extern const Rtti Domain; } } -#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ +#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ \ No newline at end of file diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp index 6bbf26d..f59e745 100644 --- a/test/core/model/DomainTest.cpp +++ b/test/core/model/DomainTest.cpp @@ -82,9 +82,7 @@ TEST(Domain, testDomainResolving) } // i use this wrapper due to the strange behaviour of GTEST. -static void assertFalse(bool b){ - ASSERT_FALSE(b); -} +static void assertFalse(bool b) { ASSERT_FALSE(b); } static Rooted createUnsortedPrimitiveField( Handle strct, Handle type, Logger &logger, bool tree, @@ -170,7 +168,6 @@ TEST(StructuredClass, getFieldDescriptors) } } - TEST(StructuredClass, getFieldDescriptorsCycles) { Logger logger; @@ -523,6 +520,91 @@ TEST(Descriptor, getPermittedChildrenCycles) ASSERT_EQ(A, children[0]); } +TEST(Descriptor, getSyntaxDescriptor) +{ + // build an ontology with some custom syntax. + Manager mgr{1}; + Logger logger; + Rooted sys{new SystemTypesystem(mgr)}; + // Construct the domain + Rooted domain{new Domain(mgr, sys, "ontology")}; + Rooted A{new StructuredClass( + mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // check the SyntaxDescriptor + SyntaxDescriptor stx = A->getSyntaxDescriptor(); + ASSERT_EQ(Tokens::Indent, stx.start); + ASSERT_EQ(Tokens::Dedent, stx.end); + ASSERT_EQ(1, stx.shortForm); + ASSERT_EQ(A, stx.descriptor); + ASSERT_TRUE(stx.isStruct()); + ASSERT_FALSE(stx.isAnnotation()); + ASSERT_FALSE(stx.isFieldDescriptor()); +} + +TEST(Descriptor, getPermittedTokens) +{ + // build an ontology with some custom syntax. + Manager mgr{1}; + Logger logger; + Rooted sys{new SystemTypesystem(mgr)}; + // Construct the domain + Rooted domain{new Domain(mgr, sys, "ontology")}; + // add one StructuredClass with all tokens set. + Rooted A{new StructuredClass( + mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // add a field with one token set. + Rooted A_field = A->createFieldDescriptor(logger).first; + A_field->setEndToken(TokenDescriptor(Tokens::Newline)); + A_field->addChild(A); + // add an annotation with start and end set. + Rooted A_anno = domain->createAnnotationClass("A"); + { + TokenDescriptor start{"<"}; + start.id = 7; + A_anno->setStartToken(start); + } + { + TokenDescriptor end{">"}; + end.id = 8; + A_anno->setEndToken(end); + } + // add a trivial annotation, which should not be returned. + Rooted B_anno = domain->createAnnotationClass("B"); + ASSERT_TRUE(domain->validate(logger)); + + // check result. + std::vector stxs = A->getPermittedTokens(); + ASSERT_EQ(3, stxs.size()); + // the field should be first, because A itself should not be collected + // directly. + ASSERT_EQ(A_field, stxs[0].descriptor); + ASSERT_EQ(Tokens::Empty, stxs[0].start); + ASSERT_EQ(Tokens::Newline, stxs[0].end); + ASSERT_EQ(Tokens::Empty, stxs[0].shortForm); + ASSERT_EQ(A, stxs[1].descriptor); + ASSERT_EQ(Tokens::Indent, stxs[1].start); + ASSERT_EQ(Tokens::Dedent, stxs[1].end); + ASSERT_EQ(1, stxs[1].shortForm); + ASSERT_EQ(A_anno, stxs[2].descriptor); + ASSERT_EQ(7, stxs[2].start); + ASSERT_EQ(8, stxs[2].end); + ASSERT_EQ(Tokens::Empty, stxs[2].shortForm); +} + TEST(StructuredClass, isSubclassOf) { // create an inheritance hierarchy. @@ -629,6 +711,14 @@ TEST(Domain, validate) base_field->setPrimitiveType(sys->getStringType()); ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); ASSERT_TRUE(domain->validate(logger)); + // add an invalid start token. + base_field->setStartToken(TokenDescriptor("< + >")); + ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); + ASSERT_FALSE(domain->validate(logger)); + // make it valid. + base_field->setStartToken(TokenDescriptor("<")); + ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); + ASSERT_TRUE(domain->validate(logger)); // add a subclass for our base class. Rooted sub{new StructuredClass(mgr, "sub", domain)}; // this should be valid in itself. @@ -686,4 +776,71 @@ TEST(Domain, validate) ASSERT_TRUE(domain->validate(logger)); } } + +TEST(Domain, getAllTokenDescriptors) +{ + // build an ontology with some custom syntax. + Manager mgr{1}; + Logger logger; + Rooted sys{new SystemTypesystem(mgr)}; + // Construct the domain + Rooted domain{new Domain(mgr, sys, "ontology")}; + // add one StructuredClass with all tokens set. + Rooted A{new StructuredClass( + mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // add a field with one token set. + Rooted A_field = A->createFieldDescriptor(logger).first; + A_field->setEndToken(TokenDescriptor(Tokens::Newline)); + A_field->addChild(A); + // add an annotation with start and end set. + Rooted A_anno = domain->createAnnotationClass("A"); + { + TokenDescriptor start{"<"}; + start.id = 7; + A_anno->setStartToken(start); + } + { + TokenDescriptor end{">"}; + end.id = 8; + A_anno->setEndToken(end); + } + // add a trivial annotation, which should not be returned. + Rooted B_anno = domain->createAnnotationClass("B"); + ASSERT_TRUE(domain->validate(logger)); + + // check the result. + std::vector tks = domain->getAllTokenDescriptors(); + + // A short token + ASSERT_EQ("<+>", tks[0]->token); + ASSERT_EQ(1, tks[0]->id); + ASSERT_FALSE(tks[0]->special); + // A start token + ASSERT_EQ("", tks[1]->token); + ASSERT_EQ(Tokens::Indent, tks[1]->id); + ASSERT_TRUE(tks[1]->special); + // A end token + ASSERT_EQ("", tks[2]->token); + ASSERT_EQ(Tokens::Dedent, tks[2]->id); + ASSERT_TRUE(tks[2]->special); + // A field end token + ASSERT_EQ("", tks[3]->token); + ASSERT_EQ(Tokens::Newline, tks[3]->id); + ASSERT_TRUE(tks[3]->special); + // A anno start token + ASSERT_EQ("<", tks[4]->token); + ASSERT_EQ(7, tks[4]->id); + ASSERT_FALSE(tks[4]->special); + // A anno end token + ASSERT_EQ(">", tks[5]->token); + ASSERT_EQ(8, tks[5]->id); + ASSERT_FALSE(tks[5]->special); +} } \ No newline at end of file -- cgit v1.2.3 From 4b5f37d07e4e691848b243ae795bb59893a6379c Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:55:41 +0100 Subject: added another domain test case for invalid syntax tokens. --- test/core/model/DomainTest.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp index f59e745..b3c5771 100644 --- a/test/core/model/DomainTest.cpp +++ b/test/core/model/DomainTest.cpp @@ -701,6 +701,14 @@ TEST(Domain, validate) base->setName("myClass"); ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); ASSERT_TRUE(domain->validate(logger)); + // add an invalid short token. + base->setShortToken(TokenDescriptor("bla")); + ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); + ASSERT_FALSE(domain->validate(logger)); + // make it valid. + base->setShortToken(TokenDescriptor("!bla!")); + ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); + ASSERT_TRUE(domain->validate(logger)); // Let's add a primitive field (without a primitive type at first) Rooted base_field = base->createPrimitiveFieldDescriptor(nullptr, logger).first; -- cgit v1.2.3