diff options
Diffstat (limited to 'src/core/model')
-rw-r--r-- | src/core/model/Ontology.cpp | 193 | ||||
-rw-r--r-- | src/core/model/Ontology.hpp | 297 | ||||
-rw-r--r-- | src/core/model/Syntax.cpp | 58 | ||||
-rw-r--r-- | src/core/model/Syntax.hpp | 196 |
4 files changed, 700 insertions, 44 deletions
diff --git a/src/core/model/Ontology.cpp b/src/core/model/Ontology.cpp index 8829139..3af727d 100644 --- a/src/core/model/Ontology.cpp +++ b/src/core/model/Ontology.cpp @@ -20,8 +20,9 @@ #include <queue> #include <set> -#include <core/common/RttiBuilder.hpp> #include <core/common/Exceptions.hpp> +#include <core/common/RttiBuilder.hpp> +#include <core/common/Utils.hpp> #include "Ontology.hpp" @@ -169,52 +170,60 @@ static NodeVector<Node> pathTo(const Node *start, Logger &logger, return shortest; } +struct CollectState { + Node *n; + size_t depth; + + CollectState(Node *n, size_t depth) : n(n), depth(depth) {} +}; + template <typename F> static NodeVector<Node> collect(const Node *start, F match) { // result NodeVector<Node> res; // queue for breadth-first search of graph. - std::queue<Rooted<Node>> q; + std::queue<CollectState> q; // put the initial node on the stack. - q.push(const_cast<Node *>(start)); + q.push(CollectState(const_cast<Node *>(start), 0)); // set of visited nodes. std::unordered_set<const Node *> visited; while (!q.empty()) { - Rooted<Node> n = q.front(); + CollectState state = q.front(); q.pop(); // do not proceed if this node was already visited. - if (!visited.insert(n.get()).second) { + if (!visited.insert(state.n).second) { continue; } - if (n->isa(&RttiTypes::StructuredClass)) { - Rooted<StructuredClass> strct = n.cast<StructuredClass>(); + if (state.n->isa(&RttiTypes::Descriptor)) { + Rooted<Descriptor> strct{static_cast<Descriptor *>(state.n)}; // look through all fields. NodeVector<FieldDescriptor> fields = strct->getFieldDescriptors(); for (auto fd : fields) { // note matches. - if (match(fd)) { + if (match(fd, state.depth)) { res.push_back(fd); } // only continue in the TREE field. if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) { - q.push(fd); + q.push(CollectState(fd.get(), state.depth)); } } } else { // otherwise this is a FieldDescriptor. - Rooted<FieldDescriptor> field = n.cast<FieldDescriptor>(); + Rooted<FieldDescriptor> field{ + static_cast<FieldDescriptor *>(state.n)}; // and we proceed by visiting all permitted children. for (auto c : field->getChildrenWithSubclasses()) { // note matches. - if (match(c)) { + if (match(c, state.depth)) { res.push_back(c); } // We only continue our search via transparent children. if (c->isTransparent()) { - q.push(c); + q.push(CollectState(c.get(), state.depth + 1)); } } } @@ -222,28 +231,59 @@ static NodeVector<Node> collect(const Node *start, F match) return res; } +static std::vector<SyntaxDescriptor> collectPermittedTokens( + const Node *start, Handle<Domain> domain) +{ + // gather SyntaxDescriptors for structure children first. + std::vector<SyntaxDescriptor> res; + collect(start, [&res](Handle<Node> n, size_t depth) { + SyntaxDescriptor stx; + if (n->isa(&RttiTypes::FieldDescriptor)) { + stx = n.cast<FieldDescriptor>()->getSyntaxDescriptor(depth); + } else { + stx = n.cast<Descriptor>()->getSyntaxDescriptor(depth); + } + // do not add trivial SyntaxDescriptors. + if (!stx.isEmpty()) { + res.push_back(stx); + } + return false; + }); + // gather SyntaxDescriptors for AnnotationClasses. + for (auto a : domain->getAnnotationClasses()) { + SyntaxDescriptor stx = a->getSyntaxDescriptor(); + if (!stx.isEmpty()) { + res.push_back(stx); + } + } + return res; +} + /* Class FieldDescriptor */ FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, Handle<Descriptor> parent, FieldType fieldType, - std::string name, bool optional) + std::string name, bool optional, + WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), primitiveType(acquire(primitiveType)), optional(optional), - primitive(true) + primitive(true), + whitespaceMode(whitespaceMode) { } FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Descriptor> parent, FieldType fieldType, std::string name, - bool optional) + bool optional, WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), optional(optional), - primitive(false) + primitive(false), + whitespaceMode(whitespaceMode) { } @@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const } else { valid = valid & validateName(logger); } + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } // check consistency of FieldType with the rest of the FieldDescriptor. if (primitive) { @@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const } static void gatherSubclasses( - std::unordered_set<const StructuredClass *>& visited, + std::unordered_set<const StructuredClass *> &visited, NodeVector<StructuredClass> &res, Handle<StructuredClass> strct) { // this check is to prevent cycles. @@ -334,7 +393,7 @@ static void gatherSubclasses( } for (auto sub : strct->getSubclasses()) { // this check is to prevent cycles. - if(visited.count(sub.get())){ + if (visited.count(sub.get())) { continue; } res.push_back(sub); @@ -381,7 +440,7 @@ NodeVector<Node> FieldDescriptor::pathTo(Handle<FieldDescriptor> field, NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -396,6 +455,16 @@ NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const return res; } +std::vector<SyntaxDescriptor> FieldDescriptor::getPermittedTokens() const +{ + if (getParent() == nullptr || + getParent().cast<Descriptor>()->getParent() == nullptr) { + return std::vector<SyntaxDescriptor>(); + } + return collectPermittedTokens( + this, getParent().cast<Descriptor>()->getParent().cast<Domain>()); +} + /* Class Descriptor */ void Descriptor::doResolve(ResolutionState &state) @@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const } valid = valid & attributesDescriptor->validate(logger); } + + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } + // check that only one FieldDescriptor is of type TREE. auto fds = Descriptor::getFieldDescriptors(); bool hasTREE = false; @@ -483,7 +571,7 @@ std::pair<NodeVector<Node>, bool> Descriptor::pathTo( NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -501,7 +589,7 @@ NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const NodeVector<StructuredClass> Descriptor::getPermittedChildren() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { return n->isa(&RttiTypes::StructuredClass); }); NodeVector<StructuredClass> res; @@ -669,6 +757,14 @@ std::pair<Rooted<FieldDescriptor>, bool> Descriptor::createFieldDescriptor( return std::make_pair(fd, sorted); } +std::vector<SyntaxDescriptor> Descriptor::getPermittedTokens() const +{ + if (getParent() == nullptr) { + return std::vector<SyntaxDescriptor>(); + } + return collectPermittedTokens(this, getParent().cast<Domain>()); +} + /* Class StructuredClass */ StructuredClass::StructuredClass(Manager &mgr, std::string name, @@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const logger.error(cardinality.toString() + " is not a cardinality!", *this); valid = false; } + + // check short token. + if (!shortToken.special && !shortToken.token.empty() && + !Utils::isUserDefinedToken(shortToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom short form token: " + + shortToken.token, + *this); + valid = false; + } // check the validity of this superclass. if (superclass != nullptr) { valid = valid & superclass->validate(logger); @@ -961,6 +1067,51 @@ Rooted<AnnotationClass> Ontology::createAnnotationClass(std::string name) new AnnotationClass(getManager(), std::move(name), this)}; } +static void gatherTokenDescriptors( + Handle<Descriptor> desc, std::vector<TokenDescriptor *> &res, + std::unordered_set<FieldDescriptor *> &visited) +{ + // add the TokenDescriptors for the Descriptor itself. + if (!desc->getStartToken().isEmpty()) { + res.push_back(desc->getStartTokenPointer()); + } + if (!desc->getEndToken().isEmpty()) { + res.push_back(desc->getEndTokenPointer()); + } + // add the TokenDescriptors for its FieldDescriptors. + for (auto fd : desc->getFieldDescriptors()) { + if (!visited.insert(fd.get()).second) { + continue; + } + if (!fd->getStartToken().isEmpty()) { + res.push_back(fd->getStartTokenPointer()); + } + if (!fd->getEndToken().isEmpty()) { + res.push_back(fd->getEndTokenPointer()); + } + } +} + +std::vector<TokenDescriptor *> Domain::getAllTokenDescriptors() const +{ + std::vector<TokenDescriptor *> res; + // note all fields that are already visited because FieldReferences might + // lead to doubled fields. + std::unordered_set<FieldDescriptor *> visited; + // add the TokenDescriptors for the StructuredClasses (and their fields). + for (auto s : structuredClasses) { + if (!s->getShortToken().isEmpty()) { + res.push_back(s->getShortTokenPointer()); + } + gatherTokenDescriptors(s, res, visited); + } + // add the TokenDescriptors for the AnnotationClasses (and their fields). + for (auto a : annotationClasses) { + gatherTokenDescriptors(a, res, visited); + } + return res; +} + /* Type registrations */ namespace RttiTypes { diff --git a/src/core/model/Ontology.hpp b/src/core/model/Ontology.hpp index e1fbe96..d682bdf 100644 --- a/src/core/model/Ontology.hpp +++ b/src/core/model/Ontology.hpp @@ -168,11 +168,13 @@ #ifndef _OUSIA_MODEL_DOMAIN_HPP_ #define _OUSIA_MODEL_DOMAIN_HPP_ +#include <core/common/Whitespace.hpp> #include <core/managed/ManagedContainer.hpp> #include <core/RangeSet.hpp> #include "Node.hpp" #include "RootNode.hpp" +#include "Syntax.hpp" #include "Typesystem.hpp" namespace ousia { @@ -226,6 +228,9 @@ private: Owned<Type> primitiveType; bool optional; bool primitive; + TokenDescriptor startToken; + TokenDescriptor endToken; + WhitespaceMode whitespaceMode; protected: bool doValidate(Logger &logger) const override; @@ -234,39 +239,46 @@ public: /** * This is the constructor for primitive fields. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param primitiveType is a handle to some Type in some Typesystem of which - * one instance is allowed to fill this field. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param primitiveType is a handle to some Type in some Typesystem of + *which + * one instance is allowed to fill this field. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, Handle<Descriptor> parent, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * This is the constructor for non-primitive fields. You have to provide * children here later on. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param fieldType is the FieldType of this FieldDescriptor, either - * TREE for the main or default structure or SUBTREE - * for supporting structures. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param fieldType is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle<Descriptor> parent = nullptr, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * Returns a const reference to the NodeVector of StructuredClasses whose @@ -455,6 +467,109 @@ public: return std::move(name); } } + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * Note that this does not invalidate the FieldDescriptor. So use with + * care. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @return the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode getWhitespaceMode() const { return whitespaceMode; } + + /** + * Sets the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @param wm the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode setWhitespaceMode(WhitespaceMode wm) + { + return whitespaceMode = wm; + } + + /** + * Returns the SyntaxDescriptor for this FieldDescriptor. + * + * @return the SyntaxDescriptor for this FieldDescriptor. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast<FieldDescriptor *>(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this FieldDescriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this FieldDescriptor + */ + std::vector<SyntaxDescriptor> getPermittedTokens() const; }; /** @@ -478,7 +593,10 @@ public: * </A> * \endcode * - * key="value" inside the A-node would be an attribute, while <key>value</key> + * key="value" inside the A-node would be an attribute, while + * \code{.xml} + * <key>value</key> + * \endcode * would be a primitive field. While equivalent in XML the semantics are * different: An attribute describes indeed attributes, features of one single * node whereas a primitive field describes the _content_ of a node. @@ -490,6 +608,8 @@ class Descriptor : public Node { private: Owned<StructType> attributesDescriptor; NodeVector<FieldDescriptor> fieldDescriptors; + TokenDescriptor startToken; + TokenDescriptor endToken; bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); @@ -738,6 +858,85 @@ public: * of an instance of this Descriptor in the structure tree. */ NodeVector<StructuredClass> getPermittedChildren() const; + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the SyntaxDescriptor for this Descriptor. + * + * @return the SyntaxDescriptor for this Descriptor. + */ + virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast<Descriptor *>(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this Descriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this Descriptor. + */ + std::vector<SyntaxDescriptor> getPermittedTokens() const; }; /* * TODO: We should discuss Cardinalities one more time. Is it smart to define @@ -824,6 +1023,7 @@ private: NodeVector<StructuredClass> subclasses; bool transparent; bool root; + TokenDescriptor shortToken; /** * Helper method for getFieldDescriptors. @@ -981,6 +1181,50 @@ public: invalidate(); root = std::move(r); } + + /** + * Returns a pointer to the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a pointer to the short TokenDescriptor. + */ + TokenDescriptor *getShortTokenPointer() { return &shortToken; } + + /** + * Returns a copy of the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a copy of the short TokenDescriptor. + */ + TokenDescriptor getShortToken() const { return shortToken; } + + /** + * Sets the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @param s the new short TokenDescriptor. + */ + void setShortToken(TokenDescriptor s) + { + invalidate(); + shortToken = s; + } + + /** + * Returns the SyntaxDescriptor for this StructuredClass. + * + * @return the SyntaxDescriptor for this StructuredClass. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override + { + SyntaxDescriptor stx{getStartToken().id, getEndToken().id, + shortToken.id, const_cast<StructuredClass *>(this), + depth}; + return stx; + } }; /** @@ -1207,6 +1451,13 @@ public: { ontologies.insert(ontologies.end(), ds.begin(), ds.end()); } + + /** + * Returns all TokenDescriptors of classes and fields in this Ontology. + * + * @return all TokenDescriptors of classes and fields in this Ontology. + */ + std::vector<TokenDescriptor *> getAllTokenDescriptors() const; }; namespace RttiTypes { @@ -1219,4 +1470,4 @@ extern const Rtti Ontology; } } -#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
\ No newline at end of file +#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp new file mode 100644 index 0000000..9dbaccc --- /dev/null +++ b/src/core/model/Syntax.cpp @@ -0,0 +1,58 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Syntax.hpp" + +#include "Domain.hpp" + +namespace ousia { + +/* Class TokenSyntaxDescriptor */ + +bool SyntaxDescriptor::isAnnotation() const +{ + return descriptor->isa(&RttiTypes::AnnotationClass); +} +bool SyntaxDescriptor::isFieldDescriptor() const +{ + return descriptor->isa(&RttiTypes::FieldDescriptor); +} +bool SyntaxDescriptor::isStruct() const +{ + return descriptor->isa(&RttiTypes::StructuredClass); +} + +void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ + if (start != Tokens::Empty) { + set.insert(start); + } + if (end != Tokens::Empty) { + set.insert(end); + } + if (shortForm != Tokens::Empty) { + set.insert(shortForm); + } +} + +bool SyntaxDescriptor::isEmpty() const +{ + return start == Tokens::Empty && end == Tokens::Empty && + shortForm == Tokens::Empty; +} +}
\ No newline at end of file diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp new file mode 100644 index 0000000..4da3408 --- /dev/null +++ b/src/core/model/Syntax.hpp @@ -0,0 +1,196 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Syntax.hpp + * + * This header contains the Descriptor classes for user definable syntax for + * Document entities or fields. These classes are referenced in Ontology.hpp. + */ + +#ifndef _OUSIA_MODEL_SYNTAX_HPP_ +#define _OUSIA_MODEL_SYNTAX_HPP_ + +#include <core/common/Token.hpp> +#include "Node.hpp" + +namespace ousia { + +/** + * Class to describe a single token that shall be used as user-defined syntax. + */ +struct TokenDescriptor { + /** + * The string content of this token, if it is not a special one. + */ + std::string token; + /** + * A flag to be set true if this TokenDescriptor uses a special token. + */ + bool special; + /** + * An id to uniquely identify this token. + */ + TokenId id; + + /** + * Constructor for non-special tokens. The special flag is set to false and + * the id to Tokens::Empty. + * + * @param token The string content of this token, if it is not a special + * one. + */ + TokenDescriptor(std::string token = std::string()) + : token(std::move(token)), special(false), id(Tokens::Empty) + { + } + + /** + * Constructor for special tokens. The token is set to an empty string and + * the special flag to true. + * + * @param id the id of the special token. + */ + TokenDescriptor(TokenId id) : special(true), id(id) {} + + /** + * Returns true if and only if neither a string nor an ID is given. + * + * @return true if and only if neither a string nor an ID is given. + */ + bool isEmpty() const { return token.empty() && id == Tokens::Empty; } +}; + +/** + * Class describing the user defined syntax for a StructuredClass, + * AnnotationClass or FieldDescriptor. + * + * This class is used during parsing of a Document. It is used to describe + * the tokens relevant for one Descriptor that could be created at this point + * during parsing. + */ +struct SyntaxDescriptor { + /** + * Possible start token or Tokens::Empty if no token is set. + */ + TokenId start; + + /** + * Possible end token or Tokens::Empty if no token is set. + */ + TokenId end; + + /** + * Possible representation token or Tokens::Empty if no token is set. + */ + TokenId shortForm; + + /* + * The Descriptor this SyntaxDescriptor belongs to. As this may be + * a FieldDescriptor as well as a class Descriptor (StructuredClass or + * AnnotationClass) we can only use the class Node as inner argument here. + */ + Rooted<Node> descriptor; + /* + * Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + ssize_t depth; + + /** + * Default constructor, sets all token ids to Tokens::Empty and the + * descriptor handle to nullptr. + */ + SyntaxDescriptor() + : start(Tokens::Empty), + end(Tokens::Empty), + shortForm(Tokens::Empty), + descriptor(nullptr), + depth(-1) + { + } + + /** + * Member initializer constructor. + * + * @param start is a possible start token. + * @param end is a possible end token. + * @param shortForm is a possible short form token. + * @param descriptor The Descriptor this SyntaxDescriptor belongs to. + * @param depth Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + Handle<Node> descriptor, ssize_t depth) + : start(start), + end(end), + shortForm(shortForm), + descriptor(descriptor), + depth(depth) + { + } + + /** + * Inserts all tokens referenced in this SyntaxDescriptor into the + * given TokenSet. Skips token ids set to Tokens::Empty. + * + * @param set is the TokenSet instance into which the Tokens should be + * inserted. + */ + void insertIntoTokenSet(TokenSet &set) const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + * + * @return true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + */ + bool isAnnotation() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + */ + bool isStruct() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + */ + bool isFieldDescriptor() const; + + /** + * Returns true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + * + * @return true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + */ + bool isEmpty() const; +}; +} +#endif
\ No newline at end of file |