diff options
author | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2015-03-02 15:53:20 +0100 |
---|---|---|
committer | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2015-03-02 15:53:20 +0100 |
commit | ee943c5e9b60cf577ff236a694df180db89b0972 (patch) | |
tree | 465d99c0b4c627f09334645f5df5a8fb2d6e6c7e /src | |
parent | 522580cfdfc9e6dc3448240448c29533e68f240f (diff) |
integrated syntax tokens in Domain.
Diffstat (limited to 'src')
-rw-r--r-- | src/core/model/Domain.cpp | 193 | ||||
-rw-r--r-- | src/core/model/Domain.hpp | 297 |
2 files changed, 446 insertions, 44 deletions
diff --git a/src/core/model/Domain.cpp b/src/core/model/Domain.cpp index 8255401..587a382 100644 --- a/src/core/model/Domain.cpp +++ b/src/core/model/Domain.cpp @@ -20,8 +20,9 @@ #include <queue> #include <set> -#include <core/common/RttiBuilder.hpp> #include <core/common/Exceptions.hpp> +#include <core/common/RttiBuilder.hpp> +#include <core/common/Utils.hpp> #include "Domain.hpp" @@ -169,52 +170,60 @@ static NodeVector<Node> pathTo(const Node *start, Logger &logger, return shortest; } +struct CollectState { + Node *n; + size_t depth; + + CollectState(Node *n, size_t depth) : n(n), depth(depth) {} +}; + template <typename F> static NodeVector<Node> collect(const Node *start, F match) { // result NodeVector<Node> res; // queue for breadth-first search of graph. - std::queue<Rooted<Node>> q; + std::queue<CollectState> q; // put the initial node on the stack. - q.push(const_cast<Node *>(start)); + q.push(CollectState(const_cast<Node *>(start), 0)); // set of visited nodes. std::unordered_set<const Node *> visited; while (!q.empty()) { - Rooted<Node> n = q.front(); + CollectState state = q.front(); q.pop(); // do not proceed if this node was already visited. - if (!visited.insert(n.get()).second) { + if (!visited.insert(state.n).second) { continue; } - if (n->isa(&RttiTypes::StructuredClass)) { - Rooted<StructuredClass> strct = n.cast<StructuredClass>(); + if (state.n->isa(&RttiTypes::Descriptor)) { + Rooted<Descriptor> strct{static_cast<Descriptor *>(state.n)}; // look through all fields. NodeVector<FieldDescriptor> fields = strct->getFieldDescriptors(); for (auto fd : fields) { // note matches. - if (match(fd)) { + if (match(fd, state.depth)) { res.push_back(fd); } // only continue in the TREE field. if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) { - q.push(fd); + q.push(CollectState(fd.get(), state.depth)); } } } else { // otherwise this is a FieldDescriptor. - Rooted<FieldDescriptor> field = n.cast<FieldDescriptor>(); + Rooted<FieldDescriptor> field{ + static_cast<FieldDescriptor *>(state.n)}; // and we proceed by visiting all permitted children. for (auto c : field->getChildrenWithSubclasses()) { // note matches. - if (match(c)) { + if (match(c, state.depth)) { res.push_back(c); } // We only continue our search via transparent children. if (c->isTransparent()) { - q.push(c); + q.push(CollectState(c.get(), state.depth + 1)); } } } @@ -222,28 +231,59 @@ static NodeVector<Node> collect(const Node *start, F match) return res; } +static std::vector<SyntaxDescriptor> collectPermittedTokens( + const Node *start, Handle<Domain> domain) +{ + // gather SyntaxDescriptors for structure children first. + std::vector<SyntaxDescriptor> res; + collect(start, [&res](Handle<Node> n, size_t depth) { + SyntaxDescriptor stx; + if (n->isa(&RttiTypes::FieldDescriptor)) { + stx = n.cast<FieldDescriptor>()->getSyntaxDescriptor(depth); + } else { + stx = n.cast<Descriptor>()->getSyntaxDescriptor(depth); + } + // do not add trivial SyntaxDescriptors. + if (!stx.isEmpty()) { + res.push_back(stx); + } + return false; + }); + // gather SyntaxDescriptors for AnnotationClasses. + for (auto a : domain->getAnnotationClasses()) { + SyntaxDescriptor stx = a->getSyntaxDescriptor(); + if (!stx.isEmpty()) { + res.push_back(stx); + } + } + return res; +} + /* Class FieldDescriptor */ FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, Handle<Descriptor> parent, FieldType fieldType, - std::string name, bool optional) + std::string name, bool optional, + WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), primitiveType(acquire(primitiveType)), optional(optional), - primitive(true) + primitive(true), + whitespaceMode(whitespaceMode) { } FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Descriptor> parent, FieldType fieldType, std::string name, - bool optional) + bool optional, WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), optional(optional), - primitive(false) + primitive(false), + whitespaceMode(whitespaceMode) { } @@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const } else { valid = valid & validateName(logger); } + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } // check consistency of FieldType with the rest of the FieldDescriptor. if (primitive) { @@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const } static void gatherSubclasses( - std::unordered_set<const StructuredClass *>& visited, + std::unordered_set<const StructuredClass *> &visited, NodeVector<StructuredClass> &res, Handle<StructuredClass> strct) { // this check is to prevent cycles. @@ -334,7 +393,7 @@ static void gatherSubclasses( } for (auto sub : strct->getSubclasses()) { // this check is to prevent cycles. - if(visited.count(sub.get())){ + if (visited.count(sub.get())) { continue; } res.push_back(sub); @@ -381,7 +440,7 @@ NodeVector<Node> FieldDescriptor::pathTo(Handle<FieldDescriptor> field, NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -396,6 +455,16 @@ NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const return res; } +std::vector<SyntaxDescriptor> FieldDescriptor::getPermittedTokens() const +{ + if (getParent() == nullptr || + getParent().cast<Descriptor>()->getParent() == nullptr) { + return std::vector<SyntaxDescriptor>(); + } + return collectPermittedTokens( + this, getParent().cast<Descriptor>()->getParent().cast<Domain>()); +} + /* Class Descriptor */ void Descriptor::doResolve(ResolutionState &state) @@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const } valid = valid & attributesDescriptor->validate(logger); } + + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } + // check that only one FieldDescriptor is of type TREE. auto fds = Descriptor::getFieldDescriptors(); bool hasTREE = false; @@ -483,7 +571,7 @@ std::pair<NodeVector<Node>, bool> Descriptor::pathTo( NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -501,7 +589,7 @@ NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const NodeVector<StructuredClass> Descriptor::getPermittedChildren() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { return n->isa(&RttiTypes::StructuredClass); }); NodeVector<StructuredClass> res; @@ -669,6 +757,14 @@ std::pair<Rooted<FieldDescriptor>, bool> Descriptor::createFieldDescriptor( return std::make_pair(fd, sorted); } +std::vector<SyntaxDescriptor> Descriptor::getPermittedTokens() const +{ + if (getParent() == nullptr) { + return std::vector<SyntaxDescriptor>(); + } + return collectPermittedTokens(this, getParent().cast<Domain>()); +} + /* Class StructuredClass */ StructuredClass::StructuredClass(Manager &mgr, std::string name, @@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const logger.error(cardinality.toString() + " is not a cardinality!", *this); valid = false; } + + // check short token. + if (!shortToken.special && !shortToken.token.empty() && + !Utils::isUserDefinedToken(shortToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom short form token: " + + shortToken.token, + *this); + valid = false; + } // check the validity of this superclass. if (superclass != nullptr) { valid = valid & superclass->validate(logger); @@ -961,6 +1067,51 @@ Rooted<AnnotationClass> Domain::createAnnotationClass(std::string name) new AnnotationClass(getManager(), std::move(name), this)}; } +static void gatherTokenDescriptors( + Handle<Descriptor> desc, std::vector<TokenDescriptor *> &res, + std::unordered_set<FieldDescriptor *> &visited) +{ + // add the TokenDescriptors for the Descriptor itself. + if (!desc->getStartToken().isEmpty()) { + res.push_back(desc->getStartTokenPointer()); + } + if (!desc->getEndToken().isEmpty()) { + res.push_back(desc->getEndTokenPointer()); + } + // add the TokenDescriptors for its FieldDescriptors. + for (auto fd : desc->getFieldDescriptors()) { + if (!visited.insert(fd.get()).second) { + continue; + } + if (!fd->getStartToken().isEmpty()) { + res.push_back(fd->getStartTokenPointer()); + } + if (!fd->getEndToken().isEmpty()) { + res.push_back(fd->getEndTokenPointer()); + } + } +} + +std::vector<TokenDescriptor *> Domain::getAllTokenDescriptors() const +{ + std::vector<TokenDescriptor *> res; + // note all fields that are already visited because FieldReferences might + // lead to doubled fields. + std::unordered_set<FieldDescriptor *> visited; + // add the TokenDescriptors for the StructuredClasses (and their fields). + for (auto s : structuredClasses) { + if (!s->getShortToken().isEmpty()) { + res.push_back(s->getShortTokenPointer()); + } + gatherTokenDescriptors(s, res, visited); + } + // add the TokenDescriptors for the AnnotationClasses (and their fields). + for (auto a : annotationClasses) { + gatherTokenDescriptors(a, res, visited); + } + return res; +} + /* Type registrations */ namespace RttiTypes { diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 7e10d91..e984ed9 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -167,11 +167,13 @@ #ifndef _OUSIA_MODEL_DOMAIN_HPP_ #define _OUSIA_MODEL_DOMAIN_HPP_ +#include <core/common/Whitespace.hpp> #include <core/managed/ManagedContainer.hpp> #include <core/RangeSet.hpp> #include "Node.hpp" #include "RootNode.hpp" +#include "Syntax.hpp" #include "Typesystem.hpp" namespace ousia { @@ -225,6 +227,9 @@ private: Owned<Type> primitiveType; bool optional; bool primitive; + TokenDescriptor startToken; + TokenDescriptor endToken; + WhitespaceMode whitespaceMode; protected: bool doValidate(Logger &logger) const override; @@ -233,39 +238,46 @@ public: /** * This is the constructor for primitive fields. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param primitiveType is a handle to some Type in some Typesystem of which - * one instance is allowed to fill this field. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param primitiveType is a handle to some Type in some Typesystem of + *which + * one instance is allowed to fill this field. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, Handle<Descriptor> parent, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * This is the constructor for non-primitive fields. You have to provide * children here later on. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param fieldType is the FieldType of this FieldDescriptor, either - * TREE for the main or default structure or SUBTREE - * for supporting structures. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param fieldType is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle<Descriptor> parent = nullptr, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * Returns a const reference to the NodeVector of StructuredClasses whose @@ -437,6 +449,109 @@ public: * children of an instance of this Descriptor. */ NodeVector<FieldDescriptor> getDefaultFields() const; + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * Note that this does not invalidate the FieldDescriptor. So use with + * care. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @return the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode getWhitespaceMode() const { return whitespaceMode; } + + /** + * Sets the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @param wm the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode setWhitespaceMode(WhitespaceMode wm) + { + return whitespaceMode = wm; + } + + /** + * Returns the SyntaxDescriptor for this FieldDescriptor. + * + * @return the SyntaxDescriptor for this FieldDescriptor. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast<FieldDescriptor *>(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this FieldDescriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this FieldDescriptor + */ + std::vector<SyntaxDescriptor> getPermittedTokens() const; }; /** @@ -460,7 +575,10 @@ public: * </A> * \endcode * - * key="value" inside the A-node would be an attribute, while <key>value</key> + * key="value" inside the A-node would be an attribute, while + * \code{.xml} + * <key>value</key> + * \endcode * would be a primitive field. While equivalent in XML the semantics are * different: An attribute describes indeed attributes, features of one single * node whereas a primitive field describes the _content_ of a node. @@ -472,6 +590,8 @@ class Descriptor : public Node { private: Owned<StructType> attributesDescriptor; NodeVector<FieldDescriptor> fieldDescriptors; + TokenDescriptor startToken; + TokenDescriptor endToken; bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); @@ -720,6 +840,85 @@ public: * of an instance of this Descriptor in the structure tree. */ NodeVector<StructuredClass> getPermittedChildren() const; + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the SyntaxDescriptor for this Descriptor. + * + * @return the SyntaxDescriptor for this Descriptor. + */ + virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast<Descriptor *>(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this Descriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this Descriptor. + */ + std::vector<SyntaxDescriptor> getPermittedTokens() const; }; /* * TODO: We should discuss Cardinalities one more time. Is it smart to define @@ -806,6 +1005,7 @@ private: NodeVector<StructuredClass> subclasses; bool transparent; bool root; + TokenDescriptor shortToken; /** * Helper method for getFieldDescriptors. @@ -963,6 +1163,50 @@ public: invalidate(); root = std::move(r); } + + /** + * Returns a pointer to the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a pointer to the short TokenDescriptor. + */ + TokenDescriptor *getShortTokenPointer() { return &shortToken; } + + /** + * Returns a copy of the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a copy of the short TokenDescriptor. + */ + TokenDescriptor getShortToken() const { return shortToken; } + + /** + * Sets the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @param s the new short TokenDescriptor. + */ + void setShortToken(TokenDescriptor s) + { + invalidate(); + shortToken = s; + } + + /** + * Returns the SyntaxDescriptor for this StructuredClass. + * + * @return the SyntaxDescriptor for this StructuredClass. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override + { + SyntaxDescriptor stx{getStartToken().id, getEndToken().id, + shortToken.id, const_cast<StructuredClass *>(this), + depth}; + return stx; + } }; /** @@ -1188,6 +1432,13 @@ public: { domains.insert(domains.end(), ds.begin(), ds.end()); } + + /** + * Returns all TokenDescriptors of classes and fields in this Ontology. + * + * @return all TokenDescriptors of classes and fields in this Ontology. + */ + std::vector<TokenDescriptor *> getAllTokenDescriptors() const; }; namespace RttiTypes { @@ -1200,4 +1451,4 @@ extern const Rtti Domain; } } -#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ +#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
\ No newline at end of file |