diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-03-02 18:09:11 +0100 |
---|---|---|
committer | Andreas Stöckel <andreas@somweyr.de> | 2015-03-02 18:09:11 +0100 |
commit | 6f85642ed2a76701dc1811aaa0616180ac01a9fa (patch) | |
tree | 958a792585829c0caefd0522eb34e95da8c74ff2 /src/core | |
parent | 3cc6ebf406c53b0c82a52f0daf1ce14c62f7b521 (diff) | |
parent | 24c7a8d1e62dc52298ea1abdc8b44d70fff94b54 (diff) |
Merge branch 'astoecke_tokens' of ssh://somweyr.de/var/local/git/ousia into astoecke_tokens
Conflicts:
application/src/core/parser/stack/Handler.hpp
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/common/Token.cpp | 14 | ||||
-rw-r--r-- | src/core/common/Token.hpp | 67 | ||||
-rw-r--r-- | src/core/common/Utils.cpp | 15 | ||||
-rw-r--r-- | src/core/common/Utils.hpp | 1 | ||||
-rw-r--r-- | src/core/model/Domain.cpp | 193 | ||||
-rw-r--r-- | src/core/model/Domain.hpp | 297 | ||||
-rw-r--r-- | src/core/model/Syntax.cpp | 58 | ||||
-rw-r--r-- | src/core/model/Syntax.hpp | 196 | ||||
-rw-r--r-- | src/core/parser/stack/Callbacks.hpp | 5 | ||||
-rw-r--r-- | src/core/parser/stack/Handler.cpp | 2 | ||||
-rw-r--r-- | src/core/parser/stack/Handler.hpp | 190 | ||||
-rw-r--r-- | src/core/parser/stack/TokenStack.cpp | 4 | ||||
-rw-r--r-- | src/core/parser/stack/TokenStack.hpp | 5 |
13 files changed, 845 insertions, 202 deletions
diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp index e454ae4..17ce03e 100644 --- a/src/core/common/Token.cpp +++ b/src/core/common/Token.cpp @@ -20,19 +20,5 @@ namespace ousia { -/* Class TokenSyntaxDescriptor */ - -void TokenSyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const -{ - if (start != Tokens::Empty) { - set.insert(start); - } - if (end != Tokens::Empty) { - set.insert(end); - } - if (shortForm != Tokens::Empty) { - set.insert(shortForm); - } -} } diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp index f89a0ce..f37151f 100644 --- a/src/core/common/Token.hpp +++ b/src/core/common/Token.hpp @@ -173,71 +173,6 @@ struct Token { const SourceLocation &getLocation() const { return location; } }; -/** - * Class describing the user defined syntax for a single field or annotation. - */ -struct TokenSyntaxDescriptor { - /** - * Possible start token or Tokens::Empty if no token is set. - */ - TokenId start; - - /** - * Possible end token or Tokens::Empty if no token is set. - */ - TokenId end; - - /** - * Possible representation token or Tokens::Empty if no token is set. - */ - TokenId shortForm; - - /** - * Flag specifying whether this TokenSyntaxDescriptor describes an - * annotation. - */ - bool isAnnotation; - - /** - * Default constructor, sets all token ids to Tokens::Empty and isAnnotation - * to false. - */ - TokenSyntaxDescriptor() - : start(Tokens::Empty), - end(Tokens::Empty), - shortForm(Tokens::Empty), - isAnnotation(false) - { - } - - /** - * Member initializer constructor. - * - * @param start is a possible start token. - * @param end is a possible end token. - * @param shortForm is a possible short form token. - * @param isAnnotation is set to true if this syntax descriptor describes an - * annotation. - */ - TokenSyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, - bool isAnnotation) - : start(start), - end(end), - shortForm(shortForm), - isAnnotation(isAnnotation) - { - } - - /** - * Inserts all tokens referenced in this TokenSyntaxDescriptor into the - * given TokenSet. Skips token ids set to Tokens::Empty. - * - * @param set is the TokenSet instance into which the Tokens should be - * inserted. - */ - void insertIntoTokenSet(TokenSet &set) const; -}; } -#endif /* _OUSIA_TOKENS_HPP_ */ - +#endif /* _OUSIA_TOKENS_HPP_ */
\ No newline at end of file diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 219b437..a87ff6d 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -124,7 +124,8 @@ bool Utils::isUserDefinedToken(const std::string &token) // Make sure the token meets is neither empty, nor starts or ends with an // alphanumeric character const size_t len = token.size(); - if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) { + if (len == 0 || isAlphanumeric(token[0]) || + isAlphanumeric(token[len - 1])) { return false; } @@ -134,13 +135,19 @@ bool Utils::isUserDefinedToken(const std::string &token) return false; } + // Make sure the token does not contain any whitespaces. + for (char c : token) { + if (isWhitespace(c)) { + return false; + } + } + // Make sure the token contains other characters but { and } - for (char c: token) { + for (char c : token) { if (c != '{' && c != '}') { return true; } } return false; } -} - +}
\ No newline at end of file diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 25a4de5..d9e26da 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -117,6 +117,7 @@ public: * <li>'%', '%{', '}%'</li> * </ul> * </li> + * <li>The token does not contain any whitespaces.</li> * </ul> */ static bool isUserDefinedToken(const std::string &token); diff --git a/src/core/model/Domain.cpp b/src/core/model/Domain.cpp index 8255401..587a382 100644 --- a/src/core/model/Domain.cpp +++ b/src/core/model/Domain.cpp @@ -20,8 +20,9 @@ #include <queue> #include <set> -#include <core/common/RttiBuilder.hpp> #include <core/common/Exceptions.hpp> +#include <core/common/RttiBuilder.hpp> +#include <core/common/Utils.hpp> #include "Domain.hpp" @@ -169,52 +170,60 @@ static NodeVector<Node> pathTo(const Node *start, Logger &logger, return shortest; } +struct CollectState { + Node *n; + size_t depth; + + CollectState(Node *n, size_t depth) : n(n), depth(depth) {} +}; + template <typename F> static NodeVector<Node> collect(const Node *start, F match) { // result NodeVector<Node> res; // queue for breadth-first search of graph. - std::queue<Rooted<Node>> q; + std::queue<CollectState> q; // put the initial node on the stack. - q.push(const_cast<Node *>(start)); + q.push(CollectState(const_cast<Node *>(start), 0)); // set of visited nodes. std::unordered_set<const Node *> visited; while (!q.empty()) { - Rooted<Node> n = q.front(); + CollectState state = q.front(); q.pop(); // do not proceed if this node was already visited. - if (!visited.insert(n.get()).second) { + if (!visited.insert(state.n).second) { continue; } - if (n->isa(&RttiTypes::StructuredClass)) { - Rooted<StructuredClass> strct = n.cast<StructuredClass>(); + if (state.n->isa(&RttiTypes::Descriptor)) { + Rooted<Descriptor> strct{static_cast<Descriptor *>(state.n)}; // look through all fields. NodeVector<FieldDescriptor> fields = strct->getFieldDescriptors(); for (auto fd : fields) { // note matches. - if (match(fd)) { + if (match(fd, state.depth)) { res.push_back(fd); } // only continue in the TREE field. if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) { - q.push(fd); + q.push(CollectState(fd.get(), state.depth)); } } } else { // otherwise this is a FieldDescriptor. - Rooted<FieldDescriptor> field = n.cast<FieldDescriptor>(); + Rooted<FieldDescriptor> field{ + static_cast<FieldDescriptor *>(state.n)}; // and we proceed by visiting all permitted children. for (auto c : field->getChildrenWithSubclasses()) { // note matches. - if (match(c)) { + if (match(c, state.depth)) { res.push_back(c); } // We only continue our search via transparent children. if (c->isTransparent()) { - q.push(c); + q.push(CollectState(c.get(), state.depth + 1)); } } } @@ -222,28 +231,59 @@ static NodeVector<Node> collect(const Node *start, F match) return res; } +static std::vector<SyntaxDescriptor> collectPermittedTokens( + const Node *start, Handle<Domain> domain) +{ + // gather SyntaxDescriptors for structure children first. + std::vector<SyntaxDescriptor> res; + collect(start, [&res](Handle<Node> n, size_t depth) { + SyntaxDescriptor stx; + if (n->isa(&RttiTypes::FieldDescriptor)) { + stx = n.cast<FieldDescriptor>()->getSyntaxDescriptor(depth); + } else { + stx = n.cast<Descriptor>()->getSyntaxDescriptor(depth); + } + // do not add trivial SyntaxDescriptors. + if (!stx.isEmpty()) { + res.push_back(stx); + } + return false; + }); + // gather SyntaxDescriptors for AnnotationClasses. + for (auto a : domain->getAnnotationClasses()) { + SyntaxDescriptor stx = a->getSyntaxDescriptor(); + if (!stx.isEmpty()) { + res.push_back(stx); + } + } + return res; +} + /* Class FieldDescriptor */ FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, Handle<Descriptor> parent, FieldType fieldType, - std::string name, bool optional) + std::string name, bool optional, + WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), primitiveType(acquire(primitiveType)), optional(optional), - primitive(true) + primitive(true), + whitespaceMode(whitespaceMode) { } FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Descriptor> parent, FieldType fieldType, std::string name, - bool optional) + bool optional, WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), optional(optional), - primitive(false) + primitive(false), + whitespaceMode(whitespaceMode) { } @@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const } else { valid = valid & validateName(logger); } + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } // check consistency of FieldType with the rest of the FieldDescriptor. if (primitive) { @@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const } static void gatherSubclasses( - std::unordered_set<const StructuredClass *>& visited, + std::unordered_set<const StructuredClass *> &visited, NodeVector<StructuredClass> &res, Handle<StructuredClass> strct) { // this check is to prevent cycles. @@ -334,7 +393,7 @@ static void gatherSubclasses( } for (auto sub : strct->getSubclasses()) { // this check is to prevent cycles. - if(visited.count(sub.get())){ + if (visited.count(sub.get())) { continue; } res.push_back(sub); @@ -381,7 +440,7 @@ NodeVector<Node> FieldDescriptor::pathTo(Handle<FieldDescriptor> field, NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -396,6 +455,16 @@ NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const return res; } +std::vector<SyntaxDescriptor> FieldDescriptor::getPermittedTokens() const +{ + if (getParent() == nullptr || + getParent().cast<Descriptor>()->getParent() == nullptr) { + return std::vector<SyntaxDescriptor>(); + } + return collectPermittedTokens( + this, getParent().cast<Descriptor>()->getParent().cast<Domain>()); +} + /* Class Descriptor */ void Descriptor::doResolve(ResolutionState &state) @@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const } valid = valid & attributesDescriptor->validate(logger); } + + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } + // check that only one FieldDescriptor is of type TREE. auto fds = Descriptor::getFieldDescriptors(); bool hasTREE = false; @@ -483,7 +571,7 @@ std::pair<NodeVector<Node>, bool> Descriptor::pathTo( NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -501,7 +589,7 @@ NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const NodeVector<StructuredClass> Descriptor::getPermittedChildren() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { return n->isa(&RttiTypes::StructuredClass); }); NodeVector<StructuredClass> res; @@ -669,6 +757,14 @@ std::pair<Rooted<FieldDescriptor>, bool> Descriptor::createFieldDescriptor( return std::make_pair(fd, sorted); } +std::vector<SyntaxDescriptor> Descriptor::getPermittedTokens() const +{ + if (getParent() == nullptr) { + return std::vector<SyntaxDescriptor>(); + } + return collectPermittedTokens(this, getParent().cast<Domain>()); +} + /* Class StructuredClass */ StructuredClass::StructuredClass(Manager &mgr, std::string name, @@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const logger.error(cardinality.toString() + " is not a cardinality!", *this); valid = false; } + + // check short token. + if (!shortToken.special && !shortToken.token.empty() && + !Utils::isUserDefinedToken(shortToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom short form token: " + + shortToken.token, + *this); + valid = false; + } // check the validity of this superclass. if (superclass != nullptr) { valid = valid & superclass->validate(logger); @@ -961,6 +1067,51 @@ Rooted<AnnotationClass> Domain::createAnnotationClass(std::string name) new AnnotationClass(getManager(), std::move(name), this)}; } +static void gatherTokenDescriptors( + Handle<Descriptor> desc, std::vector<TokenDescriptor *> &res, + std::unordered_set<FieldDescriptor *> &visited) +{ + // add the TokenDescriptors for the Descriptor itself. + if (!desc->getStartToken().isEmpty()) { + res.push_back(desc->getStartTokenPointer()); + } + if (!desc->getEndToken().isEmpty()) { + res.push_back(desc->getEndTokenPointer()); + } + // add the TokenDescriptors for its FieldDescriptors. + for (auto fd : desc->getFieldDescriptors()) { + if (!visited.insert(fd.get()).second) { + continue; + } + if (!fd->getStartToken().isEmpty()) { + res.push_back(fd->getStartTokenPointer()); + } + if (!fd->getEndToken().isEmpty()) { + res.push_back(fd->getEndTokenPointer()); + } + } +} + +std::vector<TokenDescriptor *> Domain::getAllTokenDescriptors() const +{ + std::vector<TokenDescriptor *> res; + // note all fields that are already visited because FieldReferences might + // lead to doubled fields. + std::unordered_set<FieldDescriptor *> visited; + // add the TokenDescriptors for the StructuredClasses (and their fields). + for (auto s : structuredClasses) { + if (!s->getShortToken().isEmpty()) { + res.push_back(s->getShortTokenPointer()); + } + gatherTokenDescriptors(s, res, visited); + } + // add the TokenDescriptors for the AnnotationClasses (and their fields). + for (auto a : annotationClasses) { + gatherTokenDescriptors(a, res, visited); + } + return res; +} + /* Type registrations */ namespace RttiTypes { diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 7e10d91..e984ed9 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -167,11 +167,13 @@ #ifndef _OUSIA_MODEL_DOMAIN_HPP_ #define _OUSIA_MODEL_DOMAIN_HPP_ +#include <core/common/Whitespace.hpp> #include <core/managed/ManagedContainer.hpp> #include <core/RangeSet.hpp> #include "Node.hpp" #include "RootNode.hpp" +#include "Syntax.hpp" #include "Typesystem.hpp" namespace ousia { @@ -225,6 +227,9 @@ private: Owned<Type> primitiveType; bool optional; bool primitive; + TokenDescriptor startToken; + TokenDescriptor endToken; + WhitespaceMode whitespaceMode; protected: bool doValidate(Logger &logger) const override; @@ -233,39 +238,46 @@ public: /** * This is the constructor for primitive fields. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param primitiveType is a handle to some Type in some Typesystem of which - * one instance is allowed to fill this field. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param primitiveType is a handle to some Type in some Typesystem of + *which + * one instance is allowed to fill this field. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, Handle<Descriptor> parent, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * This is the constructor for non-primitive fields. You have to provide * children here later on. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param fieldType is the FieldType of this FieldDescriptor, either - * TREE for the main or default structure or SUBTREE - * for supporting structures. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param fieldType is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle<Descriptor> parent = nullptr, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * Returns a const reference to the NodeVector of StructuredClasses whose @@ -437,6 +449,109 @@ public: * children of an instance of this Descriptor. */ NodeVector<FieldDescriptor> getDefaultFields() const; + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * Note that this does not invalidate the FieldDescriptor. So use with + * care. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @return the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode getWhitespaceMode() const { return whitespaceMode; } + + /** + * Sets the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @param wm the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode setWhitespaceMode(WhitespaceMode wm) + { + return whitespaceMode = wm; + } + + /** + * Returns the SyntaxDescriptor for this FieldDescriptor. + * + * @return the SyntaxDescriptor for this FieldDescriptor. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast<FieldDescriptor *>(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this FieldDescriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this FieldDescriptor + */ + std::vector<SyntaxDescriptor> getPermittedTokens() const; }; /** @@ -460,7 +575,10 @@ public: * </A> * \endcode * - * key="value" inside the A-node would be an attribute, while <key>value</key> + * key="value" inside the A-node would be an attribute, while + * \code{.xml} + * <key>value</key> + * \endcode * would be a primitive field. While equivalent in XML the semantics are * different: An attribute describes indeed attributes, features of one single * node whereas a primitive field describes the _content_ of a node. @@ -472,6 +590,8 @@ class Descriptor : public Node { private: Owned<StructType> attributesDescriptor; NodeVector<FieldDescriptor> fieldDescriptors; + TokenDescriptor startToken; + TokenDescriptor endToken; bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); @@ -720,6 +840,85 @@ public: * of an instance of this Descriptor in the structure tree. */ NodeVector<StructuredClass> getPermittedChildren() const; + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the SyntaxDescriptor for this Descriptor. + * + * @return the SyntaxDescriptor for this Descriptor. + */ + virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast<Descriptor *>(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this Descriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this Descriptor. + */ + std::vector<SyntaxDescriptor> getPermittedTokens() const; }; /* * TODO: We should discuss Cardinalities one more time. Is it smart to define @@ -806,6 +1005,7 @@ private: NodeVector<StructuredClass> subclasses; bool transparent; bool root; + TokenDescriptor shortToken; /** * Helper method for getFieldDescriptors. @@ -963,6 +1163,50 @@ public: invalidate(); root = std::move(r); } + + /** + * Returns a pointer to the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a pointer to the short TokenDescriptor. + */ + TokenDescriptor *getShortTokenPointer() { return &shortToken; } + + /** + * Returns a copy of the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a copy of the short TokenDescriptor. + */ + TokenDescriptor getShortToken() const { return shortToken; } + + /** + * Sets the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @param s the new short TokenDescriptor. + */ + void setShortToken(TokenDescriptor s) + { + invalidate(); + shortToken = s; + } + + /** + * Returns the SyntaxDescriptor for this StructuredClass. + * + * @return the SyntaxDescriptor for this StructuredClass. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override + { + SyntaxDescriptor stx{getStartToken().id, getEndToken().id, + shortToken.id, const_cast<StructuredClass *>(this), + depth}; + return stx; + } }; /** @@ -1188,6 +1432,13 @@ public: { domains.insert(domains.end(), ds.begin(), ds.end()); } + + /** + * Returns all TokenDescriptors of classes and fields in this Ontology. + * + * @return all TokenDescriptors of classes and fields in this Ontology. + */ + std::vector<TokenDescriptor *> getAllTokenDescriptors() const; }; namespace RttiTypes { @@ -1200,4 +1451,4 @@ extern const Rtti Domain; } } -#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ +#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
\ No newline at end of file diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp new file mode 100644 index 0000000..9dbaccc --- /dev/null +++ b/src/core/model/Syntax.cpp @@ -0,0 +1,58 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Syntax.hpp" + +#include "Domain.hpp" + +namespace ousia { + +/* Class TokenSyntaxDescriptor */ + +bool SyntaxDescriptor::isAnnotation() const +{ + return descriptor->isa(&RttiTypes::AnnotationClass); +} +bool SyntaxDescriptor::isFieldDescriptor() const +{ + return descriptor->isa(&RttiTypes::FieldDescriptor); +} +bool SyntaxDescriptor::isStruct() const +{ + return descriptor->isa(&RttiTypes::StructuredClass); +} + +void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ + if (start != Tokens::Empty) { + set.insert(start); + } + if (end != Tokens::Empty) { + set.insert(end); + } + if (shortForm != Tokens::Empty) { + set.insert(shortForm); + } +} + +bool SyntaxDescriptor::isEmpty() const +{ + return start == Tokens::Empty && end == Tokens::Empty && + shortForm == Tokens::Empty; +} +}
\ No newline at end of file diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp new file mode 100644 index 0000000..4da3408 --- /dev/null +++ b/src/core/model/Syntax.hpp @@ -0,0 +1,196 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Syntax.hpp + * + * This header contains the Descriptor classes for user definable syntax for + * Document entities or fields. These classes are referenced in Ontology.hpp. + */ + +#ifndef _OUSIA_MODEL_SYNTAX_HPP_ +#define _OUSIA_MODEL_SYNTAX_HPP_ + +#include <core/common/Token.hpp> +#include "Node.hpp" + +namespace ousia { + +/** + * Class to describe a single token that shall be used as user-defined syntax. + */ +struct TokenDescriptor { + /** + * The string content of this token, if it is not a special one. + */ + std::string token; + /** + * A flag to be set true if this TokenDescriptor uses a special token. + */ + bool special; + /** + * An id to uniquely identify this token. + */ + TokenId id; + + /** + * Constructor for non-special tokens. The special flag is set to false and + * the id to Tokens::Empty. + * + * @param token The string content of this token, if it is not a special + * one. + */ + TokenDescriptor(std::string token = std::string()) + : token(std::move(token)), special(false), id(Tokens::Empty) + { + } + + /** + * Constructor for special tokens. The token is set to an empty string and + * the special flag to true. + * + * @param id the id of the special token. + */ + TokenDescriptor(TokenId id) : special(true), id(id) {} + + /** + * Returns true if and only if neither a string nor an ID is given. + * + * @return true if and only if neither a string nor an ID is given. + */ + bool isEmpty() const { return token.empty() && id == Tokens::Empty; } +}; + +/** + * Class describing the user defined syntax for a StructuredClass, + * AnnotationClass or FieldDescriptor. + * + * This class is used during parsing of a Document. It is used to describe + * the tokens relevant for one Descriptor that could be created at this point + * during parsing. + */ +struct SyntaxDescriptor { + /** + * Possible start token or Tokens::Empty if no token is set. + */ + TokenId start; + + /** + * Possible end token or Tokens::Empty if no token is set. + */ + TokenId end; + + /** + * Possible representation token or Tokens::Empty if no token is set. + */ + TokenId shortForm; + + /* + * The Descriptor this SyntaxDescriptor belongs to. As this may be + * a FieldDescriptor as well as a class Descriptor (StructuredClass or + * AnnotationClass) we can only use the class Node as inner argument here. + */ + Rooted<Node> descriptor; + /* + * Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + ssize_t depth; + + /** + * Default constructor, sets all token ids to Tokens::Empty and the + * descriptor handle to nullptr. + */ + SyntaxDescriptor() + : start(Tokens::Empty), + end(Tokens::Empty), + shortForm(Tokens::Empty), + descriptor(nullptr), + depth(-1) + { + } + + /** + * Member initializer constructor. + * + * @param start is a possible start token. + * @param end is a possible end token. + * @param shortForm is a possible short form token. + * @param descriptor The Descriptor this SyntaxDescriptor belongs to. + * @param depth Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + Handle<Node> descriptor, ssize_t depth) + : start(start), + end(end), + shortForm(shortForm), + descriptor(descriptor), + depth(depth) + { + } + + /** + * Inserts all tokens referenced in this SyntaxDescriptor into the + * given TokenSet. Skips token ids set to Tokens::Empty. + * + * @param set is the TokenSet instance into which the Tokens should be + * inserted. + */ + void insertIntoTokenSet(TokenSet &set) const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + * + * @return true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + */ + bool isAnnotation() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + */ + bool isStruct() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + */ + bool isFieldDescriptor() const; + + /** + * Returns true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + * + * @return true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + */ + bool isEmpty() const; +}; +} +#endif
\ No newline at end of file diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index 8acc02d..092664a 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -34,6 +34,7 @@ #include <core/common/Whitespace.hpp> #include <core/common/Token.hpp> +#include <core/model/Syntax.hpp> namespace ousia { @@ -86,9 +87,7 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - virtual void pushTokens( - const std::vector<TokenSyntaxDescriptor> &tokens) = 0; - + void pushTokens(const std::vector<SyntaxDescriptor> &tokens); /** * Removes the previously pushed list of tokens from the stack. */ diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index f9cefc2..006e521 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -64,7 +64,7 @@ const SourceLocation &Handler::location() const { return handlerData.location; } Variant Handler::readData() { return handlerData.callbacks.readData(); } -void Handler::pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens) +void Handler::pushTokens(const std::vector<SyntaxDescriptor> &tokens) { handlerData.callbacks.pushTokens(tokens); } diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index f0968e7..67fde06 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -1,6 +1,6 @@ /* Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,6 +16,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * @file Handler.hpp + * + * Contains the definition of the Handler class, used for representing Handlers + * for certain syntactic elements. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + #ifndef _OUSIA_PARSER_STACK_HANDLER_HPP_ #define _OUSIA_PARSER_STACK_HANDLER_HPP_ @@ -26,6 +35,7 @@ #include <core/common/Whitespace.hpp> #include <core/common/Token.hpp> #include <core/model/Node.hpp> +#include <core/model/Syntax.hpp> namespace ousia { @@ -43,6 +53,13 @@ class HandlerCallbacks; class State; /** + * Enum describing the type of the Handler instance -- a document handler may + * be created for handling a simple command, a token or an annotation start and + * end. + */ +enum class HandlerType { COMMAND, ANNOTATION_START, ANNOTATION_END, TOKEN }; + +/** * Class collecting all the data that is being passed to a Handler * instance. */ @@ -67,9 +84,16 @@ public: const State &state; /** - * Current source code location. + * Token containing the name of the command that is being handled, the + * location of the element in the source code or the token id of the token + * that is being handled. + */ + Token token; + + /** + * Type describing for which purpose the HandlerData instance was created. */ - SourceLocation location; + HandlerType type; /** * Constructor of the HandlerData class. @@ -78,10 +102,12 @@ public: * @param callbacks is an instance of Callbacks used to notify * the parser about certain state changes. * @param state is the state this handler was called for. - * @param location is the location at which the handler is created. + * @param token contains name, token id and location of the command that is + * being handled. + * @param type describes the purpose of the Handler instance at hand. */ HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks, - const State &state, const SourceLocation &location); + const State &state, const Token &token, HandlerType type); }; /** @@ -112,43 +138,6 @@ protected: Handler(const HandlerData &handlerData); /** - * Returns a reference at the ParserContext. - * - * @return a reference at the ParserContext. - */ - ParserContext &context(); - - /** - * Returns a reference at the ParserScope instance. - * - * @return a reference at the ParserScope instance. - */ - ParserScope &scope(); - - /** - * Returns a reference at the Manager instance which manages all nodes. - * - * @return a referance at the Manager instance. - */ - Manager &manager(); - - /** - * Returns a reference at the Logger instance used for logging error - * messages. - * - * @return a reference at the Logger instance. - */ - Logger &logger(); - - /** - * Returns the location of the element in the source file, for which this - * Handler was created. - * - * @return the location of the Handler in the source file. - */ - const SourceLocation &location() const; - - /** * Calls the corresponding function in the HandlerCallbacks instance. This * method registers the given tokens as tokens that are generally available, * tokens must be explicitly enabled using the "pushTokens" and "popTokens" @@ -178,7 +167,7 @@ protected: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens); + void pushTokens(const std::vector<SyntaxDescriptor> &tokens); /** * Calls the corresponding function in the HandlerCallbacks instance. @@ -237,11 +226,84 @@ public: virtual ~Handler(); /** - * Reference at the State descriptor for which this Handler was created. + * Returns a reference at the ParserContext. + * + * @return a reference at the ParserContext. + */ + ParserContext &context(); + + /** + * Returns a reference at the ParserScope instance. + * + * @return a reference at the ParserScope instance. + */ + ParserScope &scope(); + + /** + * Returns a reference at the Manager instance which manages all nodes. + * + * @return a referance at the Manager instance. + */ + Manager &manager(); + + /** + * Returns a reference at the Logger instance used for logging error + * messages. + * + * @return a reference at the Logger instance. + */ + Logger &logger(); + + /** + * Returns the name of the command or annotation the handler is currently + * handling. In case the command is currently handling a token, the name + * corresponds to the token string sequence. + * + * @return the name of the command or the string sequence of the token that + * is being handled by this handler. + */ + const std::string &name() const; + + /** + * Returns the token id of the token that is currently being handled by the + * handler. In case the handler currently handles a command or annotation, + * the token id is set to Tokens::Data. + * + * @return the current token id or Tokens::Data if no token is being + * handled. + */ + TokenId tokenId() const; + + /** + * Returns a reference at the Token instance, containing either the token + * that is currently being handled or the name of the command and annotation + * and their location. + * + * @return a const reference at the internal token instance. + */ + const Token &token() const; + + /** + * Returns the location of the element in the source file, for which this + * Handler was created. + * + * @return the location of the Handler in the source file. + */ + const SourceLocation &location() const; + + /** + * Returns the type describing the purpose for which the handler instance + * was created. + */ + HandlerType type() const; + + /** + * Returns a reference at the State descriptor for which this Handler was + * created. * * @return a const reference at the constructing State descriptor. */ - const State &getState() const; + const State &state() const; /** * Sets the internal logger to the given logger instance. @@ -267,42 +329,41 @@ public: /** * Called whenever the handler should handle the start of a command. This * method (or any other of the "start" methods) is called exactly once, - * after the constructor. + * after the constructor. The name of the command that is started here can + * be accessed using the name() method. * - * @param name is the name of the command that is started here. * @param args is a map from strings to variants (argument name and value). * @return true if the handler was successful in starting an element with * the given name represents, false otherwise. */ - virtual bool startCommand(const std::string &commandName, - Variant::mapType &args) = 0; + virtual bool startCommand(Variant::mapType &args) = 0; /** * Called whenever the handler should handle the start of an annotation. * This method (or any other of the "start" methods) is called exactly once, * after the constructor. This method is only called if the * "supportsAnnotations" flag of the State instance referencing this Handler - * is set to true. + * is set to true. The name of the command that is started here can be + * accessed using the name() method. * - * @param name is the name of the annotation that is started here. * @param args is a map from strings to variants (argument name and value). * @param type specifies whether this handler should handle the start of an * annotation or the end of an annotation. */ - virtual bool startAnnotation(const std::string &name, - Variant::mapType &args, + virtual bool startAnnotation(Variant::mapType &args, AnnotationType annotationType) = 0; /** * Called whenever the handler should handle the start of a token. This * method (or any other of the "start" methods) is called exactly once, * after the constructor. This method is only called if the "supportsTokens" - * flag of the State instance referencing this Handler is set to true. + * flag of the State instance referencing this Handler is set to true. The + * token id of the token that is should be handled can be accessed using the + * tokenId() method. * - * @param token is the Token for which the handler should be started. * @param node is the node for which this token was registered. */ - virtual bool startToken(const Token &token, Handle<Node> node) = 0; + virtual bool startToken(Handle<Node> node) = 0; /** * Called whenever a token is marked as "end" token and this handler happens @@ -380,11 +441,10 @@ protected: using Handler::Handler; public: - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; - bool startAnnotation(const std::string &name, Variant::mapType &args, + bool startCommand(Variant::mapType &args) override; + bool startAnnotation(Variant::mapType &args, AnnotationType annotationType) override; - bool startToken(const Token &token, Handle<Node> node) override; + bool startToken(Handle<Node> node) override; EndTokenResult endToken(const Token &token, Handle<Node> node) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; @@ -407,11 +467,10 @@ protected: using Handler::Handler; public: - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; - bool startAnnotation(const std::string &name, Variant::mapType &args, + bool startCommand(Variant::mapType &args) override; + bool startAnnotation(Variant::mapType &args, AnnotationType annotationType) override; - bool startToken(const Token &token, Handle<Node> node) override; + bool startToken(Handle<Node> node) override; EndTokenResult endToken(const Token &token, Handle<Node> node) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; @@ -466,8 +525,7 @@ protected: virtual void doHandle(const Variant &fieldData, Variant::mapType &args) = 0; public: - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; bool data() override; void end() override; }; diff --git a/src/core/parser/stack/TokenStack.cpp b/src/core/parser/stack/TokenStack.cpp index 6afeaed..ac1d94e 100644 --- a/src/core/parser/stack/TokenStack.cpp +++ b/src/core/parser/stack/TokenStack.cpp @@ -21,7 +21,7 @@ namespace ousia { namespace parser_stack { -void TokenStack::pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens) +void TokenStack::pushTokens(const std::vector<SyntaxDescriptor> &tokens) { stack.push_back(tokens); } @@ -35,7 +35,7 @@ TokenSet TokenStack::tokens() const } TokenSet res; - for (const TokenSyntaxDescriptor &descr : stack.back()) { + for (const SyntaxDescriptor &descr : stack.back()) { descr.insertIntoTokenSet(res); } return res; diff --git a/src/core/parser/stack/TokenStack.hpp b/src/core/parser/stack/TokenStack.hpp index 9669f50..af734bb 100644 --- a/src/core/parser/stack/TokenStack.hpp +++ b/src/core/parser/stack/TokenStack.hpp @@ -32,6 +32,7 @@ #include <vector> #include <core/common/Token.hpp> +#include <core/model/Syntax.hpp> namespace ousia { namespace parser_stack { @@ -52,7 +53,7 @@ private: * Stack containing vectors of TokenSyntaxDescriptor instances as given by * the user. */ - std::vector<std::vector<TokenSyntaxDescriptor>> stack; + std::vector<std::vector<SyntaxDescriptor>> stack; /** * Constructor of the TokenStack class. @@ -86,7 +87,7 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens); + void pushTokens(const std::vector<SyntaxDescriptor> &tokens); /** * Removes the previously pushed list of tokens from the stack. |