diff options
Diffstat (limited to 'src/core/model')
| -rw-r--r-- | src/core/model/Domain.cpp | 193 | ||||
| -rw-r--r-- | src/core/model/Domain.hpp | 297 | ||||
| -rw-r--r-- | src/core/model/Syntax.cpp | 58 | ||||
| -rw-r--r-- | src/core/model/Syntax.hpp | 196 | 
4 files changed, 700 insertions, 44 deletions
diff --git a/src/core/model/Domain.cpp b/src/core/model/Domain.cpp index 8255401..587a382 100644 --- a/src/core/model/Domain.cpp +++ b/src/core/model/Domain.cpp @@ -20,8 +20,9 @@  #include <queue>  #include <set> -#include <core/common/RttiBuilder.hpp>  #include <core/common/Exceptions.hpp> +#include <core/common/RttiBuilder.hpp> +#include <core/common/Utils.hpp>  #include "Domain.hpp" @@ -169,52 +170,60 @@ static NodeVector<Node> pathTo(const Node *start, Logger &logger,  	return shortest;  } +struct CollectState { +	Node *n; +	size_t depth; + +	CollectState(Node *n, size_t depth) : n(n), depth(depth) {} +}; +  template <typename F>  static NodeVector<Node> collect(const Node *start, F match)  {  	// result  	NodeVector<Node> res;  	// queue for breadth-first search of graph. -	std::queue<Rooted<Node>> q; +	std::queue<CollectState> q;  	// put the initial node on the stack. -	q.push(const_cast<Node *>(start)); +	q.push(CollectState(const_cast<Node *>(start), 0));  	// set of visited nodes.  	std::unordered_set<const Node *> visited;  	while (!q.empty()) { -		Rooted<Node> n = q.front(); +		CollectState state = q.front();  		q.pop();  		// do not proceed if this node was already visited. -		if (!visited.insert(n.get()).second) { +		if (!visited.insert(state.n).second) {  			continue;  		} -		if (n->isa(&RttiTypes::StructuredClass)) { -			Rooted<StructuredClass> strct = n.cast<StructuredClass>(); +		if (state.n->isa(&RttiTypes::Descriptor)) { +			Rooted<Descriptor> strct{static_cast<Descriptor *>(state.n)};  			// look through all fields.  			NodeVector<FieldDescriptor> fields = strct->getFieldDescriptors();  			for (auto fd : fields) {  				// note matches. -				if (match(fd)) { +				if (match(fd, state.depth)) {  					res.push_back(fd);  				}  				// only continue in the TREE field.  				
if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) { -					q.push(fd); +					q.push(CollectState(fd.get(), state.depth));  				}  			}  		} else {  			// otherwise this is a FieldDescriptor. -			Rooted<FieldDescriptor> field = n.cast<FieldDescriptor>(); +			Rooted<FieldDescriptor> field{ +			    static_cast<FieldDescriptor *>(state.n)};  			// and we proceed by visiting all permitted children.  			for (auto c : field->getChildrenWithSubclasses()) {  				// note matches. -				if (match(c)) { +				if (match(c, state.depth)) {  					res.push_back(c);  				}  				// We only continue our search via transparent children.  				if (c->isTransparent()) { -					q.push(c); +					q.push(CollectState(c.get(), state.depth + 1));  				}  			}  		} @@ -222,28 +231,59 @@ static NodeVector<Node> collect(const Node *start, F match)  	return res;  } +static std::vector<SyntaxDescriptor> collectPermittedTokens( +    const Node *start, Handle<Domain> domain) +{ +	// gather SyntaxDescriptors for structure children first. +	std::vector<SyntaxDescriptor> res; +	collect(start, [&res](Handle<Node> n, size_t depth) { +		SyntaxDescriptor stx; +		if (n->isa(&RttiTypes::FieldDescriptor)) { +			stx = n.cast<FieldDescriptor>()->getSyntaxDescriptor(depth); +		} else { +			stx = n.cast<Descriptor>()->getSyntaxDescriptor(depth); +		} +		// do not add trivial SyntaxDescriptors. +		if (!stx.isEmpty()) { +			res.push_back(stx); +		} +		return false; +	}); +	// gather SyntaxDescriptors for AnnotationClasses. 
+	for (auto a : domain->getAnnotationClasses()) { +		SyntaxDescriptor stx = a->getSyntaxDescriptor(); +		if (!stx.isEmpty()) { +			res.push_back(stx); +		} +	} +	return res; +} +  /* Class FieldDescriptor */  FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Type> primitiveType,                                   Handle<Descriptor> parent, FieldType fieldType, -                                 std::string name, bool optional) +                                 std::string name, bool optional, +                                 WhitespaceMode whitespaceMode)      : Node(mgr, std::move(name), parent),        children(this),        fieldType(fieldType),        primitiveType(acquire(primitiveType)),        optional(optional), -      primitive(true) +      primitive(true), +      whitespaceMode(whitespaceMode)  {  }  FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Descriptor> parent,                                   FieldType fieldType, std::string name, -                                 bool optional) +                                 bool optional, WhitespaceMode whitespaceMode)      : Node(mgr, std::move(name), parent),        children(this),        fieldType(fieldType),        optional(optional), -      primitive(false) +      primitive(false), +      whitespaceMode(whitespaceMode)  {  } @@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const  	} else {  		valid = valid & validateName(logger);  	} +	// check start and end token. +	if (!startToken.special && !startToken.token.empty() && +	    !Utils::isUserDefinedToken(startToken.token)) { +		// TODO: Correct error message. +		logger.error(std::string("Field \"") + getName() + +		                 "\" has an invalid custom start token: " + +		                 startToken.token, +		             *this); +		valid = false; +	} +	if (!endToken.special && !endToken.token.empty() && +	    !Utils::isUserDefinedToken(endToken.token)) { +		// TODO: Correct error message. 
+		logger.error(std::string("Field \"") + getName() + +		                 "\" has an invalid custom end token: " + +		                 endToken.token, +		             *this); +		valid = false; +	}  	// check consistency of FieldType with the rest of the FieldDescriptor.  	if (primitive) { @@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const  }  static void gatherSubclasses( -    std::unordered_set<const StructuredClass *>& visited, +    std::unordered_set<const StructuredClass *> &visited,      NodeVector<StructuredClass> &res, Handle<StructuredClass> strct)  {  	// this check is to prevent cycles. @@ -334,7 +393,7 @@ static void gatherSubclasses(  	}  	for (auto sub : strct->getSubclasses()) {  		// this check is to prevent cycles. -		if(visited.count(sub.get())){ +		if (visited.count(sub.get())) {  			continue;  		}  		res.push_back(sub); @@ -381,7 +440,7 @@ NodeVector<Node> FieldDescriptor::pathTo(Handle<FieldDescriptor> field,  NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const  {  	// TODO: In principle a cast would be nicer here, but for now we copy. 
-	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { +	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {  		if (!n->isa(&RttiTypes::FieldDescriptor)) {  			return false;  		} @@ -396,6 +455,16 @@ NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const  	return res;  } +std::vector<SyntaxDescriptor> FieldDescriptor::getPermittedTokens() const +{ +	if (getParent() == nullptr || +	    getParent().cast<Descriptor>()->getParent() == nullptr) { +		return std::vector<SyntaxDescriptor>(); +	} +	return collectPermittedTokens( +	    this, getParent().cast<Descriptor>()->getParent().cast<Domain>()); +} +  /* Class Descriptor */  void Descriptor::doResolve(ResolutionState &state) @@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const  		}  		valid = valid & attributesDescriptor->validate(logger);  	} + +	// check start and end token. +	if (!startToken.special && !startToken.token.empty() && +	    !Utils::isUserDefinedToken(startToken.token)) { +		logger.error(std::string("Descriptor \"") + getName() + +		                 "\" has an invalid custom start token: " + +		                 startToken.token, +		             *this); +		valid = false; +	} +	if (!endToken.special && !endToken.token.empty() && +	    !Utils::isUserDefinedToken(endToken.token)) { +		logger.error(std::string("Descriptor \"") + getName() + +		                 "\" has an invalid custom end token: " + +		                 endToken.token, +		             *this); +		valid = false; +	} +  	// check that only one FieldDescriptor is of type TREE.  	auto fds = Descriptor::getFieldDescriptors();  	bool hasTREE = false; @@ -483,7 +571,7 @@ std::pair<NodeVector<Node>, bool> Descriptor::pathTo(  NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const  {  	// TODO: In principle a cast would be nicer here, but for now we copy. 
-	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { +	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {  		if (!n->isa(&RttiTypes::FieldDescriptor)) {  			return false;  		} @@ -501,7 +589,7 @@ NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const  NodeVector<StructuredClass> Descriptor::getPermittedChildren() const  {  	// TODO: In principle a cast would be nicer here, but for now we copy. -	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { +	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {  		return n->isa(&RttiTypes::StructuredClass);  	});  	NodeVector<StructuredClass> res; @@ -669,6 +757,14 @@ std::pair<Rooted<FieldDescriptor>, bool> Descriptor::createFieldDescriptor(  	return std::make_pair(fd, sorted);  } +std::vector<SyntaxDescriptor> Descriptor::getPermittedTokens() const +{ +	if (getParent() == nullptr) { +		return std::vector<SyntaxDescriptor>(); +	} +	return collectPermittedTokens(this, getParent().cast<Domain>()); +} +  /* Class StructuredClass */  StructuredClass::StructuredClass(Manager &mgr, std::string name, @@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const  		logger.error(cardinality.toString() + " is not a cardinality!", *this);  		valid = false;  	} + +	// check short token. +	if (!shortToken.special && !shortToken.token.empty() && +	    !Utils::isUserDefinedToken(shortToken.token)) { +		logger.error(std::string("Descriptor \"") + getName() + +		                 "\" has an invalid custom short form token: " + +		                 shortToken.token, +		             *this); +		valid = false; +	}  	// check the validity of this superclass.  	
if (superclass != nullptr) {  		valid = valid & superclass->validate(logger); @@ -961,6 +1067,51 @@ Rooted<AnnotationClass> Domain::createAnnotationClass(std::string name)  	    new AnnotationClass(getManager(), std::move(name), this)};  } +static void gatherTokenDescriptors( +    Handle<Descriptor> desc, std::vector<TokenDescriptor *> &res, +    std::unordered_set<FieldDescriptor *> &visited) +{ +	// add the TokenDescriptors for the Descriptor itself. +	if (!desc->getStartToken().isEmpty()) { +		res.push_back(desc->getStartTokenPointer()); +	} +	if (!desc->getEndToken().isEmpty()) { +		res.push_back(desc->getEndTokenPointer()); +	} +	// add the TokenDescriptors for its FieldDescriptors. +	for (auto fd : desc->getFieldDescriptors()) { +		if (!visited.insert(fd.get()).second) { +			continue; +		} +		if (!fd->getStartToken().isEmpty()) { +			res.push_back(fd->getStartTokenPointer()); +		} +		if (!fd->getEndToken().isEmpty()) { +			res.push_back(fd->getEndTokenPointer()); +		} +	} +} + +std::vector<TokenDescriptor *> Domain::getAllTokenDescriptors() const +{ +	std::vector<TokenDescriptor *> res; +	// note all fields that are already visited because FieldReferences might +	// lead to doubled fields. +	std::unordered_set<FieldDescriptor *> visited; +	// add the TokenDescriptors for the StructuredClasses (and their fields). +	for (auto s : structuredClasses) { +		if (!s->getShortToken().isEmpty()) { +			res.push_back(s->getShortTokenPointer()); +		} +		gatherTokenDescriptors(s, res, visited); +	} +	// add the TokenDescriptors for the AnnotationClasses (and their fields). 
+	for (auto a : annotationClasses) { +		gatherTokenDescriptors(a, res, visited); +	} +	return res; +} +  /* Type registrations */  namespace RttiTypes { diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 7e10d91..e984ed9 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -167,11 +167,13 @@  #ifndef _OUSIA_MODEL_DOMAIN_HPP_  #define _OUSIA_MODEL_DOMAIN_HPP_ +#include <core/common/Whitespace.hpp>  #include <core/managed/ManagedContainer.hpp>  #include <core/RangeSet.hpp>  #include "Node.hpp"  #include "RootNode.hpp" +#include "Syntax.hpp"  #include "Typesystem.hpp"  namespace ousia { @@ -225,6 +227,9 @@ private:  	Owned<Type> primitiveType;  	bool optional;  	bool primitive; +	TokenDescriptor startToken; +	TokenDescriptor endToken; +	WhitespaceMode whitespaceMode;  protected:  	bool doValidate(Logger &logger) const override; @@ -233,39 +238,46 @@ public:  	/**  	 * This is the constructor for primitive fields.  	 * -	 * @param mgr           is the global Manager instance. -	 * @param parent        is a handle of the Descriptor node that has this -	 *                      FieldDescriptor. -	 * @param primitiveType is a handle to some Type in some Typesystem of which -	 *                      one instance is allowed to fill this field. -	 * @param name          is the name of this field. -	 * @param optional      should be set to 'false' is this field needs to be -	 *                      filled in order for an instance of the parent -	 *                      Descriptor to be valid. +	 * @param mgr            is the global Manager instance. +	 * @param parent         is a handle of the Descriptor node that has this +	 *                       FieldDescriptor. +	 * @param primitiveType  is a handle to some Type in some Typesystem of +	 *which +	 *                       one instance is allowed to fill this field. +	 * @param name           is the name of this field. 
+	 * @param optional       should be set to 'false' if this field needs to be +	 *                       filled in order for an instance of the parent +	 *                       Descriptor to be valid. +	 * @param whitespaceMode the WhitespaceMode to be used when an instance of +	 *                       this FieldDescriptor is parsed.  	 */  	FieldDescriptor(Manager &mgr, Handle<Type> primitiveType,  	                Handle<Descriptor> parent,  	                FieldType fieldType = FieldType::TREE, -	                std::string name = "", bool optional = false); +	                std::string name = "", bool optional = false, +	                WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);  	/**  	 * This is the constructor for non-primitive fields. You have to provide  	 * children here later on.  	 * -	 * @param mgr           is the global Manager instance. -	 * @param parent        is a handle of the Descriptor node that has this -	 *                      FieldDescriptor. -	 * @param fieldType     is the FieldType of this FieldDescriptor, either -	 *                      TREE for the main or default structure or SUBTREE -	 *                      for supporting structures. -	 * @param name          is the name of this field. -	 * @param optional      should be set to 'false' is this field needs to be -	 *                      filled in order for an instance of the parent -	 *                      Descriptor to be valid. +	 * @param mgr            is the global Manager instance. +	 * @param parent         is a handle of the Descriptor node that has this +	 *                       FieldDescriptor. +	 * @param fieldType      is the FieldType of this FieldDescriptor, either +	 *                       TREE for the main or default structure or SUBTREE +	 *                       for supporting structures. +	 * @param name           is the name of this field. 
+	 * @param optional       should be set to 'false' if this field needs to be +	 *                       filled in order for an instance of the parent +	 *                       Descriptor to be valid. +	 * @param whitespaceMode the WhitespaceMode to be used when an instance of +	 *                       this FieldDescriptor is parsed.  	 */  	FieldDescriptor(Manager &mgr, Handle<Descriptor> parent = nullptr,  	                FieldType fieldType = FieldType::TREE, -	                std::string name = "", bool optional = false); +	                std::string name = "", bool optional = false, +	                WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);  	/**  	 * Returns a const reference to the NodeVector of StructuredClasses whose @@ -437,6 +449,109 @@ public:  	 *         children of an instance of this Descriptor.  	 */  	NodeVector<FieldDescriptor> getDefaultFields() const; + +	/** +	 * Returns a pointer to the start TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this FieldDescriptor starts. +	 * +	 * Note that this does not invalidate the FieldDescriptor. So use with +	 * care. +	 * +	 * @return a pointer to the start TokenDescriptor. +	 */ +	TokenDescriptor *getStartTokenPointer() { return &startToken; } + +	/** +	 * Returns a copy of the start TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this FieldDescriptor starts. +	 * +	 * @return a copy of the start TokenDescriptor. +	 */ +	TokenDescriptor getStartToken() const { return startToken; } + +	/** +	 * Sets the start TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this FieldDescriptor starts. +	 * +	 * @param st the new start TokenDescriptor. +	 */ +	void setStartToken(TokenDescriptor st) +	{ +		invalidate(); +		startToken = st; +	} + +	/** +	 * Returns a pointer to the end TokenDescriptor. 
This Token is used as a +	 * signifier during parsing that an instance of this FieldDescriptor ends. +	 * +	 * @return a pointer to the end TokenDescriptor. +	 */ +	TokenDescriptor *getEndTokenPointer() { return &endToken; } + +	/** +	 * Returns a copy of the end TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this FieldDescriptor ends. +	 * +	 * @return a copy of the end TokenDescriptor. +	 */ +	TokenDescriptor getEndToken() const { return endToken; } + +	/** +	 * Sets the end TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this FieldDescriptor ends. +	 * +	 * @param e the new end TokenDescriptor. +	 */ +	void setEndToken(TokenDescriptor e) +	{ +		invalidate(); +		endToken = e; +	} + +	/** +	 * Returns the WhitespaceMode to be used when an instance of this +	 * FieldDescriptor is parsed. +	 * +	 * @return the WhitespaceMode to be used when an instance of this +	 * FieldDescriptor is parsed. +	 */ +	WhitespaceMode getWhitespaceMode() const { return whitespaceMode; } + +	/** +	 * Sets the WhitespaceMode to be used when an instance of this +	 * FieldDescriptor is parsed. +	 * +	 * @param wm the WhitespaceMode to be used when an instance of this +	 * FieldDescriptor is parsed. +	 */ +	WhitespaceMode setWhitespaceMode(WhitespaceMode wm) +	{ +		return whitespaceMode = wm; +	} + +	/** +	 * Returns the SyntaxDescriptor for this FieldDescriptor. +	 * +	 * @return the SyntaxDescriptor for this FieldDescriptor. +	 */ +	SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) +	{ +		SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, +		                     const_cast<FieldDescriptor *>(this), depth}; +		return stx; +	} + +	/** +	 * Returns a vector of SyntaxDescriptors, one for each Descriptor +	 * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is +	 * permitted as child of this FieldDescriptor. This also makes use +	 * of transparency. 
+	 * +	 * @return a vector of SyntaxDescriptors, one for each Descriptor that is +	 *         permitted as child of this FieldDescriptor +	 */ +	std::vector<SyntaxDescriptor> getPermittedTokens() const;  };  /** @@ -460,7 +575,10 @@ public:   * </A>   * \endcode   * - * key="value" inside the A-node would be an attribute, while <key>value</key> + * key="value" inside the A-node would be an attribute, while + * \code{.xml} + *   <key>value</key> + * \endcode   * would be a primitive field. While equivalent in XML the semantics are   * different: An attribute describes indeed attributes, features of one single   * node whereas a primitive field describes the _content_ of a node. @@ -472,6 +590,8 @@ class Descriptor : public Node {  private:  	Owned<StructType> attributesDescriptor;  	NodeVector<FieldDescriptor> fieldDescriptors; +	TokenDescriptor startToken; +	TokenDescriptor endToken;  	bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); @@ -720,6 +840,85 @@ public:  	 *         of an instance of this Descriptor in the structure tree.  	 */  	NodeVector<StructuredClass> getPermittedChildren() const; + +	/** +	 * Returns a pointer to the start TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this Descriptor starts. +	 * +	 * @return a pointer to the start TokenDescriptor. +	 */ +	TokenDescriptor *getStartTokenPointer() { return &startToken; } + +	/** +	 * Returns a copy of the start TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this Descriptor starts. +	 * +	 * @return a copy of the start TokenDescriptor. +	 */ +	TokenDescriptor getStartToken() const { return startToken; } + +	/** +	 * Sets the start TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this Descriptor starts. +	 * +	 * @param st the new start TokenDescriptor. 
+	 */ +	void setStartToken(TokenDescriptor st) +	{ +		invalidate(); +		startToken = st; +	} + +	/** +	 * Returns a pointer to the end TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this Descriptor ends. +	 * +	 * @return a pointer to the end TokenDescriptor. +	 */ +	TokenDescriptor *getEndTokenPointer() { return &endToken; } + +	/** +	 * Returns a copy of the end TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this Descriptor ends. +	 * +	 * @return a copy of the end TokenDescriptor. +	 */ +	TokenDescriptor getEndToken() const { return endToken; } + +	/** +	 * Sets the end TokenDescriptor. This Token is used as a +	 * signifier during parsing that an instance of this Descriptor ends. +	 * +	 * @param e the new end TokenDescriptor. +	 */ +	void setEndToken(TokenDescriptor e) +	{ +		invalidate(); +		endToken = e; +	} + +	/** +	 * Returns the SyntaxDescriptor for this Descriptor. +	 * +	 * @return the SyntaxDescriptor for this Descriptor. +	 */ +	virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) +	{ +		SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, +		                     const_cast<Descriptor *>(this), depth}; +		return stx; +	} + +	/** +	 * Returns a vector of SyntaxDescriptors, one for each Descriptor +	 * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is +	 * permitted as child of this Descriptor. This also makes use +	 * of transparency. +	 * +	 * @return a vector of SyntaxDescriptors, one for each Descriptor that is +	 *         permitted as child of this Descriptor. +	 */ +	std::vector<SyntaxDescriptor> getPermittedTokens() const;  };  /*   * TODO: We should discuss Cardinalities one more time. Is it smart to define @@ -806,6 +1005,7 @@ private:  	NodeVector<StructuredClass> subclasses;  	bool transparent;  	bool root; +	TokenDescriptor shortToken;  	/**  	 * Helper method for getFieldDescriptors. 
@@ -963,6 +1163,50 @@ public:  		invalidate();  		root = std::move(r);  	} + +	/** +	 * Returns a pointer to the short TokenDescriptor. During parsing an +	 * occurrence of this token will be translated to an empty instance of this +	 * StructuredClass. +	 * +	 * @return a pointer to the short TokenDescriptor. +	 */ +	TokenDescriptor *getShortTokenPointer() { return &shortToken; } + +	/** +	 * Returns a copy of the short TokenDescriptor. During parsing an +	 * occurrence of this token will be translated to an empty instance of this +	 * StructuredClass. +	 * +	 * @return a copy of the short TokenDescriptor. +	 */ +	TokenDescriptor getShortToken() const { return shortToken; } + +	/** +	 * Sets the short TokenDescriptor. During parsing an +	 * occurrence of this token will be translated to an empty instance of this +	 * StructuredClass. +	 * +	 * @param s the new short TokenDescriptor. +	 */ +	void setShortToken(TokenDescriptor s) +	{ +		invalidate(); +		shortToken = s; +	} + +	/** +	 * Returns the SyntaxDescriptor for this StructuredClass. +	 * +	 * @return the SyntaxDescriptor for this StructuredClass. +	 */ +	SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override +	{ +		SyntaxDescriptor stx{getStartToken().id, getEndToken().id, +		                     shortToken.id, const_cast<StructuredClass *>(this), +		                     depth}; +		return stx; +	}  };  /** @@ -1188,6 +1432,13 @@ public:  	{  		domains.insert(domains.end(), ds.begin(), ds.end());  	} + +	/** +	 * Returns all TokenDescriptors of classes and fields in this Domain. +	 * +	 * @return all TokenDescriptors of classes and fields in this Domain. +	 */ +	std::vector<TokenDescriptor *> getAllTokenDescriptors() const;  };  namespace RttiTypes { @@ -1200,4 +1451,4 @@ extern const Rtti Domain;  }  } -#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ +#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
\ No newline at end of file diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp new file mode 100644 index 0000000..9dbaccc --- /dev/null +++ b/src/core/model/Syntax.cpp @@ -0,0 +1,58 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Syntax.hpp" + +#include "Domain.hpp" + +namespace ousia { + +/* Class SyntaxDescriptor */ + +bool SyntaxDescriptor::isAnnotation() const +{ +	return descriptor->isa(&RttiTypes::AnnotationClass); +} +bool SyntaxDescriptor::isFieldDescriptor() const +{ +	return descriptor->isa(&RttiTypes::FieldDescriptor); +} +bool SyntaxDescriptor::isStruct() const +{ +	return descriptor->isa(&RttiTypes::StructuredClass); +} + +void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ +	if (start != Tokens::Empty) { +		set.insert(start); +	} +	if (end != Tokens::Empty) { +		set.insert(end); +	} +	if (shortForm != Tokens::Empty) { +		set.insert(shortForm); +	} +} + +bool SyntaxDescriptor::isEmpty() const +{ +	return start == Tokens::Empty && end == Tokens::Empty && +	       shortForm == Tokens::Empty; +} +}
\ No newline at end of file diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp new file mode 100644 index 0000000..4da3408 --- /dev/null +++ b/src/core/model/Syntax.hpp @@ -0,0 +1,196 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Syntax.hpp + * + * This header contains the Descriptor classes for user definable syntax for + * Document entities or fields. These classes are referenced in Ontology.hpp. + */ + +#ifndef _OUSIA_MODEL_SYNTAX_HPP_ +#define _OUSIA_MODEL_SYNTAX_HPP_ + +#include <core/common/Token.hpp> +#include "Node.hpp" + +namespace ousia { + +/** + * Class to describe a single token that shall be used as user-defined syntax. + */ +struct TokenDescriptor { +	/** +	 * The string content of this token, if it is not a special one. +	 */ +	std::string token; +	/** +	 * A flag to be set true if this TokenDescriptor uses a special token. +	 */ +	bool special; +	/** +	 * An id to uniquely identify this token. +	 */ +	TokenId id; + +	/** +	 * Constructor for non-special tokens. The special flag is set to false and +	 * the id to Tokens::Empty. +	 * +	 * @param token The string content of this token, if it is not a special +	 *              one. 
+	 */ +	TokenDescriptor(std::string token = std::string()) +	    : token(std::move(token)), special(false), id(Tokens::Empty) +	{ +	} + +	/** +	 * Constructor for special tokens. The token is set to an empty string and +	 * the special flag to true. +	 * +	 * @param id the id of the special token. +	 */ +	TokenDescriptor(TokenId id) : special(true), id(id) {} + +	/** +	 * Returns true if and only if neither a string nor an ID is given. +	 * +	 * @return true if and only if neither a string nor an ID is given. +	 */ +	bool isEmpty() const { return token.empty() && id == Tokens::Empty; } +}; + +/** + * Class describing the user defined syntax for a StructuredClass, + * AnnotationClass or FieldDescriptor. + * + * This class is used during parsing of a Document. It is used to describe + * the tokens relevant for one Descriptor that could be created at this point + * during parsing. + */ +struct SyntaxDescriptor { +	/** +	 * Possible start token or Tokens::Empty if no token is set. +	 */ +	TokenId start; + +	/** +	 * Possible end token or Tokens::Empty if no token is set. +	 */ +	TokenId end; + +	/** +	 * Possible representation token or Tokens::Empty if no token is set. +	 */ +	TokenId shortForm; + +	/* +	 * The Descriptor this SyntaxDescriptor belongs to. As this may be +	 * a FieldDescriptor as well as a class Descriptor (StructuredClass or +	 * AnnotationClass) we can only use the class Node as inner argument here. +	 */ +	Rooted<Node> descriptor; +	/* +	 * Given the current leaf in the parsed document the depth of a +	 * SyntaxDescriptor is defined as the number of transparent elements that +	 * would be needed to construct an instance of the referenced descriptor. +	 */ +	ssize_t depth; + +	/** +	 * Default constructor, sets all token ids to Tokens::Empty and the +	 * descriptor handle to nullptr. 
+	 */ +	SyntaxDescriptor() +	    : start(Tokens::Empty), +	      end(Tokens::Empty), +	      shortForm(Tokens::Empty), +	      descriptor(nullptr), +	      depth(-1) +	{ +	} + +	/** +	 * Member initializer constructor. +	 * +	 * @param start is a possible start token. +	 * @param end is a possible end token. +	 * @param shortForm is a possible short form token. +	 * @param descriptor The Descriptor this SyntaxDescriptor belongs to. +	 * @param depth Given the current leaf in the parsed document the depth of a +	 * SyntaxDescriptor is defined as the number of transparent elements that +	 * would be needed to construct an instance of the referenced descriptor. +	 */ +	SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, +	                 Handle<Node> descriptor, ssize_t depth) +	    : start(start), +	      end(end), +	      shortForm(shortForm), +	      descriptor(descriptor), +	      depth(depth) +	{ +	} + +	/** +	 * Inserts all tokens referenced in this SyntaxDescriptor into the +	 * given TokenSet. Skips token ids set to Tokens::Empty. +	 * +	 * @param set is the TokenSet instance into which the Tokens should be +	 * inserted. +	 */ +	void insertIntoTokenSet(TokenSet &set) const; + +	/** +	 * Returns true if and only if this SyntaxDescriptor belongs to an +	 * AnnotationClass. +	 * +	 * @return true if and only if this SyntaxDescriptor belongs to an +	 * AnnotationClass. +	 */ +	bool isAnnotation() const; + +	/** +	 * Returns true if and only if this SyntaxDescriptor belongs to a +	 * StructuredClass. +	 * +	 * @return true if and only if this SyntaxDescriptor belongs to a +	 * StructuredClass. +	 */ +	bool isStruct() const; + +	/** +	 * Returns true if and only if this SyntaxDescriptor belongs to a +	 * FieldDescriptor. +	 * +	 * @return true if and only if this SyntaxDescriptor belongs to a +	 * FieldDescriptor. 
+	 */ +	bool isFieldDescriptor() const; + +	/** +	 * Returns true if and only if this SyntaxDescriptor has only empty +	 * entries in start, end and short. +	 * +	 * @return true if and only if this SyntaxDescriptor has only empty +	 * entries in start, end and short. +	 */ +	bool isEmpty() const; +}; +} +#endif
\ No newline at end of file  | 
