From 522580cfdfc9e6dc3448240448c29533e68f240f Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:52:34 +0100
Subject: added check for whitespace characters in Utils::isUserDefinedToken

---
 test/core/common/UtilsTest.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'test')
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp
index 54890ee..2aaa430 100644
--- a/test/core/common/UtilsTest.cpp
+++ b/test/core/common/UtilsTest.cpp
@@ -148,6 +148,7 @@ TEST(Utils, isUserDefinedToken)
 	EXPECT_TRUE(Utils::isUserDefinedToken("`"));
 	EXPECT_TRUE(Utils::isUserDefinedToken("<"));
 	EXPECT_TRUE(Utils::isUserDefinedToken(">"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("<+>"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("a:"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("a:a"));
 	EXPECT_FALSE(Utils::isUserDefinedToken(":a"));
@@ -158,6 +159,7 @@ TEST(Utils, isUserDefinedToken)
 	EXPECT_FALSE(Utils::isUserDefinedToken("<\\"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("\\>"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("{!"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("< + >"));
 }
 
 }
-- 
cgit v1.2.3


From ee943c5e9b60cf577ff236a694df180db89b0972 Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:53:20 +0100
Subject: integrated syntax tokens in Domain.

---
 src/core/model/Domain.cpp      | 193 +++++++++++++++++++++++---
 src/core/model/Domain.hpp      | 297 +++++++++++++++++++++++++++++++++++++----
 test/core/model/DomainTest.cpp | 165 ++++++++++++++++++++++-
 3 files changed, 607 insertions(+), 48 deletions(-)

(limited to 'test')

diff --git a/src/core/model/Domain.cpp b/src/core/model/Domain.cpp
index 8255401..587a382 100644
--- a/src/core/model/Domain.cpp
+++ b/src/core/model/Domain.cpp
@@ -20,8 +20,9 @@
 #include <queue>
 #include <set>
 
-#include <core/common/RttiBuilder.hpp>
 #include <core/common/Exceptions.hpp>
+#include <core/common/RttiBuilder.hpp>
+#include <core/common/Utils.hpp>
 
 #include "Domain.hpp"
 
@@ -169,52 +170,60 @@ static NodeVector<Node> pathTo(const Node *start, Logger &logger,
 	return shortest;
 }
 
+struct CollectState {
+	Node *n;
+	size_t depth;
+
+	CollectState(Node *n, size_t depth) : n(n), depth(depth) {}
+};
+
 template <typename F>
 static NodeVector<Node> collect(const Node *start, F match)
 {
 	// result
 	NodeVector<Node> res;
 	// queue for breadth-first search of graph.
-	std::queue<Rooted<Node>> q;
+	std::queue<CollectState> q;
 	// put the initial node on the stack.
-	q.push(const_cast<Node *>(start));
+	q.push(CollectState(const_cast<Node *>(start), 0));
 	// set of visited nodes.
 	std::unordered_set<const Node *> visited;
 	while (!q.empty()) {
-		Rooted<Node> n = q.front();
+		CollectState state = q.front();
 		q.pop();
 		// do not proceed if this node was already visited.
-		if (!visited.insert(n.get()).second) {
+		if (!visited.insert(state.n).second) {
 			continue;
 		}
 
-		if (n->isa(&RttiTypes::StructuredClass)) {
-			Rooted<StructuredClass> strct = n.cast<StructuredClass>();
+		if (state.n->isa(&RttiTypes::Descriptor)) {
+			Rooted<Descriptor> strct{static_cast<Descriptor *>(state.n)};
 
 			// look through all fields.
 			NodeVector<FieldDescriptor> fields = strct->getFieldDescriptors();
 			for (auto fd : fields) {
 				// note matches.
-				if (match(fd)) {
+				if (match(fd, state.depth)) {
 					res.push_back(fd);
 				}
 				// only continue in the TREE field.
 				if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) {
-					q.push(fd);
+					q.push(CollectState(fd.get(), state.depth));
 				}
 			}
 		} else {
 			// otherwise this is a FieldDescriptor.
-			Rooted<FieldDescriptor> field = n.cast<FieldDescriptor>();
+			Rooted<FieldDescriptor> field{
+			    static_cast<FieldDescriptor *>(state.n)};
 			// and we proceed by visiting all permitted children.
 			for (auto c : field->getChildrenWithSubclasses()) {
 				// note matches.
-				if (match(c)) {
+				if (match(c, state.depth)) {
 					res.push_back(c);
 				}
 				// We only continue our search via transparent children.
 				if (c->isTransparent()) {
-					q.push(c);
+					q.push(CollectState(c.get(), state.depth + 1));
 				}
 			}
 		}
@@ -222,28 +231,59 @@ static NodeVector<Node> collect(const Node *start, F match)
 	return res;
 }
 
+static std::vector<SyntaxDescriptor> collectPermittedTokens(
+    const Node *start, Handle<Domain> domain)
+{
+	// gather SyntaxDescriptors for structure children first.
+	std::vector<SyntaxDescriptor> res;
+	collect(start, [&res](Handle<Node> n, size_t depth) {
+		SyntaxDescriptor stx;
+		if (n->isa(&RttiTypes::FieldDescriptor)) {
+			stx = n.cast<FieldDescriptor>()->getSyntaxDescriptor(depth);
+		} else {
+			stx = n.cast<Descriptor>()->getSyntaxDescriptor(depth);
+		}
+		// do not add trivial SyntaxDescriptors.
+		if (!stx.isEmpty()) {
+			res.push_back(stx);
+		}
+		return false;
+	});
+	// gather SyntaxDescriptors for AnnotationClasses.
+	for (auto a : domain->getAnnotationClasses()) {
+		SyntaxDescriptor stx = a->getSyntaxDescriptor();
+		if (!stx.isEmpty()) {
+			res.push_back(stx);
+		}
+	}
+	return res;
+}
+
 /* Class FieldDescriptor */
 
 FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Type> primitiveType,
                                  Handle<Descriptor> parent, FieldType fieldType,
-                                 std::string name, bool optional)
+                                 std::string name, bool optional,
+                                 WhitespaceMode whitespaceMode)
     : Node(mgr, std::move(name), parent),
       children(this),
       fieldType(fieldType),
       primitiveType(acquire(primitiveType)),
       optional(optional),
-      primitive(true)
+      primitive(true),
+      whitespaceMode(whitespaceMode)
 {
 }
 
 FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Descriptor> parent,
                                  FieldType fieldType, std::string name,
-                                 bool optional)
+                                 bool optional, WhitespaceMode whitespaceMode)
     : Node(mgr, std::move(name), parent),
       children(this),
       fieldType(fieldType),
       optional(optional),
-      primitive(false)
+      primitive(false),
+      whitespaceMode(whitespaceMode)
 {
 }
 
@@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const
 	} else {
 		valid = valid & validateName(logger);
 	}
+	// check start and end token.
+	if (!startToken.special && !startToken.token.empty() &&
+	    !Utils::isUserDefinedToken(startToken.token)) {
+		// TODO: Correct error message.
+		logger.error(std::string("Field \"") + getName() +
+		                 "\" has an invalid custom start token: " +
+		                 startToken.token,
+		             *this);
+		valid = false;
+	}
+	if (!endToken.special && !endToken.token.empty() &&
+	    !Utils::isUserDefinedToken(endToken.token)) {
+		// TODO: Correct error message.
+		logger.error(std::string("Field \"") + getName() +
+		                 "\" has an invalid custom end token: " +
+		                 endToken.token,
+		             *this);
+		valid = false;
+	}
 
 	// check consistency of FieldType with the rest of the FieldDescriptor.
 	if (primitive) {
@@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const
 }
 
 static void gatherSubclasses(
-    std::unordered_set<const StructuredClass *>& visited,
+    std::unordered_set<const StructuredClass *> &visited,
     NodeVector<StructuredClass> &res, Handle<StructuredClass> strct)
 {
 	// this check is to prevent cycles.
@@ -334,7 +393,7 @@ static void gatherSubclasses(
 	}
 	for (auto sub : strct->getSubclasses()) {
 		// this check is to prevent cycles.
-		if(visited.count(sub.get())){
+		if (visited.count(sub.get())) {
 			continue;
 		}
 		res.push_back(sub);
@@ -381,7 +440,7 @@ NodeVector<Node> FieldDescriptor::pathTo(Handle<FieldDescriptor> field,
 NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const
 {
 	// TODO: In principle a cast would be nicer here, but for now we copy.
-	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) {
+	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {
 		if (!n->isa(&RttiTypes::FieldDescriptor)) {
 			return false;
 		}
@@ -396,6 +455,16 @@ NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const
 	return res;
 }
 
+std::vector<SyntaxDescriptor> FieldDescriptor::getPermittedTokens() const
+{
+	if (getParent() == nullptr ||
+	    getParent().cast<Descriptor>()->getParent() == nullptr) {
+		return std::vector<SyntaxDescriptor>();
+	}
+	return collectPermittedTokens(
+	    this, getParent().cast<Descriptor>()->getParent().cast<Domain>());
+}
+
 /* Class Descriptor */
 
 void Descriptor::doResolve(ResolutionState &state)
@@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const
 		}
 		valid = valid & attributesDescriptor->validate(logger);
 	}
+
+	// check start and end token.
+	if (!startToken.special && !startToken.token.empty() &&
+	    !Utils::isUserDefinedToken(startToken.token)) {
+		logger.error(std::string("Descriptor \"") + getName() +
+		                 "\" has an invalid custom start token: " +
+		                 startToken.token,
+		             *this);
+		valid = false;
+	}
+	if (!endToken.special && !endToken.token.empty() &&
+	    !Utils::isUserDefinedToken(endToken.token)) {
+		logger.error(std::string("Descriptor \"") + getName() +
+		                 "\" has an invalid custom end token: " +
+		                 endToken.token,
+		             *this);
+		valid = false;
+	}
+
 	// check that only one FieldDescriptor is of type TREE.
 	auto fds = Descriptor::getFieldDescriptors();
 	bool hasTREE = false;
@@ -483,7 +571,7 @@ std::pair<NodeVector<Node>, bool> Descriptor::pathTo(
 NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const
 {
 	// TODO: In principle a cast would be nicer here, but for now we copy.
-	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) {
+	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {
 		if (!n->isa(&RttiTypes::FieldDescriptor)) {
 			return false;
 		}
@@ -501,7 +589,7 @@ NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const
 NodeVector<StructuredClass> Descriptor::getPermittedChildren() const
 {
 	// TODO: In principle a cast would be nicer here, but for now we copy.
-	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) {
+	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {
 		return n->isa(&RttiTypes::StructuredClass);
 	});
 	NodeVector<StructuredClass> res;
@@ -669,6 +757,14 @@ std::pair<Rooted<FieldDescriptor>, bool> Descriptor::createFieldDescriptor(
 	return std::make_pair(fd, sorted);
 }
 
+std::vector<SyntaxDescriptor> Descriptor::getPermittedTokens() const
+{
+	if (getParent() == nullptr) {
+		return std::vector<SyntaxDescriptor>();
+	}
+	return collectPermittedTokens(this, getParent().cast<Domain>());
+}
+
 /* Class StructuredClass */
 
 StructuredClass::StructuredClass(Manager &mgr, std::string name,
@@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const
 		logger.error(cardinality.toString() + " is not a cardinality!", *this);
 		valid = false;
 	}
+
+	// check short token.
+	if (!shortToken.special && !shortToken.token.empty() &&
+	    !Utils::isUserDefinedToken(shortToken.token)) {
+		logger.error(std::string("Descriptor \"") + getName() +
+		                 "\" has an invalid custom short form token: " +
+		                 shortToken.token,
+		             *this);
+		valid = false;
+	}
 	// check the validity of this superclass.
 	if (superclass != nullptr) {
 		valid = valid & superclass->validate(logger);
@@ -961,6 +1067,51 @@ Rooted<AnnotationClass> Domain::createAnnotationClass(std::string name)
 	    new AnnotationClass(getManager(), std::move(name), this)};
 }
 
+static void gatherTokenDescriptors(
+    Handle<Descriptor> desc, std::vector<TokenDescriptor *> &res,
+    std::unordered_set<FieldDescriptor *> &visited)
+{
+	// add the TokenDescriptors for the Descriptor itself.
+	if (!desc->getStartToken().isEmpty()) {
+		res.push_back(desc->getStartTokenPointer());
+	}
+	if (!desc->getEndToken().isEmpty()) {
+		res.push_back(desc->getEndTokenPointer());
+	}
+	// add the TokenDescriptors for its FieldDescriptors.
+	for (auto fd : desc->getFieldDescriptors()) {
+		if (!visited.insert(fd.get()).second) {
+			continue;
+		}
+		if (!fd->getStartToken().isEmpty()) {
+			res.push_back(fd->getStartTokenPointer());
+		}
+		if (!fd->getEndToken().isEmpty()) {
+			res.push_back(fd->getEndTokenPointer());
+		}
+	}
+}
+
+std::vector<TokenDescriptor *> Domain::getAllTokenDescriptors() const
+{
+	std::vector<TokenDescriptor *> res;
+	// note all fields that are already visited because FieldReferences might
+	// lead to doubled fields.
+	std::unordered_set<FieldDescriptor *> visited;
+	// add the TokenDescriptors for the StructuredClasses (and their fields).
+	for (auto s : structuredClasses) {
+		if (!s->getShortToken().isEmpty()) {
+			res.push_back(s->getShortTokenPointer());
+		}
+		gatherTokenDescriptors(s, res, visited);
+	}
+	// add the TokenDescriptors for the AnnotationClasses (and their fields).
+	for (auto a : annotationClasses) {
+		gatherTokenDescriptors(a, res, visited);
+	}
+	return res;
+}
+
 /* Type registrations */
 
 namespace RttiTypes {
diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp
index 7e10d91..e984ed9 100644
--- a/src/core/model/Domain.hpp
+++ b/src/core/model/Domain.hpp
@@ -167,11 +167,13 @@
 #ifndef _OUSIA_MODEL_DOMAIN_HPP_
 #define _OUSIA_MODEL_DOMAIN_HPP_
 
+#include <core/common/Whitespace.hpp>
 #include <core/managed/ManagedContainer.hpp>
 #include <core/RangeSet.hpp>
 
 #include "Node.hpp"
 #include "RootNode.hpp"
+#include "Syntax.hpp"
 #include "Typesystem.hpp"
 
 namespace ousia {
@@ -225,6 +227,9 @@ private:
 	Owned<Type> primitiveType;
 	bool optional;
 	bool primitive;
+	TokenDescriptor startToken;
+	TokenDescriptor endToken;
+	WhitespaceMode whitespaceMode;
 
 protected:
 	bool doValidate(Logger &logger) const override;
@@ -233,39 +238,46 @@ public:
 	/**
 	 * This is the constructor for primitive fields.
 	 *
-	 * @param mgr           is the global Manager instance.
-	 * @param parent        is a handle of the Descriptor node that has this
-	 *                      FieldDescriptor.
-	 * @param primitiveType is a handle to some Type in some Typesystem of which
-	 *                      one instance is allowed to fill this field.
-	 * @param name          is the name of this field.
-	 * @param optional      should be set to 'false' is this field needs to be
-	 *                      filled in order for an instance of the parent
-	 *                      Descriptor to be valid.
+	 * @param mgr            is the global Manager instance.
+	 * @param parent         is a handle of the Descriptor node that has this
+	 *                       FieldDescriptor.
+	 * @param primitiveType  is a handle to some Type in some Typesystem of
+	 *                       which one instance is allowed to fill this
+	 *                       field.
+	 * @param name           is the name of this field.
+	 * @param optional       should be set to 'false' if this field needs to be
+	 *                       filled in order for an instance of the parent
+	 *                       Descriptor to be valid.
+	 * @param whitespaceMode the WhitespaceMode to be used when an instance of
+	 *                       this FieldDescriptor is parsed.
 	 */
 	FieldDescriptor(Manager &mgr, Handle<Type> primitiveType,
 	                Handle<Descriptor> parent,
 	                FieldType fieldType = FieldType::TREE,
-	                std::string name = "", bool optional = false);
+	                std::string name = "", bool optional = false,
+	                WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
 
 	/**
 	 * This is the constructor for non-primitive fields. You have to provide
 	 * children here later on.
 	 *
-	 * @param mgr           is the global Manager instance.
-	 * @param parent        is a handle of the Descriptor node that has this
-	 *                      FieldDescriptor.
-	 * @param fieldType     is the FieldType of this FieldDescriptor, either
-	 *                      TREE for the main or default structure or SUBTREE
-	 *                      for supporting structures.
-	 * @param name          is the name of this field.
-	 * @param optional      should be set to 'false' is this field needs to be
-	 *                      filled in order for an instance of the parent
-	 *                      Descriptor to be valid.
+	 * @param mgr            is the global Manager instance.
+	 * @param parent         is a handle of the Descriptor node that has this
+	 *                       FieldDescriptor.
+	 * @param fieldType      is the FieldType of this FieldDescriptor, either
+	 *                       TREE for the main or default structure or SUBTREE
+	 *                       for supporting structures.
+	 * @param name           is the name of this field.
+	 * @param optional       should be set to 'false' if this field needs to be
+	 *                       filled in order for an instance of the parent
+	 *                       Descriptor to be valid.
+	 * @param whitespaceMode the WhitespaceMode to be used when an instance of
+	 *                       this FieldDescriptor is parsed.
 	 */
 	FieldDescriptor(Manager &mgr, Handle<Descriptor> parent = nullptr,
 	                FieldType fieldType = FieldType::TREE,
-	                std::string name = "", bool optional = false);
+	                std::string name = "", bool optional = false,
+	                WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
 
 	/**
 	 * Returns a const reference to the NodeVector of StructuredClasses whose
@@ -437,6 +449,109 @@ public:
 	 *         children of an instance of this Descriptor.
 	 */
 	NodeVector<FieldDescriptor> getDefaultFields() const;
+
+	/**
+	 * Returns a pointer to the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor starts.
+	 *
+	 * Note that changes made through this pointer do not invalidate the
+	 * FieldDescriptor, so use with care.
+	 *
+	 * @return a pointer to the start TokenDescriptor.
+	 */
+	TokenDescriptor *getStartTokenPointer() { return &startToken; }
+
+	/**
+	 * Returns a copy of the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor starts.
+	 *
+	 * @return a copy of the start TokenDescriptor.
+	 */
+	TokenDescriptor getStartToken() const { return startToken; }
+
+	/**
+	 * Sets the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor starts.
+	 *
+	 * @param st the new start TokenDescriptor.
+	 */
+	void setStartToken(TokenDescriptor st)
+	{
+		invalidate();
+		startToken = st;
+	}
+
+	/**
+	 * Returns a pointer to the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor ends.
+	 *
+	 * @return a pointer to the end TokenDescriptor.
+	 */
+	TokenDescriptor *getEndTokenPointer() { return &endToken; }
+
+	/**
+	 * Returns a copy of the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor ends.
+	 *
+	 * @return a copy of the end TokenDescriptor.
+	 */
+	TokenDescriptor getEndToken() const { return endToken; }
+
+	/**
+	 * Sets the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor ends.
+	 *
+	 * @param e the new end TokenDescriptor.
+	 */
+	void setEndToken(TokenDescriptor e)
+	{
+		invalidate();
+		endToken = e;
+	}
+
+	/**
+	 * Returns the WhitespaceMode to be used when an instance of this
+	 * FieldDescriptor is parsed.
+	 *
+	 * @return the WhitespaceMode to be used when an instance of this
+	 * FieldDescriptor is parsed.
+	 */
+	WhitespaceMode getWhitespaceMode() const { return whitespaceMode; }
+
+	/**
+	 * Sets the WhitespaceMode to be used when an instance of this
+	 * FieldDescriptor is parsed.
+	 *
+	 * @param wm the WhitespaceMode to be used when an instance of this
+	 * FieldDescriptor is parsed.
+	 */
+	WhitespaceMode setWhitespaceMode(WhitespaceMode wm)
+	{
+		return whitespaceMode = wm;
+	}
+
+	/**
+	 * Returns the SyntaxDescriptor for this FieldDescriptor.
+	 *
+	 * @return the SyntaxDescriptor for this FieldDescriptor.
+	 */
+	SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1)
+	{
+		SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty,
+		                     const_cast<FieldDescriptor *>(this), depth};
+		return stx;
+	}
+
+	/**
+	 * Returns a vector of SyntaxDescriptors, one for each Descriptor
+	 * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is
+	 * permitted as child of this FieldDescriptor. This also makes use
+	 * of transparency.
+	 *
+	 * @return a vector of SyntaxDescriptors, one for each Descriptor that is
+	 *         permitted as child of this FieldDescriptor
+	 */
+	std::vector<SyntaxDescriptor> getPermittedTokens() const;
 };
 
 /**
@@ -460,7 +575,10 @@ public:
  * </A>
  * \endcode
  *
- * key="value" inside the A-node would be an attribute, while <key>value</key>
+ * key="value" inside the A-node would be an attribute, while
+ * \code{.xml}
+ *   <key>value</key>
+ * \endcode
  * would be a primitive field. While equivalent in XML the semantics are
  * different: An attribute describes indeed attributes, features of one single
  * node whereas a primitive field describes the _content_ of a node.
@@ -472,6 +590,8 @@ class Descriptor : public Node {
 private:
 	Owned<StructType> attributesDescriptor;
 	NodeVector<FieldDescriptor> fieldDescriptors;
+	TokenDescriptor startToken;
+	TokenDescriptor endToken;
 
 	bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger);
 
@@ -720,6 +840,85 @@ public:
 	 *         of an instance of this Descriptor in the structure tree.
 	 */
 	NodeVector<StructuredClass> getPermittedChildren() const;
+
+	/**
+	 * Returns a pointer to the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor starts.
+	 *
+	 * @return a pointer to the start TokenDescriptor.
+	 */
+	TokenDescriptor *getStartTokenPointer() { return &startToken; }
+
+	/**
+	 * Returns a copy of the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor starts.
+	 *
+	 * @return a copy of the start TokenDescriptor.
+	 */
+	TokenDescriptor getStartToken() const { return startToken; }
+
+	/**
+	 * Sets the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor starts.
+	 *
+	 * @param st the new start TokenDescriptor.
+	 */
+	void setStartToken(TokenDescriptor st)
+	{
+		invalidate();
+		startToken = st;
+	}
+
+	/**
+	 * Returns a pointer to the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor ends.
+	 *
+	 * @return a pointer to the end TokenDescriptor.
+	 */
+	TokenDescriptor *getEndTokenPointer() { return &endToken; }
+
+	/**
+	 * Returns a copy of the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor ends.
+	 *
+	 * @return a copy of the end TokenDescriptor.
+	 */
+	TokenDescriptor getEndToken() const { return endToken; }
+
+	/**
+	 * Sets the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor ends.
+	 *
+	 * @param e the new end TokenDescriptor.
+	 */
+	void setEndToken(TokenDescriptor e)
+	{
+		invalidate();
+		endToken = e;
+	}
+
+	/**
+	 * Returns the SyntaxDescriptor for this Descriptor.
+	 *
+	 * @return the SyntaxDescriptor for this Descriptor.
+	 */
+	virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1)
+	{
+		SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty,
+		                     const_cast<Descriptor *>(this), depth};
+		return stx;
+	}
+
+	/**
+	 * Returns a vector of SyntaxDescriptors, one for each Descriptor
+	 * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is
+	 * permitted as child of this Descriptor. This also makes use
+	 * of transparency.
+	 *
+	 * @return a vector of SyntaxDescriptors, one for each Descriptor that is
+	 *         permitted as child of this Descriptor.
+	 */
+	std::vector<SyntaxDescriptor> getPermittedTokens() const;
 };
 /*
  * TODO: We should discuss Cardinalities one more time. Is it smart to define
@@ -806,6 +1005,7 @@ private:
 	NodeVector<StructuredClass> subclasses;
 	bool transparent;
 	bool root;
+	TokenDescriptor shortToken;
 
 	/**
 	 * Helper method for getFieldDescriptors.
@@ -963,6 +1163,50 @@ public:
 		invalidate();
 		root = std::move(r);
 	}
+
+	/**
+	 * Returns a pointer to the short TokenDescriptor. During parsing an
+	 * occurrence of this token will be translated to an empty instance of this
+	 * StructuredClass.
+	 *
+	 * @return a pointer to the short TokenDescriptor.
+	 */
+	TokenDescriptor *getShortTokenPointer() { return &shortToken; }
+
+	/**
+	 * Returns a copy of the short TokenDescriptor. During parsing an
+	 * occurrence of this token will be translated to an empty instance of this
+	 * StructuredClass.
+	 *
+	 * @return a copy of the short TokenDescriptor.
+	 */
+	TokenDescriptor getShortToken() const { return shortToken; }
+
+	/**
+	 * Sets the short TokenDescriptor. During parsing an
+	 * occurrence of this token will be translated to an empty instance of this
+	 * StructuredClass.
+	 *
+	 * @param s the new short TokenDescriptor.
+	 */
+	void setShortToken(TokenDescriptor s)
+	{
+		invalidate();
+		shortToken = s;
+	}
+
+	/**
+	 * Returns the SyntaxDescriptor for this StructuredClass.
+	 *
+	 * @return the SyntaxDescriptor for this StructuredClass.
+	 */
+	SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override
+	{
+		SyntaxDescriptor stx{getStartToken().id, getEndToken().id,
+		                     shortToken.id, const_cast<StructuredClass *>(this),
+		                     depth};
+		return stx;
+	}
 };
 
 /**
@@ -1188,6 +1432,13 @@ public:
 	{
 		domains.insert(domains.end(), ds.begin(), ds.end());
 	}
+
+	/**
+	 * Returns all TokenDescriptors of classes and fields in this Domain.
+	 *
+	 * @return all TokenDescriptors of classes and fields in this Domain.
+	 */
+	std::vector<TokenDescriptor *> getAllTokenDescriptors() const;
 };
 
 namespace RttiTypes {
@@ -1200,4 +1451,4 @@ extern const Rtti Domain;
 }
 }
 
-#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
+#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
\ No newline at end of file
diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp
index 6bbf26d..f59e745 100644
--- a/test/core/model/DomainTest.cpp
+++ b/test/core/model/DomainTest.cpp
@@ -82,9 +82,7 @@ TEST(Domain, testDomainResolving)
 }
 
 // i use this wrapper due to the strange behaviour of GTEST.
-static void assertFalse(bool b){
-	ASSERT_FALSE(b);
-}
+static void assertFalse(bool b) { ASSERT_FALSE(b); }
 
 static Rooted<FieldDescriptor> createUnsortedPrimitiveField(
     Handle<StructuredClass> strct, Handle<Type> type, Logger &logger, bool tree,
@@ -170,7 +168,6 @@ TEST(StructuredClass, getFieldDescriptors)
 	}
 }
 
-
 TEST(StructuredClass, getFieldDescriptorsCycles)
 {
 	Logger logger;
@@ -523,6 +520,91 @@ TEST(Descriptor, getPermittedChildrenCycles)
 	ASSERT_EQ(A, children[0]);
 }
 
+TEST(Descriptor, getSyntaxDescriptor)
+{
+	// build an ontology with some custom syntax.
+	Manager mgr{1};
+	Logger logger;
+	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)};
+	// Construct the domain
+	Rooted<Domain> domain{new Domain(mgr, sys, "ontology")};
+	Rooted<StructuredClass> A{new StructuredClass(
+	    mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)};
+	A->setStartToken(TokenDescriptor(Tokens::Indent));
+	A->setEndToken(TokenDescriptor(Tokens::Dedent));
+	{
+		TokenDescriptor sh{"<+>"};
+		sh.id = 1;
+		A->setShortToken(sh);
+	}
+	// check the SyntaxDescriptor
+	SyntaxDescriptor stx = A->getSyntaxDescriptor();
+	ASSERT_EQ(Tokens::Indent, stx.start);
+	ASSERT_EQ(Tokens::Dedent, stx.end);
+	ASSERT_EQ(1, stx.shortForm);
+	ASSERT_EQ(A, stx.descriptor);
+	ASSERT_TRUE(stx.isStruct());
+	ASSERT_FALSE(stx.isAnnotation());
+	ASSERT_FALSE(stx.isFieldDescriptor());
+}
+
+TEST(Descriptor, getPermittedTokens)
+{
+	// build an ontology with some custom syntax.
+	Manager mgr{1};
+	Logger logger;
+	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)};
+	// Construct the domain
+	Rooted<Domain> domain{new Domain(mgr, sys, "ontology")};
+	// add one StructuredClass with all tokens set.
+	Rooted<StructuredClass> A{new StructuredClass(
+	    mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)};
+	A->setStartToken(TokenDescriptor(Tokens::Indent));
+	A->setEndToken(TokenDescriptor(Tokens::Dedent));
+	{
+		TokenDescriptor sh{"<+>"};
+		sh.id = 1;
+		A->setShortToken(sh);
+	}
+	// add a field with one token set.
+	Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first;
+	A_field->setEndToken(TokenDescriptor(Tokens::Newline));
+	A_field->addChild(A);
+	// add an annotation with start and end set.
+	Rooted<AnnotationClass> A_anno = domain->createAnnotationClass("A");
+	{
+		TokenDescriptor start{"<"};
+		start.id = 7;
+		A_anno->setStartToken(start);
+	}
+	{
+		TokenDescriptor end{">"};
+		end.id = 8;
+		A_anno->setEndToken(end);
+	}
+	// add a trivial annotation, which should not be returned.
+	Rooted<AnnotationClass> B_anno = domain->createAnnotationClass("B");
+	ASSERT_TRUE(domain->validate(logger));
+
+	// check result.
+	std::vector<SyntaxDescriptor> stxs = A->getPermittedTokens();
+	ASSERT_EQ(3, stxs.size());
+	// the field should be first, because A itself should not be collected
+	// directly.
+	ASSERT_EQ(A_field, stxs[0].descriptor);
+	ASSERT_EQ(Tokens::Empty, stxs[0].start);
+	ASSERT_EQ(Tokens::Newline, stxs[0].end);
+	ASSERT_EQ(Tokens::Empty, stxs[0].shortForm);
+	ASSERT_EQ(A, stxs[1].descriptor);
+	ASSERT_EQ(Tokens::Indent, stxs[1].start);
+	ASSERT_EQ(Tokens::Dedent, stxs[1].end);
+	ASSERT_EQ(1, stxs[1].shortForm);
+	ASSERT_EQ(A_anno, stxs[2].descriptor);
+	ASSERT_EQ(7, stxs[2].start);
+	ASSERT_EQ(8, stxs[2].end);
+	ASSERT_EQ(Tokens::Empty, stxs[2].shortForm);
+}
+
 TEST(StructuredClass, isSubclassOf)
 {
 	// create an inheritance hierarchy.
@@ -629,6 +711,14 @@ TEST(Domain, validate)
 		base_field->setPrimitiveType(sys->getStringType());
 		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
 		ASSERT_TRUE(domain->validate(logger));
+		// add an invalid start token.
+		base_field->setStartToken(TokenDescriptor("< + >"));
+		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
+		ASSERT_FALSE(domain->validate(logger));
+		// make it valid.
+		base_field->setStartToken(TokenDescriptor("<"));
+		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
+		ASSERT_TRUE(domain->validate(logger));
 		// add a subclass for our base class.
 		Rooted<StructuredClass> sub{new StructuredClass(mgr, "sub", domain)};
 		// this should be valid in itself.
@@ -686,4 +776,71 @@ TEST(Domain, validate)
 		ASSERT_TRUE(domain->validate(logger));
 	}
 }
+
+TEST(Domain, getAllTokenDescriptors)
+{
+	// build an ontology with some custom syntax.
+	Manager mgr{1};
+	Logger logger;
+	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)};
+	// Construct the domain
+	Rooted<Domain> domain{new Domain(mgr, sys, "ontology")};
+	// add one StructuredClass with all tokens set.
+	Rooted<StructuredClass> A{new StructuredClass(
+	    mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)};
+	A->setStartToken(TokenDescriptor(Tokens::Indent));
+	A->setEndToken(TokenDescriptor(Tokens::Dedent));
+	{
+		TokenDescriptor sh{"<+>"};
+		sh.id = 1;
+		A->setShortToken(sh);
+	}
+	// add a field with one token set.
+	Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first;
+	A_field->setEndToken(TokenDescriptor(Tokens::Newline));
+	A_field->addChild(A);
+	// add an annotation with start and end set.
+	Rooted<AnnotationClass> A_anno = domain->createAnnotationClass("A");
+	{
+		TokenDescriptor start{"<"};
+		start.id = 7;
+		A_anno->setStartToken(start);
+	}
+	{
+		TokenDescriptor end{">"};
+		end.id = 8;
+		A_anno->setEndToken(end);
+	}
+	// add a trivial annotation, which should not be returned.
+	Rooted<AnnotationClass> B_anno = domain->createAnnotationClass("B");
+	ASSERT_TRUE(domain->validate(logger));
+
+	// check the result.
+	std::vector<TokenDescriptor *> tks = domain->getAllTokenDescriptors();
+
+	// A short token
+	ASSERT_EQ("<+>", tks[0]->token);
+	ASSERT_EQ(1, tks[0]->id);
+	ASSERT_FALSE(tks[0]->special);
+	// A start token
+	ASSERT_EQ("", tks[1]->token);
+	ASSERT_EQ(Tokens::Indent, tks[1]->id);
+	ASSERT_TRUE(tks[1]->special);
+	// A end token
+	ASSERT_EQ("", tks[2]->token);
+	ASSERT_EQ(Tokens::Dedent, tks[2]->id);
+	ASSERT_TRUE(tks[2]->special);
+	// A field end token
+	ASSERT_EQ("", tks[3]->token);
+	ASSERT_EQ(Tokens::Newline, tks[3]->id);
+	ASSERT_TRUE(tks[3]->special);
+	// A anno start token
+	ASSERT_EQ("<", tks[4]->token);
+	ASSERT_EQ(7, tks[4]->id);
+	ASSERT_FALSE(tks[4]->special);
+	// A anno end token
+	ASSERT_EQ(">", tks[5]->token);
+	ASSERT_EQ(8, tks[5]->id);
+	ASSERT_FALSE(tks[5]->special);
+}
 }
\ No newline at end of file
-- 
cgit v1.2.3


From 4b5f37d07e4e691848b243ae795bb59893a6379c Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:55:41 +0100
Subject: added another domain test case for invalid syntax tokens.

---
 test/core/model/DomainTest.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'test')

diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp
index f59e745..b3c5771 100644
--- a/test/core/model/DomainTest.cpp
+++ b/test/core/model/DomainTest.cpp
@@ -701,6 +701,14 @@ TEST(Domain, validate)
 		base->setName("myClass");
 		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
 		ASSERT_TRUE(domain->validate(logger));
+		// add an invalid short token.
+		base->setShortToken(TokenDescriptor("bla"));
+		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
+		ASSERT_FALSE(domain->validate(logger));
+		// make it valid.
+		base->setShortToken(TokenDescriptor("!bla!"));
+		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
+		ASSERT_TRUE(domain->validate(logger));
 		// Let's add a primitive field (without a primitive type at first)
 		Rooted<FieldDescriptor> base_field =
 		    base->createPrimitiveFieldDescriptor(nullptr, logger).first;
-- 
cgit v1.2.3