From 5d6ee07995c7f59e66e0df558c8ebe7d2a8d1f68 Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:52:13 +0100
Subject: refactored SyntaxDescriptor to Token.hpp and added TokenDescriptor
 class.

---
 CMakeLists.txt                       |   1 +
 src/core/common/Token.cpp            |  14 ---
 src/core/common/Token.hpp            |  67 +-----------
 src/core/model/Syntax.cpp            |  58 +++++++++++
 src/core/model/Syntax.hpp            | 196 +++++++++++++++++++++++++++++++++++
 src/core/parser/stack/Callbacks.hpp  |   3 +-
 src/core/parser/stack/Handler.cpp    |   2 +-
 src/core/parser/stack/Handler.hpp    |   3 +-
 src/core/parser/stack/TokenStack.cpp |   4 +-
 src/core/parser/stack/TokenStack.hpp |   5 +-
 10 files changed, 266 insertions(+), 87 deletions(-)
 create mode 100644 src/core/model/Syntax.cpp
 create mode 100644 src/core/model/Syntax.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b206458..13de9ac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -176,6 +176,7 @@ ADD_LIBRARY(ousia_core
 	src/core/model/Project
 	src/core/model/RootNode
 	src/core/model/Style
+	src/core/model/Syntax
 	src/core/model/Typesystem
 	src/core/parser/Parser
 	src/core/parser/ParserContext
diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp
index e454ae4..17ce03e 100644
--- a/src/core/common/Token.cpp
+++ b/src/core/common/Token.cpp
@@ -20,19 +20,5 @@
 
 namespace ousia {
 
-/* Class TokenSyntaxDescriptor */
-
-void TokenSyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const
-{
-	if (start != Tokens::Empty) {
-		set.insert(start);
-	}
-	if (end != Tokens::Empty) {
-		set.insert(end);
-	}
-	if (shortForm != Tokens::Empty) {
-		set.insert(shortForm);
-	}
-}
 }
 
diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp
index f89a0ce..f37151f 100644
--- a/src/core/common/Token.hpp
+++ b/src/core/common/Token.hpp
@@ -173,71 +173,6 @@ struct Token {
 	const SourceLocation &getLocation() const { return location; }
 };
 
-/**
- * Class describing the user defined syntax for a single field or annotation.
- */
-struct TokenSyntaxDescriptor {
-	/**
-	 * Possible start token or Tokens::Empty if no token is set.
-	 */
-	TokenId start;
-
-	/**
-	 * Possible end token or Tokens::Empty if no token is set.
-	 */
-	TokenId end;
-
-	/**
-	 * Possible representation token or Tokens::Empty if no token is set.
-	 */
-	TokenId shortForm;
-
-	/**
-	 * Flag specifying whether this TokenSyntaxDescriptor describes an
-	 * annotation.
-	 */
-	bool isAnnotation;
-
-	/**
-	 * Default constructor, sets all token ids to Tokens::Empty and isAnnotation
-	 * to false.
-	 */
-	TokenSyntaxDescriptor()
-	    : start(Tokens::Empty),
-	      end(Tokens::Empty),
-	      shortForm(Tokens::Empty),
-	      isAnnotation(false)
-	{
-	}
-
-	/**
-	 * Member initializer constructor.
-	 *
-	 * @param start is a possible start token.
-	 * @param end is a possible end token.
-	 * @param shortForm is a possible short form token.
-	 * @param isAnnotation is set to true if this syntax descriptor describes an
-	 * annotation.
-	 */
-	TokenSyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm,
-	                      bool isAnnotation)
-	    : start(start),
-	      end(end),
-	      shortForm(shortForm),
-	      isAnnotation(isAnnotation)
-	{
-	}
-
-	/**
-	 * Inserts all tokens referenced in this TokenSyntaxDescriptor into the
-	 * given TokenSet. Skips token ids set to Tokens::Empty.
-	 *
-	 * @param set is the TokenSet instance into which the Tokens should be
-	 * inserted.
-	 */
-	void insertIntoTokenSet(TokenSet &set) const;
-};
 }
 
-#endif /* _OUSIA_TOKENS_HPP_ */
-
+#endif /* _OUSIA_TOKENS_HPP_ */
\ No newline at end of file
diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp
new file mode 100644
index 0000000..9dbaccc
--- /dev/null
+++ b/src/core/model/Syntax.cpp
@@ -0,0 +1,58 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "Syntax.hpp"
+
+#include "Domain.hpp"
+
+namespace ousia {
+
+/* Class SyntaxDescriptor */
+
+bool SyntaxDescriptor::isAnnotation() const  // false if descriptor is null
+{
+	return descriptor != nullptr && descriptor->isa(&RttiTypes::AnnotationClass);
+}
+bool SyntaxDescriptor::isFieldDescriptor() const  // false if descriptor is null
+{
+	return descriptor != nullptr && descriptor->isa(&RttiTypes::FieldDescriptor);
+}
+bool SyntaxDescriptor::isStruct() const  // false if descriptor is null
+{
+	return descriptor != nullptr && descriptor->isa(&RttiTypes::StructuredClass);
+}
+
+void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const
+{
+	if (start != Tokens::Empty) {
+		set.insert(start);
+	}
+	if (end != Tokens::Empty) {
+		set.insert(end);
+	}
+	if (shortForm != Tokens::Empty) {
+		set.insert(shortForm);
+	}
+}
+
+bool SyntaxDescriptor::isEmpty() const
+{
+	return start == Tokens::Empty && end == Tokens::Empty &&
+	       shortForm == Tokens::Empty;
+}
+}
\ No newline at end of file
diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp
new file mode 100644
index 0000000..4da3408
--- /dev/null
+++ b/src/core/model/Syntax.hpp
@@ -0,0 +1,196 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Syntax.hpp
+ *
+ * This header contains the Descriptor classes for user definable syntax for
+ * Document entities or fields. These classes are referenced in Ontology.hpp.
+ */
+
+#ifndef _OUSIA_MODEL_SYNTAX_HPP_
+#define _OUSIA_MODEL_SYNTAX_HPP_
+
+#include <core/common/Token.hpp>
+#include "Node.hpp"
+
+namespace ousia {
+
+/**
+ * Class to describe a single token that shall be used as user-defined syntax.
+ */
+struct TokenDescriptor {
+	/**
+	 * The string content of this token, if it is not a special one.
+	 */
+	std::string token;
+	/**
+	 * A flag to be set true if this TokenDescriptor uses a special token.
+	 */
+	bool special;
+	/**
+	 * An id to uniquely identify this token.
+	 */
+	TokenId id;
+
+	/**
+	 * Constructor for non-special tokens. The special flag is set to false and
+	 * the id to Tokens::Empty.
+	 *
+	 * @param token The string content of this token, if it is not a special
+	 *              one.
+	 */
+	TokenDescriptor(std::string token = std::string())
+	    : token(std::move(token)), special(false), id(Tokens::Empty)
+	{
+	}
+
+	/**
+	 * Constructor for special tokens. The token is set to an empty string and
+	 * the special flag to true.
+	 *
+	 * @param id the id of the special token.
+	 */
+	TokenDescriptor(TokenId id) : special(true), id(id) {}
+
+	/**
+	 * Returns true if and only if neither a string nor an ID is given.
+	 *
+	 * @return true if and only if neither a string nor an ID is given.
+	 */
+	bool isEmpty() const { return token.empty() && id == Tokens::Empty; }
+};
+
+/**
+ * Class describing the user defined syntax for a StructuredClass,
+ * AnnotationClass or FieldDescriptor.
+ *
+ * This class is used during parsing of a Document. It is used to describe
+ * the tokens relevant for one Descriptor that could be created at this point
+ * during parsing.
+ */
+struct SyntaxDescriptor {
+	/**
+	 * Possible start token or Tokens::Empty if no token is set.
+	 */
+	TokenId start;
+
+	/**
+	 * Possible end token or Tokens::Empty if no token is set.
+	 */
+	TokenId end;
+
+	/**
+	 * Possible short form token or Tokens::Empty if no token is set.
+	 */
+	TokenId shortForm;
+
+	/**
+	 * The Descriptor this SyntaxDescriptor belongs to. As this may be
+	 * a FieldDescriptor as well as a class Descriptor (StructuredClass or
+	 * AnnotationClass) we can only use the class Node as inner argument here.
+	 */
+	Rooted<Node> descriptor;
+	/**
+	 * Given the current leaf in the parsed document the depth of a
+	 * SyntaxDescriptor is defined as the number of transparent elements that
+	 * would be needed to construct an instance of the referenced descriptor.
+	 */
+	ssize_t depth;
+
+	/**
+	 * Default constructor, sets all token ids to Tokens::Empty and the
+	 * descriptor handle to nullptr.
+	 */
+	SyntaxDescriptor()
+	    : start(Tokens::Empty),
+	      end(Tokens::Empty),
+	      shortForm(Tokens::Empty),
+	      descriptor(nullptr),
+	      depth(-1)
+	{
+	}
+
+	/**
+	 * Member initializer constructor.
+	 *
+	 * @param start is a possible start token.
+	 * @param end is a possible end token.
+	 * @param shortForm is a possible short form token.
+	 * @param descriptor The Descriptor this SyntaxDescriptor belongs to.
+	 * @param depth Given the current leaf in the parsed document the depth of a
+	 * SyntaxDescriptor is defined as the number of transparent elements that
+	 * would be needed to construct an instance of the referenced descriptor.
+	 */
+	SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm,
+	                 Handle<Node> descriptor, ssize_t depth)
+	    : start(start),
+	      end(end),
+	      shortForm(shortForm),
+	      descriptor(descriptor),
+	      depth(depth)
+	{
+	}
+
+	/**
+	 * Inserts all tokens referenced in this SyntaxDescriptor into the
+	 * given TokenSet. Skips token ids set to Tokens::Empty.
+	 *
+	 * @param set is the TokenSet instance into which the Tokens should be
+	 * inserted.
+	 */
+	void insertIntoTokenSet(TokenSet &set) const;
+
+	/**
+	 * Returns true if and only if this SyntaxDescriptor belongs to an
+	 * AnnotationClass.
+	 *
+	 * @return true if and only if this SyntaxDescriptor belongs to an
+	 * AnnotationClass.
+	 */
+	bool isAnnotation() const;
+
+	/**
+	 * Returns true if and only if this SyntaxDescriptor belongs to a
+	 * StructuredClass.
+	 *
+	 * @return true if and only if this SyntaxDescriptor belongs to a
+	 * StructuredClass.
+	 */
+	bool isStruct() const;
+
+	/**
+	 * Returns true if and only if this SyntaxDescriptor belongs to a
+	 * FieldDescriptor.
+	 *
+	 * @return true if and only if this SyntaxDescriptor belongs to a
+	 * FieldDescriptor.
+	 */
+	bool isFieldDescriptor() const;
+
+	/**
+	 * Returns true if and only if this SyntaxDescriptor has only empty
+	 * entries in start, end and shortForm.
+	 *
+	 * @return true if and only if this SyntaxDescriptor has only empty
+	 * entries in start, end and shortForm.
+	 */
+	bool isEmpty() const;
+};
+}
+#endif
\ No newline at end of file
diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp
index d7b2547..e471881 100644
--- a/src/core/parser/stack/Callbacks.hpp
+++ b/src/core/parser/stack/Callbacks.hpp
@@ -34,6 +34,7 @@
 
 #include <core/common/Whitespace.hpp>
 #include <core/common/Token.hpp>
+#include <core/model/Syntax.hpp>
 
 namespace ousia {
 
@@ -96,7 +97,7 @@ public:
 	 * @param tokens is a list of TokenSyntaxDescriptor instances that should be
 	 * stored on the stack.
 	 */
-	void pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens);
+	void pushTokens(const std::vector<SyntaxDescriptor> &tokens);
 
 	/**
 	 * Removes the previously pushed list of tokens from the stack.
diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp
index 734976a..12df0fd 100644
--- a/src/core/parser/stack/Handler.cpp
+++ b/src/core/parser/stack/Handler.cpp
@@ -74,7 +74,7 @@ Variant Handler::readData()
 	return handlerData.callbacks.readData();
 }
 
-void Handler::pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens)
+void Handler::pushTokens(const std::vector<SyntaxDescriptor> &tokens)
 {
 	handlerData.callbacks.pushTokens(tokens);
 }
diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp
index 848d395..19660d0 100644
--- a/src/core/parser/stack/Handler.hpp
+++ b/src/core/parser/stack/Handler.hpp
@@ -24,6 +24,7 @@
 #include <core/common/Location.hpp>
 #include <core/common/Variant.hpp>
 #include <core/common/Whitespace.hpp>
+#include <core/model/Syntax.hpp>
 
 namespace ousia {
 
@@ -200,7 +201,7 @@ protected:
 	 * @param tokens is a list of TokenSyntaxDescriptor instances that should be
 	 * stored on the stack.
 	 */
-	void pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens);
+	void pushTokens(const std::vector<SyntaxDescriptor> &tokens);
 
 	/**
 	 * Calls the corresponding function in the HandlerCallbacks instance.
diff --git a/src/core/parser/stack/TokenStack.cpp b/src/core/parser/stack/TokenStack.cpp
index 6afeaed..ac1d94e 100644
--- a/src/core/parser/stack/TokenStack.cpp
+++ b/src/core/parser/stack/TokenStack.cpp
@@ -21,7 +21,7 @@
 namespace ousia {
 namespace parser_stack {
 
-void TokenStack::pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens)
+void TokenStack::pushTokens(const std::vector<SyntaxDescriptor> &tokens)
 {
 	stack.push_back(tokens);
 }
@@ -35,7 +35,7 @@ TokenSet TokenStack::tokens() const
 	}
 
 	TokenSet res;
-	for (const TokenSyntaxDescriptor &descr : stack.back()) {
+	for (const SyntaxDescriptor &descr : stack.back()) {
 		descr.insertIntoTokenSet(res);
 	}
 	return res;
diff --git a/src/core/parser/stack/TokenStack.hpp b/src/core/parser/stack/TokenStack.hpp
index 9669f50..af734bb 100644
--- a/src/core/parser/stack/TokenStack.hpp
+++ b/src/core/parser/stack/TokenStack.hpp
@@ -32,6 +32,7 @@
 #include <vector>
 
 #include <core/common/Token.hpp>
+#include <core/model/Syntax.hpp>
 
 namespace ousia {
 namespace parser_stack {
@@ -52,7 +53,7 @@ private:
 	 * Stack containing vectors of TokenSyntaxDescriptor instances as given by
 	 * the user.
 	 */
-	std::vector<std::vector<TokenSyntaxDescriptor>> stack;
+	std::vector<std::vector<SyntaxDescriptor>> stack;
 
 	/**
 	 * Constructor of the TokenStack class.
@@ -86,7 +87,7 @@ public:
 	 * @param tokens is a list of TokenSyntaxDescriptor instances that should be
 	 * stored on the stack.
 	 */
-	void pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens);
+	void pushTokens(const std::vector<SyntaxDescriptor> &tokens);
 
 	/**
 	 * Removes the previously pushed list of tokens from the stack.
-- 
cgit v1.2.3


From 522580cfdfc9e6dc3448240448c29533e68f240f Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:52:34 +0100
Subject: added check for witespace characters in Utils::isUserDefinedToken

---
 src/core/common/Utils.cpp      | 15 +++++++++++----
 src/core/common/Utils.hpp      |  1 +
 test/core/common/UtilsTest.cpp |  2 ++
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
index 219b437..a87ff6d 100644
--- a/src/core/common/Utils.cpp
+++ b/src/core/common/Utils.cpp
@@ -124,7 +124,8 @@ bool Utils::isUserDefinedToken(const std::string &token)
 	// Make sure the token meets is neither empty, nor starts or ends with an
 	// alphanumeric character
 	const size_t len = token.size();
-	if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) {
+	if (len == 0 || isAlphanumeric(token[0]) ||
+	    isAlphanumeric(token[len - 1])) {
 		return false;
 	}
 
@@ -134,13 +135,19 @@ bool Utils::isUserDefinedToken(const std::string &token)
 		return false;
 	}
 
+	// Make sure the token does not contain any whitespaces.
+	for (char c : token) {
+		if (isWhitespace(c)) {
+			return false;
+		}
+	}
+
 	// Make sure the token contains other characters but { and }
-	for (char c: token) {
+	for (char c : token) {
 		if (c != '{' && c != '}') {
 			return true;
 		}
 	}
 	return false;
 }
-}
-
+}
\ No newline at end of file
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 25a4de5..d9e26da 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -117,6 +117,7 @@ public:
 	 *        <li>'%', '%{', '}%'</li>
 	 *      </ul>
 	 *   </li>
+	 *   <li>The token does not contain any whitespaces.</li>
 	 * </ul>
 	 */
 	static bool isUserDefinedToken(const std::string &token);
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp
index 54890ee..2aaa430 100644
--- a/test/core/common/UtilsTest.cpp
+++ b/test/core/common/UtilsTest.cpp
@@ -148,6 +148,7 @@ TEST(Utils, isUserDefinedToken)
 	EXPECT_TRUE(Utils::isUserDefinedToken("`"));
 	EXPECT_TRUE(Utils::isUserDefinedToken("<"));
 	EXPECT_TRUE(Utils::isUserDefinedToken(">"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("<+>"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("a:"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("a:a"));
 	EXPECT_FALSE(Utils::isUserDefinedToken(":a"));
@@ -158,6 +159,7 @@ TEST(Utils, isUserDefinedToken)
 	EXPECT_FALSE(Utils::isUserDefinedToken("<\\"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("\\>"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("{!"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("< + >"));
 }
 
 }
-- 
cgit v1.2.3


From ee943c5e9b60cf577ff236a694df180db89b0972 Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:53:20 +0100
Subject: integrated syntax tokens in Domain.

---
 src/core/model/Domain.cpp      | 193 +++++++++++++++++++++++---
 src/core/model/Domain.hpp      | 297 +++++++++++++++++++++++++++++++++++++----
 test/core/model/DomainTest.cpp | 165 ++++++++++++++++++++++-
 3 files changed, 607 insertions(+), 48 deletions(-)

diff --git a/src/core/model/Domain.cpp b/src/core/model/Domain.cpp
index 8255401..587a382 100644
--- a/src/core/model/Domain.cpp
+++ b/src/core/model/Domain.cpp
@@ -20,8 +20,9 @@
 #include <queue>
 #include <set>
 
-#include <core/common/RttiBuilder.hpp>
 #include <core/common/Exceptions.hpp>
+#include <core/common/RttiBuilder.hpp>
+#include <core/common/Utils.hpp>
 
 #include "Domain.hpp"
 
@@ -169,52 +170,60 @@ static NodeVector<Node> pathTo(const Node *start, Logger &logger,
 	return shortest;
 }
 
+struct CollectState {  // queue entry of the breadth-first search in collect()
+	Node *n;  // node that is still to be visited
+	size_t depth;  // number of transparent elements passed to reach n
+
+	CollectState(Node *n, size_t depth) : n(n), depth(depth) {}
+};
+
 template <typename F>
 static NodeVector<Node> collect(const Node *start, F match)
 {
 	// result
 	NodeVector<Node> res;
 	// queue for breadth-first search of graph.
-	std::queue<Rooted<Node>> q;
+	std::queue<CollectState> q;
 	// put the initial node on the stack.
-	q.push(const_cast<Node *>(start));
+	q.push(CollectState(const_cast<Node *>(start), 0));
 	// set of visited nodes.
 	std::unordered_set<const Node *> visited;
 	while (!q.empty()) {
-		Rooted<Node> n = q.front();
+		CollectState state = q.front();
 		q.pop();
 		// do not proceed if this node was already visited.
-		if (!visited.insert(n.get()).second) {
+		if (!visited.insert(state.n).second) {
 			continue;
 		}
 
-		if (n->isa(&RttiTypes::StructuredClass)) {
-			Rooted<StructuredClass> strct = n.cast<StructuredClass>();
+		if (state.n->isa(&RttiTypes::Descriptor)) {
+			Rooted<Descriptor> strct{static_cast<Descriptor *>(state.n)};
 
 			// look through all fields.
 			NodeVector<FieldDescriptor> fields = strct->getFieldDescriptors();
 			for (auto fd : fields) {
 				// note matches.
-				if (match(fd)) {
+				if (match(fd, state.depth)) {
 					res.push_back(fd);
 				}
 				// only continue in the TREE field.
 				if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) {
-					q.push(fd);
+					q.push(CollectState(fd.get(), state.depth));
 				}
 			}
 		} else {
 			// otherwise this is a FieldDescriptor.
-			Rooted<FieldDescriptor> field = n.cast<FieldDescriptor>();
+			Rooted<FieldDescriptor> field{
+			    static_cast<FieldDescriptor *>(state.n)};
 			// and we proceed by visiting all permitted children.
 			for (auto c : field->getChildrenWithSubclasses()) {
 				// note matches.
-				if (match(c)) {
+				if (match(c, state.depth)) {
 					res.push_back(c);
 				}
 				// We only continue our search via transparent children.
 				if (c->isTransparent()) {
-					q.push(c);
+					q.push(CollectState(c.get(), state.depth + 1));
 				}
 			}
 		}
@@ -222,28 +231,59 @@ static NodeVector<Node> collect(const Node *start, F match)
 	return res;
 }
 
+static std::vector<SyntaxDescriptor> collectPermittedTokens(
+    const Node *start, Handle<Domain> domain)
+{
+	// gather the SyntaxDescriptors of all nodes offered by collect() first.
+	std::vector<SyntaxDescriptor> res;
+	collect(start, [&res](Handle<Node> n, size_t depth) {
+		SyntaxDescriptor stx;
+		if (n->isa(&RttiTypes::FieldDescriptor)) {
+			stx = n.cast<FieldDescriptor>()->getSyntaxDescriptor(depth);
+		} else {
+			stx = n.cast<Descriptor>()->getSyntaxDescriptor(depth);
+		}
+		// skip SyntaxDescriptors that do not define any token.
+		if (!stx.isEmpty()) {
+			res.push_back(stx);
+		}
+		return false;  // never match: res is filled directly
+	});
+	// then add those of all AnnotationClasses of the given domain.
+	for (auto a : domain->getAnnotationClasses()) {
+		SyntaxDescriptor stx = a->getSyntaxDescriptor();
+		if (!stx.isEmpty()) {
+			res.push_back(stx);
+		}
+	}
+	return res;
+}
+
 /* Class FieldDescriptor */
 
 FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Type> primitiveType,
                                  Handle<Descriptor> parent, FieldType fieldType,
-                                 std::string name, bool optional)
+                                 std::string name, bool optional,
+                                 WhitespaceMode whitespaceMode)
     : Node(mgr, std::move(name), parent),
       children(this),
       fieldType(fieldType),
       primitiveType(acquire(primitiveType)),
       optional(optional),
-      primitive(true)
+      primitive(true),
+      whitespaceMode(whitespaceMode)
 {
 }
 
 FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Descriptor> parent,
                                  FieldType fieldType, std::string name,
-                                 bool optional)
+                                 bool optional, WhitespaceMode whitespaceMode)
     : Node(mgr, std::move(name), parent),
       children(this),
       fieldType(fieldType),
       optional(optional),
-      primitive(false)
+      primitive(false),
+      whitespaceMode(whitespaceMode)
 {
 }
 
@@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const
 	} else {
 		valid = valid & validateName(logger);
 	}
+	// check start and end token.
+	if (!startToken.special && !startToken.token.empty() &&
+	    !Utils::isUserDefinedToken(startToken.token)) {
+		// TODO: Correct error message.
+		logger.error(std::string("Field \"") + getName() +
+		                 "\" has an invalid custom start token: " +
+		                 startToken.token,
+		             *this);
+		valid = false;
+	}
+	if (!endToken.special && !endToken.token.empty() &&
+	    !Utils::isUserDefinedToken(endToken.token)) {
+		// TODO: Correct error message.
+		logger.error(std::string("Field \"") + getName() +
+		                 "\" has an invalid custom end token: " +
+		                 endToken.token,
+		             *this);
+		valid = false;
+	}
 
 	// check consistency of FieldType with the rest of the FieldDescriptor.
 	if (primitive) {
@@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const
 }
 
 static void gatherSubclasses(
-    std::unordered_set<const StructuredClass *>& visited,
+    std::unordered_set<const StructuredClass *> &visited,
     NodeVector<StructuredClass> &res, Handle<StructuredClass> strct)
 {
 	// this check is to prevent cycles.
@@ -334,7 +393,7 @@ static void gatherSubclasses(
 	}
 	for (auto sub : strct->getSubclasses()) {
 		// this check is to prevent cycles.
-		if(visited.count(sub.get())){
+		if (visited.count(sub.get())) {
 			continue;
 		}
 		res.push_back(sub);
@@ -381,7 +440,7 @@ NodeVector<Node> FieldDescriptor::pathTo(Handle<FieldDescriptor> field,
 NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const
 {
 	// TODO: In principle a cast would be nicer here, but for now we copy.
-	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) {
+	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {
 		if (!n->isa(&RttiTypes::FieldDescriptor)) {
 			return false;
 		}
@@ -396,6 +455,16 @@ NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const
 	return res;
 }
 
+std::vector<SyntaxDescriptor> FieldDescriptor::getPermittedTokens() const
+{
+	if (getParent() == nullptr ||
+	    getParent().cast<Descriptor>()->getParent() == nullptr) {
+		return std::vector<SyntaxDescriptor>();
+	}
+	return collectPermittedTokens(
+	    this, getParent().cast<Descriptor>()->getParent().cast<Domain>());
+}
+
 /* Class Descriptor */
 
 void Descriptor::doResolve(ResolutionState &state)
@@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const
 		}
 		valid = valid & attributesDescriptor->validate(logger);
 	}
+
+	// check start and end token.
+	if (!startToken.special && !startToken.token.empty() &&
+	    !Utils::isUserDefinedToken(startToken.token)) {
+		logger.error(std::string("Descriptor \"") + getName() +
+		                 "\" has an invalid custom start token: " +
+		                 startToken.token,
+		             *this);
+		valid = false;
+	}
+	if (!endToken.special && !endToken.token.empty() &&
+	    !Utils::isUserDefinedToken(endToken.token)) {
+		logger.error(std::string("Descriptor \"") + getName() +
+		                 "\" has an invalid custom end token: " +
+		                 endToken.token,
+		             *this);
+		valid = false;
+	}
+
 	// check that only one FieldDescriptor is of type TREE.
 	auto fds = Descriptor::getFieldDescriptors();
 	bool hasTREE = false;
@@ -483,7 +571,7 @@ std::pair<NodeVector<Node>, bool> Descriptor::pathTo(
 NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const
 {
 	// TODO: In principle a cast would be nicer here, but for now we copy.
-	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) {
+	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {
 		if (!n->isa(&RttiTypes::FieldDescriptor)) {
 			return false;
 		}
@@ -501,7 +589,7 @@ NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const
 NodeVector<StructuredClass> Descriptor::getPermittedChildren() const
 {
 	// TODO: In principle a cast would be nicer here, but for now we copy.
-	NodeVector<Node> nodes = collect(this, [](Handle<Node> n) {
+	NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) {
 		return n->isa(&RttiTypes::StructuredClass);
 	});
 	NodeVector<StructuredClass> res;
@@ -669,6 +757,14 @@ std::pair<Rooted<FieldDescriptor>, bool> Descriptor::createFieldDescriptor(
 	return std::make_pair(fd, sorted);
 }
 
+std::vector<SyntaxDescriptor> Descriptor::getPermittedTokens() const
+{
+	if (getParent() == nullptr) {
+		return std::vector<SyntaxDescriptor>();
+	}
+	return collectPermittedTokens(this, getParent().cast<Domain>());
+}
+
 /* Class StructuredClass */
 
 StructuredClass::StructuredClass(Manager &mgr, std::string name,
@@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const
 		logger.error(cardinality.toString() + " is not a cardinality!", *this);
 		valid = false;
 	}
+
+	// check short token.
+	if (!shortToken.special && !shortToken.token.empty() &&
+	    !Utils::isUserDefinedToken(shortToken.token)) {
+		logger.error(std::string("Descriptor \"") + getName() +
+		                 "\" has an invalid custom short form token: " +
+		                 shortToken.token,
+		             *this);
+		valid = false;
+	}
 	// check the validity of this superclass.
 	if (superclass != nullptr) {
 		valid = valid & superclass->validate(logger);
@@ -961,6 +1067,51 @@ Rooted<AnnotationClass> Domain::createAnnotationClass(std::string name)
 	    new AnnotationClass(getManager(), std::move(name), this)};
 }
 
+static void gatherTokenDescriptors(
+    Handle<Descriptor> desc, std::vector<TokenDescriptor *> &res,
+    std::unordered_set<FieldDescriptor *> &visited)
+{
+	// add the TokenDescriptors for the Descriptor itself.
+	if (!desc->getStartToken().isEmpty()) {
+		res.push_back(desc->getStartTokenPointer());
+	}
+	if (!desc->getEndToken().isEmpty()) {
+		res.push_back(desc->getEndTokenPointer());
+	}
+	// add the TokenDescriptors for its FieldDescriptors.
+	for (auto fd : desc->getFieldDescriptors()) {
+		if (!visited.insert(fd.get()).second) {
+			continue;
+		}
+		if (!fd->getStartToken().isEmpty()) {
+			res.push_back(fd->getStartTokenPointer());
+		}
+		if (!fd->getEndToken().isEmpty()) {
+			res.push_back(fd->getEndTokenPointer());
+		}
+	}
+}
+
+std::vector<TokenDescriptor *> Domain::getAllTokenDescriptors() const
+{
+	std::vector<TokenDescriptor *> res;
+	// note all fields that are already visited because FieldReferences might
+	// lead to doubled fields.
+	std::unordered_set<FieldDescriptor *> visited;
+	// add the TokenDescriptors for the StructuredClasses (and their fields).
+	for (auto s : structuredClasses) {
+		if (!s->getShortToken().isEmpty()) {
+			res.push_back(s->getShortTokenPointer());
+		}
+		gatherTokenDescriptors(s, res, visited);
+	}
+	// add the TokenDescriptors for the AnnotationClasses (and their fields).
+	for (auto a : annotationClasses) {
+		gatherTokenDescriptors(a, res, visited);
+	}
+	return res;
+}
+
 /* Type registrations */
 
 namespace RttiTypes {
diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp
index 7e10d91..e984ed9 100644
--- a/src/core/model/Domain.hpp
+++ b/src/core/model/Domain.hpp
@@ -167,11 +167,13 @@
 #ifndef _OUSIA_MODEL_DOMAIN_HPP_
 #define _OUSIA_MODEL_DOMAIN_HPP_
 
+#include <core/common/Whitespace.hpp>
 #include <core/managed/ManagedContainer.hpp>
 #include <core/RangeSet.hpp>
 
 #include "Node.hpp"
 #include "RootNode.hpp"
+#include "Syntax.hpp"
 #include "Typesystem.hpp"
 
 namespace ousia {
@@ -225,6 +227,9 @@ private:
 	Owned<Type> primitiveType;
 	bool optional;
 	bool primitive;
+	TokenDescriptor startToken;
+	TokenDescriptor endToken;
+	WhitespaceMode whitespaceMode;
 
 protected:
 	bool doValidate(Logger &logger) const override;
@@ -233,39 +238,46 @@ public:
 	/**
 	 * This is the constructor for primitive fields.
 	 *
-	 * @param mgr           is the global Manager instance.
-	 * @param parent        is a handle of the Descriptor node that has this
-	 *                      FieldDescriptor.
-	 * @param primitiveType is a handle to some Type in some Typesystem of which
-	 *                      one instance is allowed to fill this field.
-	 * @param name          is the name of this field.
-	 * @param optional      should be set to 'false' is this field needs to be
-	 *                      filled in order for an instance of the parent
-	 *                      Descriptor to be valid.
+	 * @param mgr            is the global Manager instance.
+	 * @param parent         is a handle of the Descriptor node that has this
+	 *                       FieldDescriptor.
+	 * @param primitiveType  is a handle to some Type in some Typesystem of
+	 *                       which one instance is allowed to fill this
+	 *                       field.
+	 * @param name           is the name of this field.
+	 * @param optional       should be set to 'false' if this field needs to be
+	 *                       filled in order for an instance of the parent
+	 *                       Descriptor to be valid.
+	 * @param whitespaceMode the WhitespaceMode to be used when an instance of
+	 *                       this FieldDescriptor is parsed.
 	 */
 	FieldDescriptor(Manager &mgr, Handle<Type> primitiveType,
 	                Handle<Descriptor> parent,
 	                FieldType fieldType = FieldType::TREE,
-	                std::string name = "", bool optional = false);
+	                std::string name = "", bool optional = false,
+	                WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
 
 	/**
 	 * This is the constructor for non-primitive fields. You have to provide
 	 * children here later on.
 	 *
-	 * @param mgr           is the global Manager instance.
-	 * @param parent        is a handle of the Descriptor node that has this
-	 *                      FieldDescriptor.
-	 * @param fieldType     is the FieldType of this FieldDescriptor, either
-	 *                      TREE for the main or default structure or SUBTREE
-	 *                      for supporting structures.
-	 * @param name          is the name of this field.
-	 * @param optional      should be set to 'false' is this field needs to be
-	 *                      filled in order for an instance of the parent
-	 *                      Descriptor to be valid.
+	 * @param mgr            is the global Manager instance.
+	 * @param parent         is a handle of the Descriptor node that has this
+	 *                       FieldDescriptor.
+	 * @param fieldType      is the FieldType of this FieldDescriptor, either
+	 *                       TREE for the main or default structure or SUBTREE
+	 *                       for supporting structures.
+	 * @param name           is the name of this field.
+	 * @param optional       should be set to 'false' if this field needs to be
+	 *                       filled in order for an instance of the parent
+	 *                       Descriptor to be valid.
+	 * @param whitespaceMode the WhitespaceMode to be used when an instance of
+	 *                       this FieldDescriptor is parsed.
 	 */
 	FieldDescriptor(Manager &mgr, Handle<Descriptor> parent = nullptr,
 	                FieldType fieldType = FieldType::TREE,
-	                std::string name = "", bool optional = false);
+	                std::string name = "", bool optional = false,
+	                WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
 
 	/**
 	 * Returns a const reference to the NodeVector of StructuredClasses whose
@@ -437,6 +449,109 @@ public:
 	 *         children of an instance of this Descriptor.
 	 */
 	NodeVector<FieldDescriptor> getDefaultFields() const;
+
+	/**
+	 * Returns a pointer to the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor starts.
+	 *
+	 * Note that changes made through this pointer do not invalidate the
+	 * FieldDescriptor, so use it with care.
+	 *
+	 * @return a pointer to the start TokenDescriptor.
+	 */
+	TokenDescriptor *getStartTokenPointer() { return &startToken; }
+
+	/**
+	 * Returns a copy of the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor starts.
+	 *
+	 * @return a copy of the start TokenDescriptor.
+	 */
+	TokenDescriptor getStartToken() const { return startToken; }
+
+	/**
+	 * Sets the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor starts.
+	 *
+	 * @param st the new start TokenDescriptor.
+	 */
+	void setStartToken(TokenDescriptor st)
+	{
+		invalidate();
+		startToken = st;
+	}
+
+	/**
+	 * Returns a pointer to the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor ends.
+	 *
+	 * @return a pointer to the end TokenDescriptor.
+	 */
+	TokenDescriptor *getEndTokenPointer() { return &endToken; }
+
+	/**
+	 * Returns a copy of the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor ends.
+	 *
+	 * @return a copy of the end TokenDescriptor.
+	 */
+	TokenDescriptor getEndToken() const { return endToken; }
+
+	/**
+	 * Sets the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this FieldDescriptor ends.
+	 *
+	 * @param e the new end TokenDescriptor.
+	 */
+	void setEndToken(TokenDescriptor e)
+	{
+		invalidate();
+		endToken = e;
+	}
+
+	/**
+	 * Returns the WhitespaceMode to be used when an instance of this
+	 * FieldDescriptor is parsed.
+	 *
+	 * @return the WhitespaceMode to be used when an instance of this
+	 * FieldDescriptor is parsed.
+	 */
+	WhitespaceMode getWhitespaceMode() const { return whitespaceMode; }
+
+	/**
+	 * Sets the WhitespaceMode to be used when an instance of this
+	 * FieldDescriptor is parsed.
+	 *
+	 * @param wm the WhitespaceMode to be used when an instance of this
+	 * FieldDescriptor is parsed.
+	 */
+	WhitespaceMode setWhitespaceMode(WhitespaceMode wm)
+	{
+		return whitespaceMode = wm;
+	}
+
+	/**
+	 * Returns the SyntaxDescriptor for this FieldDescriptor.
+	 *
+	 * @return the SyntaxDescriptor for this FieldDescriptor.
+	 */
+	SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1)
+	{
+		SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty,
+		                     const_cast<FieldDescriptor *>(this), depth};
+		return stx;
+	}
+
+	/**
+	 * Returns a vector of SyntaxDescriptors, one for each Descriptor
+	 * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is
+	 * permitted as child of this FieldDescriptor. This also makes use
+	 * of transparency.
+	 *
+	 * @return a vector of SyntaxDescriptors, one for each Descriptor that is
+	 *         permitted as child of this FieldDescriptor
+	 */
+	std::vector<SyntaxDescriptor> getPermittedTokens() const;
 };
 
 /**
@@ -460,7 +575,10 @@ public:
  * </A>
  * \endcode
  *
- * key="value" inside the A-node would be an attribute, while <key>value</key>
+ * key="value" inside the A-node would be an attribute, while
+ * \code{.xml}
+ *   <key>value</key>
+ * \endcode
  * would be a primitive field. While equivalent in XML the semantics are
  * different: An attribute describes indeed attributes, features of one single
  * node whereas a primitive field describes the _content_ of a node.
@@ -472,6 +590,8 @@ class Descriptor : public Node {
 private:
 	Owned<StructType> attributesDescriptor;
 	NodeVector<FieldDescriptor> fieldDescriptors;
+	TokenDescriptor startToken;
+	TokenDescriptor endToken;
 
 	bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger);
 
@@ -720,6 +840,85 @@ public:
 	 *         of an instance of this Descriptor in the structure tree.
 	 */
 	NodeVector<StructuredClass> getPermittedChildren() const;
+
+	/**
+	 * Returns a pointer to the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor starts.
+	 *
+	 * @return a pointer to the start TokenDescriptor.
+	 */
+	TokenDescriptor *getStartTokenPointer() { return &startToken; }
+
+	/**
+	 * Returns a copy of the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor starts.
+	 *
+	 * @return a copy of the start TokenDescriptor.
+	 */
+	TokenDescriptor getStartToken() const { return startToken; }
+
+	/**
+	 * Sets the start TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor starts.
+	 *
+	 * @param st the new start TokenDescriptor.
+	 */
+	void setStartToken(TokenDescriptor st)
+	{
+		invalidate();
+		startToken = st;
+	}
+
+	/**
+	 * Returns a pointer to the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor ends.
+	 *
+	 * @return a pointer to the end TokenDescriptor.
+	 */
+	TokenDescriptor *getEndTokenPointer() { return &endToken; }
+
+	/**
+	 * Returns a copy of the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor ends.
+	 *
+	 * @return a copy of the end TokenDescriptor.
+	 */
+	TokenDescriptor getEndToken() const { return endToken; }
+
+	/**
+	 * Sets the end TokenDescriptor. This Token is used as a
+	 * signifier during parsing that an instance of this Descriptor ends.
+	 *
+	 * @param e the new end TokenDescriptor.
+	 */
+	void setEndToken(TokenDescriptor e)
+	{
+		invalidate();
+		endToken = e;
+	}
+
+	/**
+	 * Returns the SyntaxDescriptor for this Descriptor.
+	 *
+	 * @return the SyntaxDescriptor for this Descriptor.
+	 */
+	virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1)
+	{
+		SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty,
+		                     const_cast<Descriptor *>(this), depth};
+		return stx;
+	}
+
+	/**
+	 * Returns a vector of SyntaxDescriptors, one for each Descriptor
+	 * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is
+	 * permitted as child of this Descriptor. This also makes use
+	 * of transparency.
+	 *
+	 * @return a vector of SyntaxDescriptors, one for each Descriptor that is
+	 *         permitted as child of this Descriptor.
+	 */
+	std::vector<SyntaxDescriptor> getPermittedTokens() const;
 };
 /*
  * TODO: We should discuss Cardinalities one more time. Is it smart to define
@@ -806,6 +1005,7 @@ private:
 	NodeVector<StructuredClass> subclasses;
 	bool transparent;
 	bool root;
+	TokenDescriptor shortToken;
 
 	/**
 	 * Helper method for getFieldDescriptors.
@@ -963,6 +1163,50 @@ public:
 		invalidate();
 		root = std::move(r);
 	}
+
+	/**
+	 * Returns a pointer to the short TokenDescriptor. During parsing an
+	 * occurrence of this token will be translated to an empty instance of this
+	 * StructuredClass.
+	 *
+	 * @return a pointer to the short TokenDescriptor.
+	 */
+	TokenDescriptor *getShortTokenPointer() { return &shortToken; }
+
+	/**
+	 * Returns a copy of the short TokenDescriptor. During parsing an
+	 * occurrence of this token will be translated to an empty instance of this
+	 * StructuredClass.
+	 *
+	 * @return a copy of the short TokenDescriptor.
+	 */
+	TokenDescriptor getShortToken() const { return shortToken; }
+
+	/**
+	 * Sets the short TokenDescriptor. During parsing an
+	 * occurrence of this token will be translated to an empty instance of this
+	 * StructuredClass.
+	 *
+	 * @param s the new short TokenDescriptor.
+	 */
+	void setShortToken(TokenDescriptor s)
+	{
+		invalidate();
+		shortToken = s;
+	}
+
+	/**
+	 * Returns the SyntaxDescriptor for this StructuredClass.
+	 *
+	 * @return the SyntaxDescriptor for this StructuredClass.
+	 */
+	SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override
+	{
+		SyntaxDescriptor stx{getStartToken().id, getEndToken().id,
+		                     shortToken.id, const_cast<StructuredClass *>(this),
+		                     depth};
+		return stx;
+	}
 };
 
 /**
@@ -1188,6 +1432,13 @@ public:
 	{
 		domains.insert(domains.end(), ds.begin(), ds.end());
 	}
+
+	/**
+	 * Returns all TokenDescriptors of classes and fields in this Domain.
+	 *
+	 * @return all TokenDescriptors of classes and fields in this Domain.
+	 */
+	std::vector<TokenDescriptor *> getAllTokenDescriptors() const;
 };
 
 namespace RttiTypes {
@@ -1200,4 +1451,4 @@ extern const Rtti Domain;
 }
 }
 
-#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
+#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
\ No newline at end of file
diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp
index 6bbf26d..f59e745 100644
--- a/test/core/model/DomainTest.cpp
+++ b/test/core/model/DomainTest.cpp
@@ -82,9 +82,7 @@ TEST(Domain, testDomainResolving)
 }
 
 // i use this wrapper due to the strange behaviour of GTEST.
-static void assertFalse(bool b){
-	ASSERT_FALSE(b);
-}
+static void assertFalse(bool b) { ASSERT_FALSE(b); }
 
 static Rooted<FieldDescriptor> createUnsortedPrimitiveField(
     Handle<StructuredClass> strct, Handle<Type> type, Logger &logger, bool tree,
@@ -170,7 +168,6 @@ TEST(StructuredClass, getFieldDescriptors)
 	}
 }
 
-
 TEST(StructuredClass, getFieldDescriptorsCycles)
 {
 	Logger logger;
@@ -523,6 +520,91 @@ TEST(Descriptor, getPermittedChildrenCycles)
 	ASSERT_EQ(A, children[0]);
 }
 
+TEST(Descriptor, getSyntaxDescriptor)
+{
+	// build an ontology with some custom syntax.
+	Manager mgr{1};
+	Logger logger;
+	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)};
+	// Construct the domain
+	Rooted<Domain> domain{new Domain(mgr, sys, "ontology")};
+	Rooted<StructuredClass> A{new StructuredClass(
+	    mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)};
+	A->setStartToken(TokenDescriptor(Tokens::Indent));
+	A->setEndToken(TokenDescriptor(Tokens::Dedent));
+	{
+		TokenDescriptor sh{"<+>"};
+		sh.id = 1;
+		A->setShortToken(sh);
+	}
+	// check the SyntaxDescriptor
+	SyntaxDescriptor stx = A->getSyntaxDescriptor();
+	ASSERT_EQ(Tokens::Indent, stx.start);
+	ASSERT_EQ(Tokens::Dedent, stx.end);
+	ASSERT_EQ(1, stx.shortForm);
+	ASSERT_EQ(A, stx.descriptor);
+	ASSERT_TRUE(stx.isStruct());
+	ASSERT_FALSE(stx.isAnnotation());
+	ASSERT_FALSE(stx.isFieldDescriptor());
+}
+
+TEST(Descriptor, getPermittedTokens)
+{
+	// build an ontology with some custom syntax.
+	Manager mgr{1};
+	Logger logger;
+	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)};
+	// Construct the domain
+	Rooted<Domain> domain{new Domain(mgr, sys, "ontology")};
+	// add one StructuredClass with all tokens set.
+	Rooted<StructuredClass> A{new StructuredClass(
+	    mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)};
+	A->setStartToken(TokenDescriptor(Tokens::Indent));
+	A->setEndToken(TokenDescriptor(Tokens::Dedent));
+	{
+		TokenDescriptor sh{"<+>"};
+		sh.id = 1;
+		A->setShortToken(sh);
+	}
+	// add a field with one token set.
+	Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first;
+	A_field->setEndToken(TokenDescriptor(Tokens::Newline));
+	A_field->addChild(A);
+	// add an annotation with start and end set.
+	Rooted<AnnotationClass> A_anno = domain->createAnnotationClass("A");
+	{
+		TokenDescriptor start{"<"};
+		start.id = 7;
+		A_anno->setStartToken(start);
+	}
+	{
+		TokenDescriptor end{">"};
+		end.id = 8;
+		A_anno->setEndToken(end);
+	}
+	// add a trivial annotation, which should not be returned.
+	Rooted<AnnotationClass> B_anno = domain->createAnnotationClass("B");
+	ASSERT_TRUE(domain->validate(logger));
+
+	// check result.
+	std::vector<SyntaxDescriptor> stxs = A->getPermittedTokens();
+	ASSERT_EQ(3, stxs.size());
+	// the field should be first, because A itself should not be collected
+	// directly.
+	ASSERT_EQ(A_field, stxs[0].descriptor);
+	ASSERT_EQ(Tokens::Empty, stxs[0].start);
+	ASSERT_EQ(Tokens::Newline, stxs[0].end);
+	ASSERT_EQ(Tokens::Empty, stxs[0].shortForm);
+	ASSERT_EQ(A, stxs[1].descriptor);
+	ASSERT_EQ(Tokens::Indent, stxs[1].start);
+	ASSERT_EQ(Tokens::Dedent, stxs[1].end);
+	ASSERT_EQ(1, stxs[1].shortForm);
+	ASSERT_EQ(A_anno, stxs[2].descriptor);
+	ASSERT_EQ(7, stxs[2].start);
+	ASSERT_EQ(8, stxs[2].end);
+	ASSERT_EQ(Tokens::Empty, stxs[2].shortForm);
+}
+
 TEST(StructuredClass, isSubclassOf)
 {
 	// create an inheritance hierarchy.
@@ -629,6 +711,14 @@ TEST(Domain, validate)
 		base_field->setPrimitiveType(sys->getStringType());
 		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
 		ASSERT_TRUE(domain->validate(logger));
+		// add an invalid start token.
+		base_field->setStartToken(TokenDescriptor("< + >"));
+		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
+		ASSERT_FALSE(domain->validate(logger));
+		// make it valid.
+		base_field->setStartToken(TokenDescriptor("<"));
+		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
+		ASSERT_TRUE(domain->validate(logger));
 		// add a subclass for our base class.
 		Rooted<StructuredClass> sub{new StructuredClass(mgr, "sub", domain)};
 		// this should be valid in itself.
@@ -686,4 +776,71 @@ TEST(Domain, validate)
 		ASSERT_TRUE(domain->validate(logger));
 	}
 }
+
+TEST(Domain, getAllTokenDescriptors)
+{
+	// build an ontology with some custom syntax.
+	Manager mgr{1};
+	Logger logger;
+	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)};
+	// Construct the domain
+	Rooted<Domain> domain{new Domain(mgr, sys, "ontology")};
+	// add one StructuredClass with all tokens set.
+	Rooted<StructuredClass> A{new StructuredClass(
+	    mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)};
+	A->setStartToken(TokenDescriptor(Tokens::Indent));
+	A->setEndToken(TokenDescriptor(Tokens::Dedent));
+	{
+		TokenDescriptor sh{"<+>"};
+		sh.id = 1;
+		A->setShortToken(sh);
+	}
+	// add a field with one token set.
+	Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first;
+	A_field->setEndToken(TokenDescriptor(Tokens::Newline));
+	A_field->addChild(A);
+	// add an annotation with start and end set.
+	Rooted<AnnotationClass> A_anno = domain->createAnnotationClass("A");
+	{
+		TokenDescriptor start{"<"};
+		start.id = 7;
+		A_anno->setStartToken(start);
+	}
+	{
+		TokenDescriptor end{">"};
+		end.id = 8;
+		A_anno->setEndToken(end);
+	}
+	// add a trivial annotation, which should not be returned.
+	Rooted<AnnotationClass> B_anno = domain->createAnnotationClass("B");
+	ASSERT_TRUE(domain->validate(logger));
+
+	// check the result.
+	std::vector<TokenDescriptor *> tks = domain->getAllTokenDescriptors();
+
+	// A short token
+	ASSERT_EQ("<+>", tks[0]->token);
+	ASSERT_EQ(1, tks[0]->id);
+	ASSERT_FALSE(tks[0]->special);
+	// A start token
+	ASSERT_EQ("", tks[1]->token);
+	ASSERT_EQ(Tokens::Indent, tks[1]->id);
+	ASSERT_TRUE(tks[1]->special);
+	// A end token
+	ASSERT_EQ("", tks[2]->token);
+	ASSERT_EQ(Tokens::Dedent, tks[2]->id);
+	ASSERT_TRUE(tks[2]->special);
+	// A field end token
+	ASSERT_EQ("", tks[3]->token);
+	ASSERT_EQ(Tokens::Newline, tks[3]->id);
+	ASSERT_TRUE(tks[3]->special);
+	// A anno start token
+	ASSERT_EQ("<", tks[4]->token);
+	ASSERT_EQ(7, tks[4]->id);
+	ASSERT_FALSE(tks[4]->special);
+	// A anno end token
+	ASSERT_EQ(">", tks[5]->token);
+	ASSERT_EQ(8, tks[5]->id);
+	ASSERT_FALSE(tks[5]->special);
+}
 }
\ No newline at end of file
-- 
cgit v1.2.3


From 4b5f37d07e4e691848b243ae795bb59893a6379c Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:55:41 +0100
Subject: added another domain test case for invalid syntax tokens.

---
 test/core/model/DomainTest.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp
index f59e745..b3c5771 100644
--- a/test/core/model/DomainTest.cpp
+++ b/test/core/model/DomainTest.cpp
@@ -701,6 +701,14 @@ TEST(Domain, validate)
 		base->setName("myClass");
 		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
 		ASSERT_TRUE(domain->validate(logger));
+		// add an invalid short token.
+		base->setShortToken(TokenDescriptor("bla"));
+		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
+		ASSERT_FALSE(domain->validate(logger));
+		// make it valid.
+		base->setShortToken(TokenDescriptor("!bla!"));
+		ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState());
+		ASSERT_TRUE(domain->validate(logger));
 		// Let's add a primitive field (without a primitive type at first)
 		Rooted<FieldDescriptor> base_field =
 		    base->createPrimitiveFieldDescriptor(nullptr, logger).first;
-- 
cgit v1.2.3