diff options
Diffstat (limited to 'src/core')
35 files changed, 3272 insertions, 1430 deletions
diff --git a/src/core/common/SourceContextReader.cpp b/src/core/common/SourceContextReader.cpp index d5d379c..f7dbdf3 100644 --- a/src/core/common/SourceContextReader.cpp +++ b/src/core/common/SourceContextReader.cpp @@ -149,8 +149,9 @@ SourceContext SourceContextReader::readContext(CharReader &reader, ctx.relLen = end - start; // end >= start (I2) // Remove linebreaks at the beginning and the end - const std::pair<size_t, size_t> b = - Utils::trim(lineBuf, Utils::isLinebreak); + const std::pair<size_t, size_t> b = Utils::trim( + lineBuf, + [&lineBuf](size_t i) { return Utils::isLinebreak(lineBuf[i]); }); ssize_t s = b.first, e = b.second; s = std::min(s, static_cast<ssize_t>(ctx.relPos)); diff --git a/src/core/parser/utils/Token.cpp b/src/core/common/Token.cpp index 8bcdbb5..17ce03e 100644 --- a/src/core/parser/utils/Token.cpp +++ b/src/core/common/Token.cpp @@ -19,6 +19,6 @@ #include "Token.hpp" namespace ousia { -// Stub to make sure Tokens.hpp is valid + } diff --git a/src/core/parser/utils/Token.hpp b/src/core/common/Token.hpp index f907450..4b56f1a 100644 --- a/src/core/parser/utils/Token.hpp +++ b/src/core/common/Token.hpp @@ -30,6 +30,7 @@ #include <cstdint> #include <limits> #include <string> +#include <unordered_set> #include <core/common/Location.hpp> @@ -46,6 +47,11 @@ using TokenId = uint32_t; using TokenLength = uint16_t; /** + * Type used for storing token sets. + */ +using TokenSet = std::unordered_set<TokenId>; + +/** * Namespace containing constants for TokenId instances with special meaning. */ namespace Tokens { @@ -66,15 +72,29 @@ constexpr TokenId Newline = std::numeric_limits<TokenId>::max() - 2; /** * Token which represents a paragraph token -- issued if two consecutive - * newlines occur with optionally any amout of whitespace between them. + * newlines occur with optionally any amout of whitespace between them. The + * paragraph token is not repeated until more text is reached. 
*/ constexpr TokenId Paragraph = std::numeric_limits<TokenId>::max() - 3; /** + * Token which represents a section token -- issued if three or more + * consecutive newlines occur with optionally any amout of whitespace between + * them. The section token is not repeated until more text is reached. + */ +constexpr TokenId Section = std::numeric_limits<TokenId>::max() - 4; + +/** * Token which represents an indentation token -- issued if the indentation of - * this line is larget than the indentation of the previous line. + * this line is larger than the indentation of the previous line. + */ +constexpr TokenId Indent = std::numeric_limits<TokenId>::max() - 5; + +/** + * Token which represents an dedentation -- issued if the indentation of + * this line is smaller than the indentation of the previous line. */ -constexpr TokenId Indentation = std::numeric_limits<TokenId>::max() - 4; +constexpr TokenId Dedent = std::numeric_limits<TokenId>::max() - 6; /** * Maximum token id to be used. Tokens allocated for users should not surpass @@ -109,6 +129,16 @@ struct Token { Token() : id(Tokens::Empty) {} /** + * Constructor of a "data" token with no explicit content. + * + * @param location is the location of the extracted string content in the + * source file. + */ + Token(const SourceLocation &location) : id(Tokens::Data), location(location) + { + } + + /** * Constructor of the Token struct. * * @param id represents the token id. @@ -116,12 +146,26 @@ struct Token { * @param location is the location of the extracted string content in the * source file. */ - Token(TokenId id, const std::string &content, SourceLocation location) + Token(TokenId id, const std::string &content, + const SourceLocation &location) : id(id), content(content), location(location) { } /** + * Constructor of the a "data" Token with the given string data and + * location. + * + * @param content is the string content that should be stored in the token. 
+ * @param location is the location of the content within the source file. + */ + Token(const std::string &content, + const SourceLocation &location = SourceLocation{}) + : id(Tokens::Data), content(content), location(location) + { + } + + /** * Constructor of the Token struct, only initializes the token id * * @param id is the id corresponding to the id of the token. @@ -129,6 +173,14 @@ struct Token { Token(TokenId id) : id(id) {} /** + * Returns true if this token is special. + * + * @return true if the TokenId indicates that this token is a "special" + * token. + */ + bool isSpecial() const { return id > Tokens::MaxTokenId; } + + /** * The getLocation function allows the tokens to be directly passed as * parameter to Logger or LoggableException instances. * @@ -139,4 +191,3 @@ struct Token { } #endif /* _OUSIA_TOKENS_HPP_ */ - diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index a77951e..a87ff6d 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -108,12 +108,6 @@ std::string Utils::extractFileExtension(const std::string &filename) return std::string{}; } -std::string Utils::trim(const std::string &s) -{ - std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace); - return s.substr(bounds.first, bounds.second - bounds.first); -} - bool Utils::startsWith(const std::string &s, const std::string &prefix) { return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix; @@ -124,5 +118,36 @@ bool Utils::endsWith(const std::string &s, const std::string &suffix) return suffix.size() <= s.size() && s.substr(s.size() - suffix.size(), suffix.size()) == suffix; } -} +bool Utils::isUserDefinedToken(const std::string &token) +{ + // Make sure the token meets is neither empty, nor starts or ends with an + // alphanumeric character + const size_t len = token.size(); + if (len == 0 || isAlphanumeric(token[0]) || + isAlphanumeric(token[len - 1])) { + return false; + } + + // Make sure the token is not any special OSML 
token + if (token == "\\" || token == "%" || token == "%{" || token == "}%" || + token == "{!" || token == "<\\" || token == "\\>") { + return false; + } + + // Make sure the token does not contain any whitespaces. + for (char c : token) { + if (isWhitespace(c)) { + return false; + } + } + + // Make sure the token contains other characters but { and } + for (char c : token) { + if (c != '{' && c != '}') { + return true; + } + } + return false; +} +}
\ No newline at end of file diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 7d96562..d9e26da 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -103,6 +103,26 @@ public: static bool isNamespacedIdentifier(const std::string &name); /** + * Returns true if the given characters form a valid user-defined token. + * This function returns true under the following circumstances: + * <ul> + * <li>The given token is not empty</li> + * <li>The given token starts and ends with a non-alphanumeric character + * </li> + * <li>The token is none of the following character sequences (which are + * special in OSML): + * <ul> + * <li>'{', '}' or any combined repetition of these characters</li> + * <li>'\', '{!', '<\', '\>'</li> + * <li>'%', '%{', '}%'</li> + * </ul> + * </li> + * <li>The token does not contain any whitespaces.</li> + * </ul> + */ + static bool isUserDefinedToken(const std::string &token); + + /** * Returns true if the given character is a linebreak character. */ static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); } @@ -124,14 +144,6 @@ public: static bool hasNonWhitepaceChar(const std::string &s); /** - * Removes whitespace at the beginning and the end of the given string. - * - * @param s is the string that should be trimmed. - * @return a trimmed copy of s. - */ - static std::string trim(const std::string &s); - - /** * Trims the given string or vector of chars by returning the start and end * index. * @@ -153,8 +165,8 @@ public: * * @param s is the container that should be trimmed. * @param len is the number of elements in the container. - * @param f is a function that returns true for values that should be - * removed. + * @param f is a function that returns true for values at a certain index + * that should be removed. * @return start and end index. 
Note that "end" points at the character * beyond the end, thus "end" minus "start" */ @@ -163,7 +175,7 @@ public: { size_t start = 0; for (size_t i = 0; i < len; i++) { - if (!f(s[i])) { + if (!f(i)) { start = i; break; } @@ -171,7 +183,7 @@ public: size_t end = 0; for (ssize_t i = len - 1; i >= static_cast<ssize_t>(start); i--) { - if (!f(s[i])) { + if (!f(i)) { end = i + 1; break; } @@ -198,17 +210,33 @@ public: * the collapsed version of the string ends. * @return start and end index. Note that "end" points at the character * beyond the end, thus "end" minus "start" + * @param f is a function that returns true for values at a certain index + * that should be removed. */ - template <class T> - static std::string trim(const T &s, size_t len, size_t &start, size_t &end) + template <class T, class Filter> + static std::string trim(const T &s, size_t len, size_t &start, size_t &end, + Filter f) { - auto res = trim(s, len, isWhitespace); + auto res = trim(s, len, f); start = res.first; end = res.second; return std::string(&s[start], end - start); } /** + * Removes whitespace at the beginning and the end of the given string. + * + * @param s is the string that should be trimmed. + * @return a trimmed copy of s. + */ + static std::string trim(const std::string &s) + { + std::pair<size_t, size_t> bounds = + trim(s, [&s](size_t i) { return isWhitespace(s[i]); }); + return s.substr(bounds.first, bounds.second - bounds.first); + } + + /** * Collapses the whitespaces in the given string (trims the string and * replaces all whitespace characters by a single one). 
* @@ -219,7 +247,8 @@ public: { size_t start; size_t end; - return collapse(s, s.size(), start, end); + return collapse(s, s.size(), start, end, + [&s](size_t i) { return isWhitespace(s[i]); }); } /** @@ -236,7 +265,8 @@ public: static std::string collapse(const std::string &s, size_t &start, size_t &end) { - return collapse(s, s.size(), start, end); + return collapse(s, s.size(), start, end, + [&s](size_t i) { return isWhitespace(s[i]); }); } /** @@ -244,6 +274,8 @@ public: * replaces all whitespace characters by a single one). * * @tparam T is the string type that should be used. + * @tparam Filter is a filter function used for detecting the character + * indices that might be removed. * @param s is the string in which the whitespace should be collapsed. * @param len is the length of the input string * @param start is an output parameter which is set to the offset at which @@ -252,9 +284,9 @@ public: * the collapsed version of the string ends. * @return a copy of s with collapsed whitespace. 
*/ - template <class T> + template <class T, class Filter> static std::string collapse(const T &s, size_t len, size_t &start, - size_t &end) + size_t &end, Filter f) { // Result vector std::vector<char> res; @@ -268,8 +300,7 @@ public: bool hadWhitespace = false; for (size_t i = 0; i < len; i++) { const char c = s[i]; - const bool whitespace = isWhitespace(c); - if (whitespace) { + if (f(i)) { hadWhitespace = !res.empty(); } else { // Adapt the start and end position diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp deleted file mode 100644 index ed52ea3..0000000 --- a/src/core/common/WhitespaceHandler.hpp +++ /dev/null @@ -1,284 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file WhitespaceHandler.hpp - * - * Contains the WhitespaceHandler classes which are used in multiple places to - * trim, compact or preserve whitespaces while at the same time maintaining the - * position information associated with the input strings. 
- * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_ -#define _OUSIA_WHITESPACE_HANDLER_HPP_ - -#include <string> -#include <vector> - -#include "Utils.hpp" - -namespace ousia { - -/** - * WhitespaceHandler is a based class that can be used to collect text on a - * character-by-character basis. Note that this class and its descendants are - * hoped to be inlined by the compiler (and used in conjunction with templates), - * thus they are fully defined inside this header. - */ -class WhitespaceHandler { -public: - /** - * Start position of the extracted text. - */ - size_t textStart; - - /** - * End position of the extracted text. - */ - size_t textEnd; - - /** - * Buffer containing the extracted text. - */ - std::vector<char> textBuf; - - /** - * Constructor of the TextHandlerBase base class. Initializes the start and - * end position with zeros. - */ - WhitespaceHandler() : textStart(0), textEnd(0) {} - - /** - * Returns true if this whitespace handler has found any text and a text - * token could be emitted. - * - * @return true if the internal data buffer is non-empty. - */ - bool hasText() { return !textBuf.empty(); } - - /** - * Returns the content of the WhitespaceHandler as string. - */ - std::string toString() const - { - return std::string(textBuf.data(), textBuf.size()); - } -}; - -/** - * The PreservingWhitespaceHandler class preserves all characters unmodified, - * including whitepace characters. - */ -class PreservingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Appends the given character to the internal text buffer, does not - * eliminate whitespace. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. 
- */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd); - } - - /** - * Static version of PreservingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - */ - static void append(char c, size_t start, size_t end, - std::vector<char> &textBuf, size_t &textStart, - size_t &textEnd) - { - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - textBuf.push_back(c); - } -}; - -/** - * The TrimmingTextHandler class trims all whitespace characters at the begin - * and the end of a text section but leaves all other characters unmodified, - * including whitepace characters. - */ -class TrimmingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Buffer used internally to temporarily store all whitespace characters. - * They are only added to the output buffer if another non-whitespace - * character is reached. - */ - std::vector<char> whitespaceBuf; - - /** - * Appends the given character to the internal text buffer, eliminates - * whitespace characters at the begin and end of the text. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); - } - - /** - * Static version of TrimmingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. 
- * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - * @param whitespaceBuf is a reference at the buffer for storing whitespace - * characters. - */ - static void append(char c, size_t start, size_t end, - std::vector<char> &textBuf, size_t &textStart, - size_t &textEnd, std::vector<char> &whitespaceBuf) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - whitespaceBuf.push_back(c); - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (!whitespaceBuf.empty()) { - textBuf.insert(textBuf.end(), whitespaceBuf.begin(), - whitespaceBuf.end()); - whitespaceBuf.clear(); - } - textBuf.push_back(c); - } -}; - -/** - * The CollapsingTextHandler trims characters at the beginning and end of the - * text and reduced multiple whitespace characters to a single blank. - */ -class CollapsingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Flag set to true if a whitespace character was reached. - */ - bool hasWhitespace = false; - - /** - * Appends the given character to the internal text buffer, eliminates - * redundant whitespace characters. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. 
- */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); - } - - /** - * Static version of CollapsingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - * @param hasWhitespace is a reference at the "hasWhitespace" flag. - */ - static void append(char c, size_t start, size_t end, - std::vector<char> &textBuf, size_t &textStart, - size_t &textEnd, bool &hasWhitespace) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - hasWhitespace = true; - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (hasWhitespace) { - textBuf.push_back(' '); - hasWhitespace = false; - } - textBuf.push_back(c); - } -}; - -/** - * Function that can be used to append the given buffer (e.g. a string or a - * vector) to the whitespace handler. - * - * @tparam WhitespaceHandler is one of the WhitespaceHandler classes. - * @tparam Buffer is an iterable type. - * @param handler is the handler to which the characters of the Buffer should be - * appended. - * @param buf is the buffer from which the characters should be read. - * @param start is the start byte offset. Each character is counted as one byte. 
- */ -template <typename WhitespaceHandler, typename Buffer> -inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, - size_t start) -{ - for (auto elem : buf) { - handler.append(elem, start, start + 1); - start++; - } -} -} - -#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */ - diff --git a/src/core/model/Ontology.cpp b/src/core/model/Ontology.cpp index 8829139..3af727d 100644 --- a/src/core/model/Ontology.cpp +++ b/src/core/model/Ontology.cpp @@ -20,8 +20,9 @@ #include <queue> #include <set> -#include <core/common/RttiBuilder.hpp> #include <core/common/Exceptions.hpp> +#include <core/common/RttiBuilder.hpp> +#include <core/common/Utils.hpp> #include "Ontology.hpp" @@ -169,52 +170,60 @@ static NodeVector<Node> pathTo(const Node *start, Logger &logger, return shortest; } +struct CollectState { + Node *n; + size_t depth; + + CollectState(Node *n, size_t depth) : n(n), depth(depth) {} +}; + template <typename F> static NodeVector<Node> collect(const Node *start, F match) { // result NodeVector<Node> res; // queue for breadth-first search of graph. - std::queue<Rooted<Node>> q; + std::queue<CollectState> q; // put the initial node on the stack. - q.push(const_cast<Node *>(start)); + q.push(CollectState(const_cast<Node *>(start), 0)); // set of visited nodes. std::unordered_set<const Node *> visited; while (!q.empty()) { - Rooted<Node> n = q.front(); + CollectState state = q.front(); q.pop(); // do not proceed if this node was already visited. - if (!visited.insert(n.get()).second) { + if (!visited.insert(state.n).second) { continue; } - if (n->isa(&RttiTypes::StructuredClass)) { - Rooted<StructuredClass> strct = n.cast<StructuredClass>(); + if (state.n->isa(&RttiTypes::Descriptor)) { + Rooted<Descriptor> strct{static_cast<Descriptor *>(state.n)}; // look through all fields. NodeVector<FieldDescriptor> fields = strct->getFieldDescriptors(); for (auto fd : fields) { // note matches. 
- if (match(fd)) { + if (match(fd, state.depth)) { res.push_back(fd); } // only continue in the TREE field. if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) { - q.push(fd); + q.push(CollectState(fd.get(), state.depth)); } } } else { // otherwise this is a FieldDescriptor. - Rooted<FieldDescriptor> field = n.cast<FieldDescriptor>(); + Rooted<FieldDescriptor> field{ + static_cast<FieldDescriptor *>(state.n)}; // and we proceed by visiting all permitted children. for (auto c : field->getChildrenWithSubclasses()) { // note matches. - if (match(c)) { + if (match(c, state.depth)) { res.push_back(c); } // We only continue our search via transparent children. if (c->isTransparent()) { - q.push(c); + q.push(CollectState(c.get(), state.depth + 1)); } } } @@ -222,28 +231,59 @@ static NodeVector<Node> collect(const Node *start, F match) return res; } +static std::vector<SyntaxDescriptor> collectPermittedTokens( + const Node *start, Handle<Domain> domain) +{ + // gather SyntaxDescriptors for structure children first. + std::vector<SyntaxDescriptor> res; + collect(start, [&res](Handle<Node> n, size_t depth) { + SyntaxDescriptor stx; + if (n->isa(&RttiTypes::FieldDescriptor)) { + stx = n.cast<FieldDescriptor>()->getSyntaxDescriptor(depth); + } else { + stx = n.cast<Descriptor>()->getSyntaxDescriptor(depth); + } + // do not add trivial SyntaxDescriptors. + if (!stx.isEmpty()) { + res.push_back(stx); + } + return false; + }); + // gather SyntaxDescriptors for AnnotationClasses. 
+ for (auto a : domain->getAnnotationClasses()) { + SyntaxDescriptor stx = a->getSyntaxDescriptor(); + if (!stx.isEmpty()) { + res.push_back(stx); + } + } + return res; +} + /* Class FieldDescriptor */ FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, Handle<Descriptor> parent, FieldType fieldType, - std::string name, bool optional) + std::string name, bool optional, + WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), primitiveType(acquire(primitiveType)), optional(optional), - primitive(true) + primitive(true), + whitespaceMode(whitespaceMode) { } FieldDescriptor::FieldDescriptor(Manager &mgr, Handle<Descriptor> parent, FieldType fieldType, std::string name, - bool optional) + bool optional, WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), optional(optional), - primitive(false) + primitive(false), + whitespaceMode(whitespaceMode) { } @@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const } else { valid = valid & validateName(logger); } + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } // check consistency of FieldType with the rest of the FieldDescriptor. 
if (primitive) { @@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const } static void gatherSubclasses( - std::unordered_set<const StructuredClass *>& visited, + std::unordered_set<const StructuredClass *> &visited, NodeVector<StructuredClass> &res, Handle<StructuredClass> strct) { // this check is to prevent cycles. @@ -334,7 +393,7 @@ static void gatherSubclasses( } for (auto sub : strct->getSubclasses()) { // this check is to prevent cycles. - if(visited.count(sub.get())){ + if (visited.count(sub.get())) { continue; } res.push_back(sub); @@ -381,7 +440,7 @@ NodeVector<Node> FieldDescriptor::pathTo(Handle<FieldDescriptor> field, NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -396,6 +455,16 @@ NodeVector<FieldDescriptor> FieldDescriptor::getDefaultFields() const return res; } +std::vector<SyntaxDescriptor> FieldDescriptor::getPermittedTokens() const +{ + if (getParent() == nullptr || + getParent().cast<Descriptor>()->getParent() == nullptr) { + return std::vector<SyntaxDescriptor>(); + } + return collectPermittedTokens( + this, getParent().cast<Descriptor>()->getParent().cast<Domain>()); +} + /* Class Descriptor */ void Descriptor::doResolve(ResolutionState &state) @@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const } valid = valid & attributesDescriptor->validate(logger); } + + // check start and end token. 
+ if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } + // check that only one FieldDescriptor is of type TREE. auto fds = Descriptor::getFieldDescriptors(); bool hasTREE = false; @@ -483,7 +571,7 @@ std::pair<NodeVector<Node>, bool> Descriptor::pathTo( NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -501,7 +589,7 @@ NodeVector<FieldDescriptor> Descriptor::getDefaultFields() const NodeVector<StructuredClass> Descriptor::getPermittedChildren() const { // TODO: In principle a cast would be nicer here, but for now we copy. 
- NodeVector<Node> nodes = collect(this, [](Handle<Node> n) { + NodeVector<Node> nodes = collect(this, [](Handle<Node> n, size_t depth) { return n->isa(&RttiTypes::StructuredClass); }); NodeVector<StructuredClass> res; @@ -669,6 +757,14 @@ std::pair<Rooted<FieldDescriptor>, bool> Descriptor::createFieldDescriptor( return std::make_pair(fd, sorted); } +std::vector<SyntaxDescriptor> Descriptor::getPermittedTokens() const +{ + if (getParent() == nullptr) { + return std::vector<SyntaxDescriptor>(); + } + return collectPermittedTokens(this, getParent().cast<Domain>()); +} + /* Class StructuredClass */ StructuredClass::StructuredClass(Manager &mgr, std::string name, @@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const logger.error(cardinality.toString() + " is not a cardinality!", *this); valid = false; } + + // check short token. + if (!shortToken.special && !shortToken.token.empty() && + !Utils::isUserDefinedToken(shortToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom short form token: " + + shortToken.token, + *this); + valid = false; + } // check the validity of this superclass. if (superclass != nullptr) { valid = valid & superclass->validate(logger); @@ -961,6 +1067,51 @@ Rooted<AnnotationClass> Ontology::createAnnotationClass(std::string name) new AnnotationClass(getManager(), std::move(name), this)}; } +static void gatherTokenDescriptors( + Handle<Descriptor> desc, std::vector<TokenDescriptor *> &res, + std::unordered_set<FieldDescriptor *> &visited) +{ + // add the TokenDescriptors for the Descriptor itself. + if (!desc->getStartToken().isEmpty()) { + res.push_back(desc->getStartTokenPointer()); + } + if (!desc->getEndToken().isEmpty()) { + res.push_back(desc->getEndTokenPointer()); + } + // add the TokenDescriptors for its FieldDescriptors. 
+ for (auto fd : desc->getFieldDescriptors()) { + if (!visited.insert(fd.get()).second) { + continue; + } + if (!fd->getStartToken().isEmpty()) { + res.push_back(fd->getStartTokenPointer()); + } + if (!fd->getEndToken().isEmpty()) { + res.push_back(fd->getEndTokenPointer()); + } + } +} + +std::vector<TokenDescriptor *> Domain::getAllTokenDescriptors() const +{ + std::vector<TokenDescriptor *> res; + // note all fields that are already visited because FieldReferences might + // lead to doubled fields. + std::unordered_set<FieldDescriptor *> visited; + // add the TokenDescriptors for the StructuredClasses (and their fields). + for (auto s : structuredClasses) { + if (!s->getShortToken().isEmpty()) { + res.push_back(s->getShortTokenPointer()); + } + gatherTokenDescriptors(s, res, visited); + } + // add the TokenDescriptors for the AnnotationClasses (and their fields). + for (auto a : annotationClasses) { + gatherTokenDescriptors(a, res, visited); + } + return res; +} + /* Type registrations */ namespace RttiTypes { diff --git a/src/core/model/Ontology.hpp b/src/core/model/Ontology.hpp index e1fbe96..d682bdf 100644 --- a/src/core/model/Ontology.hpp +++ b/src/core/model/Ontology.hpp @@ -168,11 +168,13 @@ #ifndef _OUSIA_MODEL_DOMAIN_HPP_ #define _OUSIA_MODEL_DOMAIN_HPP_ +#include <core/common/Whitespace.hpp> #include <core/managed/ManagedContainer.hpp> #include <core/RangeSet.hpp> #include "Node.hpp" #include "RootNode.hpp" +#include "Syntax.hpp" #include "Typesystem.hpp" namespace ousia { @@ -226,6 +228,9 @@ private: Owned<Type> primitiveType; bool optional; bool primitive; + TokenDescriptor startToken; + TokenDescriptor endToken; + WhitespaceMode whitespaceMode; protected: bool doValidate(Logger &logger) const override; @@ -234,39 +239,46 @@ public: /** * This is the constructor for primitive fields. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. 
- * @param primitiveType is a handle to some Type in some Typesystem of which - * one instance is allowed to fill this field. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param primitiveType is a handle to some Type in some Typesystem of + *which + * one instance is allowed to fill this field. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, Handle<Descriptor> parent, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * This is the constructor for non-primitive fields. You have to provide * children here later on. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param fieldType is the FieldType of this FieldDescriptor, either - * TREE for the main or default structure or SUBTREE - * for supporting structures. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. 
+ * @param fieldType is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle<Descriptor> parent = nullptr, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * Returns a const reference to the NodeVector of StructuredClasses whose @@ -455,6 +467,109 @@ public: return std::move(name); } } + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * Note that this does not invalidate the FieldDescriptor. So use with + * care. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. 
+ * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @return the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode getWhitespaceMode() const { return whitespaceMode; } + + /** + * Sets the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @param wm the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode setWhitespaceMode(WhitespaceMode wm) + { + return whitespaceMode = wm; + } + + /** + * Returns the SyntaxDescriptor for this FieldDescriptor. + * + * @return the SyntaxDescriptor for this FieldDescriptor. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast<FieldDescriptor *>(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this FieldDescriptor. This also makes use + * of transparency. 
+ * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this FieldDescriptor + */ + std::vector<SyntaxDescriptor> getPermittedTokens() const; }; /** @@ -478,7 +593,10 @@ public: * </A> * \endcode * - * key="value" inside the A-node would be an attribute, while <key>value</key> + * key="value" inside the A-node would be an attribute, while + * \code{.xml} + * <key>value</key> + * \endcode * would be a primitive field. While equivalent in XML the semantics are * different: An attribute describes indeed attributes, features of one single * node whereas a primitive field describes the _content_ of a node. @@ -490,6 +608,8 @@ class Descriptor : public Node { private: Owned<StructType> attributesDescriptor; NodeVector<FieldDescriptor> fieldDescriptors; + TokenDescriptor startToken; + TokenDescriptor endToken; bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); @@ -738,6 +858,85 @@ public: * of an instance of this Descriptor in the structure tree. */ NodeVector<StructuredClass> getPermittedChildren() const; + + /** + * Returns a pointer to the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. 
This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the SyntaxDescriptor for this Descriptor. + * + * @return the SyntaxDescriptor for this Descriptor. + */ + virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast<Descriptor *>(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this Descriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this Descriptor. + */ + std::vector<SyntaxDescriptor> getPermittedTokens() const; }; /* * TODO: We should discuss Cardinalities one more time. Is it smart to define @@ -824,6 +1023,7 @@ private: NodeVector<StructuredClass> subclasses; bool transparent; bool root; + TokenDescriptor shortToken; /** * Helper method for getFieldDescriptors. @@ -981,6 +1181,50 @@ public: invalidate(); root = std::move(r); } + + /** + * Returns a pointer to the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. 
+ * + * @return a pointer to the short TokenDescriptor. + */ + TokenDescriptor *getShortTokenPointer() { return &shortToken; } + + /** + * Returns a copy of the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a copy of the short TokenDescriptor. + */ + TokenDescriptor getShortToken() const { return shortToken; } + + /** + * Sets the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @param s the new short TokenDescriptor. + */ + void setShortToken(TokenDescriptor s) + { + invalidate(); + shortToken = s; + } + + /** + * Returns the SyntaxDescriptor for this StructuredClass. + * + * @return the SyntaxDescriptor for this StructuredClass. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override + { + SyntaxDescriptor stx{getStartToken().id, getEndToken().id, + shortToken.id, const_cast<StructuredClass *>(this), + depth}; + return stx; + } }; /** @@ -1207,6 +1451,13 @@ public: { ontologies.insert(ontologies.end(), ds.begin(), ds.end()); } + + /** + * Returns all TokenDescriptors of classes and fields in this Ontology. + * + * @return all TokenDescriptors of classes and fields in this Ontology. + */ + std::vector<TokenDescriptor *> getAllTokenDescriptors() const; }; namespace RttiTypes { @@ -1219,4 +1470,4 @@ extern const Rtti Ontology; } } -#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
\ No newline at end of file +#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp new file mode 100644 index 0000000..9dbaccc --- /dev/null +++ b/src/core/model/Syntax.cpp @@ -0,0 +1,58 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Syntax.hpp" + +#include "Domain.hpp" + +namespace ousia { + +/* Class TokenSyntaxDescriptor */ + +bool SyntaxDescriptor::isAnnotation() const +{ + return descriptor->isa(&RttiTypes::AnnotationClass); +} +bool SyntaxDescriptor::isFieldDescriptor() const +{ + return descriptor->isa(&RttiTypes::FieldDescriptor); +} +bool SyntaxDescriptor::isStruct() const +{ + return descriptor->isa(&RttiTypes::StructuredClass); +} + +void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ + if (start != Tokens::Empty) { + set.insert(start); + } + if (end != Tokens::Empty) { + set.insert(end); + } + if (shortForm != Tokens::Empty) { + set.insert(shortForm); + } +} + +bool SyntaxDescriptor::isEmpty() const +{ + return start == Tokens::Empty && end == Tokens::Empty && + shortForm == Tokens::Empty; +} +}
\ No newline at end of file diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp new file mode 100644 index 0000000..4da3408 --- /dev/null +++ b/src/core/model/Syntax.hpp @@ -0,0 +1,196 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Syntax.hpp + * + * This header contains the Descriptor classes for user definable syntax for + * Document entities or fields. These classes are referenced in Ontology.hpp. + */ + +#ifndef _OUSIA_MODEL_SYNTAX_HPP_ +#define _OUSIA_MODEL_SYNTAX_HPP_ + +#include <core/common/Token.hpp> +#include "Node.hpp" + +namespace ousia { + +/** + * Class to describe a single token that shall be used as user-defined syntax. + */ +struct TokenDescriptor { + /** + * The string content of this token, if it is not a special one. + */ + std::string token; + /** + * A flag to be set true if this TokenDescriptor uses a special token. + */ + bool special; + /** + * An id to uniquely identify this token. + */ + TokenId id; + + /** + * Constructor for non-special tokens. The special flag is set to false and + * the id to Tokens::Empty. + * + * @param token The string content of this token, if it is not a special + * one. 
+ */ + TokenDescriptor(std::string token = std::string()) + : token(std::move(token)), special(false), id(Tokens::Empty) + { + } + + /** + * Constructor for special tokens. The token is set to an empty string and + * the special flag to true. + * + * @param id the id of the special token. + */ + TokenDescriptor(TokenId id) : special(true), id(id) {} + + /** + * Returns true if and only if neither a string nor an ID is given. + * + * @return true if and only if neither a string nor an ID is given. + */ + bool isEmpty() const { return token.empty() && id == Tokens::Empty; } +}; + +/** + * Class describing the user defined syntax for a StructuredClass, + * AnnotationClass or FieldDescriptor. + * + * This class is used during parsing of a Document. It is used to describe + * the tokens relevant for one Descriptor that could be created at this point + * during parsing. + */ +struct SyntaxDescriptor { + /** + * Possible start token or Tokens::Empty if no token is set. + */ + TokenId start; + + /** + * Possible end token or Tokens::Empty if no token is set. + */ + TokenId end; + + /** + * Possible representation token or Tokens::Empty if no token is set. + */ + TokenId shortForm; + + /* + * The Descriptor this SyntaxDescriptor belongs to. As this may be + * a FieldDescriptor as well as a class Descriptor (StructuredClass or + * AnnotationClass) we can only use the class Node as inner argument here. + */ + Rooted<Node> descriptor; + /* + * Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + ssize_t depth; + + /** + * Default constructor, sets all token ids to Tokens::Empty and the + * descriptor handle to nullptr. + */ + SyntaxDescriptor() + : start(Tokens::Empty), + end(Tokens::Empty), + shortForm(Tokens::Empty), + descriptor(nullptr), + depth(-1) + { + } + + /** + * Member initializer constructor. 
+ * + * @param start is a possible start token. + * @param end is a possible end token. + * @param shortForm is a possible short form token. + * @param descriptor The Descriptor this SyntaxDescriptor belongs to. + * @param depth Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + Handle<Node> descriptor, ssize_t depth) + : start(start), + end(end), + shortForm(shortForm), + descriptor(descriptor), + depth(depth) + { + } + + /** + * Inserts all tokens referenced in this SyntaxDescriptor into the + * given TokenSet. Skips token ids set to Tokens::Empty. + * + * @param set is the TokenSet instance into which the Tokens should be + * inserted. + */ + void insertIntoTokenSet(TokenSet &set) const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + * + * @return true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + */ + bool isAnnotation() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + */ + bool isStruct() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + */ + bool isFieldDescriptor() const; + + /** + * Returns true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + * + * @return true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + */ + bool isEmpty() const; +}; +} +#endif
\ No newline at end of file diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp index 6ebc549..44b31c6 100644 --- a/src/core/parser/stack/Callbacks.cpp +++ b/src/core/parser/stack/Callbacks.cpp @@ -19,5 +19,15 @@ #include "Callbacks.hpp" namespace ousia { +namespace parser_stack { + +/* Class ParserCallbacks */ + +ParserCallbacks::~ParserCallbacks() +{ + // Do nothing here +} + +} } diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index 9c61000..dfe41fc 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -30,68 +30,80 @@ #define _OUSIA_PARSER_STACK_CALLBACKS_HPP_ #include <string> +#include <vector> #include <core/common/Whitespace.hpp> +#include <core/common/Token.hpp> +#include <core/model/Syntax.hpp> namespace ousia { + +// Forward declarations +class Variant; + namespace parser_stack { /** - * Interface defining a set of callback functions that act as a basis for the - * StateStackCallbacks and the ParserCallbacks. + * Interface between the Stack class and the underlying parser used for + * registering and unregistering tokens. */ -class Callbacks { +class ParserCallbacks { public: /** * Virtual descructor. */ - virtual ~Callbacks() {}; - - /** - * Sets the whitespace mode that specifies how string data should be - * processed. - * - * @param whitespaceMode specifies one of the three WhitespaceMode constants - * PRESERVE, TRIM or COLLAPSE. - */ - virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; + virtual ~ParserCallbacks(); /** * Registers the given token as token that should be reported to the handler * using the "token" function. * * @param token is the token string that should be reported. + * @return the token id with which the token will be reported. Should return + * Tokens::Empty if the given token could not be registered. 
*/ - virtual void registerToken(const std::string &token) = 0; + virtual TokenId registerToken(const std::string &token) = 0; /** * Unregisters the given token, it will no longer be reported to the handler * using the "token" function. * - * @param token is the token string that should be unregistered. + * @param id is the token id of the token that should be unregistered. */ - virtual void unregisterToken(const std::string &token) = 0; + virtual void unregisterToken(TokenId id) = 0; }; /** - * Interface defining the callback functions that can be passed from a - * StateStack to the underlying parser. + * Interface defining a set of callback functions that act as a basis for the + * StateStackCallbacks and the ParserCallbacks. */ -class ParserCallbacks : public Callbacks { +class HandlerCallbacks : public ParserCallbacks { +public: /** - * Checks whether the given token is supported by the parser. The parser - * returns true, if the token is supported, false if this token cannot be - * registered. Note that parsers that do not support the registration of - * tokens at all should always return "true". + * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. + * The tokens described in the token list are the tokens that are currently + * enabled. * - * @param token is the token that should be checked for support. - * @return true if the token is generally supported (or the parser does not - * support registering tokens at all), false if the token is not supported, - * because e.g. it is a reserved token or it interferes with other tokens. + * @param tokens is a list of TokenSyntaxDescriptor instances that should be + * stored on the stack. */ - virtual bool supportsToken(const std::string &token) = 0; -}; + virtual void pushTokens(const std::vector<SyntaxDescriptor> &tokens) = 0; + + /** + * Removes the previously pushed list of tokens from the stack. 
+ */ + virtual void popTokens() = 0; + /** + * Reads a string variant form the current input stream. This function must + * be called from the data() method. + * + * @return a string variant containing the current text data. The return + * value depends on the currently set whitespace mode and the tokens that + * were enabled using the enableTokens callback method. + */ + virtual Variant readData() = 0; +}; } } diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index a307f71..26b9b6e 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -25,6 +25,7 @@ #include <core/model/Ontology.hpp> #include <core/model/Project.hpp> #include <core/model/Typesystem.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <core/parser/ParserScope.hpp> #include <core/parser/ParserContext.hpp> @@ -36,7 +37,7 @@ namespace parser_stack { /* DocumentHandler */ -bool DocumentHandler::start(Variant::mapType &args) +bool DocumentHandler::startCommand(Variant::mapType &args) { Rooted<Document> document = context().getProject()->createDocument(args["name"].asString()); @@ -51,6 +52,11 @@ void DocumentHandler::end() { scope().pop(logger()); } /* DocumentChildHandler */ +DocumentChildHandler::DocumentChildHandler(const HandlerData &handlerData) + : Handler(handlerData), isExplicitField(false) +{ +} + void DocumentChildHandler::preamble(Rooted<Node> &parentNode, size_t &fieldIdx, DocumentEntity *&parent) { @@ -121,10 +127,10 @@ void DocumentChildHandler::createPath(const size_t &firstFieldIdx, scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, false); } -bool DocumentChildHandler::start(Variant::mapType &args) +bool DocumentChildHandler::startCommand(Variant::mapType &args) { - // extract the special "name" attribute from the input arguments. - // the remaining attributes will be forwarded to the newly constructed + // Extract the special "name" attribute from the input arguments. 
+ // The remaining attributes will be forwarded to the newly constructed // element. std::string nameAttr; { @@ -168,13 +174,6 @@ bool DocumentChildHandler::start(Variant::mapType &args) preamble(parentNode, fieldIdx, parent); - // TODO: REMOVE - std::string thisName = name(); - std::string parentClassName; - if (parent != nullptr) { - parentClassName = parent->getDescriptor()->getName(); - } - /* * Try to find a FieldDescriptor for the given tag if we are not in * a field already. This does _not_ try to construct transparent @@ -191,9 +190,9 @@ bool DocumentChildHandler::start(Variant::mapType &args) "Data or structure commands have already been " "given, command \"") + name() + std::string( - "\" is not interpreted explicit " - "field. Move explicit field " - "references to the beginning."), + "\" is not interpreted explicit " + "field. Move explicit field " + "references to the beginning."), location()); } else { Rooted<DocumentField> field{new DocumentField( @@ -260,15 +259,34 @@ bool DocumentChildHandler::start(Variant::mapType &args) } } +bool DocumentChildHandler::startAnnotation(Variant::mapType &args, + AnnotationType annotationType) +{ + // TODO: Handle annotation + return false; +} + +bool DocumentChildHandler::startToken(Handle<Node> node) +{ + // TODO: Handle token start + return false; +} + +DocumentChildHandler::EndTokenResult DocumentChildHandler::endToken( + const Token &token, Handle<Node> node) +{ + // TODO: Handle token end + return EndTokenResult::ENDED_NONE; +} + void DocumentChildHandler::end() { - // in case of explicit fields we do not want to pop something from the + // In case of explicit fields we do not want to pop something from the // stack. - if (isExplicitField) { - return; + if (!isExplicitField) { + // pop the "main" element. + scope().pop(logger()); } - // pop the "main" element. 
- scope().pop(logger()); } bool DocumentChildHandler::fieldStart(bool &isDefault, size_t fieldIdx) @@ -278,6 +296,7 @@ bool DocumentChildHandler::fieldStart(bool &isDefault, size_t fieldIdx) isDefault = true; return fieldIdx == 0; } + Rooted<Node> parentNode = scope().getLeaf(); assert(parentNode->isa(&RttiTypes::StructuredEntity) || parentNode->isa(&RttiTypes::AnnotationEntity)); @@ -290,7 +309,7 @@ bool DocumentChildHandler::fieldStart(bool &isDefault, size_t fieldIdx) parent->getDescriptor()->getFieldDescriptors(); if (isDefault) { - if(fields.empty()){ + if (fields.empty()) { return false; } fieldIdx = fields.size() - 1; @@ -316,33 +335,19 @@ void DocumentChildHandler::fieldEnd() { assert(scope().getLeaf()->isa(&RttiTypes::DocumentField)); - // pop the field from the stack. + // Pop the field from the stack. scope().pop(logger()); - // pop all remaining transparent elements. + // Pop all remaining transparent elements. while (scope().getLeaf()->isa(&RttiTypes::StructuredEntity) && scope().getLeaf().cast<StructuredEntity>()->isTransparent()) { - // pop the transparent element. + // Pop the transparent element. scope().pop(logger()); - // pop the transparent field. + // Pop the transparent field. 
scope().pop(logger()); } } -bool DocumentChildHandler::annotationStart(const Variant &className, - Variant::mapType &args) -{ - // TODO: Implement - return false; -} - -bool DocumentChildHandler::annotationEnd(const Variant &className, - const Variant &elementName) -{ - // TODO: Implement - return false; -} - bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field, Variant &data, Logger &logger) { @@ -370,7 +375,7 @@ bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field, return valid && scope().resolveValue(data, type, logger); } -bool DocumentChildHandler::data(Variant &data) +bool DocumentChildHandler::data() { // We're past the region in which explicit fields can be defined in the // parent structure element @@ -391,11 +396,12 @@ bool DocumentChildHandler::data(Variant &data) // If it is a primitive field directly, try to parse the content. if (field->isPrimitive()) { // Add it as primitive content. - if (!convertData(field, data, logger())) { + Variant text = readData(); + if (!convertData(field, text, logger())) { return false; } - parent->createChildDocumentPrimitive(data, fieldIdx); + parent->createChildDocumentPrimitive(text, fieldIdx); return true; } @@ -409,7 +415,11 @@ bool DocumentChildHandler::data(Variant &data) for (auto primitiveField : defaultFields) { // Then try to parse the content using the type specification. forks.emplace_back(logger().fork()); - if (!convertData(primitiveField, data, forks.back())) { + + // TODO: Actually the data has to be read after the path has been + // created (as createPath may push more tokens onto the stack) + Variant text = readData(); + if (!convertData(primitiveField, text, forks.back())) { continue; } @@ -418,24 +428,24 @@ bool DocumentChildHandler::data(Variant &data) // Construct the necessary path NodeVector<Node> path = field->pathTo(primitiveField, logger()); - // TODO: Create methods with indices instead of names. 
createPath(fieldIdx, path, parent); // Then create the primitive element - parent->createChildDocumentPrimitive(data); + parent->createChildDocumentPrimitive(text); return true; } // No field was found that might take the data -- dump the error messages // from the loggers -- or, if there were no primitive fields, clearly state // this fact + Variant text = readData(); if (defaultFields.empty()) { logger().error("Got data, but structure \"" + name() + "\" does not have any primitive field", - data); + text); } else { logger().error("Could not read data with any of the possible fields:", - data); + text); size_t f = 0; for (auto field : defaultFields) { logger().note(std::string("Field ") + @@ -461,7 +471,9 @@ const State DocumentChild = StateBuilder() .createdNodeTypes({&RttiTypes::StructureNode, &RttiTypes::AnnotationEntity, &RttiTypes::DocumentField}) - .elementHandler(DocumentChildHandler::create); + .elementHandler(DocumentChildHandler::create) + .supportsAnnotations(true) + .supportsTokens(true); } } @@ -469,4 +481,4 @@ namespace RttiTypes { const Rtti DocumentField = RttiBuilder<ousia::parser_stack::DocumentField>( "DocumentField").parent(&Node); } -}
\ No newline at end of file +} diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 44feb2b..0e35558 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -53,7 +53,7 @@ class DocumentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; /** @@ -92,9 +92,10 @@ public: */ class DocumentChildHandler : public Handler { private: - bool isExplicitField = false; - //TODO: REMOVE - std::string strct_name; + /** + * If set to true, this handler represents an explicit field. + */ + bool isExplicitField; /** * Code shared by both the start(), fieldStart() and the data() method. @@ -163,22 +164,18 @@ private: Logger &logger); public: - using Handler::Handler; + DocumentChildHandler(const HandlerData &handlerData); - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; + bool startAnnotation(Variant::mapType &args, + AnnotationType annotationType) override; + bool startToken(Handle<Node> node) override; + EndTokenResult endToken(const Token &token, Handle<Node> node) override; void end() override; - bool data(Variant &data) override; - + bool data() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; - void fieldEnd() override; - bool annotationStart(const Variant &className, - Variant::mapType &args) override; - - bool annotationEnd(const Variant &className, - const Variant &elementName) override; - /** * Creates a new instance of the DocumentChildHandler. * @@ -213,4 +210,5 @@ extern const Rtti DocumentField; } } -#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */
\ No newline at end of file +#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index bf5d4ea..c01e74c 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -18,6 +18,8 @@ #include <core/common/Exceptions.hpp> #include <core/common/Logger.hpp> +#include <core/common/Variant.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <core/parser/ParserContext.hpp> #include "Callbacks.hpp" @@ -29,14 +31,10 @@ namespace parser_stack { /* Class HandlerData */ -HandlerData::HandlerData(ParserContext &ctx, /*Callbacks &callbacks,*/ - const std::string &name, const State &state, - const SourceLocation &location) - : ctx(ctx), - /*callbacks(callbacks),*/ - name(name), - state(state), - location(location) +HandlerData::HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks, + const State &state, const Token &token, + HandlerType type) + : ctx(ctx), callbacks(callbacks), state(state), token(token), type(type) { } @@ -63,28 +61,39 @@ Logger &Handler::logger() return handlerData.ctx.getLogger(); } -const SourceLocation &Handler::location() const { return handlerData.location; } +const std::string &Handler::name() const { return handlerData.token.content; } -const std::string &Handler::name() const { return handlerData.name; } +TokenId Handler::tokenId() const { return handlerData.token.id; } -void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) +const Token &Handler::token() const { return handlerData.token; } + +const SourceLocation &Handler::location() const { - /*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/ + return handlerData.token.location; } -void Handler::registerToken(const std::string &token) +HandlerType Handler::type() const { return handlerData.type; } + +const State &Handler::state() const { return handlerData.state; } + +Variant Handler::readData() { return handlerData.callbacks.readData(); } + +void 
Handler::pushTokens(const std::vector<SyntaxDescriptor> &tokens) { - /*handlerData.callbacks.registerToken(token);*/ + handlerData.callbacks.pushTokens(tokens); } -void Handler::unregisterToken(const std::string &token) +void Handler::popTokens() { handlerData.callbacks.popTokens(); } + +TokenId Handler::registerToken(const std::string &token) { - /*handlerData.callbacks.unregisterToken(token);*/ + return handlerData.callbacks.registerToken(token); } -const std::string &Handler::getName() const { return name(); } - -const State &Handler::getState() const { return handlerData.state; } +void Handler::unregisterToken(TokenId id) +{ + handlerData.callbacks.unregisterToken(id); +} void Handler::setLogger(Logger &logger) { internalLogger = &logger; } @@ -94,43 +103,50 @@ const SourceLocation &Handler::getLocation() const { return location(); } /* Class EmptyHandler */ -bool EmptyHandler::start(Variant::mapType &args) +bool EmptyHandler::startCommand(Variant::mapType &args) { - // Just accept anything + // Well, we'll support any command we get, don't we? return true; } -void EmptyHandler::end() +bool EmptyHandler::startAnnotation(Variant::mapType &args, + Handler::AnnotationType annotationType) { - // Do nothing if a command ends + // Do not support annotations. Annotations are too complicated for poor + // EmptyHandler. + return false; } -bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex) +bool EmptyHandler::startToken(Handle<Node> node) { - // Accept any field - return true; + // EmptyHandler does not support tokens. + return false; } -void EmptyHandler::fieldEnd() +Handler::EndTokenResult EmptyHandler::endToken(const Token &token, + Handle<Node> node) { - // Do not handle fields + // There are no tokens to end here. 
+ return EndTokenResult::ENDED_NONE; } -bool EmptyHandler::annotationStart(const Variant &className, - Variant::mapType &args) +void EmptyHandler::end() { - // Accept any data - return true; + // Do nothing if a command ends } -bool EmptyHandler::annotationEnd(const Variant &className, - const Variant &elementName) +bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex) { - // Accept any annotation + // Accept any field return true; } -bool EmptyHandler::data(Variant &data) +void EmptyHandler::fieldEnd() +{ + // Do not handle field ends +} + +bool EmptyHandler::data() { // Support any data return true; @@ -143,12 +159,26 @@ Handler *EmptyHandler::create(const HandlerData &handlerData) /* Class StaticHandler */ -bool StaticHandler::start(Variant::mapType &args) +bool StaticHandler::startCommand(Variant::mapType &args) { // Do nothing in the default implementation, accept anything return true; } +bool StaticHandler::startAnnotation(Variant::mapType &args, + Handler::AnnotationType annotationType) +{ + return false; +} + +bool StaticHandler::startToken(Handle<Node> node) { return false; } + +Handler::EndTokenResult StaticHandler::endToken(const Token &token, + Handle<Node> node) +{ + return EndTokenResult::ENDED_NONE; +} + void StaticHandler::end() { // Do nothing here @@ -170,23 +200,9 @@ void StaticHandler::fieldEnd() // Do nothing here } -bool StaticHandler::annotationStart(const Variant &className, - Variant::mapType &args) -{ - // No annotations supported - return false; -} - -bool StaticHandler::annotationEnd(const Variant &className, - const Variant &elementName) +bool StaticHandler::data() { - // No annotations supported - return false; -} - -bool StaticHandler::data(Variant &data) -{ - logger().error("Did not expect any data here", data); + logger().error("Did not expect any data here", readData()); return false; } @@ -198,7 +214,7 @@ StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData, { } -bool 
StaticFieldHandler::start(Variant::mapType &args) +bool StaticFieldHandler::startCommand(Variant::mapType &args) { if (!argName.empty()) { auto it = args.find(argName); @@ -227,12 +243,15 @@ void StaticFieldHandler::end() } } -bool StaticFieldHandler::data(Variant &data) +bool StaticFieldHandler::data() { + // Fetch the actual text data + Variant stringData = readData(); + // Call the doHandle function if this has not been done before if (!handled) { handled = true; - doHandle(data, args); + doHandle(stringData, args); return true; } @@ -240,7 +259,7 @@ bool StaticFieldHandler::data(Variant &data) logger().error( std::string("Found data, but the corresponding argument \"") + argName + std::string("\" was already specified"), - data); + stringData); // Print the location at which the attribute was originally specified auto it = args.find(argName); diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 7cda7a4..67fde06 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -1,6 +1,6 @@ /* Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,6 +16,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * @file Handler.hpp + * + * Contains the definition of the Handler class, used for representing Handlers + * for certain syntactic elements. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + #ifndef _OUSIA_PARSER_STACK_HANDLER_HPP_ #define _OUSIA_PARSER_STACK_HANDLER_HPP_ @@ -24,6 +33,9 @@ #include <core/common/Location.hpp> #include <core/common/Variant.hpp> #include <core/common/Whitespace.hpp> +#include <core/common/Token.hpp> +#include <core/model/Node.hpp> +#include <core/model/Syntax.hpp> namespace ousia { @@ -31,14 +43,23 @@ namespace ousia { class ParserScope; class ParserContext; class Logger; +class TokenizedData; +class Variant; namespace parser_stack { // More forward declarations -class Callbacks; +class HandlerCallbacks; class State; /** + * Enum describing the type of the Handler instance -- a document handler may + * be created for handling a simple command, a token or an annotation start and + * end. + */ +enum class HandlerType { COMMAND, ANNOTATION_START, ANNOTATION_END, TOKEN }; + +/** * Class collecting all the data that is being passed to a Handler * instance. */ @@ -51,26 +72,28 @@ public: ParserContext &ctx; /** - * Reference at an instance of the Callbacks class, used for - * modifying the behaviour of the parser (like registering tokens, setting - * the data type or changing the whitespace handling mode). + * Reference at a class implementing the HandlerCallbacks interface, used + * for modifying the behaviour of the parser (like registering tokens, + * setting the data type or changing the whitespace handling mode). */ - // Callbacks &callbacks; + HandlerCallbacks &callbacks; /** - * Contains the name of the command that is being handled. + * Contains the current state of the state machine. */ - std::string name; + const State &state; /** - * Contains the current state of the state machine. + * Token containing the name of the command that is being handled, the + * location of the element in the source code or the token id of the token + * that is being handled. */ - const State &state; + Token token; /** - * Current source code location. 
+ * Type describing for which purpose the HandlerData instance was created. */ - SourceLocation location; + HandlerType type; /** * Constructor of the HandlerData class. @@ -78,13 +101,13 @@ public: * @param ctx is the parser context the handler should be executed in. * @param callbacks is an instance of Callbacks used to notify * the parser about certain state changes. - * @param name is the name of the string. * @param state is the state this handler was called for. - * @param location is the location at which the handler is created. + * @param token contains name, token id and location of the command that is + * being handled. + * @param type describes the purpose of the Handler instance at hand. */ - HandlerData(ParserContext &ctx, - /*Callbacks &callbacks,*/ const std::string &name, - const State &state, const SourceLocation &location); + HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks, + const State &state, const Token &token, HandlerType type); }; /** @@ -115,6 +138,94 @@ protected: Handler(const HandlerData &handlerData); /** + * Calls the corresponding function in the HandlerCallbacks instance. This + * method registers the given tokens as tokens that are generally available, + * tokens must be explicitly enabled using the "pushTokens" and "popTokens" + * method. Tokens that have not been registered are not guaranteed to be + * reported (except for special tokens, these do not have to be registerd). + * + * @param token is the token string that should be made available. + * @return the TokenId that will be used to refer to the token. + */ + TokenId registerToken(const std::string &token); + + /** + * Calls the corresponding function in the HandlerCallbacks instance. This + * method unregisters the given token. Note that for a token to be no longer + * reported, this function has to be called as many times as registerToken() + * for the corresponding token. + * + * @param id is the id of the Token that should be unregistered. 
+ */ + void unregisterToken(TokenId id); + + /** + * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. + * The tokens described in the token list are the tokens that are currently + * enabled. + * + * @param tokens is a list of TokenSyntaxDescriptor instances that should be + * stored on the stack. + */ + void pushTokens(const std::vector<SyntaxDescriptor> &tokens); + + /** + * Calls the corresponding function in the HandlerCallbacks instance. + * Removes the previously pushed list of tokens from the stack. + */ + void popTokens(); + + /** + * Calls the corresponding method in the HandlerCallbacks instance. Reads a + * string variant form the current input stream. This function must be + * called from the data() method. + * + * @return a string variant containing the current text data. The return + * value depends on the currently set whitespace mode and the tokens that + * were enabled using the enableTokens callback method. + */ + Variant readData(); + + /** + * Calls the corresponding function in the Callbacks instance. Sets the + * whitespace mode that specifies how string data should be processed. The + * calls to this function are placed on a stack by the underlying Stack + * class. This function should be called from the "fieldStart" callback and + * the "start" callback. If no whitespace mode is pushed in the "start" + * method the whitespace mode "TRIM" is implicitly assumed. + * + * @param whitespaceMode specifies one of the three WhitespaceMode constants + * PRESERVE, TRIM or COLLAPSE. + */ + // void pushWhitespaceMode(WhitespaceMode whitespaceMode); + + /** + * Pops a previously pushed whitespace mode. Calls to this function should + * occur in the "end" callback and the "fieldEnd" callback. This function + * can only undo pushs that were performed by the pushWhitespaceMode() + * method of the same handler. 
+ */ + // void popWhitespaceMode(); + +public: + /** + * Enum representing the type of the annotation a Handle instance handles. + * It may either handle the start of an annotation or the end of an + * annotation. + */ + enum class AnnotationType { START, END }; + + /** + * Enum type representing the possible outcomes of the endToken() method. + */ + enum class EndTokenResult { ENDED_THIS, ENDED_HIDDEN, ENDED_NONE }; + + /** + * Virtual destructor. + */ + virtual ~Handler(); + + /** * Returns a reference at the ParserContext. * * @return a reference at the ParserContext. @@ -144,68 +255,55 @@ protected: Logger &logger(); /** - * Returns the location of the element in the source file, for which this - * Handler was created. + * Returns the name of the command or annotation the handler is currently + * handling. In case the command is currently handling a token, the name + * corresponds to the token string sequence. * - * @return the location of the Handler in the source file. - */ - const SourceLocation &location() const; - - /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. + * @return the name of the command or the string sequence of the token that + * is being handled by this handler. */ const std::string &name() const; -public: - /** - * Virtual destructor. - */ - virtual ~Handler(); - /** - * Calls the corresponding function in the Callbacks instance. Sets the - * whitespace mode that specifies how string data should be processed. The - * calls to this function are placed on a stack by the underlying Stack - * class. + * Returns the token id of the token that is currently being handled by the + * handler. In case the handler currently handles a command or annotation, + * the token id is set to Tokens::Data. * - * @param whitespaceMode specifies one of the three WhitespaceMode constants - * PRESERVE, TRIM or COLLAPSE. 
+ * @return the current token id or Tokens::Data if no token is being + * handled. */ - void setWhitespaceMode(WhitespaceMode whitespaceMode); + TokenId tokenId() const; /** - * Calls the corresponding function in the Callbacks instance. - * Registers the given token as token that should be reported to the handler - * using the "token" function. + * Returns a reference at the Token instance, containing either the token + * that is currently being handled or the name of the command and annotation + * and their location. * - * @param token is the token string that should be reported. + * @return a const reference at the internal token instance. */ - void registerToken(const std::string &token); + const Token &token() const; /** - * Calls the corresponding function in the Callbacks instance. - * Unregisters the given token, it will no longer be reported to the handler - * using the "token" function. + * Returns the location of the element in the source file, for which this + * Handler was created. * - * @param token is the token string that should be unregistered. + * @return the location of the Handler in the source file. */ - void unregisterToken(const std::string &token); + const SourceLocation &location() const; /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. + * Returns the type describing the purpose for which the handler instance + * was created. */ - const std::string &getName() const; + HandlerType type() const; /** - * Reference at the State descriptor for which this Handler was created. + * Returns a reference at the State descriptor for which this Handler was + * created. * * @return a const reference at the constructing State descriptor. */ - const State &getState() const; + const State &state() const; /** * Sets the internal logger to the given logger instance. 
@@ -229,14 +327,62 @@ public: const SourceLocation &getLocation() const; /** - * Called when the command that was specified in the constructor is - * instanciated. + * Called whenever the handler should handle the start of a command. This + * method (or any other of the "start" methods) is called exactly once, + * after the constructor. The name of the command that is started here can + * be accessed using the name() method. + * + * @param args is a map from strings to variants (argument name and value). + * @return true if the handler was successful in starting an element with + * the given name represents, false otherwise. + */ + virtual bool startCommand(Variant::mapType &args) = 0; + + /** + * Called whenever the handler should handle the start of an annotation. + * This method (or any other of the "start" methods) is called exactly once, + * after the constructor. This method is only called if the + * "supportsAnnotations" flag of the State instance referencing this Handler + * is set to true. The name of the command that is started here can be + * accessed using the name() method. * * @param args is a map from strings to variants (argument name and value). - * @return true if the handler was successful in starting the element it - * represents, false otherwise. + * @param type specifies whether this handler should handle the start of an + * annotation or the end of an annotation. + */ + virtual bool startAnnotation(Variant::mapType &args, + AnnotationType annotationType) = 0; + + /** + * Called whenever the handler should handle the start of a token. This + * method (or any other of the "start" methods) is called exactly once, + * after the constructor. This method is only called if the "supportsTokens" + * flag of the State instance referencing this Handler is set to true. The + * token id of the token that is should be handled can be accessed using the + * tokenId() method. + * + * @param node is the node for which this token was registered. 
+ */ + virtual bool startToken(Handle<Node> node) = 0; + + /** + * Called whenever a token is marked as "end" token and this handler happens + * to be the currently active handler. This operation may have three + * outcomes: + * <ol> + * <li>The token marks the end of the complete handler and the calling + * code should call the "end" method.</li> + * <li>The token marks the end of some element that is unknown the calling + * code. So the operation itself was a success, but the calling code + * should not call the "end" method. + * <li>The token did not anything in this context. Basically this shuold + * never happen, but who knows.</li> + * </ol> + * + * @param id is the Token for which the handler should be started. + * @param node is the node for which this token was registered. */ - virtual bool start(Variant::mapType &args) = 0; + virtual EndTokenResult endToken(const Token &token, Handle<Node> node) = 0; /** * Called before the command for which this handler is defined ends (is @@ -266,44 +412,14 @@ public: virtual void fieldEnd() = 0; /** - * Called whenever an annotation starts while this handler is active. The - * function should return true if starting the annotation was successful, - * false otherwise. - * - * @param className is a string variant containing the name of the - * annotation class and the location of the name in the source code. - * @param args is a map from strings to variants (argument name and value). - * @return true if the mentioned annotation could be started here, false - * if an error occurred. - */ - virtual bool annotationStart(const Variant &className, - Variant::mapType &args) = 0; - - /** - * Called whenever an annotation ends while this handler is active. The - * function should return true if ending the annotation was successful, - * false otherwise. - * - * @param className is a string variant containing the name of the - * annotation class and the location of the class name in the source code. 
- * @param elementName is a string variant containing the name of the - * annotation class and the location of the element name in the source code. - * @return true if the mentioned annotation could be started here, false if - * an error occurred. - */ - virtual bool annotationEnd(const Variant &className, - const Variant &elementName) = 0; - - /** * Called whenever raw data (int the form of a string) is available for the * Handler instance. Should return true if the data could be handled, false - * otherwise. + * otherwise. The actual data variant must be retrieved using the "text()" + * callback. * - * @param data is a string variant containing the character data and its - * location. * @return true if the data could be handled, false otherwise. */ - virtual bool data(Variant &data) = 0; + virtual bool data() = 0; }; /** @@ -325,15 +441,15 @@ protected: using Handler::Handler; public: - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; + bool startAnnotation(Variant::mapType &args, + AnnotationType annotationType) override; + bool startToken(Handle<Node> node) override; + EndTokenResult endToken(const Token &token, Handle<Node> node) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; void fieldEnd() override; - bool annotationStart(const Variant &className, - Variant::mapType &args) override; - bool annotationEnd(const Variant &className, - const Variant &elementName) override; - bool data(Variant &data) override; + bool data() override; /** * Creates an instance of the EmptyHandler class. 
@@ -351,15 +467,15 @@ protected: using Handler::Handler; public: - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; + bool startAnnotation(Variant::mapType &args, + AnnotationType annotationType) override; + bool startToken(Handle<Node> node) override; + EndTokenResult endToken(const Token &token, Handle<Node> node) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; void fieldEnd() override; - bool annotationStart(const Variant &className, - Variant::mapType &args) override; - bool annotationEnd(const Variant &className, - const Variant &elementName) override; - bool data(Variant &data) override; + bool data() override; }; /** @@ -406,13 +522,12 @@ protected: * @param fieldData is the captured field data. * @param args are the arguments that were given in the "start" function. */ - virtual void doHandle(const Variant &fieldData, - Variant::mapType &args) = 0; + virtual void doHandle(const Variant &fieldData, Variant::mapType &args) = 0; public: - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; + bool data() override; void end() override; - bool data(Variant &data) override; }; } } diff --git a/src/core/parser/stack/OntologyHandler.cpp b/src/core/parser/stack/OntologyHandler.cpp index 8c0e4d9..3b3b386 100644 --- a/src/core/parser/stack/OntologyHandler.cpp +++ b/src/core/parser/stack/OntologyHandler.cpp @@ -33,7 +33,7 @@ namespace parser_stack { /* OntologyHandler */ -bool OntologyHandler::start(Variant::mapType &args) +bool DomainHandler::startCommand(Variant::mapType &args) { // Create the Ontology node Rooted<Ontology> ontology = @@ -226,9 +226,9 @@ bool OntologyChildHandler::start(Variant::mapType &args) { Rooted<FieldDescriptor> field = scope().selectOrThrow<FieldDescriptor>(); - const std::string &ref = args["ref"].asString(); + const std::string &name = args["ref"].asString(); scope().resolve<StructuredClass>( - ref, 
field, logger(), + name, field, logger(), [](Handle<Node> child, Handle<Node> field, Logger &logger) { if (child != nullptr) { field.cast<FieldDescriptor>()->addChild( @@ -275,7 +275,7 @@ bool OntologyParentFieldHandler::start(Variant::mapType &args) scope().resolve<Descriptor>( parentNameNode->getName(), strct, logger(), [type, name, optional](Handle<Node> parent, Handle<Node> strct, - Logger &logger) { + Logger &logger) { if (parent != nullptr) { Rooted<FieldDescriptor> field = (parent.cast<Descriptor>()->createFieldDescriptor( @@ -299,21 +299,20 @@ bool OntologyParentFieldRefHandler::start(Variant::mapType &args) // resolve the parent, get the referenced field and add the declared // StructuredClass as child to it. - scope().resolve<Descriptor>( - parentNameNode->getName(), strct, logger(), - [name, loc](Handle<Node> parent, Handle<Node> strct, Logger &logger) { - if (parent != nullptr) { - Rooted<FieldDescriptor> field = - parent.cast<Descriptor>()->getFieldDescriptor(name); - if (field == nullptr) { - logger.error( - std::string("Could not find referenced field ") + name, - loc); - return; - } - field->addChild(strct.cast<StructuredClass>()); - } - }); + scope().resolve<Descriptor>(parentNameNode->getName(), strct, logger(), + [name, loc](Handle<Node> parent, + Handle<Node> strct, Logger &logger) { + if (parent != nullptr) { + Rooted<FieldDescriptor> field = + parent.cast<Descriptor>()->getFieldDescriptor(name); + if (field == nullptr) { + logger.error( + std::string("Could not find referenced field ") + name, loc); + return; + } + field->addChild(strct.cast<StructuredClass>()); + } + }); return true; } diff --git a/src/core/parser/stack/OntologyHandler.hpp b/src/core/parser/stack/OntologyHandler.hpp index caeacc7..66146bd 100644 --- a/src/core/parser/stack/OntologyHandler.hpp +++ b/src/core/parser/stack/OntologyHandler.hpp @@ -46,7 +46,7 @@ class OntologyHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType 
&args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -59,7 +59,7 @@ class OntologyStructHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -72,7 +72,7 @@ class OntologyAnnotationHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -85,7 +85,7 @@ class OntologyAttributesHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -98,7 +98,7 @@ class OntologyFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -111,7 +111,7 @@ class OntologyFieldRefHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -124,7 +124,7 @@ class OntologyPrimitiveHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -137,7 +137,7 @@ class OntologyChildHandler : public StaticHandler { public: 
using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -154,7 +154,7 @@ class OntologyParentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -167,7 +167,7 @@ class OntologyParentFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -179,7 +179,7 @@ class OntologyParentFieldRefHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 5b67248..f341f1d 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -19,18 +19,148 @@ #include <core/common/Logger.hpp> #include <core/common/Utils.hpp> #include <core/common/Exceptions.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <core/parser/ParserScope.hpp> #include <core/parser/ParserContext.hpp> +#include "Callbacks.hpp" #include "Handler.hpp" #include "Stack.hpp" #include "State.hpp" +#include "TokenRegistry.hpp" +#include "TokenStack.hpp" + +#define STACK_DEBUG_OUTPUT 0 +#if STACK_DEBUG_OUTPUT +#include <iostream> +#endif namespace ousia { namespace parser_stack { +namespace { /* Class HandlerInfo */ +/** + * The HandlerInfo class is used internally by the stack to associate additional + * (mutable) data with a handler instance. 
+ */ +class HandlerInfo { +public: + /** + * Pointer pointing at the actual handler instance. + */ + std::shared_ptr<Handler> handler; + + /** + * Next field index to be passed to the "fieldStart" function of the Handler + * class. + */ + size_t fieldIdx; + + /** + * Set to true if the handler is valid (which is the case if the "start" + * method has returned true). If the handler is invalid, no more calls are + * directed at it until it can be removed from the stack. + */ + bool valid : 1; + + /** + * Set to true if this is an implicit handler, that was created when the + * current stack state was deduced. + */ + bool implicit : 1; + + /** + * Set to true if the handled command or annotation has a range. + */ + bool range : 1; + + /** + * Set to true if the handler currently is in a field. + */ + bool inField : 1; + + /** + * Set to true if the handler currently is in the default field. + */ + bool inDefaultField : 1; + + /** + * Set to true if the handler currently is in an implicitly started default + * field. + */ + bool inImplicitDefaultField : 1; + + /** + * Set to false if this field is only opened pro-forma and does not accept + * any data. Otherwise set to true. + */ + bool inValidField : 1; + + /** + * Set to true, if the default field was already started. + */ + bool hadDefaultField : 1; + + /** + * Default constructor of the HandlerInfo class. + */ + HandlerInfo(); + + /** + * Constructor of the HandlerInfo class, allows to set some flags manually. + */ + HandlerInfo(bool implicit, bool inField, bool inDefaultField, + bool inImplicitDefaultField); + + /** + * Constructor of the HandlerInfo class, taking a shared_ptr to the handler + * to which additional information should be attached. + */ + HandlerInfo(std::shared_ptr<Handler> handler); + + /** + * Destructor of the HandlerInfo class (to allow Handler to be forward + * declared). + */ + ~HandlerInfo(); + + /** + * Updates the "field" flags according to a "fieldStart" event. 
+ */ + void fieldStart(bool isDefault, bool isImplicit, bool isValid); + + /** + * Updates the "fields" flags according to a "fieldEnd" event. + */ + void fieldEnd(); + + /** + * Returns the name of the referenced handler or an empty string if no + * handler is present. + * + * @return the current handler name. + */ + std::string name() const; + + /** + * Returns the type of the referenced handler or COMMAND if no handler is + * present. + * + * @return the current handler type. + */ + HandlerType type() const; + + /** + * Returns the current state the handler is on or States::None if no handler + * is present. + * + * @return the current state machine state. + */ + const State &state() const; +}; + HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {} HandlerInfo::HandlerInfo(std::shared_ptr<Handler> handler) @@ -38,6 +168,7 @@ HandlerInfo::HandlerInfo(std::shared_ptr<Handler> handler) fieldIdx(0), valid(true), implicit(false), + range(false), inField(false), inDefaultField(false), inImplicitDefaultField(false), @@ -46,21 +177,36 @@ HandlerInfo::HandlerInfo(std::shared_ptr<Handler> handler) { } -HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField, - bool inDefaultField, bool inImplicitDefaultField, - bool inValidField) +HandlerInfo::HandlerInfo(bool implicit, bool inField, bool inDefaultField, + bool inImplicitDefaultField) : handler(nullptr), fieldIdx(0), - valid(valid), + valid(true), implicit(implicit), + range(false), inField(inField), inDefaultField(inDefaultField), inImplicitDefaultField(inImplicitDefaultField), - inValidField(inValidField), + inValidField(true), hadDefaultField(false) { } +std::string HandlerInfo::name() const +{ + return handler == nullptr ? std::string{} : handler->name(); +} + +HandlerType HandlerInfo::type() const +{ + return handler == nullptr ? HandlerType::COMMAND : handler->type(); +} + +const State &HandlerInfo::state() const +{ + return handler == nullptr ? 
States::None : handler->state(); +} + HandlerInfo::~HandlerInfo() { // Do nothing @@ -87,7 +233,20 @@ void HandlerInfo::fieldEnd() /** * Stub instance of HandlerInfo containing no handler information. */ -static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true}; +static HandlerInfo EmptyHandlerInfo{true, true, true, true}; + +/** + * Small helper class makeing sure the reference at some variable is reset once + * the scope is left. + */ +template <class T> +struct GuardedTemporaryPointer { + T **ptr; + GuardedTemporaryPointer(T *ref, T **ptr) : ptr(ptr) { *ptr = ref; } + + ~GuardedTemporaryPointer() { *ptr = nullptr; } +}; +} /* Helper functions */ @@ -116,11 +275,197 @@ static LoggableException buildInvalidCommandException( } } -/* Class Stack */ - -Stack::Stack(ParserContext &ctx, - const std::multimap<std::string, const State *> &states) - : ctx(ctx), states(states) +/* Class StackImpl */ + +class StackImpl : public HandlerCallbacks { +private: + /** + * Reference at an implementation of the ParserCallbacks instance to which + * certain handler callbacks are directed. + */ + ParserCallbacks &parser; + + /** + * Reference at the parser context. + */ + ParserContext &ctx; + + /** + * Map containing all registered command names and the corresponding + * state descriptors. + */ + const std::multimap<std::string, const State *> &states; + + /** + * Registry responsible for registering the tokens proposed by the + * Handlers in the parser. + */ + TokenRegistry tokenRegistry; + + /** + * Pointer at a TokenizedDataReader instance from which the data should + * currently be read. + */ + TokenizedDataReader *dataReader; + + /** + * Internal stack used for managing the currently active Handler instances. + */ + std::vector<HandlerInfo> stack; + + /** + * Return the reference in the Logger instance stored within the context. 
+ */ + Logger &logger() { return ctx.getLogger(); } + + /** + * Used internally to get all expected command names for the current state. + * This function is used to build error messages. + * + * @return a set of strings containing the names of the expected commands. + */ + std::set<std::string> expectedCommands(); + + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetState(const std::string &name); + + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state, also including the wildcard "*" state. + * Throws an exception if the given target state is not a valid identifier. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetStateOrWildcard(const std::string &name); + + /** + * Tries to reconstruct the parser state from the Scope instance of the + * ParserContext given in the constructor. This functionality is needed for + * including files,as the Parser of the included file needs to be brought to + * an equivalent state as the one in the including file. + */ + void deduceState(); + + /** + * Returns a reference at the current HandlerInfo instance (or a stub + * HandlerInfo instance if the stack is empty). + */ + HandlerInfo &currentInfo(); + + /** + * Returns a reference at the last HandlerInfo instance (or a stub + * HandlerInfo instance if the stack has only one element). + */ + HandlerInfo &lastInfo(); + + /** + * Returns a set containing the tokens that should currently be processed + * by the TokenizedData instance. + * + * @return a TokenSet instance containing all tokens that should currently + * be processed. 
+ */ + TokenSet currentTokens() const; + + /** + * Returns the whitespace mode defined by the current command. + */ + WhitespaceMode currentWhitespaceMode() const; + + /** + * Ends the current handler and removes the corresponding element from the + * stack. + * + * @return true if a command was ended, false otherwise. + */ + bool endCurrentHandler(); + + /** + * Ends all handlers that currently are not inside a field and already had + * a default field. Tries to start a default field for the current handler, + * if currently the handler is not inside a field and did not have a default + * field yet. This method is called whenever the data(), startAnnotation(), + * startToken(), startCommand(), annotationStart() or annotationEnd() events + * are reached. + * + * @return true if the current command is in a valid field. + */ + bool prepareCurrentHandler(bool startImplicitDefaultField = true); + + /** + * Returns true if all handlers on the stack are currently valid, or false + * if at least one handler is invalid. + * + * @return true if all handlers on the stack are valid. + */ + bool handlersValid(); + + /** + * Called whenever there is an actual data pending on the current + * TokenizedDataReader. Tries to feed this data to the current handler. + */ + void handleData(); + + /** + * Called whenever there is a token waiting to be processed. If possible + * tries to end a current handler with this token or to start a new handler + * with the token. + * + * @param token is the token that should be handled. + */ + void handleToken(const Token &token); + + /** + * Called by the rangeEnd() and fieldEnd() methods to end the current ranged + * command. + * + * @param endRange specifies whether this should end the range of a + * command with range. 
+ */ + void handleFieldEnd(bool endRange); + +public: + StackImpl(ParserCallbacks &parser, ParserContext &ctx, + const std::multimap<std::string, const State *> &states); + + ~StackImpl(); + + const State &currentState() const; + std::string currentCommandName() const; + + void commandStart(const Variant &name, const Variant::mapType &args, + bool range); + void annotationStart(const Variant &className, const Variant &args, + bool range); + void annotationEnd(const Variant &className, const Variant &elementName); + void rangeEnd(); + void fieldStart(bool isDefault); + void fieldEnd(); + void data(const TokenizedData &data); + + TokenId registerToken(const std::string &token) override; + void unregisterToken(TokenId id) override; + Variant readData() override; + void pushTokens(const std::vector<SyntaxDescriptor> &tokens) override; + void popTokens() override; +}; + +StackImpl::StackImpl(ParserCallbacks &parser, ParserContext &ctx, + const std::multimap<std::string, const State *> &states) + : parser(parser), + ctx(ctx), + states(states), + tokenRegistry(parser), + dataReader(nullptr) { // If the scope instance is not empty we need to deduce the current parser // state
state.elementHandler : EmptyHandler::create; - std::shared_ptr<Handler> handler = - std::shared_ptr<Handler>{ctor({ctx, "", state, SourceLocation{}})}; + std::shared_ptr<Handler> handler = std::shared_ptr<Handler>{ + ctor({ctx, *this, state, SourceLocation{}, HandlerType::COMMAND})}; stack.emplace_back(handler); // Set the correct flags for this implicit handler @@ -186,7 +531,7 @@ void Stack::deduceState() info.fieldStart(true, false, true); } -std::set<std::string> Stack::expectedCommands() +std::set<std::string> StackImpl::expectedCommands() { const State *currentState = &(this->currentState()); std::set<std::string> res; @@ -198,17 +543,7 @@ std::set<std::string> Stack::expectedCommands() return res; } -const State &Stack::currentState() -{ - return stack.empty() ? States::None : stack.back().handler->getState(); -} - -std::string Stack::currentCommandName() -{ - return stack.empty() ? std::string{} : stack.back().handler->getName(); -} - -const State *Stack::findTargetState(const std::string &name) +const State *StackImpl::findTargetState(const std::string &name) { const State *currentState = &(this->currentState()); auto range = states.equal_range(name); @@ -222,7 +557,7 @@ const State *Stack::findTargetState(const std::string &name) return nullptr; } -const State *Stack::findTargetStateOrWildcard(const std::string &name) +const State *StackImpl::findTargetStateOrWildcard(const std::string &name) { // Try to find the target state with the given name, if none is found, try // find a matching "*" state. @@ -233,16 +568,40 @@ const State *Stack::findTargetStateOrWildcard(const std::string &name) return targetState; } -HandlerInfo &Stack::currentInfo() +const State &StackImpl::currentState() const +{ + return stack.empty() ? States::None : stack.back().state(); +} + +std::string StackImpl::currentCommandName() const +{ + return stack.empty() ? 
std::string{} : stack.back().name(); +} + +TokenSet StackImpl::currentTokens() const +{ + // TODO: Implement + return TokenSet{}; +} + +WhitespaceMode StackImpl::currentWhitespaceMode() const +{ + // TODO: Implement + return WhitespaceMode::COLLAPSE; +} + +HandlerInfo &StackImpl::currentInfo() { return stack.empty() ? EmptyHandlerInfo : stack.back(); } -HandlerInfo &Stack::lastInfo() +HandlerInfo &StackImpl::lastInfo() { return stack.size() < 2U ? EmptyHandlerInfo : stack[stack.size() - 2]; } -void Stack::endCurrentHandler() +/* Stack helper functions */ + +bool StackImpl::endCurrentHandler() { if (!stack.empty()) { // Fetch the handler info for the current top-level element @@ -266,50 +625,59 @@ void Stack::endCurrentHandler() // Remove the element from the stack stack.pop_back(); + return true; } + return false; } -void Stack::endOverdueHandlers() +bool StackImpl::prepareCurrentHandler(bool startImplicitDefaultField) { - if (!stack.empty()) { - // Fetch the handler info for the current top-level element - HandlerInfo &info = stack.back(); + // Repeat until a valid handler is found on the stack + while (!stack.empty()) { + // Fetch the handler for the current top-level element + HandlerInfo &info = currentInfo(); - // Abort if this handler currently is inside a field - if (info.inField || (!info.hadDefaultField && info.valid)) { - return; + // If the current Handler is in a field, there is nothing to be done, + // abort + if (info.inField) { + return true; } - // Otherwise end the current handler - endCurrentHandler(); - } -} + // If the current field already had a default field or is not valid, + // end it and repeat + if ((info.hadDefaultField || !startImplicitDefaultField) || + !info.valid) { + // We cannot end the command if it is marked as "range" command + if (info.range) { + return false; + } -bool Stack::ensureHandlerIsInField() -{ - // If the current handler is not in a field (and actually has a handler) - // try to start a default field - HandlerInfo 
&info = currentInfo(); - if (!info.inField && info.handler != nullptr) { - // Abort if the element already had a default field or the handler is - // not valid - if (info.hadDefaultField || !info.valid) { + // End the current handler + endCurrentHandler(); + continue; + } + + // Abort if starting new default fields is not allowed here + if (!startImplicitDefaultField) { return false; } // Try to start a new default field, abort if this did not work bool isDefault = true; if (!info.handler->fieldStart(isDefault, info.fieldIdx)) { - return false; + endCurrentHandler(); + continue; } - // Mark the field as started - info.fieldStart(true, true, true); + // Mark the field as started and return -- the field should be marked + // is implicit if this is not a field with range + info.fieldStart(true, !info.range, true); + return true; } - return true; + return false; } -bool Stack::handlersValid() +bool StackImpl::handlersValid() { for (auto it = stack.crbegin(); it != stack.crend(); it++) { if (!it->valid) { @@ -319,13 +687,131 @@ bool Stack::handlersValid() return true; } -Logger &Stack::logger() { return ctx.getLogger(); } +void StackImpl::handleData() +{ + // Repeat until we found some handle willingly consuming the data + while (true) { + // Prepare the stack -- make sure all overdue handlers are ended and + // we currently are in an open field + if (stack.empty() || !prepareCurrentHandler()) { + throw LoggableException("Did not expect any data here"); + } + + // Fetch the current handler information + HandlerInfo &info = currentInfo(); + + // If this field should not get any data, log an error and do not + // call the "data" handler + if (!info.inValidField) { + if (!info.hadDefaultField) { + logger().error("Did not expect any data here"); + } + return; + } + + // If we're currently in an invalid subtree, just eat the data and abort + if (!handlersValid()) { + return; + } + + // Fork the logger and set it as temporary logger for the "data" + // method. 
We only want to keep error messages if this was not a + // try to implicitly open a default field. + LoggerFork loggerFork = logger().fork(); + info.handler->setLogger(loggerFork); + + // Pass the data to the current Handler instance + bool valid = false; + try { + valid = info.handler->data(); + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + + // Reset the logger instance of the handler as soon as possible + info.handler->resetLogger(); + + // If placing the data here failed and we're currently in an + // implicitly opened field, just unroll the stack to the next field + // and try again + if (!valid && info.inImplicitDefaultField) { + endCurrentHandler(); + continue; + } + + // Commit the content of the logger fork. Do not change the valid flag. + loggerFork.commit(); + return; + } +} + +void StackImpl::handleToken(const Token &token) +{ + // TODO: Implement + // Just eat them for now +} + +void StackImpl::handleFieldEnd(bool endRange) +{ + // Throw away all overdue handlers + prepareCurrentHandler(false); + + // Close all implicit default fields + while (!stack.empty()) { + HandlerInfo &info = currentInfo(); + if (!info.inImplicitDefaultField || info.range) { + break; + } + endCurrentHandler(); + } + + // Fetch the information attached to the current handler + HandlerInfo &info = currentInfo(); + if (stack.empty() || (!info.inField && !endRange) || + (!info.range && endRange)) { + if (endRange) { + logger().error( + "Got end of range, but there is no command here to end"); + } else { + logger().error("Got field end, but there is no field here to end"); + } + return; + } + + // Only continue if the current handler stack is in a valid state, do not + // call the fieldEnd function if something went wrong before + if (handlersValid()) { + // End the current field if it is valid + if (info.inValidField) { + info.handler->fieldEnd(); + info.fieldEnd(); + } + + // End the complete command if this is a range command, start the + // default field for once 
if range command did not have a default field + if (info.range && endRange) { + if (!info.hadDefaultField) { + bool isDefault = true; + info.handler->fieldStart(isDefault, true); + info.fieldStart(true, true, true); + } + endCurrentHandler(); + return; + } + } + + // This command no longer is in a field + info.fieldEnd(); +} + +/* Class StackImpl public functions */ -void Stack::command(const Variant &name, const Variant::mapType &args) +void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, + bool range) { - // End handlers that already had a default field and are currently not - // active. - endOverdueHandlers(); + // Call prepareCurrentHandler once to end all overdue commands + prepareCurrentHandler(); // Make sure the given identifier is valid (preventing "*" from being // malicously passed to this function) @@ -336,14 +822,18 @@ void Stack::command(const Variant &name, const Variant::mapType &args) } while (true) { + // Prepare the stack -- make sure all overdue handlers are ended and + // we currently are in an open field + prepareCurrentHandler(); + // Try to find a target state for the given command, if none can be // found and the current command does not have an open field, then try // to create an empty default field, otherwise this is an exception const State *targetState = findTargetStateOrWildcard(name.asString()); if (targetState == nullptr) { HandlerInfo &info = currentInfo(); - if (info.inImplicitDefaultField || !info.inField) { - endCurrentHandler(); + if ((info.inImplicitDefaultField || !info.inField) && + endCurrentHandler()) { continue; } else { throw buildInvalidCommandException(name.asString(), @@ -351,12 +841,6 @@ void Stack::command(const Variant &name, const Variant::mapType &args) } } - // Make sure we're currently inside a field - if (!ensureHandlerIsInField()) { - endCurrentHandler(); - continue; - } - // Fork the logger. 
We do not want any validation errors to skip LoggerFork loggerFork = logger().fork(); @@ -365,10 +849,15 @@ void Stack::command(const Variant &name, const Variant::mapType &args) ? targetState->elementHandler : EmptyHandler::create; std::shared_ptr<Handler> handler{ - ctor({ctx, name.asString(), *targetState, name.getLocation()})}; + ctor({ctx, + *this, + *targetState, + {name.asString(), name.getLocation()}, + HandlerType::COMMAND})}; stack.emplace_back(handler); - // Fetch the HandlerInfo for the parent element and the current element + // Fetch the HandlerInfo for the parent element and the current + // element HandlerInfo &parentInfo = lastInfo(); HandlerInfo &info = currentInfo(); @@ -387,7 +876,7 @@ void Stack::command(const Variant &name, const Variant::mapType &args) handler->setLogger(loggerFork); try { - info.valid = handler->start(canonicalArgs); + info.valid = handler->startCommand(canonicalArgs); } catch (LoggableException ex) { loggerFork.log(ex); @@ -395,94 +884,65 @@ void Stack::command(const Variant &name, const Variant::mapType &args) handler->resetLogger(); } - // We started the command within an implicit default field and it is not - // valid -- remove both the new handler and the parent field from the - // stack + // We started the command within an implicit default field and it is + // not valid -- remove both the new handler and the parent field from + // the stack if (!info.valid && parentInfo.inImplicitDefaultField) { - endCurrentHandler(); - endCurrentHandler(); - continue; + // Only continue if the parent handler could actually be removed + if (endCurrentHandler() && endCurrentHandler()) { + continue; + } } - // If we ended up here, starting the command may or may not have worked, - // but after all, we cannot unroll the stack any further. Update the - // "valid" flag, commit any potential error messages and return. 
+ // If we ended up here, starting the command may or may not have + // worked, but after all, we cannot unroll the stack any further. Update + // the "valid" flag, commit any potential error messages and return. info.valid = parentInfo.valid && info.valid; + info.range = range; loggerFork.commit(); return; } } -void Stack::data(const Variant &data) +void StackImpl::annotationStart(const Variant &className, const Variant &args, + bool range) { - // End handlers that already had a default field and are currently not - // active. - endOverdueHandlers(); - - while (true) { - // Check whether there is any command the data can be sent to - if (stack.empty()) { - throw LoggableException("No command here to receive data.", data); - } - - // Fetch the current command handler information - HandlerInfo &info = currentInfo(); - - // Make sure the current handler has an open field - if (!ensureHandlerIsInField()) { - endCurrentHandler(); - continue; - } - - // If this field should not get any data, log an error and do not call - // the "data" handler - if (!info.inValidField) { - // If the "hadDefaultField" flag is set, we already issued an error - // message - if (!info.hadDefaultField) { - logger().error("Did not expect any data here", data); - } - } - - if (handlersValid() && info.inValidField) { - // Fork the logger and set it as temporary logger for the "start" - // method. We only want to keep error messages if this was not a try - // to implicitly open a default field. 
- LoggerFork loggerFork = logger().fork(); - info.handler->setLogger(loggerFork); - - // Pass the data to the current Handler instance - bool valid = false; - try { - Variant dataCopy = data; - valid = info.handler->data(dataCopy); - } - catch (LoggableException ex) { - loggerFork.log(ex); - } + // TODO +} - // Reset the logger instance as soon as possible - info.handler->resetLogger(); +void StackImpl::annotationEnd(const Variant &className, + const Variant &elementName) +{ + // TODO +} - // If placing the data here failed and we're currently in an - // implicitly opened field, just unroll the stack to the next field - // and try again - if (!valid && info.inImplicitDefaultField) { - endCurrentHandler(); - continue; - } +void StackImpl::rangeEnd() { handleFieldEnd(true); } - // Commit the content of the logger fork. Do not change the valid - // flag. - loggerFork.commit(); +void StackImpl::data(const TokenizedData &data) +{ + // Fetch a reader for the given tokenized data instance. + TokenizedDataReader reader = data.reader(); + + // Use the GuardedTemporaryPointer to make sure that the member variable + // dataReader is resetted to nullptr once this scope is left. 
+ GuardedTemporaryPointer<TokenizedDataReader> ptr(&reader, &dataReader); + + // Peek a token from the reader, repeat until all tokens have been read + Token token; + while (reader.peek(token, currentTokens(), currentWhitespaceMode())) { + // Handle the token as text data or as actual token + if (token.id == Tokens::Data) { + handleData(); + } else { + handleToken(token); } - // There was no reason to unroll the stack any further, so continue - return; + // Consume the peeked token + reader.consumePeek(); } } -void Stack::fieldStart(bool isDefault) +void StackImpl::fieldStart(bool isDefault) { // Make sure the current handler stack is not empty if (stack.empty()) { @@ -494,13 +954,14 @@ void Stack::fieldStart(bool isDefault) HandlerInfo &info = currentInfo(); if (info.inField) { logger().error( - "Got field start, but there is no command for which to start the " + "Got field start, but there is no command for which to start " + "the " "field."); return; } - // If the handler already had a default field we cannot start a new field - // (the default field always is the last field) -- mark the command as + // If the handler already had a default field we cannot start a new + // field (the default field always is the last field) -- mark the command as // invalid if (info.hadDefaultField) { logger().error(std::string("Got field start, but command \"") + @@ -534,54 +995,132 @@ void Stack::fieldStart(bool isDefault) info.fieldStart(defaultField, false, valid); } -void Stack::fieldEnd() +void StackImpl::fieldEnd() { handleFieldEnd(false); } + +/* Class StackImpl HandlerCallbacks */ + +TokenId StackImpl::registerToken(const std::string &token) { - // Unroll the stack until the next explicitly open field - while (!stack.empty()) { - HandlerInfo &info = currentInfo(); - if (info.inField && !info.inImplicitDefaultField) { - break; - } - endCurrentHandler(); - } + return tokenRegistry.registerToken(token); +} - // Fetch the information attached to the current handler - 
HandlerInfo &info = currentInfo(); - if (!info.inField || info.inImplicitDefaultField || stack.empty()) { - logger().error( - "Got field end, but there is no command for which to end the " - "field."); - return; - } +void StackImpl::unregisterToken(TokenId id) +{ + tokenRegistry.unregisterToken(id); +} - // Only continue if the current handler stack is in a valid state, do not - // call the fieldEnd function if something went wrong before - if (handlersValid() && !info.hadDefaultField && info.inValidField) { - try { - info.handler->fieldEnd(); - } - catch (LoggableException ex) { - logger().log(ex); +void StackImpl::pushTokens(const std::vector<SyntaxDescriptor> &tokens) +{ + // TODO +} + +void StackImpl::popTokens() +{ + // TODO +} + +Variant StackImpl::readData() +{ + if (dataReader != nullptr) { + TokenizedDataReaderFork dataReaderFork = dataReader->fork(); + Token token; + dataReaderFork.read(token, currentTokens(), currentWhitespaceMode()); + if (token.id == Tokens::Data) { + Variant res = Variant::fromString(token.content); + res.setLocation(token.getLocation()); + return res; } } + return Variant{}; +} - // This command no longer is in a field - info.fieldEnd(); +/* Class Stack */ + +Stack::Stack(ParserCallbacks &parser, ParserContext &ctx, + const std::multimap<std::string, const State *> &states) + : impl(new StackImpl(parser, ctx, states)) +{ } -void Stack::annotationStart(const Variant &className, const Variant &args) +Stack::~Stack() { - // TODO + // Do nothing here, stub needed because StackImpl is incomplete in hpp +} + +const State &Stack::currentState() const { return impl->currentState(); } + +std::string Stack::currentCommandName() const +{ + return impl->currentCommandName(); +} + +void Stack::commandStart(const Variant &name, const Variant::mapType &args, + bool range) +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: commandStart " << name << " " << args << " " << range + << std::endl; +#endif + impl->commandStart(name, args, range); +} + +void 
Stack::annotationStart(const Variant &className, const Variant &args, + bool range) +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: annotationStart " << className << " " << args << " " + << range << std::endl; +#endif + impl->annotationStart(className, args, range); } void Stack::annotationEnd(const Variant &className, const Variant &elementName) { - // TODO +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: annotationEnd " << className << " " << elementName + << std::endl; +#endif + impl->annotationEnd(className, elementName); } -void Stack::token(Variant token) +void Stack::rangeEnd() { - // TODO +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: rangeEnd" << std::endl; +#endif + impl->rangeEnd(); +} + +void Stack::fieldStart(bool isDefault) +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: fieldStart " << isDefault << std::endl; +#endif + impl->fieldStart(isDefault); +} + +void Stack::fieldEnd() +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: fieldEnd" << std::endl; +#endif + impl->fieldEnd(); +} + +void Stack::data(const TokenizedData &data) +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: data" << std::endl; +#endif + impl->data(data); +} + +void Stack::data(const std::string &str) +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: data (string) " << str << std::endl; +#endif + data(TokenizedData(str)); +} } } -}
\ No newline at end of file diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index b67ce82..6d42f10 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -29,235 +29,48 @@ #ifndef _OUSIA_PARSER_STACK_STACK_HPP_ #define _OUSIA_PARSER_STACK_STACK_HPP_ -#include <cstdint> - #include <map> #include <memory> -#include <set> -#include <vector> - -#include <core/common/Variant.hpp> -#include <core/parser/Parser.hpp> namespace ousia { // Forward declarations class ParserContext; -class Logger; +class TokenizedData; +class Variant; namespace parser_stack { // Forward declarations -class Handler; +class ParserCallbacks; +class StackImpl; class State; /** - * The HandlerInfo class is used internally by the stack to associate additional - * (mutable) data with a handler instance. - */ -class HandlerInfo { -public: - /** - * Pointer pointing at the actual handler instance. - */ - std::shared_ptr<Handler> handler; - - /** - * Next field index to be passed to the "fieldStart" function of the Handler - * class. - */ - size_t fieldIdx; - - /** - * Set to true if the handler is valid (which is the case if the "start" - * method has returned true). If the handler is invalid, no more calls are - * directed at it until it can be removed from the stack. - */ - bool valid : 1; - - /** - * Set to true if this is an implicit handler, that was created when the - * current stack state was deduced. - */ - bool implicit : 1; - - /** - * Set to true if the handler currently is in a field. - */ - bool inField : 1; - - /** - * Set to true if the handler currently is in the default field. - */ - bool inDefaultField : 1; - - /** - * Set to true if the handler currently is in an implicitly started default - * field. - */ - bool inImplicitDefaultField : 1; - - /** - * Set to false if this field is only opened pro-forma and does not accept - * any data. Otherwise set to true. 
- */ - bool inValidField : 1; - - /** - * Set to true, if the default field was already started. - */ - bool hadDefaultField : 1; - - /** - * Default constructor of the HandlerInfo class. - */ - HandlerInfo(); - /** - * Constructor of the HandlerInfo class, allows to set all flags manually. - */ - HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField, - bool inImplicitDefaultField, bool inValidField); - - /** - * Constructor of the HandlerInfo class, taking a shared_ptr to the handler - * to which additional information should be attached. - */ - HandlerInfo(std::shared_ptr<Handler> handler); - - /** - * Destructor of the HandlerInfo class (to allow Handler to be forward - * declared). - */ - ~HandlerInfo(); - - /** - * Updates the "field" flags according to a "fieldStart" event. - */ - void fieldStart(bool isDefault, bool isImplicit, bool isValid); - - /** - * Updates the "fields" flags according to a "fieldEnd" event. - */ - void fieldEnd(); -}; - -/** * The Stack class is a pushdown automaton responsible for turning a command * stream into a tree of Node instances. It does so by following a state * transition graph and creating a set of Handler instances, which are placed - * on the stack. + * on the stack. Additionally it is responsible for the normalization of + * Annotations and for handling tokens. */ class Stack { private: /** - * Reference at the parser context. - */ - ParserContext &ctx; - - /** - * Map containing all registered command names and the corresponding - * state descriptors. - */ - const std::multimap<std::string, const State *> &states; - - /** - * Internal stack used for managing the currently active Handler instances. - */ - std::vector<HandlerInfo> stack; - - /** - * Return the reference in the Logger instance stored within the context. - */ - Logger &logger(); - - /** - * Used internally to get all expected command names for the current state. - * This function is used to build error messages. 
- * - * @return a set of strings containing the names of the expected commands. + * Pointer at the internal implementation */ - std::set<std::string> expectedCommands(); - - /** - * Returns the targetState for a command with the given name that can be - * reached from the current state. - * - * @param name is the name of the requested command. - * @return nullptr if no target state was found, a pointer at the target - * state otherwise. - */ - const State *findTargetState(const std::string &name); - - /** - * Returns the targetState for a command with the given name that can be - * reached from the current state, also including the wildcard "*" state. - * Throws an exception if the given target state is not a valid identifier. - * - * @param name is the name of the requested command. - * @return nullptr if no target state was found, a pointer at the target - * state otherwise. - */ - const State *findTargetStateOrWildcard(const std::string &name); - - /** - * Tries to reconstruct the parser state from the Scope instance of the - * ParserContext given in the constructor. This functionality is needed for - * including files,as the Parser of the included file needs to be brought to - * an equivalent state as the one in the including file. - */ - void deduceState(); - - /** - * Returns a reference at the current HandlerInfo instance (or a stub - * HandlerInfo instance if the stack is empty). - */ - HandlerInfo ¤tInfo(); - - /** - * Returns a reference at the last HandlerInfo instance (or a stub - * HandlerInfo instance if the stack has only one element). - */ - HandlerInfo &lastInfo(); - - /** - * Ends all handlers that currently are not inside a field and already had - * a default field. This method is called whenever the data() and command() - * events are reached. - */ - void endOverdueHandlers(); - - /** - * Ends the current handler and removes the corresponding element from the - * stack. 
- */ - void endCurrentHandler(); - - /** - * Tries to start a default field for the current handler, if currently the - * handler is not inside a field and did not have a default field yet. - * - * @return true if the handler is inside a field, false if no field could - * be started. - */ - bool ensureHandlerIsInField(); - - /** - * Returns true if all handlers on the stack are currently valid, or false - * if at least one handler is invalid. - * - * @return true if all handlers on the stack are valid. - */ - bool handlersValid(); + std::unique_ptr<StackImpl> impl; public: /** * Creates a new instance of the Stack class. * + * @param parser is an implementation of the ParserCallbacks instance to + * which certain calls are directed. * @param ctx is the parser context the parser stack is working on. * @param states is a map containing the command names and pointers at the * corresponding State instances. */ - Stack(ParserContext &ctx, + Stack(ParserCallbacks &parser, ParserContext &ctx, const std::multimap<std::string, const State *> &states); /** @@ -268,10 +81,10 @@ public: /** * Returns the state the Stack instance currently is in. * - * @return the state of the currently active Handler instance or STATE_NONE - * if no handler is on the stack. + * @return the state of the currently active Handler instance or + * States::None if no handler is on the stack. */ - const State ¤tState(); + const State ¤tState() const; /** * Returns the command name that is currently being handled. @@ -279,7 +92,7 @@ public: * @return the name of the command currently being handled by the active * Handler instance or an empty string if no handler is currently active. */ - std::string currentCommandName(); + std::string currentCommandName() const; /** * Function that should be called whenever a new command is reached. @@ -288,17 +101,36 @@ public: * separator ':') and its corresponding location. Must be a string variant. 
* @param args is a map containing the arguments that were passed to the * command. + * @param range if true, the started command has an explicit range. */ - void command(const Variant &name, const Variant::mapType &args); + void commandStart(const Variant &name, const Variant::mapType &args, + bool range = false); /** - * Function that shuold be called whenever character data is found in the - * input stream. May only be called if the currently is a command on the - * stack. + * Function that should be called whenever an annotation starts. + * + * @param name is the name of the annotation class. + * @param args is a map variant containing the arguments that were passed + * to the annotation. + * @param range if true, the annotation fields have an explicit range. + */ + void annotationStart(const Variant &className, const Variant &args, + bool range = false); + + /** + * Function that should be called whenever an annotation ends. * - * @param data is a string variant containing the data that has been found. + * @param name is the name of the annotation class that was ended. + * @param annotationName is the name of the annotation that was ended. */ - void data(const Variant &data); + void annotationEnd(const Variant &className, const Variant &elementName); + + /** + * Function the should be called whenever a ranged command or annotation + * ends. Must be called if the range parameter range was set to true when + * annotationStart() or commandStart() were called. + */ + void rangeEnd(); /** * Function that should be called whenever a new field starts. Fields of the @@ -317,29 +149,25 @@ public: void fieldEnd(); /** - * Function that should be called whenever an annotation starts. - * - * @param name is the name of the annotation class. - * @param args is a map variant containing the arguments that were passed - * to the annotation. 
- */ - void annotationStart(const Variant &className, const Variant &args); - - /** - * Function that should be called whenever an annotation ends. + * Function that should be called whenever character data is found in the + * input stream. May only be called if there currently is a command on the + * stack. * - * @param name is the name of the annotation class that was ended. - * @param annotationName is the name of the annotation that was ended. + * @param data is a TokenizedData instance containing the pre-segmented data + * that should be read. */ - void annotationEnd(const Variant &className, const Variant &elementName); + void data(const TokenizedData &data); /** - * Function that should be called whenever a previously registered token - * is found in the input stream. + * Function that may be called whenever character data is found in the + * input stream. May only be called if the currently is a command on the + * stack. This method is mainly intended for unit testing. Pass a + * TokenizedData instance to the * - * @param token is string variant containing the token that was encountered. + * @param str is a string containing the data that should be passed to the + * tokenizer. 
*/ - void token(Variant token); + void data(const std::string &str); }; } } diff --git a/src/core/parser/stack/State.cpp b/src/core/parser/stack/State.cpp index d72f533..0feeed6 100644 --- a/src/core/parser/stack/State.cpp +++ b/src/core/parser/stack/State.cpp @@ -23,17 +23,19 @@ namespace parser_stack { /* Class State */ -State::State() : elementHandler(nullptr) {} +State::State() : elementHandler(nullptr), supportsAnnotations(false), supportsTokens(false) {} State::State(StateSet parents, Arguments arguments, RttiSet createdNodeTypes, HandlerConstructor elementHandler, - bool supportsAnnotations) + bool supportsAnnotations, + bool supportsTokens) : parents(parents), arguments(arguments), createdNodeTypes(createdNodeTypes), elementHandler(elementHandler), - supportsAnnotations(supportsAnnotations) + supportsAnnotations(supportsAnnotations), + supportsTokens(supportsTokens) { } @@ -93,6 +95,13 @@ StateBuilder &StateBuilder::supportsAnnotations(bool supportsAnnotations) return *this; } +StateBuilder &StateBuilder::supportsTokens(bool supportsTokens) +{ + state.supportsTokens = supportsTokens; + return *this; +} + + const State &StateBuilder::build() const { return state; } /* Class StateDeductor */ diff --git a/src/core/parser/stack/State.hpp b/src/core/parser/stack/State.hpp index 4766235..011ccd6 100644 --- a/src/core/parser/stack/State.hpp +++ b/src/core/parser/stack/State.hpp @@ -82,13 +82,21 @@ struct State { /** * Set to true if this handler does support annotations. This is almost - * always false (e.g. all description handlers), except for document + * always false (e.g. all description handlers), except for document * element handlers. */ - bool supportsAnnotations; + bool supportsAnnotations : 1; /** - * Default constructor, initializes the handlers with nullptr. + * Set to true if this handler does support tokens. This is almost + * always false (e.g. all description handlers), except for document + * element handlers. 
+ */ + bool supportsTokens : 1; + + /** + * Default constructor, initializes the handlers with nullptr and the + * supportsAnnotations and supportsTokens flags with false. */ State(); @@ -108,11 +116,12 @@ struct State { * be nullptr in which case no handler instance is created. * @param supportsAnnotations specifies whether annotations are supported * here at all. + * @param supportsTokens specified whether tokens are supported here at all. */ State(StateSet parents, Arguments arguments = Arguments{}, - RttiSet createdNodeTypes = RttiSet{}, - HandlerConstructor elementHandler = nullptr, - bool supportsAnnotations = false); + RttiSet createdNodeTypes = RttiSet{}, + HandlerConstructor elementHandler = nullptr, + bool supportsAnnotations = false, bool supportsTokens = false); /** * Creates this State from the given StateBuilder instance. @@ -220,6 +229,16 @@ public: StateBuilder &supportsAnnotations(bool supportsAnnotations); /** + * Sets the state of the "supportsTokens" flag (default value is false). + * + * @param supportsTokens should be set to true, if the elementHandler + * registered for this state is capable of handling tokens. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &supportsTokens(bool supportsTokens); + + /** * Returns a reference at the internal State instance that was built * using the StateBuilder. * @@ -275,7 +294,7 @@ public: * @param states is a list of states that should be checked. */ StateDeductor(std::vector<const Rtti *> signature, - std::vector<const State *> states); + std::vector<const State *> states); /** * Selects all active states from the given states. 
Only considers those diff --git a/src/core/parser/stack/TokenRegistry.cpp b/src/core/parser/stack/TokenRegistry.cpp new file mode 100644 index 0000000..c135b98 --- /dev/null +++ b/src/core/parser/stack/TokenRegistry.cpp @@ -0,0 +1,80 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "Callbacks.hpp" +#include "TokenRegistry.hpp" + +namespace ousia { +namespace parser_stack { + +TokenRegistry::~TokenRegistry() +{ + for (const auto &tid: tokenIds) { + parser.unregisterToken(tid.first); + } +} + +TokenId TokenRegistry::registerToken(const std::string &token) +{ + // Check whether the given token is already registered + auto it = tokens.find(token); + if (it != tokens.end()) { + // Increment the reference count + size_t &refCount = it->second.second; + refCount++; + + // Return the token id + return it->second.first; + } + + // Register the token in the parser + TokenId id = parser.registerToken(token); + tokens[token] = std::pair<TokenId, size_t>(id, 1); + tokenIds[id] = token; + return id; +} + +void TokenRegistry::unregisterToken(TokenId id) +{ + // Lookup the token corresponding to the given token id + auto tokenIt = tokenIds.find(id); + if (tokenIt != tokenIds.end()) { + const std::string &token = tokenIt->second; + // Lookup the reference count for the corresponding token + auto idIt = 
tokens.find(token); + if (idIt != tokens.end()) { + // Decrement the reference count, abort if the refCount is larger + // than zero + size_t &refCount = idIt->second.second; + refCount--; + if (refCount > 0) { + return; + } + + // Unregister the token from the parser + parser.unregisterToken(id); + + // Unregister the token from the internal tokens map + tokens.erase(token); + } + // Unregister the token from the internal id map + tokenIds.erase(id); + } +} +} +} diff --git a/src/core/parser/stack/TokenRegistry.hpp b/src/core/parser/stack/TokenRegistry.hpp new file mode 100644 index 0000000..545db39 --- /dev/null +++ b/src/core/parser/stack/TokenRegistry.hpp @@ -0,0 +1,114 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file TokenRegistry.hpp + * + * Contains the TokenRegistry class used for registering all user defined tokens + * during the parsing process. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_TOKEN_REGISTRY_HPP_ +#define _OUSIA_PARSER_STACK_TOKEN_REGISTRY_HPP_ + +#include <string> +#include <unordered_map> + +#include <core/common/Token.hpp> + +namespace ousia { +namespace parser_stack { + +// Forward declarations +class ParserCallbacks; + +/** + * The TokenRegistry class is used for registering all user defined tokens + * during the Parsing process. The TokenRegistry class acts as an adapter + * between the parser which allocates a TokenId for each unique token and the + * Handler classes which may register the same token multiple times and expect + * the same TokenId to be returned for the same token. + */ +class TokenRegistry { +private: + /** + * Reference at the ParserCallback instance the tokens are relayed to. + */ + ParserCallbacks &parser; + + /** + * Store containing all TokenId instances for all registered tokens. The map + * maps from the token strings to the corresponding TokenId and a reference + * count. + */ + std::unordered_map<std::string, std::pair<TokenId, size_t>> tokens; + + /** + * Reverse map containing the string corresponding to a TokenId. + */ + std::unordered_map<TokenId, std::string> tokenIds; + +public: + /** + * Constructor of the TokenRegistry class. + * + * @param parser is the underlying parser implementing the ParserCallbacks + * interface to which all calls are relayed. + */ + TokenRegistry(ParserCallbacks &parser) : parser(parser) {} + + /** + * Destructor of the TokenRegistry class, removes all registered tokens from + * the parser. + */ + ~TokenRegistry(); + + /* No copy construction */ + TokenRegistry(const TokenRegistry &) = delete; + + /* No assignment */ + TokenRegistry &operator=(const TokenRegistry &) = delete; + + /** + * Registers the given string token in the underlying parser and returns the + * TokenId of that token. If the same token string is given multiple times, + * the same TokenId is returned. 
The token is only registered once in the + * parser. + * + * @param token is the token that should be registered. + * @return the TokenId associated with this token. + */ + TokenId registerToken(const std::string &token); + + /** + * Unregisters the token with the given TokenId from the parser. Note that + * the token will only be unregistered if unregisterToken() has been called + * as many times as registerToken() for the same token. + * + * @param id is the id of the token returned by registerToken() that should + * be unregistered. + */ + void unregisterToken(TokenId id); +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_TOKEN_REGISTRY_HPP_ */ + diff --git a/src/core/parser/stack/TokenStack.cpp b/src/core/parser/stack/TokenStack.cpp new file mode 100644 index 0000000..ac1d94e --- /dev/null +++ b/src/core/parser/stack/TokenStack.cpp @@ -0,0 +1,45 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "TokenStack.hpp" + +namespace ousia { +namespace parser_stack { + +void TokenStack::pushTokens(const std::vector<SyntaxDescriptor> &tokens) +{ + stack.push_back(tokens); +} + +void TokenStack::popTokens() { stack.pop_back(); } + +TokenSet TokenStack::tokens() const +{ + if (stack.empty() && parentStack != nullptr) { + return parentStack->tokens(); + } + + TokenSet res; + for (const SyntaxDescriptor &descr : stack.back()) { + descr.insertIntoTokenSet(res); + } + return res; +} +} +} + diff --git a/src/core/parser/stack/TokenStack.hpp b/src/core/parser/stack/TokenStack.hpp new file mode 100644 index 0000000..f2e7edc --- /dev/null +++ b/src/core/parser/stack/TokenStack.hpp @@ -0,0 +1,112 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file TokenStack.hpp + * + * Contains the TokenStack class used for collecting the currently enabled user + * defined tokens on a per-field basis. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_TOKEN_STACK_HPP_ +#define _OUSIA_PARSER_STACK_TOKEN_STACK_HPP_ + +#include <memory> +#include <vector> + +#include <core/common/Token.hpp> +#include <core/model/Syntax.hpp> + +namespace ousia { +namespace parser_stack { + +/** + * The TokenStack class is used by the Stack class to collect all currently + * enabled user defined tokens. + */ +class TokenStack { +private: + /** + * Shared pointer at the parent TokenStack instance. May be nullptr, in + * which case no parent TokenStack instance exists. + */ + const TokenStack *parentStack; + + /** + * Stack containing vectors of TokenSyntaxDescriptor instances as given by + * the user. + */ + std::vector<std::vector<SyntaxDescriptor>> stack; + + /** + * Constructor of the TokenStack class. + * + * @param parentStack is a pointer at the underlying parentStack instance + * to which calls should be forwarded if no data has been pushed onto this + * stack instance. + */ + TokenStack(const TokenStack *parentStack) : parentStack(parentStack) {} + +public: + /** + * Default constructor of the TokenStack class with no reference at a parent + * stack. + */ + TokenStack() : TokenStack(nullptr) {} + + /** + * Constructor of the TokenStack class with a reference at a parent + * TokenStack instance. + * + * @param parentStack is a reference at a parent TokenStack instance. If no + * data has yet been pushed onto this instance, calls will be forwarded to + * the parent stack. + */ + TokenStack(const TokenStack &parentStack) : TokenStack(&parentStack) {} + + /** + * Pushes a list of SyntaxDescriptor instances onto the internal stack. + * + * @param tokens is a list of SyntaxDescriptor instances that should be + * stored on the stack. + */ + void pushTokens(const std::vector<SyntaxDescriptor> &tokens); + + /** + * Removes the previously pushed list of tokens from the stack. 
+ */ + void popTokens(); + + /** + * Returns a set containing all currently enabled tokens. The set of enabled + * tokens are those tokens that were pushed last onto the stack. This set + * has to be passed to the TokenizedData instance in order to gather all + * tokens that are currently possible. + * + * @return a set of tokens containing all the Tokens that are currently + * possible. + */ + TokenSet tokens() const; +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_TOKEN_STACK_HPP_ */ + diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index b62f684..73bcf62 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -32,7 +32,7 @@ namespace parser_stack { /* TypesystemHandler */ -bool TypesystemHandler::start(Variant::mapType &args) +bool TypesystemHandler::startCommand(Variant::mapType &args) { // Create the typesystem instance Rooted<Typesystem> typesystem = @@ -63,7 +63,7 @@ void TypesystemHandler::end() { scope().pop(logger()); } /* TypesystemEnumHandler */ -bool TypesystemEnumHandler::start(Variant::mapType &args) +bool TypesystemEnumHandler::startCommand(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -91,17 +91,17 @@ void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData, /* TypesystemStructHandler */ -bool TypesystemStructHandler::start(Variant::mapType &args) +bool TypesystemStructHandler::startCommand(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); // Fetch the arguments used for creating this type - const std::string &name = args["name"].asString(); + const std::string &structNmae = args["name"].asString(); const std::string &parent = args["parent"].asString(); // Fetch the current typesystem and create the struct node Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>(); - Rooted<StructType> structType = typesystem->createStructType(name); + Rooted<StructType> structType = 
typesystem->createStructType(structNmae); structType->setLocation(location()); // Try to resolve the parent type and set it as parent structure @@ -124,18 +124,18 @@ void TypesystemStructHandler::end() { scope().pop(logger()); } /* TypesystemStructFieldHandler */ -bool TypesystemStructFieldHandler::start(Variant::mapType &args) +bool TypesystemStructFieldHandler::startCommand(Variant::mapType &args) { // Read the argument values - const std::string &name = args["name"].asString(); + const std::string &fieldName = args["name"].asString(); const std::string &type = args["type"].asString(); const Variant &defaultValue = args["default"]; const bool optional = !(defaultValue.isObject() && defaultValue.asObject() == nullptr); Rooted<StructType> structType = scope().selectOrThrow<StructType>(); - Rooted<Attribute> attribute = - structType->createAttribute(name, defaultValue, optional, logger()); + Rooted<Attribute> attribute = structType->createAttribute( + fieldName, defaultValue, optional, logger()); attribute->setLocation(location()); // Try to resolve the type and default value @@ -163,17 +163,17 @@ bool TypesystemStructFieldHandler::start(Variant::mapType &args) /* TypesystemConstantHandler */ -bool TypesystemConstantHandler::start(Variant::mapType &args) +bool TypesystemConstantHandler::startCommand(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); // Read the argument values - const std::string &name = args["name"].asString(); + const std::string &constantName = args["name"].asString(); const std::string &type = args["type"].asString(); const Variant &value = args["value"]; Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>(); - Rooted<Constant> constant = typesystem->createConstant(name, value); + Rooted<Constant> constant = typesystem->createConstant(constantName, value); constant->setLocation(location()); // Try to resolve the type diff --git a/src/core/parser/stack/TypesystemHandler.hpp 
b/src/core/parser/stack/TypesystemHandler.hpp index 85494f1..0773a3a 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -43,7 +43,7 @@ class TypesystemHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; /** @@ -67,7 +67,7 @@ class TypesystemEnumHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; /** @@ -114,7 +114,7 @@ class TypesystemStructHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; /** @@ -139,7 +139,7 @@ class TypesystemStructFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; /** * Creates a new instance of the TypesystemStructFieldHandler. @@ -162,7 +162,7 @@ class TypesystemConstantHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; /** * Creates a new instance of the TypesystemConstantHandler. diff --git a/src/core/parser/utils/SourceOffsetVector.hpp b/src/core/parser/utils/SourceOffsetVector.hpp index d15055a..f322a88 100644 --- a/src/core/parser/utils/SourceOffsetVector.hpp +++ b/src/core/parser/utils/SourceOffsetVector.hpp @@ -33,6 +33,7 @@ #include <limits> #include <vector> #include <utility> +#include <unordered_map> #include <core/common/Location.hpp> @@ -43,6 +44,9 @@ namespace ousia { * a delta compression. 
*/ class SourceOffsetVector { +public: + using OffsPair = std::pair<SourceOffset, SourceOffset>; + private: /** * Type used for representing the length of a character. @@ -82,9 +86,12 @@ private: std::vector<SourceOffset> offsets; /** + * Map used to store discontinuities in the character offsets. + */ + std::unordered_map<size_t, OffsPair> gaps; + + /** * Last position given as "end" position in the storeOffset() method. - * Used to adapt the length of the previous element in case start and end - * positions do not match. */ SourceOffset lastEnd; @@ -105,19 +112,22 @@ public: // Make sure (end - start) is smaller than MAX_LEN assert(end - start < MAX_LEN); - // Adapt the length of the previous character in case there is a gap - if (!lens.empty() && start > lastEnd) { - lens.back() += start - lastEnd; - } - lastEnd = end; - // Store an absolute offset every OFFSET_INTERVAL elements if ((lens.size() & OFFSET_INTERVAL_MASK) == 0) { offsets.push_back(start); } - // Store the length - lens.push_back(end - start); + // Adapt the length of the previous character in case there is a gap + if (!lens.empty() && start > lastEnd) { + // There is a discontinuity, store the given offsets in the "gaps" + // map + gaps[lens.size()] = OffsPair(start, end); + lens.push_back(MAX_LEN); + } else { + // Store the length + lens.push_back(end - start); + } + lastEnd = end; } /** @@ -127,14 +137,13 @@ public: * read. * @return a pair containing start and end source offset. 
*/ - std::pair<SourceOffset, SourceOffset> loadOffset(size_t idx) + OffsPair loadOffset(size_t idx) const { // Special treatment for the last character const size_t count = lens.size(); if (idx > 0 && idx == count) { auto offs = loadOffset(count - 1); - return std::pair<SourceOffset, SourceOffset>(offs.second, - offs.second); + return OffsPair(offs.second, offs.second); } // Calculate the start index in the lens vector and in the offsets @@ -146,18 +155,66 @@ public: assert(idx < count); assert(offsetIdx < offsets.size()); + // If the length of the last character is MAX_LEN, the position is + // stored in the "gaps" list + if (lens[idx] == MAX_LEN) { + auto it = gaps.find(idx); + assert(it != gaps.end()); + return it->second; + } + // Sum over the length starting with the start offset SourceOffset start = offsets[offsetIdx]; for (size_t i = sumStartIdx; i < idx; i++) { - start += lens[i]; + if (lens[i] == MAX_LEN) { + auto it = gaps.find(i); + assert(it != gaps.end()); + start = it->second.first; + } else { + start += lens[i]; + } } - return std::pair<SourceOffset, SourceOffset>(start, start + lens[idx]); + return OffsPair(start, start + lens[idx]); } /** * Returns the number of characters for which offsets are stored. */ - size_t size() { return lens.size(); } + size_t size() const { return lens.size(); } + + /** + * Trims the length of the TokenizedData instance to the given length. + * Removes all token matches that lie within the trimmed region. + * + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. + */ + void trim(size_t length) + { + if (length < size()) { + lens.resize(length); + if (length > 0) { + offsets.resize((length >> LOG2_OFFSET_INTERVAL) + 1); + lastEnd = loadOffset(length - 1).second; + } else { + offsets.clear(); + gaps.clear(); + lastEnd = 0; + } + } + } + + /** + * Resets the SourceOffsetVector to the state it had when it was + * constructed. 
+ */ + void clear() + { + lens.clear(); + offsets.clear(); + gaps.clear(); + lastEnd = 0; + } }; } diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp index 80cc945..a45d3ff 100644 --- a/src/core/parser/utils/TokenTrie.cpp +++ b/src/core/parser/utils/TokenTrie.cpp @@ -22,12 +22,12 @@ namespace ousia { /* Class DynamicTokenTree::Node */ -TokenTrie::Node::Node() : type(Tokens::Empty) {} +TokenTrie::Node::Node() : id(Tokens::Empty) {} /* Class DynamicTokenTree */ bool TokenTrie::registerToken(const std::string &token, - TokenId type) noexcept + TokenId id) noexcept { // Abort if the token is empty -- this would taint the root node if (token.empty()) { @@ -48,12 +48,12 @@ bool TokenTrie::registerToken(const std::string &token, } // If the resulting node already has a type set, we're screwed. - if (node->type != Tokens::Empty) { + if (node->id != Tokens::Empty) { return false; } // Otherwise just set the type to the given type. - node->type = type; + node->id = id; return true; } @@ -78,7 +78,7 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept // Reset the subtree handler if this node has another type node = it->second.get(); - if ((node->type != Tokens::Empty || node->children.size() > 1) && + if ((node->id != Tokens::Empty || node->children.size() > 1) && (i + 1 != token.size())) { subtreeRoot = node; subtreeKey = token[i + 1]; @@ -86,14 +86,14 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept } // If the node type is already Tokens::Empty, we cannot do anything here - if (node->type == Tokens::Empty) { + if (node->id == Tokens::Empty) { return false; } // If the target node has children, we cannot delete the subtree. 
Set the // type to Tokens::Empty instead if (!node->children.empty()) { - node->type = Tokens::Empty; + node->id = Tokens::Empty; return true; } @@ -113,7 +113,7 @@ TokenId TokenTrie::hasToken(const std::string &token) const noexcept } node = it->second.get(); } - return node->type; + return node->id; } } diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp index b2d1539..c470acc 100644 --- a/src/core/parser/utils/TokenTrie.hpp +++ b/src/core/parser/utils/TokenTrie.hpp @@ -33,7 +33,7 @@ #include <limits> #include <unordered_map> -#include "Token.hpp" +#include <core/common/Token.hpp> namespace ousia { @@ -75,10 +75,9 @@ public: ChildMap children; /** - * Reference at the corresponding token descriptor. Set to nullptr if - * no token is attached to this node. + * Id of the token represented by this node. */ - TokenId type; + TokenId id; /** * Default constructor, initializes the descriptor with nullptr. @@ -99,10 +98,10 @@ public: * * @param token is the character sequence that should be registered as * token. - * @param type is the descriptor that should be set for this token. + * @param id is the descriptor that should be set for this token. * @return true if the operation is successful, false otherwise. */ - bool registerToken(const std::string &token, TokenId type) noexcept; + bool registerToken(const std::string &token, TokenId id) noexcept; /** * Unregisters the token from the token tree. Returns true if the token was diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index fc7bfaf..d8a8b37 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -26,6 +26,11 @@ #include "TokenizedData.hpp" namespace ousia { +/** + * Maximum token length. 
+ */ +constexpr TokenLength MaxTokenLength = std::numeric_limits<TokenLength>::max(); + namespace { /** * Structure used to represent the position of a token in the internal @@ -48,6 +53,11 @@ struct TokenMark { TokenLength len; /** + * Specifies whether the token is special or not. + */ + bool special; + + /** * Constructor of the TokenMark structure, initializes all members with the * given values. * @@ -55,9 +65,10 @@ struct TokenMark { * @param bufStart is the start position of the TokenMark in the internal * character buffer. * @param len is the length of the token. + * @param special modifies the sort order, special tokens are prefered. */ - TokenMark(TokenId id, size_t bufStart, TokenLength len) - : bufStart(bufStart), id(id), len(len) + TokenMark(TokenId id, size_t bufStart, TokenLength len, bool special) + : bufStart(bufStart), id(id), len(len), special(special) { } @@ -72,7 +83,8 @@ struct TokenMark { TokenMark(size_t bufStart) : bufStart(bufStart), id(Tokens::Empty), - len(std::numeric_limits<TokenLength>::max()) + len(MaxTokenLength), + special(true) { } @@ -86,8 +98,22 @@ struct TokenMark { */ friend bool operator<(const TokenMark &m1, const TokenMark &m2) { - return (m1.bufStart < m2.bufStart) || - (m1.bufStart == m2.bufStart && m1.len > m2.len); + // Prefer the mark with the smaller bufStart + if (m1.bufStart < m2.bufStart) { + return true; + } + + // Special handling for marks with the same bufStart + if (m1.bufStart == m2.bufStart) { + // If exactly one of the two marks is special, return true if this + // one is special + if (m1.special != m2.special) { + return m1.special; + } + // Otherwise prefer longer marks + return m1.len > m2.len; + } + return false; } }; } @@ -110,9 +136,9 @@ private: std::vector<char> buf; /** - * Vector containing all token marks. + * Buffset storing the "protected" flag of the character data. 
*/ - std::vector<TokenMark> marks; + std::vector<bool> protectedChars; /** * Vector storing all the character offsets efficiently. @@ -120,9 +146,34 @@ private: SourceOffsetVector offsets; /** + * Vector containing all token marks. + */ + mutable std::vector<TokenMark> marks; + + /** + * Position of the first linebreak in a sequence of linebreaks. + */ + size_t firstLinebreak; + + /** + * Current indentation level. + */ + uint16_t currentIndentation; + + /** + * Last indentation level. + */ + uint16_t lastIndentation; + + /** + * Number of linebreaks without any content between them. + */ + uint16_t numLinebreaks; + + /** * Flag indicating whether the internal "marks" vector is sorted. */ - bool sorted; + mutable bool sorted; public: /** @@ -132,7 +183,7 @@ public: * @param sourceId is the source identifier that should be used for * constructing the location when returning tokens. */ - TokenizedDataImpl(SourceId sourceId) : sourceId(sourceId), sorted(true) {} + TokenizedDataImpl(SourceId sourceId) : sourceId(sourceId) { clear(); } /** * Appends a complete string to the internal character buffer and extends @@ -140,22 +191,22 @@ public: * * @param data is the string that should be appended to the buffer. * @param offsStart is the start offset in bytes in the input file. + * @param protect if set to true, the appended characters will not be + * affected by whitespace handling, they will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. 
*/ - size_t append(const std::string &data, SourceOffset offsStart) - { // Append the data to the internal buffer - buf.insert(buf.end(), data.begin(), data.end()); - - // Extend the text regions, interpolate the source position (this may - // yield incorrect results) - const size_t size = buf.size(); - for (SourceOffset offs = offsStart; offs < offsStart + data.size(); - offs++) { - offsets.storeOffset(offs, offs + 1); + size_t append(const std::string &data, SourceOffset offsStart, bool protect) + { + for (size_t i = 0; i < data.size(); i++) { + if (offsStart != InvalidSourceOffset) { + append(data[i], offsStart + i, offsStart + i + 1, protect); + } else { + append(data[i], InvalidSourceOffset, InvalidSourceOffset, + protect); + } } - - return size; + return size(); } /** @@ -165,16 +216,86 @@ public: * @param c is the character that should be appended to the buffer. * @param offsStart is the start offset in bytes in the input file. * @param offsEnd is the end offset in bytes in the input file. + * @param protect if set to true, the appended character will not be + * affected by whitespace handling, it will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. 
*/ - size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd) + size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd, + bool protect) { // Add the character to the list and store the location of the character // in the source file buf.push_back(c); + protectedChars.push_back(protect); offsets.storeOffset(offsStart, offsEnd); - return buf.size(); + + // Insert special tokens + const size_t size = buf.size(); + const bool isWhitespace = Utils::isWhitespace(c); + const bool isLinebreak = Utils::isLinebreak(c); + + // Handle linebreaks + if (isLinebreak) { + // Mark linebreaks as linebreak + mark(Tokens::Newline, size - 1, 1, false); + + // The linebreak sequence started at the previous character + if (numLinebreaks == 0) { + firstLinebreak = size - 1; + } + + // Reset the indentation + currentIndentation = 0; + + // Increment the number of linebreaks + numLinebreaks++; + + const size_t markStart = firstLinebreak; + const size_t markLength = size - firstLinebreak; + + // Issue two consecutive linebreaks as paragraph token + if (numLinebreaks == 2) { + mark(Tokens::Paragraph, markStart, markLength, false); + } + + // Issue three consecutive linebreaks as paragraph token + if (numLinebreaks >= 3) { + mark(Tokens::Section, markStart, markLength, false); + } + } else if (isWhitespace) { + // Count the whitespace characters at the beginning of the line + if (numLinebreaks > 0) { + // Implement the UNIX/Pyhton rule for tabs: Tabs extend to the + // next multiple of eight. 
+ if (c == '\t') { + currentIndentation = (currentIndentation + 8) & ~7; + } else { + currentIndentation++; + } + } + } + + // Issue indent and unindent tokens + if (!isWhitespace && numLinebreaks > 0) { + // Issue a larger indentation than that in the previous line as + // "Indent" token + if (currentIndentation > lastIndentation) { + mark(Tokens::Indent, size - 1, 0, true); + } + + // Issue a smaller indentation than that in the previous line as + // "Dedent" token + if (currentIndentation < lastIndentation) { + mark(Tokens::Dedent, size - 1, 0, true); + } + + // Reset the internal state machine + lastIndentation = currentIndentation; + numLinebreaks = 0; + } + + return size; } /** @@ -184,11 +305,12 @@ public: * @param bufStart is the start position in the internal buffer. Use the * values returned by append to calculate the start position. * @param len is the length of the token. + * @param special tags the mark as "special", prefering it in the sort order */ - void mark(TokenId id, size_t bufStart, TokenLength len) + void mark(TokenId id, size_t bufStart, TokenLength len, bool special) { // Push the new instance back onto the list - marks.emplace_back(id, bufStart, len); + marks.emplace_back(id, bufStart, len, special); // Update the sorted flag as soon as more than one element is in the // list @@ -212,9 +334,13 @@ public: * @return true if a token was returned, false if no more tokens are * available. */ - bool next(Token &token, WhitespaceMode mode, - const std::unordered_set<TokenId> &tokens, size_t &cursor) + bool next(Token &token, WhitespaceMode mode, const TokenSet &tokens, + TokenizedDataCursor &cursor) const { + // Some variables for convenient access + size_t &bufPos = cursor.bufPos; + size_t &markPos = cursor.markPos; + // Sort the "marks" vector if it has not been sorted yet. 
if (!sorted) { std::sort(marks.begin(), marks.end()); @@ -222,10 +348,11 @@ public: } // Fetch the next larger TokenMark instance, make sure the token is in - // the "enabled" list - auto it = - std::lower_bound(marks.begin(), marks.end(), TokenMark(cursor)); - while (it != marks.end() && tokens.count(it->id) == 0) { + // the "enabled" list and within the buffer range + auto it = std::lower_bound(marks.begin() + markPos, marks.end(), + TokenMark(bufPos)); + while (it != marks.end() && (tokens.count(it->id) == 0 || + it->bufStart + it->len > buf.size())) { it++; } @@ -236,15 +363,15 @@ public: // Depending on the whitespace mode, fetch all the data between the // cursor position and the calculated end position and return a token // containing that data. - if (cursor < end && cursor < buf.size()) { + if (bufPos < end && bufPos < buf.size()) { switch (mode) { case WhitespaceMode::PRESERVE: { token = Token( - Tokens::Data, std::string(&buf[cursor], end - cursor), + Tokens::Data, std::string(&buf[bufPos], end - bufPos), SourceLocation(sourceId, - offsets.loadOffset(cursor).first, + offsets.loadOffset(bufPos).first, offsets.loadOffset(end).first)); - cursor = end; + bufPos = end; return true; } case WhitespaceMode::TRIM: @@ -254,30 +381,35 @@ public: size_t stringStart; size_t stringEnd; std::string content; + const char *cBuf = &buf[bufPos]; + auto filter = [cBuf, this](size_t i) -> bool { + return Utils::isWhitespace(cBuf[i]) && + !protectedChars[i]; + }; if (mode == WhitespaceMode::TRIM) { - content = Utils::trim(&buf[cursor], end - cursor, - stringStart, stringEnd); + content = Utils::trim(cBuf, end - bufPos, stringStart, + stringEnd, filter); } else { - content = Utils::collapse(&buf[cursor], end - cursor, - stringStart, stringEnd); + content = Utils::collapse( + cBuf, end - bufPos, stringStart, stringEnd, filter); } // If the resulting string is empty (only whitespaces), // abort if (content.empty()) { - cursor = end; + bufPos = end; break; } // Calculate the 
absolute positions and return the token - stringStart += cursor; - stringEnd += cursor; + stringStart += bufPos; + stringEnd += bufPos; token = Token( Tokens::Data, content, SourceLocation(sourceId, offsets.loadOffset(stringStart).first, offsets.loadOffset(stringEnd).first)); - cursor = end; + bufPos = end; return true; } } @@ -286,14 +418,18 @@ public: // If start equals end, we're currently directly at a token // instance. Return this token and advance the cursor to the end of // the token. - if (cursor == end && it != marks.end()) { + if (bufPos == end && it != marks.end()) { const size_t tokenStart = it->bufStart; const size_t tokenEnd = it->bufStart + it->len; token = Token( it->id, std::string(&buf[tokenStart], it->len), SourceLocation(sourceId, offsets.loadOffset(tokenStart).first, offsets.loadOffset(tokenEnd).first)); - cursor = tokenEnd; + + // Update the cursor, consume the token by incrementing the marks + // pos counter + bufPos = tokenEnd; + markPos = it - marks.begin() + 1; return true; } @@ -304,11 +440,64 @@ public: } /** + * Resets the TokenizedDataImpl instance to the state it had when it was + * constructred. + */ + void clear() + { + buf.clear(); + protectedChars.clear(); + offsets.clear(); + marks.clear(); + firstLinebreak = 0; + currentIndentation = 0; + lastIndentation = 0; + numLinebreaks = 1; // Assume the stream starts with a linebreak + sorted = true; + } + + /** + * Trims the length of the TokenizedDataImpl instance to the given length. + * + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. + */ + void trim(size_t length) + { + if (length < size()) { + buf.resize(length); + protectedChars.resize(length); + offsets.trim(length); + } + } + + /** * Returns the current size of the internal buffer. * * @return the size of the internal character buffer. 
*/ - size_t getSize() { return buf.size(); } + size_t size() const { return buf.size(); } + + /** + * Returns true if no data is in the data buffer. + * + * @return true if the "buf" instance has no data. + */ + bool empty() const { return buf.empty(); } + + /** + * Returns the current location of all data in the buffer. + * + * @return the location of the entire data represented by this instance. + */ + SourceLocation getLocation() const + { + if (empty()) { + return SourceLocation{sourceId}; + } + return SourceLocation{sourceId, offsets.loadOffset(0).first, + offsets.loadOffset(size()).second}; + } }; /* Class TokenizedData */ @@ -316,50 +505,90 @@ public: TokenizedData::TokenizedData() : TokenizedData(InvalidSourceId) {} TokenizedData::TokenizedData(SourceId sourceId) - : impl(std::make_shared<TokenizedDataImpl>(sourceId)), cursor(0) + : impl(std::make_shared<TokenizedDataImpl>(sourceId)) { } +TokenizedData::TokenizedData(const std::string &data, SourceOffset offsStart, + SourceId sourceId) + : TokenizedData(sourceId) +{ + append(data, offsStart); +} + TokenizedData::~TokenizedData() {} -size_t TokenizedData::append(const std::string &data, SourceOffset offsStart) +size_t TokenizedData::append(const std::string &data, SourceOffset offsStart, + bool protect) { - return impl->append(data, offsStart); + return impl->append(data, offsStart, protect); } size_t TokenizedData::append(char c, SourceOffset offsStart, - SourceOffset offsEnd) + SourceOffset offsEnd, bool protect) { - return impl->append(c, offsStart, offsEnd); + return impl->append(c, offsStart, offsEnd, protect); } void TokenizedData::mark(TokenId id, TokenLength len) { - impl->mark(id, impl->getSize() - len, len); + impl->mark(id, impl->size() - len, len, false); } void TokenizedData::mark(TokenId id, size_t bufStart, TokenLength len) { - impl->mark(id, bufStart, len); + impl->mark(id, bufStart, len, false); } -bool TokenizedData::next(Token &token, WhitespaceMode mode) +void TokenizedData::clear() { 
impl->clear(); } + +void TokenizedData::trim(size_t length) { impl->trim(length); } + +size_t TokenizedData::size() const { return impl->size(); } + +bool TokenizedData::empty() const { return impl->empty(); } + +SourceLocation TokenizedData::getLocation() const { - return impl->next(token, mode, tokens, cursor); + return impl->getLocation(); } -bool TokenizedData::text(Token &token, WhitespaceMode mode) +TokenizedDataReader TokenizedData::reader() const { - // Copy the current cursor position to not update the actual cursor position - // if the operation was not successful - size_t cursorCopy = cursor; - if (!impl->next(token, mode, tokens, cursorCopy) || - token.id != Tokens::Data) { - return false; - } + return TokenizedDataReader(impl, TokenizedDataCursor(), + TokenizedDataCursor()); +} + +/* Class TokenizedDataReader */ - // There is indeed a text token, update the internal cursor position - cursor = cursorCopy; - return true; +TokenizedDataReader::TokenizedDataReader( + std::shared_ptr<const TokenizedDataImpl> impl, + const TokenizedDataCursor &readCursor, + const TokenizedDataCursor &peekCursor) + : impl(impl), readCursor(readCursor), peekCursor(peekCursor) +{ +} + +TokenizedDataReaderFork TokenizedDataReader::fork() +{ + return TokenizedDataReaderFork(*this, impl, readCursor, peekCursor); +} + +bool TokenizedDataReader::atEnd() const +{ + return readCursor.bufPos >= impl->size(); +} + +bool TokenizedDataReader::read(Token &token, const TokenSet &tokens, + WhitespaceMode mode) +{ + peekCursor = readCursor; + return impl->next(token, mode, tokens, readCursor); +} + +bool TokenizedDataReader::peek(Token &token, const TokenSet &tokens, + WhitespaceMode mode) +{ + return impl->next(token, mode, tokens, peekCursor); } } diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp index 38125c4..bc937f2 100644 --- a/src/core/parser/utils/TokenizedData.hpp +++ b/src/core/parser/utils/TokenizedData.hpp @@ -37,40 +37,48 @@ #include 
<core/common/Location.hpp> #include <core/common/Whitespace.hpp> - -#include "Token.hpp" +#include <core/common/Token.hpp> namespace ousia { // Forward declaration class TokenizedDataImpl; +class TokenizedDataReader; +class TokenizedDataReaderFork; /** - * The TokenizedData class stores data extracted from a user defined document. - * As users are capable of defining their own tokens and these are only valid - * in certain scopes TokenizedData allows to divide the stored data into chunks - * separated by tokens. + * Internally used structure representing a cursor within the TokenizedData + * stream. */ -class TokenizedData { -private: +struct TokenizedDataCursor { /** - * Shared pointer pointing at the internal data. This data is shared when - * copying TokenizedData instances, which corresponds to forking a - * TokenizedData instance. + * Position within the byte buffer. */ - std::shared_ptr<TokenizedDataImpl> impl; + size_t bufPos; /** - * Contains all currently enabled token ids. + * Position within the token mark buffer. */ - std::unordered_set<TokenId> tokens; + size_t markPos; /** - * Position from which the last element was read from the internal buffer. - * This information is not shared with the other instances of TokenizedData - * pointing at the same location. + * Default constructor. The resulting cursor points at the beginning of the + * stream. + */ + TokenizedDataCursor() : bufPos(0), markPos(0) {} +}; + +/** + * The TokenizedData class stores data extracted from a user defined document. + * The data stored in TokenizedData + */ +class TokenizedData { +private: + /** + * Shared pointer pointing at the internal data. This data is shared with + * all the TokenizedDataReader instances. */ - size_t cursor; + std::shared_ptr<TokenizedDataImpl> impl; public: /** @@ -88,6 +96,18 @@ public: TokenizedData(SourceId sourceId); /** + * Creates a new instance of TokenizedData, takes a SourceId and an initial + * string buffer. 
+ * + * @param data is the string that should be appended to the buffer. + * @param offsStart is the start offset in bytes in the input file. + * @param sourceId is the source identifier that should be used for + * constructing the location when returning tokens. + */ + TokenizedData(const std::string &data, SourceOffset offsStart = 0, + SourceId sourceId = InvalidSourceId); + + /** * Destructor. Needs to be defined explicitly for freeing a shared pointer * of the incomplete TokenizedDataImpl type. */ @@ -101,10 +121,13 @@ public: * * @param data is the string that should be appended to the buffer. * @param offsStart is the start offset in bytes in the input file. + * @param protect if set to true, the appended characters will not be + * affected by whitespace handling, they will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. */ - size_t append(const std::string &data, SourceOffset offsStart = 0); + size_t append(const std::string &data, SourceOffset offsStart = 0, + bool protect = false); /** * Appends a single character to the internal character buffer. @@ -112,10 +135,13 @@ public: * @param c is the character that should be appended to the buffer. * @param start is the start offset in bytes in the input file. * @param end is the end offset in bytes in the input file. + * @param protect if set to true, the appended character will not be + * affected by whitespace handling, it will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. */ - size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd); + size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd, + bool protect = false); /** * Stores a token ending at the last character of the current buffer. 
@@ -136,54 +162,194 @@ public: void mark(TokenId id, size_t bufStart, TokenLength len); /** - * Enables a single token id. Enabled tokens will no longer be returned as - * text. Instead, when querying for the next token, TokenizedData will - * return them as token and not as part of a Text token. + * Resets the TokenizedData instance to the state it had when it was + * constructred. + */ + void clear(); + + /** + * Trims the length of the TokenizedData instance to the given length. Note + * that this function does not remove any token matches for performance + * reasons, it merely renders them incaccessible. Appending new data after + * calling trim will make the token marks accessible again. Thus this method + * should be the last function called to modify the data buffer and the + * token marks. * - * @param id is the TokenId of the token that should be enabled. + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. + */ + void trim(size_t length); + + /** + * Returns the number of characters currently represented by this + * TokenizedData instance. */ - void enableToken(TokenId id) { tokens.insert(id); } + size_t size() const; /** - * Enables a set of token ids. Enabled tokens will no longer be returned as - * text. Instead, when querying for the next token, TokenizedData will - * return them as token and not as part of a Text token. + * Returns true if the TokenizedData instance is empty, false otherwise. * - * @param ids is the TokenId of the token that should be enabled. + * @return true if not data is stored inside the TokenizedData instance. */ - void enableToken(const std::unordered_set<TokenId> &ids) - { - tokens.insert(ids.begin(), ids.end()); - } + bool empty() const; + + /** + * Returns the location of the entire TokenizedData instance. + * + * @return the location of the entire data represented by this instance. 
+ */ + SourceLocation getLocation() const; + + /** + * Returns a TokenizedDataReader instance that can be used to access the + * data. + * + * @return a new TokenizedDataReader instance pointing at the beginning of + * the internal buffer. + */ + TokenizedDataReader reader() const; +}; + +/** + * The TokenizedDataReader + */ +class TokenizedDataReader { +private: + friend TokenizedData; + + /** + * Shared pointer pointing at the internal data. This data is shared with + * all the TokenizedDataReader instances. + */ + std::shared_ptr<const TokenizedDataImpl> impl; + + /** + * Position from which the last element was read from the internal buffer. + */ + TokenizedDataCursor readCursor; + + /** + * Position from which the last element was peeked from the internal buffer. + */ + TokenizedDataCursor peekCursor; + +protected: + /** + * Protected constructor of TokenizedDataReader, taking a reference to the + * internal TokenizedDataImpl structure storing the data that is accessed by + * the reader. + * + * @param impl is the TokenizedDataImpl instance that holds the actual data. + * @param readCursor is the cursor position from which tokens and text are + * read. + * @param peekCursor is the cursor position from which tokens and text are + * peeked. + */ + TokenizedDataReader(std::shared_ptr<const TokenizedDataImpl> impl, + const TokenizedDataCursor &readCursor, + const TokenizedDataCursor &peekCursor); + +public: + /** + * Returns a new TokenizedDataReaderFork from which tokens and text can be + * read without advancing this reader instance. + */ + TokenizedDataReaderFork fork(); + + /** + * Returns true if this TokenizedData instance is at the end. + * + * @return true if the end of the TokenizedData instance has been reached. + */ + bool atEnd() const; /** * Stores the next token in the given token reference, returns true if the - * operation was successful, false if there are no more tokens. + * operation was successful, false if there are no more tokens. 
Advances the + * internal cursor and re * * @param token is an output parameter into which the read token will be * stored. The TokenId is set to Tokens::Empty if there are no more tokens. + * @param tokens is the set of token identifers, representing the currently + * enabled tokens. * @param mode is the whitespace mode that should be used when a text token * is returned. * @return true if the operation was successful and there is a next token, * false if there are no more tokens. */ - bool next(Token &token, WhitespaceMode mode = WhitespaceMode::COLLAPSE); + bool read(Token &token, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM); /** - * Stores the next text token in the given token reference, returns true if - * the operation was successful (there was indeed a text token), false if - * the next token is not a text token or there were no more tokens. + * Stores the next token in the given token reference, returns true if the + * operation was successful, false if there are no more tokens. * * @param token is an output parameter into which the read token will be * stored. The TokenId is set to Tokens::Empty if there are no more tokens. + * @param tokens is the set of token identifers, representing the currently + * enabled tokens. * @param mode is the whitespace mode that should be used when a text token * is returned. * @return true if the operation was successful and there is a next token, * false if there are no more tokens. */ - bool text(Token &token, WhitespaceMode mode = WhitespaceMode::COLLAPSE); + bool peek(Token &token, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM); + + /** + * Consumes the peeked tokens, the read cursor will now be at the position + * of the peek cursor. + */ + void consumePeek() { readCursor = peekCursor; } + + /** + * Resets the peek cursor to the position of the read cursor. 
+ */ + void resetPeek() { peekCursor = readCursor; } +}; + +/** + * The TokenizedDataReaderFork class is created when forking a + * TokenizedDataReader + */ +class TokenizedDataReaderFork : public TokenizedDataReader { +private: + friend TokenizedDataReader; + + /** + * Reference pointing at the parent TokenizedDataReader to which changes may + * be commited. + */ + TokenizedDataReader &parent; + + /** + * Private constructor of TokenizedDataReaderFork, taking a reference to the + * internal TokenizedDataImpl structure storing the data that is accessed by + * the reader and a reference at the parent TokenizedDataReader. + * + * @param parent is the TokenizedDataReader instance to which the current + * read/peek progress may be commited. + * @param impl is the TokenizedDataImpl instance that holds the actual data. + * @param readCursor is the cursor position from which tokens and text are + * read. + * @param peekCursor is the cursor position from which tokens and text are + * peeked. + */ + TokenizedDataReaderFork(TokenizedDataReader &parent, + std::shared_ptr<const TokenizedDataImpl> impl, + const TokenizedDataCursor &readCursor, + const TokenizedDataCursor &peekCursor) + : TokenizedDataReader(impl, readCursor, peekCursor), parent(parent) + { + } + +public: + /** + * Commits the read/peek progress to the underlying parent. 
+ */ + void commit() { parent = *this; } }; } -#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */ +#endif /* _OUSIA_TOKENIZED_DATA_HPP_ */ diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index 2e0ac13..8d540a6 100644 --- a/src/core/parser/utils/Tokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -22,8 +22,8 @@ #include <core/common/CharReader.hpp> #include <core/common/Exceptions.hpp> #include <core/common/Utils.hpp> -#include <core/common/WhitespaceHandler.hpp> +#include "TokenizedData.hpp" #include "Tokenizer.hpp" namespace ousia { @@ -42,26 +42,33 @@ struct TokenMatch { Token token; /** - * Current length of the data within the text handler. The text buffer needs - * to be trimmed to this length if this token matches. + * Position at which this token starts in the TokenizedData instance. */ - size_t textLength; + size_t dataStartOffset; /** - * End location of the current text handler. This location needs to be used - * for the text token that is emitted before the actual token. + * Set to true if the matched token is a primary token. */ - size_t textEnd; + bool primary; /** * Constructor of the TokenMatch class. */ - TokenMatch() : textLength(0), textEnd(0) {} + TokenMatch() : dataStartOffset(0), primary(false) {} /** * Returns true if this TokenMatch instance actually represents a match. + * + * @return true if the TokenMatch actually has a match. + */ + bool hasMatch() const { return token.id != Tokens::Empty; } + + /** + * Returns the length of the matched token. + * + * @return the length of the token string. */ - bool hasMatch() { return token.id != Tokens::Empty; } + size_t size() const { return token.content.size(); } }; /* Internal class TokenLookup */ @@ -83,36 +90,28 @@ private: size_t start; /** - * Current length of the data within the text handler. The text buffer needs - * to be trimmed to this length if this token matches. + * Position at which this token starts in the TokenizedData instance. 
*/ - size_t textLength; - - /** - * End location of the current text handler. This location needs to be used - * for the text token that is emitted before the actual token. - */ - size_t textEnd; + size_t dataStartOffset; public: /** * Constructor of the TokenLookup class. * * @param node is the current node. - * @param start is the start position. - * @param textLength is the text buffer length of the previous text token. - * @param textEnd is the current end location of the previous text token. + * @param start is the start position in the source file. + * @param dataStartOffset is the current length of the TokenizedData buffer. */ - TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength, - size_t textEnd) - : node(node), start(start), textLength(textLength), textEnd(textEnd) + TokenLookup(const TokenTrie::Node *node, size_t start, + size_t dataStartOffset) + : node(node), start(start), dataStartOffset(dataStartOffset) { } /** * Tries to extend the current path in the token trie with the given - * character. If a complete token is matched, stores this match in the - * tokens list (in case it is longer than any previous token). + * character. If a complete token is matched, stores the match in the given + * TokenMatch reference and returns true. * * @param c is the character that should be appended to the current prefix. * @param lookups is a list to which new TokeLookup instances are added -- @@ -123,73 +122,48 @@ public: * Tokenizer. * @param end is the end byte offset of the current character. * @param sourceId is the source if of this file. + * @return true if a token was matched, false otherwise. 
*/ - void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match, - const std::vector<std::string> &tokens, SourceOffset end, - SourceId sourceId) + bool advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match, + const std::vector<Tokenizer::TokenDescriptor> &tokens, + SourceOffset end, SourceId sourceId) { - // Check whether we can continue the current token path with the given - // character without visiting an already visited node + // Set to true once a token has been matched + bool res = false; + + // Check whether we can continue the current token path, if not, abort auto it = node->children.find(c); if (it == node->children.end()) { - return; + return res; } // Check whether the new node represents a complete token a whether it // is longer than the current token. If yes, replace the current token. node = it->second.get(); - if (node->type != Tokens::Empty) { - const std::string &str = tokens[node->type]; - size_t len = str.size(); - if (len > match.token.content.size()) { - match.token = - Token{node->type, str, {sourceId, start, end}}; - match.textLength = textLength; - match.textEnd = textEnd; - } + if (node->id != Tokens::Empty) { + const Tokenizer::TokenDescriptor &descr = tokens[node->id]; + match.token = Token(node->id, descr.string, + SourceLocation(sourceId, start, end)); + match.dataStartOffset = dataStartOffset; + match.primary = descr.primary; + res = true; } // If this state can possibly be advanced, store it in the states list. if (!node->children.empty()) { lookups.emplace_back(*this); } + return res; } }; - -/** - * Transforms the given token into a data token containing the extracted - * text. - * - * @param handler is the WhitespaceHandler containing the collected data. - * @param token is the output token to which the text should be written. - * @param sourceId is the source id of the underlying file. 
- */ -static void buildDataToken(const WhitespaceHandler &handler, TokenMatch &match, - SourceId sourceId) -{ - if (match.hasMatch()) { - match.token.content = - std::string{handler.textBuf.data(), match.textLength}; - match.token.location = - SourceLocation{sourceId, handler.textStart, match.textEnd}; - } else { - match.token.content = handler.toString(); - match.token.location = - SourceLocation{sourceId, handler.textStart, handler.textEnd}; - } - match.token.id = Tokens::Data; -} } /* Class Tokenizer */ -Tokenizer::Tokenizer(WhitespaceMode whitespaceMode) - : whitespaceMode(whitespaceMode), nextTokenId(0) -{ -} +Tokenizer::Tokenizer() : nextTokenId(0) {} -template <typename TextHandler, bool read> -bool Tokenizer::next(CharReader &reader, Token &token) +template <bool read> +bool Tokenizer::next(CharReader &reader, Token &token, TokenizedData &data) { // If we're in the read mode, reset the char reader peek position to the // current read position @@ -199,45 +173,63 @@ bool Tokenizer::next(CharReader &reader, Token &token) // Prepare the lookups in the token trie const TokenTrie::Node *root = trie.getRoot(); - TokenMatch match; + TokenMatch bestMatch; std::vector<TokenLookup> lookups; std::vector<TokenLookup> nextLookups; - // Instantiate the text handler - TextHandler textHandler; - // Peek characters from the reader and try to advance the current token tree // cursor char c; + const size_t initialDataSize = data.size(); size_t charStart = reader.getPeekOffset(); const SourceId sourceId = reader.getSourceId(); while (reader.peek(c)) { const size_t charEnd = reader.getPeekOffset(); - const size_t textLength = textHandler.textBuf.size(); - const size_t textEnd = textHandler.textEnd; + const size_t dataStartOffset = data.size(); // If we do not have a match yet, start a new lookup from the root - if (!match.hasMatch()) { - TokenLookup{root, charStart, textLength, textEnd}.advance( - c, nextLookups, match, tokens, charEnd, sourceId); + if (!bestMatch.hasMatch() || 
!bestMatch.primary) { + lookups.emplace_back(root, charStart, dataStartOffset); } // Try to advance all other lookups with the new character + TokenMatch match; for (TokenLookup &lookup : lookups) { - lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId); + // Continue if the current lookup + if (!lookup.advance(c, nextLookups, match, tokens, charEnd, + sourceId)) { + continue; + } + + // Replace the best match with longest token + if (match.size() > bestMatch.size()) { + bestMatch = match; + } + + // If the matched token is a non-primary token -- mark the match in + // the TokenizedData list + if (!match.primary) { + data.mark(match.token.id, data.size() - match.size() + 1, + match.size()); + } } - // We have found a token and there are no more states to advance or the - // text handler has found something -- abort to return the new token - if (match.hasMatch()) { - if ((nextLookups.empty() || textHandler.hasText())) { + + // If a token has been found and the token is a primary token, check + // whether we have to abort, otherwise if we have a non-primary match, + // reset it once it can no longer be advanced + if (bestMatch.hasMatch() && nextLookups.empty()) { + if (bestMatch.primary) { break; + } else { + bestMatch = TokenMatch{}; } - } else { - // Record all incomming characters - textHandler.append(c, charStart, charEnd); } + // Record all incomming characters + data.append(c, charStart, charEnd); + + // Swap the lookups and the nextLookups list lookups = std::move(nextLookups); nextLookups.clear(); @@ -246,60 +238,57 @@ bool Tokenizer::next(CharReader &reader, Token &token) charStart = charEnd; } - // If we found text, emit that text - if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { - buildDataToken(textHandler, match, sourceId); + // If we found data, emit a corresponding data token + if (data.size() > initialDataSize && + (!bestMatch.hasMatch() || !bestMatch.primary || + bestMatch.dataStartOffset > initialDataSize)) { 
+ // If we have a "bestMatch" wich starts after text data has started, + // trim the TokenizedData to this offset + if (bestMatch.dataStartOffset > initialDataSize && bestMatch.primary) { + data.trim(bestMatch.dataStartOffset); + } + + // Create a token containing the data location + bestMatch.token = Token{data.getLocation()}; + } else if (bestMatch.hasMatch() && bestMatch.primary && + bestMatch.dataStartOffset == initialDataSize) { + data.trim(initialDataSize); } // Move the read/peek cursor to the end of the token, abort if an error // happens while doing so - if (match.hasMatch()) { + if (bestMatch.hasMatch()) { // Make sure we have a valid location - if (match.token.location.getEnd() == InvalidSourceOffset) { + if (bestMatch.token.location.getEnd() == InvalidSourceOffset) { throw OusiaException{"Token end position offset out of range"}; } // Seek to the end of the current token - const size_t end = match.token.location.getEnd(); + const size_t end = bestMatch.token.location.getEnd(); if (read) { reader.seek(end); } else { reader.seekPeekCursor(end); } - token = match.token; + + token = bestMatch.token; } else { token = Token{}; } - return match.hasMatch(); + return bestMatch.hasMatch(); } -bool Tokenizer::read(CharReader &reader, Token &token) +bool Tokenizer::read(CharReader &reader, Token &token, TokenizedData &data) { - switch (whitespaceMode) { - case WhitespaceMode::PRESERVE: - return next<PreservingWhitespaceHandler, true>(reader, token); - case WhitespaceMode::TRIM: - return next<TrimmingWhitespaceHandler, true>(reader, token); - case WhitespaceMode::COLLAPSE: - return next<CollapsingWhitespaceHandler, true>(reader, token); - } - return false; + return next<true>(reader, token, data); } -bool Tokenizer::peek(CharReader &reader, Token &token) +bool Tokenizer::peek(CharReader &reader, Token &token, TokenizedData &data) { - switch (whitespaceMode) { - case WhitespaceMode::PRESERVE: - return next<PreservingWhitespaceHandler, false>(reader, token); - case 
WhitespaceMode::TRIM: - return next<TrimmingWhitespaceHandler, false>(reader, token); - case WhitespaceMode::COLLAPSE: - return next<CollapsingWhitespaceHandler, false>(reader, token); - } - return false; + return next<false>(reader, token, data); } -TokenId Tokenizer::registerToken(const std::string &token) +TokenId Tokenizer::registerToken(const std::string &token, bool primary) { // Abort if an empty token should be registered if (token.empty()) { @@ -309,8 +298,8 @@ TokenId Tokenizer::registerToken(const std::string &token) // Search for a new slot in the tokens list TokenId type = Tokens::Empty; for (size_t i = nextTokenId; i < tokens.size(); i++) { - if (tokens[i].empty()) { - tokens[i] = token; + if (!tokens[i].valid()) { + tokens[i] = TokenDescriptor(token, primary); type = i; break; } @@ -320,62 +309,47 @@ TokenId Tokenizer::registerToken(const std::string &token) // override the special token type handles if (type == Tokens::Empty) { type = tokens.size(); - if (type == Tokens::Data || type == Tokens::Empty) { + if (type >= Tokens::MaxTokenId) { throw OusiaException{"Token type ids depleted!"}; } - tokens.emplace_back(token); + tokens.emplace_back(token, primary); } nextTokenId = type + 1; - // Try to register the token in the trie -- if this fails, remove it - // from the tokens list + // Try to register the token in the trie -- if this fails, remove it from + // the tokens list if (!trie.registerToken(token, type)) { - tokens[type] = std::string{}; + tokens[type] = TokenDescriptor(); nextTokenId = type; return Tokens::Empty; } return type; } -bool Tokenizer::unregisterToken(TokenId type) +bool Tokenizer::unregisterToken(TokenId id) { // Unregister the token from the trie, abort if an invalid type is given - if (type < tokens.size() && trie.unregisterToken(tokens[type])) { - tokens[type] = std::string{}; - nextTokenId = type; + if (id < tokens.size() && trie.unregisterToken(tokens[id].string)) { + tokens[id] = TokenDescriptor(); + nextTokenId = id; return 
true; } return false; } -std::string Tokenizer::getTokenString(TokenId type) -{ - if (type < tokens.size()) { - return tokens[type]; - } - return std::string{}; -} +static Tokenizer::TokenDescriptor EmptyTokenDescriptor; -void Tokenizer::setWhitespaceMode(WhitespaceMode mode) +const Tokenizer::TokenDescriptor &Tokenizer::lookupToken(TokenId id) const { - whitespaceMode = mode; + if (id < tokens.size()) { + return tokens[id]; + } + return EmptyTokenDescriptor; } -WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; } - /* Explicitly instantiate all possible instantiations of the "next" member function */ -template bool Tokenizer::next<PreservingWhitespaceHandler, false>( - CharReader &reader, Token &token); -template bool Tokenizer::next<TrimmingWhitespaceHandler, false>( - CharReader &reader, Token &token); -template bool Tokenizer::next<CollapsingWhitespaceHandler, false>( - CharReader &reader, Token &token); -template bool Tokenizer::next<PreservingWhitespaceHandler, true>( - CharReader &reader, Token &token); -template bool Tokenizer::next<TrimmingWhitespaceHandler, true>( - CharReader &reader, Token &token); -template bool Tokenizer::next<CollapsingWhitespaceHandler, true>( - CharReader &reader, Token &token); +template bool Tokenizer::next<false>(CharReader &, Token &, TokenizedData &); +template bool Tokenizer::next<true>(CharReader &, Token &, TokenizedData &); } diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp index f21c6a3..74e3f0d 100644 --- a/src/core/parser/utils/Tokenizer.hpp +++ b/src/core/parser/utils/Tokenizer.hpp @@ -19,8 +19,8 @@ /** * @file Tokenizer.hpp * - * Tokenizer that can be reconfigured at runtime used for parsing the plain - * text format. + * Tokenizer that can be reconfigured at runtime and is used for parsing the + * plain text format. 
* * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -28,44 +28,80 @@ #ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_ #define _OUSIA_DYNAMIC_TOKENIZER_HPP_ -#include <set> +#include <cstdint> #include <string> #include <vector> #include <core/common/Location.hpp> -#include <core/common/Whitespace.hpp> +#include <core/common/Token.hpp> -#include "Token.hpp" #include "TokenTrie.hpp" namespace ousia { // Forward declarations class CharReader; +class TokenizedData; /** * The Tokenizer is used to extract tokens and chunks of text from a - * CharReader. It allows to register and unregister tokens while parsing and - * to modify the handling of whitespace characters. Note that the - * Tokenizer always tries to extract the longest possible token from the - * tokenizer. + * CharReader. It allows to register and unregister tokens while parsing. Note + * that the Tokenizer always tries to extract the longest possible token from + * the tokenizer. Tokens can be registered as primary or non-primary token. If + * a Token is registered as a primary token, it is returned as a single Token + * instance if it occurs. In the non-primary case the token is returned as part + * of a segmented TokenizedData instance. */ class Tokenizer { -private: +public: /** - * Internally used token trie. This object holds all registered tokens. + * Internally used structure describing a registered token. */ - TokenTrie trie; + struct TokenDescriptor { + /** + * String describing the token. + */ + std::string string; + + /** + * Set to true if this token is primary. + */ + bool primary; + + /** + * Constructor of the TokenDescriptor class. + * + * @param string is the string representation of the registered token. + * @param primary specifies whether the token is a primary token that + * should be returned as a single token, or a secondary token, that + * should be returned as part of TokenizedData. 
+ */ + TokenDescriptor(const std::string &string, bool primary) + : string(string), primary(primary) + { + } + + /** + * Default constructor. + */ + TokenDescriptor() : primary(false) {} + + /** + * Returns true if the TokenDescriptor represents a valid token. + */ + bool valid() { return !string.empty(); } + }; +private: /** - * Flag defining whether whitespaces should be preserved or not. + * Internally used token trie. This object holds all registered tokens. */ - WhitespaceMode whitespaceMode; + TokenTrie trie; /** * Vector containing all registered token types. */ - std::vector<std::string> tokens; + std::vector<TokenDescriptor> tokens; /** * Next index in the tokens list where to search for a new token id. @@ -74,90 +110,78 @@ private: /** * Templated function used internally to read the current token. The - * function is templated in order to force code generation for all six - * combiations of whitespace modes and reading/peeking. + * function is templated in order to force optimized code generation for + * both reading and peeking. * - * @tparam TextHandler is the type to be used for the textHandler instance. - * @tparam read specifies whether the function should start from and advance - * the read pointer of the char reader. + * @tparam read specifies whether the method should read the token or just + * peek. * @param reader is the CharReader instance from which the data should be * read. * @param token is the token structure into which the token information * should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return false if the end of the stream has been reached, true otherwise. */ - template <typename TextHandler, bool read> - bool next(CharReader &reader, Token &token); + template <bool read> + bool next(CharReader &reader, Token &token, TokenizedData &data); public: /** * Constructor of the Tokenizer class. 
- * - * @param whitespaceMode specifies how whitespace should be handled. */ - Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); + Tokenizer(); /** - * Registers the given string as a token. Returns a const pointer at a - * TokenDescriptor that will be used to reference the newly created token. + * Registers the given string as a token. Returns a unique identifier + * describing the registered token. * * @param token is the token string that should be registered. - * @return a unique identifier for the registered token or EmptyToken if + * @param primary specifies whether the token is a primary token -- if true, + * the token will be returned as a single, standalone token. Otherwise the + * token will be returned as part of a "TokenizedData" structure. + * @return a unique identifier for the registered token or Tokens::Empty if * an error occured. */ - TokenId registerToken(const std::string &token); + TokenId registerToken(const std::string &token, bool primary = true); /** * Unregisters the token belonging to the given TokenId. * * @param type is the token type that should be unregistered. The - *TokenId - * must have been returned by registerToken. + * TokenId must have been returned by registerToken. * @return true if the operation was successful, false otherwise (e.g. - * because the given TokenDescriptor was already unregistered). + * because the token with the given TokenId was already unregistered). */ - bool unregisterToken(TokenId type); + bool unregisterToken(TokenId id); /** * Returns the token that was registered under the given TokenId id or - *an - * empty string if an invalid TokenId id is given. + * an empty string if an invalid TokenId id is given. * - * @param type is the TokenId id for which the corresponding token - *string + * @param id is the TokenId for which the corresponding TokenDescriptor * should be returned. - * @return the registered token string or an empty string if the given type - * was invalid. 
- */ - std::string getTokenString(TokenId type); - - /** - * Sets the whitespace mode. - * - * @param whitespaceMode defines how whitespace should be treated in text - * tokens. - */ - void setWhitespaceMode(WhitespaceMode mode); - - /** - * Returns the current value of the whitespace mode. - * - * @return the whitespace mode. + * @return the registered TokenDescriptor or an invalid TokenDescriptor if + * the given TokenId is invalid. */ - WhitespaceMode getWhitespaceMode(); + const TokenDescriptor& lookupToken(TokenId id) const; /** * Reads a new token from the CharReader and stores it in the given - * Token instance. + * Token instance. If the token has the id Tokens::Data, use the "getData" + * method to fetch a reference at the underlying TokenizedData instance + * storing the data. * * @param reader is the CharReader instance from which the data should be * read. * @param token is a reference at the token instance into which the Token * information should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return true if a token could be read, false if the end of the stream * has been reached. */ - bool read(CharReader &reader, Token &token); + bool read(CharReader &reader, Token &token, TokenizedData &data); /** * The peek method does not advance the read position of the char reader, @@ -167,10 +191,12 @@ public: * read. * @param token is a reference at the token instance into which the Token * information should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return true if a token could be read, false if the end of the stream * has been reached. */ - bool peek(CharReader &reader, Token &token); + bool peek(CharReader &reader, Token &token, TokenizedData &data); }; } |