From 5a67fc7d682ddba6a862aacf616d02cd20b727eb Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Tue, 24 Feb 2015 02:13:46 +0100 Subject: start of branch, commit log will be rewritten --- CMakeLists.txt | 2 +- src/core/common/Token.cpp | 24 +++ src/core/common/Token.hpp | 181 +++++++++++++++++ src/core/common/WhitespaceHandler.hpp | 284 --------------------------- src/core/parser/stack/DocumentHandler.cpp | 24 ++- src/core/parser/stack/DocumentHandler.hpp | 4 +- src/core/parser/stack/Handler.cpp | 25 ++- src/core/parser/stack/Handler.hpp | 74 ++++--- src/core/parser/stack/Stack.cpp | 55 ++++-- src/core/parser/stack/Stack.hpp | 18 +- src/core/parser/utils/SourceOffsetVector.hpp | 28 ++- src/core/parser/utils/Token.cpp | 24 --- src/core/parser/utils/Token.hpp | 142 -------------- src/core/parser/utils/TokenTrie.cpp | 16 +- src/core/parser/utils/TokenTrie.hpp | 11 +- src/core/parser/utils/TokenizedData.cpp | 133 +++++++++++-- src/core/parser/utils/TokenizedData.hpp | 214 ++++++++++++++++---- src/core/parser/utils/Tokenizer.cpp | 271 ++++++++++++------------- src/core/parser/utils/Tokenizer.hpp | 140 +++++++------ src/formats/osml/OsmlStreamParser.cpp | 157 ++++----------- src/formats/osml/OsmlStreamParser.hpp | 85 ++++---- src/formats/osxml/OsxmlEventParser.cpp | 63 +----- src/formats/osxml/OsxmlEventParser.hpp | 31 +-- test/core/parser/stack/StackTest.cpp | 15 +- test/core/parser/utils/TokenizedDataTest.cpp | 90 +++++---- test/core/parser/utils/TokenizerTest.cpp | 248 ++++++++++------------- test/formats/osml/OsmlStreamParserTest.cpp | 79 ++++---- test/formats/osxml/OsxmlEventParserTest.cpp | 47 +---- 28 files changed, 1184 insertions(+), 1301 deletions(-) create mode 100644 src/core/common/Token.cpp create mode 100644 src/core/common/Token.hpp delete mode 100644 src/core/common/WhitespaceHandler.hpp delete mode 100644 src/core/parser/utils/Token.cpp delete mode 100644 src/core/parser/utils/Token.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 
ea5c3aa..54f971c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,6 +158,7 @@ ADD_LIBRARY(ousia_core src/core/common/Rtti src/core/common/RttiBuilder src/core/common/SourceContextReader + src/core/common/Token src/core/common/Utils src/core/common/Variant src/core/common/VariantConverter @@ -189,7 +190,6 @@ ADD_LIBRARY(ousia_core src/core/parser/stack/Stack src/core/parser/stack/TypesystemHandler src/core/parser/utils/SourceOffsetVector - src/core/parser/utils/Token src/core/parser/utils/TokenizedData src/core/parser/utils/Tokenizer src/core/parser/utils/TokenTrie diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp new file mode 100644 index 0000000..8bcdbb5 --- /dev/null +++ b/src/core/common/Token.cpp @@ -0,0 +1,24 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "Token.hpp" + +namespace ousia { +// Stub to make sure Tokens.hpp is valid +} + diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp new file mode 100644 index 0000000..07d7c8f --- /dev/null +++ b/src/core/common/Token.hpp @@ -0,0 +1,181 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Token.hpp + * + * Definition of the TokenId id and constants for some special tokens. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_TOKEN_HPP_ +#define _OUSIA_TOKEN_HPP_ + +#include +#include +#include +#include + +#include + +namespace ousia { + +/** + * The TokenId is used to give each token id a unique id. + */ +using TokenId = uint32_t; + +/** + * Type used for storing token lengths. + */ +using TokenLength = uint16_t; + +/** + * Type used for storing token sets. + */ +using TokenSet = std::unordered_set; + +/** + * Namespace containing constants for TokenId instances with special meaning. + */ +namespace Tokens { +/** + * Token which is not a token. + */ +constexpr TokenId Empty = std::numeric_limits::max(); + +/** + * Token which represents data (represented as TokenizedData). + */ +constexpr TokenId Data = std::numeric_limits::max() - 1; + +/** + * Token which represents a newline token. 
+ */ +constexpr TokenId Newline = std::numeric_limits::max() - 2; + +/** + * Token which represents a paragraph token -- issued if two consecutive + * newlines occur with optionally any amount of whitespace between them. The + * paragraph token is not repeated until more text is reached. + */ +constexpr TokenId Paragraph = std::numeric_limits::max() - 3; + +/** + * Token which represents a section token -- issued if three or more + * consecutive newlines occur with optionally any amount of whitespace between + * them. The section token is not repeated until more text is reached. + */ +constexpr TokenId Section = std::numeric_limits::max() - 4; + +/** + * Token which represents an indentation token -- issued if the indentation of + * this line is larger than the indentation of the previous line. + */ +constexpr TokenId Indent = std::numeric_limits::max() - 5; + +/** + * Token which represents an unindentation -- issued if the indentation of + * this line is smaller than the indentation of the previous line. + */ +constexpr TokenId Unindent = std::numeric_limits::max() - 6; + +/** + * Maximum token id to be used. Tokens allocated for users should not surpass + * this value. + */ +constexpr TokenId MaxTokenId = std::numeric_limits::max() - 255; +} + +/** + * The Token structure describes a token discovered by the Tokenizer or read + * from the TokenizedData struct. + */ +struct Token { + /** + * Id of this token. + */ + TokenId id; + + /** + * String that was matched. + */ + std::string content; + + /** + * Location from which the string was extracted. + */ + SourceLocation location; + + /** + * Default constructor. + */ + Token() : id(Tokens::Empty) {} + + /** + * Constructor of a "data" token with no explicit content. + * + * @param location is the location of the extracted string content in the + * source file. + */ + Token(SourceLocation location) + : id(Tokens::Data), location(location) + { + } + + /** + * Constructor of the Token struct. 
+ * + * @param id represents the token id. + * @param content is the string content that has been extracted. + * @param location is the location of the extracted string content in the + * source file. + */ + Token(TokenId id, const std::string &content, SourceLocation location) + : id(id), content(content), location(location) + { + } + + /** + * Constructor of the Token struct, only initializes the token id + * + * @param id is the id corresponding to the id of the token. + */ + Token(TokenId id) : id(id) {} + + /** + * Returns true if this token is special. + * + * @return true if the TokenId indicates that this token is a "special" + * token. + */ + + + /** + * The getLocation function allows the tokens to be directly passed as + * parameter to Logger or LoggableException instances. + * + * @return a reference at the location field + */ + const SourceLocation &getLocation() const { return location; } +}; +} + +#endif /* _OUSIA_TOKEN_HPP_ */ + diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp deleted file mode 100644 index ed52ea3..0000000 --- a/src/core/common/WhitespaceHandler.hpp +++ /dev/null @@ -1,284 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -/** - * @file WhitespaceHandler.hpp - * - * Contains the WhitespaceHandler classes which are used in multiple places to - * trim, compact or preserve whitespaces while at the same time maintaining the - * position information associated with the input strings. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_ -#define _OUSIA_WHITESPACE_HANDLER_HPP_ - -#include -#include - -#include "Utils.hpp" - -namespace ousia { - -/** - * WhitespaceHandler is a based class that can be used to collect text on a - * character-by-character basis. Note that this class and its descendants are - * hoped to be inlined by the compiler (and used in conjunction with templates), - * thus they are fully defined inside this header. - */ -class WhitespaceHandler { -public: - /** - * Start position of the extracted text. - */ - size_t textStart; - - /** - * End position of the extracted text. - */ - size_t textEnd; - - /** - * Buffer containing the extracted text. - */ - std::vector textBuf; - - /** - * Constructor of the TextHandlerBase base class. Initializes the start and - * end position with zeros. - */ - WhitespaceHandler() : textStart(0), textEnd(0) {} - - /** - * Returns true if this whitespace handler has found any text and a text - * token could be emitted. - * - * @return true if the internal data buffer is non-empty. - */ - bool hasText() { return !textBuf.empty(); } - - /** - * Returns the content of the WhitespaceHandler as string. - */ - std::string toString() const - { - return std::string(textBuf.data(), textBuf.size()); - } -}; - -/** - * The PreservingWhitespaceHandler class preserves all characters unmodified, - * including whitepace characters. - */ -class PreservingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Appends the given character to the internal text buffer, does not - * eliminate whitespace. - * - * @param c is the character that should be appended to the internal buffer. 
- * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd); - } - - /** - * Static version of PreservingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd) - { - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - textBuf.push_back(c); - } -}; - -/** - * The TrimmingTextHandler class trims all whitespace characters at the begin - * and the end of a text section but leaves all other characters unmodified, - * including whitepace characters. - */ -class TrimmingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Buffer used internally to temporarily store all whitespace characters. - * They are only added to the output buffer if another non-whitespace - * character is reached. - */ - std::vector whitespaceBuf; - - /** - * Appends the given character to the internal text buffer, eliminates - * whitespace characters at the begin and end of the text. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. 
- */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); - } - - /** - * Static version of TrimmingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - * @param whitespaceBuf is a reference at the buffer for storing whitespace - * characters. - */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd, std::vector &whitespaceBuf) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - whitespaceBuf.push_back(c); - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (!whitespaceBuf.empty()) { - textBuf.insert(textBuf.end(), whitespaceBuf.begin(), - whitespaceBuf.end()); - whitespaceBuf.clear(); - } - textBuf.push_back(c); - } -}; - -/** - * The CollapsingTextHandler trims characters at the beginning and end of the - * text and reduced multiple whitespace characters to a single blank. - */ -class CollapsingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Flag set to true if a whitespace character was reached. - */ - bool hasWhitespace = false; - - /** - * Appends the given character to the internal text buffer, eliminates - * redundant whitespace characters. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. 
- * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); - } - - /** - * Static version of CollapsingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - * @param hasWhitespace is a reference at the "hasWhitespace" flag. - */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd, bool &hasWhitespace) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - hasWhitespace = true; - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (hasWhitespace) { - textBuf.push_back(' '); - hasWhitespace = false; - } - textBuf.push_back(c); - } -}; - -/** - * Function that can be used to append the given buffer (e.g. a string or a - * vector) to the whitespace handler. - * - * @tparam WhitespaceHandler is one of the WhitespaceHandler classes. - * @tparam Buffer is an iterable type. - * @param handler is the handler to which the characters of the Buffer should be - * appended. - * @param buf is the buffer from which the characters should be read. - * @param start is the start byte offset. Each character is counted as one byte. 
- */ -template -inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, - size_t start) -{ - for (auto elem : buf) { - handler.append(elem, start, start + 1); - start++; - } -} -} - -#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */ - diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index bb04bd3..d44176a 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -372,8 +373,15 @@ bool DocumentChildHandler::convertData(Handle field, return valid && scope().resolveValue(data, type, logger); } -bool DocumentChildHandler::data(Variant &data) +bool DocumentChildHandler::data(TokenizedData &data) { + // TODO: Handle this correctly + Variant text = data.text(WhitespaceMode::TRIM); + if (text == nullptr) { + // For now, accept "no data" as success + return true; + } + // We're past the region in which explicit fields can be defined in the // parent structure element scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true); @@ -393,11 +401,11 @@ bool DocumentChildHandler::data(Variant &data) // If it is a primitive field directly, try to parse the content. if (field->isPrimitive()) { // Add it as primitive content. - if (!convertData(field, data, logger())) { + if (!convertData(field, text, logger())) { return false; } - parent->createChildDocumentPrimitive(data, fieldIdx); + parent->createChildDocumentPrimitive(text, fieldIdx); return true; } @@ -411,7 +419,7 @@ bool DocumentChildHandler::data(Variant &data) for (auto primitiveField : defaultFields) { // Then try to parse the content using the type specification. 
forks.emplace_back(logger().fork()); - if (!convertData(primitiveField, data, forks.back())) { + if (!convertData(primitiveField, text, forks.back())) { continue; } @@ -424,7 +432,7 @@ bool DocumentChildHandler::data(Variant &data) createPath(fieldIdx, path, parent); // Then create the primitive element - parent->createChildDocumentPrimitive(data); + parent->createChildDocumentPrimitive(text); return true; } @@ -434,10 +442,10 @@ bool DocumentChildHandler::data(Variant &data) if (defaultFields.empty()) { logger().error("Got data, but structure \"" + name() + "\" does not have any primitive field", - data); + text); } else { logger().error("Could not read data with any of the possible fields:", - data); + text); size_t f = 0; for (auto field : defaultFields) { logger().note(std::string("Field ") + @@ -471,4 +479,4 @@ namespace RttiTypes { const Rtti DocumentField = RttiBuilder( "DocumentField").parent(&Node); } -} \ No newline at end of file +} diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 862081c..dda7d8b 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -167,7 +167,7 @@ public: bool start(Variant::mapType &args) override; void end() override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; @@ -213,4 +213,4 @@ extern const Rtti DocumentField; } } -#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ \ No newline at end of file +#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index bf5d4ea..3d413e8 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include "Callbacks.hpp" @@ -130,7 +131,7 @@ bool EmptyHandler::annotationEnd(const Variant &className, return true; } -bool 
EmptyHandler::data(Variant &data) +bool EmptyHandler::data(TokenizedData &data) { // Support any data return true; @@ -184,10 +185,13 @@ bool StaticHandler::annotationEnd(const Variant &className, return false; } -bool StaticHandler::data(Variant &data) +bool StaticHandler::data(TokenizedData &data) { - logger().error("Did not expect any data here", data); - return false; + if (data.text(WhitespaceMode::TRIM) != nullptr) { + logger().error("Did not expect any data here", data); + return false; + } + return true; } /* Class StaticFieldHandler */ @@ -227,12 +231,19 @@ void StaticFieldHandler::end() } } -bool StaticFieldHandler::data(Variant &data) +bool StaticFieldHandler::data(TokenizedData &data) { + Variant text = data.text(WhitespaceMode::TRIM); + if (text == nullptr) { + // Providing no data here is ok as long as the "doHandle" callback + // function has already been called + return handled; + } + // Call the doHandle function if this has not been done before if (!handled) { handled = true; - doHandle(data, args); + doHandle(text, args); return true; } @@ -240,7 +251,7 @@ bool StaticFieldHandler::data(Variant &data) logger().error( std::string("Found data, but the corresponding argument \"") + argName + std::string("\" was already specified"), - data); + text); // Print the location at which the attribute was originally specified auto it = args.find(argName); diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 7cda7a4..929466d 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -31,6 +31,7 @@ namespace ousia { class ParserScope; class ParserContext; class Logger; +class TokenizedData; namespace parser_stack { @@ -158,40 +159,63 @@ protected: */ const std::string &name() const; -public: - /** - * Virtual destructor. - */ - virtual ~Handler(); - /** * Calls the corresponding function in the Callbacks instance. Sets the * whitespace mode that specifies how string data should be processed. 
The * calls to this function are placed on a stack by the underlying Stack - * class. + * class. This function should be called from the "fieldStart" callback and + * the "start" callback. If no whitespace mode is pushed in the "start" + * method the whitespace mode "TRIM" is implicitly assumed. * * @param whitespaceMode specifies one of the three WhitespaceMode constants * PRESERVE, TRIM or COLLAPSE. */ - void setWhitespaceMode(WhitespaceMode whitespaceMode); + void pushWhitespaceMode(WhitespaceMode whitespaceMode); /** - * Calls the corresponding function in the Callbacks instance. - * Registers the given token as token that should be reported to the handler - * using the "token" function. - * - * @param token is the token string that should be reported. + * Pops a previously pushed whitespace mode. Calls to this function should + * occur in the "end" callback and the "fieldEnd" callback. This function + * can only undo pushs that were performed by the pushWhitespaceMode() + * method of the same handler. */ - void registerToken(const std::string &token); + void popWhitespaceMode(); /** - * Calls the corresponding function in the Callbacks instance. - * Unregisters the given token, it will no longer be reported to the handler - * using the "token" function. + * Calls the corresponding function in the Callbacks instance. Sets the + * whitespace mode that specifies how string data should be processed. The + * calls to this function are placed on a stack by the underlying Stack + * class. This function should be called from the "fieldStart" callback and + * the "start" callback. If no whitespace mode is pushed in the "start" + * method the whitespace mode "TRIM" is implicitly assumed. * - * @param token is the token string that should be unregistered. + * @param tokens is a list of tokens that should be reported to this handler + * instance via the "token" method. 
*/ - void unregisterToken(const std::string &token); + void pushTokens(const std::vector &tokens); + + /** + * Pops a previously pushed whitespace mode. Calls to this function should + * occur in the "end" callback and the "fieldEnd" callback. This function + * can only undo pushs that were performed by the pushWhitespaceMode() + * method of the same handler. + */ + void popWhitespaceMode(); + + + /** + * Calls the corresponding function in the Callbacks instance. This method + * registers the given tokens as tokens that are generally available, tokens + * must be explicitly enabled using the "pushTokens" and "popTokens" method. + * Tokens that have not been registered are not guaranteed to be reported, + * even though they are + */ + void registerTokens(const std::vector &tokens); + +public: + /** + * Virtual destructor. + */ + virtual ~Handler(); /** * Returns the command name for which the handler was created. @@ -299,11 +323,11 @@ public: * Handler instance. Should return true if the data could be handled, false * otherwise. * - * @param data is a string variant containing the character data and its - * location. + * @param data is an instance of TokenizedData containing the segmented + * character data and its location. * @return true if the data could be handled, false otherwise. */ - virtual bool data(Variant &data) = 0; + virtual bool data(TokenizedData &data) = 0; }; /** @@ -333,7 +357,7 @@ public: Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; /** * Creates an instance of the EmptyHandler class. 
@@ -359,7 +383,7 @@ public: Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; }; /** @@ -412,7 +436,7 @@ protected: public: bool start(Variant::mapType &args) override; void end() override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; }; } } diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 5b67248..309c9a0 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -413,16 +414,24 @@ void Stack::command(const Variant &name, const Variant::mapType &args) } } -void Stack::data(const Variant &data) +void Stack::data(TokenizedData data) { - // End handlers that already had a default field and are currently not - // active. - endOverdueHandlers(); + // TODO: Rewrite this function for token handling + // TODO: This loop needs to be refactored out + while (!data.atEnd()) { + // End handlers that already had a default field and are currently not + // active. 
+ endOverdueHandlers(); - while (true) { - // Check whether there is any command the data can be sent to + const bool hasNonWhitespaceText = data.hasNonWhitespaceText(); + + // Check whether there is any command the data can be sent to -- if not, + // make sure the data actually is data if (stack.empty()) { - throw LoggableException("No command here to receive data.", data); + if (hasNonWhitespaceText) { + throw LoggableException("No command here to receive data.", data); + } + return; } // Fetch the current command handler information @@ -440,7 +449,10 @@ void Stack::data(const Variant &data) // If the "hadDefaultField" flag is set, we already issued an error // message if (!info.hadDefaultField) { - logger().error("Did not expect any data here", data); + if (hasNonWhitespaceText) { + logger().error("Did not expect any data here", data); + } + return; } } @@ -454,8 +466,16 @@ void Stack::data(const Variant &data) // Pass the data to the current Handler instance bool valid = false; try { - Variant dataCopy = data; - valid = info.handler->data(dataCopy); + // Create a fork of the TokenizedData and let the handler work + // on it + TokenizedData dataFork = data; + valid = info.handler->data(dataFork); + + // If the data was validly handled by the handler, commit the + // change + if (valid) { + data = dataFork; + } } catch (LoggableException ex) { loggerFork.log(ex); @@ -482,6 +502,19 @@ void Stack::data(const Variant &data) } } +void Stack::data(const Variant &stringData) +{ + // Fetch the SourceLocation of the given stringData variant + SourceLocation loc = stringData.getLocation(); + + // Create a TokenizedData instance and feed the given string data into it + TokenizedData tokenizedData(loc.getSourceId()); + tokenizedData.append(stringData.asString(), loc.getStart()); + + // Call the actual "data" method + data(tokenizedData); +} + void Stack::fieldStart(bool isDefault) { // Make sure the current handler stack is not empty @@ -584,4 +617,4 @@ void 
Stack::token(Variant token) // TODO } } -} \ No newline at end of file +} diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index b67ce82..cd29b28 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -44,6 +44,7 @@ namespace ousia { // Forward declarations class ParserContext; class Logger; +class TokenizedData; namespace parser_stack { @@ -292,13 +293,24 @@ public: void command(const Variant &name, const Variant::mapType &args); /** - * Function that shuold be called whenever character data is found in the + * Function that should be called whenever character data is found in the * input stream. May only be called if the currently is a command on the * stack. * - * @param data is a string variant containing the data that has been found. + * @param data is a TokenizedData instance containing the pre-segmented data + * that should be read. + */ + void data(TokenizedData data); + + /** + * Function that should be called whenever character data is found in the + * input stream. The given string variant is converted into a TokenizedData + * instance internally. + * + * @param stringData is a string variant containing the data that has been + * found. */ - void data(const Variant &data); + void data(const Variant &stringData); /** * Function that should be called whenever a new field starts. Fields of the diff --git a/src/core/parser/utils/SourceOffsetVector.hpp b/src/core/parser/utils/SourceOffsetVector.hpp index d15055a..aaebe7d 100644 --- a/src/core/parser/utils/SourceOffsetVector.hpp +++ b/src/core/parser/utils/SourceOffsetVector.hpp @@ -127,7 +127,7 @@ public: * read. * @return a pair containing start and end source offset. */ - std::pair loadOffset(size_t idx) + std::pair loadOffset(size_t idx) const { // Special treatment for the last character const size_t count = lens.size(); @@ -157,7 +157,31 @@ public: /** * Returns the number of characters for which offsets are stored. 
*/ - size_t size() { return lens.size(); } + size_t size() const { return lens.size(); } + + /** + * Trims the length of the TokenizedData instance to the given length. + * Removes all token matches that lie within the trimmed region. + * + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. + */ + void trim(size_t length) { + if (length < size()) { + lens.resize(length); + offsets.resize((length >> LOG2_OFFSET_INTERVAL) + 1); + } + } + + /** + * Resets the SourceOffsetVector to the state it had when it was + * constructed. + */ + void clear() { + lens.clear(); + offsets.clear(); + lastEnd = 0; + } }; } diff --git a/src/core/parser/utils/Token.cpp b/src/core/parser/utils/Token.cpp deleted file mode 100644 index 8bcdbb5..0000000 --- a/src/core/parser/utils/Token.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "Token.hpp" - -namespace ousia { -// Stub to make sure Tokens.hpp is valid -} - diff --git a/src/core/parser/utils/Token.hpp b/src/core/parser/utils/Token.hpp deleted file mode 100644 index f907450..0000000 --- a/src/core/parser/utils/Token.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file Token.hpp - * - * Definition of the TokenId id and constants for some special tokens. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_TOKEN_HPP_ -#define _OUSIA_TOKEN_HPP_ - -#include -#include -#include - -#include - -namespace ousia { - -/** - * The TokenId is used to give each token id a unique id. - */ -using TokenId = uint32_t; - -/** - * Type used for storing token lengths. - */ -using TokenLength = uint16_t; - -/** - * Namespace containing constants for TokenId instances with special meaning. - */ -namespace Tokens { -/** - * Token which is not a token. - */ -constexpr TokenId Empty = std::numeric_limits::max(); - -/** - * Token which represents data (represented as TokenizedData). - */ -constexpr TokenId Data = std::numeric_limits::max() - 1; - -/** - * Token which represents a newline token. 
- */ -constexpr TokenId Newline = std::numeric_limits::max() - 2; - -/** - * Token which represents a paragraph token -- issued if two consecutive - * newlines occur with optionally any amout of whitespace between them. - */ -constexpr TokenId Paragraph = std::numeric_limits::max() - 3; - -/** - * Token which represents an indentation token -- issued if the indentation of - * this line is larget than the indentation of the previous line. - */ -constexpr TokenId Indentation = std::numeric_limits::max() - 4; - -/** - * Maximum token id to be used. Tokens allocated for users should not surpass - * this value. - */ -constexpr TokenId MaxTokenId = std::numeric_limits::max() - 255; -} - -/** - * The Token structure describes a token discovered by the Tokenizer or read - * from the TokenizedData struct. - */ -struct Token { - /** - * Id of the id of this token. - */ - TokenId id; - - /** - * String that was matched. - */ - std::string content; - - /** - * Location from which the string was extracted. - */ - SourceLocation location; - - /** - * Default constructor. - */ - Token() : id(Tokens::Empty) {} - - /** - * Constructor of the Token struct. - * - * @param id represents the token id. - * @param content is the string content that has been extracted. - * @param location is the location of the extracted string content in the - * source file. - */ - Token(TokenId id, const std::string &content, SourceLocation location) - : id(id), content(content), location(location) - { - } - - /** - * Constructor of the Token struct, only initializes the token id - * - * @param id is the id corresponding to the id of the token. - */ - Token(TokenId id) : id(id) {} - - /** - * The getLocation function allows the tokens to be directly passed as - * parameter to Logger or LoggableException instances. 
- * - * @return a reference at the location field - */ - const SourceLocation &getLocation() const { return location; } -}; -} - -#endif /* _OUSIA_TOKENS_HPP_ */ - diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp index 80cc945..a45d3ff 100644 --- a/src/core/parser/utils/TokenTrie.cpp +++ b/src/core/parser/utils/TokenTrie.cpp @@ -22,12 +22,12 @@ namespace ousia { /* Class DynamicTokenTree::Node */ -TokenTrie::Node::Node() : type(Tokens::Empty) {} +TokenTrie::Node::Node() : id(Tokens::Empty) {} /* Class DynamicTokenTree */ bool TokenTrie::registerToken(const std::string &token, - TokenId type) noexcept + TokenId id) noexcept { // Abort if the token is empty -- this would taint the root node if (token.empty()) { @@ -48,12 +48,12 @@ bool TokenTrie::registerToken(const std::string &token, } // If the resulting node already has a type set, we're screwed. - if (node->type != Tokens::Empty) { + if (node->id != Tokens::Empty) { return false; } // Otherwise just set the type to the given type. - node->type = type; + node->id = id; return true; } @@ -78,7 +78,7 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept // Reset the subtree handler if this node has another type node = it->second.get(); - if ((node->type != Tokens::Empty || node->children.size() > 1) && + if ((node->id != Tokens::Empty || node->children.size() > 1) && (i + 1 != token.size())) { subtreeRoot = node; subtreeKey = token[i + 1]; @@ -86,14 +86,14 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept } // If the node type is already Tokens::Empty, we cannot do anything here - if (node->type == Tokens::Empty) { + if (node->id == Tokens::Empty) { return false; } // If the target node has children, we cannot delete the subtree. 
Set the // type to Tokens::Empty instead if (!node->children.empty()) { - node->type = Tokens::Empty; + node->id = Tokens::Empty; return true; } @@ -113,7 +113,7 @@ TokenId TokenTrie::hasToken(const std::string &token) const noexcept } node = it->second.get(); } - return node->type; + return node->id; } } diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp index b2d1539..c470acc 100644 --- a/src/core/parser/utils/TokenTrie.hpp +++ b/src/core/parser/utils/TokenTrie.hpp @@ -33,7 +33,7 @@ #include #include -#include "Token.hpp" +#include namespace ousia { @@ -75,10 +75,9 @@ public: ChildMap children; /** - * Reference at the corresponding token descriptor. Set to nullptr if - * no token is attached to this node. + * Id of the token represented by this node. */ - TokenId type; + TokenId id; /** * Default constructor, initializes the descriptor with nullptr. @@ -99,10 +98,10 @@ public: * * @param token is the character sequence that should be registered as * token. - * @param type is the descriptor that should be set for this token. + * @param id is the descriptor that should be set for this token. * @return true if the operation is successful, false otherwise. */ - bool registerToken(const std::string &token, TokenId type) noexcept; + bool registerToken(const std::string &token, TokenId id) noexcept; /** * Unregisters the token from the token tree. Returns true if the token was diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index fc7bfaf..0ec56af 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -110,19 +110,19 @@ private: std::vector buf; /** - * Vector containing all token marks. + * Vector storing all the character offsets efficiently. */ - std::vector marks; + SourceOffsetVector offsets; /** - * Vector storing all the character offsets efficiently. + * Vector containing all token marks. 
*/ - SourceOffsetVector offsets; + mutable std::vector marks; /** * Flag indicating whether the internal "marks" vector is sorted. */ - bool sorted; + mutable bool sorted; public: /** @@ -150,9 +150,12 @@ public: // Extend the text regions, interpolate the source position (this may // yield incorrect results) const size_t size = buf.size(); - for (SourceOffset offs = offsStart; offs < offsStart + data.size(); - offs++) { - offsets.storeOffset(offs, offs + 1); + for (size_t i = 0; i < data.size(); i++) { + if (offsStart != InvalidSourceOffset) { + offsets.storeOffset(offsStart + i, offsStart + i + 1); + } else { + offsets.storeOffset(InvalidSourceOffset, InvalidSourceOffset); + } } return size; @@ -213,7 +216,7 @@ public: * available. */ bool next(Token &token, WhitespaceMode mode, - const std::unordered_set &tokens, size_t &cursor) + const std::unordered_set &tokens, size_t &cursor) const { // Sort the "marks" vector if it has not been sorted yet. if (!sorted) { @@ -222,10 +225,11 @@ public: } // Fetch the next larger TokenMark instance, make sure the token is in - // the "enabled" list + // the "enabled" list and within the buffer range auto it = std::lower_bound(marks.begin(), marks.end(), TokenMark(cursor)); - while (it != marks.end() && tokens.count(it->id) == 0) { + while (it != marks.end() && (tokens.count(it->id) == 0 || + it->bufStart + it->len > buf.size())) { it++; } @@ -303,12 +307,59 @@ public: return false; } + /** + * Resets the TokenizedDataImpl instance to the state it had when it was + * constructred. + */ + void clear() + { + buf.clear(); + marks.clear(); + offsets.clear(); + sorted = true; + } + + /** + * Trims the length of the TokenizedDataImpl instance to the given length. + * + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. 
+ */ + void trim(size_t length) + { + if (length < size()) { + buf.resize(length); + offsets.trim(length); + } + } + /** * Returns the current size of the internal buffer. * * @return the size of the internal character buffer. */ - size_t getSize() { return buf.size(); } + size_t size() const { return buf.size(); } + + /** + * Returns true if no data is in the data buffer. + * + * @return true if the "buf" instance has no data. + */ + bool empty() const { return buf.empty(); } + + /** + * Returns the current location of all data in the buffer. + * + * @return the location of the entire data represented by this instance. + */ + SourceLocation getLocation() const + { + if (empty()) { + return SourceLocation{sourceId}; + } + return SourceLocation{sourceId, offsets.loadOffset(0).first, + offsets.loadOffset(size()).second}; + } }; /* Class TokenizedData */ @@ -335,7 +386,7 @@ size_t TokenizedData::append(char c, SourceOffset offsStart, void TokenizedData::mark(TokenId id, TokenLength len) { - impl->mark(id, impl->getSize() - len, len); + impl->mark(id, impl->size() - len, len); } void TokenizedData::mark(TokenId id, size_t bufStart, TokenLength len) @@ -343,23 +394,67 @@ void TokenizedData::mark(TokenId id, size_t bufStart, TokenLength len) impl->mark(id, bufStart, len); } -bool TokenizedData::next(Token &token, WhitespaceMode mode) +void TokenizedData::clear() { - return impl->next(token, mode, tokens, cursor); + impl->clear(); + tokens.clear(); + cursor = 0; } -bool TokenizedData::text(Token &token, WhitespaceMode mode) +void TokenizedData::trim(size_t length) { impl->trim(length); } + +size_t TokenizedData::size() const { return impl->size(); } + +bool TokenizedData::empty() const { return impl->empty(); } + +SourceLocation TokenizedData::getLocation() const +{ + return impl->getLocation(); +} + +TokenizedDataReader reader() const +{ + return TokenizedDataReader(impl, std::unordered_set{}, 0, 0); +} + +/* Class TokenizedDataReader */ + +TokenizedDataReaderFork 
TokenizedDataReader::fork() +{ + return TokenizedDataReaderFork(*this, impl, tokens, readCursor, peekCursor); +} + +bool TokenizedDataReader::atEnd() const { return readCursor >= size(); } + +bool TokenizedData::read(Token &token, const TokenSet &tokens, + WhitespaceMode mode) +{ + peekCursor = readCursor; + return impl->next(token, mode, tokens, readCursor); +} + +bool TokenizedData::peek(Token &token, const TokenSet &tokens, + WhitespaceMode mode) +{ + return impl->next(token, mode, tokens, peekCursor); +} + +Variant TokenizedData::text(WhitespaceMode mode) { // Copy the current cursor position to not update the actual cursor position // if the operation was not successful size_t cursorCopy = cursor; + Token token; if (!impl->next(token, mode, tokens, cursorCopy) || token.id != Tokens::Data) { - return false; + return Variant{nullptr}; } - // There is indeed a text token, update the internal cursor position + // There is indeed a text token, update the internal cursor position and + // return the token as variant. cursor = cursorCopy; - return true; + Variant res = Variant::fromString(token.content); + res.setLocation(token.getLocation()); + return res; } } diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp index 38125c4..85b80ae 100644 --- a/src/core/parser/utils/TokenizedData.hpp +++ b/src/core/parser/utils/TokenizedData.hpp @@ -36,42 +36,29 @@ #include #include +#include #include - -#include "Token.hpp" +#include namespace ousia { // Forward declaration class TokenizedDataImpl; +class TokenizedDataReader; +class TokenizedDataReaderFork; /** * The TokenizedData class stores data extracted from a user defined document. - * As users are capable of defining their own tokens and these are only valid - * in certain scopes TokenizedData allows to divide the stored data into chunks - * separated by tokens. 
+ * The data stored in TokenizedData */ class TokenizedData { private: /** - * Shared pointer pointing at the internal data. This data is shared when - * copying TokenizedData instances, which corresponds to forking a - * TokenizedData instance. + * Shared pointer pointing at the internal data. This data is shared with + * all the TokenizedDataReader instances. */ std::shared_ptr impl; - /** - * Contains all currently enabled token ids. - */ - std::unordered_set tokens; - - /** - * Position from which the last element was read from the internal buffer. - * This information is not shared with the other instances of TokenizedData - * pointing at the same location. - */ - size_t cursor; - public: /** * Default constructor, creates a new instance of TokenizedData, sets the @@ -136,25 +123,121 @@ public: void mark(TokenId id, size_t bufStart, TokenLength len); /** - * Enables a single token id. Enabled tokens will no longer be returned as - * text. Instead, when querying for the next token, TokenizedData will - * return them as token and not as part of a Text token. + * Resets the TokenizedData instance to the state it had when it was + * constructred. + */ + void clear(); + + /** + * Trims the length of the TokenizedData instance to the given length. Note + * that this function does not remove any token matches for performance + * reasons, it merely renders them incaccessible. Appending new data after + * calling trim will make the token marks accessible again. Thus this method + * should be the last function called to modify the data buffer and the + * token marks. * - * @param id is the TokenId of the token that should be enabled. + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. */ - void enableToken(TokenId id) { tokens.insert(id); } + void trim(size_t length); /** - * Enables a set of token ids. Enabled tokens will no longer be returned as - * text. 
Instead, when querying for the next token, TokenizedData will - * return them as token and not as part of a Text token. + * Returns the number of characters currently represented by this + * TokenizedData instance. + */ + size_t size() const; + + /** + * Returns true if the TokenizedData instance is empty, false otherwise. * - * @param ids is the TokenId of the token that should be enabled. + * @return true if not data is stored inside the TokenizedData instance. */ - void enableToken(const std::unordered_set &ids) - { - tokens.insert(ids.begin(), ids.end()); - } + bool empty() const; + + /** + * Returns the location of the entire TokenizedData instance. + * + * @return the location of the entire data represented by this instance. + */ + SourceLocation getLocation() const; + + /** + * Returns a TokenizedDataReader instance that can be used to access the + * data. + * + * @return a new TokenizedDataReader instance pointing at the beginning of + * the internal buffer. + */ + TokenizedDataReader reader() const; +}; + +/** + * The TokenizedDataReader + */ +class TokenizedDataReader { +private: + friend TokenizedData; + + /** + * Shared pointer pointing at the internal data. This data is shared with + * all the TokenizedDataReader instances. + */ + std::shared_ptr impl; + + /** + * Position from which the last element was read from the internal buffer. + */ + size_t readCursor; + + /** + * Position from which the last element was peeked from the internal buffer. + */ + size_t peekCursor; + + /** + * Private constructor of TokenizedDataReader, taking a reference to the + * internal TokenizedDataImpl structure storing the data that is accessed by + * the reader. + * + * @param impl is the TokenizedDataImpl instance that holds the actual data. + * @param readCursor is the cursor position from which tokens and text are + * read. + * @param peekCursor is the cursor position from which tokens and text are + * peeked. 
+ */ + TokenizedDataReader(std::shared_ptr impl, + size_t readCursor, size_t peekCursor); + +public: + /** + * Returns a new TokenizedDataReaderFork from which tokens and text can be + * read without advancing this reader instance. + */ + TokenizedDataReaderFork fork(); + + /** + * Returns true if this TokenizedData instance is at the end. + * + * @return true if the end of the TokenizedData instance has been reached. + */ + bool atEnd() const; + + /** + * Stores the next token in the given token reference, returns true if the + * operation was successful, false if there are no more tokens. Advances the + * internal cursor and re + * + * @param token is an output parameter into which the read token will be + * stored. The TokenId is set to Tokens::Empty if there are no more tokens. + * @param tokens is the set of token identifers, representing the currently + * enabled tokens. + * @param mode is the whitespace mode that should be used when a text token + * is returned. + * @return true if the operation was successful and there is a next token, + * false if there are no more tokens. + */ + bool read(Token &token, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::COLLAPSE); /** * Stores the next token in the given token reference, returns true if the @@ -162,12 +245,26 @@ public: * * @param token is an output parameter into which the read token will be * stored. The TokenId is set to Tokens::Empty if there are no more tokens. + * @param tokens is the set of token identifers, representing the currently + * enabled tokens. * @param mode is the whitespace mode that should be used when a text token * is returned. * @return true if the operation was successful and there is a next token, * false if there are no more tokens. 
*/ - bool next(Token &token, WhitespaceMode mode = WhitespaceMode::COLLAPSE); + bool peek(Token &token, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::COLLAPSE); + + /** + * Consumes the peeked tokens, the read cursor will now be at the position + * of the peek cursor. + */ + void consumePeek() { readCursor = peekCursor; } + + /** + * Resets the peek cursor to the position of the read cursor. + */ + void resetPeek() { peekCursor = readCursor; } /** * Stores the next text token in the given token reference, returns true if @@ -178,12 +275,53 @@ public: * stored. The TokenId is set to Tokens::Empty if there are no more tokens. * @param mode is the whitespace mode that should be used when a text token * is returned. - * @return true if the operation was successful and there is a next token, - * false if there are no more tokens. + * @return a string variant with the data if there is any data or a nullptr + * variant if there is no text. */ - bool text(Token &token, WhitespaceMode mode = WhitespaceMode::COLLAPSE); + Variant text(WhitespaceMode mode = WhitespaceMode::COLLAPSE); }; + +/** + * The TokenizedDataReaderFork class is created when forking a + * TokenizedDataReader + */ +class TokenizedDataReaderFork : public TokenizedDataReader { +private: + friend TokenizedDataReader; + + /** + * Reference pointing at the parent TokenizedDataReader to which changes may + * be commited. + */ + TokenizedDataReader &parent; + + /** + * Private constructor of TokenizedDataReaderFork, taking a reference to the + * internal TokenizedDataImpl structure storing the data that is accessed by + * the reader and a reference at the parent TokenizedDataReader. + * + * @param parent is the TokenizedDataReader instance to which the current + * read/peek progress may be commited. + * @param impl is the TokenizedDataImpl instance that holds the actual data. + * @param readCursor is the cursor position from which tokens and text are + * read. 
+ * @param peekCursor is the cursor position from which tokens and text are + * peeked. + */ + TokenizedDataReaderFork(TokenizedDataReader &parent, + std::shared_ptr impl, + size_t readCursor, size_t peekCursor) + : TokenizedDataReader(impl, readCursor, peekCursor), parent(parent) + { + } + +public: + /** + * Commits the read/peek progress to the underlying parent. + */ + void commit() { parent = *this; } +} } -#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */ +#endif /* _OUSIA_TOKENIZED_DATA_HPP_ */ diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index 2e0ac13..51787cd 100644 --- a/src/core/parser/utils/Tokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -22,8 +22,8 @@ #include #include #include -#include +#include "TokenizedData.hpp" #include "Tokenizer.hpp" namespace ousia { @@ -42,26 +42,33 @@ struct TokenMatch { Token token; /** - * Current length of the data within the text handler. The text buffer needs - * to be trimmed to this length if this token matches. + * Position at which this token starts in the TokenizedData instance. */ - size_t textLength; + size_t dataStartOffset; /** - * End location of the current text handler. This location needs to be used - * for the text token that is emitted before the actual token. + * Set to true if the matched token is a primary token. */ - size_t textEnd; + bool primary; /** * Constructor of the TokenMatch class. */ - TokenMatch() : textLength(0), textEnd(0) {} + TokenMatch() : dataStartOffset(0), primary(false) {} /** * Returns true if this TokenMatch instance actually represents a match. + * + * @return true if the TokenMatch actually has a match. + */ + bool hasMatch() const { return token.id != Tokens::Empty; } + + /** + * Returns the length of the matched token. + * + * @return the length of the token string. 
*/ - bool hasMatch() { return token.id != Tokens::Empty; } + size_t size() const { return token.content.size(); } }; /* Internal class TokenLookup */ @@ -83,36 +90,28 @@ private: size_t start; /** - * Current length of the data within the text handler. The text buffer needs - * to be trimmed to this length if this token matches. + * Position at which this token starts in the TokenizedData instance. */ - size_t textLength; - - /** - * End location of the current text handler. This location needs to be used - * for the text token that is emitted before the actual token. - */ - size_t textEnd; + size_t dataStartOffset; public: /** * Constructor of the TokenLookup class. * * @param node is the current node. - * @param start is the start position. - * @param textLength is the text buffer length of the previous text token. - * @param textEnd is the current end location of the previous text token. + * @param start is the start position in the source file. + * @param dataStartOffset is the current length of the TokenizedData buffer. */ - TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength, - size_t textEnd) - : node(node), start(start), textLength(textLength), textEnd(textEnd) + TokenLookup(const TokenTrie::Node *node, size_t start, + size_t dataStartOffset) + : node(node), start(start), dataStartOffset(dataStartOffset) { } /** * Tries to extend the current path in the token trie with the given - * character. If a complete token is matched, stores this match in the - * tokens list (in case it is longer than any previous token). + * character. If a complete token is matched, stores the match in the given + * TokenMatch reference and returns true. * * @param c is the character that should be appended to the current prefix. * @param lookups is a list to which new TokeLookup instances are added -- @@ -123,73 +122,48 @@ public: * Tokenizer. * @param end is the end byte offset of the current character. * @param sourceId is the source if of this file. 
+ * @return true if a token was matched, false otherwise. */ - void advance(char c, std::vector &lookups, TokenMatch &match, - const std::vector &tokens, SourceOffset end, - SourceId sourceId) + bool advance(char c, std::vector &lookups, TokenMatch &match, + const std::vector &tokens, + SourceOffset end, SourceId sourceId) { - // Check whether we can continue the current token path with the given - // character without visiting an already visited node + // Set to true once a token has been matched + bool res = false; + + // Check whether we can continue the current token path, if not, abort auto it = node->children.find(c); if (it == node->children.end()) { - return; + return res; } // Check whether the new node represents a complete token a whether it // is longer than the current token. If yes, replace the current token. node = it->second.get(); - if (node->type != Tokens::Empty) { - const std::string &str = tokens[node->type]; - size_t len = str.size(); - if (len > match.token.content.size()) { - match.token = - Token{node->type, str, {sourceId, start, end}}; - match.textLength = textLength; - match.textEnd = textEnd; - } + if (node->id != Tokens::Empty) { + const Tokenizer::TokenDescriptor &descr = tokens[node->id]; + match.token = Token(node->id, descr.string, + SourceLocation(sourceId, start, end)); + match.dataStartOffset = dataStartOffset; + match.primary = descr.primary; + res = true; } // If this state can possibly be advanced, store it in the states list. if (!node->children.empty()) { lookups.emplace_back(*this); } + return res; } }; - -/** - * Transforms the given token into a data token containing the extracted - * text. - * - * @param handler is the WhitespaceHandler containing the collected data. - * @param token is the output token to which the text should be written. - * @param sourceId is the source id of the underlying file. 
- */ -static void buildDataToken(const WhitespaceHandler &handler, TokenMatch &match, - SourceId sourceId) -{ - if (match.hasMatch()) { - match.token.content = - std::string{handler.textBuf.data(), match.textLength}; - match.token.location = - SourceLocation{sourceId, handler.textStart, match.textEnd}; - } else { - match.token.content = handler.toString(); - match.token.location = - SourceLocation{sourceId, handler.textStart, handler.textEnd}; - } - match.token.id = Tokens::Data; -} } /* Class Tokenizer */ -Tokenizer::Tokenizer(WhitespaceMode whitespaceMode) - : whitespaceMode(whitespaceMode), nextTokenId(0) -{ -} +Tokenizer::Tokenizer() : nextTokenId(0) {} -template -bool Tokenizer::next(CharReader &reader, Token &token) +template +bool Tokenizer::next(CharReader &reader, Token &token, TokenizedData &data) { // If we're in the read mode, reset the char reader peek position to the // current read position @@ -199,43 +173,68 @@ bool Tokenizer::next(CharReader &reader, Token &token) // Prepare the lookups in the token trie const TokenTrie::Node *root = trie.getRoot(); - TokenMatch match; + TokenMatch bestMatch; std::vector lookups; std::vector nextLookups; - // Instantiate the text handler - TextHandler textHandler; - // Peek characters from the reader and try to advance the current token tree // cursor char c; + const size_t initialDataSize = data.size(); size_t charStart = reader.getPeekOffset(); const SourceId sourceId = reader.getSourceId(); while (reader.peek(c)) { const size_t charEnd = reader.getPeekOffset(); - const size_t textLength = textHandler.textBuf.size(); - const size_t textEnd = textHandler.textEnd; + const size_t dataStartOffset = data.size(); // If we do not have a match yet, start a new lookup from the root - if (!match.hasMatch()) { - TokenLookup{root, charStart, textLength, textEnd}.advance( - c, nextLookups, match, tokens, charEnd, sourceId); + if (!bestMatch.hasMatch()) { + lookups.emplace_back(root, charStart, dataStartOffset); } // Try to 
advance all other lookups with the new character + TokenMatch match; for (TokenLookup &lookup : lookups) { - lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId); + // Continue if the current lookup + if (!lookup.advance(c, nextLookups, match, tokens, charEnd, + sourceId)) { + continue; + } + + // If the matched token is primary, check whether it is better than + // the current best match, if yes, replace the best match. In any + // case just continue + if (match.primary) { + if (match.size() > bestMatch.size()) { + bestMatch = match; + } + continue; + } + + // Otherwise -- if the matched token is a non-primary token (and no + // primary token has been found until now) -- mark the match in the + // TokenizedData + if (!bestMatch.hasMatch()) { + data.mark(match.token.id, data.size() - match.size() + 1, + match.size()); + } } // We have found a token and there are no more states to advance or the // text handler has found something -- abort to return the new token - if (match.hasMatch()) { - if ((nextLookups.empty() || textHandler.hasText())) { + if (bestMatch.hasMatch()) { + if ((nextLookups.empty() || data.size() > initialDataSize)) { break; } } else { // Record all incomming characters - textHandler.append(c, charStart, charEnd); + data.append(c, charStart, charEnd); + + // Special token processing + // TODO: Build a special state machine for this in another class + if (c == '\n') { + data.mark(Tokens::Newline, 1); + } } // Swap the lookups and the nextLookups list @@ -246,60 +245,53 @@ bool Tokenizer::next(CharReader &reader, Token &token) charStart = charEnd; } - // If we found text, emit that text - if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { - buildDataToken(textHandler, match, sourceId); + // If we found data, emit a corresponding data token + if (data.size() > initialDataSize && + (!bestMatch.hasMatch() || + bestMatch.dataStartOffset > initialDataSize)) { + // If we have a "bestMatch" wich starts after text data 
has started, + // trim the TokenizedData to this offset + if (bestMatch.dataStartOffset > initialDataSize) { + data.trim(bestMatch.dataStartOffset); + } + + // Create a token containing the data location + bestMatch.token = Token{data.getLocation()}; } // Move the read/peek cursor to the end of the token, abort if an error // happens while doing so - if (match.hasMatch()) { + if (bestMatch.hasMatch()) { // Make sure we have a valid location - if (match.token.location.getEnd() == InvalidSourceOffset) { + if (bestMatch.token.location.getEnd() == InvalidSourceOffset) { throw OusiaException{"Token end position offset out of range"}; } // Seek to the end of the current token - const size_t end = match.token.location.getEnd(); + const size_t end = bestMatch.token.location.getEnd(); if (read) { reader.seek(end); } else { reader.seekPeekCursor(end); } - token = match.token; + token = bestMatch.token; } else { token = Token{}; } - return match.hasMatch(); + return bestMatch.hasMatch(); } -bool Tokenizer::read(CharReader &reader, Token &token) +bool Tokenizer::read(CharReader &reader, Token &token, TokenizedData &data) { - switch (whitespaceMode) { - case WhitespaceMode::PRESERVE: - return next(reader, token); - case WhitespaceMode::TRIM: - return next(reader, token); - case WhitespaceMode::COLLAPSE: - return next(reader, token); - } - return false; + return next(reader, token, data); } -bool Tokenizer::peek(CharReader &reader, Token &token) +bool Tokenizer::peek(CharReader &reader, Token &token, TokenizedData &data) { - switch (whitespaceMode) { - case WhitespaceMode::PRESERVE: - return next(reader, token); - case WhitespaceMode::TRIM: - return next(reader, token); - case WhitespaceMode::COLLAPSE: - return next(reader, token); - } - return false; + return next(reader, token, data); } -TokenId Tokenizer::registerToken(const std::string &token) +TokenId Tokenizer::registerToken(const std::string &token, bool primary) { // Abort if an empty token should be registered if 
(token.empty()) { @@ -309,8 +301,8 @@ TokenId Tokenizer::registerToken(const std::string &token) // Search for a new slot in the tokens list TokenId type = Tokens::Empty; for (size_t i = nextTokenId; i < tokens.size(); i++) { - if (tokens[i].empty()) { - tokens[i] = token; + if (!tokens[i].valid()) { + tokens[i] = TokenDescriptor(token, primary); type = i; break; } @@ -320,62 +312,47 @@ TokenId Tokenizer::registerToken(const std::string &token) // override the special token type handles if (type == Tokens::Empty) { type = tokens.size(); - if (type == Tokens::Data || type == Tokens::Empty) { + if (type >= Tokens::MaxTokenId) { throw OusiaException{"Token type ids depleted!"}; } - tokens.emplace_back(token); + tokens.emplace_back(token, primary); } nextTokenId = type + 1; - // Try to register the token in the trie -- if this fails, remove it - // from the tokens list + // Try to register the token in the trie -- if this fails, remove it from + // the tokens list if (!trie.registerToken(token, type)) { - tokens[type] = std::string{}; + tokens[type] = TokenDescriptor(); nextTokenId = type; return Tokens::Empty; } return type; } -bool Tokenizer::unregisterToken(TokenId type) +bool Tokenizer::unregisterToken(TokenId id) { // Unregister the token from the trie, abort if an invalid type is given - if (type < tokens.size() && trie.unregisterToken(tokens[type])) { - tokens[type] = std::string{}; - nextTokenId = type; + if (id < tokens.size() && trie.unregisterToken(tokens[id].string)) { + tokens[id] = TokenDescriptor(); + nextTokenId = id; return true; } return false; } -std::string Tokenizer::getTokenString(TokenId type) -{ - if (type < tokens.size()) { - return tokens[type]; - } - return std::string{}; -} +static Tokenizer::TokenDescriptor EmptyTokenDescriptor; -void Tokenizer::setWhitespaceMode(WhitespaceMode mode) +const Tokenizer::TokenDescriptor &Tokenizer::lookupToken(TokenId id) const { - whitespaceMode = mode; + if (id < tokens.size()) { + return tokens[id]; + } + 
return EmptyTokenDescriptor; } -WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; } - /* Explicitly instantiate all possible instantiations of the "next" member function */ -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); +template bool Tokenizer::next(CharReader &, Token &, TokenizedData &); +template bool Tokenizer::next(CharReader &, Token &, TokenizedData &); } diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp index f21c6a3..2ddb9c9 100644 --- a/src/core/parser/utils/Tokenizer.hpp +++ b/src/core/parser/utils/Tokenizer.hpp @@ -19,8 +19,8 @@ /** * @file Tokenizer.hpp * - * Tokenizer that can be reconfigured at runtime used for parsing the plain - * text format. + * Tokenizer that can be reconfigured at runtime and is used for parsing the + * plain text format. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -33,39 +33,75 @@ #include #include -#include +#include -#include "Token.hpp" #include "TokenTrie.hpp" namespace ousia { // Forward declarations class CharReader; +class TokenizedData; /** * The Tokenizer is used to extract tokens and chunks of text from a - * CharReader. It allows to register and unregister tokens while parsing and - * to modify the handling of whitespace characters. Note that the - * Tokenizer always tries to extract the longest possible token from the - * tokenizer. + * CharReader. It allows to register and unregister tokens while parsing. Note + * that the Tokenizer always tries to extract the longest possible token from + * the tokenizer. Tokens can be registered as primary or non-primary token. 
If + * a Token is registered as a primary token, it is returned as a single Token + * instance if it occurs. In the non-primary case the token is returned as part + * of a segmented TokenizedData instance. */ class Tokenizer { -private: +public: /** - * Internally used token trie. This object holds all registered tokens. + * Internally used structure describing a registered token. */ - TokenTrie trie; + struct TokenDescriptor { + /** + * String describing the token. + */ + std::string string; + + /** + * Set to true if this token is primary. + */ + bool primary; + + /** + * Constructor of the TokenDescriptor class. + * + * @param string is the string representation of the registered token. + * @param primary specifies whether the token is a primary token that + * should be returned as a single token, or a secondary token, that + * should be returned as part of TokenizedData. + */ + TokenDescriptor(const std::string &string, bool primary) + : string(string), primary(primary) + { + } + + /** + * Default constructor. + */ + TokenDescriptor() : primary(false) {} + + /** + * Returns true if the TokenDescriptor represents a valid token. + */ + bool valid() { return !string.empty(); } + }; +private: /** - * Flag defining whether whitespaces should be preserved or not. + * Internally used token trie. This object holds all registered tokens. */ - WhitespaceMode whitespaceMode; + TokenTrie trie; /** * Vector containing all registered token types. */ - std::vector tokens; + std::vector tokens; /** * Next index in the tokens list where to search for a new token id. @@ -74,90 +110,78 @@ private: /** * Templated function used internally to read the current token. The - * function is templated in order to force code generation for all six - * combiations of whitespace modes and reading/peeking. + * function is templated in order to force optimized code generation for + * both reading and peeking. * - * @tparam TextHandler is the type to be used for the textHandler instance. 
- * @tparam read specifies whether the function should start from and advance - * the read pointer of the char reader. + * @tparam read specifies whether the method should read the token or just + * peek. * @param reader is the CharReader instance from which the data should be * read. * @param token is the token structure into which the token information * should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return false if the end of the stream has been reached, true otherwise. */ - template - bool next(CharReader &reader, Token &token); + template + bool next(CharReader &reader, Token &token, TokenizedData &data); public: /** * Constructor of the Tokenizer class. - * - * @param whitespaceMode specifies how whitespace should be handled. */ - Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); + Tokenizer(); /** - * Registers the given string as a token. Returns a const pointer at a - * TokenDescriptor that will be used to reference the newly created token. + * Registers the given string as a token. Returns a unique identifier + * describing the registered token. * * @param token is the token string that should be registered. - * @return a unique identifier for the registered token or EmptyToken if + * @param primary specifies whether the token is a primary token -- if true, + * the token will be returned as a single, standalone token. Otherwise the + * token will be returned as part of a "TokenizedData" structure. + * @return a unique identifier for the registered token or Tokens::Empty if * an error occured. */ - TokenId registerToken(const std::string &token); + TokenId registerToken(const std::string &token, bool primary = true); /** * Unregisters the token belonging to the given TokenId. * * @param type is the token type that should be unregistered. The - *TokenId - * must have been returned by registerToken. 
+ * TokenId must have been returned by registerToken. * @return true if the operation was successful, false otherwise (e.g. - * because the given TokenDescriptor was already unregistered). + * because the token with the given TokenId was already unregistered). */ - bool unregisterToken(TokenId type); + bool unregisterToken(TokenId id); /** * Returns the token that was registered under the given TokenId id or - *an - * empty string if an invalid TokenId id is given. + * an empty string if an invalid TokenId id is given. * - * @param type is the TokenId id for which the corresponding token - *string + * @param id is the TokenId for which the corresponding TokenDescriptor * should be returned. - * @return the registered token string or an empty string if the given type - * was invalid. - */ - std::string getTokenString(TokenId type); - - /** - * Sets the whitespace mode. - * - * @param whitespaceMode defines how whitespace should be treated in text - * tokens. - */ - void setWhitespaceMode(WhitespaceMode mode); - - /** - * Returns the current value of the whitespace mode. - * - * @return the whitespace mode. + * @return the registered TokenDescriptor or an invalid TokenDescriptor if + * the given TokenId is invalid. */ - WhitespaceMode getWhitespaceMode(); + const TokenDescriptor& lookupToken(TokenId id) const; /** * Reads a new token from the CharReader and stores it in the given - * Token instance. + * Token instance. If the token has the id Tokens::Data, use the "getData" + * method to fetch a reference at the underlying TokenizedData instance + * storing the data. * * @param reader is the CharReader instance from which the data should be * read. * @param token is a reference at the token instance into which the Token * information should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return true if a token could be read, false if the end of the stream * has been reached. 
*/ - bool read(CharReader &reader, Token &token); + bool read(CharReader &reader, Token &token, TokenizedData &data); /** * The peek method does not advance the read position of the char reader, @@ -167,10 +191,12 @@ public: * read. * @param token is a reference at the token instance into which the Token * information should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return true if a token could be read, false if the end of the stream * has been reached. */ - bool peek(CharReader &reader, Token &token); + bool peek(CharReader &reader, Token &token, TokenizedData &data); }; } diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index f61ac7d..d4cdbf8 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -94,92 +94,11 @@ public: static const PlainFormatTokens OsmlTokens; -/** - * Class used internally to collect data issued via "DATA" event. - */ -class DataHandler { -private: - /** - * Internal character buffer. - */ - std::vector buf; - - /** - * Start location of the character data. - */ - SourceOffset start; - - /** - * End location of the character data. - */ - SourceOffset end; - -public: - /** - * Default constructor, initializes start and end with zeros. - */ - DataHandler() : start(0), end(0) {} - - /** - * Returns true if the internal buffer is empty. - * - * @return true if no characters were added to the internal buffer, false - * otherwise. - */ - bool isEmpty() { return buf.empty(); } - - /** - * Appends a single character to the internal buffer. - * - * @param c is the character that should be added to the internal buffer. - * @param charStart is the start position of the character. - * @param charEnd is the end position of the character. 
- */ - void append(char c, SourceOffset charStart, SourceOffset charEnd) - { - if (isEmpty()) { - start = charStart; - } - buf.push_back(c); - end = charEnd; - } - - /** - * Appends a string to the internal buffer. - * - * @param s is the string that should be added to the internal buffer. - * @param stringStart is the start position of the string. - * @param stringEnd is the end position of the string. - */ - void append(const std::string &s, SourceOffset stringStart, - SourceOffset stringEnd) - { - if (isEmpty()) { - start = stringStart; - } - std::copy(s.c_str(), s.c_str() + s.size(), back_inserter(buf)); - end = stringEnd; - } - - /** - * Converts the internal buffer to a variant with attached location - * information. - * - * @param sourceId is the source id which is needed for building the - * location information. - * @return a Variant with the internal buffer content as string and - * the correct start and end location. - */ - Variant toVariant(SourceId sourceId) - { - Variant res = Variant::fromString(std::string(buf.data(), buf.size())); - res.setLocation({sourceId, start, end}); - return res; - } -}; - OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) - : reader(reader), logger(logger), tokenizer(OsmlTokens) + : reader(reader), + logger(logger), + tokenizer(OsmlTokens), + data(reader.getSourceId()) { // Place an intial command representing the complete file on the stack commands.push(Command{"", Variant::mapType{}, true, true, true, false}); @@ -188,7 +107,7 @@ OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) { bool first = true; - bool hasCharSiceNSSep = false; + bool hasCharSinceNSSep = false; std::vector identifier; size_t end = reader.getPeekOffset(); char c, c2; @@ -197,7 +116,7 @@ Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) if ((first && Utils::isIdentifierStartCharacter(c)) || (!first && 
Utils::isIdentifierCharacter(c))) { identifier.push_back(c); - } else if (c == ':' && hasCharSiceNSSep && reader.fetchPeek(c2) && + } else if (c == ':' && hasCharSinceNSSep && reader.fetchPeek(c2) && Utils::isIdentifierStartCharacter(c2)) { identifier.push_back(c); } else { @@ -214,8 +133,8 @@ Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) // This is no longer the first character first = false; - // Advance the hasCharSiceNSSep flag - hasCharSiceNSSep = allowNSSep && (c != ':'); + // Advance the hasCharSinceNSSep flag + hasCharSinceNSSep = allowNSSep && (c != ':'); end = reader.getPeekOffset(); reader.consumePeek(); @@ -488,7 +407,10 @@ void OsmlStreamParser::parseBlockComment() { Token token; size_t depth = 1; - while (tokenizer.read(reader, token)) { + while (tokenizer.read(reader, token, data)) { + // Throw the comment data away + data.clear(); + if (token.id == OsmlTokens.BlockCommentEnd) { depth--; if (depth == 0) { @@ -514,10 +436,9 @@ void OsmlStreamParser::parseLineComment() } } -bool OsmlStreamParser::checkIssueData(DataHandler &handler) +bool OsmlStreamParser::checkIssueData() { - if (!handler.isEmpty()) { - data = handler.toVariant(reader.getSourceId()); + if (!data.empty()) { location = data.getLocation(); reader.resetPeek(); return true; @@ -575,12 +496,12 @@ bool OsmlStreamParser::closeField() OsmlStreamParser::State OsmlStreamParser::parse() { - // Handler for incomming data - DataHandler handler; + // Reset the data handler + data.clear(); // Read tokens until the outer loop should be left Token token; - while (tokenizer.peek(reader, token)) { + while (tokenizer.peek(reader, token, data)) { const TokenId type = token.id; // Special handling for Backslash and Text @@ -606,7 +527,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Try to parse a command if (Utils::isIdentifierStartCharacter(c)) { // Make sure to issue any data before it is to late - if (checkIssueData(handler)) { + if (checkIssueData()) { return 
State::DATA; } @@ -633,12 +554,11 @@ OsmlStreamParser::State OsmlStreamParser::parse() // If this was an annotation start token, add the parsed < to the // output if (type == OsmlTokens.AnnotationStart) { - handler.append('<', token.location.getStart(), - token.location.getStart() + 1); + data.append('<', token.location.getStart(), + token.location.getStart() + 1); } - handler.append(c, token.location.getStart(), - reader.getPeekOffset()); + data.append(c, token.location.getStart(), reader.getPeekOffset()); reader.consumePeek(); continue; } else if (type == Tokens::Data) { @@ -647,18 +567,13 @@ OsmlStreamParser::State OsmlStreamParser::parse() location = token.location; return State::FIELD_START; } - - // Append the text to the data handler - handler.append(token.content, token.location.getStart(), - token.location.getEnd()); - reader.consumePeek(); continue; } // A non-text token was reached, make sure all pending data commands // have been issued - if (checkIssueData(handler)) { + if (checkIssueData()) { return State::DATA; } @@ -676,34 +591,36 @@ OsmlStreamParser::State OsmlStreamParser::parse() Command &cmd = commands.top(); if (!cmd.inField) { cmd.inField = true; - return State::FIELD_START; } - logger.error( + return State::FIELD_START; +/* logger.error( "Got field start token \"{\", but no command for which to " "start the field. Write \"\\{\" to insert this sequence as " "text.", - token); + token);*/ } else if (token.id == OsmlTokens.FieldEnd) { - if (closeField()) { + closeField(); + return State::FIELD_END; +/* if (closeField()) { return State::FIELD_END; } logger.error( "Got field end token \"}\", but there is no field to end. 
" "Write \"\\}\" to insert this sequence as text.", - token); + token);*/ } else if (token.id == OsmlTokens.DefaultFieldStart) { // Try to start a default field the first time the token is reached Command &topCmd = commands.top(); if (!topCmd.inField) { topCmd.inField = true; topCmd.inDefaultField = true; - return State::FIELD_START; } - logger.error( + return State::FIELD_START; +/* logger.error( "Got default field start token \"{!\", but no command for " "which to start the field. Write \"\\{!\" to insert this " "sequence as text", - token); + token);*/ } else if (token.id == OsmlTokens.AnnotationEnd) { // We got a single annotation end token "\>" -- simply issue the // ANNOTATION_END event @@ -717,7 +634,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() } // Issue available data - if (checkIssueData(handler)) { + if (checkIssueData()) { return State::DATA; } @@ -737,6 +654,14 @@ OsmlStreamParser::State OsmlStreamParser::parse() return State::END; } +Variant OsmlStreamParser::getText(WhitespaceMode mode) +{ + TokenizedData dataFork = data; + Variant text = dataFork.text(mode); + location = text.getLocation(); + return text; +} + const Variant &OsmlStreamParser::getCommandName() const { return commands.top().name; diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index dc3034c..453a2bb 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -29,17 +29,19 @@ #ifndef _OUSIA_OSML_STREAM_PARSER_HPP_ #define _OUSIA_OSML_STREAM_PARSER_HPP_ -#include +#include #include +#include #include +#include namespace ousia { // Forward declarations class CharReader; class Logger; -class DataHandler; +class OsmlStreamParserImpl; /** * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml @@ -137,26 +139,15 @@ public: Variant arguments; /** - * Set to true if this is a command with clear begin and end. 
- */ - bool hasRange : 1; - - /** - * Set to true if we are currently inside a field of this command. - */ - bool inField : 1; - - /** - * Set to true if we are currently in the range field of the command - * (implies inField being set to true). + * Vector used as stack for holding the number of opening/closing braces + * and the corresponding "isDefaultField" flag. */ - bool inRangeField : 1; + std::vector fields; /** - * Set to true if we are currently in a field that has been especially - * marked as default field (using the "|") syntax. + * Set to true if this is a command with clear begin and end. */ - bool inDefaultField : 1; + bool hasRange; /** * Default constructor. @@ -164,7 +155,6 @@ public: Command() : hasRange(false), inField(false), - inRangeField(false), inDefaultField() { } @@ -178,15 +168,10 @@ public: * command. * @param hasRange should be set to true if this is a command with * explicit range. - * @param inField is set to true if we currently are inside a field - * of this command. - * @param inRangeField is set to true if we currently are inside the - * outer field of a ranged command. * @param inDefaultField is set to true if we currently are in a * specially marked default field. */ - Command(Variant name, Variant arguments, bool hasRange, - bool inField, bool inRangeField, bool inDefaultField) + Command(Variant name, Variant arguments, bool hasRange) : name(std::move(name)), arguments(std::move(arguments)), hasRange(hasRange), @@ -215,25 +200,20 @@ private: Tokenizer tokenizer; /** - * Stack containing the current commands. - */ - std::stack commands; - - /** - * Variant containing the data that has been read (always is a string, - * contains the exact location of the data in the source file). + * Variant containing the tokenized data that was returned from the + * tokenizer as data. */ - Variant data; + TokenizedData data; /** - * Contains the location of the last token. + * Stack containing the current commands. 
*/ - SourceLocation location; + std::stack commands; /** - * Contains the field index of the current command. + * Pointer at */ - size_t fieldIdx; + std::unique_ptr impl; /** * Function used internall to parse an identifier. @@ -291,12 +271,10 @@ private: /** * Checks whether there is any data pending to be issued, if yes, issues it. * - * @param handler is the data handler that contains the data that may be - * returned to the user. * @return true if there was any data and DATA should be returned by the * parse function, false otherwise. */ - bool checkIssueData(DataHandler &handler); + bool checkIssueData(); /** * Called before any data is appended to the internal data handler. Checks @@ -327,6 +305,12 @@ public: */ OsmlStreamParser(CharReader &reader, Logger &logger); + /** + * Destructor of the OsmlStreamParser, needed to destroy the incomplete + * OsmlStreamParserImpl. + */ + ~OsmlStreamParser(); + /** * Continues parsing. Returns one of the states defined in the State enum. * Callers should stop once the State::END state is reached. Use the getter @@ -344,7 +328,19 @@ public: * @return a reference at a variant containing the data parsed by the * "parse" function. */ - const Variant &getData() const { return data; } + const TokenizedData &getData() const { return data; } + + /** + * Returns the complete content of the internal TokenizedData instance as + * a single string Variant. This method is mainly used in the unit tests for + * this class, it simply calls the text() method of TokenizedData. + * + * @param mode is the WhitespaceMode that should be used for returning the + * text. + * @return a string variant containing the text content of the internal + * TokenizedData instance or a nullptr variant if there is no text. + */ + Variant getText(WhitespaceMode mode = WhitespaceMode::COLLAPSE); /** * Returns a reference at the internally stored command name. Only valid if @@ -371,13 +367,6 @@ public: * syntax). 
*/ bool inDefaultField() const; - - /** - * Returns a reference at the char reader. - * - * @return the last internal token location. - */ - const SourceLocation &getLocation() const { return location; } }; } diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index c9254b0..855f80d 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include "OsxmlAttributeLocator.hpp" #include "OsxmlEventParser.hpp" @@ -56,17 +55,6 @@ public: */ std::vector textBuf; - /** - * Current whitespace buffer (for the trimming whitspace mode) - */ - std::vector whitespaceBuf; - - /** - * Flag indicating whether a whitespace character was present (for the - * collapsing whitespace mode). - */ - bool hasWhitespace; - /** * Current character data start. */ @@ -394,33 +382,17 @@ static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) SourceLocation loc = xmlSyncLoggerPosition(p, ulen); // Fetch some variables for convenience - const WhitespaceMode mode = parser->getWhitespaceMode(); OsxmlEventParserData &data = parser->getData(); std::vector &textBuf = data.textBuf; - std::vector &whitespaceBuf = data.whitespaceBuf; - bool &hasWhitespace = data.hasWhitespace; - size_t &textStart = data.textStart; - size_t &textEnd = data.textEnd; - - size_t pos = loc.getStart(); - for (size_t i = 0; i < ulen; i++, pos++) { - switch (mode) { - case WhitespaceMode::PRESERVE: - PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, - textStart, textEnd); - break; - case WhitespaceMode::TRIM: - TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, - textStart, textEnd, - whitespaceBuf); - break; - case WhitespaceMode::COLLAPSE: - CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, - textStart, textEnd, - hasWhitespace); - break; - } + + // Update start and end position + if (textBuf.empty()) { + data.textStart = 
loc.getStart(); } + data.textEnd = loc.getEnd(); + + // Insert the data into the text buffer + textBuf.insert(textBuf.end(), &s[0], &s[ulen]); } /* Class OsxmlEvents */ @@ -430,11 +402,7 @@ OsxmlEvents::~OsxmlEvents() {} /* Class OsxmlEventParser */ OsxmlEventParserData::OsxmlEventParserData() - : depth(0), - annotationEndTagDepth(-1), - hasWhitespace(false), - textStart(0), - textEnd(0) + : depth(0), annotationEndTagDepth(-1), textStart(0), textEnd(0) { } @@ -466,8 +434,6 @@ Variant OsxmlEventParserData::getText(SourceId sourceId) // Reset the text buffers textBuf.clear(); - whitespaceBuf.clear(); - hasWhitespace = false; textStart = 0; textEnd = 0; @@ -482,7 +448,6 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, : reader(reader), events(events), logger(logger), - whitespaceMode(WhitespaceMode::COLLAPSE), data(new OsxmlEventParserData()) { } @@ -532,16 +497,6 @@ void OsxmlEventParser::parse() } } -void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode) -{ - this->whitespaceMode = whitespaceMode; -} - -WhitespaceMode OsxmlEventParser::getWhitespaceMode() const -{ - return whitespaceMode; -} - CharReader &OsxmlEventParser::getReader() const { return reader; } Logger &OsxmlEventParser::getLogger() const { return logger; } diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index e39245f..e3fd5d4 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -32,8 +32,6 @@ #include #include -#include - namespace ousia { // Forward declarations @@ -99,13 +97,10 @@ public: virtual void fieldEnd() = 0; /** - * Called whenever data is found. Whitespace data is handled as specified - * and the data has been parsed to the specified variant type. This function - * is not called if the parsing failed, the parser prints an error message - * instead. + * Called whenever string data is found. 
* - * @param data is the already parsed data that should be passed to the - * handler. + * @param data is a Variant containing the string data that was found in the + * XML file. */ virtual void data(const Variant &data) = 0; }; @@ -134,11 +129,6 @@ private: */ Logger &logger; - /** - * Current whitespace mode. - */ - WhitespaceMode whitespaceMode; - /** * Data to be used by the internal functions. */ @@ -170,21 +160,6 @@ public: */ void parse(); - /** - * Sets the whitespace handling mode. - * - * @param whitespaceMode defines how whitespace in the data should be - * handled. - */ - void setWhitespaceMode(WhitespaceMode whitespaceMode); - - /** - * Returns the current whitespace handling mode. - * - * @return the currently set whitespace handling mode. - */ - WhitespaceMode getWhitespaceMode() const; - /** * Returns the internal CharReader reference. * diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index a93f14a..83966d5 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -53,7 +54,7 @@ struct Tracker { Variant::mapType annotationStartArgs; Variant annotationEndClassName; Variant annotationEndElementName; - Variant dataData; + TokenizedData dataData; bool startResult; bool fieldStartSetIsDefault; @@ -81,7 +82,7 @@ struct Tracker { annotationStartArgs = Variant::mapType{}; annotationEndClassName = Variant::fromString(std::string{}); annotationEndElementName = Variant::fromString(std::string{}); - dataData = Variant::fromString(std::string{}); + dataData = TokenizedData(); startResult = true; fieldStartSetIsDefault = false; @@ -157,7 +158,7 @@ public: return tracker.annotationEndResult; } - bool data(Variant &data) override + bool data(TokenizedData &data) override { tracker.dataCount++; tracker.dataData = data; @@ -363,7 +364,7 @@ TEST(Stack, multipleFields) s.data("test"); tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, 
ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test", tracker.dataData); + EXPECT_EQ("test", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc @@ -375,7 +376,7 @@ TEST(Stack, multipleFields) s.data("test2"); tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test2", tracker.dataData); + EXPECT_EQ("test2", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc @@ -387,7 +388,7 @@ TEST(Stack, multipleFields) s.data("test3"); tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test3", tracker.dataData); + EXPECT_EQ("test3", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc @@ -744,4 +745,4 @@ TEST(Stack, fieldAfterDefaultField) ASSERT_FALSE(logger.hasError()); } } -} \ No newline at end of file +} diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp index 231bad9..6bd7234 100644 --- a/test/core/parser/utils/TokenizedDataTest.cpp +++ b/test/core/parser/utils/TokenizedDataTest.cpp @@ -380,14 +380,14 @@ TEST(TokenizedData, textPreserveWhitespace) data.enableToken(5); - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); + Variant text; + text = data.text(WhitespaceMode::PRESERVE); + EXPECT_EQ(" ", text.asString()); + EXPECT_EQ(0U, text.getLocation().getStart()); + EXPECT_EQ(2U, text.getLocation().getEnd()); + EXPECT_EQ(InvalidSourceId, text.getLocation().getSourceId()); + Token token; ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); EXPECT_EQ(5U, token.id); EXPECT_EQ("$$", token.content); @@ -395,14 
+395,13 @@ TEST(TokenizedData, textPreserveWhitespace) EXPECT_EQ(4U, token.getLocation().getEnd()); EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(4U, token.getLocation().getStart()); - EXPECT_EQ(6U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); + text = data.text(WhitespaceMode::PRESERVE); + EXPECT_EQ(" ", text.asString()); + EXPECT_EQ(4U, text.getLocation().getStart()); + EXPECT_EQ(6U, text.getLocation().getEnd()); + EXPECT_EQ(InvalidSourceId, text.getLocation().getSourceId()); - ASSERT_FALSE(data.text(token, WhitespaceMode::PRESERVE)); + ASSERT_EQ(nullptr, data.text(WhitespaceMode::PRESERVE)); ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); } @@ -416,7 +415,7 @@ TEST(TokenizedData, textTrimWhitespace) data.enableToken(5); Token token; - ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); + ASSERT_EQ(nullptr, data.text(WhitespaceMode::TRIM)); ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); EXPECT_EQ(5U, token.id); @@ -425,7 +424,7 @@ TEST(TokenizedData, textTrimWhitespace) EXPECT_EQ(4U, token.getLocation().getEnd()); EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); + ASSERT_EQ(nullptr, data.text(WhitespaceMode::TRIM)); ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); } @@ -439,7 +438,7 @@ TEST(TokenizedData, textCollapseWhitespace) data.enableToken(5); Token token; - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); + ASSERT_EQ(nullptr, data.text(WhitespaceMode::COLLAPSE)); ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); EXPECT_EQ(5U, token.id); @@ -448,7 +447,7 @@ TEST(TokenizedData, textCollapseWhitespace) EXPECT_EQ(4U, token.getLocation().getEnd()); EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(data.text(token, 
WhitespaceMode::COLLAPSE)); + ASSERT_EQ(nullptr, data.text(WhitespaceMode::COLLAPSE)); ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); } @@ -460,15 +459,15 @@ TEST(TokenizedData, appendChars) ASSERT_EQ(3U, data.append('s', 8, 10)); ASSERT_EQ(4U, data.append('t', 10, 12)); - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(5U, token.getLocation().getStart()); - EXPECT_EQ(12U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); + Variant text = data.text(WhitespaceMode::COLLAPSE); + ASSERT_EQ("test", text.asString()); + EXPECT_EQ(5U, text.getLocation().getStart()); + EXPECT_EQ(12U, text.getLocation().getEnd()); + EXPECT_EQ(InvalidSourceId, text.getLocation().getSourceId()); + + ASSERT_EQ(nullptr, data.text(WhitespaceMode::PRESERVE)); - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); + Token token; ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); } @@ -480,15 +479,16 @@ TEST(TokenizedData, copy) data.mark(6, 3, 1); data.enableToken(6); + Variant text; Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("a", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); + text = data.text(WhitespaceMode::COLLAPSE); + ASSERT_EQ("a", text.asString()); + EXPECT_EQ(1U, text.getLocation().getStart()); + EXPECT_EQ(2U, text.getLocation().getEnd()); + EXPECT_EQ(InvalidSourceId, text.getLocation().getSourceId()); + + ASSERT_EQ(nullptr, data.text(WhitespaceMode::COLLAPSE)); TokenizedData dataCopy = data; @@ -506,21 +506,19 @@ TEST(TokenizedData, copy) EXPECT_EQ(4U, token.getLocation().getEnd()); EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - 
ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" b ", token.content); - EXPECT_EQ(4U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); + text = data.text(WhitespaceMode::PRESERVE); + ASSERT_EQ(" b ", text.asString()); + EXPECT_EQ(4U, text.getLocation().getStart()); + EXPECT_EQ(7U, text.getLocation().getEnd()); + EXPECT_EQ(InvalidSourceId, text.getLocation().getSourceId()); ASSERT_FALSE(data.next(token)); - ASSERT_TRUE(dataCopy.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("b", token.content); - EXPECT_EQ(5U, token.getLocation().getStart()); - EXPECT_EQ(6U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(dataCopy.next(token)); + text = dataCopy.text(WhitespaceMode::COLLAPSE); + ASSERT_EQ("b", text.asString()); + EXPECT_EQ(5U, text.getLocation().getStart()); + EXPECT_EQ(6U, text.getLocation().getEnd()); + EXPECT_EQ(InvalidSourceId, text.getLocation().getSourceId()); + ASSERT_FALSE(data.next(token)); } } diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 3809a12..0f2bfb7 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -20,6 +20,7 @@ #include #include +#include namespace ousia { @@ -31,23 +32,40 @@ TEST(Tokenizer, tokenRegistration) ASSERT_EQ(0U, tokenizer.registerToken("a")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("a")); - ASSERT_EQ("a", tokenizer.getTokenString(0U)); + ASSERT_EQ("a", tokenizer.lookupToken(0U).string); ASSERT_EQ(1U, tokenizer.registerToken("b")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("b")); - ASSERT_EQ("b", tokenizer.getTokenString(1U)); + ASSERT_EQ("b", tokenizer.lookupToken(1U).string); ASSERT_EQ(2U, tokenizer.registerToken("c")); ASSERT_EQ(Tokens::Empty, 
tokenizer.registerToken("c")); - ASSERT_EQ("c", tokenizer.getTokenString(2U)); + ASSERT_EQ("c", tokenizer.lookupToken(2U).string); ASSERT_TRUE(tokenizer.unregisterToken(1U)); ASSERT_FALSE(tokenizer.unregisterToken(1U)); - ASSERT_EQ("", tokenizer.getTokenString(1U)); + ASSERT_EQ("", tokenizer.lookupToken(1U).string); ASSERT_EQ(1U, tokenizer.registerToken("d")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("d")); - ASSERT_EQ("d", tokenizer.getTokenString(1U)); + ASSERT_EQ("d", tokenizer.lookupToken(1U).string); +} + +void expectData(const std::string &expected, SourceOffset tokenStart, + SourceOffset tokenEnd, SourceOffset textStart, + SourceOffset textEnd, const Token &token, TokenizedData &data, + WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ + ASSERT_EQ(Tokens::Data, token.id); + + Variant text = data.text(mode); + ASSERT_TRUE(text.isString()); + + EXPECT_EQ(expected, text.asString()); + EXPECT_EQ(tokenStart, token.location.getStart()); + EXPECT_EQ(tokenEnd, token.location.getEnd()); + EXPECT_EQ(textStart, text.getLocation().getStart()); + EXPECT_EQ(textEnd, text.getLocation().getEnd()); } TEST(Tokenizer, textTokenPreserveWhitespace) @@ -56,36 +74,34 @@ TEST(Tokenizer, textTokenPreserveWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ(" this \t is only a \n\n test text ", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(36U, loc.getEnd()); + expectData(" this \t is only a \n\n test text ", 0, 36, 0, 36, + token, data, WhitespaceMode::PRESERVE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader 
reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, + token, data, WhitespaceMode::PRESERVE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -95,36 +111,34 @@ TEST(Tokenizer, textTokenTrimWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 36, 1, 33, token, + data, WhitespaceMode::TRIM); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, 
loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, + token, data, WhitespaceMode::TRIM); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -134,36 +148,34 @@ TEST(Tokenizer, textTokenCollapseWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this is only a test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); + expectData("this is only a test text", 0, 36, 1, 33, token, data, + WhitespaceMode::COLLAPSE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this is only a test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this is only a test text", 0, 32, 0, 32, token, data, + WhitespaceMode::COLLAPSE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -177,14 +189,12 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); 
ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + expectData("test1", 0, 5, 0, 5, token, data); char c; ASSERT_TRUE(reader.peek(c)); @@ -193,7 +203,8 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -209,14 +220,10 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + expectData("test2", 6, 11, 6, 11, token, data); char c; ASSERT_FALSE(reader.peek(c)); @@ -233,21 +240,17 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); + expectData("test1", 0, 5, 0, 5, token, data); ASSERT_EQ(0U, reader.getOffset()); ASSERT_EQ(5U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -261,35 +264,26 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + TokenizedData data; + 
ASSERT_TRUE(tokenizer.peek(reader, token, data)); + expectData("test2", 6, 11, 6, 11, token, data); ASSERT_EQ(0U, reader.getOffset()); ASSERT_EQ(11U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + expectData("test1", 0, 5, 0, 5, token, data); ASSERT_EQ(5U, reader.getOffset()); ASSERT_EQ(5U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -303,14 +297,9 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + expectData("test2", 6, 11, 6, 11, token, data); ASSERT_EQ(11U, reader.getOffset()); ASSERT_EQ(11U, reader.getPeekOffset()); } @@ -320,6 +309,7 @@ TEST(Tokenizer, ambiguousTokens) { CharReader reader{"abc"}; Tokenizer tokenizer; + TokenizedData data; TokenId t1 = tokenizer.registerToken("abd"); TokenId t2 = tokenizer.registerToken("bc"); @@ -328,16 +318,17 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_EQ(1U, t2); Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_TRUE(tokenizer.read(reader, token, data)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("a", token.content); + expectData("a", 0, 1, 0, 1, token, data); SourceLocation loc = token.location; ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); - ASSERT_TRUE(tokenizer.read(reader, token)); + data.clear(); + 
ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(t2, token.id); ASSERT_EQ("bc", token.content); @@ -346,7 +337,8 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_EQ(1U, loc.getStart()); ASSERT_EQ(3U, loc.getEnd()); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } TEST(Tokenizer, commentTestWhitespacePreserve) @@ -354,7 +346,7 @@ TEST(Tokenizer, commentTestWhitespacePreserve) CharReader reader{"Test/Test /* Block Comment */", 0}; // 012345678901234567890123456789 // 0 1 2 - Tokenizer tokenizer(WhitespaceMode::PRESERVE); + Tokenizer tokenizer; const TokenId t1 = tokenizer.registerToken("/"); const TokenId t2 = tokenizer.registerToken("/*"); @@ -370,45 +362,23 @@ TEST(Tokenizer, commentTestWhitespacePreserve) Token t; for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); + TokenizedData data(0); + EXPECT_TRUE(tokenizer.read(reader, t, data)); EXPECT_EQ(te.id, t.id); - EXPECT_EQ(te.content, t.content); + if (te.id != Tokens::Data) { + EXPECT_EQ(te.content, t.content); + } else { + Variant text = data.text(WhitespaceMode::PRESERVE); + ASSERT_TRUE(text.isString()); + EXPECT_EQ(te.content, text.asString()); + } EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); EXPECT_EQ(te.location.getStart(), t.location.getStart()); EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); } - ASSERT_FALSE(tokenizer.read(reader, t)); -} - -TEST(Tokenizer, commentTestWhitespaceCollapse) -{ - CharReader reader{"Test/Test /* Block Comment */", 0}; - // 012345678901234567890123456789 - // 0 1 2 - Tokenizer tokenizer(WhitespaceMode::COLLAPSE); - const TokenId t1 = tokenizer.registerToken("/"); - const TokenId t2 = tokenizer.registerToken("/*"); - const TokenId t3 = tokenizer.registerToken("*/"); - - std::vector expected = { - {Tokens::Data, "Test", SourceLocation{0, 0, 4}}, - {t1, "/", SourceLocation{0, 4, 5}}, - {Tokens::Data, "Test", SourceLocation{0, 5, 9}}, - {t2, "/*", SourceLocation{0, 
10, 12}}, - {Tokens::Data, "Block Comment", SourceLocation{0, 13, 26}}, - {t3, "*/", SourceLocation{0, 27, 29}}}; - - Token t; - for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.id, t.id); - EXPECT_EQ(te.content, t.content); - EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); - EXPECT_EQ(te.location.getStart(), t.location.getStart()); - EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); - } - ASSERT_FALSE(tokenizer.read(reader, t)); + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, t, data)); } } diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index d52fa5b..3d01007 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -30,11 +30,21 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); // static ConcreteLogger logger; +static OsmlStreamParser::State skipEmptyData(OsmlStreamParser &reader) +{ + OsmlStreamParser::State res = reader.parse(); + if (res == OsmlStreamParser::State::DATA) { + EXPECT_FALSE(reader.getData().hasNonWhitespaceText()); + res = reader.parse(); + } + return res; +} + static void assertCommand(OsmlStreamParser &reader, const std::string &name, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, skipEmptyData(reader)); EXPECT_EQ(name, reader.getCommandName().asString()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); @@ -57,16 +67,19 @@ static void assertCommand(OsmlStreamParser &reader, const std::string &name, static void assertData(OsmlStreamParser &reader, const std::string &data, SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) + SourceOffset end = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) { 
ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(data, reader.getData().asString()); + Variant text = reader.getText(mode); + ASSERT_TRUE(text.isString()); + EXPECT_EQ(data, text.asString()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, text.getLocation().getStart()); EXPECT_EQ(start, reader.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, text.getLocation().getEnd()); EXPECT_EQ(end, reader.getLocation().getEnd()); } } @@ -75,7 +88,7 @@ static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, skipEmptyData(reader)); EXPECT_EQ(defaultField, reader.inDefaultField()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getLocation().getStart()); @@ -89,7 +102,7 @@ static void assertFieldEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, skipEmptyData(reader)); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getLocation().getStart()); } @@ -103,7 +116,7 @@ static void assertAnnotationStart(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, skipEmptyData(reader)); EXPECT_EQ(name, reader.getCommandName().asString()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); @@ -131,7 +144,7 @@ static void assertAnnotationEnd(OsmlStreamParser &reader, 
SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, skipEmptyData(reader)); ASSERT_EQ(name, reader.getCommandName().asString()); if (!elementName.empty()) { ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); @@ -152,7 +165,7 @@ static void assertEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, skipEmptyData(reader)); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getLocation().getStart()); } @@ -205,26 +218,14 @@ TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) assertData(reader, "hello world", 1, 14); } -TEST(OsmlStreamParser, escapeWhitespace) -{ - const char *testString = " hello\\ \\ world "; - // 012345 67 89012345 - // 0 1 - CharReader charReader(testString); - - OsmlStreamParser reader(charReader, logger); - - assertData(reader, "hello world", 1, 15); -} - static void testEscapeSpecialCharacter(const std::string &c) { CharReader charReader(std::string("\\") + c); OsmlStreamParser reader(charReader, logger); EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(c, reader.getData().asString()); + EXPECT_EQ(c, reader.getText().asString()); - SourceLocation loc = reader.getData().getLocation(); + SourceLocation loc = reader.getText().getLocation(); EXPECT_EQ(0U, loc.getStart()); EXPECT_EQ(1U + c.size(), loc.getEnd()); } @@ -253,16 +254,16 @@ TEST(OsmlStreamParser, singleLineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, 
loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(33U, loc.getStart()); ASSERT_EQ(34U, loc.getEnd()); } @@ -279,16 +280,16 @@ TEST(OsmlStreamParser, multilineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } @@ -305,16 +306,16 @@ TEST(OsmlStreamParser, nestedMultilineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } @@ -569,8 +570,11 @@ TEST(OsmlStreamParser, multipleCommands) OsmlStreamParser 
reader(charReader, logger); assertCommand(reader, "a", 0, 2); + assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); assertCommand(reader, "b", 3, 5); + assertData(reader, " ", 5, 6, WhitespaceMode::PRESERVE); assertCommand(reader, "c", 6, 8); + assertData(reader, " ", 8, 9, WhitespaceMode::PRESERVE); assertCommand(reader, "d", 9, 11); assertEnd(reader, 11, 11); } @@ -584,10 +588,13 @@ TEST(OsmlStreamParser, fieldsWithSpaces) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); + assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); assertFieldStart(reader, false, 3, 4); assertCommand(reader, "b", 4, 6); + assertData(reader, " ", 6, 7, WhitespaceMode::PRESERVE); assertCommand(reader, "c", 7, 9); assertFieldEnd(reader, 9, 10); + assertData(reader, " \n\n {", 10, 12, WhitespaceMode::PRESERVE); assertFieldStart(reader, false, 16, 17); assertCommand(reader, "d", 17, 19); assertFieldEnd(reader, 19, 20); diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index 3293370..6942166 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -74,13 +75,11 @@ public: }; static std::vector> parseXml( - const char *testString, - WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) + const char *testString) { TestOsxmlEventListener listener; CharReader reader(testString); OsxmlEventParser parser(reader, listener, logger); - parser.setWhitespaceMode(whitespaceMode); parser.parse(); return listener.events; } @@ -157,7 +156,7 @@ TEST(OsxmlEventParser, magicTopLevelTagInside) ASSERT_EQ(expectedEvents, events); } -TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +TEST(OsxmlEventParser, commandWithData) { const char *testString = " hello \n world "; // 012345678901 234567890123 @@ -168,50 +167,12 @@ TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) {OsxmlEvent::DATA, 
Variant::arrayType{" hello \n world "}}, {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - auto events = parseXml(testString, WhitespaceMode::PRESERVE); + auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); // Check the location of the text ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); } - -TEST(OsxmlEventParser, commandWithDataTrimWhitespace) -{ - const char *testString = " hello \n world "; - // 012345678901 234567890123 - // 0 1 2 - - std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello \n world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::TRIM); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} - -TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) -{ - const char *testString = " hello \n world "; - // 012345678901 234567890123 - // 0 1 2 - - std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::COLLAPSE); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} } -- cgit v1.2.3 From 84c9abc3e9762c4486ddc5ca0352a5d697a51987 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Wed, 25 Feb 2015 23:09:26 +0100 Subject: start of branch, commit log will be rewritten --- CMakeLists.txt | 110 ++--- src/core/common/SourceContextReader.cpp | 5 +- 
src/core/common/Token.cpp | 24 ++ src/core/common/Token.hpp | 181 ++++++++ src/core/common/Utils.cpp | 6 - src/core/common/Utils.hpp | 53 ++- src/core/common/WhitespaceHandler.hpp | 284 ------------- src/core/parser/stack/DocumentHandler.cpp | 24 +- src/core/parser/stack/DocumentHandler.hpp | 4 +- src/core/parser/stack/Handler.cpp | 25 +- src/core/parser/stack/Handler.hpp | 74 ++-- src/core/parser/stack/Stack.cpp | 55 ++- src/core/parser/stack/Stack.hpp | 18 +- src/core/parser/utils/SourceOffsetVector.hpp | 28 +- src/core/parser/utils/Token.cpp | 24 -- src/core/parser/utils/Token.hpp | 142 ------- src/core/parser/utils/TokenTrie.cpp | 16 +- src/core/parser/utils/TokenTrie.hpp | 11 +- src/core/parser/utils/TokenizedData.cpp | 353 +++++++++++++--- src/core/parser/utils/TokenizedData.hpp | 234 +++++++++-- src/core/parser/utils/Tokenizer.cpp | 264 ++++++------ src/core/parser/utils/Tokenizer.hpp | 142 ++++--- src/formats/osml/OsmlStreamParser.cpp | 157 ++----- src/formats/osml/OsmlStreamParser.hpp | 85 ++-- src/formats/osxml/OsxmlEventParser.cpp | 63 +-- src/formats/osxml/OsxmlEventParser.hpp | 31 +- test/core/parser/stack/StackTest.cpp | 15 +- test/core/parser/utils/TokenizedDataTest.cpp | 602 +++++++++++---------------- test/core/parser/utils/TokenizerTest.cpp | 248 +++++------ test/formats/osml/OsmlStreamParserTest.cpp | 79 ++-- test/formats/osxml/OsxmlEventParserTest.cpp | 47 +-- 31 files changed, 1664 insertions(+), 1740 deletions(-) create mode 100644 src/core/common/Token.cpp create mode 100644 src/core/common/Token.hpp delete mode 100644 src/core/common/WhitespaceHandler.hpp delete mode 100644 src/core/parser/utils/Token.cpp delete mode 100644 src/core/parser/utils/Token.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ea5c3aa..225e63d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,6 +158,7 @@ ADD_LIBRARY(ousia_core src/core/common/Rtti src/core/common/RttiBuilder src/core/common/SourceContextReader + src/core/common/Token src/core/common/Utils 
src/core/common/Variant src/core/common/VariantConverter @@ -180,16 +181,15 @@ ADD_LIBRARY(ousia_core src/core/parser/ParserContext src/core/parser/ParserScope src/core/parser/stack/Callbacks - src/core/parser/stack/DocumentHandler - src/core/parser/stack/DomainHandler - src/core/parser/stack/GenericParserStates - src/core/parser/stack/Handler - src/core/parser/stack/ImportIncludeHandler +# src/core/parser/stack/DocumentHandler +# src/core/parser/stack/DomainHandler +# src/core/parser/stack/GenericParserStates +# src/core/parser/stack/Handler +# src/core/parser/stack/ImportIncludeHandler src/core/parser/stack/State - src/core/parser/stack/Stack - src/core/parser/stack/TypesystemHandler +# src/core/parser/stack/Stack +# src/core/parser/stack/TypesystemHandler src/core/parser/utils/SourceOffsetVector - src/core/parser/utils/Token src/core/parser/utils/TokenizedData src/core/parser/utils/Tokenizer src/core/parser/utils/TokenTrie @@ -212,19 +212,19 @@ ADD_LIBRARY(ousia_core # ousia_core #) -ADD_LIBRARY(ousia_osml - src/formats/osml/OsmlParser - src/formats/osml/OsmlStreamParser -) +#ADD_LIBRARY(ousia_osml +# src/formats/osml/OsmlParser +# src/formats/osml/OsmlStreamParser +#) -TARGET_LINK_LIBRARIES(ousia_osml - ousia_core -) +#TARGET_LINK_LIBRARIES(ousia_osml +# ousia_core +#) ADD_LIBRARY(ousia_osxml src/formats/osxml/OsxmlAttributeLocator src/formats/osxml/OsxmlEventParser - src/formats/osxml/OsxmlParser +# src/formats/osxml/OsxmlParser ) TARGET_LINK_LIBRARIES(ousia_osxml @@ -273,19 +273,19 @@ TARGET_LINK_LIBRARIES(ousia_xml # Command line interface -ADD_EXECUTABLE(ousia - src/cli/Main -) +#ADD_EXECUTABLE(ousia +# src/cli/Main +#) -TARGET_LINK_LIBRARIES(ousia - ousia_core - ousia_filesystem - ousia_html - ousia_xml - ousia_osml - ousia_osxml - ${Boost_LIBRARIES} -) +#TARGET_LINK_LIBRARIES(ousia +# ousia_core +# ousia_filesystem +# ousia_html +# ousia_xml +# ousia_osml +# ousia_osxml +# ${Boost_LIBRARIES} +#) # If testing is enabled, build the unit tests IF(TEST) @@ 
-323,11 +323,11 @@ IF(TEST) test/core/model/StyleTest test/core/model/TypesystemTest test/core/parser/ParserScopeTest - test/core/parser/stack/StackTest +# test/core/parser/stack/StackTest test/core/parser/stack/StateTest test/core/parser/utils/SourceOffsetVectorTest test/core/parser/utils/TokenizedDataTest - test/core/parser/utils/TokenizerTest +# test/core/parser/utils/TokenizerTest test/core/parser/utils/TokenTrieTest test/core/resource/ResourceLocatorTest test/core/resource/ResourceRequestTest @@ -383,29 +383,29 @@ IF(TEST) # ousia_mozjs # ) - ADD_EXECUTABLE(ousia_test_osml - test/formats/osml/OsmlParserTest - test/formats/osml/OsmlStreamParserTest - ) +# ADD_EXECUTABLE(ousia_test_osml +# test/formats/osml/OsmlParserTest +# test/formats/osml/OsmlStreamParserTest +# ) - TARGET_LINK_LIBRARIES(ousia_test_osml - ${GTEST_LIBRARIES} - ousia_core - ousia_osml - ousia_filesystem - ) +# TARGET_LINK_LIBRARIES(ousia_test_osml +# ${GTEST_LIBRARIES} +# ousia_core +# ousia_osml +# ousia_filesystem +# ) - ADD_EXECUTABLE(ousia_test_osxml - test/formats/osxml/OsxmlEventParserTest - test/formats/osxml/OsxmlParserTest - ) +# ADD_EXECUTABLE(ousia_test_osxml +# test/formats/osxml/OsxmlEventParserTest +# test/formats/osxml/OsxmlParserTest +# ) - TARGET_LINK_LIBRARIES(ousia_test_osxml - ${GTEST_LIBRARIES} - ousia_core - ousia_osxml - ousia_filesystem - ) +# TARGET_LINK_LIBRARIES(ousia_test_osxml +# ${GTEST_LIBRARIES} +# ousia_core +# ousia_osxml +# ousia_filesystem +# ) ADD_EXECUTABLE(ousia_test_xml test/plugins/xml/XmlOutputTest @@ -423,8 +423,8 @@ IF(TEST) ADD_TEST(ousia_test_filesystem ousia_test_filesystem) ADD_TEST(ousia_test_html ousia_test_html) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) - ADD_TEST(ousia_test_osml ousia_test_osml) - ADD_TEST(ousia_test_osxml ousia_test_osxml) +# ADD_TEST(ousia_test_osml ousia_test_osml) +# ADD_TEST(ousia_test_osxml ousia_test_osxml) ADD_TEST(ousia_test_xml ousia_test_xml) ENDIF() @@ -442,9 +442,9 @@ INSTALL(DIRECTORY data/ DESTINATION 
share/ousia OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE ) -INSTALL(TARGETS ousia - RUNTIME DESTINATION bin -) +#INSTALL(TARGETS ousia +# RUNTIME DESTINATION bin +#) IF(INSTALL_GEDIT_HIGHLIGHTER) INSTALL(FILES contrib/gtksourceview-3.0/language-specs/ousia.lang diff --git a/src/core/common/SourceContextReader.cpp b/src/core/common/SourceContextReader.cpp index d5d379c..f7dbdf3 100644 --- a/src/core/common/SourceContextReader.cpp +++ b/src/core/common/SourceContextReader.cpp @@ -149,8 +149,9 @@ SourceContext SourceContextReader::readContext(CharReader &reader, ctx.relLen = end - start; // end >= start (I2) // Remove linebreaks at the beginning and the end - const std::pair b = - Utils::trim(lineBuf, Utils::isLinebreak); + const std::pair b = Utils::trim( + lineBuf, + [&lineBuf](size_t i) { return Utils::isLinebreak(lineBuf[i]); }); ssize_t s = b.first, e = b.second; s = std::min(s, static_cast(ctx.relPos)); diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp new file mode 100644 index 0000000..8bcdbb5 --- /dev/null +++ b/src/core/common/Token.cpp @@ -0,0 +1,24 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "Token.hpp" + +namespace ousia { +// Stub to make sure Tokens.hpp is valid +} + diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp new file mode 100644 index 0000000..0cf56b0 --- /dev/null +++ b/src/core/common/Token.hpp @@ -0,0 +1,181 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Token.hpp + * + * Definition of the TokenId id and constants for some special tokens. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_TOKEN_HPP_ +#define _OUSIA_TOKEN_HPP_ + +#include +#include +#include +#include + +#include + +namespace ousia { + +/** + * The TokenId is used to give each token id a unique id. + */ +using TokenId = uint32_t; + +/** + * Type used for storing token lengths. + */ +using TokenLength = uint16_t; + +/** + * Type used for storing token sets. + */ +using TokenSet = std::unordered_set; + +/** + * Namespace containing constants for TokenId instances with special meaning. + */ +namespace Tokens { +/** + * Token which is not a token. + */ +constexpr TokenId Empty = std::numeric_limits::max(); + +/** + * Token which represents data (represented as TokenizedData). + */ +constexpr TokenId Data = std::numeric_limits::max() - 1; + +/** + * Token which represents a newline token. 
+ */ +constexpr TokenId Newline = std::numeric_limits::max() - 2; + +/** + * Token which represents a paragraph token -- issued if two consecutive + * newlines occur with optionally any amout of whitespace between them. The + * paragraph token is not repeated until more text is reached. + */ +constexpr TokenId Paragraph = std::numeric_limits::max() - 3; + +/** + * Token which represents a section token -- issued if three or more + * consecutive newlines occur with optionally any amout of whitespace between + * them. The section token is not repeated until more text is reached. + */ +constexpr TokenId Section = std::numeric_limits::max() - 4; + +/** + * Token which represents an indentation token -- issued if the indentation of + * this line is larger than the indentation of the previous line. + */ +constexpr TokenId Indent = std::numeric_limits::max() - 5; + +/** + * Token which represents an dedentation -- issued if the indentation of + * this line is smaller than the indentation of the previous line. + */ +constexpr TokenId Dedent = std::numeric_limits::max() - 6; + +/** + * Maximum token id to be used. Tokens allocated for users should not surpass + * this value. + */ +constexpr TokenId MaxTokenId = std::numeric_limits::max() - 255; +} + +/** + * The Token structure describes a token discovered by the Tokenizer or read + * from the TokenizedData struct. + */ +struct Token { + /** + * Id of the id of this token. + */ + TokenId id; + + /** + * String that was matched. + */ + std::string content; + + /** + * Location from which the string was extracted. + */ + SourceLocation location; + + /** + * Default constructor. + */ + Token() : id(Tokens::Empty) {} + + /** + * Constructor of a "data" token with no explicit content. + * + * @param location is the location of the extracted string content in the + * source file. + */ + Token(SourceLocation location) + : id(Tokens::Data), location(location) + { + } + + /** + * Constructor of the Token struct. 
+ * + * @param id represents the token id. + * @param content is the string content that has been extracted. + * @param location is the location of the extracted string content in the + * source file. + */ + Token(TokenId id, const std::string &content, SourceLocation location) + : id(id), content(content), location(location) + { + } + + /** + * Constructor of the Token struct, only initializes the token id + * + * @param id is the id corresponding to the id of the token. + */ + Token(TokenId id) : id(id) {} + + /** + * Returns true if this token is special. + * + * @return true if the TokenId indicates that this token is a "special" + * token. + */ + bool isSpecial() const {return id > Tokens::MaxTokenId;} + + /** + * The getLocation function allows the tokens to be directly passed as + * parameter to Logger or LoggableException instances. + * + * @return a reference at the location field + */ + const SourceLocation &getLocation() const { return location; } +}; +} + +#endif /* _OUSIA_TOKENS_HPP_ */ + diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index a77951e..85d2c28 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -108,12 +108,6 @@ std::string Utils::extractFileExtension(const std::string &filename) return std::string{}; } -std::string Utils::trim(const std::string &s) -{ - std::pair bounds = trim(s, Utils::isWhitespace); - return s.substr(bounds.first, bounds.second - bounds.first); -} - bool Utils::startsWith(const std::string &s, const std::string &prefix) { return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix; diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 7d96562..82a8f8c 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -123,14 +123,6 @@ public: */ static bool hasNonWhitepaceChar(const std::string &s); - /** - * Removes whitespace at the beginning and the end of the given string. - * - * @param s is the string that should be trimmed. 
- * @return a trimmed copy of s. - */ - static std::string trim(const std::string &s); - /** * Trims the given string or vector of chars by returning the start and end * index. @@ -153,8 +145,8 @@ public: * * @param s is the container that should be trimmed. * @param len is the number of elements in the container. - * @param f is a function that returns true for values that should be - * removed. + * @param f is a function that returns true for values at a certain index + * that should be removed. * @return start and end index. Note that "end" points at the character * beyond the end, thus "end" minus "start" */ @@ -163,7 +155,7 @@ public: { size_t start = 0; for (size_t i = 0; i < len; i++) { - if (!f(s[i])) { + if (!f(i)) { start = i; break; } @@ -171,7 +163,7 @@ public: size_t end = 0; for (ssize_t i = len - 1; i >= static_cast(start); i--) { - if (!f(s[i])) { + if (!f(i)) { end = i + 1; break; } @@ -198,16 +190,32 @@ public: * the collapsed version of the string ends. * @return start and end index. Note that "end" points at the character * beyond the end, thus "end" minus "start" + * @param f is a function that returns true for values at a certain index + * that should be removed. */ - template - static std::string trim(const T &s, size_t len, size_t &start, size_t &end) + template + static std::string trim(const T &s, size_t len, size_t &start, size_t &end, + Filter f) { - auto res = trim(s, len, isWhitespace); + auto res = trim(s, len, f); start = res.first; end = res.second; return std::string(&s[start], end - start); } + /** + * Removes whitespace at the beginning and the end of the given string. + * + * @param s is the string that should be trimmed. + * @return a trimmed copy of s. 
+ */ + static std::string trim(const std::string &s) + { + std::pair bounds = + trim(s, [&s](size_t i) { return isWhitespace(s[i]); }); + return s.substr(bounds.first, bounds.second - bounds.first); + } + /** * Collapses the whitespaces in the given string (trims the string and * replaces all whitespace characters by a single one). @@ -219,7 +227,8 @@ public: { size_t start; size_t end; - return collapse(s, s.size(), start, end); + return collapse(s, s.size(), start, end, + [&s](size_t i) { return isWhitespace(s[i]); }); } /** @@ -236,7 +245,8 @@ public: static std::string collapse(const std::string &s, size_t &start, size_t &end) { - return collapse(s, s.size(), start, end); + return collapse(s, s.size(), start, end, + [&s](size_t i) { return isWhitespace(s[i]); }); } /** @@ -244,6 +254,8 @@ public: * replaces all whitespace characters by a single one). * * @tparam T is the string type that should be used. + * @tparam Filter is a filter function used for detecting the character + * indices that might be removed. * @param s is the string in which the whitespace should be collapsed. * @param len is the length of the input string * @param start is an output parameter which is set to the offset at which @@ -252,9 +264,9 @@ public: * the collapsed version of the string ends. * @return a copy of s with collapsed whitespace. 
*/ - template + template static std::string collapse(const T &s, size_t len, size_t &start, - size_t &end) + size_t &end, Filter f) { // Result vector std::vector res; @@ -268,8 +280,7 @@ public: bool hadWhitespace = false; for (size_t i = 0; i < len; i++) { const char c = s[i]; - const bool whitespace = isWhitespace(c); - if (whitespace) { + if (f(i)) { hadWhitespace = !res.empty(); } else { // Adapt the start and end position diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp deleted file mode 100644 index ed52ea3..0000000 --- a/src/core/common/WhitespaceHandler.hpp +++ /dev/null @@ -1,284 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file WhitespaceHandler.hpp - * - * Contains the WhitespaceHandler classes which are used in multiple places to - * trim, compact or preserve whitespaces while at the same time maintaining the - * position information associated with the input strings. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_ -#define _OUSIA_WHITESPACE_HANDLER_HPP_ - -#include -#include - -#include "Utils.hpp" - -namespace ousia { - -/** - * WhitespaceHandler is a based class that can be used to collect text on a - * character-by-character basis. 
Note that this class and its descendants are - * hoped to be inlined by the compiler (and used in conjunction with templates), - * thus they are fully defined inside this header. - */ -class WhitespaceHandler { -public: - /** - * Start position of the extracted text. - */ - size_t textStart; - - /** - * End position of the extracted text. - */ - size_t textEnd; - - /** - * Buffer containing the extracted text. - */ - std::vector textBuf; - - /** - * Constructor of the TextHandlerBase base class. Initializes the start and - * end position with zeros. - */ - WhitespaceHandler() : textStart(0), textEnd(0) {} - - /** - * Returns true if this whitespace handler has found any text and a text - * token could be emitted. - * - * @return true if the internal data buffer is non-empty. - */ - bool hasText() { return !textBuf.empty(); } - - /** - * Returns the content of the WhitespaceHandler as string. - */ - std::string toString() const - { - return std::string(textBuf.data(), textBuf.size()); - } -}; - -/** - * The PreservingWhitespaceHandler class preserves all characters unmodified, - * including whitepace characters. - */ -class PreservingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Appends the given character to the internal text buffer, does not - * eliminate whitespace. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd); - } - - /** - * Static version of PreservingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. 
- * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd) - { - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - textBuf.push_back(c); - } -}; - -/** - * The TrimmingTextHandler class trims all whitespace characters at the begin - * and the end of a text section but leaves all other characters unmodified, - * including whitepace characters. - */ -class TrimmingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Buffer used internally to temporarily store all whitespace characters. - * They are only added to the output buffer if another non-whitespace - * character is reached. - */ - std::vector whitespaceBuf; - - /** - * Appends the given character to the internal text buffer, eliminates - * whitespace characters at the begin and end of the text. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); - } - - /** - * Static version of TrimmingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - * @param whitespaceBuf is a reference at the buffer for storing whitespace - * characters. 
- */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd, std::vector &whitespaceBuf) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - whitespaceBuf.push_back(c); - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (!whitespaceBuf.empty()) { - textBuf.insert(textBuf.end(), whitespaceBuf.begin(), - whitespaceBuf.end()); - whitespaceBuf.clear(); - } - textBuf.push_back(c); - } -}; - -/** - * The CollapsingTextHandler trims characters at the beginning and end of the - * text and reduced multiple whitespace characters to a single blank. - */ -class CollapsingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Flag set to true if a whitespace character was reached. - */ - bool hasWhitespace = false; - - /** - * Appends the given character to the internal text buffer, eliminates - * redundant whitespace characters. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); - } - - /** - * Static version of CollapsingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - * @param hasWhitespace is a reference at the "hasWhitespace" flag. 
- */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd, bool &hasWhitespace) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - hasWhitespace = true; - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (hasWhitespace) { - textBuf.push_back(' '); - hasWhitespace = false; - } - textBuf.push_back(c); - } -}; - -/** - * Function that can be used to append the given buffer (e.g. a string or a - * vector) to the whitespace handler. - * - * @tparam WhitespaceHandler is one of the WhitespaceHandler classes. - * @tparam Buffer is an iterable type. - * @param handler is the handler to which the characters of the Buffer should be - * appended. - * @param buf is the buffer from which the characters should be read. - * @param start is the start byte offset. Each character is counted as one byte. 
- */ -template -inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, - size_t start) -{ - for (auto elem : buf) { - handler.append(elem, start, start + 1); - start++; - } -} -} - -#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */ - diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index bb04bd3..d44176a 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -372,8 +373,15 @@ bool DocumentChildHandler::convertData(Handle field, return valid && scope().resolveValue(data, type, logger); } -bool DocumentChildHandler::data(Variant &data) +bool DocumentChildHandler::data(TokenizedData &data) { + // TODO: Handle this correctly + Variant text = data.text(WhitespaceMode::TRIM); + if (text == nullptr) { + // For now, except "no data" as success + return true; + } + // We're past the region in which explicit fields can be defined in the // parent structure element scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true); @@ -393,11 +401,11 @@ bool DocumentChildHandler::data(Variant &data) // If it is a primitive field directly, try to parse the content. if (field->isPrimitive()) { // Add it as primitive content. - if (!convertData(field, data, logger())) { + if (!convertData(field, text, logger())) { return false; } - parent->createChildDocumentPrimitive(data, fieldIdx); + parent->createChildDocumentPrimitive(text, fieldIdx); return true; } @@ -411,7 +419,7 @@ bool DocumentChildHandler::data(Variant &data) for (auto primitiveField : defaultFields) { // Then try to parse the content using the type specification. 
forks.emplace_back(logger().fork()); - if (!convertData(primitiveField, data, forks.back())) { + if (!convertData(primitiveField, text, forks.back())) { continue; } @@ -424,7 +432,7 @@ bool DocumentChildHandler::data(Variant &data) createPath(fieldIdx, path, parent); // Then create the primitive element - parent->createChildDocumentPrimitive(data); + parent->createChildDocumentPrimitive(text); return true; } @@ -434,10 +442,10 @@ bool DocumentChildHandler::data(Variant &data) if (defaultFields.empty()) { logger().error("Got data, but structure \"" + name() + "\" does not have any primitive field", - data); + text); } else { logger().error("Could not read data with any of the possible fields:", - data); + text); size_t f = 0; for (auto field : defaultFields) { logger().note(std::string("Field ") + @@ -471,4 +479,4 @@ namespace RttiTypes { const Rtti DocumentField = RttiBuilder( "DocumentField").parent(&Node); } -} \ No newline at end of file +} diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 862081c..dda7d8b 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -167,7 +167,7 @@ public: bool start(Variant::mapType &args) override; void end() override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; @@ -213,4 +213,4 @@ extern const Rtti DocumentField; } } -#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ \ No newline at end of file +#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index bf5d4ea..3d413e8 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include "Callbacks.hpp" @@ -130,7 +131,7 @@ bool EmptyHandler::annotationEnd(const Variant &className, return true; } -bool 
EmptyHandler::data(Variant &data) +bool EmptyHandler::data(TokenizedData &data) { // Support any data return true; @@ -184,10 +185,13 @@ bool StaticHandler::annotationEnd(const Variant &className, return false; } -bool StaticHandler::data(Variant &data) +bool StaticHandler::data(TokenizedData &data) { - logger().error("Did not expect any data here", data); - return false; + if (data.text(WhitespaceMode::TRIM) != nullptr) { + logger().error("Did not expect any data here", data); + return false; + } + return true; } /* Class StaticFieldHandler */ @@ -227,12 +231,19 @@ void StaticFieldHandler::end() } } -bool StaticFieldHandler::data(Variant &data) +bool StaticFieldHandler::data(TokenizedData &data) { + Variant text = data.text(WhitespaceMode::TRIM); + if (text == nullptr) { + // Providing no data here is ok as long as the "doHandle" callback + // function has already been called + return handled; + } + // Call the doHandle function if this has not been done before if (!handled) { handled = true; - doHandle(data, args); + doHandle(text, args); return true; } @@ -240,7 +251,7 @@ bool StaticFieldHandler::data(Variant &data) logger().error( std::string("Found data, but the corresponding argument \"") + argName + std::string("\" was already specified"), - data); + text); // Print the location at which the attribute was originally specified auto it = args.find(argName); diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 7cda7a4..929466d 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -31,6 +31,7 @@ namespace ousia { class ParserScope; class ParserContext; class Logger; +class TokenizedData; namespace parser_stack { @@ -158,40 +159,63 @@ protected: */ const std::string &name() const; -public: - /** - * Virtual destructor. - */ - virtual ~Handler(); - /** * Calls the corresponding function in the Callbacks instance. Sets the * whitespace mode that specifies how string data should be processed. 
The * calls to this function are placed on a stack by the underlying Stack - * class. + * class. This function should be called from the "fieldStart" callback and + * the "start" callback. If no whitespace mode is pushed in the "start" + * method the whitespace mode "TRIM" is implicitly assumed. * * @param whitespaceMode specifies one of the three WhitespaceMode constants * PRESERVE, TRIM or COLLAPSE. */ - void setWhitespaceMode(WhitespaceMode whitespaceMode); + void pushWhitespaceMode(WhitespaceMode whitespaceMode); /** - * Calls the corresponding function in the Callbacks instance. - * Registers the given token as token that should be reported to the handler - * using the "token" function. - * - * @param token is the token string that should be reported. + * Pops a previously pushed whitespace mode. Calls to this function should + * occur in the "end" callback and the "fieldEnd" callback. This function + * can only undo pushs that were performed by the pushWhitespaceMode() + * method of the same handler. */ - void registerToken(const std::string &token); + void popWhitespaceMode(); /** - * Calls the corresponding function in the Callbacks instance. - * Unregisters the given token, it will no longer be reported to the handler - * using the "token" function. + * Calls the corresponding function in the Callbacks instance. Sets the + * whitespace mode that specifies how string data should be processed. The + * calls to this function are placed on a stack by the underlying Stack + * class. This function should be called from the "fieldStart" callback and + * the "start" callback. If no whitespace mode is pushed in the "start" + * method the whitespace mode "TRIM" is implicitly assumed. * - * @param token is the token string that should be unregistered. + * @param tokens is a list of tokens that should be reported to this handler + * instance via the "token" method. 
*/ - void unregisterToken(const std::string &token); + void pushTokens(const std::vector &tokens); + + /** + * Pops a previously pushed whitespace mode. Calls to this function should + * occur in the "end" callback and the "fieldEnd" callback. This function + * can only undo pushs that were performed by the pushWhitespaceMode() + * method of the same handler. + */ + void popWhitespaceMode(); + + + /** + * Calls the corresponding function in the Callbacks instance. This method + * registers the given tokens as tokens that are generally available, tokens + * must be explicitly enabled using the "pushTokens" and "popTokens" method. + * Tokens that have not been registered are not guaranteed to be reported, + * even though they are + */ + void registerTokens(const std::vector &tokens); + +public: + /** + * Virtual destructor. + */ + virtual ~Handler(); /** * Returns the command name for which the handler was created. @@ -299,11 +323,11 @@ public: * Handler instance. Should return true if the data could be handled, false * otherwise. * - * @param data is a string variant containing the character data and its - * location. + * @param data is an instance of TokenizedData containing the segmented + * character data and its location. * @return true if the data could be handled, false otherwise. */ - virtual bool data(Variant &data) = 0; + virtual bool data(TokenizedData &data) = 0; }; /** @@ -333,7 +357,7 @@ public: Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; /** * Creates an instance of the EmptyHandler class. 
@@ -359,7 +383,7 @@ public: Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; }; /** @@ -412,7 +436,7 @@ protected: public: bool start(Variant::mapType &args) override; void end() override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; }; } } diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 5b67248..309c9a0 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -413,16 +414,24 @@ void Stack::command(const Variant &name, const Variant::mapType &args) } } -void Stack::data(const Variant &data) +void Stack::data(TokenizedData data) { - // End handlers that already had a default field and are currently not - // active. - endOverdueHandlers(); + // TODO: Rewrite this function for token handling + // TODO: This loop needs to be refactored out + while (!data.atEnd()) { + // End handlers that already had a default field and are currently not + // active. 
+ endOverdueHandlers(); - while (true) { - // Check whether there is any command the data can be sent to + const bool hasNonWhitespaceText = data.hasNonWhitespaceText(); + + // Check whether there is any command the data can be sent to -- if not, + // make sure the data actually is data if (stack.empty()) { - throw LoggableException("No command here to receive data.", data); + if (hasNonWhitespaceText) { + throw LoggableException("No command here to receive data.", data); + } + return; } // Fetch the current command handler information @@ -440,7 +449,10 @@ void Stack::data(const Variant &data) // If the "hadDefaultField" flag is set, we already issued an error // message if (!info.hadDefaultField) { - logger().error("Did not expect any data here", data); + if (hasNonWhitespaceText) { + logger().error("Did not expect any data here", data); + } + return; } } @@ -454,8 +466,16 @@ void Stack::data(const Variant &data) // Pass the data to the current Handler instance bool valid = false; try { - Variant dataCopy = data; - valid = info.handler->data(dataCopy); + // Create a fork of the TokenizedData and let the handler work + // on it + TokenizedData dataFork = data; + valid = info.handler->data(dataFork); + + // If the data was validly handled by the handler, commit the + // change + if (valid) { + data = dataFork; + } } catch (LoggableException ex) { loggerFork.log(ex); @@ -482,6 +502,19 @@ void Stack::data(const Variant &data) } } +void Stack::data(const Variant &stringData) +{ + // Fetch the SourceLocation of the given stringData variant + SourceLocation loc = stringData.getLocation(); + + // Create a TokenizedData instance and feed the given string data into it + TokenizedData tokenizedData(loc.getSourceId()); + tokenizedData.append(stringData.asString(), loc.getStart()); + + // Call the actual "data" method + data(tokenizedData); +} + void Stack::fieldStart(bool isDefault) { // Make sure the current handler stack is not empty @@ -584,4 +617,4 @@ void 
Stack::token(Variant token) // TODO } } -} \ No newline at end of file +} diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index b67ce82..cd29b28 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -44,6 +44,7 @@ namespace ousia { // Forward declarations class ParserContext; class Logger; +class TokenizedData; namespace parser_stack { @@ -292,13 +293,24 @@ public: void command(const Variant &name, const Variant::mapType &args); /** - * Function that shuold be called whenever character data is found in the + * Function that should be called whenever character data is found in the * input stream. May only be called if the currently is a command on the * stack. * - * @param data is a string variant containing the data that has been found. + * @param data is a TokenizedData instance containing the pre-segmented data + * that should be read. + */ + void data(TokenizedData data); + + /** + * Function that shuold be called whenever character data is found in the + * input stream. The given string variant is converted into a TokenizedData + * instance internally. + * + * @param stringData is a string variant containing the data that has been + * found. */ - void data(const Variant &data); + void data(const Variant &stringData); /** * Function that should be called whenever a new field starts. Fields of the diff --git a/src/core/parser/utils/SourceOffsetVector.hpp b/src/core/parser/utils/SourceOffsetVector.hpp index d15055a..aaebe7d 100644 --- a/src/core/parser/utils/SourceOffsetVector.hpp +++ b/src/core/parser/utils/SourceOffsetVector.hpp @@ -127,7 +127,7 @@ public: * read. * @return a pair containing start and end source offset. */ - std::pair loadOffset(size_t idx) + std::pair loadOffset(size_t idx) const { // Special treatment for the last character const size_t count = lens.size(); @@ -157,7 +157,31 @@ public: /** * Returns the number of characters for which offsets are stored. 
*/ - size_t size() { return lens.size(); } + size_t size() const { return lens.size(); } + + /** + * Trims the length of the TokenizedData instance to the given length. + * Removes all token matches that lie within the trimmed region. + * + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. + */ + void trim(size_t length) { + if (length < size()) { + lens.resize(length); + offsets.resize((length >> LOG2_OFFSET_INTERVAL) + 1); + } + } + + /** + * Resets the SourceOffsetVector to the state it had when it was + * constructed. + */ + void clear() { + lens.clear(); + offsets.clear(); + lastEnd = 0; + } }; } diff --git a/src/core/parser/utils/Token.cpp b/src/core/parser/utils/Token.cpp deleted file mode 100644 index 8bcdbb5..0000000 --- a/src/core/parser/utils/Token.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "Token.hpp" - -namespace ousia { -// Stub to make sure Tokens.hpp is valid -} - diff --git a/src/core/parser/utils/Token.hpp b/src/core/parser/utils/Token.hpp deleted file mode 100644 index f907450..0000000 --- a/src/core/parser/utils/Token.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file Token.hpp - * - * Definition of the TokenId id and constants for some special tokens. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_TOKEN_HPP_ -#define _OUSIA_TOKEN_HPP_ - -#include -#include -#include - -#include - -namespace ousia { - -/** - * The TokenId is used to give each token id a unique id. - */ -using TokenId = uint32_t; - -/** - * Type used for storing token lengths. - */ -using TokenLength = uint16_t; - -/** - * Namespace containing constants for TokenId instances with special meaning. - */ -namespace Tokens { -/** - * Token which is not a token. - */ -constexpr TokenId Empty = std::numeric_limits::max(); - -/** - * Token which represents data (represented as TokenizedData). - */ -constexpr TokenId Data = std::numeric_limits::max() - 1; - -/** - * Token which represents a newline token. 
- */ -constexpr TokenId Newline = std::numeric_limits::max() - 2; - -/** - * Token which represents a paragraph token -- issued if two consecutive - * newlines occur with optionally any amout of whitespace between them. - */ -constexpr TokenId Paragraph = std::numeric_limits::max() - 3; - -/** - * Token which represents an indentation token -- issued if the indentation of - * this line is larget than the indentation of the previous line. - */ -constexpr TokenId Indentation = std::numeric_limits::max() - 4; - -/** - * Maximum token id to be used. Tokens allocated for users should not surpass - * this value. - */ -constexpr TokenId MaxTokenId = std::numeric_limits::max() - 255; -} - -/** - * The Token structure describes a token discovered by the Tokenizer or read - * from the TokenizedData struct. - */ -struct Token { - /** - * Id of the id of this token. - */ - TokenId id; - - /** - * String that was matched. - */ - std::string content; - - /** - * Location from which the string was extracted. - */ - SourceLocation location; - - /** - * Default constructor. - */ - Token() : id(Tokens::Empty) {} - - /** - * Constructor of the Token struct. - * - * @param id represents the token id. - * @param content is the string content that has been extracted. - * @param location is the location of the extracted string content in the - * source file. - */ - Token(TokenId id, const std::string &content, SourceLocation location) - : id(id), content(content), location(location) - { - } - - /** - * Constructor of the Token struct, only initializes the token id - * - * @param id is the id corresponding to the id of the token. - */ - Token(TokenId id) : id(id) {} - - /** - * The getLocation function allows the tokens to be directly passed as - * parameter to Logger or LoggableException instances. 
- * - * @return a reference at the location field - */ - const SourceLocation &getLocation() const { return location; } -}; -} - -#endif /* _OUSIA_TOKENS_HPP_ */ - diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp index 80cc945..a45d3ff 100644 --- a/src/core/parser/utils/TokenTrie.cpp +++ b/src/core/parser/utils/TokenTrie.cpp @@ -22,12 +22,12 @@ namespace ousia { /* Class DynamicTokenTree::Node */ -TokenTrie::Node::Node() : type(Tokens::Empty) {} +TokenTrie::Node::Node() : id(Tokens::Empty) {} /* Class DynamicTokenTree */ bool TokenTrie::registerToken(const std::string &token, - TokenId type) noexcept + TokenId id) noexcept { // Abort if the token is empty -- this would taint the root node if (token.empty()) { @@ -48,12 +48,12 @@ bool TokenTrie::registerToken(const std::string &token, } // If the resulting node already has a type set, we're screwed. - if (node->type != Tokens::Empty) { + if (node->id != Tokens::Empty) { return false; } // Otherwise just set the type to the given type. - node->type = type; + node->id = id; return true; } @@ -78,7 +78,7 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept // Reset the subtree handler if this node has another type node = it->second.get(); - if ((node->type != Tokens::Empty || node->children.size() > 1) && + if ((node->id != Tokens::Empty || node->children.size() > 1) && (i + 1 != token.size())) { subtreeRoot = node; subtreeKey = token[i + 1]; @@ -86,14 +86,14 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept } // If the node type is already Tokens::Empty, we cannot do anything here - if (node->type == Tokens::Empty) { + if (node->id == Tokens::Empty) { return false; } // If the target node has children, we cannot delete the subtree. 
Set the // type to Tokens::Empty instead if (!node->children.empty()) { - node->type = Tokens::Empty; + node->id = Tokens::Empty; return true; } @@ -113,7 +113,7 @@ TokenId TokenTrie::hasToken(const std::string &token) const noexcept } node = it->second.get(); } - return node->type; + return node->id; } } diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp index b2d1539..c470acc 100644 --- a/src/core/parser/utils/TokenTrie.hpp +++ b/src/core/parser/utils/TokenTrie.hpp @@ -33,7 +33,7 @@ #include #include -#include "Token.hpp" +#include namespace ousia { @@ -75,10 +75,9 @@ public: ChildMap children; /** - * Reference at the corresponding token descriptor. Set to nullptr if - * no token is attached to this node. + * Id of the token represented by this node. */ - TokenId type; + TokenId id; /** * Default constructor, initializes the descriptor with nullptr. @@ -99,10 +98,10 @@ public: * * @param token is the character sequence that should be registered as * token. - * @param type is the descriptor that should be set for this token. + * @param id is the descriptor that should be set for this token. * @return true if the operation is successful, false otherwise. */ - bool registerToken(const std::string &token, TokenId type) noexcept; + bool registerToken(const std::string &token, TokenId id) noexcept; /** * Unregisters the token from the token tree. Returns true if the token was diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index fc7bfaf..aeefa26 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -47,6 +47,17 @@ struct TokenMark { */ TokenLength len; + /** + * Specifies whether the token is special or not. + */ + bool special; + + /** + * Maximum token length. + */ + static constexpr TokenLength MaxTokenLength = + std::numeric_limits::max(); + /** * Constructor of the TokenMark structure, initializes all members with the * given values. 
@@ -55,9 +66,10 @@ struct TokenMark { * @param bufStart is the start position of the TokenMark in the internal * character buffer. * @param len is the length of the token. + * @param special modifies the sort order, special tokens are prefered. */ - TokenMark(TokenId id, size_t bufStart, TokenLength len) - : bufStart(bufStart), id(id), len(len) + TokenMark(TokenId id, size_t bufStart, TokenLength len, bool special) + : bufStart(bufStart), id(id), len(len), special(special) { } @@ -72,7 +84,8 @@ struct TokenMark { TokenMark(size_t bufStart) : bufStart(bufStart), id(Tokens::Empty), - len(std::numeric_limits::max()) + len(MaxTokenLength), + special(true) { } @@ -86,8 +99,22 @@ struct TokenMark { */ friend bool operator<(const TokenMark &m1, const TokenMark &m2) { - return (m1.bufStart < m2.bufStart) || - (m1.bufStart == m2.bufStart && m1.len > m2.len); + // Prefer the mark with the smaller bufStart + if (m1.bufStart < m2.bufStart) { + return true; + } + + // Special handling for marks with the same bufStart + if (m1.bufStart == m2.bufStart) { + // If exactly one of the two marks is special, return true if this + // one is special + if (m1.special != m2.special) { + return m1.special; + } + // Otherwise prefer longer marks + return m1.len > m2.len; + } + return false; } }; } @@ -110,19 +137,44 @@ private: std::vector buf; /** - * Vector containing all token marks. + * Buffset storing the "protected" flag of the character data. */ - std::vector marks; + std::vector protectedChars; /** * Vector storing all the character offsets efficiently. */ SourceOffsetVector offsets; + /** + * Vector containing all token marks. + */ + mutable std::vector marks; + + /** + * Position of the first linebreak in a sequence of linebreaks. + */ + size_t firstLinebreak; + + /** + * Current indentation level. + */ + uint16_t currentIndentation; + + /** + * Last indentation level. + */ + uint16_t lastIndentation; + + /** + * Number of linebreaks without any content between them. 
+ */ + uint16_t numLinebreaks; + /** * Flag indicating whether the internal "marks" vector is sorted. */ - bool sorted; + mutable bool sorted; public: /** @@ -132,7 +184,7 @@ public: * @param sourceId is the source identifier that should be used for * constructing the location when returning tokens. */ - TokenizedDataImpl(SourceId sourceId) : sourceId(sourceId), sorted(true) {} + TokenizedDataImpl(SourceId sourceId) : sourceId(sourceId) { clear(); } /** * Appends a complete string to the internal character buffer and extends @@ -140,22 +192,22 @@ public: * * @param data is the string that should be appended to the buffer. * @param offsStart is the start offset in bytes in the input file. + * @param protect if set to true, the appended characters will not be + * affected by whitespace handling, they will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. */ - size_t append(const std::string &data, SourceOffset offsStart) - { // Append the data to the internal buffer - buf.insert(buf.end(), data.begin(), data.end()); - - // Extend the text regions, interpolate the source position (this may - // yield incorrect results) - const size_t size = buf.size(); - for (SourceOffset offs = offsStart; offs < offsStart + data.size(); - offs++) { - offsets.storeOffset(offs, offs + 1); + size_t append(const std::string &data, SourceOffset offsStart, bool protect) + { + for (size_t i = 0; i < data.size(); i++) { + if (offsStart != InvalidSourceOffset) { + append(data[i], offsStart + i, offsStart + i + 1, protect); + } else { + append(data[i], InvalidSourceOffset, InvalidSourceOffset, + protect); + } } - - return size; + return size(); } /** @@ -165,16 +217,86 @@ public: * @param c is the character that should be appended to the buffer. * @param offsStart is the start offset in bytes in the input file. * @param offsEnd is the end offset in bytes in the input file. 
+ * @param protect if set to true, the appended character will not be + * affected by whitespace handling, it will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. */ - size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd) + size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd, + bool protect) { // Add the character to the list and store the location of the character // in the source file buf.push_back(c); + protectedChars.push_back(protect); offsets.storeOffset(offsStart, offsEnd); - return buf.size(); + + // Insert special tokens + const size_t size = buf.size(); + const bool isWhitespace = Utils::isWhitespace(c); + const bool isLinebreak = Utils::isLinebreak(c); + + // Handle linebreaks + if (isLinebreak) { + // Mark linebreaks as linebreak + mark(Tokens::Newline, size - 1, 1, false); + + // The linebreak sequence started at the previous character + if (numLinebreaks == 0) { + firstLinebreak = size - 1; + } + + // Reset the indentation + currentIndentation = 0; + + // Increment the number of linebreaks + numLinebreaks++; + + const size_t markStart = firstLinebreak; + const size_t markLength = size - firstLinebreak; + + // Issue two consecutive linebreaks as paragraph token + if (numLinebreaks == 2) { + mark(Tokens::Paragraph, markStart, markLength, false); + } + + // Issue three consecutive linebreaks as paragraph token + if (numLinebreaks >= 3) { + mark(Tokens::Section, markStart, markLength, false); + } + } else if (isWhitespace) { + // Count the whitespace characters at the beginning of the line + if (numLinebreaks > 0) { + // Implement the UNIX/Pyhton rule for tabs: Tabs extend to the + // next multiple of eight. 
+ if (c == '\t') { + currentIndentation = (currentIndentation + 8) & ~7; + } else { + currentIndentation++; + } + } + } + + // Issue indent and unindent tokens + if (!isWhitespace && numLinebreaks > 0) { + // Issue a larger indentation than that in the previous line as + // "Indent" token + if (currentIndentation > lastIndentation) { + mark(Tokens::Indent, size - 1, 0, true); + } + + // Issue a smaller indentation than that in the previous line as + // "Dedent" token + if (currentIndentation < lastIndentation) { + mark(Tokens::Dedent, size - 1, 0, true); + } + + // Reset the internal state machine + lastIndentation = currentIndentation; + numLinebreaks = 0; + } + + return size; } /** @@ -184,11 +306,12 @@ public: * @param bufStart is the start position in the internal buffer. Use the * values returned by append to calculate the start position. * @param len is the length of the token. + * @param special tags the mark as "special", prefering it in the sort order */ - void mark(TokenId id, size_t bufStart, TokenLength len) + void mark(TokenId id, size_t bufStart, TokenLength len, bool special) { // Push the new instance back onto the list - marks.emplace_back(id, bufStart, len); + marks.emplace_back(id, bufStart, len, special); // Update the sorted flag as soon as more than one element is in the // list @@ -212,9 +335,13 @@ public: * @return true if a token was returned, false if no more tokens are * available. */ - bool next(Token &token, WhitespaceMode mode, - const std::unordered_set &tokens, size_t &cursor) + bool next(Token &token, WhitespaceMode mode, const TokenSet &tokens, + TokenizedDataCursor &cursor) const { + // Some variables for convenient access + size_t &bufPos = cursor.bufPos; + size_t &markPos = cursor.markPos; + // Sort the "marks" vector if it has not been sorted yet. 
if (!sorted) { std::sort(marks.begin(), marks.end()); @@ -222,10 +349,11 @@ public: } // Fetch the next larger TokenMark instance, make sure the token is in - // the "enabled" list - auto it = - std::lower_bound(marks.begin(), marks.end(), TokenMark(cursor)); - while (it != marks.end() && tokens.count(it->id) == 0) { + // the "enabled" list and within the buffer range + auto it = std::lower_bound(marks.begin() + markPos, marks.end(), + TokenMark(bufPos)); + while (it != marks.end() && (tokens.count(it->id) == 0 || + it->bufStart + it->len > buf.size())) { it++; } @@ -236,15 +364,15 @@ public: // Depending on the whitespace mode, fetch all the data between the // cursor position and the calculated end position and return a token // containing that data. - if (cursor < end && cursor < buf.size()) { + if (bufPos < end && bufPos < buf.size()) { switch (mode) { case WhitespaceMode::PRESERVE: { token = Token( - Tokens::Data, std::string(&buf[cursor], end - cursor), + Tokens::Data, std::string(&buf[bufPos], end - bufPos), SourceLocation(sourceId, - offsets.loadOffset(cursor).first, + offsets.loadOffset(bufPos).first, offsets.loadOffset(end).first)); - cursor = end; + bufPos = end; return true; } case WhitespaceMode::TRIM: @@ -254,30 +382,35 @@ public: size_t stringStart; size_t stringEnd; std::string content; + const char *cBuf = &buf[bufPos]; + auto filter = [cBuf, this](size_t i) -> bool { + return Utils::isWhitespace(cBuf[i]) && + !protectedChars[i]; + }; if (mode == WhitespaceMode::TRIM) { - content = Utils::trim(&buf[cursor], end - cursor, - stringStart, stringEnd); + content = Utils::trim(cBuf, end - bufPos, stringStart, + stringEnd, filter); } else { - content = Utils::collapse(&buf[cursor], end - cursor, - stringStart, stringEnd); + content = Utils::collapse( + cBuf, end - bufPos, stringStart, stringEnd, filter); } // If the resulting string is empty (only whitespaces), // abort if (content.empty()) { - cursor = end; + bufPos = end; break; } // Calculate the 
absolute positions and return the token - stringStart += cursor; - stringEnd += cursor; + stringStart += bufPos; + stringEnd += bufPos; token = Token( Tokens::Data, content, SourceLocation(sourceId, offsets.loadOffset(stringStart).first, offsets.loadOffset(stringEnd).first)); - cursor = end; + bufPos = end; return true; } } @@ -286,14 +419,18 @@ public: // If start equals end, we're currently directly at a token // instance. Return this token and advance the cursor to the end of // the token. - if (cursor == end && it != marks.end()) { + if (bufPos == end && it != marks.end()) { const size_t tokenStart = it->bufStart; const size_t tokenEnd = it->bufStart + it->len; token = Token( it->id, std::string(&buf[tokenStart], it->len), SourceLocation(sourceId, offsets.loadOffset(tokenStart).first, offsets.loadOffset(tokenEnd).first)); - cursor = tokenEnd; + + // Update the cursor, consume the token by incrementing the marks + // pos counter + bufPos = tokenEnd; + markPos = it - marks.begin() + 1; return true; } @@ -303,12 +440,63 @@ public: return false; } + /** + * Resets the TokenizedDataImpl instance to the state it had when it was + * constructred. + */ + void clear() + { + buf.clear(); + protectedChars.clear(); + offsets.clear(); + marks.clear(); + currentIndentation = 0; + lastIndentation = 0; + numLinebreaks = 1; // Assume the stream starts with a linebreak + sorted = true; + } + + /** + * Trims the length of the TokenizedDataImpl instance to the given length. + * + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. + */ + void trim(size_t length) + { + if (length < size()) { + buf.resize(length); + offsets.trim(length); + } + } + /** * Returns the current size of the internal buffer. * * @return the size of the internal character buffer. */ - size_t getSize() { return buf.size(); } + size_t size() const { return buf.size(); } + + /** + * Returns true if no data is in the data buffer. 
+ * + * @return true if the "buf" instance has no data. + */ + bool empty() const { return buf.empty(); } + + /** + * Returns the current location of all data in the buffer. + * + * @return the location of the entire data represented by this instance. + */ + SourceLocation getLocation() const + { + if (empty()) { + return SourceLocation{sourceId}; + } + return SourceLocation{sourceId, offsets.loadOffset(0).first, + offsets.loadOffset(size()).second}; + } }; /* Class TokenizedData */ @@ -316,50 +504,83 @@ public: TokenizedData::TokenizedData() : TokenizedData(InvalidSourceId) {} TokenizedData::TokenizedData(SourceId sourceId) - : impl(std::make_shared(sourceId)), cursor(0) + : impl(std::make_shared(sourceId)) { } TokenizedData::~TokenizedData() {} -size_t TokenizedData::append(const std::string &data, SourceOffset offsStart) +size_t TokenizedData::append(const std::string &data, SourceOffset offsStart, + bool protect) { - return impl->append(data, offsStart); + return impl->append(data, offsStart, protect); } size_t TokenizedData::append(char c, SourceOffset offsStart, - SourceOffset offsEnd) + SourceOffset offsEnd, bool protect) { - return impl->append(c, offsStart, offsEnd); + return impl->append(c, offsStart, offsEnd, protect); } void TokenizedData::mark(TokenId id, TokenLength len) { - impl->mark(id, impl->getSize() - len, len); + impl->mark(id, impl->size() - len, len, false); } void TokenizedData::mark(TokenId id, size_t bufStart, TokenLength len) { - impl->mark(id, bufStart, len); + impl->mark(id, bufStart, len, false); } -bool TokenizedData::next(Token &token, WhitespaceMode mode) +void TokenizedData::clear() { impl->clear(); } + +void TokenizedData::trim(size_t length) { impl->trim(length); } + +size_t TokenizedData::size() const { return impl->size(); } + +bool TokenizedData::empty() const { return impl->empty(); } + +SourceLocation TokenizedData::getLocation() const { - return impl->next(token, mode, tokens, cursor); + return impl->getLocation(); } -bool 
TokenizedData::text(Token &token, WhitespaceMode mode) +TokenizedDataReader TokenizedData::reader() const { - // Copy the current cursor position to not update the actual cursor position - // if the operation was not successful - size_t cursorCopy = cursor; - if (!impl->next(token, mode, tokens, cursorCopy) || - token.id != Tokens::Data) { - return false; - } + return TokenizedDataReader(impl, TokenizedDataCursor(), + TokenizedDataCursor()); +} + +/* Class TokenizedDataReader */ - // There is indeed a text token, update the internal cursor position - cursor = cursorCopy; - return true; +TokenizedDataReader::TokenizedDataReader( + std::shared_ptr impl, + const TokenizedDataCursor &readCursor, + const TokenizedDataCursor &peekCursor) + : impl(impl), readCursor(readCursor), peekCursor(peekCursor) +{ +} + +TokenizedDataReaderFork TokenizedDataReader::fork() +{ + return TokenizedDataReaderFork(*this, impl, readCursor, peekCursor); +} + +bool TokenizedDataReader::atEnd() const +{ + return readCursor.bufPos >= impl->size(); +} + +bool TokenizedDataReader::read(Token &token, const TokenSet &tokens, + WhitespaceMode mode) +{ + peekCursor = readCursor; + return impl->next(token, mode, tokens, readCursor); +} + +bool TokenizedDataReader::peek(Token &token, const TokenSet &tokens, + WhitespaceMode mode) +{ + return impl->next(token, mode, tokens, peekCursor); } } diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp index 38125c4..b72ca02 100644 --- a/src/core/parser/utils/TokenizedData.hpp +++ b/src/core/parser/utils/TokenizedData.hpp @@ -37,40 +37,48 @@ #include #include - -#include "Token.hpp" +#include namespace ousia { // Forward declaration class TokenizedDataImpl; +class TokenizedDataReader; +class TokenizedDataReaderFork; /** - * The TokenizedData class stores data extracted from a user defined document. 
- * As users are capable of defining their own tokens and these are only valid - * in certain scopes TokenizedData allows to divide the stored data into chunks - * separated by tokens. + * Internally used structure representing a cursor within the TokenizedData + * stream. */ -class TokenizedData { -private: +struct TokenizedDataCursor { /** - * Shared pointer pointing at the internal data. This data is shared when - * copying TokenizedData instances, which corresponds to forking a - * TokenizedData instance. + * Position within the byte buffer. */ - std::shared_ptr impl; + size_t bufPos; /** - * Contains all currently enabled token ids. + * Position within the token mark buffer. */ - std::unordered_set tokens; + size_t markPos; /** - * Position from which the last element was read from the internal buffer. - * This information is not shared with the other instances of TokenizedData - * pointing at the same location. + * Default constructor. The resulting cursor points at the beginning of the + * stream. + */ + TokenizedDataCursor() : bufPos(0), markPos(0) {} +}; + +/** + * The TokenizedData class stores data extracted from a user defined document. + * The data stored in TokenizedData + */ +class TokenizedData { +private: + /** + * Shared pointer pointing at the internal data. This data is shared with + * all the TokenizedDataReader instances. */ - size_t cursor; + std::shared_ptr impl; public: /** @@ -101,10 +109,13 @@ public: * * @param data is the string that should be appended to the buffer. * @param offsStart is the start offset in bytes in the input file. + * @param protect if set to true, the appended characters will not be + * affected by whitespace handling, they will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. 
*/ - size_t append(const std::string &data, SourceOffset offsStart = 0); + size_t append(const std::string &data, SourceOffset offsStart = 0, + bool protect = false); /** * Appends a single character to the internal character buffer. @@ -112,10 +123,13 @@ public: * @param c is the character that should be appended to the buffer. * @param start is the start offset in bytes in the input file. * @param end is the end offset in bytes in the input file. + * @param protect if set to true, the appended character will not be + * affected by whitespace handling, it will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. */ - size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd); + size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd, + bool protect = false); /** * Stores a token ending at the last character of the current buffer. @@ -136,54 +150,194 @@ public: void mark(TokenId id, size_t bufStart, TokenLength len); /** - * Enables a single token id. Enabled tokens will no longer be returned as - * text. Instead, when querying for the next token, TokenizedData will - * return them as token and not as part of a Text token. + * Resets the TokenizedData instance to the state it had when it was + * constructred. + */ + void clear(); + + /** + * Trims the length of the TokenizedData instance to the given length. Note + * that this function does not remove any token matches for performance + * reasons, it merely renders them incaccessible. Appending new data after + * calling trim will make the token marks accessible again. Thus this method + * should be the last function called to modify the data buffer and the + * token marks. + * + * @param length is the number of characters to which the TokenizedData + * instance should be trimmed. 
+ */ + void trim(size_t length); + + /** + * Returns the number of characters currently represented by this + * TokenizedData instance. + */ + size_t size() const; + + /** + * Returns true if the TokenizedData instance is empty, false otherwise. * - * @param id is the TokenId of the token that should be enabled. + * @return true if not data is stored inside the TokenizedData instance. */ - void enableToken(TokenId id) { tokens.insert(id); } + bool empty() const; /** - * Enables a set of token ids. Enabled tokens will no longer be returned as - * text. Instead, when querying for the next token, TokenizedData will - * return them as token and not as part of a Text token. + * Returns the location of the entire TokenizedData instance. * - * @param ids is the TokenId of the token that should be enabled. + * @return the location of the entire data represented by this instance. */ - void enableToken(const std::unordered_set &ids) - { - tokens.insert(ids.begin(), ids.end()); - } + SourceLocation getLocation() const; + + /** + * Returns a TokenizedDataReader instance that can be used to access the + * data. + * + * @return a new TokenizedDataReader instance pointing at the beginning of + * the internal buffer. + */ + TokenizedDataReader reader() const; +}; + +/** + * The TokenizedDataReader + */ +class TokenizedDataReader { +private: + friend TokenizedData; + + /** + * Shared pointer pointing at the internal data. This data is shared with + * all the TokenizedDataReader instances. + */ + std::shared_ptr impl; + + /** + * Position from which the last element was read from the internal buffer. + */ + TokenizedDataCursor readCursor; + + /** + * Position from which the last element was peeked from the internal buffer. + */ + TokenizedDataCursor peekCursor; + +protected: + /** + * Protected constructor of TokenizedDataReader, taking a reference to the + * internal TokenizedDataImpl structure storing the data that is accessed by + * the reader. 
+ * + * @param impl is the TokenizedDataImpl instance that holds the actual data. + * @param readCursor is the cursor position from which tokens and text are + * read. + * @param peekCursor is the cursor position from which tokens and text are + * peeked. + */ + TokenizedDataReader(std::shared_ptr impl, + const TokenizedDataCursor &readCursor, + const TokenizedDataCursor &peekCursor); + +public: + /** + * Returns a new TokenizedDataReaderFork from which tokens and text can be + * read without advancing this reader instance. + */ + TokenizedDataReaderFork fork(); + + /** + * Returns true if this TokenizedData instance is at the end. + * + * @return true if the end of the TokenizedData instance has been reached. + */ + bool atEnd() const; /** * Stores the next token in the given token reference, returns true if the - * operation was successful, false if there are no more tokens. + * operation was successful, false if there are no more tokens. Advances the + * internal cursor and re * * @param token is an output parameter into which the read token will be * stored. The TokenId is set to Tokens::Empty if there are no more tokens. + * @param tokens is the set of token identifers, representing the currently + * enabled tokens. * @param mode is the whitespace mode that should be used when a text token * is returned. * @return true if the operation was successful and there is a next token, * false if there are no more tokens. */ - bool next(Token &token, WhitespaceMode mode = WhitespaceMode::COLLAPSE); + bool read(Token &token, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM); /** - * Stores the next text token in the given token reference, returns true if - * the operation was successful (there was indeed a text token), false if - * the next token is not a text token or there were no more tokens. + * Stores the next token in the given token reference, returns true if the + * operation was successful, false if there are no more tokens. 
* * @param token is an output parameter into which the read token will be * stored. The TokenId is set to Tokens::Empty if there are no more tokens. + * @param tokens is the set of token identifers, representing the currently + * enabled tokens. * @param mode is the whitespace mode that should be used when a text token * is returned. * @return true if the operation was successful and there is a next token, * false if there are no more tokens. */ - bool text(Token &token, WhitespaceMode mode = WhitespaceMode::COLLAPSE); + bool peek(Token &token, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM); + + /** + * Consumes the peeked tokens, the read cursor will now be at the position + * of the peek cursor. + */ + void consumePeek() { readCursor = peekCursor; } + + /** + * Resets the peek cursor to the position of the read cursor. + */ + void resetPeek() { peekCursor = readCursor; } +}; + +/** + * The TokenizedDataReaderFork class is created when forking a + * TokenizedDataReader + */ +class TokenizedDataReaderFork : public TokenizedDataReader { +private: + friend TokenizedDataReader; + + /** + * Reference pointing at the parent TokenizedDataReader to which changes may + * be commited. + */ + TokenizedDataReader &parent; + + /** + * Private constructor of TokenizedDataReaderFork, taking a reference to the + * internal TokenizedDataImpl structure storing the data that is accessed by + * the reader and a reference at the parent TokenizedDataReader. + * + * @param parent is the TokenizedDataReader instance to which the current + * read/peek progress may be commited. + * @param impl is the TokenizedDataImpl instance that holds the actual data. + * @param readCursor is the cursor position from which tokens and text are + * read. + * @param peekCursor is the cursor position from which tokens and text are + * peeked. 
+ */ + TokenizedDataReaderFork(TokenizedDataReader &parent, + std::shared_ptr impl, + const TokenizedDataCursor &readCursor, + const TokenizedDataCursor &peekCursor) + : TokenizedDataReader(impl, readCursor, peekCursor), parent(parent) + { + } + +public: + /** + * Commits the read/peek progress to the underlying parent. + */ + void commit() { parent = *this; } }; } -#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */ +#endif /* _OUSIA_TOKENIZED_DATA_HPP_ */ diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index 2e0ac13..e78b0f4 100644 --- a/src/core/parser/utils/Tokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -22,8 +22,8 @@ #include #include #include -#include +#include "TokenizedData.hpp" #include "Tokenizer.hpp" namespace ousia { @@ -42,26 +42,33 @@ struct TokenMatch { Token token; /** - * Current length of the data within the text handler. The text buffer needs - * to be trimmed to this length if this token matches. + * Position at which this token starts in the TokenizedData instance. */ - size_t textLength; + size_t dataStartOffset; /** - * End location of the current text handler. This location needs to be used - * for the text token that is emitted before the actual token. + * Set to true if the matched token is a primary token. */ - size_t textEnd; + bool primary; /** * Constructor of the TokenMatch class. */ - TokenMatch() : textLength(0), textEnd(0) {} + TokenMatch() : dataStartOffset(0), primary(false) {} /** * Returns true if this TokenMatch instance actually represents a match. + * + * @return true if the TokenMatch actually has a match. + */ + bool hasMatch() const { return token.id != Tokens::Empty; } + + /** + * Returns the length of the matched token. + * + * @return the length of the token string. 
*/ - bool hasMatch() { return token.id != Tokens::Empty; } + size_t size() const { return token.content.size(); } }; /* Internal class TokenLookup */ @@ -83,36 +90,28 @@ private: size_t start; /** - * Current length of the data within the text handler. The text buffer needs - * to be trimmed to this length if this token matches. + * Position at which this token starts in the TokenizedData instance. */ - size_t textLength; - - /** - * End location of the current text handler. This location needs to be used - * for the text token that is emitted before the actual token. - */ - size_t textEnd; + size_t dataStartOffset; public: /** * Constructor of the TokenLookup class. * * @param node is the current node. - * @param start is the start position. - * @param textLength is the text buffer length of the previous text token. - * @param textEnd is the current end location of the previous text token. + * @param start is the start position in the source file. + * @param dataStartOffset is the current length of the TokenizedData buffer. */ - TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength, - size_t textEnd) - : node(node), start(start), textLength(textLength), textEnd(textEnd) + TokenLookup(const TokenTrie::Node *node, size_t start, + size_t dataStartOffset) + : node(node), start(start), dataStartOffset(dataStartOffset) { } /** * Tries to extend the current path in the token trie with the given - * character. If a complete token is matched, stores this match in the - * tokens list (in case it is longer than any previous token). + * character. If a complete token is matched, stores the match in the given + * TokenMatch reference and returns true. * * @param c is the character that should be appended to the current prefix. * @param lookups is a list to which new TokeLookup instances are added -- @@ -123,73 +122,49 @@ public: * Tokenizer. * @param end is the end byte offset of the current character. * @param sourceId is the source if of this file. 
+ * @return true if a token was matched, false otherwise. */ - void advance(char c, std::vector &lookups, TokenMatch &match, - const std::vector &tokens, SourceOffset end, - SourceId sourceId) + bool advance(char c, std::vector &lookups, TokenMatch &match, + const std::vector &tokens, + SourceOffset end, SourceId sourceId) { - // Check whether we can continue the current token path with the given - // character without visiting an already visited node + // Set to true once a token has been matched + bool res = false; + + // Check whether we can continue the current token path, if not, abort auto it = node->children.find(c); if (it == node->children.end()) { - return; + return res; } // Check whether the new node represents a complete token a whether it // is longer than the current token. If yes, replace the current token. node = it->second.get(); - if (node->type != Tokens::Empty) { - const std::string &str = tokens[node->type]; - size_t len = str.size(); - if (len > match.token.content.size()) { - match.token = - Token{node->type, str, {sourceId, start, end}}; - match.textLength = textLength; - match.textEnd = textEnd; - } + if (node->id != Tokens::Empty) { + const Tokenizer::TokenDescriptor &descr = tokens[node->id]; + match.token = Token(node->id, descr.string, + SourceLocation(sourceId, start, end)); + match.dataStartOffset = dataStartOffset; + match.primary = descr.primary; + res = true; } // If this state can possibly be advanced, store it in the states list. if (!node->children.empty()) { lookups.emplace_back(*this); } + return res; } }; -/** - * Transforms the given token into a data token containing the extracted - * text. - * - * @param handler is the WhitespaceHandler containing the collected data. - * @param token is the output token to which the text should be written. - * @param sourceId is the source id of the underlying file. 
- */ -static void buildDataToken(const WhitespaceHandler &handler, TokenMatch &match, - SourceId sourceId) -{ - if (match.hasMatch()) { - match.token.content = - std::string{handler.textBuf.data(), match.textLength}; - match.token.location = - SourceLocation{sourceId, handler.textStart, match.textEnd}; - } else { - match.token.content = handler.toString(); - match.token.location = - SourceLocation{sourceId, handler.textStart, handler.textEnd}; - } - match.token.id = Tokens::Data; -} } /* Class Tokenizer */ -Tokenizer::Tokenizer(WhitespaceMode whitespaceMode) - : whitespaceMode(whitespaceMode), nextTokenId(0) -{ -} +Tokenizer::Tokenizer() : nextTokenId(0) {} -template -bool Tokenizer::next(CharReader &reader, Token &token) +template +bool Tokenizer::next(CharReader &reader, Token &token, TokenizedData &data) { // If we're in the read mode, reset the char reader peek position to the // current read position @@ -199,43 +174,62 @@ bool Tokenizer::next(CharReader &reader, Token &token) // Prepare the lookups in the token trie const TokenTrie::Node *root = trie.getRoot(); - TokenMatch match; + TokenMatch bestMatch; std::vector lookups; std::vector nextLookups; - // Instantiate the text handler - TextHandler textHandler; - // Peek characters from the reader and try to advance the current token tree // cursor char c; + const size_t initialDataSize = data.size(); size_t charStart = reader.getPeekOffset(); const SourceId sourceId = reader.getSourceId(); while (reader.peek(c)) { const size_t charEnd = reader.getPeekOffset(); - const size_t textLength = textHandler.textBuf.size(); - const size_t textEnd = textHandler.textEnd; + const size_t dataStartOffset = data.size(); // If we do not have a match yet, start a new lookup from the root - if (!match.hasMatch()) { - TokenLookup{root, charStart, textLength, textEnd}.advance( - c, nextLookups, match, tokens, charEnd, sourceId); + if (!bestMatch.hasMatch()) { + lookups.emplace_back(root, charStart, dataStartOffset); } // Try to 
advance all other lookups with the new character + TokenMatch match; for (TokenLookup &lookup : lookups) { - lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId); + // Continue if the current lookup + if (!lookup.advance(c, nextLookups, match, tokens, charEnd, + sourceId)) { + continue; + } + + // If the matched token is primary, check whether it is better than + // the current best match, if yes, replace the best match. In any + // case just continue + if (match.primary) { + if (match.size() > bestMatch.size()) { + bestMatch = match; + } + continue; + } + + // Otherwise -- if the matched token is a non-primary token (and no + // primary token has been found until now) -- mark the match in the + // TokenizedData + if (!bestMatch.hasMatch()) { + data.mark(match.token.id, data.size() - match.size() + 1, + match.size()); + } } // We have found a token and there are no more states to advance or the // text handler has found something -- abort to return the new token - if (match.hasMatch()) { - if ((nextLookups.empty() || textHandler.hasText())) { + if (bestMatch.hasMatch()) { + if ((nextLookups.empty() || data.size() > initialDataSize)) { break; } } else { // Record all incomming characters - textHandler.append(c, charStart, charEnd); + data.append(c, charStart, charEnd); } // Swap the lookups and the nextLookups list @@ -246,60 +240,53 @@ bool Tokenizer::next(CharReader &reader, Token &token) charStart = charEnd; } - // If we found text, emit that text - if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { - buildDataToken(textHandler, match, sourceId); + // If we found data, emit a corresponding data token + if (data.size() > initialDataSize && + (!bestMatch.hasMatch() || + bestMatch.dataStartOffset > initialDataSize)) { + // If we have a "bestMatch" wich starts after text data has started, + // trim the TokenizedData to this offset + if (bestMatch.dataStartOffset > initialDataSize) { + data.trim(bestMatch.dataStartOffset); + } + + 
// Create a token containing the data location + bestMatch.token = Token{data.getLocation()}; } // Move the read/peek cursor to the end of the token, abort if an error // happens while doing so - if (match.hasMatch()) { + if (bestMatch.hasMatch()) { // Make sure we have a valid location - if (match.token.location.getEnd() == InvalidSourceOffset) { + if (bestMatch.token.location.getEnd() == InvalidSourceOffset) { throw OusiaException{"Token end position offset out of range"}; } // Seek to the end of the current token - const size_t end = match.token.location.getEnd(); + const size_t end = bestMatch.token.location.getEnd(); if (read) { reader.seek(end); } else { reader.seekPeekCursor(end); } - token = match.token; + token = bestMatch.token; } else { token = Token{}; } - return match.hasMatch(); + return bestMatch.hasMatch(); } -bool Tokenizer::read(CharReader &reader, Token &token) +bool Tokenizer::read(CharReader &reader, Token &token, TokenizedData &data) { - switch (whitespaceMode) { - case WhitespaceMode::PRESERVE: - return next(reader, token); - case WhitespaceMode::TRIM: - return next(reader, token); - case WhitespaceMode::COLLAPSE: - return next(reader, token); - } - return false; + return next(reader, token, data); } -bool Tokenizer::peek(CharReader &reader, Token &token) +bool Tokenizer::peek(CharReader &reader, Token &token, TokenizedData &data) { - switch (whitespaceMode) { - case WhitespaceMode::PRESERVE: - return next(reader, token); - case WhitespaceMode::TRIM: - return next(reader, token); - case WhitespaceMode::COLLAPSE: - return next(reader, token); - } - return false; + return next(reader, token, data); } -TokenId Tokenizer::registerToken(const std::string &token) +TokenId Tokenizer::registerToken(const std::string &token, bool primary) { // Abort if an empty token should be registered if (token.empty()) { @@ -309,8 +296,8 @@ TokenId Tokenizer::registerToken(const std::string &token) // Search for a new slot in the tokens list TokenId type = 
Tokens::Empty; for (size_t i = nextTokenId; i < tokens.size(); i++) { - if (tokens[i].empty()) { - tokens[i] = token; + if (!tokens[i].valid()) { + tokens[i] = TokenDescriptor(token, primary); type = i; break; } @@ -320,62 +307,47 @@ TokenId Tokenizer::registerToken(const std::string &token) // override the special token type handles if (type == Tokens::Empty) { type = tokens.size(); - if (type == Tokens::Data || type == Tokens::Empty) { + if (type >= Tokens::MaxTokenId) { throw OusiaException{"Token type ids depleted!"}; } - tokens.emplace_back(token); + tokens.emplace_back(token, primary); } nextTokenId = type + 1; - // Try to register the token in the trie -- if this fails, remove it - // from the tokens list + // Try to register the token in the trie -- if this fails, remove it from + // the tokens list if (!trie.registerToken(token, type)) { - tokens[type] = std::string{}; + tokens[type] = TokenDescriptor(); nextTokenId = type; return Tokens::Empty; } return type; } -bool Tokenizer::unregisterToken(TokenId type) +bool Tokenizer::unregisterToken(TokenId id) { // Unregister the token from the trie, abort if an invalid type is given - if (type < tokens.size() && trie.unregisterToken(tokens[type])) { - tokens[type] = std::string{}; - nextTokenId = type; + if (id < tokens.size() && trie.unregisterToken(tokens[id].string)) { + tokens[id] = TokenDescriptor(); + nextTokenId = id; return true; } return false; } -std::string Tokenizer::getTokenString(TokenId type) -{ - if (type < tokens.size()) { - return tokens[type]; - } - return std::string{}; -} +static Tokenizer::TokenDescriptor EmptyTokenDescriptor; -void Tokenizer::setWhitespaceMode(WhitespaceMode mode) +const Tokenizer::TokenDescriptor &Tokenizer::lookupToken(TokenId id) const { - whitespaceMode = mode; + if (id < tokens.size()) { + return tokens[id]; + } + return EmptyTokenDescriptor; } -WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; } - /* Explicitly instantiate all possible 
instantiations of the "next" member function */ -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); -template bool Tokenizer::next( - CharReader &reader, Token &token); +template bool Tokenizer::next(CharReader &, Token &, TokenizedData &); +template bool Tokenizer::next(CharReader &, Token &, TokenizedData &); } diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp index f21c6a3..74e3f0d 100644 --- a/src/core/parser/utils/Tokenizer.hpp +++ b/src/core/parser/utils/Tokenizer.hpp @@ -19,8 +19,8 @@ /** * @file Tokenizer.hpp * - * Tokenizer that can be reconfigured at runtime used for parsing the plain - * text format. + * Tokenizer that can be reconfigured at runtime and is used for parsing the + * plain text format. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -28,44 +28,80 @@ #ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_ #define _OUSIA_DYNAMIC_TOKENIZER_HPP_ -#include +#include #include #include #include -#include +#include -#include "Token.hpp" #include "TokenTrie.hpp" namespace ousia { // Forward declarations class CharReader; +class TokenizedData; /** * The Tokenizer is used to extract tokens and chunks of text from a - * CharReader. It allows to register and unregister tokens while parsing and - * to modify the handling of whitespace characters. Note that the - * Tokenizer always tries to extract the longest possible token from the - * tokenizer. + * CharReader. It allows to register and unregister tokens while parsing. Note + * that the Tokenizer always tries to extract the longest possible token from + * the tokenizer. Tokens can be registered as primary or non-primary token. 
If + * a Token is registered as a primary token, it is returned as a single Token + * instance if it occurs. In the non-primary case the token is returned as part + * of a segmented TokenizedData instance. */ class Tokenizer { -private: +public: /** - * Internally used token trie. This object holds all registered tokens. + * Internally used structure describing a registered token. */ - TokenTrie trie; + struct TokenDescriptor { + /** + * String describing the token. + */ + std::string string; + + /** + * Set to true if this token is primary. + */ + bool primary; + + /** + * Constructor of the TokenDescriptor class. + * + * @param string is the string representation of the registered token. + * @param primary specifies whether the token is a primary token that + * should be returned as a single token, or a secondary token, that + * should be returned as part of TokenizedData. + */ + TokenDescriptor(const std::string &string, bool primary) + : string(string), primary(primary) + { + } + + /** + * Default constructor. + */ + TokenDescriptor() : primary(false) {} + + /** + * Returns true if the TokenDescriptor represents a valid token. + */ + bool valid() { return !string.empty(); } + }; +private: /** - * Flag defining whether whitespaces should be preserved or not. + * Internally used token trie. This object holds all registered tokens. */ - WhitespaceMode whitespaceMode; + TokenTrie trie; /** * Vector containing all registered token types. */ - std::vector tokens; + std::vector tokens; /** * Next index in the tokens list where to search for a new token id. @@ -74,90 +110,78 @@ private: /** * Templated function used internally to read the current token. The - * function is templated in order to force code generation for all six - * combiations of whitespace modes and reading/peeking. + * function is templated in order to force optimized code generation for + * both reading and peeking. * - * @tparam TextHandler is the type to be used for the textHandler instance. 
- * @tparam read specifies whether the function should start from and advance - * the read pointer of the char reader. + * @tparam read specifies whether the method should read the token or just + * peek. * @param reader is the CharReader instance from which the data should be * read. * @param token is the token structure into which the token information * should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return false if the end of the stream has been reached, true otherwise. */ - template - bool next(CharReader &reader, Token &token); + template + bool next(CharReader &reader, Token &token, TokenizedData &data); public: /** * Constructor of the Tokenizer class. - * - * @param whitespaceMode specifies how whitespace should be handled. */ - Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); + Tokenizer(); /** - * Registers the given string as a token. Returns a const pointer at a - * TokenDescriptor that will be used to reference the newly created token. + * Registers the given string as a token. Returns a unique identifier + * describing the registered token. * * @param token is the token string that should be registered. - * @return a unique identifier for the registered token or EmptyToken if + * @param primary specifies whether the token is a primary token -- if true, + * the token will be returned as a single, standalone token. Otherwise the + * token will be returned as part of a "TokenizedData" structure. + * @return a unique identifier for the registered token or Tokens::Empty if * an error occured. */ - TokenId registerToken(const std::string &token); + TokenId registerToken(const std::string &token, bool primary = true); /** * Unregisters the token belonging to the given TokenId. * * @param type is the token type that should be unregistered. The - *TokenId - * must have been returned by registerToken. 
+ * TokenId must have been returned by registerToken. * @return true if the operation was successful, false otherwise (e.g. - * because the given TokenDescriptor was already unregistered). + * because the token with the given TokenId was already unregistered). */ - bool unregisterToken(TokenId type); + bool unregisterToken(TokenId id); /** * Returns the token that was registered under the given TokenId id or - *an - * empty string if an invalid TokenId id is given. + * an empty string if an invalid TokenId id is given. * - * @param type is the TokenId id for which the corresponding token - *string + * @param id is the TokenId for which the corresponding TokenDescriptor * should be returned. - * @return the registered token string or an empty string if the given type - * was invalid. - */ - std::string getTokenString(TokenId type); - - /** - * Sets the whitespace mode. - * - * @param whitespaceMode defines how whitespace should be treated in text - * tokens. - */ - void setWhitespaceMode(WhitespaceMode mode); - - /** - * Returns the current value of the whitespace mode. - * - * @return the whitespace mode. + * @return the registered TokenDescriptor or an invalid TokenDescriptor if + * the given TokenId is invalid. */ - WhitespaceMode getWhitespaceMode(); + const TokenDescriptor& lookupToken(TokenId id) const; /** * Reads a new token from the CharReader and stores it in the given - * Token instance. + * Token instance. If the token has the id Tokens::Data, use the "getData" + * method to fetch a reference at the underlying TokenizedData instance + * storing the data. * * @param reader is the CharReader instance from which the data should be * read. * @param token is a reference at the token instance into which the Token * information should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return true if a token could be read, false if the end of the stream * has been reached. 
*/ - bool read(CharReader &reader, Token &token); + bool read(CharReader &reader, Token &token, TokenizedData &data); /** * The peek method does not advance the read position of the char reader, @@ -167,10 +191,12 @@ public: * read. * @param token is a reference at the token instance into which the Token * information should be written. + * @param data is a reference at the TokenizedData instance to which the + * token information should be appended. * @return true if a token could be read, false if the end of the stream * has been reached. */ - bool peek(CharReader &reader, Token &token); + bool peek(CharReader &reader, Token &token, TokenizedData &data); }; } diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index f61ac7d..d4cdbf8 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -94,92 +94,11 @@ public: static const PlainFormatTokens OsmlTokens; -/** - * Class used internally to collect data issued via "DATA" event. - */ -class DataHandler { -private: - /** - * Internal character buffer. - */ - std::vector buf; - - /** - * Start location of the character data. - */ - SourceOffset start; - - /** - * End location of the character data. - */ - SourceOffset end; - -public: - /** - * Default constructor, initializes start and end with zeros. - */ - DataHandler() : start(0), end(0) {} - - /** - * Returns true if the internal buffer is empty. - * - * @return true if no characters were added to the internal buffer, false - * otherwise. - */ - bool isEmpty() { return buf.empty(); } - - /** - * Appends a single character to the internal buffer. - * - * @param c is the character that should be added to the internal buffer. - * @param charStart is the start position of the character. - * @param charEnd is the end position of the character. 
- */ - void append(char c, SourceOffset charStart, SourceOffset charEnd) - { - if (isEmpty()) { - start = charStart; - } - buf.push_back(c); - end = charEnd; - } - - /** - * Appends a string to the internal buffer. - * - * @param s is the string that should be added to the internal buffer. - * @param stringStart is the start position of the string. - * @param stringEnd is the end position of the string. - */ - void append(const std::string &s, SourceOffset stringStart, - SourceOffset stringEnd) - { - if (isEmpty()) { - start = stringStart; - } - std::copy(s.c_str(), s.c_str() + s.size(), back_inserter(buf)); - end = stringEnd; - } - - /** - * Converts the internal buffer to a variant with attached location - * information. - * - * @param sourceId is the source id which is needed for building the - * location information. - * @return a Variant with the internal buffer content as string and - * the correct start and end location. - */ - Variant toVariant(SourceId sourceId) - { - Variant res = Variant::fromString(std::string(buf.data(), buf.size())); - res.setLocation({sourceId, start, end}); - return res; - } -}; - OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) - : reader(reader), logger(logger), tokenizer(OsmlTokens) + : reader(reader), + logger(logger), + tokenizer(OsmlTokens), + data(reader.getSourceId()) { // Place an intial command representing the complete file on the stack commands.push(Command{"", Variant::mapType{}, true, true, true, false}); @@ -188,7 +107,7 @@ OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) { bool first = true; - bool hasCharSiceNSSep = false; + bool hasCharSinceNSSep = false; std::vector identifier; size_t end = reader.getPeekOffset(); char c, c2; @@ -197,7 +116,7 @@ Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) if ((first && Utils::isIdentifierStartCharacter(c)) || (!first && 
Utils::isIdentifierCharacter(c))) { identifier.push_back(c); - } else if (c == ':' && hasCharSiceNSSep && reader.fetchPeek(c2) && + } else if (c == ':' && hasCharSinceNSSep && reader.fetchPeek(c2) && Utils::isIdentifierStartCharacter(c2)) { identifier.push_back(c); } else { @@ -214,8 +133,8 @@ Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) // This is no longer the first character first = false; - // Advance the hasCharSiceNSSep flag - hasCharSiceNSSep = allowNSSep && (c != ':'); + // Advance the hasCharSinceNSSep flag + hasCharSinceNSSep = allowNSSep && (c != ':'); end = reader.getPeekOffset(); reader.consumePeek(); @@ -488,7 +407,10 @@ void OsmlStreamParser::parseBlockComment() { Token token; size_t depth = 1; - while (tokenizer.read(reader, token)) { + while (tokenizer.read(reader, token, data)) { + // Throw the comment data away + data.clear(); + if (token.id == OsmlTokens.BlockCommentEnd) { depth--; if (depth == 0) { @@ -514,10 +436,9 @@ void OsmlStreamParser::parseLineComment() } } -bool OsmlStreamParser::checkIssueData(DataHandler &handler) +bool OsmlStreamParser::checkIssueData() { - if (!handler.isEmpty()) { - data = handler.toVariant(reader.getSourceId()); + if (!data.empty()) { location = data.getLocation(); reader.resetPeek(); return true; @@ -575,12 +496,12 @@ bool OsmlStreamParser::closeField() OsmlStreamParser::State OsmlStreamParser::parse() { - // Handler for incomming data - DataHandler handler; + // Reset the data handler + data.clear(); // Read tokens until the outer loop should be left Token token; - while (tokenizer.peek(reader, token)) { + while (tokenizer.peek(reader, token, data)) { const TokenId type = token.id; // Special handling for Backslash and Text @@ -606,7 +527,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Try to parse a command if (Utils::isIdentifierStartCharacter(c)) { // Make sure to issue any data before it is to late - if (checkIssueData(handler)) { + if (checkIssueData()) { return 
State::DATA; } @@ -633,12 +554,11 @@ OsmlStreamParser::State OsmlStreamParser::parse() // If this was an annotation start token, add the parsed < to the // output if (type == OsmlTokens.AnnotationStart) { - handler.append('<', token.location.getStart(), - token.location.getStart() + 1); + data.append('<', token.location.getStart(), + token.location.getStart() + 1); } - handler.append(c, token.location.getStart(), - reader.getPeekOffset()); + data.append(c, token.location.getStart(), reader.getPeekOffset()); reader.consumePeek(); continue; } else if (type == Tokens::Data) { @@ -647,18 +567,13 @@ OsmlStreamParser::State OsmlStreamParser::parse() location = token.location; return State::FIELD_START; } - - // Append the text to the data handler - handler.append(token.content, token.location.getStart(), - token.location.getEnd()); - reader.consumePeek(); continue; } // A non-text token was reached, make sure all pending data commands // have been issued - if (checkIssueData(handler)) { + if (checkIssueData()) { return State::DATA; } @@ -676,34 +591,36 @@ OsmlStreamParser::State OsmlStreamParser::parse() Command &cmd = commands.top(); if (!cmd.inField) { cmd.inField = true; - return State::FIELD_START; } - logger.error( + return State::FIELD_START; +/* logger.error( "Got field start token \"{\", but no command for which to " "start the field. Write \"\\{\" to insert this sequence as " "text.", - token); + token);*/ } else if (token.id == OsmlTokens.FieldEnd) { - if (closeField()) { + closeField(); + return State::FIELD_END; +/* if (closeField()) { return State::FIELD_END; } logger.error( "Got field end token \"}\", but there is no field to end. 
" "Write \"\\}\" to insert this sequence as text.", - token); + token);*/ } else if (token.id == OsmlTokens.DefaultFieldStart) { // Try to start a default field the first time the token is reached Command &topCmd = commands.top(); if (!topCmd.inField) { topCmd.inField = true; topCmd.inDefaultField = true; - return State::FIELD_START; } - logger.error( + return State::FIELD_START; +/* logger.error( "Got default field start token \"{!\", but no command for " "which to start the field. Write \"\\{!\" to insert this " "sequence as text", - token); + token);*/ } else if (token.id == OsmlTokens.AnnotationEnd) { // We got a single annotation end token "\>" -- simply issue the // ANNOTATION_END event @@ -717,7 +634,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() } // Issue available data - if (checkIssueData(handler)) { + if (checkIssueData()) { return State::DATA; } @@ -737,6 +654,14 @@ OsmlStreamParser::State OsmlStreamParser::parse() return State::END; } +Variant OsmlStreamParser::getText(WhitespaceMode mode) +{ + TokenizedData dataFork = data; + Variant text = dataFork.text(mode); + location = text.getLocation(); + return text; +} + const Variant &OsmlStreamParser::getCommandName() const { return commands.top().name; diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index dc3034c..453a2bb 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -29,17 +29,19 @@ #ifndef _OUSIA_OSML_STREAM_PARSER_HPP_ #define _OUSIA_OSML_STREAM_PARSER_HPP_ -#include +#include #include +#include #include +#include namespace ousia { // Forward declarations class CharReader; class Logger; -class DataHandler; +class OsmlStreamParserImpl; /** * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml @@ -137,26 +139,15 @@ public: Variant arguments; /** - * Set to true if this is a command with clear begin and end. 
- */ - bool hasRange : 1; - - /** - * Set to true if we are currently inside a field of this command. - */ - bool inField : 1; - - /** - * Set to true if we are currently in the range field of the command - * (implies inField being set to true). + * Vector used as stack for holding the number of opening/closing braces + * and the corresponding "isDefaultField" flag. */ - bool inRangeField : 1; + std::vector fields; /** - * Set to true if we are currently in a field that has been especially - * marked as default field (using the "|") syntax. + * Set to true if this is a command with clear begin and end. */ - bool inDefaultField : 1; + bool hasRange; /** * Default constructor. @@ -164,7 +155,6 @@ public: Command() : hasRange(false), inField(false), - inRangeField(false), inDefaultField() { } @@ -178,15 +168,10 @@ public: * command. * @param hasRange should be set to true if this is a command with * explicit range. - * @param inField is set to true if we currently are inside a field - * of this command. - * @param inRangeField is set to true if we currently are inside the - * outer field of a ranged command. * @param inDefaultField is set to true if we currently are in a * specially marked default field. */ - Command(Variant name, Variant arguments, bool hasRange, - bool inField, bool inRangeField, bool inDefaultField) + Command(Variant name, Variant arguments, bool hasRange) : name(std::move(name)), arguments(std::move(arguments)), hasRange(hasRange), @@ -215,25 +200,20 @@ private: Tokenizer tokenizer; /** - * Stack containing the current commands. - */ - std::stack commands; - - /** - * Variant containing the data that has been read (always is a string, - * contains the exact location of the data in the source file). + * Variant containing the tokenized data that was returned from the + * tokenizer as data. */ - Variant data; + TokenizedData data; /** - * Contains the location of the last token. + * Stack containing the current commands. 
*/ - SourceLocation location; + std::stack commands; /** - * Contains the field index of the current command. + * Pointer at */ - size_t fieldIdx; + std::unique_ptr impl; /** * Function used internall to parse an identifier. @@ -291,12 +271,10 @@ private: /** * Checks whether there is any data pending to be issued, if yes, issues it. * - * @param handler is the data handler that contains the data that may be - * returned to the user. * @return true if there was any data and DATA should be returned by the * parse function, false otherwise. */ - bool checkIssueData(DataHandler &handler); + bool checkIssueData(); /** * Called before any data is appended to the internal data handler. Checks @@ -327,6 +305,12 @@ public: */ OsmlStreamParser(CharReader &reader, Logger &logger); + /** + * Destructor of the OsmlStreamParser, needed to destroy the incomplete + * OsmlStreamParserImpl. + */ + ~OsmlStreamParser(); + /** * Continues parsing. Returns one of the states defined in the State enum. * Callers should stop once the State::END state is reached. Use the getter @@ -344,7 +328,19 @@ public: * @return a reference at a variant containing the data parsed by the * "parse" function. */ - const Variant &getData() const { return data; } + const TokenizedData &getData() const { return data; } + + /** + * Returns the complete content of the internal TokenizedData instance as + * a single string Variant. This method is mainly used in the unit tests for + * this class, it simply calls the text() method of TokenizedData. + * + * @param mode is the WhitespaceMode that should be used for returning the + * text. + * @return a string variant containing the text content of the internal + * TokenizedData instance or a nullptr variant if there is no text. + */ + Variant getText(WhitespaceMode mode = WhitespaceMode::COLLAPSE); /** * Returns a reference at the internally stored command name. Only valid if @@ -371,13 +367,6 @@ public: * syntax). 
*/ bool inDefaultField() const; - - /** - * Returns a reference at the char reader. - * - * @return the last internal token location. - */ - const SourceLocation &getLocation() const { return location; } }; } diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index c9254b0..855f80d 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include "OsxmlAttributeLocator.hpp" #include "OsxmlEventParser.hpp" @@ -56,17 +55,6 @@ public: */ std::vector textBuf; - /** - * Current whitespace buffer (for the trimming whitspace mode) - */ - std::vector whitespaceBuf; - - /** - * Flag indicating whether a whitespace character was present (for the - * collapsing whitespace mode). - */ - bool hasWhitespace; - /** * Current character data start. */ @@ -394,33 +382,17 @@ static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) SourceLocation loc = xmlSyncLoggerPosition(p, ulen); // Fetch some variables for convenience - const WhitespaceMode mode = parser->getWhitespaceMode(); OsxmlEventParserData &data = parser->getData(); std::vector &textBuf = data.textBuf; - std::vector &whitespaceBuf = data.whitespaceBuf; - bool &hasWhitespace = data.hasWhitespace; - size_t &textStart = data.textStart; - size_t &textEnd = data.textEnd; - - size_t pos = loc.getStart(); - for (size_t i = 0; i < ulen; i++, pos++) { - switch (mode) { - case WhitespaceMode::PRESERVE: - PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, - textStart, textEnd); - break; - case WhitespaceMode::TRIM: - TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, - textStart, textEnd, - whitespaceBuf); - break; - case WhitespaceMode::COLLAPSE: - CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf, - textStart, textEnd, - hasWhitespace); - break; - } + + // Update start and end position + if (textBuf.empty()) { + data.textStart = 
loc.getStart(); } + data.textEnd = loc.getEnd(); + + // Insert the data into the text buffer + textBuf.insert(textBuf.end(), &s[0], &s[ulen]); } /* Class OsxmlEvents */ @@ -430,11 +402,7 @@ OsxmlEvents::~OsxmlEvents() {} /* Class OsxmlEventParser */ OsxmlEventParserData::OsxmlEventParserData() - : depth(0), - annotationEndTagDepth(-1), - hasWhitespace(false), - textStart(0), - textEnd(0) + : depth(0), annotationEndTagDepth(-1), textStart(0), textEnd(0) { } @@ -466,8 +434,6 @@ Variant OsxmlEventParserData::getText(SourceId sourceId) // Reset the text buffers textBuf.clear(); - whitespaceBuf.clear(); - hasWhitespace = false; textStart = 0; textEnd = 0; @@ -482,7 +448,6 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, : reader(reader), events(events), logger(logger), - whitespaceMode(WhitespaceMode::COLLAPSE), data(new OsxmlEventParserData()) { } @@ -532,16 +497,6 @@ void OsxmlEventParser::parse() } } -void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode) -{ - this->whitespaceMode = whitespaceMode; -} - -WhitespaceMode OsxmlEventParser::getWhitespaceMode() const -{ - return whitespaceMode; -} - CharReader &OsxmlEventParser::getReader() const { return reader; } Logger &OsxmlEventParser::getLogger() const { return logger; } diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index e39245f..e3fd5d4 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -32,8 +32,6 @@ #include #include -#include - namespace ousia { // Forward declarations @@ -99,13 +97,10 @@ public: virtual void fieldEnd() = 0; /** - * Called whenever data is found. Whitespace data is handled as specified - * and the data has been parsed to the specified variant type. This function - * is not called if the parsing failed, the parser prints an error message - * instead. + * Called whenever string data is found. 
* - * @param data is the already parsed data that should be passed to the - * handler. + * @param data is a Variant containing the string data that was found in the + * XML file. */ virtual void data(const Variant &data) = 0; }; @@ -134,11 +129,6 @@ private: */ Logger &logger; - /** - * Current whitespace mode. - */ - WhitespaceMode whitespaceMode; - /** * Data to be used by the internal functions. */ @@ -170,21 +160,6 @@ public: */ void parse(); - /** - * Sets the whitespace handling mode. - * - * @param whitespaceMode defines how whitespace in the data should be - * handled. - */ - void setWhitespaceMode(WhitespaceMode whitespaceMode); - - /** - * Returns the current whitespace handling mode. - * - * @return the currently set whitespace handling mode. - */ - WhitespaceMode getWhitespaceMode() const; - /** * Returns the internal CharReader reference. * diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index a93f14a..83966d5 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -53,7 +54,7 @@ struct Tracker { Variant::mapType annotationStartArgs; Variant annotationEndClassName; Variant annotationEndElementName; - Variant dataData; + TokenizedData dataData; bool startResult; bool fieldStartSetIsDefault; @@ -81,7 +82,7 @@ struct Tracker { annotationStartArgs = Variant::mapType{}; annotationEndClassName = Variant::fromString(std::string{}); annotationEndElementName = Variant::fromString(std::string{}); - dataData = Variant::fromString(std::string{}); + dataData = TokenizedData(); startResult = true; fieldStartSetIsDefault = false; @@ -157,7 +158,7 @@ public: return tracker.annotationEndResult; } - bool data(Variant &data) override + bool data(TokenizedData &data) override { tracker.dataCount++; tracker.dataData = data; @@ -363,7 +364,7 @@ TEST(Stack, multipleFields) s.data("test"); tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, 
ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test", tracker.dataData); + EXPECT_EQ("test", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc @@ -375,7 +376,7 @@ TEST(Stack, multipleFields) s.data("test2"); tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test2", tracker.dataData); + EXPECT_EQ("test2", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc @@ -387,7 +388,7 @@ TEST(Stack, multipleFields) s.data("test3"); tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test3", tracker.dataData); + EXPECT_EQ("test3", tracker.dataData.text().asString()); s.fieldEnd(); tracker.expect(1, 0, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc @@ -744,4 +745,4 @@ TEST(Stack, fieldAfterDefaultField) ASSERT_FALSE(logger.hasError()); } } -} \ No newline at end of file +} diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp index 231bad9..dfe2526 100644 --- a/test/core/parser/utils/TokenizedDataTest.cpp +++ b/test/core/parser/utils/TokenizedDataTest.cpp @@ -22,6 +22,43 @@ namespace ousia { +void assertToken(TokenizedDataReader &reader, TokenId id, + const std::string &text, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId sourceId = InvalidSourceId) +{ + Token token; + ASSERT_TRUE(reader.read(token, tokens, mode)); + EXPECT_EQ(id, token.id); + EXPECT_EQ(text, token.content); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, token.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, token.getLocation().getEnd()); + } + EXPECT_EQ(sourceId, token.getLocation().getSourceId()); +} + +void assertText(TokenizedDataReader &reader, const std::string &text, + const 
TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId id = InvalidSourceId) +{ + assertToken(reader, Tokens::Data, text, tokens, mode, start, end, id); +} + +void assertEnd(TokenizedDataReader &reader) +{ + Token token; + ASSERT_TRUE(reader.atEnd()); + ASSERT_FALSE(reader.read(token)); +} + TEST(TokenizedData, dataWhitespacePreserve) { TokenizedData data; @@ -29,15 +66,10 @@ TEST(TokenizedData, dataWhitespacePreserve) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" test1 test2 ", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(16U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, " test1 test2 ", TokenSet{}, WhitespaceMode::PRESERVE, + 0, 16); + assertEnd(reader); } TEST(TokenizedData, dataWhitespaceTrim) @@ -47,15 +79,10 @@ TEST(TokenizedData, dataWhitespaceTrim) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test1 test2", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(14U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::TRIM, 1, + 14); + assertEnd(reader); } TEST(TokenizedData, dataWhitespaceCollapse) @@ -65,15 +92,10 @@ TEST(TokenizedData, dataWhitespaceCollapse) // 0123456789012345 // 0 1 - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - 
EXPECT_EQ("test1 test2", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(14U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test1 test2", TokenSet{}, WhitespaceMode::COLLAPSE, 1, + 14); + assertEnd(reader); } TEST(TokenizedData, singleToken) @@ -82,17 +104,9 @@ TEST(TokenizedData, singleToken) ASSERT_EQ(2U, data.append("$$")); data.mark(5, 0, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, singleDisabledToken) @@ -101,15 +115,9 @@ TEST(TokenizedData, singleDisabledToken) ASSERT_EQ(2U, data.append("$$")); data.mark(5, 0, 2); - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "$$", TokenSet{}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, dualToken) @@ -120,18 +128,10 @@ TEST(TokenizedData, dualToken) data.mark(5, 0, 2); data.mark(6, 1, 1); - data.enableToken(5); - data.enableToken(6); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); 
- EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5, 6}, WhitespaceMode::COLLAPSE, 0, + 2); + assertEnd(reader); } TEST(TokenizedData, dualTokenShorterEnabled) @@ -142,385 +142,281 @@ TEST(TokenizedData, dualTokenShorterEnabled) data.mark(5, 0, 2); data.mark(6, 1, 1); - data.enableToken(6); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(1U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, 6, "$", TokenSet{6}, WhitespaceMode::COLLAPSE, 1, 2); + assertEnd(reader); } TEST(TokenizedData, dualTokenLongerEnabled) { TokenizedData data; ASSERT_EQ(2U, data.append("$$")); + data.mark(6, 0, 1); data.mark(5, 0, 2); + data.mark(6, 1, 1); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, 
WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataPreserveWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" test ", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(8U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2); + assertText(reader, " test text ", TokenSet{5}, WhitespaceMode::PRESERVE, + 2, 16); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataTrimWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - 
ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2); + assertText(reader, "test text", TokenSet{5}, WhitespaceMode::TRIM, 3, + 15); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndDataCollapseWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ test $$")); - // 0123456789 + ASSERT_EQ(18U, data.append("$$ test text $$")); + // 012345678901234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - 
EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertText(reader, "test text", TokenSet{5}, WhitespaceMode::COLLAPSE, 3, + 15); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 16, 18); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespacePreserveWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(8U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 0, 2); + assertText(reader, " ", 
TokenSet{5}, WhitespaceMode::PRESERVE, 2, 6); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::PRESERVE, 6, 8); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespaceTrimWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 0, 2); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::TRIM, 6, 8); + assertEnd(reader); } TEST(TokenizedData, tokensAndWhitespaceCollapseWhitespace) { TokenizedData data; - ASSERT_EQ(10U, data.append("$$ $$")); - // 0123456789 + ASSERT_EQ(8U, data.append("$$ $$")); + // 01234567 data.mark(5, 0, 2); data.mark(5, 2); - data.enableToken(5); - - Token token; - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(8U, token.getLocation().getStart()); - EXPECT_EQ(10U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.next(token, 
WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 0, 2); + assertToken(reader, 5, "$$", TokenSet{5}, WhitespaceMode::COLLAPSE, 6, 8); + assertEnd(reader); } -TEST(TokenizedData, textPreserveWhitespace) +TEST(TokenizedData, appendChars) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); - - data.enableToken(5); + ASSERT_EQ(1U, data.append('t', 5, 7)); + ASSERT_EQ(2U, data.append('e', 7, 8)); + ASSERT_EQ(3U, data.append('s', 8, 10)); + ASSERT_EQ(4U, data.append('t', 10, 12)); - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(0U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.next(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" ", token.content); - EXPECT_EQ(4U, token.getLocation().getStart()); - EXPECT_EQ(6U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::PRESERVE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::PRESERVE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test", TokenSet{5}, WhitespaceMode::COLLAPSE, 5, 12); + assertEnd(reader); } -TEST(TokenizedData, textTrimWhitespace) +TEST(TokenizedData, protectedWhitespace) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); + ASSERT_EQ(4U, data.append("test", 10)); + ASSERT_EQ(11U, 
data.append(" test", 14, true)); - data.enableToken(5); - - Token token; - ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); - - ASSERT_TRUE(data.next(token, WhitespaceMode::TRIM)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); + TokenizedDataReader reader = data.reader(); + assertText(reader, "test test", TokenSet{5}, WhitespaceMode::COLLAPSE, 10, + 21); + assertEnd(reader); +} - ASSERT_FALSE(data.text(token, WhitespaceMode::TRIM)); - ASSERT_FALSE(data.next(token, WhitespaceMode::TRIM)); +TEST(TokenizedData, specialNewlineToken) +{ + TokenizedData data; + data.append("a\nb\n \nc\n"); + // 0 12 3456 78 9 + + const TokenSet tokens{Tokens::Newline}; + + TokenizedDataReader reader = data.reader(); + assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 1, 2); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 3, 4); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 7, 8); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 9, 10); + assertEnd(reader); } -TEST(TokenizedData, textCollapseWhitespace) +TEST(TokenizedData, specialParagraphToken) { TokenizedData data; - ASSERT_EQ(6U, data.append(" $$ ")); - // 012345 - data.mark(5, 2, 2); + data.append("a\nb\n \nc\n"); + // 0 12 3456 78 9 - data.enableToken(5); + const TokenSet tokens{Tokens::Paragraph}; - Token token; - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3); + assertToken(reader, Tokens::Paragraph, "\n 
\n", tokens, + WhitespaceMode::COLLAPSE, 3, 8); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 8, 9); + assertEnd(reader); +} - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(5U, token.id); - EXPECT_EQ("$$", token.content); - EXPECT_EQ(2U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); +TEST(TokenizedData, specialSectionToken) +{ + TokenizedData data; + data.append("a\nb\n \n \t \n"); + // 0 12 3456 789 01 2 + // 0 1 + + const TokenSet tokens{Tokens::Section}; - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); + TokenizedDataReader reader = data.reader(); + assertText(reader, "a b", tokens, WhitespaceMode::COLLAPSE, 0, 3); + assertToken(reader, Tokens::Section, "\n \n \t \n", tokens, + WhitespaceMode::COLLAPSE, 3, 13); + assertEnd(reader); } -TEST(TokenizedData, appendChars) +TEST(TokenizedData, specialTokenPrecedence) { TokenizedData data; - ASSERT_EQ(1U, data.append('t', 5, 7)); - ASSERT_EQ(2U, data.append('e', 7, 8)); - ASSERT_EQ(3U, data.append('s', 8, 10)); - ASSERT_EQ(4U, data.append('t', 10, 12)); + data.append("a\nb\n\nc\n\n\nd"); + // 0 12 3 45 6 7 89 + + const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section}; + + TokenizedDataReader reader = data.reader(); + assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 1, 2); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 2, 3); + assertToken(reader, Tokens::Paragraph, "\n\n", tokens, + WhitespaceMode::COLLAPSE, 3, 5); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 5, 6); + assertToken(reader, Tokens::Section, "\n\n\n", tokens, + WhitespaceMode::COLLAPSE, 6, 9); + assertText(reader, "d", tokens, WhitespaceMode::COLLAPSE, 9, 10); + assertEnd(reader); +} - Token token; - 
ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("test", token.content); - EXPECT_EQ(5U, token.getLocation().getStart()); - EXPECT_EQ(12U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - ASSERT_FALSE(data.next(token, WhitespaceMode::COLLAPSE)); +TEST(TokenizedData, specialTokenPrecedence2) +{ + TokenizedData data; + data.append("\nb\n\nc\n\n\n"); + // 0 12 3 45 6 7 + + const TokenSet tokens{Tokens::Newline, Tokens::Paragraph, Tokens::Section}; + + TokenizedDataReader reader = data.reader(); + assertToken(reader, Tokens::Newline, "\n", tokens, WhitespaceMode::COLLAPSE, + 0, 1); + assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 1, 2); + assertToken(reader, Tokens::Paragraph, "\n\n", tokens, + WhitespaceMode::COLLAPSE, 2, 4); + assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 4, 5); + assertToken(reader, Tokens::Section, "\n\n\n", tokens, + WhitespaceMode::COLLAPSE, 5, 8); + assertEnd(reader); } -TEST(TokenizedData, copy) +TEST(TokenizedData, specialTokenIndent) { TokenizedData data; - ASSERT_EQ(7U, data.append(" a $ b ")); - // 0123456 - data.mark(6, 3, 1); - data.enableToken(6); + data.append(" test\n\ttest2\n test3 \ttest4\ntest5"); + // 01234567 8 901234 5678901234567890 123456 789012 + // 0 1 2 3 4 + const TokenSet tokens{Tokens::Indent, Tokens::Dedent}; + + TokenizedDataReader reader = data.reader(); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 4, 4); + assertText(reader, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 10, 10); + assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37); + assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE, + 38, 38); + assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43); 
+ assertEnd(reader); +} - Token token; - ASSERT_TRUE(data.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("a", token.content); - EXPECT_EQ(1U, token.getLocation().getStart()); - EXPECT_EQ(2U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_FALSE(data.text(token, WhitespaceMode::COLLAPSE)); - - TokenizedData dataCopy = data; - - ASSERT_TRUE(data.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(dataCopy.next(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(6U, token.id); - EXPECT_EQ("$", token.content); - EXPECT_EQ(3U, token.getLocation().getStart()); - EXPECT_EQ(4U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - - ASSERT_TRUE(data.text(token, WhitespaceMode::PRESERVE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ(" b ", token.content); - EXPECT_EQ(4U, token.getLocation().getStart()); - EXPECT_EQ(7U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(data.next(token)); - - ASSERT_TRUE(dataCopy.text(token, WhitespaceMode::COLLAPSE)); - EXPECT_EQ(Tokens::Data, token.id); - EXPECT_EQ("b", token.content); - EXPECT_EQ(5U, token.getLocation().getStart()); - EXPECT_EQ(6U, token.getLocation().getEnd()); - EXPECT_EQ(InvalidSourceId, token.getLocation().getSourceId()); - ASSERT_FALSE(dataCopy.next(token)); +TEST(TokenizedData, specialTokenIndentOverlap) +{ + TokenizedData data; + data.append(" test\n\ttest2\n test3 \ttest4\ntest5"); + // 01234567 8 901234 5678901234567890 123456 789012 + // 0 1 2 3 4 + const TokenSet tokens{Tokens::Indent, Tokens::Dedent, 5}; + + data.mark(5, 4, 4); + + TokenizedDataReader reader = data.reader(); + 
assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 4, 4); + assertToken(reader, 5, "test", tokens, WhitespaceMode::COLLAPSE, 4, 8); + assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE, + 10, 10); + assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37); + assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE, + 38, 38); + assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43); + assertEnd(reader); } + } diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 3809a12..0f2bfb7 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -20,6 +20,7 @@ #include #include +#include namespace ousia { @@ -31,23 +32,40 @@ TEST(Tokenizer, tokenRegistration) ASSERT_EQ(0U, tokenizer.registerToken("a")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("a")); - ASSERT_EQ("a", tokenizer.getTokenString(0U)); + ASSERT_EQ("a", tokenizer.lookupToken(0U).string); ASSERT_EQ(1U, tokenizer.registerToken("b")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("b")); - ASSERT_EQ("b", tokenizer.getTokenString(1U)); + ASSERT_EQ("b", tokenizer.lookupToken(1U).string); ASSERT_EQ(2U, tokenizer.registerToken("c")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("c")); - ASSERT_EQ("c", tokenizer.getTokenString(2U)); + ASSERT_EQ("c", tokenizer.lookupToken(2U).string); ASSERT_TRUE(tokenizer.unregisterToken(1U)); ASSERT_FALSE(tokenizer.unregisterToken(1U)); - ASSERT_EQ("", tokenizer.getTokenString(1U)); + ASSERT_EQ("", tokenizer.lookupToken(1U).string); ASSERT_EQ(1U, tokenizer.registerToken("d")); ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("d")); - ASSERT_EQ("d", tokenizer.getTokenString(1U)); + ASSERT_EQ("d", tokenizer.lookupToken(1U).string); +} + +void expectData(const std::string &expected, SourceOffset tokenStart, + SourceOffset tokenEnd, SourceOffset textStart, + SourceOffset textEnd, 
const Token &token, TokenizedData &data, + WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ + ASSERT_EQ(Tokens::Data, token.id); + + Variant text = data.text(mode); + ASSERT_TRUE(text.isString()); + + EXPECT_EQ(expected, text.asString()); + EXPECT_EQ(tokenStart, token.location.getStart()); + EXPECT_EQ(tokenEnd, token.location.getEnd()); + EXPECT_EQ(textStart, text.getLocation().getStart()); + EXPECT_EQ(textEnd, text.getLocation().getEnd()); } TEST(Tokenizer, textTokenPreserveWhitespace) @@ -56,36 +74,34 @@ TEST(Tokenizer, textTokenPreserveWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ(" this \t is only a \n\n test text ", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(36U, loc.getEnd()); + expectData(" this \t is only a \n\n test text ", 0, 36, 0, 36, + token, data, WhitespaceMode::PRESERVE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::PRESERVE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, + token, data, WhitespaceMode::PRESERVE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + 
ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -95,36 +111,34 @@ TEST(Tokenizer, textTokenTrimWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 36, 1, 33, token, + data, WhitespaceMode::TRIM); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::TRIM}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this \t is only a \n\n test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, + token, data, WhitespaceMode::TRIM); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -134,36 +148,34 @@ TEST(Tokenizer, textTokenCollapseWhitespace) CharReader reader{" this \t is only a \n\n test text "}; // 012345 6789012345678 9 0123456789012345 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this is only a test text", token.content); + 
TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(1U, loc.getStart()); - ASSERT_EQ(33U, loc.getEnd()); + expectData("this is only a test text", 0, 36, 1, 33, token, data, + WhitespaceMode::COLLAPSE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } { CharReader reader{"this \t is only a \n\n test text"}; // 01234 5678901234567 8 9012345678901 // 0 1 2 3 - Tokenizer tokenizer{WhitespaceMode::COLLAPSE}; + Tokenizer tokenizer; Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("this is only a test text", token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(32U, loc.getEnd()); + expectData("this is only a test text", 0, 32, 0, 32, token, data, + WhitespaceMode::COLLAPSE); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } } @@ -177,14 +189,12 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + expectData("test1", 0, 5, 0, 5, token, data); char c; ASSERT_TRUE(reader.peek(c)); @@ -193,7 +203,8 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -209,14 +220,10 @@ TEST(Tokenizer, simpleReadToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", 
token.content); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + expectData("test2", 6, 11, 6, 11, token, data); char c; ASSERT_FALSE(reader.peek(c)); @@ -233,21 +240,17 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); + expectData("test1", 0, 5, 0, 5, token, data); ASSERT_EQ(0U, reader.getOffset()); ASSERT_EQ(5U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -261,35 +264,26 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.peek(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.peek(reader, token, data)); + expectData("test2", 6, 11, 6, 11, token, data); ASSERT_EQ(0U, reader.getOffset()); ASSERT_EQ(11U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test1", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + expectData("test1", 0, 5, 0, 5, token, data); ASSERT_EQ(5U, reader.getOffset()); ASSERT_EQ(5U, reader.getPeekOffset()); } { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + TokenizedData data; + 
ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); @@ -303,14 +297,9 @@ TEST(Tokenizer, simplePeekToken) { Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); - - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("test2", token.content); - - SourceLocation loc = token.location; - ASSERT_EQ(6U, loc.getStart()); - ASSERT_EQ(11U, loc.getEnd()); + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + expectData("test2", 6, 11, 6, 11, token, data); ASSERT_EQ(11U, reader.getOffset()); ASSERT_EQ(11U, reader.getPeekOffset()); } @@ -320,6 +309,7 @@ TEST(Tokenizer, ambiguousTokens) { CharReader reader{"abc"}; Tokenizer tokenizer; + TokenizedData data; TokenId t1 = tokenizer.registerToken("abd"); TokenId t2 = tokenizer.registerToken("bc"); @@ -328,16 +318,17 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_EQ(1U, t2); Token token; - ASSERT_TRUE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_TRUE(tokenizer.read(reader, token, data)); - ASSERT_EQ(Tokens::Data, token.id); - ASSERT_EQ("a", token.content); + expectData("a", 0, 1, 0, 1, token, data); SourceLocation loc = token.location; ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); - ASSERT_TRUE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_TRUE(tokenizer.read(reader, token, data)); ASSERT_EQ(t2, token.id); ASSERT_EQ("bc", token.content); @@ -346,7 +337,8 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_EQ(1U, loc.getStart()); ASSERT_EQ(3U, loc.getEnd()); - ASSERT_FALSE(tokenizer.read(reader, token)); + data.clear(); + ASSERT_FALSE(tokenizer.read(reader, token, data)); } TEST(Tokenizer, commentTestWhitespacePreserve) @@ -354,7 +346,7 @@ TEST(Tokenizer, commentTestWhitespacePreserve) CharReader reader{"Test/Test /* Block Comment */", 0}; // 012345678901234567890123456789 // 0 1 2 - Tokenizer tokenizer(WhitespaceMode::PRESERVE); + Tokenizer tokenizer; const TokenId t1 = tokenizer.registerToken("/"); const TokenId t2 = 
tokenizer.registerToken("/*"); @@ -370,45 +362,23 @@ TEST(Tokenizer, commentTestWhitespacePreserve) Token t; for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); + TokenizedData data(0); + EXPECT_TRUE(tokenizer.read(reader, t, data)); EXPECT_EQ(te.id, t.id); - EXPECT_EQ(te.content, t.content); + if (te.id != Tokens::Data) { + EXPECT_EQ(te.content, t.content); + } else { + Variant text = data.text(WhitespaceMode::PRESERVE); + ASSERT_TRUE(text.isString()); + EXPECT_EQ(te.content, text.asString()); + } EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); EXPECT_EQ(te.location.getStart(), t.location.getStart()); EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); } - ASSERT_FALSE(tokenizer.read(reader, t)); -} - -TEST(Tokenizer, commentTestWhitespaceCollapse) -{ - CharReader reader{"Test/Test /* Block Comment */", 0}; - // 012345678901234567890123456789 - // 0 1 2 - Tokenizer tokenizer(WhitespaceMode::COLLAPSE); - const TokenId t1 = tokenizer.registerToken("/"); - const TokenId t2 = tokenizer.registerToken("/*"); - const TokenId t3 = tokenizer.registerToken("*/"); - - std::vector expected = { - {Tokens::Data, "Test", SourceLocation{0, 0, 4}}, - {t1, "/", SourceLocation{0, 4, 5}}, - {Tokens::Data, "Test", SourceLocation{0, 5, 9}}, - {t2, "/*", SourceLocation{0, 10, 12}}, - {Tokens::Data, "Block Comment", SourceLocation{0, 13, 26}}, - {t3, "*/", SourceLocation{0, 27, 29}}}; - - Token t; - for (auto &te : expected) { - EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.id, t.id); - EXPECT_EQ(te.content, t.content); - EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); - EXPECT_EQ(te.location.getStart(), t.location.getStart()); - EXPECT_EQ(te.location.getEnd(), t.location.getEnd()); - } - ASSERT_FALSE(tokenizer.read(reader, t)); + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, t, data)); } } diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index d52fa5b..3d01007 
100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -30,11 +30,21 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); // static ConcreteLogger logger; +static OsmlStreamParser::State skipEmptyData(OsmlStreamParser &reader) +{ + OsmlStreamParser::State res = reader.parse(); + if (res == OsmlStreamParser::State::DATA) { + EXPECT_FALSE(reader.getData().hasNonWhitespaceText()); + res = reader.parse(); + } + return res; +} + static void assertCommand(OsmlStreamParser &reader, const std::string &name, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::COMMAND, skipEmptyData(reader)); EXPECT_EQ(name, reader.getCommandName().asString()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); @@ -57,16 +67,19 @@ static void assertCommand(OsmlStreamParser &reader, const std::string &name, static void assertData(OsmlStreamParser &reader, const std::string &data, SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) + SourceOffset end = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(data, reader.getData().asString()); + Variant text = reader.getText(mode); + ASSERT_TRUE(text.isString()); + EXPECT_EQ(data, text.asString()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getData().getLocation().getStart()); + EXPECT_EQ(start, text.getLocation().getStart()); EXPECT_EQ(start, reader.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getData().getLocation().getEnd()); + EXPECT_EQ(end, text.getLocation().getEnd()); EXPECT_EQ(end, reader.getLocation().getEnd()); } } @@ -75,7 +88,7 @@ static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, 
SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, skipEmptyData(reader)); EXPECT_EQ(defaultField, reader.inDefaultField()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getLocation().getStart()); @@ -89,7 +102,7 @@ static void assertFieldEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, skipEmptyData(reader)); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getLocation().getStart()); } @@ -103,7 +116,7 @@ static void assertAnnotationStart(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, skipEmptyData(reader)); EXPECT_EQ(name, reader.getCommandName().asString()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); @@ -131,7 +144,7 @@ static void assertAnnotationEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, skipEmptyData(reader)); ASSERT_EQ(name, reader.getCommandName().asString()); if (!elementName.empty()) { ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); @@ -152,7 +165,7 @@ static void assertEnd(OsmlStreamParser &reader, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + ASSERT_EQ(OsmlStreamParser::State::END, skipEmptyData(reader)); if (start != InvalidSourceOffset) { EXPECT_EQ(start, 
reader.getLocation().getStart()); } @@ -205,26 +218,14 @@ TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) assertData(reader, "hello world", 1, 14); } -TEST(OsmlStreamParser, escapeWhitespace) -{ - const char *testString = " hello\\ \\ world "; - // 012345 67 89012345 - // 0 1 - CharReader charReader(testString); - - OsmlStreamParser reader(charReader, logger); - - assertData(reader, "hello world", 1, 15); -} - static void testEscapeSpecialCharacter(const std::string &c) { CharReader charReader(std::string("\\") + c); OsmlStreamParser reader(charReader, logger); EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(c, reader.getData().asString()); + EXPECT_EQ(c, reader.getText().asString()); - SourceLocation loc = reader.getData().getLocation(); + SourceLocation loc = reader.getText().getLocation(); EXPECT_EQ(0U, loc.getStart()); EXPECT_EQ(1U + c.size(), loc.getEnd()); } @@ -253,16 +254,16 @@ TEST(OsmlStreamParser, singleLineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(33U, loc.getStart()); ASSERT_EQ(34U, loc.getEnd()); } @@ -279,16 +280,16 @@ TEST(OsmlStreamParser, multilineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + 
SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } @@ -305,16 +306,16 @@ TEST(OsmlStreamParser, nestedMultilineComment) OsmlStreamParser reader(charReader, logger); { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("a", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(0U, loc.getStart()); ASSERT_EQ(1U, loc.getEnd()); } { ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getData().asString()); - SourceLocation loc = reader.getData().getLocation(); + ASSERT_EQ("b", reader.getText().asString()); + SourceLocation loc = reader.getText().getLocation(); ASSERT_EQ(40U, loc.getStart()); ASSERT_EQ(41U, loc.getEnd()); } @@ -569,8 +570,11 @@ TEST(OsmlStreamParser, multipleCommands) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); + assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); assertCommand(reader, "b", 3, 5); + assertData(reader, " ", 5, 6, WhitespaceMode::PRESERVE); assertCommand(reader, "c", 6, 8); + assertData(reader, " ", 8, 9, WhitespaceMode::PRESERVE); assertCommand(reader, "d", 9, 11); assertEnd(reader, 11, 11); } @@ -584,10 +588,13 @@ TEST(OsmlStreamParser, fieldsWithSpaces) OsmlStreamParser reader(charReader, logger); assertCommand(reader, "a", 0, 2); + assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); assertFieldStart(reader, false, 3, 4); assertCommand(reader, "b", 4, 6); + assertData(reader, " ", 6, 7, WhitespaceMode::PRESERVE); 
assertCommand(reader, "c", 7, 9); assertFieldEnd(reader, 9, 10); + assertData(reader, " \n\n {", 10, 12, WhitespaceMode::PRESERVE); assertFieldStart(reader, false, 16, 17); assertCommand(reader, "d", 17, 19); assertFieldEnd(reader, 19, 20); diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index 3293370..6942166 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -74,13 +75,11 @@ public: }; static std::vector> parseXml( - const char *testString, - WhitespaceMode whitespaceMode = WhitespaceMode::TRIM) + const char *testString) { TestOsxmlEventListener listener; CharReader reader(testString); OsxmlEventParser parser(reader, listener, logger); - parser.setWhitespaceMode(whitespaceMode); parser.parse(); return listener.events; } @@ -157,7 +156,7 @@ TEST(OsxmlEventParser, magicTopLevelTagInside) ASSERT_EQ(expectedEvents, events); } -TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) +TEST(OsxmlEventParser, commandWithData) { const char *testString = " hello \n world "; // 012345678901 234567890123 @@ -168,50 +167,12 @@ TEST(OsxmlEventParser, commandWithDataPreserveWhitespace) {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - auto events = parseXml(testString, WhitespaceMode::PRESERVE); + auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); // Check the location of the text ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart()); ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd()); } - -TEST(OsxmlEventParser, commandWithDataTrimWhitespace) -{ - const char *testString = " hello \n world "; - // 012345678901 234567890123 - // 0 1 2 - - std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello \n 
world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::TRIM); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} - -TEST(OsxmlEventParser, commandWithDataCollapseWhitespace) -{ - const char *testString = " hello \n world "; - // 012345678901 234567890123 - // 0 1 2 - - std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, - {OsxmlEvent::DATA, Variant::arrayType{"hello world"}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; - - auto events = parseXml(testString, WhitespaceMode::COLLAPSE); - ASSERT_EQ(expectedEvents, events); - - // Check the location of the text - ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart()); - ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd()); -} } -- cgit v1.2.3 From b95cf0ddd1aee517ed948155d43da4e2b64cfcdf Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Thu, 26 Feb 2015 00:21:33 +0100 Subject: Fixed non-initialized variable --- src/core/parser/utils/TokenizedData.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index aeefa26..bcbbe43 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -26,6 +26,12 @@ #include "TokenizedData.hpp" namespace ousia { +/** + * Maximum token length. + */ +constexpr TokenLength MaxTokenLength = + std::numeric_limits::max(); + namespace { /** * Structure used to represent the position of a token in the internal @@ -52,12 +58,6 @@ struct TokenMark { */ bool special; - /** - * Maximum token length. 
- */ - static constexpr TokenLength MaxTokenLength = - std::numeric_limits::max(); - /** * Constructor of the TokenMark structure, initializes all members with the * given values. @@ -450,6 +450,7 @@ public: protectedChars.clear(); offsets.clear(); marks.clear(); + firstLinebreak = 0; currentIndentation = 0; lastIndentation = 0; numLinebreaks = 1; // Assume the stream starts with a linebreak -- cgit v1.2.3 From 041a2dd18050e9e26ca1ee00851461dff1e1f90c Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Thu, 26 Feb 2015 00:22:12 +0100 Subject: Moved "assert" functions to own header --- test/core/parser/utils/TokenizedDataTest.cpp | 39 +------------- test/core/parser/utils/TokenizedDataTestUtils.hpp | 64 +++++++++++++++++++++++ 2 files changed, 66 insertions(+), 37 deletions(-) create mode 100644 test/core/parser/utils/TokenizedDataTestUtils.hpp diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp index dfe2526..8488459 100644 --- a/test/core/parser/utils/TokenizedDataTest.cpp +++ b/test/core/parser/utils/TokenizedDataTest.cpp @@ -20,44 +20,9 @@ #include -namespace ousia { - -void assertToken(TokenizedDataReader &reader, TokenId id, - const std::string &text, const TokenSet &tokens = TokenSet{}, - WhitespaceMode mode = WhitespaceMode::TRIM, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset, - SourceId sourceId = InvalidSourceId) -{ - Token token; - ASSERT_TRUE(reader.read(token, tokens, mode)); - EXPECT_EQ(id, token.id); - EXPECT_EQ(text, token.content); - if (start != InvalidSourceOffset) { - EXPECT_EQ(start, token.getLocation().getStart()); - } - if (end != InvalidSourceOffset) { - EXPECT_EQ(end, token.getLocation().getEnd()); - } - EXPECT_EQ(sourceId, token.getLocation().getSourceId()); -} - -void assertText(TokenizedDataReader &reader, const std::string &text, - const TokenSet &tokens = TokenSet{}, - WhitespaceMode mode = WhitespaceMode::TRIM, - SourceOffset start = 
InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset, - SourceId id = InvalidSourceId) -{ - assertToken(reader, Tokens::Data, text, tokens, mode, start, end, id); -} +#include "TokenizedDataTestUtils.hpp" -void assertEnd(TokenizedDataReader &reader) -{ - Token token; - ASSERT_TRUE(reader.atEnd()); - ASSERT_FALSE(reader.read(token)); -} +namespace ousia { TEST(TokenizedData, dataWhitespacePreserve) { diff --git a/test/core/parser/utils/TokenizedDataTestUtils.hpp b/test/core/parser/utils/TokenizedDataTestUtils.hpp new file mode 100644 index 0000000..c384f9d --- /dev/null +++ b/test/core/parser/utils/TokenizedDataTestUtils.hpp @@ -0,0 +1,64 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#ifndef _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ +#define _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ + +namespace ousia { + +static void assertToken(TokenizedDataReader &reader, TokenId id, + const std::string &text, const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId sourceId = InvalidSourceId) +{ + Token token; + ASSERT_TRUE(reader.read(token, tokens, mode)); + EXPECT_EQ(id, token.id); + EXPECT_EQ(text, token.content); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, token.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, token.getLocation().getEnd()); + } + EXPECT_EQ(sourceId, token.getLocation().getSourceId()); +} + +static void assertText(TokenizedDataReader &reader, const std::string &text, + const TokenSet &tokens = TokenSet{}, + WhitespaceMode mode = WhitespaceMode::TRIM, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId id = InvalidSourceId) +{ + assertToken(reader, Tokens::Data, text, tokens, mode, start, end, id); +} + +static void assertEnd(TokenizedDataReader &reader) +{ + Token token; + ASSERT_TRUE(reader.atEnd()); + ASSERT_FALSE(reader.read(token)); +} + +} + +#endif /* _OUSIA_TOKENIZED_DATA_TEST_UTILS_HPP_ */ + -- cgit v1.2.3 From 19dd5946125e90dcbd61966896c9f6cfc4451d80 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Thu, 26 Feb 2015 00:22:23 +0100 Subject: Reactivated TokenizerTest --- CMakeLists.txt | 2 +- test/core/parser/utils/TokenizerTest.cpp | 94 ++++++++++++++++++++++++++++---- 2 files changed, 83 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 225e63d..75909e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -327,7 +327,7 @@ IF(TEST) test/core/parser/stack/StateTest test/core/parser/utils/SourceOffsetVectorTest test/core/parser/utils/TokenizedDataTest -# 
test/core/parser/utils/TokenizerTest + test/core/parser/utils/TokenizerTest test/core/parser/utils/TokenTrieTest test/core/resource/ResourceLocatorTest test/core/resource/ResourceRequestTest diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 0f2bfb7..785bd81 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -22,6 +22,8 @@ #include #include +#include "TokenizedDataTestUtils.hpp" + namespace ousia { TEST(Tokenizer, tokenRegistration) @@ -58,14 +60,16 @@ void expectData(const std::string &expected, SourceOffset tokenStart, { ASSERT_EQ(Tokens::Data, token.id); - Variant text = data.text(mode); - ASSERT_TRUE(text.isString()); + Token textToken; + TokenizedDataReader reader = data.reader(); + ASSERT_TRUE(reader.read(textToken, TokenSet{}, mode)); - EXPECT_EQ(expected, text.asString()); + EXPECT_EQ(expected, textToken.content); EXPECT_EQ(tokenStart, token.location.getStart()); EXPECT_EQ(tokenEnd, token.location.getEnd()); - EXPECT_EQ(textStart, text.getLocation().getStart()); - EXPECT_EQ(textEnd, text.getLocation().getEnd()); + EXPECT_EQ(textStart, textToken.getLocation().getStart()); + EXPECT_EQ(textEnd, textToken.getLocation().getEnd()); + EXPECT_TRUE(reader.atEnd()); } TEST(Tokenizer, textTokenPreserveWhitespace) @@ -97,8 +101,8 @@ TEST(Tokenizer, textTokenPreserveWhitespace) TokenizedData data; ASSERT_TRUE(tokenizer.read(reader, token, data)); - expectData("this \t is only a \n\n test text", 0, 32, 0, 32, - token, data, WhitespaceMode::PRESERVE); + expectData("this \t is only a \n\n test text", 0, 32, 0, 32, token, + data, WhitespaceMode::PRESERVE); data.clear(); ASSERT_FALSE(tokenizer.read(reader, token, data)); @@ -134,8 +138,8 @@ TEST(Tokenizer, textTokenTrimWhitespace) TokenizedData data; ASSERT_TRUE(tokenizer.read(reader, token, data)); - expectData("this \t is only a \n\n test text", 0, 32, 0, 32, - token, data, WhitespaceMode::TRIM); + expectData("this \t 
is only a \n\n test text", 0, 32, 0, 32, token, + data, WhitespaceMode::TRIM); data.clear(); ASSERT_FALSE(tokenizer.read(reader, token, data)); @@ -368,9 +372,12 @@ TEST(Tokenizer, commentTestWhitespacePreserve) if (te.id != Tokens::Data) { EXPECT_EQ(te.content, t.content); } else { - Variant text = data.text(WhitespaceMode::PRESERVE); - ASSERT_TRUE(text.isString()); - EXPECT_EQ(te.content, text.asString()); + TokenizedDataReader dataReader = data.reader(); + Token textToken; + ASSERT_TRUE(dataReader.read(textToken, TokenSet{}, + WhitespaceMode::PRESERVE)); + EXPECT_TRUE(dataReader.atEnd()); + EXPECT_EQ(te.content, textToken.content); } EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); EXPECT_EQ(te.location.getStart(), t.location.getStart()); @@ -380,5 +387,68 @@ TEST(Tokenizer, commentTestWhitespacePreserve) TokenizedData data; ASSERT_FALSE(tokenizer.read(reader, t, data)); } + +TEST(Tokenizer, nonPrimaryTokens) +{ + CharReader reader{ + "<>"}; + // 012345678901234567890 12345678901234567890123456789012345678901234567 + // 0 1 2 3 4 5 6 + + Tokenizer tokenizer; + + TokenId tBackslash = tokenizer.registerToken("\\"); + TokenId tDollar = tokenizer.registerToken("$", false); + TokenId tSpeechStart = tokenizer.registerToken("<<", false); + TokenId tSpeechEnd = tokenizer.registerToken(">>", false); + + TokenSet tokens = TokenSet{tDollar, tSpeechStart, tSpeechEnd}; + + Token token, textToken; + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tSpeechStart, "<<", tokens, + WhitespaceMode::TRIM, 0, 2); + assertText(dataReader, "switch to", tokens, WhitespaceMode::TRIM, 2, + 11); + assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 12, + 13); + assertText(dataReader, "inline", tokens, WhitespaceMode::TRIM, 13, 19); + assertEnd(dataReader); + } + + { + TokenizedData data; + 
ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(tBackslash, token.id); + ASSERT_EQ(20U, token.location.getStart()); + ASSERT_EQ(21U, token.location.getEnd()); + } + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertText(dataReader, "math mode", tokens, WhitespaceMode::TRIM, 21, + 30); + assertToken(dataReader, tDollar, "$", tokens, WhitespaceMode::TRIM, 30, + 31); + assertText(dataReader, "they said, see the world they said", tokens, + WhitespaceMode::TRIM, 32, 66); + assertToken(dataReader, tSpeechEnd, ">>", tokens, WhitespaceMode::TRIM, + 66, 68); + assertEnd(dataReader); + } + + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, token, data)); +} } -- cgit v1.2.3 From 12e10d18810b7ea4ce142d76e846b4faf0c33488 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Fri, 27 Feb 2015 18:52:43 +0100 Subject: Made OsmlStreamParser ready for user defined tokens, started to adapt unit tests. 
--- CMakeLists.txt | 32 +- src/formats/osml/OsmlStreamParser.cpp | 701 +++++++++---- src/formats/osml/OsmlStreamParser.hpp | 298 ++---- test/formats/osml/OsmlStreamParserTest.cpp | 1542 ++++++++++++++-------------- 4 files changed, 1355 insertions(+), 1218 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 75909e9..4e2d7f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -212,14 +212,14 @@ ADD_LIBRARY(ousia_core # ousia_core #) -#ADD_LIBRARY(ousia_osml +ADD_LIBRARY(ousia_osml # src/formats/osml/OsmlParser -# src/formats/osml/OsmlStreamParser -#) + src/formats/osml/OsmlStreamParser +) -#TARGET_LINK_LIBRARIES(ousia_osml -# ousia_core -#) +TARGET_LINK_LIBRARIES(ousia_osml + ousia_core +) ADD_LIBRARY(ousia_osxml src/formats/osxml/OsxmlAttributeLocator @@ -383,17 +383,17 @@ IF(TEST) # ousia_mozjs # ) -# ADD_EXECUTABLE(ousia_test_osml + ADD_EXECUTABLE(ousia_test_osml # test/formats/osml/OsmlParserTest -# test/formats/osml/OsmlStreamParserTest -# ) + test/formats/osml/OsmlStreamParserTest + ) -# TARGET_LINK_LIBRARIES(ousia_test_osml -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_osml -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_osml + ${GTEST_LIBRARIES} + ousia_core + ousia_osml + ousia_filesystem + ) # ADD_EXECUTABLE(ousia_test_osxml # test/formats/osxml/OsxmlEventParserTest @@ -423,7 +423,7 @@ IF(TEST) ADD_TEST(ousia_test_filesystem ousia_test_filesystem) ADD_TEST(ousia_test_html ousia_test_html) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) -# ADD_TEST(ousia_test_osml ousia_test_osml) + ADD_TEST(ousia_test_osml ousia_test_osml) # ADD_TEST(ousia_test_osxml ousia_test_osxml) ADD_TEST(ousia_test_xml ousia_test_xml) ENDIF() diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index d4cdbf8..7e01a3c 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -19,92 +19,411 @@ #include #include #include +#include #include +#include +#include + #include 
"OsmlStreamParser.hpp" +#include +#include + namespace ousia { +namespace { /** - * Plain format default tokenizer. + * Osml format default tokenizer. Registers the primary tokens in its + * constructor. A single, static instance of this class is created as + * "OsmlTokens", which is copied to the Tokenizer instance of + * OsmlStreamParserImpl. */ -class PlainFormatTokens : public Tokenizer { +class OsmlFormatTokens : public Tokenizer { public: + TokenId Backslash; + TokenId LineComment; + TokenId BlockCommentStart; + TokenId BlockCommentEnd; + TokenId FieldStart; + TokenId FieldEnd; + TokenId DefaultFieldStart; + TokenId AnnotationStart; + TokenId AnnotationEnd; + /** - * Id of the backslash token. + * Registers the plain format tokens in the internal tokenizer. */ - TokenId Backslash; + OsmlFormatTokens() + { + Backslash = registerToken("\\"); + LineComment = registerToken("%"); + BlockCommentStart = registerToken("%{"); + BlockCommentEnd = registerToken("}%"); + FieldStart = registerToken("{"); + FieldEnd = registerToken("}"); + DefaultFieldStart = registerToken("{!"); + AnnotationStart = registerToken("<\\"); + AnnotationEnd = registerToken("\\>"); + } +}; + +/** + * Instance of OsmlFormatTokens used to initialize the internal tokenizer + * instance of OsmlStreamParserImpl. + */ +static const OsmlFormatTokens OsmlTokens; +/** + * Structure representing a field. + */ +struct Field { /** - * Id of the line comment token. + * Specifies whether this field was marked as default field. */ - TokenId LineComment; + bool defaultField; /** - * Id of the block comment start token. + * Location at which the field was started. */ - TokenId BlockCommentStart; + SourceLocation location; /** - * Id of the block comment end token. + * Constructor of the Field structure, initializes all member variables with + * the given values. + * + * @param defaultField is a flag specifying whether this field is a default + * field. 
+ * @param location specifies the location at which the field was started. */ - TokenId BlockCommentEnd; + Field(bool defaultField = false, + const SourceLocation &location = SourceLocation{}) + : defaultField(defaultField), location(location) + { + } +}; +/** + * Entry used for the command stack. + */ +class Command { +private: /** - * Id of the field start token. + * Name and location of the current command. */ - TokenId FieldStart; + Variant name; /** - * Id of the field end token. + * Arguments that were passed to the command. */ - TokenId FieldEnd; + Variant arguments; /** - * Id of the default field start token. + * Vector used as stack for holding the number of opening/closing braces + * and the corresponding "isDefaultField" flag. */ - TokenId DefaultFieldStart; + std::vector fields; /** - * Id of the annotation start token. + * Set to true if this is a command with clear begin and end. */ - TokenId AnnotationStart; + bool hasRange; +public: /** - * Id of the annotation end token. + * Default constructor, marks this command as normal, non-range command. */ - TokenId AnnotationEnd; + Command() : hasRange(false) {} /** - * Registers the plain format tokens in the internal tokenizer. + * Constructor of the Command class. + * + * @param name is a string variant with name and location of the + * command. + * @param arguments is a map variant with the arguments given to the + * command. + * @param hasRange should be set to true if this is a command with + * explicit range. 
*/ - PlainFormatTokens() + Command(Variant name, Variant arguments, bool hasRange) + : name(std::move(name)), + arguments(std::move(arguments)), + hasRange(hasRange) { - Backslash = registerToken("\\"); - LineComment = registerToken("%"); - BlockCommentStart = registerToken("%{"); - BlockCommentEnd = registerToken("}%"); - FieldStart = registerToken("{"); - FieldEnd = registerToken("}"); - DefaultFieldStart = registerToken("{!"); - AnnotationStart = registerToken("<\\"); - AnnotationEnd = registerToken("\\>"); + } + + /** + * Returns a reference at the variant representing name and location of the + * command. + * + * @return a variant containing name and location of the command. + */ + const Variant &getName() const { return name; } + + /** + * Returns a reference at the variant containing name, value and location of + * the arguments. + * + * @return the arguments stored for the command. + */ + const Variant &getArguments() const { return arguments; } + + /** + * Returns a reference at the internal field list. This list should be used + * for printing error messages when fields are still open although the outer + * range field closes. + * + * @return a const reference at the internal field vector. + */ + const std::vector &getFields() const { return fields; } + + /** + * Returns true if this command is currently in a default field. + * + * @return true if the current field on the field stack was explicitly + * marked as default field. If the field stack is empty, true is returned + * if this is a range command. + */ + bool inDefaultField() const + { + return (!fields.empty() && fields.back().defaultField) || + (fields.empty() && hasRange); + } + + /** + * Returns true if this command currently is in any field. + * + * @return true if a field is on the stack or this is a range commands. + * Range commands always are in a field. 
+ */ + bool inField() const { return !fields.empty() || hasRange; } + + /** + * Returns true if this command currently is in a range field. + * + * @return true if the command has a range and no other ranges are on the + * stack. + */ + bool inRangeField() const { return fields.empty() && hasRange; } + + /** + * Returns true if this command currently is in a non-range field. + * + * @return true if the command is in a field, but the field is not the field + * constructed by the "range" + */ + bool inNonRangeField() const { return !fields.empty(); } + + /** + * Pushes another field onto the field stack of this command. + * + * @param defaultField if true, explicitly marks this field as default + * field. + * @param location is the source location at which the field was started. + * Used for error messages in which the user is notified about an error with + * too few closing fields. + */ + void pushField(bool defaultField = false, + const SourceLocation &location = SourceLocation{}) + { + fields.emplace_back(defaultField, location); + } + + /** + * Removes another field from the field stack of this command, returns true + * if the operation was successful. + * + * @return true if there was a field to pop on the stack, false otherwise. + */ + bool popField() + { + if (!fields.empty()) { + fields.pop_back(); + return true; + } + return false; } }; +} -static const PlainFormatTokens OsmlTokens; +/* Class OsmlStreamParserImpl */ -OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) - : reader(reader), - logger(logger), - tokenizer(OsmlTokens), - data(reader.getSourceId()) +/** + * Internal implementation of OsmlStreamParser. + */ +class OsmlStreamParserImpl { +public: + /** + * State enum compatible with OsmlStreamParserState but extended by two more + * entries (END and NONE). 
+ */ + enum class State : uint8_t { + COMMAND_START = 0, + COMMAND_END = 1, + FIELD_START = 2, + FIELD_END = 3, + ANNOTATION_START = 4, + ANNOTATION_END = 5, + DATA = 6, + END = 7, + RECOVERABLE_ERROR = 8, + IRRECOVERABLE_ERROR = 9 + }; + +private: + /** + * Reference to the CharReader instance from which the incomming bytes are + * read. + */ + CharReader &reader; + + /** + * Reference at the logger instance to which all error messages are sent. + */ + Logger &logger; + + /** + * Tokenizer instance used to read individual tokens from the text. + */ + Tokenizer tokenizer; + + /** + * Stack containing the current commands. + */ + std::stack commands; + + /** + * Variant containing the tokenized data that was returned from the + * tokenizer as data. + */ + TokenizedData data; + + /** + * Variable containing the current location of the parser. + */ + SourceLocation location; + + /** + * Function used internally to parse an identifier. + * + * @param start is the start byte offset of the identifier (including the + * backslash). + * @param allowNSSep should be set to true if the namespace separator is + * allowed in the identifier name. Issues error if the namespace separator + * is placed incorrectly. + */ + Variant parseIdentifier(size_t start, bool allowNSSep = false); + + /** + * Function used internally to handle the special "\begin" command. + * + * @return an internal State specifying whether an error occured (return + * values State::REOVERABLE_ERROR or State::IRRECOVERABLE_ERROR) or a + * command was actually started (return value State::COMMAND_START). + */ + State parseBeginCommand(); + + /** + * Function used internally to handle the special "\end" command. + * + * @return an internal State specifying whether an error occured (return + * values State::REOVERABLE_ERROR or State::IRRECOVERABLE_ERROR) or a + * command was actually ended (return value State::COMMAND_END). + */ + State parseEndCommand(); + + /** + * Parses the command arguments. 
Handles errors if the name of the command + * was given using the hash notation and as a name field. + * + * @param commandArgName is the name argument that was given using the hash + * notation. + * @return a map variant containing the arguments. + */ + Variant parseCommandArguments(Variant commandArgName); + + /** + * Function used internally to parse a command. + * + * @param start is the start byte offset of the command (including the + * backslash) + * @param isAnnotation if true, the command is not returned as command, but + * as annotation start. + * @return true if a command was actuall parsed, false otherwise. + */ + State parseCommand(size_t start, bool isAnnotation); + + /** + * Function used internally to parse a block comment. + */ + void parseBlockComment(); + + /** + * Function used internally to parse a generic comment. + */ + void parseLineComment(); + + /** + * Pushes the parsed command onto the command stack. + */ + void pushCommand(Variant commandName, Variant commandArguments, + bool hasRange); + + /** + * Checks whether there is any data pending to be issued, if yes, resets the + * currently peeked characters and returns true. + * + * @return true if there was any data and DATA should be returned by the + * parse function, false otherwise. + */ + bool checkIssueData(); + + /** + * Returns a reference at the current command at the top of the command + * stack. + * + * @return a reference at the top command in the command stack. + */ + Command &cmd() { return commands.top(); } + + /** + * Returns a reference at the current command at the top of the command + * stack. + * + * @return a reference at the top command in the command stack. + */ + const Command &cmd() const { return commands.top(); } + +public: + /** + * Constructor of the OsmlStreamParserImpl class. Attaches the new + * OsmlStreamParserImpl to the given CharReader and Logger instances. + * + * @param reader is the reader instance from which incomming characters + * should be read. 
+ * @param logger is the logger instance to which errors should be written. + */ + OsmlStreamParserImpl(CharReader &reader, Logger &logger); + + State parse(); + + const TokenizedData &getData() const { return data; } + const Variant &getCommandName() const { return cmd().getName(); } + const Variant &getCommandArguments() const { return cmd().getArguments(); } + const SourceLocation &getLocation() const { return location; } + bool inRangeCommand() const { return cmd().inRangeField(); }; + bool inDefaultField() const { return cmd().inDefaultField(); } +}; + +/* Class OsmlStreamParserImpl */ + +OsmlStreamParserImpl::OsmlStreamParserImpl(CharReader &reader, Logger &logger) + : reader(reader), logger(logger), tokenizer(OsmlTokens) { - // Place an intial command representing the complete file on the stack - commands.push(Command{"", Variant::mapType{}, true, true, true, false}); + commands.emplace("", Variant::mapType{}, true); } -Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) +Variant OsmlStreamParserImpl::parseIdentifier(size_t start, bool allowNSSep) { bool first = true; bool hasCharSinceNSSep = false; @@ -147,20 +466,20 @@ Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep) return res; } -OsmlStreamParser::State OsmlStreamParser::parseBeginCommand() +OsmlStreamParserImpl::State OsmlStreamParserImpl::parseBeginCommand() { // Expect a '{' after the command reader.consumeWhitespace(); if (!reader.expect('{')) { logger.error("Expected \"{\" after \\begin", reader); - return State::NONE; + return State::RECOVERABLE_ERROR; } // Parse the name of the command that should be opened Variant commandName = parseIdentifier(reader.getOffset(), true); if (commandName.asString().empty()) { logger.error("Expected identifier", commandName); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } // Check whether the next character is a '#', indicating the start of the @@ -176,7 +495,7 @@ OsmlStreamParser::State 
OsmlStreamParser::parseBeginCommand() if (!reader.expect('}')) { logger.error("Expected \"}\"", reader); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } // Parse the arguments @@ -185,28 +504,15 @@ OsmlStreamParser::State OsmlStreamParser::parseBeginCommand() // Push the command onto the command stack pushCommand(std::move(commandName), std::move(commandArguments), true); - return State::COMMAND; -} - -static bool checkStillInField(const OsmlStreamParser::Command &cmd, - const Variant &endName, Logger &logger) -{ - if (cmd.inField && !cmd.inRangeField) { - logger.error(std::string("\\end in open field of command \"") + - cmd.name.asString() + std::string("\""), - endName); - logger.note(std::string("Open command started here:"), cmd.name); - return true; - } - return false; + return State::COMMAND_START; } -OsmlStreamParser::State OsmlStreamParser::parseEndCommand() +OsmlStreamParserImpl::State OsmlStreamParserImpl::parseEndCommand() { // Expect a '{' after the command if (!reader.expect('{')) { logger.error("Expected \"{\" after \\end", reader); - return State::NONE; + return State::RECOVERABLE_ERROR; } // Fetch the name of the command that should be ended here @@ -215,56 +521,58 @@ OsmlStreamParser::State OsmlStreamParser::parseEndCommand() // Make sure the given command name is not empty if (name.asString().empty()) { logger.error("Expected identifier", name); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } // Make sure the command name is terminated with a '}' if (!reader.expect('}')) { logger.error("Expected \"}\"", reader); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } - // Unroll the command stack up to the last range command - while (!commands.top().hasRange) { - if (checkStillInField(commands.top(), name, logger)) { - return State::ERROR; + // Unroll the command stack up to the last range command, make sure we do + // not intersect with any open field + while (!cmd().inRangeField()) { + if (cmd().inField()) { + 
logger.error(std::string("\\end in open field of command \"") + + cmd().getName().asString() + std::string("\""), + name); + const std::vector &fields = cmd().getFields(); + for (const Field &field : fields) { + logger.note(std::string("Still open field started here: "), + field.location); + } + return State::IRRECOVERABLE_ERROR; } commands.pop(); } - // Make sure we're not in an open field of this command - if (checkStillInField(commands.top(), name, logger)) { - return State::ERROR; - } - // Special error message if the top-level command is reached if (commands.size() == 1) { logger.error(std::string("Cannot end command \"") + name.asString() + std::string("\" here, no command open"), name); - return State::ERROR; + return State::IRRECOVERABLE_ERROR; } - // Inform the about command mismatches - const Command &cmd = commands.top(); - if (commands.top().name.asString() != name.asString()) { - logger.error(std::string("Trying to end command \"") + - cmd.name.asString() + + // Inform the user about command mismatches, copy the current command + // descriptor before popping it from the stack + if (getCommandName().asString() != name.asString()) { + logger.error(std::string("Trying to end command \"") + name.asString() + std::string("\", but open command is \"") + - name.asString() + std::string("\""), + getCommandName().asString() + std::string("\""), name); - logger.note("Last command was opened here:", cmd.name); - return State::ERROR; + logger.note("Open command started here:", getCommandName()); + return State::IRRECOVERABLE_ERROR; } - // Set the location to the location of the command that was ended, then end - // the current command + // End the current command location = name.getLocation(); commands.pop(); - return cmd.inRangeField ? 
State::FIELD_END : State::NONE; + return State::COMMAND_END; } -Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName) +Variant OsmlStreamParserImpl::parseCommandArguments(Variant commandArgName) { // Parse the arguments using the universal VariantReader Variant commandArguments; @@ -290,29 +598,14 @@ Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName) return commandArguments; } -void OsmlStreamParser::pushCommand(Variant commandName, - Variant commandArguments, bool hasRange) -{ - // Store the location on the stack - location = commandName.getLocation(); - - // Place the command on the command stack, remove the last commands if we're - // not currently inside a field of these commands - while (!commands.top().inField) { - commands.pop(); - } - commands.push(Command{std::move(commandName), std::move(commandArguments), - hasRange, false, false, false}); -} - -OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, - bool isAnnotation) +OsmlStreamParserImpl::State OsmlStreamParserImpl::parseCommand( + size_t start, bool isAnnotation) { // Parse the commandName as a first identifier Variant commandName = parseIdentifier(start, true); if (commandName.asString().empty()) { logger.error("Empty command name", reader); - return State::NONE; + return State::RECOVERABLE_ERROR; } // Handle the special "begin" and "end" commands @@ -322,7 +615,7 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, const bool isEnd = commandNameComponents[0] == "end"; // Parse the begin or end command - State res = State::COMMAND; + State res = State::COMMAND_START; if (isBegin || isEnd) { if (commandNameComponents.size() > 1) { logger.error( @@ -378,12 +671,13 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, } else { // Make sure no arguments apart from the "name" argument are given // to an annotation end - Variant::mapType &map = commands.top().arguments.asMap(); + const Variant::mapType &map = 
getCommandArguments().asMap(); if (!map.empty()) { if (map.count("name") == 0 || map.size() > 1U) { logger.error( "An annotation end command may not have any arguments " - "other than \"name\""); + "other than \"name\"", + reader); return res; } } @@ -397,13 +691,13 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start, // If we're starting an annotation, return the command as annotation start // instead of command - if (isAnnotation && res == State::COMMAND) { + if (isAnnotation && res == State::COMMAND_START) { return State::ANNOTATION_START; } return res; } -void OsmlStreamParser::parseBlockComment() +void OsmlStreamParserImpl::parseBlockComment() { Token token; size_t depth = 1; @@ -426,7 +720,7 @@ void OsmlStreamParser::parseBlockComment() logger.error("File ended while being in a block comment", reader); } -void OsmlStreamParser::parseLineComment() +void OsmlStreamParserImpl::parseLineComment() { char c; while (reader.read(c)) { @@ -436,65 +730,34 @@ void OsmlStreamParser::parseLineComment() } } -bool OsmlStreamParser::checkIssueData() -{ - if (!data.empty()) { - location = data.getLocation(); - reader.resetPeek(); - return true; - } - return false; -} - -bool OsmlStreamParser::checkIssueFieldStart() +void OsmlStreamParserImpl::pushCommand(Variant commandName, + Variant commandArguments, bool hasRange) { - // Fetch the current command, and check whether we're currently inside a - // field of this command - Command &cmd = commands.top(); - if (!cmd.inField) { - // If this is a range command, we're now implicitly inside the field of - // this command -- we'll have to issue a field start command! 
- if (cmd.hasRange) { - cmd.inField = true; - cmd.inRangeField = true; - reader.resetPeek(); - return true; - } + // Store the location of the command + location = commandName.getLocation(); - // This was not a range command, so obviously we're now inside within - // a field of some command -- so unroll the commands stack until a - // command with open field is reached - while (!commands.top().inField) { - commands.pop(); - } + // Place the command on the command stack, remove the last commands if we're + // not currently inside a field of these commands + while (!cmd().inField()) { + commands.pop(); } - return false; + + // Push the new command onto the command stack + commands.emplace(std::move(commandName), std::move(commandArguments), + hasRange); } -bool OsmlStreamParser::closeField() +bool OsmlStreamParserImpl::checkIssueData() { - // Try to end an open field of the current command -- if the current command - // is not inside an open field, end this command and try to close the next - // one - for (int i = 0; i < 2 && commands.size() > 1; i++) { - Command &cmd = commands.top(); - if (!cmd.inRangeField) { - if (cmd.inField) { - cmd.inField = false; - if (cmd.inDefaultField) { - commands.pop(); - } - return true; - } - commands.pop(); - } else { - return false; - } + if (!data.empty()) { + location = data.getLocation(); + reader.resetPeek(); + return true; } return false; } -OsmlStreamParser::State OsmlStreamParser::parse() +OsmlStreamParserImpl::State OsmlStreamParserImpl::parse() { // Reset the data handler data.clear(); @@ -507,14 +770,6 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Special handling for Backslash and Text if (type == OsmlTokens.Backslash || type == OsmlTokens.AnnotationStart) { - // Before appending anything to the output data or starting a new - // command, check whether FIELD_START has to be issued, as the - // current command is a command with range - if (checkIssueFieldStart()) { - location = token.location; - return 
State::FIELD_START; - } - // Check whether a command starts now, without advancing the peek // cursor char c; @@ -535,11 +790,11 @@ OsmlStreamParser::State OsmlStreamParser::parse() State res = parseCommand(token.location.getStart(), type == OsmlTokens.AnnotationStart); switch (res) { - case State::ERROR: + case State::IRRECOVERABLE_ERROR: throw LoggableException( "Last error was irrecoverable, ending parsing " "process"); - case State::NONE: + case State::RECOVERABLE_ERROR: continue; default: return res; @@ -558,15 +813,12 @@ OsmlStreamParser::State OsmlStreamParser::parse() token.location.getStart() + 1); } - data.append(c, token.location.getStart(), reader.getPeekOffset()); + // Append the character to the output data, mark it as protected + data.append(c, token.location.getStart(), reader.getPeekOffset(), + true); reader.consumePeek(); continue; } else if (type == Tokens::Data) { - // Check whether FIELD_START has to be issued before appending text - if (checkIssueFieldStart()) { - location = token.location; - return State::FIELD_START; - } reader.consumePeek(); continue; } @@ -580,7 +832,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() // We will handle the token now, consume the peeked characters reader.consumePeek(); - // Update the location to the current token location + // Synchronize the location with the current token location location = token.location; if (token.id == OsmlTokens.LineComment) { @@ -588,39 +840,27 @@ OsmlStreamParser::State OsmlStreamParser::parse() } else if (token.id == OsmlTokens.BlockCommentStart) { parseBlockComment(); } else if (token.id == OsmlTokens.FieldStart) { - Command &cmd = commands.top(); - if (!cmd.inField) { - cmd.inField = true; - } + cmd().pushField(false, token.location); return State::FIELD_START; -/* logger.error( - "Got field start token \"{\", but no command for which to " - "start the field. 
Write \"\\{\" to insert this sequence as " - "text.", - token);*/ } else if (token.id == OsmlTokens.FieldEnd) { - closeField(); - return State::FIELD_END; -/* if (closeField()) { + // Remove all commands from the list that currently are not in any + // field + while (!cmd().inField()) { + commands.pop(); + } + + // If the remaining command is not in a range field, remove this + // command + if (cmd().inNonRangeField()) { + cmd().popField(); return State::FIELD_END; } logger.error( - "Got field end token \"}\", but there is no field to end. " - "Write \"\\}\" to insert this sequence as text.", - token);*/ + "Got field end token \"}\", but there is no field to end.", + token); } else if (token.id == OsmlTokens.DefaultFieldStart) { - // Try to start a default field the first time the token is reached - Command &topCmd = commands.top(); - if (!topCmd.inField) { - topCmd.inField = true; - topCmd.inDefaultField = true; - } + cmd().pushField(true, token.location); return State::FIELD_START; -/* logger.error( - "Got default field start token \"{!\", but no command for " - "which to start the field. 
Write \"\\{!\" to insert this " - "sequence as text", - token);*/ } else if (token.id == OsmlTokens.AnnotationEnd) { // We got a single annotation end token "\>" -- simply issue the // ANNOTATION_END event @@ -641,11 +881,25 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Make sure all open commands and fields have been ended at the end of the // stream while (commands.size() > 1) { - Command &cmd = commands.top(); - if (cmd.inField || cmd.hasRange) { - logger.error("Reached end of stream, but command \"" + - cmd.name.asString() + "\" has not been ended", - cmd.name); + if (cmd().inField()) { + // If the stream ended with an open range field, issue information + // about the range field + if (cmd().inRangeField()) { + // Inform about the still open command itself + logger.error("Reached end of stream, but command \"" + + getCommandName().asString() + + "\" has not been ended", + getCommandName()); + } else { + // Issue information about still open fields + const std::vector &fields = cmd().getFields(); + if (!fields.empty()) { + logger.error( + std::string( + "Reached end of stream, but field is still open."), + fields.back().location); + } + } } commands.pop(); } @@ -654,26 +908,45 @@ OsmlStreamParser::State OsmlStreamParser::parse() return State::END; } -Variant OsmlStreamParser::getText(WhitespaceMode mode) +/* Class OsmlStreamParser */ + +OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) + : impl(new OsmlStreamParserImpl(reader, logger)) +{ +} + +OsmlStreamParser::~OsmlStreamParser() +{ + // Stub needed because OsmlStreamParserImpl is incomplete in header +} + +OsmlStreamParser::State OsmlStreamParser::parse() +{ + return static_cast(impl->parse()); +} + +const TokenizedData &OsmlStreamParser::getData() const { - TokenizedData dataFork = data; - Variant text = dataFork.text(mode); - location = text.getLocation(); - return text; + return impl->getData(); } const Variant &OsmlStreamParser::getCommandName() const { - return 
commands.top().name; + return impl->getCommandName(); } const Variant &OsmlStreamParser::getCommandArguments() const { - return commands.top().arguments; + return impl->getCommandArguments(); } -bool OsmlStreamParser::inDefaultField() const +const SourceLocation &OsmlStreamParser::getLocation() const { - return commands.top().inRangeField || commands.top().inDefaultField; + return impl->getLocation(); } + +bool OsmlStreamParser::inDefaultField() const { return impl->inDefaultField(); } + +bool OsmlStreamParser::inRangeCommand() const { return impl->inRangeCommand(); } + } diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index 453a2bb..1fee90b 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -29,30 +29,29 @@ #ifndef _OUSIA_OSML_STREAM_PARSER_HPP_ #define _OUSIA_OSML_STREAM_PARSER_HPP_ +#include #include -#include -#include -#include -#include - namespace ousia { // Forward declarations class CharReader; class Logger; class OsmlStreamParserImpl; +class TokenizedData; +class Variant; /** * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml * format. The parser is constructed around a "parse" function, which reads data * from the underlying CharReader until a new state is reached and indicates * this state in a return value. The calling code then has to pull corresponding - * data from the stream reader. The reader makes sure the incommind file is + * data from the stream reader. The reader makes sure the incomming stream is * syntactically valid and tries to recorver from most errors. If an error is * irrecoverable (this is the case for errors with wrong nesting of commands or * fields, as this would lead to too many consecutive errors) a - * LoggableException is thrown. + * LoggableException is thrown. The OsmlStreamParser can be compared to a SAX + * parser for XML. 
*/ class OsmlStreamParser { public: @@ -60,39 +59,21 @@ public: * Enum used to indicate which state the OsmlStreamParser class is in * after calling the "parse" function. */ - enum class State { - /** - * State returned if a fully featured command has been read. A command - * consists of the command name and its arguments (which optionally - * includes the name). - */ - COMMAND, - - /** - * State returned if data is given. The reader must decide which field - * or command this should be routed to. Trailing or leading whitespace - * has been removed. Only called if the data is non-empty. - */ - DATA, - + enum class State : uint8_t { /** - * A user-defined entity has been found. The entity sequence is stored - * in the command name. + * State returned if the start of a command has been read. Use the + * getCommandName(), getCommandArguments() and inRangeCommand() + * functions the retrieve more information about the command that was + * just started. */ - ENTITY, + COMMAND_START = 0, /** - * State returned if an annotation was started. An annotation consists - * of the command name and its arguments (which optionally include the - * name). + * State returned if a range command has just ended. This state is not + * returned for non-range commands (as the actual end of a command is + * context dependant). */ - ANNOTATION_START, - - /** - * State returned if an annotation ends. The reader indicates which - * annotation ends. - */ - ANNOTATION_END, + COMMAND_END = 1, /** * State returned if a new field started. The reader assures that the @@ -100,200 +81,47 @@ public: * is not started if data has been given outside of a field. The * field number is set to the current field index. */ - FIELD_START, + FIELD_START = 2, /** * State returned if the current field ends. The reader assures that a * field was actually open. */ - FIELD_END, + FIELD_END = 3, /** - * The end of the stream has been reached. + * State returned if an annotation was started. 
An annotation consists + * of the command name and its arguments (which optionally include the + * name). */ - END, + ANNOTATION_START = 4, /** - * Returned from internal functions if nothing should be done. + * State returned if an annotation ends. The reader indicates which + * annotation ends. */ - NONE, + ANNOTATION_END = 5, /** - * Returned from internal function to indicate irrecoverable errors. + * State returned if data is given. The reader must decide which field + * or command this should be routed to. Trailing or leading whitespace + * has been removed. Only called if the data is non-empty. */ - ERROR - }; - - /** - * Entry used for the command stack. - */ - struct Command { - /** - * Name and location of the current command. - */ - Variant name; - - /** - * Arguments that were passed to the command. - */ - Variant arguments; - - /** - * Vector used as stack for holding the number of opening/closing braces - * and the corresponding "isDefaultField" flag. - */ - std::vector fields; - - /** - * Set to true if this is a command with clear begin and end. - */ - bool hasRange; - - /** - * Default constructor. - */ - Command() - : hasRange(false), - inField(false), - inDefaultField() - { - } + DATA = 6, /** - * Constructor of the Command class. - * - * @param name is a string variant with name and location of the - * command. - * @param arguments is a map variant with the arguments given to the - * command. - * @param hasRange should be set to true if this is a command with - * explicit range. - * @param inDefaultField is set to true if we currently are in a - * specially marked default field. - */ - Command(Variant name, Variant arguments, bool hasRange) - : name(std::move(name)), - arguments(std::move(arguments)), - hasRange(hasRange), - inField(inField), - inRangeField(inRangeField), - inDefaultField(inDefaultField) - { - } + * The end of the stream has been reached. 
+ */ + END = 7 }; private: /** - * Reference to the CharReader instance from which the incomming bytes are - * read. - */ - CharReader &reader; - - /** - * Reference at the logger instance to which all error messages are sent. - */ - Logger &logger; - - /** - * Tokenizer instance used to read individual tokens from the text. - */ - Tokenizer tokenizer; - - /** - * Variant containing the tokenized data that was returned from the - * tokenizer as data. - */ - TokenizedData data; - - /** - * Stack containing the current commands. - */ - std::stack commands; - - /** - * Pointer at + * Pointer at the class containing the internal implementation (according + * to the PIMPL idiom). */ std::unique_ptr impl; - /** - * Function used internall to parse an identifier. - * - * @param start is the start byte offset of the identifier (including the - * backslash). - * @param allowNSSep should be set to true if the namespace separator is - * allowed in the identifier name. Issues error if the namespace separator - * is placed incorrectly. - */ - Variant parseIdentifier(size_t start, bool allowNSSep = false); - - /** - * Function used internally to handle the special "\begin" command. - */ - State parseBeginCommand(); - - /** - * Function used internally to handle the special "\end" command. - */ - State parseEndCommand(); - - /** - * Pushes the parsed command onto the command stack. - */ - void pushCommand(Variant commandName, Variant commandArguments, - bool hasRange); - - /** - * Parses the command arguments. - */ - Variant parseCommandArguments(Variant commandArgName); - - /** - * Function used internally to parse a command. - * - * @param start is the start byte offset of the command (including the - * backslash) - * @param isAnnotation if true, the command is not returned as command, but - * as annotation start. - * @return true if a command was actuall parsed, false otherwise. 
- */ - State parseCommand(size_t start, bool isAnnotation); - - /** - * Function used internally to parse a block comment. - */ - void parseBlockComment(); - - /** - * Function used internally to parse a generic comment. - */ - void parseLineComment(); - - /** - * Checks whether there is any data pending to be issued, if yes, issues it. - * - * @return true if there was any data and DATA should be returned by the - * parse function, false otherwise. - */ - bool checkIssueData(); - - /** - * Called before any data is appended to the internal data handler. Checks - * whether a new field should be started or implicitly ended. - * - * @return true if FIELD_START should be returned by the parse function. - */ - bool checkIssueFieldStart(); - - /** - * Closes a currently open field. Note that the command will be removed from - * the internal command stack if the field that is being closed is a - * field marked as default field. - * - * @return true if the field could be closed, false if there was no field - * to close. - */ - bool closeField(); - public: /** * Constructor of the OsmlStreamParser class. Attaches the new @@ -321,30 +149,10 @@ public: */ State parse(); - /** - * Returns a reference at the internally stored data. Only valid if - * State::DATA was returned by the "parse" function. - * - * @return a reference at a variant containing the data parsed by the - * "parse" function. - */ - const TokenizedData &getData() const { return data; } - - /** - * Returns the complete content of the internal TokenizedData instance as - * a single string Variant. This method is mainly used in the unit tests for - * this class, it simply calls the text() method of TokenizedData. - * - * @param mode is the WhitespaceMode that should be used for returning the - * text. - * @return a string variant containing the text content of the internal - * TokenizedData instance or a nullptr variant if there is no text. 
- */ - Variant getText(WhitespaceMode mode = WhitespaceMode::COLLAPSE); - /** * Returns a reference at the internally stored command name. Only valid if - * State::COMMAND was returned by the "parse" function. + * State::COMMAND_START, State::ANNOTATION_START or State::ANNOTATION_END + * was returned by the "parse" function. * * @return a reference at a variant containing name and location of the * parsed command. @@ -353,18 +161,46 @@ public: /** * Returns a reference at the internally stored command name. Only valid if - * State::COMMAND was returned by the "parse" function. + * State::COMMAND_START, State::ANNOTATION_START or State::ANNOTATION_END + * was returned by the "parse" function. * * @return a reference at a variant containing arguments given to the * command. */ const Variant &getCommandArguments() const; + /** + * Returns a reference at the internally stored data. Only valid if + * State::DATA was returned by the "parse" function. + * + * @return a reference at a variant containing the data parsed by the + * "parse" function. + */ + const TokenizedData &getData() const; + + /** + * Returns the location of the current token. + */ + const SourceLocation &getLocation() const; + + /** + * Returns true if the currently started command is a range command, only + * valid if State::COMMAND_START was returned by the "parse" function. + * + * @return true if the command is started is a range command, false + * otherwise. + */ + bool inRangeCommand() const; + /** * Returns true if the current field is the "default" field. This is true if * the parser either is in the outer range of a range command or inside a - * field that has been especially marked as "default" field (using the "|" - * syntax). + * field that has been especially marked as "default" field (using the "{!" + * syntax). Only valid if State::FIELD_START was returned by the "parse" + * function. + * + * @return true if the current field was marked as default field (using the + * "{!" syntax). 
*/ bool inDefaultField() const; }; diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 3d01007..8b64e51 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -21,7 +21,9 @@ #include #include +#include #include +#include #include @@ -30,147 +32,196 @@ namespace ousia { static TerminalLogger logger(std::cerr, true); // static ConcreteLogger logger; -static OsmlStreamParser::State skipEmptyData(OsmlStreamParser &reader) +static void assertCommandStart(OsmlStreamParser &parser, + const std::string &name, + bool rangeCommand, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { - OsmlStreamParser::State res = reader.parse(); - if (res == OsmlStreamParser::State::DATA) { - EXPECT_FALSE(reader.getData().hasNonWhitespaceText()); - res = reader.parse(); - } - return res; -} - -static void assertCommand(OsmlStreamParser &reader, const std::string &name, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) -{ - ASSERT_EQ(OsmlStreamParser::State::COMMAND, skipEmptyData(reader)); - EXPECT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::COMMAND_START, parser.parse()); + EXPECT_EQ(name, parser.getCommandName().asString()); + EXPECT_EQ(rangeCommand, parser.inRangeCommand()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertCommand(OsmlStreamParser &reader, 
const std::string &name, - const Variant::mapType &args, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) +static void assertCommandStart(OsmlStreamParser &parser, + const std::string &name, + bool rangeCommand, + const Variant::mapType &args, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { - assertCommand(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); + assertCommandStart(parser, name, rangeCommand, start, end); + EXPECT_EQ(args, parser.getCommandArguments()); } -static void assertData(OsmlStreamParser &reader, const std::string &data, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset, - WhitespaceMode mode = WhitespaceMode::COLLAPSE) +static void assertCommand(OsmlStreamParser &parser, + const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - Variant text = reader.getText(mode); - ASSERT_TRUE(text.isString()); - EXPECT_EQ(data, text.asString()); + assertCommandStart(parser, name, false, Variant::mapType{}, start, end); +} + +static void assertCommandEnd(OsmlStreamParser &parser, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) +{ + ASSERT_EQ(OsmlStreamParser::State::COMMAND_END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, text.getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, text.getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); + } +} + +static void assertTextData(OsmlStreamParser &parser, const std::string &text, + SourceOffset dataStart = InvalidSourceOffset, + SourceOffset dataEnd = InvalidSourceOffset, + SourceOffset textStart = 
InvalidSourceOffset, + SourceOffset textEnd = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + + const TokenizedData &data = parser.getData(); + TokenizedDataReader dataReader = data.reader(); + + Token token; + ASSERT_TRUE(dataReader.read(token, TokenSet{}, mode)); + EXPECT_EQ(Tokens::Data, token.id); + EXPECT_EQ(text, token.content); + if (dataStart != InvalidSourceOffset) { + EXPECT_EQ(dataStart, data.getLocation().getStart()); + EXPECT_EQ(dataStart, parser.getLocation().getStart()); + } + if (dataEnd != InvalidSourceOffset) { + EXPECT_EQ(dataEnd, data.getLocation().getEnd()); + EXPECT_EQ(dataEnd, parser.getLocation().getEnd()); + } + if (textStart != InvalidSourceOffset) { + EXPECT_EQ(textStart, token.getLocation().getStart()); } + if (textEnd != InvalidSourceOffset) { + EXPECT_EQ(textEnd, token.getLocation().getEnd()); + } +} + +static void assertData(OsmlStreamParser &parser, const std::string &text, + SourceOffset textStart = InvalidSourceOffset, + SourceOffset textEnd = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) +{ + assertTextData(parser, text, InvalidSourceOffset, InvalidSourceOffset, textStart, textEnd, mode); +} + +static void assertEmptyData(OsmlStreamParser &parser) +{ + ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + + const TokenizedData &data = parser.getData(); + TokenizedDataReader dataReader = data.reader(); + + Token token; + EXPECT_FALSE(dataReader.read(token, TokenSet{}, WhitespaceMode::TRIM)); } -static void assertFieldStart(OsmlStreamParser &reader, bool defaultField, + +static void assertFieldStart(OsmlStreamParser &parser, bool defaultField, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_START, skipEmptyData(reader)); - EXPECT_EQ(defaultField, reader.inDefaultField()); + ASSERT_EQ(OsmlStreamParser::State::FIELD_START, parser.parse()); 
+ EXPECT_EQ(defaultField, parser.inDefaultField()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertFieldEnd(OsmlStreamParser &reader, +static void assertFieldEnd(OsmlStreamParser &parser, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::FIELD_END, skipEmptyData(reader)); + ASSERT_EQ(OsmlStreamParser::State::FIELD_END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertAnnotationStart(OsmlStreamParser &reader, +static void assertAnnotationStart(OsmlStreamParser &parser, const std::string &name, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, skipEmptyData(reader)); - EXPECT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, parser.parse()); + EXPECT_EQ(name, parser.getCommandName().asString()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getCommandName().getLocation().getStart()); - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getCommandName().getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd()); - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getCommandName().getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void 
assertAnnotationStart(OsmlStreamParser &reader, +static void assertAnnotationStart(OsmlStreamParser &parser, const std::string &name, const Variant::mapType &args, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - assertAnnotationStart(reader, name, start, end); - EXPECT_EQ(args, reader.getCommandArguments()); + assertAnnotationStart(parser, name, start, end); + EXPECT_EQ(args, parser.getCommandArguments()); } -static void assertAnnotationEnd(OsmlStreamParser &reader, +static void assertAnnotationEnd(OsmlStreamParser &parser, const std::string &name, const std::string &elementName, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, skipEmptyData(reader)); - ASSERT_EQ(name, reader.getCommandName().asString()); + ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, parser.parse()); + ASSERT_EQ(name, parser.getCommandName().asString()); if (!elementName.empty()) { - ASSERT_EQ(1U, reader.getCommandArguments().asMap().size()); - ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name")); + ASSERT_EQ(1U, parser.getCommandArguments().asMap().size()); + ASSERT_EQ(1U, parser.getCommandArguments().asMap().count("name")); - auto it = reader.getCommandArguments().asMap().find("name"); + auto it = parser.getCommandArguments().asMap().find("name"); ASSERT_EQ(elementName, it->second.asString()); } if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } -static void assertEnd(OsmlStreamParser &reader, +static void assertEnd(OsmlStreamParser &parser, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::END, skipEmptyData(reader)); + 
ASSERT_EQ(OsmlStreamParser::State::END, parser.parse()); if (start != InvalidSourceOffset) { - EXPECT_EQ(start, reader.getLocation().getStart()); + EXPECT_EQ(start, parser.getLocation().getStart()); } if (end != InvalidSourceOffset) { - EXPECT_EQ(end, reader.getLocation().getEnd()); + EXPECT_EQ(end, parser.getLocation().getEnd()); } } @@ -179,9 +230,9 @@ TEST(OsmlStreamParser, empty) const char *testString = ""; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser, 0, 0); } TEST(OsmlStreamParser, oneCharacter) @@ -189,45 +240,102 @@ TEST(OsmlStreamParser, oneCharacter) const char *testString = "a"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::COLLAPSE); + assertEnd(parser, 1, 1); +} + +TEST(OsmlStreamParser, whitespacePreserve) +{ + const char *testString = " hello \t world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, " hello \t world ", 0, 15, 0, 15, + WhitespaceMode::PRESERVE); + assertEnd(parser, 15, 15); +} + +TEST(OsmlStreamParser, whitespaceTrim) +{ + const char *testString = " hello \t world "; + // 0123456 78901234 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); - assertData(reader, "a", 0, 1); + assertTextData(parser, "hello \t world", 0, 15, 1, 14, + WhitespaceMode::TRIM); + assertEnd(parser, 15, 15); } -TEST(OsmlStreamParser, whitespaceElimination) +TEST(OsmlStreamParser, whitespaceCollapse) { const char *testString = " hello \t world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(reader, 
"hello world", 1, 14); + assertTextData(parser, "hello world", 0, 15, 1, 14, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 15, 15); } -TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak) +TEST(OsmlStreamParser, whitespaceCollapseLinebreak) { const char *testString = " hello \n world "; // 0123456 78901234 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "hello world", 0, 15, 1, 14, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 15, 15); +} + +TEST(OsmlStreamParser, whitespaceCollapseProtected) +{ + const char *testString = " hello\\ \\ world "; + // 012345 67 89012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertTextData(parser, "hello world", 0, 16, 1, 15, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 16, 16); +} + +TEST(OsmlStreamParser, whitespaceCollapseProtected2) +{ + const char *testString = " hello \\ \\ world "; + // 012345 67 89012345 + // 0 1 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); - assertData(reader, "hello world", 1, 14); + assertTextData(parser, "hello world", 0, 17, 1, 16, + WhitespaceMode::COLLAPSE); + assertEnd(parser, 17, 17); } static void testEscapeSpecialCharacter(const std::string &c) { CharReader charReader(std::string("\\") + c); - OsmlStreamParser reader(charReader, logger); - EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - EXPECT_EQ(c, reader.getText().asString()); - - SourceLocation loc = reader.getText().getLocation(); - EXPECT_EQ(0U, loc.getStart()); - EXPECT_EQ(1U + c.size(), loc.getEnd()); + OsmlStreamParser parser(charReader, logger); + assertTextData(parser, c, 0, 2, 0, 2, WhitespaceMode::PRESERVE); + assertEnd(parser, 2, 2); } TEST(OsmlStreamParser, escapeSpecialCharacters) @@ -240,9 +348,11 @@ TEST(OsmlStreamParser, escapeSpecialCharacters) TEST(OsmlStreamParser, simpleSingleLineComment) 
{ const char *testString = "% This is a single line comment"; + // 0123456789012345678901234567890 + // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + OsmlStreamParser parser(charReader, logger); + assertEnd(parser, 31, 31); } TEST(OsmlStreamParser, singleLineComment) @@ -251,24 +361,11 @@ TEST(OsmlStreamParser, singleLineComment) // 01234567890123456789012345678901 23 // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } - - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(33U, loc.getStart()); - ASSERT_EQ(34U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + assertTextData(parser, "b", 33, 34, 33, 34, WhitespaceMode::PRESERVE); + assertEnd(parser, 34, 34); } TEST(OsmlStreamParser, multilineComment) @@ -277,24 +374,27 @@ TEST(OsmlStreamParser, multilineComment) // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getText().asString()); - SourceLocation loc = 
reader.getText().getLocation(); - ASSERT_EQ(40U, loc.getStart()); - ASSERT_EQ(41U, loc.getEnd()); - } + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + assertTextData(parser, "b", 40, 41, 40, 41, WhitespaceMode::PRESERVE); + assertEnd(parser, 41, 41); +} - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); +TEST(OsmlStreamParser, unfinishedMultilineComment) +{ + const char *testString = "a%{ This is a\n\n multiline line comment"; + // 0123456789012 3 456789012345678901234567 + // 0 1 2 3 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + logger.reset(); + + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 38, 38); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, nestedMultilineComment) @@ -303,24 +403,11 @@ TEST(OsmlStreamParser, nestedMultilineComment) // 0123456789012 3 456789012345678901234567890 // 0 1 2 3 4 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("a", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(1U, loc.getEnd()); - } - - { - ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse()); - ASSERT_EQ("b", reader.getText().asString()); - SourceLocation loc = reader.getText().getLocation(); - ASSERT_EQ(40U, loc.getStart()); - ASSERT_EQ(41U, loc.getEnd()); - } + OsmlStreamParser parser(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); + assertTextData(parser, "b", 40, 41, 40, 41, WhitespaceMode::PRESERVE); + assertEnd(parser, 41, 41); } TEST(OsmlStreamParser, simpleCommand) @@ -328,45 +415,27 @@ TEST(OsmlStreamParser, simpleCommand) const char *testString = "\\test"; // 0 12345 CharReader charReader(testString); - 
OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser parser(charReader, logger); - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - - ASSERT_EQ(0U, reader.getCommandArguments().asMap().size()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertCommand(parser, "test", 0, 5); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithName) { - const char *testString = "\\test#bla"; - // 0 12345678 + const char *testString = "\\test#foo"; + // 012345678 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + OsmlStreamParser parser(charReader, logger); - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(1U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("name")); - ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); + assertCommandStart(parser, "test", false, Variant::mapType{{"name", "foo"}}, + 0, 5); - loc = commandArguments.asMap()["name"].getLocation(); - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(5U, args["name"].getLocation().getStart()); + ASSERT_EQ(9U, args["name"].getLocation().getEnd()); - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithArguments) @@ -375,38 +444,21 @@ TEST(OsmlStreamParser, simpleCommandWithArguments) // 0 123456789012345 
678901 2 // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); + OsmlStreamParser parser(charReader, logger); - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); + assertCommandStart(parser, "test", false, + Variant::mapType{{"a", 1}, {"b", 2}, {"c", "test"}}, 0, + 5); - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(3U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("a")); - ASSERT_EQ(1U, commandArguments.asMap().count("b")); - ASSERT_EQ(1U, commandArguments.asMap().count("c")); - ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); - ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); - ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(8U, args["a"].getLocation().getStart()); + ASSERT_EQ(9U, args["a"].getLocation().getEnd()); + ASSERT_EQ(12U, args["b"].getLocation().getStart()); + ASSERT_EQ(13U, args["b"].getLocation().getEnd()); + ASSERT_EQ(16U, args["c"].getLocation().getStart()); + ASSERT_EQ(22U, args["c"].getLocation().getEnd()); - loc = commandArguments.asMap()["a"].getLocation(); - ASSERT_EQ(8U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); - - loc = commandArguments.asMap()["b"].getLocation(); - ASSERT_EQ(12U, loc.getStart()); - ASSERT_EQ(13U, loc.getEnd()); - - loc = commandArguments.asMap()["c"].getLocation(); - ASSERT_EQ(16U, loc.getStart()); - ASSERT_EQ(22U, loc.getEnd()); - - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + assertEnd(parser); } TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) @@ -415,44 +467,24 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName) // 0 1234567890123456789 
01234 56 // 0 1 2 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse()); - - Variant commandName = reader.getCommandName(); - ASSERT_EQ("test", commandName.asString()); - SourceLocation loc = commandName.getLocation(); - ASSERT_EQ(0U, loc.getStart()); - ASSERT_EQ(5U, loc.getEnd()); - - Variant commandArguments = reader.getCommandArguments(); - ASSERT_TRUE(commandArguments.isMap()); - ASSERT_EQ(4U, commandArguments.asMap().size()); - ASSERT_EQ(1U, commandArguments.asMap().count("a")); - ASSERT_EQ(1U, commandArguments.asMap().count("b")); - ASSERT_EQ(1U, commandArguments.asMap().count("c")); - ASSERT_EQ(1U, commandArguments.asMap().count("name")); - ASSERT_EQ(1, commandArguments.asMap()["a"].asInt()); - ASSERT_EQ(2, commandArguments.asMap()["b"].asInt()); - ASSERT_EQ("test", commandArguments.asMap()["c"].asString()); - ASSERT_EQ("bla", commandArguments.asMap()["name"].asString()); - - loc = commandArguments.asMap()["a"].getLocation(); - ASSERT_EQ(12U, loc.getStart()); - ASSERT_EQ(13U, loc.getEnd()); - - loc = commandArguments.asMap()["b"].getLocation(); - ASSERT_EQ(16U, loc.getStart()); - ASSERT_EQ(17U, loc.getEnd()); - - loc = commandArguments.asMap()["c"].getLocation(); - ASSERT_EQ(20U, loc.getStart()); - ASSERT_EQ(26U, loc.getEnd()); - - loc = commandArguments.asMap()["name"].getLocation(); - ASSERT_EQ(5U, loc.getStart()); - ASSERT_EQ(9U, loc.getEnd()); - - ASSERT_EQ(OsmlStreamParser::State::END, reader.parse()); + OsmlStreamParser parser(charReader, logger); + + assertCommandStart( + parser, "test", false, + Variant::mapType{{"name", "bla"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 0, + 5); + + Variant::mapType args = parser.getCommandArguments().asMap(); + ASSERT_EQ(5U, args["name"].getLocation().getStart()); + ASSERT_EQ(9U, args["name"].getLocation().getEnd()); + ASSERT_EQ(12U, args["a"].getLocation().getStart()); + ASSERT_EQ(13U, args["a"].getLocation().getEnd()); + 
ASSERT_EQ(16U, args["b"].getLocation().getStart()); + ASSERT_EQ(17U, args["b"].getLocation().getEnd()); + ASSERT_EQ(20U, args["c"].getLocation().getStart()); + ASSERT_EQ(26U, args["c"].getLocation().getEnd()); + + assertEnd(parser); } TEST(OsmlStreamParser, fields) @@ -461,21 +493,21 @@ TEST(OsmlStreamParser, fields) // 01234567890123 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 7, 8); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "b", 9, 10); - assertFieldEnd(reader, 10, 11); + assertFieldStart(parser, false, 8, 9); + assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 10, 11); - assertFieldStart(reader, false, 11, 12); - assertData(reader, "c", 12, 13); - assertFieldEnd(reader, 13, 14); - assertEnd(reader, 14, 14); + assertFieldStart(parser, false, 11, 12); + assertTextData(parser, "c", 12, 13, 12, 13, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 13, 14); + assertEnd(parser, 14, 14); } TEST(OsmlStreamParser, dataOutsideField) @@ -484,785 +516,781 @@ TEST(OsmlStreamParser, dataOutsideField) // 0123456789012 // 0 1 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertTextData(parser, "a", 6, 7, 6, 7, WhitespaceMode::COLLAPSE); + assertFieldEnd(parser, 7, 8); - assertFieldStart(reader, false, 8, 
9); - assertData(reader, "b", 9, 10); - assertFieldEnd(reader, 10, 11); + assertFieldStart(parser, false, 8, 9); + assertTextData(parser, "b", 9, 10, 9, 10, WhitespaceMode::COLLAPSE); + assertFieldEnd(parser, 10, 11); - assertData(reader, "c", 12, 13); - assertEnd(reader, 13, 13); + assertTextData(parser, "c", 11, 13, 12, 13, WhitespaceMode::COLLAPSE); + assertEnd(parser, 13, 13); } TEST(OsmlStreamParser, nestedCommand) { - const char *testString = "\\test{a}{\\test2{b} c} d"; - // 012345678 90123456789012 - // 0 1 2 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + const char *testString = "\\test{a}{\\test2{b} c} d"; + // 012345678 90123456789012 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertData(parser, "a", 6, 7); + assertFieldEnd(parser, 7, 8); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "a", 6, 7); - assertFieldEnd(reader, 7, 8); - - assertFieldStart(reader, false, 8, 9); - { - assertCommand(reader, "test2", 9, 15); - assertFieldStart(reader, false, 15, 16); - assertData(reader, "b", 16, 17); - assertFieldEnd(reader, 17, 18); - } - assertData(reader, "c", 19, 20); - assertFieldEnd(reader, 20, 21); - assertData(reader, "d", 22, 23); - assertEnd(reader, 23, 23); + assertFieldStart(parser, false, 8, 9); + assertCommand(parser, "test2", 9, 15); + assertFieldStart(parser, false, 15, 16); + assertData(parser, "b", 16, 17); + assertFieldEnd(parser, 17, 18); + assertData(parser, "c", 19, 20); + assertFieldEnd(parser, 20, 21); + assertData(parser, "d", 22, 23); + assertEnd(parser, 23, 23); } + TEST(OsmlStreamParser, nestedCommandImmediateEnd) { - const char *testString = "\\test{\\test2{b}} d"; - // 012345 678901234567 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, 
"test", 0, 5); - assertFieldStart(reader, false, 5, 6); - { - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertData(reader, "b", 13, 14); - assertFieldEnd(reader, 14, 15); - } - assertFieldEnd(reader, 15, 16); - assertData(reader, "d", 17, 18); - assertEnd(reader, 18, 18); + const char *testString = "\\test{\\test2{b}} d"; + // 012345 678901234567 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + { + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertData(parser, "b", 13, 14); + assertFieldEnd(parser, 14, 15); + } + assertFieldEnd(parser, 15, 16); + assertData(parser, "d", 17, 18); + assertEnd(parser, 18, 18); } TEST(OsmlStreamParser, nestedCommandNoData) { - const char *testString = "\\test{\\test2}"; - // 012345 6789012 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + const char *testString = "\\test{\\test2}"; + // 012345 6789012 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldEnd(reader, 12, 13); - assertEnd(reader, 13, 13); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldEnd(parser, 12, 13); + assertEnd(parser, 13, 13); } TEST(OsmlStreamParser, multipleCommands) { - const char *testString = "\\a \\b \\c \\d"; - // 012 345 678 90 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + const char *testString = "\\a \\b \\c \\d"; + // 012 345 678 90 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); 
- assertCommand(reader, "b", 3, 5); - assertData(reader, " ", 5, 6, WhitespaceMode::PRESERVE); - assertCommand(reader, "c", 6, 8); - assertData(reader, " ", 8, 9, WhitespaceMode::PRESERVE); - assertCommand(reader, "d", 9, 11); - assertEnd(reader, 11, 11); + assertCommand(parser, "a", 0, 2); + assertEmptyData(parser); + assertCommand(parser, "b", 3, 5); + assertEmptyData(parser); + assertCommand(parser, "c", 6, 8); + assertEmptyData(parser); + assertCommand(parser, "d", 9, 11); + assertEnd(parser, 11, 11); } TEST(OsmlStreamParser, fieldsWithSpaces) { - const char *testString = "\\a {\\b \\c} \n\n {\\d}"; - // 0123 456 789012 3 456 789 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "a", 0, 2); - assertData(reader, " ", 2, 3, WhitespaceMode::PRESERVE); - assertFieldStart(reader, false, 3, 4); - assertCommand(reader, "b", 4, 6); - assertData(reader, " ", 6, 7, WhitespaceMode::PRESERVE); - assertCommand(reader, "c", 7, 9); - assertFieldEnd(reader, 9, 10); - assertData(reader, " \n\n {", 10, 12, WhitespaceMode::PRESERVE); - assertFieldStart(reader, false, 16, 17); - assertCommand(reader, "d", 17, 19); - assertFieldEnd(reader, 19, 20); - assertEnd(reader, 20, 20); -} - -TEST(OsmlStreamParser, errorNoFieldToStart) -{ - const char *testString = "\\a b {"; - // 012345 - // 0 - CharReader charReader(testString); + const char *testString = "\\a {\\b \\c} \n\n {\\d}"; + // 0123 456 789012 3 456 789 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - OsmlStreamParser reader(charReader, logger); + assertCommand(parser, "a", 0, 2); + assertEmptyData(parser); + assertFieldStart(parser, false, 3, 4); + assertCommand(parser, "b", 4, 6); + assertEmptyData(parser); + assertCommand(parser, "c", 7, 9); + assertFieldEnd(parser, 9, 10); + assertEmptyData(parser); + assertFieldStart(parser, false, 16, 17); + assertCommand(parser, "d", 17, 19); + assertFieldEnd(parser, 19, 20); 
+ assertEnd(parser, 20, 20); +} - logger.reset(); - assertCommand(reader, "a", 0, 2); - assertData(reader, "b", 3, 4); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 6, 6); - ASSERT_TRUE(logger.hasError()); +TEST(OsmlStreamParser, errorEndButOpenField) +{ + const char *testString = "\\a b {"; + // 012345 + // 0 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + logger.reset(); + assertCommand(parser, "a", 0, 2); + assertData(parser, "b", 3, 4); + assertFieldStart(parser, false, 5, 6); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 6, 6); + ASSERT_TRUE(logger.hasError()); } + TEST(OsmlStreamParser, errorNoFieldToEnd) { - const char *testString = "\\a b }"; - // 012345 - // 0 - CharReader charReader(testString); + const char *testString = "\\a b }"; + // 012345 + // 0 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(reader, "a", 0, 2); - assertData(reader, "b", 3, 4); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 6, 6); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "a", 0, 2); + assertData(parser, "b", 3, 4); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 6, 6); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorNoFieldEndNested) { - const char *testString = "\\test{\\test2{}}}"; - // 012345 6789012345 - // 0 1 - CharReader charReader(testString); + const char *testString = "\\test{\\test2{}}}"; + // 012345 6789012345 + // 0 1 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertFieldEnd(reader, 13, 14); - assertFieldEnd(reader, 14, 15); - ASSERT_FALSE(logger.hasError()); - 
assertEnd(reader, 16, 16); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertFieldEnd(parser, 13, 14); + assertFieldEnd(parser, 14, 15); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 16, 16); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorNoFieldEndNestedData) { - const char *testString = "\\test{\\test2{}}a}"; - // 012345 67890123456 - // 0 1 - CharReader charReader(testString); + const char *testString = "\\test{\\test2{}}a}"; + // 012345 67890123456 + // 0 1 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(reader, "test", 0, 5); - assertFieldStart(reader, false, 5, 6); - assertCommand(reader, "test2", 6, 12); - assertFieldStart(reader, false, 12, 13); - assertFieldEnd(reader, 13, 14); - assertFieldEnd(reader, 14, 15); - assertData(reader, "a", 15, 16); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader, 17, 17); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertFieldEnd(parser, 13, 14); + assertFieldEnd(parser, 14, 15); + assertData(parser, "a", 15, 16); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 17, 17); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, beginEnd) { - const char *testString = "\\begin{book}\\end{book}"; - // 012345678901 2345678901 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}\\end{book}"; + // 012345678901 2345678901 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", 7, 11); - 
assertFieldStart(reader, true, 12, 13); - assertFieldEnd(reader, 17, 21); - assertEnd(reader, 22, 22); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertCommandEnd(parser, 17, 21); + assertEnd(parser, 22, 22); } TEST(OsmlStreamParser, beginEndWithName) { - const char *testString = "\\begin{book#a}\\end{book}"; - // 01234567890123 4567890123 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book#a}\\end{book}"; + // 01234567890123 4567890123 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", {{"name", "a"}}, 7, 11); - assertFieldStart(reader, true, 14, 15); - assertFieldEnd(reader, 19, 23); - assertEnd(reader, 24, 24); + assertCommandStart(parser, "book", true, {{"name", "a"}}, 7, 11); + assertCommandEnd(parser, 19, 23); + assertEnd(parser, 24, 24); } TEST(OsmlStreamParser, beginEndWithNameAndArgs) { - const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; - // 0123456789012345678901234 56789 01 2345678901 - // 0 1 2 3 4 - CharReader charReader(testString); + const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; + // 0123456789012345678901234 56789 01 2345678901 + // 0 1 2 3 4 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", - {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, true, 32, 33); - assertFieldEnd(reader, 37, 41); - assertEnd(reader, 42, 42); + assertCommandStart(parser, "book", true, + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); + assertCommandEnd(parser, 37, 41); + assertEnd(parser, 42, 42); } TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) { - const char *testString = - "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; - // 
0123456789012345678901234 56789 01234 567890123 45678901 2345678901 - // 0 1 2 3 4 5 6 - CharReader charReader(testString); - - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "book", - {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(reader, false, 32, 33); - assertData(reader, "a", 33, 34); - assertCommand(reader, "test", Variant::mapType{}, 35, 40); - assertFieldEnd(reader, 40, 41); - assertFieldStart(reader, false, 41, 42); - assertData(reader, "b", 42, 43); - assertCommand(reader, "test", Variant::mapType{}, 44, 49); - assertFieldStart(reader, false, 49, 50); - assertFieldEnd(reader, 50, 51); - assertFieldEnd(reader, 51, 52); - assertFieldStart(reader, true, 52, 53); - assertFieldEnd(reader, 57, 61); - assertEnd(reader, 62, 62); + const char *testString = + "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; + // 0123456789012345678901234 56789 01234 567890123 45678901 2345678901 + // 0 1 2 3 4 5 6 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, "book", true, + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); + assertFieldStart(parser, false, 32, 33); + assertData(parser, "a", 33, 34); + assertCommand(parser, "test", 35, 40); + assertFieldEnd(parser, 40, 41); + assertFieldStart(parser, false, 41, 42); + assertData(parser, "b", 42, 43); + assertCommand(parser, "test", 44, 49); + assertFieldStart(parser, false, 49, 50); + assertFieldEnd(parser, 50, 51); + assertFieldEnd(parser, 51, 52); + assertCommandEnd(parser, 57, 61); + assertEnd(parser, 62, 62); } TEST(OsmlStreamParser, beginEndWithData) { - const char *testString = "\\begin{book}a\\end{book}"; - // 0123456789012 3456789012 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}a\\end{book}"; + // 0123456789012 3456789012 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + 
OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertData(reader, "a", 12, 13); - assertFieldEnd(reader, 18, 22); - assertEnd(reader, 23, 23); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertCommandEnd(parser, 18, 22); + assertEnd(parser, 23, 23); } - +/* TEST(OsmlStreamParser, beginEndNested) { - const char *testString = - "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; - // 012345678901234 5678901234567890 123456 7890123 4567890 - // 0 1 2 3 4 5 - CharReader charReader(testString); - - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, false, 9, 10); - assertData(reader, "b", 10, 11); - assertFieldEnd(reader, 11, 12); - assertFieldStart(reader, true, 13, 14); - assertData(reader, "c", 13, 14); - assertCommand(reader, "d", 22, 23); - assertFieldStart(reader, false, 24, 25); - assertData(reader, "e", 25, 26); - assertFieldEnd(reader, 26, 27); - assertFieldStart(reader, false, 27, 28); - assertData(reader, "f", 28, 29); - assertFieldEnd(reader, 29, 30); - assertFieldStart(reader, true, 31, 32); - assertCommand(reader, "g", 31, 33); - assertFieldStart(reader, false, 33, 34); - assertData(reader, "h", 34, 35); - assertFieldEnd(reader, 35, 36); - assertFieldEnd(reader, 42, 43); - assertFieldEnd(reader, 49, 50); - assertEnd(reader, 51, 51); + const char *testString = + "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; + // 012345678901234 5678901234567890 123456 7890123 4567890 + // 0 1 2 3 4 5 + CharReader charReader(testString); + + OsmlStreamParser reader(charReader, logger); + + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, false, 9, 10); + assertData(reader, "b", 10, 11); + assertFieldEnd(reader, 11, 12); + assertFieldStart(reader, true, 13, 14); + assertData(reader, "c", 13, 14); + assertCommand(reader, "d", 22, 23); + 
assertFieldStart(reader, false, 24, 25); + assertData(reader, "e", 25, 26); + assertFieldEnd(reader, 26, 27); + assertFieldStart(reader, false, 27, 28); + assertData(reader, "f", 28, 29); + assertFieldEnd(reader, 29, 30); + assertFieldStart(reader, true, 31, 32); + assertCommand(reader, "g", 31, 33); + assertFieldStart(reader, false, 33, 34); + assertData(reader, "h", 34, 35); + assertFieldEnd(reader, 35, 36); + assertFieldEnd(reader, 42, 43); + assertFieldEnd(reader, 49, 50); + assertEnd(reader, 51, 51); } TEST(OsmlStreamParser, beginEndWithCommand) { - const char *testString = "\\begin{book}\\a{test}\\end{book}"; - // 012345678901 23456789 0123456789 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}\\a{test}\\end{book}"; + // 012345678901 23456789 0123456789 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertCommand(reader, "a", 12, 14); - assertFieldStart(reader, false, 14, 15); - assertData(reader, "test", 15, 19); - assertFieldEnd(reader, 19, 20); - assertFieldEnd(reader, 25, 29); - assertEnd(reader, 30, 30); + assertCommand(reader, "book", 7, 11); + assertFieldStart(reader, true, 12, 13); + assertCommand(reader, "a", 12, 14); + assertFieldStart(reader, false, 14, 15); + assertData(reader, "test", 15, 19); + assertFieldEnd(reader, 19, 20); + assertFieldEnd(reader, 25, 29); + assertEnd(reader, 30, 30); } TEST(OsmlStreamParser, errorBeginNoBraceOpen) { - const char *testString = "\\begin a"; - // 01234567 - CharReader charReader(testString); + const char *testString = "\\begin a"; + // 01234567 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertData(reader, "a", 7, 8); - ASSERT_TRUE(logger.hasError()); + 
 logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "a", 7, 8); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoIdentifier) { - const char *testString = "\\begin{!"; - CharReader charReader(testString); + const char *testString = "\\begin{!"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoBraceClose) { - const char *testString = "\\begin{a"; - CharReader charReader(testString); + const char *testString = "\\begin{a"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoName) { - const char *testString = "\\begin{a#}"; - CharReader charReader(testString); + const char *testString = "\\begin{a#}"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "a"); - ASSERT_TRUE(logger.hasError()); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertEnd(reader); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a"); + ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertEnd(reader); + 
 ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBraceOpen) { - const char *testString = "\\end a"; - // 012345 - CharReader charReader(testString); + const char *testString = "\\end a"; + // 012345 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertData(reader, "a", 5, 6); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "a", 5, 6); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoIdentifier) { - const char *testString = "\\end{!"; - CharReader charReader(testString); + const char *testString = "\\end{!"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBraceClose) { - const char *testString = "\\end{a"; - CharReader charReader(testString); + const char *testString = "\\end{a"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBegin) { - const char *testString = "\\end{a}"; - CharReader charReader(testString); + const char *testString = "\\end{a}"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser 
 reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginEndMismatch) { - const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; - // 0123456789 012345678901234 5678901 - // 0 1 2 3 - CharReader charReader(testString); + const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; + // 0123456789 012345678901234 5678901 + // 0 1 2 3 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, true, 10, 11); - assertCommand(reader, "b", 17, 18); - assertFieldStart(reader, true, 20, 24); - assertData(reader, "test", 20, 24); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(reader.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(reader, "a", 7, 8); + assertFieldStart(reader, true, 10, 11); + assertCommand(reader, "b", 17, 18); + assertFieldStart(reader, true, 20, 24); + assertData(reader, "test", 20, 24); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(reader.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, commandWithNSSep) { - const char *testString = "\\test1:test2"; - // 012345678901 - CharReader charReader(testString); + const char *testString = "\\test1:test2"; + // 012345678901 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "test1:test2", 0, 12); - assertEnd(reader, 12, 12); + assertCommand(reader, "test1:test2", 0, 12); + assertEnd(reader, 12, 12); } TEST(OsmlStreamParser, beginEndWithNSSep) { - const char *testString = 
"\\begin{test1:test2}\\end{test1:test2}"; - // 0123456789012345678 90123456789012345 - // 0 1 2 3 - CharReader charReader(testString); + const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; + // 0123456789012345678 90123456789012345 + // 0 1 2 3 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "test1:test2", 7, 18); - assertFieldStart(reader, true, 19, 20); - assertFieldEnd(reader, 24, 35); - assertEnd(reader, 36, 36); + assertCommand(reader, "test1:test2", 7, 18); + assertFieldStart(reader, true, 19, 20); + assertFieldEnd(reader, 24, 35); + assertEnd(reader, 36, 36); } TEST(OsmlStreamParser, errorBeginNSSep) { - const char *testString = "\\begin:test{blub}\\end{blub}"; - CharReader charReader(testString); + const char *testString = "\\begin:test{blub}\\end{blub}"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "blub"); - ASSERT_TRUE(logger.hasError()); - assertFieldStart(reader, true); - assertFieldEnd(reader); - assertEnd(reader); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "blub"); + ASSERT_TRUE(logger.hasError()); + assertFieldStart(reader, true); + assertFieldEnd(reader); + assertEnd(reader); } TEST(OsmlStreamParser, errorEndNSSep) { - const char *testString = "\\begin{blub}\\end:test{blub}"; - CharReader charReader(testString); + const char *testString = "\\begin{blub}\\end:test{blub}"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - assertCommand(reader, "blub"); - assertFieldStart(reader, true); - ASSERT_FALSE(logger.hasError()); - assertFieldEnd(reader); - ASSERT_TRUE(logger.hasError()); - assertEnd(reader); + logger.reset(); + 
assertCommand(reader, "blub"); + assertFieldStart(reader, true); + ASSERT_FALSE(logger.hasError()); + assertFieldEnd(reader); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader); } TEST(OsmlStreamParser, errorEmptyNs) { - const char *testString = "\\test:"; - CharReader charReader(testString); + const char *testString = "\\test:"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "test"); - ASSERT_TRUE(logger.hasError()); - assertData(reader, ":"); - assertEnd(reader); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ":"); + assertEnd(reader); } TEST(OsmlStreamParser, errorRepeatedNs) { - const char *testString = "\\test::"; - CharReader charReader(testString); + const char *testString = "\\test::"; + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "test"); - ASSERT_TRUE(logger.hasError()); - assertData(reader, "::"); - assertEnd(reader); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(reader, "::"); + assertEnd(reader); } TEST(OsmlStreamParser, explicitDefaultField) { - const char *testString = "\\a{!b}c"; - // 01234567 - CharReader charReader(testString); + const char *testString = "\\a{!b}c"; + // 01234567 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertData(reader, "b", 4, 5); - assertFieldEnd(reader, 5, 6); - assertData(reader, "c", 6, 7); - assertEnd(reader, 7, 7); + assertCommand(reader, "a", 0, 2); + 
assertFieldStart(reader, true, 2, 4); + assertData(reader, "b", 4, 5); + assertFieldEnd(reader, 5, 6); + assertData(reader, "c", 6, 7); + assertEnd(reader, 7, 7); } TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) { - const char *testString = "\\a{!\\b}c"; - // 0123 4567 - CharReader charReader(testString); + const char *testString = "\\a{!\\b}c"; + // 0123 4567 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertCommand(reader, "b", 4, 6); - assertFieldEnd(reader, 6, 7); - assertData(reader, "c", 7, 8); - assertEnd(reader, 8, 8); + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + assertData(reader, "c", 7, 8); + assertEnd(reader, 8, 8); } TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) { - const char *testString = "\\a{!\\b}{c}"; - // 0123 456789 - CharReader charReader(testString); + const char *testString = "\\a{!\\b}{c}"; + // 0123 456789 + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertCommand(reader, "b", 4, 6); - assertFieldEnd(reader, 6, 7); - ASSERT_FALSE(logger.hasError()); - assertData(reader, "c", 8, 9); - ASSERT_TRUE(logger.hasError()); - assertEnd(reader, 10, 10); + logger.reset(); + assertCommand(reader, "a", 0, 2); + assertFieldStart(reader, true, 2, 4); + assertCommand(reader, "b", 4, 6); + assertFieldEnd(reader, 6, 7); + ASSERT_FALSE(logger.hasError()); + assertData(reader, "c", 8, 9); + ASSERT_TRUE(logger.hasError()); + assertEnd(reader, 10, 10); } TEST(OsmlStreamParser, annotationStart) { - const char *testString = "<\\a"; - // 0 12 + const char *testString = "<\\a"; + // 0 12 - CharReader 
charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); - assertEnd(reader, 3, 3); + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertEnd(reader, 3, 3); } TEST(OsmlStreamParser, annotationStartWithName) { - const char *testString = "<\\annotationWithName#aName"; - // 0 1234567890123456789012345 - // 0 1 2 + const char *testString = "<\\annotationWithName#aName"; + // 0 1234567890123456789012345 + // 0 1 2 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart(reader, "annotationWithName", - Variant::mapType{{"name", "aName"}}, 0, 20); - assertEnd(reader, 26, 26); + assertAnnotationStart(reader, "annotationWithName", + Variant::mapType{{"name", "aName"}}, 0, 20); + assertEnd(reader, 26, 26); } TEST(OsmlStreamParser, annotationStartWithArguments) { - const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; - // 0 1234567890123456789012345678901234 - // 0 1 2 3 + const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; + // 0 1234567890123456789012345678901234 + // 0 1 2 3 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart( - reader, "annotationWithName", - Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); - assertEnd(reader, 35, 35); + assertAnnotationStart( + reader, "annotationWithName", + Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); + assertEnd(reader, 35, 35); } TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) { - const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; - // 0 123456789012345678901234567 89012345 67 - // 0 1 2 3 + const char 
*testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; + // 0 123456789012345678901234567 89012345 67 + // 0 1 2 3 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart( - reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, - 10); - assertFieldStart(reader, true, 26, 27); - assertData(reader, "a", 26, 27); - assertFieldEnd(reader, 33, 35); - assertAnnotationEnd(reader, "", "", 36, 38); - assertEnd(reader, 38, 38); + assertAnnotationStart( + reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, + 10); + assertFieldStart(reader, true, 26, 27); + assertData(reader, "a", 26, 27); + assertFieldEnd(reader, 33, 35); + assertAnnotationEnd(reader, "", "", 36, 38); + assertEnd(reader, 38, 38); } TEST(OsmlStreamParser, annotationEnd) { - const char *testString = "\\a>"; - // 012 + const char *testString = "\\a>"; + // 012 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationEnd(reader, "a", "", 0, 2); - assertEnd(reader, 3, 3); + assertAnnotationEnd(reader, "a", "", 0, 2); + assertEnd(reader, 3, 3); } TEST(OsmlStreamParser, annotationEndWithName) { - const char *testString = "\\a#name>"; - // 01234567 + const char *testString = "\\a#name>"; + // 01234567 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationEnd(reader, "a", "name", 0, 2); - assertEnd(reader, 8, 8); + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 8, 8); } TEST(OsmlStreamParser, annotationEndWithNameAsArgs) { - const char *testString = "\\a[name=name]>"; - // 01234567890123 + const char *testString = "\\a[name=name]>"; + // 01234567890123 - CharReader 
charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationEnd(reader, "a", "name", 0, 2); - assertEnd(reader, 14, 14); + assertAnnotationEnd(reader, "a", "name", 0, 2); + assertEnd(reader, 14, 14); } TEST(OsmlStreamParser, errorAnnotationEndWithArguments) { - const char *testString = "\\a[foo=bar]>"; - // 012345678901 - // 0 1 + const char *testString = "\\a[foo=bar]>"; + // 012345678901 + // 0 1 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); - ASSERT_TRUE(logger.hasError()); - assertData(reader, ">", 11, 12); - assertEnd(reader, 12, 12); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); + ASSERT_TRUE(logger.hasError()); + assertData(reader, ">", 11, 12); + assertEnd(reader, 12, 12); } TEST(OsmlStreamParser, closingAnnotation) { - const char *testString = "<\\a>"; - // 0 123 + const char *testString = "<\\a>"; + // 0 123 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); - assertData(reader, ">", 3, 4); - assertEnd(reader, 4, 4); + assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); + assertData(reader, ">", 3, 4); + assertEnd(reader, 4, 4); } TEST(OsmlStreamParser, annotationWithFields) { - const char *testString = "a <\\b{c}{d}{!e} f \\> g"; - // 012 345678901234567 8901 - // 0 1 2 + const char *testString = "a <\\b{c}{d}{!e} f \\> g"; + // 012 345678901234567 8901 + // 0 1 2 - CharReader charReader(testString); + CharReader charReader(testString); - 
OsmlStreamParser reader(charReader, logger); - - assertData(reader, "a", 0, 1); - assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "c", 6, 7); - assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "d", 9, 10); - assertFieldEnd(reader, 10, 11); - assertFieldStart(reader, true, 11, 13); - assertData(reader, "e", 13, 14); - assertFieldEnd(reader, 14, 15); - assertData(reader, "f", 16, 17); - assertAnnotationEnd(reader, "", "", 18, 20); - assertData(reader, "g", 21, 22); - assertEnd(reader, 22, 22); + OsmlStreamParser reader(charReader, logger); + + assertData(reader, "a", 0, 1); + assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); + assertFieldStart(reader, false, 5, 6); + assertData(reader, "c", 6, 7); + assertFieldEnd(reader, 7, 8); + assertFieldStart(reader, false, 8, 9); + assertData(reader, "d", 9, 10); + assertFieldEnd(reader, 10, 11); + assertFieldStart(reader, true, 11, 13); + assertData(reader, "e", 13, 14); + assertFieldEnd(reader, 14, 15); + assertData(reader, "f", 16, 17); + assertAnnotationEnd(reader, "", "", 18, 20); + assertData(reader, "g", 21, 22); + assertEnd(reader, 22, 22); } TEST(OsmlStreamParser, annotationStartEscape) { - const char *testString = "<\\%test"; - // 0 123456 - // 0 + const char *testString = "<\\%test"; + // 0 123456 + // 0 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser reader(charReader, logger); - assertData(reader, "<%test", 0, 7); - assertEnd(reader, 7, 7); + assertData(reader, "<%test", 0, 7); + assertEnd(reader, 7, 7); } +*/ } -- cgit v1.2.3 From c18790f70beb5f52b00bc1c2b1ded2b252f1998a Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 28 Feb 2015 15:46:55 +0100 Subject: Fixed potential problem in SourceOffsetVector --- src/core/parser/utils/SourceOffsetVector.hpp | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/src/core/parser/utils/SourceOffsetVector.hpp b/src/core/parser/utils/SourceOffsetVector.hpp index aaebe7d..67bacef 100644 --- a/src/core/parser/utils/SourceOffsetVector.hpp +++ b/src/core/parser/utils/SourceOffsetVector.hpp @@ -170,6 +170,11 @@ public: if (length < size()) { lens.resize(length); offsets.resize((length >> LOG2_OFFSET_INTERVAL) + 1); + if (length > 0) { + lastEnd = loadOffset(length - 1).second; + } else { + lastEnd = 0; + } } } -- cgit v1.2.3 From 6776f53b60ade0ece65ab895d23476761c5481d5 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 28 Feb 2015 15:47:13 +0100 Subject: Trimming forgotten protectedChars buffer --- src/core/parser/utils/TokenizedData.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index bcbbe43..c3c4f98 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -467,6 +467,7 @@ public: { if (length < size()) { buf.resize(length); + protectedChars.resize(length); offsets.trim(length); } } -- cgit v1.2.3 From 81e009aa22b5018b055ddda689cd3e78336a164b Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 28 Feb 2015 15:47:30 +0100 Subject: Always call trim if a bestMatch has been found --- src/core/parser/utils/Tokenizer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index e78b0f4..94d9cb0 100644 --- a/src/core/parser/utils/Tokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -156,7 +156,6 @@ public: return res; } }; - } /* Class Tokenizer */ @@ -252,6 +251,9 @@ bool Tokenizer::next(CharReader &reader, Token &token, TokenizedData &data) // Create a token containing the data location bestMatch.token = Token{data.getLocation()}; + } else if (bestMatch.hasMatch() && + bestMatch.dataStartOffset == initialDataSize) { + data.trim(initialDataSize); } // Move the 
read/peek cursor to the end of the token, abort if an error @@ -269,6 +271,7 @@ bool Tokenizer::next(CharReader &reader, Token &token, TokenizedData &data) } else { reader.seekPeekCursor(end); } + token = bestMatch.token; } else { token = Token{}; -- cgit v1.2.3 From b54760fbd5470032dc716dc870dc08b32dfba5ac Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 28 Feb 2015 15:48:07 +0100 Subject: Test case for data being empty if a token is found --- test/core/parser/utils/TokenizerTest.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 785bd81..9f644c2 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -450,5 +450,32 @@ TEST(Tokenizer, nonPrimaryTokens) TokenizedData data; ASSERT_FALSE(tokenizer.read(reader, token, data)); } + + +TEST(Tokenizer, ambiguousTokens2) +{ + CharReader reader{"<\\"}; + + Tokenizer tokenizer; + + TokenId tBackslash = tokenizer.registerToken("\\"); + TokenId tAnnotationStart = tokenizer.registerToken("<\\"); + + TokenSet tokens = TokenSet{tBackslash, tAnnotationStart}; + Token token; + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ("<\\", token.content); + ASSERT_EQ(tAnnotationStart, token.id); + ASSERT_TRUE(data.empty()); + } + + { + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, token, data)); + } +} + } -- cgit v1.2.3 From fa2a5bdf0152002de520fcc72e48686b9e2657b1 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 28 Feb 2015 15:48:38 +0100 Subject: Adapted all unit tests, renamed COMMAND_END event to RANGE_END event to match ranged annotations --- src/formats/osml/OsmlStreamParser.cpp | 28 +- src/formats/osml/OsmlStreamParser.hpp | 11 +- test/formats/osml/OsmlStreamParserTest.cpp | 438 ++++++++++++++++++----------- 3 files changed, 299 insertions(+), 178 deletions(-) diff --git 
a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index 7e01a3c..e467dc5 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -127,7 +127,7 @@ private: /** * Set to true if this is a command with clear begin and end. */ - bool hasRange; + bool hasRange: 1; public: /** @@ -259,7 +259,7 @@ public: */ enum class State : uint8_t { COMMAND_START = 0, - COMMAND_END = 1, + RANGE_END = 1, FIELD_START = 2, FIELD_END = 3, ANNOTATION_START = 4, @@ -328,7 +328,7 @@ private: * * @return an internal State specifying whether an error occured (return * values State::REOVERABLE_ERROR or State::IRRECOVERABLE_ERROR) or a - * command was actually ended (return value State::COMMAND_END). + * command was actually ended (return value State::RANGE_END). */ State parseEndCommand(); @@ -569,7 +569,7 @@ OsmlStreamParserImpl::State OsmlStreamParserImpl::parseEndCommand() // End the current command location = name.getLocation(); commands.pop(); - return State::COMMAND_END; + return State::RANGE_END; } Variant OsmlStreamParserImpl::parseCommandArguments(Variant commandArgName) @@ -808,14 +808,15 @@ OsmlStreamParserImpl::State OsmlStreamParserImpl::parse() // If this was an annotation start token, add the parsed < to the // output + SourceOffset charStart = token.location.getStart(); + SourceOffset charEnd = reader.getPeekOffset(); if (type == OsmlTokens.AnnotationStart) { - data.append('<', token.location.getStart(), - token.location.getStart() + 1); + data.append('<', charStart, charStart + 1); + charStart = charStart + 1; } // Append the character to the output data, mark it as protected - data.append(c, token.location.getStart(), reader.getPeekOffset(), - true); + data.append(c, charStart, charEnd, true); reader.consumePeek(); continue; } else if (type == Tokens::Data) { @@ -880,11 +881,12 @@ OsmlStreamParserImpl::State OsmlStreamParserImpl::parse() // Make sure all open commands and fields have been ended at the end 
of the // stream - while (commands.size() > 1) { + while (true) { + bool topLevelCommand = commands.size() == 1U; if (cmd().inField()) { // If the stream ended with an open range field, issue information // about the range field - if (cmd().inRangeField()) { + if (cmd().inRangeField() && !topLevelCommand) { // Inform about the still open command itself logger.error("Reached end of stream, but command \"" + getCommandName().asString() + @@ -901,7 +903,11 @@ OsmlStreamParserImpl::State OsmlStreamParserImpl::parse() } } } - commands.pop(); + if (!topLevelCommand) { + commands.pop(); + } else { + break; + } } location = SourceLocation{reader.getSourceId(), reader.getOffset()}; diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index 1fee90b..10d5296 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -69,11 +69,11 @@ public: COMMAND_START = 0, /** - * State returned if a range command has just ended. This state is not - * returned for non-range commands (as the actual end of a command is - * context dependant). + * State returned if a range command or range annotation has just ended. + * This state is not returned for non-range commands (as the actual end + * of a command is context dependent). */ - COMMAND_END = 1, + RANGE_END = 1, /** * State returned if a new field started. The reader assures that the @@ -185,7 +185,8 @@ public: /** * Returns true if the currently started command is a range command, only - * valid if State::COMMAND_START was returned by the "parse" function. + * valid if State::COMMAND_START or State::ANNOTATION_START was returned by + * the "parse" function. * * @return true if the command is started is a range command, false * otherwise. 
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 8b64e51..3e7f4c1 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -70,11 +70,11 @@ static void assertCommand(OsmlStreamParser &parser, assertCommandStart(parser, name, false, Variant::mapType{}, start, end); } -static void assertCommandEnd(OsmlStreamParser &parser, +static void assertRangeEnd(OsmlStreamParser &parser, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { - ASSERT_EQ(OsmlStreamParser::State::COMMAND_END, parser.parse()); + ASSERT_EQ(OsmlStreamParser::State::RANGE_END, parser.parse()); if (start != InvalidSourceOffset) { EXPECT_EQ(start, parser.getLocation().getStart()); } @@ -510,6 +510,61 @@ TEST(OsmlStreamParser, fields) assertEnd(parser, 14, 14); } +TEST(OsmlStreamParser, fieldsWithoutCommand) +{ + const char *testString = "{a}{b}{c}"; + // 012345678 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + assertFieldStart(parser, false, 0, 1); + assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 2, 3); + + assertFieldStart(parser, false, 3, 4); + assertTextData(parser, "b", 4, 5, 4, 5, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 5, 6); + + assertFieldStart(parser, false, 6, 7); + assertTextData(parser, "c", 7, 8, 7, 8, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 8, 9); + assertEnd(parser, 9, 9); +} + +TEST(OsmlStreamParser, nestedField) +{ + const char *testString = "{{a{b}}}"; + // 01234567 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + assertFieldStart(parser, false, 0, 1); + assertFieldStart(parser, false, 1, 2); + assertTextData(parser, "a", 2, 3, 2, 3, WhitespaceMode::PRESERVE); + assertFieldStart(parser, false, 3, 4); + assertTextData(parser, "b", 4, 5, 4, 5, WhitespaceMode::PRESERVE); + assertFieldEnd(parser, 5, 6); + 
assertFieldEnd(parser, 6, 7); + assertFieldEnd(parser, 7, 8); + assertEnd(parser, 8, 8); +} + +TEST(OsmlStreamParser, errorUnbalancedField) +{ + const char *testString = "{a"; + // 01 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + logger.reset(); + + assertFieldStart(parser, false, 0, 1); + assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 2, 2); + ASSERT_TRUE(logger.hasError()); +} + TEST(OsmlStreamParser, dataOutsideField) { const char *testString = "\\test{a}{b} c"; @@ -720,7 +775,7 @@ TEST(OsmlStreamParser, beginEnd) OsmlStreamParser parser(charReader, logger); assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); - assertCommandEnd(parser, 17, 21); + assertRangeEnd(parser, 17, 21); assertEnd(parser, 22, 22); } @@ -734,7 +789,7 @@ TEST(OsmlStreamParser, beginEndWithName) OsmlStreamParser parser(charReader, logger); assertCommandStart(parser, "book", true, {{"name", "a"}}, 7, 11); - assertCommandEnd(parser, 19, 23); + assertRangeEnd(parser, 19, 23); assertEnd(parser, 24, 24); } @@ -749,7 +804,7 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgs) assertCommandStart(parser, "book", true, {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertCommandEnd(parser, 37, 41); + assertRangeEnd(parser, 37, 41); assertEnd(parser, 42, 42); } @@ -775,7 +830,7 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) assertFieldStart(parser, false, 49, 50); assertFieldEnd(parser, 50, 51); assertFieldEnd(parser, 51, 52); - assertCommandEnd(parser, 57, 61); + assertRangeEnd(parser, 57, 61); assertEnd(parser, 62, 62); } @@ -790,10 +845,10 @@ TEST(OsmlStreamParser, beginEndWithData) assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); assertData(parser, "a", 12, 13); - assertCommandEnd(parser, 18, 22); + assertRangeEnd(parser, 18, 22); assertEnd(parser, 23, 23); } -/* + TEST(OsmlStreamParser, beginEndNested) { const char 
*testString = @@ -802,29 +857,32 @@ TEST(OsmlStreamParser, beginEndNested) // 0 1 2 3 4 5 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, false, 9, 10); - assertData(reader, "b", 10, 11); - assertFieldEnd(reader, 11, 12); - assertFieldStart(reader, true, 13, 14); - assertData(reader, "c", 13, 14); - assertCommand(reader, "d", 22, 23); - assertFieldStart(reader, false, 24, 25); - assertData(reader, "e", 25, 26); - assertFieldEnd(reader, 26, 27); - assertFieldStart(reader, false, 27, 28); - assertData(reader, "f", 28, 29); - assertFieldEnd(reader, 29, 30); - assertFieldStart(reader, true, 31, 32); - assertCommand(reader, "g", 31, 33); - assertFieldStart(reader, false, 33, 34); - assertData(reader, "h", 34, 35); - assertFieldEnd(reader, 35, 36); - assertFieldEnd(reader, 42, 43); - assertFieldEnd(reader, 49, 50); - assertEnd(reader, 51, 51); + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); + assertFieldStart(parser, false, 9, 10); + assertData(parser, "b", 10, 11); + assertFieldEnd(parser, 11, 12); + + assertData(parser, "c", 13, 14); + + assertCommandStart(parser, "d", true, Variant::mapType{}, 22, 23); + assertFieldStart(parser, false, 24, 25); + assertData(parser, "e", 25, 26); + assertFieldEnd(parser, 26, 27); + assertFieldStart(parser, false, 27, 28); + assertData(parser, "f", 28, 29); + assertFieldEnd(parser, 29, 30); + + assertEmptyData(parser); + assertCommand(parser, "g", 31, 33); + assertFieldStart(parser, false, 33, 34); + assertData(parser, "h", 34, 35); + assertFieldEnd(parser, 35, 36); + assertEmptyData(parser); + assertRangeEnd(parser, 42, 43); + assertRangeEnd(parser, 49, 50); + assertEnd(parser, 51, 51); } TEST(OsmlStreamParser, beginEndWithCommand) @@ -834,16 +892,75 @@ TEST(OsmlStreamParser, beginEndWithCommand) // 0 1 2 CharReader charReader(testString); - OsmlStreamParser 
reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertCommand(parser, "a", 12, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "test", 15, 19); + assertFieldEnd(parser, 19, 20); + assertRangeEnd(parser, 25, 29); + assertEnd(parser, 30, 30); +} + +TEST(OsmlStreamParser, beginEndNestedFields) +{ + const char *testString = "\\begin{book}a{{b{c}}}\\end{book}"; + // 012345678901234567890 1234567890 + // 0 1 2 3 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + logger.reset(); + + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertFieldStart(parser, false, 13, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "b", 15, 16); + assertFieldStart(parser, false, 16, 17); + assertData(parser, "c", 17, 18); + assertFieldEnd(parser, 18, 19); + assertFieldEnd(parser, 19, 20); + assertFieldEnd(parser, 20, 21); + assertRangeEnd(parser, 26, 30); + assertEnd(parser, 31, 31); +} + +TEST(OsmlStreamParser, errorBeginEndUnbalancedNestedFields) +{ + const char *testString = "\\begin{book}a{{b{c}}\\end{book}"; + // 012345678901234567890 123456789 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + logger.reset(); + + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertFieldStart(parser, false, 13, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "b", 15, 16); + assertFieldStart(parser, false, 16, 17); + assertData(parser, "c", 17, 18); + assertFieldEnd(parser, 18, 19); + assertFieldEnd(parser, 19, 20); + ASSERT_THROW(assertRangeEnd(parser, 25, 29), LoggableException); +} + +TEST(OsmlStreamParser, errorBeginEndUnbalancedFields) +{ + const char *testString = "{a"; + // 01 + CharReader charReader(testString); + OsmlStreamParser 
parser(charReader, logger); - assertCommand(reader, "book", 7, 11); - assertFieldStart(reader, true, 12, 13); - assertCommand(reader, "a", 12, 14); - assertFieldStart(reader, false, 14, 15); - assertData(reader, "test", 15, 19); - assertFieldEnd(reader, 19, 20); - assertFieldEnd(reader, 25, 29); - assertEnd(reader, 30, 30); + logger.reset(); + + assertFieldStart(parser, false, 0, 1); + assertTextData(parser, "a", 1, 2, 1, 2, WhitespaceMode::PRESERVE); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 2, 2); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoBraceOpen) @@ -852,12 +969,13 @@ TEST(OsmlStreamParser, errorBeginNoBraceOpen) // 01234567 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertData(reader, "a", 7, 8); + assertData(parser, "a", 7, 8); ASSERT_TRUE(logger.hasError()); + assertEnd(parser, 8, 8); } TEST(OsmlStreamParser, errorBeginNoIdentifier) @@ -865,7 +983,7 @@ TEST(OsmlStreamParser, errorBeginNoIdentifier) const char *testString = "\\begin{!"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -878,7 +996,7 @@ TEST(OsmlStreamParser, errorBeginNoBraceClose) const char *testString = "\\begin{a"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -891,15 +1009,15 @@ TEST(OsmlStreamParser, errorBeginNoName) const char *testString = "\\begin{a#}"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "a"); + assertCommandStart(parser, "a", true); ASSERT_TRUE(logger.hasError()); logger.reset(); 
ASSERT_FALSE(logger.hasError()); - assertEnd(reader); + assertEnd(parser); ASSERT_TRUE(logger.hasError()); } @@ -909,11 +1027,11 @@ TEST(OsmlStreamParser, errorEndNoBraceOpen) // 012345 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertData(reader, "a", 5, 6); + assertData(parser, "a", 5, 6); ASSERT_TRUE(logger.hasError()); } @@ -922,7 +1040,7 @@ TEST(OsmlStreamParser, errorEndNoIdentifier) const char *testString = "\\end{!"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -935,7 +1053,7 @@ TEST(OsmlStreamParser, errorEndNoBraceClose) const char *testString = "\\end{a"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -948,7 +1066,7 @@ TEST(OsmlStreamParser, errorEndNoBegin) const char *testString = "\\end{a}"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); @@ -963,14 +1081,13 @@ TEST(OsmlStreamParser, errorBeginEndMismatch) // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "a", 7, 8); - assertFieldStart(reader, true, 10, 11); - assertCommand(reader, "b", 17, 18); - assertFieldStart(reader, true, 20, 24); - assertData(reader, "test", 20, 24); + assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); + assertEmptyData(parser); + assertCommandStart(parser, "b", true, Variant::mapType{}, 17, 18); + assertData(parser, "test", 20, 24); ASSERT_FALSE(logger.hasError()); ASSERT_THROW(parser.parse(), 
LoggableException); ASSERT_TRUE(logger.hasError()); @@ -982,10 +1099,10 @@ TEST(OsmlStreamParser, commandWithNSSep) // 012345678901 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test1:test2", 0, 12); - assertEnd(reader, 12, 12); + assertCommand(parser, "test1:test2", 0, 12); + assertEnd(parser, 12, 12); } TEST(OsmlStreamParser, beginEndWithNSSep) @@ -995,12 +1112,11 @@ TEST(OsmlStreamParser, beginEndWithNSSep) // 0 1 2 3 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "test1:test2", 7, 18); - assertFieldStart(reader, true, 19, 20); - assertFieldEnd(reader, 24, 35); - assertEnd(reader, 36, 36); + assertCommandStart(parser, "test1:test2", true, Variant::mapType{}, 7, 18); + assertRangeEnd(parser, 24, 35); + assertEnd(parser, 36, 36); } TEST(OsmlStreamParser, errorBeginNSSep) @@ -1008,15 +1124,14 @@ TEST(OsmlStreamParser, errorBeginNSSep) const char *testString = "\\begin:test{blub}\\end{blub}"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "blub"); + assertCommandStart(parser, "blub", true, Variant::mapType{}); ASSERT_TRUE(logger.hasError()); - assertFieldStart(reader, true); - assertFieldEnd(reader); - assertEnd(reader); + assertRangeEnd(parser); + assertEnd(parser); } TEST(OsmlStreamParser, errorEndNSSep) @@ -1024,15 +1139,14 @@ TEST(OsmlStreamParser, errorEndNSSep) const char *testString = "\\begin{blub}\\end:test{blub}"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "blub"); - assertFieldStart(reader, true); + assertCommandStart(parser, "blub", true, Variant::mapType{}); 
ASSERT_FALSE(logger.hasError()); - assertFieldEnd(reader); + assertRangeEnd(parser); ASSERT_TRUE(logger.hasError()); - assertEnd(reader); + assertEnd(parser); } TEST(OsmlStreamParser, errorEmptyNs) @@ -1040,14 +1154,14 @@ TEST(OsmlStreamParser, errorEmptyNs) const char *testString = "\\test:"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "test"); + assertCommand(parser, "test"); ASSERT_TRUE(logger.hasError()); - assertData(reader, ":"); - assertEnd(reader); + assertData(parser, ":"); + assertEnd(parser); } TEST(OsmlStreamParser, errorRepeatedNs) @@ -1055,14 +1169,14 @@ TEST(OsmlStreamParser, errorRepeatedNs) const char *testString = "\\test::"; CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "test"); + assertCommand(parser, "test"); ASSERT_TRUE(logger.hasError()); - assertData(reader, "::"); - assertEnd(reader); + assertData(parser, "::"); + assertEnd(parser); } TEST(OsmlStreamParser, explicitDefaultField) @@ -1071,14 +1185,14 @@ TEST(OsmlStreamParser, explicitDefaultField) // 01234567 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertData(reader, "b", 4, 5); - assertFieldEnd(reader, 5, 6); - assertData(reader, "c", 6, 7); - assertEnd(reader, 7, 7); + assertCommand(parser, "a", 0, 2); + assertFieldStart(parser, true, 2, 4); + assertData(parser, "b", 4, 5); + assertFieldEnd(parser, 5, 6); + assertData(parser, "c", 6, 7); + assertEnd(parser, 7, 7); } TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) @@ -1087,33 +1201,33 @@ TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) // 0123 4567 
CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertCommand(reader, "b", 4, 6); - assertFieldEnd(reader, 6, 7); - assertData(reader, "c", 7, 8); - assertEnd(reader, 8, 8); + assertCommand(parser, "a", 0, 2); + assertFieldStart(parser, true, 2, 4); + assertCommand(parser, "b", 4, 6); + assertFieldEnd(parser, 6, 7); + assertData(parser, "c", 7, 8); + assertEnd(parser, 8, 8); } -TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField) +TEST(OsmlStreamParser, fieldAfterExplicitDefaultField) { const char *testString = "\\a{!\\b}{c}"; // 0123 456789 CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); - assertCommand(reader, "a", 0, 2); - assertFieldStart(reader, true, 2, 4); - assertCommand(reader, "b", 4, 6); - assertFieldEnd(reader, 6, 7); - ASSERT_FALSE(logger.hasError()); - assertData(reader, "c", 8, 9); - ASSERT_TRUE(logger.hasError()); - assertEnd(reader, 10, 10); + assertCommand(parser, "a", 0, 2); + assertFieldStart(parser, true, 2, 4); + assertCommand(parser, "b", 4, 6); + assertFieldEnd(parser, 6, 7); + assertFieldStart(parser, false, 7, 8); + assertData(parser, "c", 8, 9); + assertFieldEnd(parser, 9, 10); + assertEnd(parser, 10, 10); } TEST(OsmlStreamParser, annotationStart) @@ -1123,10 +1237,10 @@ TEST(OsmlStreamParser, annotationStart) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); - assertEnd(reader, 3, 3); + assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); + assertEnd(parser, 3, 3); } TEST(OsmlStreamParser, annotationStartWithName) @@ -1137,11 +1251,11 @@ TEST(OsmlStreamParser, annotationStartWithName) CharReader charReader(testString); - 
OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(reader, "annotationWithName", + assertAnnotationStart(parser, "annotationWithName", Variant::mapType{{"name", "aName"}}, 0, 20); - assertEnd(reader, 26, 26); + assertEnd(parser, 26, 26); } TEST(OsmlStreamParser, annotationStartWithArguments) @@ -1152,12 +1266,12 @@ TEST(OsmlStreamParser, annotationStartWithArguments) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); assertAnnotationStart( - reader, "annotationWithName", + parser, "annotationWithName", Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); - assertEnd(reader, 35, 35); + assertEnd(parser, 35, 35); } TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) @@ -1168,16 +1282,16 @@ TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); assertAnnotationStart( - reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, + parser, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, 10); - assertFieldStart(reader, true, 26, 27); - assertData(reader, "a", 26, 27); - assertFieldEnd(reader, 33, 35); - assertAnnotationEnd(reader, "", "", 36, 38); - assertEnd(reader, 38, 38); + ASSERT_TRUE(parser.inRangeCommand()); + assertData(parser, "a", 26, 27); + assertRangeEnd(parser, 33, 35); + assertAnnotationEnd(parser, "", "", 36, 38); + assertEnd(parser, 38, 38); } TEST(OsmlStreamParser, annotationEnd) @@ -1187,10 +1301,10 @@ TEST(OsmlStreamParser, annotationEnd) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(reader, "a", "", 0, 2); - assertEnd(reader, 3, 3); + assertAnnotationEnd(parser, "a", "", 0, 2); + assertEnd(parser, 3, 3); } TEST(OsmlStreamParser, 
annotationEndWithName) @@ -1200,10 +1314,10 @@ TEST(OsmlStreamParser, annotationEndWithName) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(reader, "a", "name", 0, 2); - assertEnd(reader, 8, 8); + assertAnnotationEnd(parser, "a", "name", 0, 2); + assertEnd(parser, 8, 8); } TEST(OsmlStreamParser, annotationEndWithNameAsArgs) @@ -1213,10 +1327,10 @@ TEST(OsmlStreamParser, annotationEndWithNameAsArgs) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(reader, "a", "name", 0, 2); - assertEnd(reader, 14, 14); + assertAnnotationEnd(parser, "a", "name", 0, 2); + assertEnd(parser, 14, 14); } TEST(OsmlStreamParser, errorAnnotationEndWithArguments) @@ -1227,14 +1341,14 @@ TEST(OsmlStreamParser, errorAnnotationEndWithArguments) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); logger.reset(); ASSERT_FALSE(logger.hasError()); - assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2); + assertCommandStart(parser, "a", false, Variant::mapType{{"foo", "bar"}}, 0, 2); ASSERT_TRUE(logger.hasError()); - assertData(reader, ">", 11, 12); - assertEnd(reader, 12, 12); + assertData(parser, ">", 11, 12); + assertEnd(parser, 12, 12); } TEST(OsmlStreamParser, closingAnnotation) @@ -1244,11 +1358,11 @@ TEST(OsmlStreamParser, closingAnnotation) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3); - assertData(reader, ">", 3, 4); - assertEnd(reader, 4, 4); + assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); + assertData(parser, ">", 3, 4); + assertEnd(parser, 4, 4); } TEST(OsmlStreamParser, annotationWithFields) @@ -1259,23 +1373,23 @@ 
TEST(OsmlStreamParser, annotationWithFields) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); - - assertData(reader, "a", 0, 1); - assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5); - assertFieldStart(reader, false, 5, 6); - assertData(reader, "c", 6, 7); - assertFieldEnd(reader, 7, 8); - assertFieldStart(reader, false, 8, 9); - assertData(reader, "d", 9, 10); - assertFieldEnd(reader, 10, 11); - assertFieldStart(reader, true, 11, 13); - assertData(reader, "e", 13, 14); - assertFieldEnd(reader, 14, 15); - assertData(reader, "f", 16, 17); - assertAnnotationEnd(reader, "", "", 18, 20); - assertData(reader, "g", 21, 22); - assertEnd(reader, 22, 22); + OsmlStreamParser parser(charReader, logger); + + assertData(parser, "a", 0, 1); + assertAnnotationStart(parser, "b", Variant::mapType{}, 2, 5); + assertFieldStart(parser, false, 5, 6); + assertData(parser, "c", 6, 7); + assertFieldEnd(parser, 7, 8); + assertFieldStart(parser, false, 8, 9); + assertData(parser, "d", 9, 10); + assertFieldEnd(parser, 10, 11); + assertFieldStart(parser, true, 11, 13); + assertData(parser, "e", 13, 14); + assertFieldEnd(parser, 14, 15); + assertData(parser, "f", 16, 17); + assertAnnotationEnd(parser, "", "", 18, 20); + assertData(parser, "g", 21, 22); + assertEnd(parser, 22, 22); } TEST(OsmlStreamParser, annotationStartEscape) @@ -1286,11 +1400,11 @@ TEST(OsmlStreamParser, annotationStartEscape) CharReader charReader(testString); - OsmlStreamParser reader(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(reader, "<%test", 0, 7); - assertEnd(reader, 7, 7); + assertData(parser, "<%test", 0, 7); + assertEnd(parser, 7, 7); } -*/ + } -- cgit v1.2.3 From cb6cacdc7eade9d4290767bafb7ccf4e935d0fbf Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 1 Mar 2015 13:49:26 +0100 Subject: allowing to store gaps in SourceOffsetVector and fixed bug with trim not resetting offsets correctly when the new length is zero --- 
src/core/parser/utils/SourceOffsetVector.hpp | 64 ++++++++++++++++------- test/core/parser/utils/SourceOffsetVectorTest.cpp | 2 +- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/src/core/parser/utils/SourceOffsetVector.hpp b/src/core/parser/utils/SourceOffsetVector.hpp index 67bacef..f322a88 100644 --- a/src/core/parser/utils/SourceOffsetVector.hpp +++ b/src/core/parser/utils/SourceOffsetVector.hpp @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -43,6 +44,9 @@ namespace ousia { * a delta compression. */ class SourceOffsetVector { +public: + using OffsPair = std::pair; + private: /** * Type used for representing the length of a character. @@ -81,10 +85,13 @@ private: */ std::vector offsets; + /** + * Map used to store discontinuities in the character offsets. + */ + std::unordered_map gaps; + /** * Last position given as "end" position in the storeOffset() method. - * Used to adapt the length of the previous element in case start and end - * positions do not match. */ SourceOffset lastEnd; @@ -105,19 +112,22 @@ public: // Make sure (end - start) is smaller than MAX_LEN assert(end - start < MAX_LEN); - // Adapt the length of the previous character in case there is a gap - if (!lens.empty() && start > lastEnd) { - lens.back() += start - lastEnd; - } - lastEnd = end; - // Store an absolute offset every OFFSET_INTERVAL elements if ((lens.size() & OFFSET_INTERVAL_MASK) == 0) { offsets.push_back(start); } - // Store the length - lens.push_back(end - start); + // Adapt the length of the previous character in case there is a gap + if (!lens.empty() && start > lastEnd) { + // There is a discontinuity, store the given offsets in the "gaps" + // map + gaps[lens.size()] = OffsPair(start, end); + lens.push_back(MAX_LEN); + } else { + // Store the length + lens.push_back(end - start); + } + lastEnd = end; } /** @@ -127,14 +137,13 @@ public: * read. * @return a pair containing start and end source offset. 
*/ - std::pair loadOffset(size_t idx) const + OffsPair loadOffset(size_t idx) const { // Special treatment for the last character const size_t count = lens.size(); if (idx > 0 && idx == count) { auto offs = loadOffset(count - 1); - return std::pair(offs.second, - offs.second); + return OffsPair(offs.second, offs.second); } // Calculate the start index in the lens vector and in the offsets @@ -146,12 +155,26 @@ public: assert(idx < count); assert(offsetIdx < offsets.size()); + // If the length of the last character is MAX_LEN, the position is + // stored in the "gaps" list + if (lens[idx] == MAX_LEN) { + auto it = gaps.find(idx); + assert(it != gaps.end()); + return it->second; + } + // Sum over the length starting with the start offset SourceOffset start = offsets[offsetIdx]; for (size_t i = sumStartIdx; i < idx; i++) { - start += lens[i]; + if (lens[i] == MAX_LEN) { + auto it = gaps.find(i); + assert(it != gaps.end()); + start = it->second.first; + } else { + start += lens[i]; + } } - return std::pair(start, start + lens[idx]); + return OffsPair(start, start + lens[idx]); } /** @@ -166,13 +189,16 @@ public: * @param length is the number of characters to which the TokenizedData * instance should be trimmed. */ - void trim(size_t length) { + void trim(size_t length) + { if (length < size()) { lens.resize(length); - offsets.resize((length >> LOG2_OFFSET_INTERVAL) + 1); if (length > 0) { + offsets.resize((length >> LOG2_OFFSET_INTERVAL) + 1); lastEnd = loadOffset(length - 1).second; } else { + offsets.clear(); + gaps.clear(); lastEnd = 0; } } @@ -182,9 +208,11 @@ public: * Resets the SourceOffsetVector to the state it had when it was * constructed. 
*/ - void clear() { + void clear() + { lens.clear(); offsets.clear(); + gaps.clear(); lastEnd = 0; } }; diff --git a/test/core/parser/utils/SourceOffsetVectorTest.cpp b/test/core/parser/utils/SourceOffsetVectorTest.cpp index 25a4163..26254f9 100644 --- a/test/core/parser/utils/SourceOffsetVectorTest.cpp +++ b/test/core/parser/utils/SourceOffsetVectorTest.cpp @@ -51,7 +51,7 @@ TEST(SourceOffsetVector, gaps) for (size_t i = 0; i < 999; i++) { auto elem = vec.loadOffset(i); EXPECT_EQ(i * 3 + 5, elem.first); - EXPECT_EQ((i + 1) * 3 + 5, elem.second); + EXPECT_EQ(i * 3 + 7, elem.second); } auto elem = vec.loadOffset(999); EXPECT_EQ(999U * 3 + 5, elem.first); -- cgit v1.2.3 From 31c83c05d257c9a7a336f12342c401f97d380674 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 1 Mar 2015 13:50:15 +0100 Subject: Prefer longer non-primary tokens --- src/core/parser/utils/Tokenizer.cpp | 45 +++++----- test/core/parser/utils/TokenizerTest.cpp | 148 ++++++++++++++++++++++++++----- 2 files changed, 150 insertions(+), 43 deletions(-) diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index 94d9cb0..8d540a6 100644 --- a/src/core/parser/utils/Tokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -188,7 +188,7 @@ bool Tokenizer::next(CharReader &reader, Token &token, TokenizedData &data) const size_t dataStartOffset = data.size(); // If we do not have a match yet, start a new lookup from the root - if (!bestMatch.hasMatch()) { + if (!bestMatch.hasMatch() || !bestMatch.primary) { lookups.emplace_back(root, charStart, dataStartOffset); } @@ -201,36 +201,35 @@ bool Tokenizer::next(CharReader &reader, Token &token, TokenizedData &data) continue; } - // If the matched token is primary, check whether it is better than - // the current best match, if yes, replace the best match. 
In any - // case just continue - if (match.primary) { - if (match.size() > bestMatch.size()) { - bestMatch = match; - } - continue; + // Replace the best match with longest token + if (match.size() > bestMatch.size()) { + bestMatch = match; } - // Otherwise -- if the matched token is a non-primary token (and no - // primary token has been found until now) -- mark the match in the - // TokenizedData - if (!bestMatch.hasMatch()) { + // If the matched token is a non-primary token -- mark the match in + // the TokenizedData list + if (!match.primary) { data.mark(match.token.id, data.size() - match.size() + 1, match.size()); } } - // We have found a token and there are no more states to advance or the - // text handler has found something -- abort to return the new token - if (bestMatch.hasMatch()) { - if ((nextLookups.empty() || data.size() > initialDataSize)) { + + // If a token has been found and the token is a primary token, check + // whether we have to abort, otherwise if we have a non-primary match, + // reset it once it can no longer be advanced + if (bestMatch.hasMatch() && nextLookups.empty()) { + if (bestMatch.primary) { break; + } else { + bestMatch = TokenMatch{}; } - } else { - // Record all incomming characters - data.append(c, charStart, charEnd); } + // Record all incomming characters + data.append(c, charStart, charEnd); + + // Swap the lookups and the nextLookups list lookups = std::move(nextLookups); nextLookups.clear(); @@ -241,17 +240,17 @@ bool Tokenizer::next(CharReader &reader, Token &token, TokenizedData &data) // If we found data, emit a corresponding data token if (data.size() > initialDataSize && - (!bestMatch.hasMatch() || + (!bestMatch.hasMatch() || !bestMatch.primary || bestMatch.dataStartOffset > initialDataSize)) { // If we have a "bestMatch" wich starts after text data has started, // trim the TokenizedData to this offset - if (bestMatch.dataStartOffset > initialDataSize) { + if (bestMatch.dataStartOffset > initialDataSize && 
bestMatch.primary) { data.trim(bestMatch.dataStartOffset); } // Create a token containing the data location bestMatch.token = Token{data.getLocation()}; - } else if (bestMatch.hasMatch() && + } else if (bestMatch.hasMatch() && bestMatch.primary && bestMatch.dataStartOffset == initialDataSize) { data.trim(initialDataSize); } diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 9f644c2..45fc77a 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -26,6 +26,60 @@ namespace ousia { +static void assertPrimaryToken(CharReader &reader, Tokenizer &tokenizer, + TokenId id, const std::string &text, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset, + SourceId sourceId = InvalidSourceId) +{ + Token token; + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + EXPECT_EQ(id, token.id); + EXPECT_EQ(text, token.content); + if (start != InvalidSourceOffset) { + EXPECT_EQ(start, token.getLocation().getStart()); + } + if (end != InvalidSourceOffset) { + EXPECT_EQ(end, token.getLocation().getEnd()); + } + EXPECT_EQ(sourceId, token.getLocation().getSourceId()); +} + +static void expectData(const std::string &expected, SourceOffset tokenStart, + SourceOffset tokenEnd, SourceOffset textStart, + SourceOffset textEnd, const Token &token, + TokenizedData &data, + WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ + ASSERT_EQ(Tokens::Data, token.id); + + Token textToken; + TokenizedDataReader reader = data.reader(); + ASSERT_TRUE(reader.read(textToken, TokenSet{}, mode)); + + EXPECT_EQ(expected, textToken.content); + EXPECT_EQ(tokenStart, token.location.getStart()); + EXPECT_EQ(tokenEnd, token.location.getEnd()); + EXPECT_EQ(textStart, textToken.getLocation().getStart()); + EXPECT_EQ(textEnd, textToken.getLocation().getEnd()); + EXPECT_TRUE(reader.atEnd()); +} + +static void assertDataToken(CharReader &reader, Tokenizer &tokenizer, + 
const std::string &expected, + SourceOffset tokenStart, SourceOffset tokenEnd, + SourceOffset textStart, SourceOffset textEnd, + WhitespaceMode mode = WhitespaceMode::PRESERVE) +{ + Token token; + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + + expectData(expected, tokenStart, tokenEnd, textStart, textEnd, token, data, + mode); +} + TEST(Tokenizer, tokenRegistration) { Tokenizer tokenizer; @@ -53,25 +107,6 @@ TEST(Tokenizer, tokenRegistration) ASSERT_EQ("d", tokenizer.lookupToken(1U).string); } -void expectData(const std::string &expected, SourceOffset tokenStart, - SourceOffset tokenEnd, SourceOffset textStart, - SourceOffset textEnd, const Token &token, TokenizedData &data, - WhitespaceMode mode = WhitespaceMode::PRESERVE) -{ - ASSERT_EQ(Tokens::Data, token.id); - - Token textToken; - TokenizedDataReader reader = data.reader(); - ASSERT_TRUE(reader.read(textToken, TokenSet{}, mode)); - - EXPECT_EQ(expected, textToken.content); - EXPECT_EQ(tokenStart, token.location.getStart()); - EXPECT_EQ(tokenEnd, token.location.getEnd()); - EXPECT_EQ(textStart, textToken.getLocation().getStart()); - EXPECT_EQ(textEnd, textToken.getLocation().getEnd()); - EXPECT_TRUE(reader.atEnd()); -} - TEST(Tokenizer, textTokenPreserveWhitespace) { { @@ -451,6 +486,80 @@ TEST(Tokenizer, nonPrimaryTokens) ASSERT_FALSE(tokenizer.read(reader, token, data)); } +TEST(Tokenizer, primaryNonPrimaryTokenInteraction) +{ + CharReader reader{"<><<<>>"}; + // 01234567890123456789012 3456789012345 + // 0 1 2 3 + + Tokenizer tokenizer; + + TokenId tP1 = tokenizer.registerToken("<", true); + TokenId tP2 = tokenizer.registerToken(">", true); + TokenId tP3 = tokenizer.registerToken("\\>", true); + TokenId tN1 = tokenizer.registerToken("<<", false); + TokenId tN2 = tokenizer.registerToken(">>", false); + + TokenSet tokens = TokenSet{tN1, tN2}; + + Token token, textToken; + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, 
token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 0, 2); + assertText(dataReader, "test1", tokens, WhitespaceMode::TRIM, 2, 7); + assertToken(dataReader, tN2, ">>", tokens, WhitespaceMode::TRIM, 7, 9); + assertEnd(dataReader); + } + + assertPrimaryToken(reader, tokenizer, tP1, "<", 9, 10); + assertDataToken(reader, tokenizer, "test2", 10, 15, 10, 15); + assertPrimaryToken(reader, tokenizer, tP2, ">", 15, 16); + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 16, 18); + assertText(dataReader, "test3", tokens, WhitespaceMode::TRIM, 18, 23); + assertEnd(dataReader); + } + + assertPrimaryToken(reader, tokenizer, tP3, "\\>", 23, 25); + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertToken(dataReader, tN1, "<<", tokens, WhitespaceMode::TRIM, 25, 27); + assertEnd(dataReader); + } + + assertPrimaryToken(reader, tokenizer, tP1, "<", 27, 28); + + { + TokenizedData data; + ASSERT_TRUE(tokenizer.read(reader, token, data)); + ASSERT_EQ(Tokens::Data, token.id); + + TokenizedDataReader dataReader = data.reader(); + assertText(dataReader, "test4", tokens, WhitespaceMode::TRIM, 28, 33); + assertToken(dataReader, tN2, ">>", tokens, WhitespaceMode::TRIM, 33, 35); + assertEnd(dataReader); + } + + assertPrimaryToken(reader, tokenizer, tP2, ">", 35, 36); + + TokenizedData data; + ASSERT_FALSE(tokenizer.read(reader, token, data)); +} TEST(Tokenizer, ambiguousTokens2) { @@ -476,6 +585,5 @@ TEST(Tokenizer, ambiguousTokens2) ASSERT_FALSE(tokenizer.read(reader, token, data)); } } - } -- cgit v1.2.3 From 4e199ad0d5c5d94955839da2a52967b4f0f34a43 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 1 Mar 
2015 13:52:34 +0100 Subject: Implemented registration of user-defined tokens, fixed comment handling (do not issue multiple data events if a comment occurs, just skip the comment data like in TeX) --- src/formats/osml/OsmlStreamParser.cpp | 50 +++++++++++++++++++++++------- src/formats/osml/OsmlStreamParser.hpp | 11 +++++-- test/formats/osml/OsmlStreamParserTest.cpp | 13 +++----- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index e467dc5..823075a 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -16,6 +16,10 @@ along with this program. If not, see . */ +#include +#include +#include + #include #include #include @@ -27,9 +31,6 @@ #include "OsmlStreamParser.hpp" -#include -#include - namespace ousia { namespace { @@ -127,7 +128,7 @@ private: /** * Set to true if this is a command with clear begin and end. */ - bool hasRange: 1; + bool hasRange; public: /** @@ -407,6 +408,9 @@ public: State parse(); + TokenId registerToken(const std::string &token); + void unregisterToken(TokenId token); + const TokenizedData &getData() const { return data; } const Variant &getCommandName() const { return cmd().getName(); } const Variant &getCommandArguments() const { return cmd().getArguments(); } @@ -700,10 +704,11 @@ OsmlStreamParserImpl::State OsmlStreamParserImpl::parseCommand( void OsmlStreamParserImpl::parseBlockComment() { Token token; + TokenizedData commentData; size_t depth = 1; - while (tokenizer.read(reader, token, data)) { + while (tokenizer.read(reader, token, commentData)) { // Throw the comment data away - data.clear(); + commentData.clear(); if (token.id == OsmlTokens.BlockCommentEnd) { depth--; @@ -822,6 +827,14 @@ OsmlStreamParserImpl::State OsmlStreamParserImpl::parse() } else if (type == Tokens::Data) { reader.consumePeek(); continue; + } else if (type == OsmlTokens.LineComment) { + reader.consumePeek(); + 
parseLineComment(); + continue; + } else if (type == OsmlTokens.BlockCommentStart) { + reader.consumePeek(); + parseBlockComment(); + continue; } // A non-text token was reached, make sure all pending data commands @@ -836,11 +849,7 @@ OsmlStreamParserImpl::State OsmlStreamParserImpl::parse() // Synchronize the location with the current token location location = token.location; - if (token.id == OsmlTokens.LineComment) { - parseLineComment(); - } else if (token.id == OsmlTokens.BlockCommentStart) { - parseBlockComment(); - } else if (token.id == OsmlTokens.FieldStart) { + if (token.id == OsmlTokens.FieldStart) { cmd().pushField(false, token.location); return State::FIELD_START; } else if (token.id == OsmlTokens.FieldEnd) { @@ -914,6 +923,16 @@ OsmlStreamParserImpl::State OsmlStreamParserImpl::parse() return State::END; } +TokenId OsmlStreamParserImpl::registerToken(const std::string &token) +{ + return tokenizer.registerToken(token, false); +} + +void OsmlStreamParserImpl::unregisterToken(TokenId token) +{ + assert(tokenizer.unregisterToken(token)); +} + /* Class OsmlStreamParser */ OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) @@ -955,4 +974,13 @@ bool OsmlStreamParser::inDefaultField() const { return impl->inDefaultField(); } bool OsmlStreamParser::inRangeCommand() const { return impl->inRangeCommand(); } +TokenId OsmlStreamParser::registerToken(const std::string &token) +{ + return impl->registerToken(token); +} + +void OsmlStreamParser::unregisterToken(TokenId token) +{ + impl->unregisterToken(token); +} } diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp index 10d5296..b7e64f7 100644 --- a/src/formats/osml/OsmlStreamParser.hpp +++ b/src/formats/osml/OsmlStreamParser.hpp @@ -32,6 +32,8 @@ #include #include +#include + namespace ousia { // Forward declarations @@ -50,10 +52,10 @@ class Variant; * syntactically valid and tries to recorver from most errors. 
If an error is * irrecoverable (this is the case for errors with wrong nesting of commands or * fields, as this would lead to too many consecutive errors) a - * LoggableException is thrown. The OsmlStreamParser can be compared to a SAX - * parser for XML. + * LoggableException is thrown. In short, the OsmlStreamParser can be described + * as a SAX parser for OSML. */ -class OsmlStreamParser { +class OsmlStreamParser: public parser_stack::ParserCallbacks { public: /** * Enum used to indicate which state the OsmlStreamParser class is in @@ -204,6 +206,9 @@ public: * "{!" syntax). */ bool inDefaultField() const; + + TokenId registerToken(const std::string &token) override; + void unregisterToken(TokenId token) override; }; } diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 3e7f4c1..0ea087f 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -363,8 +363,7 @@ TEST(OsmlStreamParser, singleLineComment) CharReader charReader(testString); OsmlStreamParser parser(charReader, logger); - assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); - assertTextData(parser, "b", 33, 34, 33, 34, WhitespaceMode::PRESERVE); + assertTextData(parser, "ab", 0, 34, 0, 34, WhitespaceMode::PRESERVE); assertEnd(parser, 34, 34); } @@ -376,8 +375,7 @@ TEST(OsmlStreamParser, multilineComment) CharReader charReader(testString); OsmlStreamParser parser(charReader, logger); - assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); - assertTextData(parser, "b", 40, 41, 40, 41, WhitespaceMode::PRESERVE); + assertTextData(parser, "ab", 0, 41, 0, 41, WhitespaceMode::PRESERVE); assertEnd(parser, 41, 41); } @@ -391,10 +389,10 @@ TEST(OsmlStreamParser, unfinishedMultilineComment) logger.reset(); - assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); ASSERT_FALSE(logger.hasError()); - assertEnd(parser, 38, 38); + assertTextData(parser, "a", 0, 1, 0, 1, 
WhitespaceMode::PRESERVE); ASSERT_TRUE(logger.hasError()); + assertEnd(parser, 38, 38); } TEST(OsmlStreamParser, nestedMultilineComment) @@ -405,8 +403,7 @@ TEST(OsmlStreamParser, nestedMultilineComment) CharReader charReader(testString); OsmlStreamParser parser(charReader, logger); - assertTextData(parser, "a", 0, 1, 0, 1, WhitespaceMode::PRESERVE); - assertTextData(parser, "b", 40, 41, 40, 41, WhitespaceMode::PRESERVE); + assertTextData(parser, "ab", 0, 41, 0, 41, WhitespaceMode::PRESERVE); assertEnd(parser, 41, 41); } -- cgit v1.2.3 From 689348baf70d00e5ff1c8eec3959afc56071994e Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 1 Mar 2015 14:25:39 +0100 Subject: Added user defined tokens test --- test/formats/osml/OsmlStreamParserTest.cpp | 1104 ++++++++++++++-------------- 1 file changed, 568 insertions(+), 536 deletions(-) diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index 0ea087f..d47f529 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -24,17 +24,17 @@ #include #include #include - #include +#include + namespace ousia { static TerminalLogger logger(std::cerr, true); // static ConcreteLogger logger; static void assertCommandStart(OsmlStreamParser &parser, - const std::string &name, - bool rangeCommand, + const std::string &name, bool rangeCommand, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) { @@ -52,8 +52,7 @@ static void assertCommandStart(OsmlStreamParser &parser, } static void assertCommandStart(OsmlStreamParser &parser, - const std::string &name, - bool rangeCommand, + const std::string &name, bool rangeCommand, const Variant::mapType &args, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) @@ -62,17 +61,16 @@ static void assertCommandStart(OsmlStreamParser &parser, EXPECT_EQ(args, parser.getCommandArguments()); } -static void assertCommand(OsmlStreamParser &parser, - 
const std::string &name, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) +static void assertCommand(OsmlStreamParser &parser, const std::string &name, + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { assertCommandStart(parser, name, false, Variant::mapType{}, start, end); } static void assertRangeEnd(OsmlStreamParser &parser, - SourceOffset start = InvalidSourceOffset, - SourceOffset end = InvalidSourceOffset) + SourceOffset start = InvalidSourceOffset, + SourceOffset end = InvalidSourceOffset) { ASSERT_EQ(OsmlStreamParser::State::RANGE_END, parser.parse()); if (start != InvalidSourceOffset) { @@ -116,11 +114,12 @@ static void assertTextData(OsmlStreamParser &parser, const std::string &text, } static void assertData(OsmlStreamParser &parser, const std::string &text, - SourceOffset textStart = InvalidSourceOffset, - SourceOffset textEnd = InvalidSourceOffset, - WhitespaceMode mode = WhitespaceMode::COLLAPSE) + SourceOffset textStart = InvalidSourceOffset, + SourceOffset textEnd = InvalidSourceOffset, + WhitespaceMode mode = WhitespaceMode::COLLAPSE) { - assertTextData(parser, text, InvalidSourceOffset, InvalidSourceOffset, textStart, textEnd, mode); + assertTextData(parser, text, InvalidSourceOffset, InvalidSourceOffset, + textStart, textEnd, mode); } static void assertEmptyData(OsmlStreamParser &parser) @@ -134,7 +133,6 @@ static void assertEmptyData(OsmlStreamParser &parser) EXPECT_FALSE(dataReader.read(token, TokenSet{}, WhitespaceMode::TRIM)); } - static void assertFieldStart(OsmlStreamParser &parser, bool defaultField, SourceOffset start = InvalidSourceOffset, SourceOffset end = InvalidSourceOffset) @@ -585,363 +583,363 @@ TEST(OsmlStreamParser, dataOutsideField) TEST(OsmlStreamParser, nestedCommand) { - const char *testString = "\\test{a}{\\test2{b} c} d"; - // 012345678 90123456789012 - // 0 1 2 - CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + 
const char *testString = "\\test{a}{\\test2{b} c} d"; + // 012345678 90123456789012 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(parser, "test", 0, 5); - assertFieldStart(parser, false, 5, 6); - assertData(parser, "a", 6, 7); - assertFieldEnd(parser, 7, 8); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertData(parser, "a", 6, 7); + assertFieldEnd(parser, 7, 8); - assertFieldStart(parser, false, 8, 9); - assertCommand(parser, "test2", 9, 15); - assertFieldStart(parser, false, 15, 16); - assertData(parser, "b", 16, 17); - assertFieldEnd(parser, 17, 18); - assertData(parser, "c", 19, 20); - assertFieldEnd(parser, 20, 21); - assertData(parser, "d", 22, 23); - assertEnd(parser, 23, 23); + assertFieldStart(parser, false, 8, 9); + assertCommand(parser, "test2", 9, 15); + assertFieldStart(parser, false, 15, 16); + assertData(parser, "b", 16, 17); + assertFieldEnd(parser, 17, 18); + assertData(parser, "c", 19, 20); + assertFieldEnd(parser, 20, 21); + assertData(parser, "d", 22, 23); + assertEnd(parser, 23, 23); } - TEST(OsmlStreamParser, nestedCommandImmediateEnd) { - const char *testString = "\\test{\\test2{b}} d"; - // 012345 678901234567 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); - - assertCommand(parser, "test", 0, 5); - assertFieldStart(parser, false, 5, 6); - { - assertCommand(parser, "test2", 6, 12); - assertFieldStart(parser, false, 12, 13); - assertData(parser, "b", 13, 14); - assertFieldEnd(parser, 14, 15); - } - assertFieldEnd(parser, 15, 16); - assertData(parser, "d", 17, 18); - assertEnd(parser, 18, 18); + const char *testString = "\\test{\\test2{b}} d"; + // 012345 678901234567 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + { + assertCommand(parser, "test2", 6, 12); + 
assertFieldStart(parser, false, 12, 13); + assertData(parser, "b", 13, 14); + assertFieldEnd(parser, 14, 15); + } + assertFieldEnd(parser, 15, 16); + assertData(parser, "d", 17, 18); + assertEnd(parser, 18, 18); } TEST(OsmlStreamParser, nestedCommandNoData) { - const char *testString = "\\test{\\test2}"; - // 012345 6789012 - CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + const char *testString = "\\test{\\test2}"; + // 012345 6789012 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(parser, "test", 0, 5); - assertFieldStart(parser, false, 5, 6); - assertCommand(parser, "test2", 6, 12); - assertFieldEnd(parser, 12, 13); - assertEnd(parser, 13, 13); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldEnd(parser, 12, 13); + assertEnd(parser, 13, 13); } TEST(OsmlStreamParser, multipleCommands) { - const char *testString = "\\a \\b \\c \\d"; - // 012 345 678 90 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + const char *testString = "\\a \\b \\c \\d"; + // 012 345 678 90 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); - assertCommand(parser, "a", 0, 2); - assertEmptyData(parser); - assertCommand(parser, "b", 3, 5); - assertEmptyData(parser); - assertCommand(parser, "c", 6, 8); - assertEmptyData(parser); - assertCommand(parser, "d", 9, 11); - assertEnd(parser, 11, 11); + assertCommand(parser, "a", 0, 2); + assertEmptyData(parser); + assertCommand(parser, "b", 3, 5); + assertEmptyData(parser); + assertCommand(parser, "c", 6, 8); + assertEmptyData(parser); + assertCommand(parser, "d", 9, 11); + assertEnd(parser, 11, 11); } TEST(OsmlStreamParser, fieldsWithSpaces) { - const char *testString = "\\a {\\b \\c} \n\n {\\d}"; - // 0123 456 789012 3 456 789 - // 0 1 - CharReader charReader(testString); - OsmlStreamParser 
parser(charReader, logger); - - assertCommand(parser, "a", 0, 2); - assertEmptyData(parser); - assertFieldStart(parser, false, 3, 4); - assertCommand(parser, "b", 4, 6); - assertEmptyData(parser); - assertCommand(parser, "c", 7, 9); - assertFieldEnd(parser, 9, 10); - assertEmptyData(parser); - assertFieldStart(parser, false, 16, 17); - assertCommand(parser, "d", 17, 19); - assertFieldEnd(parser, 19, 20); - assertEnd(parser, 20, 20); + const char *testString = "\\a {\\b \\c} \n\n {\\d}"; + // 0123 456 789012 3 456 789 + // 0 1 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + + assertCommand(parser, "a", 0, 2); + assertEmptyData(parser); + assertFieldStart(parser, false, 3, 4); + assertCommand(parser, "b", 4, 6); + assertEmptyData(parser); + assertCommand(parser, "c", 7, 9); + assertFieldEnd(parser, 9, 10); + assertEmptyData(parser); + assertFieldStart(parser, false, 16, 17); + assertCommand(parser, "d", 17, 19); + assertFieldEnd(parser, 19, 20); + assertEnd(parser, 20, 20); } TEST(OsmlStreamParser, errorEndButOpenField) { - const char *testString = "\\a b {"; - // 012345 - // 0 - CharReader charReader(testString); + const char *testString = "\\a b {"; + // 012345 + // 0 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(parser, "a", 0, 2); - assertData(parser, "b", 3, 4); - assertFieldStart(parser, false, 5, 6); - ASSERT_FALSE(logger.hasError()); - assertEnd(parser, 6, 6); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "a", 0, 2); + assertData(parser, "b", 3, 4); + assertFieldStart(parser, false, 5, 6); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 6, 6); + ASSERT_TRUE(logger.hasError()); } - TEST(OsmlStreamParser, errorNoFieldToEnd) { - const char *testString = "\\a b }"; - // 012345 - // 0 - CharReader charReader(testString); + const char *testString = "\\a b }"; + // 012345 + 
// 0 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(parser, "a", 0, 2); - assertData(parser, "b", 3, 4); - ASSERT_FALSE(logger.hasError()); - assertEnd(parser, 6, 6); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "a", 0, 2); + assertData(parser, "b", 3, 4); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 6, 6); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorNoFieldEndNested) { - const char *testString = "\\test{\\test2{}}}"; - // 012345 6789012345 - // 0 1 - CharReader charReader(testString); + const char *testString = "\\test{\\test2{}}}"; + // 012345 6789012345 + // 0 1 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(parser, "test", 0, 5); - assertFieldStart(parser, false, 5, 6); - assertCommand(parser, "test2", 6, 12); - assertFieldStart(parser, false, 12, 13); - assertFieldEnd(parser, 13, 14); - assertFieldEnd(parser, 14, 15); - ASSERT_FALSE(logger.hasError()); - assertEnd(parser, 16, 16); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertFieldEnd(parser, 13, 14); + assertFieldEnd(parser, 14, 15); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 16, 16); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorNoFieldEndNestedData) { - const char *testString = "\\test{\\test2{}}a}"; - // 012345 67890123456 - // 0 1 - CharReader charReader(testString); + const char *testString = "\\test{\\test2{}}a}"; + // 012345 67890123456 + // 0 1 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - 
assertCommand(parser, "test", 0, 5); - assertFieldStart(parser, false, 5, 6); - assertCommand(parser, "test2", 6, 12); - assertFieldStart(parser, false, 12, 13); - assertFieldEnd(parser, 13, 14); - assertFieldEnd(parser, 14, 15); - assertData(parser, "a", 15, 16); - ASSERT_FALSE(logger.hasError()); - assertEnd(parser, 17, 17); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommand(parser, "test", 0, 5); + assertFieldStart(parser, false, 5, 6); + assertCommand(parser, "test2", 6, 12); + assertFieldStart(parser, false, 12, 13); + assertFieldEnd(parser, 13, 14); + assertFieldEnd(parser, 14, 15); + assertData(parser, "a", 15, 16); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser, 17, 17); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, beginEnd) { - const char *testString = "\\begin{book}\\end{book}"; - // 012345678901 2345678901 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}\\end{book}"; + // 012345678901 2345678901 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); - assertRangeEnd(parser, 17, 21); - assertEnd(parser, 22, 22); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertRangeEnd(parser, 17, 21); + assertEnd(parser, 22, 22); } TEST(OsmlStreamParser, beginEndWithName) { - const char *testString = "\\begin{book#a}\\end{book}"; - // 01234567890123 4567890123 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book#a}\\end{book}"; + // 01234567890123 4567890123 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommandStart(parser, "book", true, {{"name", "a"}}, 7, 11); - assertRangeEnd(parser, 19, 23); - assertEnd(parser, 24, 24); + assertCommandStart(parser, "book", true, 
{{"name", "a"}}, 7, 11); + assertRangeEnd(parser, 19, 23); + assertEnd(parser, 24, 24); } TEST(OsmlStreamParser, beginEndWithNameAndArgs) { - const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; - // 0123456789012345678901234 56789 01 2345678901 - // 0 1 2 3 4 - CharReader charReader(testString); + const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}"; + // 0123456789012345678901234 56789 01 2345678901 + // 0 1 2 3 4 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommandStart(parser, "book", true, - {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertRangeEnd(parser, 37, 41); - assertEnd(parser, 42, 42); + assertCommandStart(parser, "book", true, + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, + 11); + assertRangeEnd(parser, 37, 41); + assertEnd(parser, 42, 42); } TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields) { - const char *testString = - "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; - // 0123456789012345678901234 56789 01234 567890123 45678901 2345678901 - // 0 1 2 3 4 5 6 - CharReader charReader(testString); - - OsmlStreamParser parser(charReader, logger); - - assertCommandStart(parser, "book", true, - {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11); - assertFieldStart(parser, false, 32, 33); - assertData(parser, "a", 33, 34); - assertCommand(parser, "test", 35, 40); - assertFieldEnd(parser, 40, 41); - assertFieldStart(parser, false, 41, 42); - assertData(parser, "b", 42, 43); - assertCommand(parser, "test", 44, 49); - assertFieldStart(parser, false, 49, 50); - assertFieldEnd(parser, 50, 51); - assertFieldEnd(parser, 51, 52); - assertRangeEnd(parser, 57, 61); - assertEnd(parser, 62, 62); + const char *testString = + "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}"; + // 0123456789012345678901234 56789 01234 567890123 45678901 
2345678901 + // 0 1 2 3 4 5 6 + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, "book", true, + {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, + 11); + assertFieldStart(parser, false, 32, 33); + assertData(parser, "a", 33, 34); + assertCommand(parser, "test", 35, 40); + assertFieldEnd(parser, 40, 41); + assertFieldStart(parser, false, 41, 42); + assertData(parser, "b", 42, 43); + assertCommand(parser, "test", 44, 49); + assertFieldStart(parser, false, 49, 50); + assertFieldEnd(parser, 50, 51); + assertFieldEnd(parser, 51, 52); + assertRangeEnd(parser, 57, 61); + assertEnd(parser, 62, 62); } TEST(OsmlStreamParser, beginEndWithData) { - const char *testString = "\\begin{book}a\\end{book}"; - // 0123456789012 3456789012 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}a\\end{book}"; + // 0123456789012 3456789012 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); - assertData(parser, "a", 12, 13); - assertRangeEnd(parser, 18, 22); - assertEnd(parser, 23, 23); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertRangeEnd(parser, 18, 22); + assertEnd(parser, 23, 23); } TEST(OsmlStreamParser, beginEndNested) { - const char *testString = - "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; - // 012345678901234 5678901234567890 123456 7890123 4567890 - // 0 1 2 3 4 5 - CharReader charReader(testString); + const char *testString = + "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}"; + // 012345678901234 5678901234567890 123456 7890123 4567890 + // 0 1 2 3 4 5 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommandStart(parser, "a", 
true, Variant::mapType{}, 7, 8); - assertFieldStart(parser, false, 9, 10); - assertData(parser, "b", 10, 11); - assertFieldEnd(parser, 11, 12); + assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); + assertFieldStart(parser, false, 9, 10); + assertData(parser, "b", 10, 11); + assertFieldEnd(parser, 11, 12); - assertData(parser, "c", 13, 14); + assertData(parser, "c", 13, 14); - assertCommandStart(parser, "d", true, Variant::mapType{}, 22, 23); - assertFieldStart(parser, false, 24, 25); - assertData(parser, "e", 25, 26); - assertFieldEnd(parser, 26, 27); - assertFieldStart(parser, false, 27, 28); - assertData(parser, "f", 28, 29); - assertFieldEnd(parser, 29, 30); + assertCommandStart(parser, "d", true, Variant::mapType{}, 22, 23); + assertFieldStart(parser, false, 24, 25); + assertData(parser, "e", 25, 26); + assertFieldEnd(parser, 26, 27); + assertFieldStart(parser, false, 27, 28); + assertData(parser, "f", 28, 29); + assertFieldEnd(parser, 29, 30); - assertEmptyData(parser); - assertCommand(parser, "g", 31, 33); - assertFieldStart(parser, false, 33, 34); - assertData(parser, "h", 34, 35); - assertFieldEnd(parser, 35, 36); - assertEmptyData(parser); - assertRangeEnd(parser, 42, 43); - assertRangeEnd(parser, 49, 50); - assertEnd(parser, 51, 51); + assertEmptyData(parser); + assertCommand(parser, "g", 31, 33); + assertFieldStart(parser, false, 33, 34); + assertData(parser, "h", 34, 35); + assertFieldEnd(parser, 35, 36); + assertEmptyData(parser); + assertRangeEnd(parser, 42, 43); + assertRangeEnd(parser, 49, 50); + assertEnd(parser, 51, 51); } TEST(OsmlStreamParser, beginEndWithCommand) { - const char *testString = "\\begin{book}\\a{test}\\end{book}"; - // 012345678901 23456789 0123456789 - // 0 1 2 - CharReader charReader(testString); + const char *testString = "\\begin{book}\\a{test}\\end{book}"; + // 012345678901 23456789 0123456789 + // 0 1 2 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser 
parser(charReader, logger); - assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); - assertCommand(parser, "a", 12, 14); - assertFieldStart(parser, false, 14, 15); - assertData(parser, "test", 15, 19); - assertFieldEnd(parser, 19, 20); - assertRangeEnd(parser, 25, 29); - assertEnd(parser, 30, 30); + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertCommand(parser, "a", 12, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "test", 15, 19); + assertFieldEnd(parser, 19, 20); + assertRangeEnd(parser, 25, 29); + assertEnd(parser, 30, 30); } TEST(OsmlStreamParser, beginEndNestedFields) { - const char *testString = "\\begin{book}a{{b{c}}}\\end{book}"; - // 012345678901234567890 1234567890 - // 0 1 2 3 - CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); - logger.reset(); - - assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); - assertData(parser, "a", 12, 13); - assertFieldStart(parser, false, 13, 14); - assertFieldStart(parser, false, 14, 15); - assertData(parser, "b", 15, 16); - assertFieldStart(parser, false, 16, 17); - assertData(parser, "c", 17, 18); - assertFieldEnd(parser, 18, 19); - assertFieldEnd(parser, 19, 20); - assertFieldEnd(parser, 20, 21); - assertRangeEnd(parser, 26, 30); - assertEnd(parser, 31, 31); + const char *testString = "\\begin{book}a{{b{c}}}\\end{book}"; + // 012345678901234567890 1234567890 + // 0 1 2 3 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + logger.reset(); + + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertFieldStart(parser, false, 13, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "b", 15, 16); + assertFieldStart(parser, false, 16, 17); + assertData(parser, "c", 17, 18); + assertFieldEnd(parser, 18, 19); + assertFieldEnd(parser, 19, 20); + assertFieldEnd(parser, 20, 21); + assertRangeEnd(parser, 26, 30); 
+ assertEnd(parser, 31, 31); } TEST(OsmlStreamParser, errorBeginEndUnbalancedNestedFields) { - const char *testString = "\\begin{book}a{{b{c}}\\end{book}"; - // 012345678901234567890 123456789 - // 0 1 2 - CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); - logger.reset(); - - assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); - assertData(parser, "a", 12, 13); - assertFieldStart(parser, false, 13, 14); - assertFieldStart(parser, false, 14, 15); - assertData(parser, "b", 15, 16); - assertFieldStart(parser, false, 16, 17); - assertData(parser, "c", 17, 18); - assertFieldEnd(parser, 18, 19); - assertFieldEnd(parser, 19, 20); - ASSERT_THROW(assertRangeEnd(parser, 25, 29), LoggableException); + const char *testString = "\\begin{book}a{{b{c}}\\end{book}"; + // 012345678901234567890 123456789 + // 0 1 2 + CharReader charReader(testString); + OsmlStreamParser parser(charReader, logger); + logger.reset(); + + assertCommandStart(parser, "book", true, Variant::mapType{}, 7, 11); + assertData(parser, "a", 12, 13); + assertFieldStart(parser, false, 13, 14); + assertFieldStart(parser, false, 14, 15); + assertData(parser, "b", 15, 16); + assertFieldStart(parser, false, 16, 17); + assertData(parser, "c", 17, 18); + assertFieldEnd(parser, 18, 19); + assertFieldEnd(parser, 19, 20); + ASSERT_THROW(assertRangeEnd(parser, 25, 29), LoggableException); } TEST(OsmlStreamParser, errorBeginEndUnbalancedFields) @@ -962,446 +960,480 @@ TEST(OsmlStreamParser, errorBeginEndUnbalancedFields) TEST(OsmlStreamParser, errorBeginNoBraceOpen) { - const char *testString = "\\begin a"; - // 01234567 - CharReader charReader(testString); + const char *testString = "\\begin a"; + // 01234567 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertData(parser, "a", 7, 8); - ASSERT_TRUE(logger.hasError()); - 
assertEnd(parser, 8, 8); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(parser, "a", 7, 8); + ASSERT_TRUE(logger.hasError()); + assertEnd(parser, 8, 8); } TEST(OsmlStreamParser, errorBeginNoIdentifier) { - const char *testString = "\\begin{!"; - CharReader charReader(testString); + const char *testString = "\\begin{!"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(parser.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoBraceClose) { - const char *testString = "\\begin{a"; - CharReader charReader(testString); + const char *testString = "\\begin{a"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(parser.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginNoName) { - const char *testString = "\\begin{a#}"; - CharReader charReader(testString); + const char *testString = "\\begin{a#}"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommandStart(parser, "a", true); - ASSERT_TRUE(logger.hasError()); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertEnd(parser); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommandStart(parser, "a", true); + ASSERT_TRUE(logger.hasError()); + 
logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertEnd(parser); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBraceOpen) { - const char *testString = "\\end a"; - // 012345 - CharReader charReader(testString); + const char *testString = "\\end a"; + // 012345 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertData(parser, "a", 5, 6); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertData(parser, "a", 5, 6); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoIdentifier) { - const char *testString = "\\end{!"; - CharReader charReader(testString); + const char *testString = "\\end{!"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(parser.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBraceClose) { - const char *testString = "\\end{a"; - CharReader charReader(testString); + const char *testString = "\\end{a"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(parser.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorEndNoBegin) { - const char *testString = "\\end{a}"; - CharReader charReader(testString); + const char *testString = "\\end{a}"; + CharReader charReader(testString); - 
OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(parser.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, errorBeginEndMismatch) { - const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; - // 0123456789 012345678901234 5678901 - // 0 1 2 3 - CharReader charReader(testString); + const char *testString = "\\begin{a} \\begin{b} test \\end{a}"; + // 0123456789 012345678901234 5678901 + // 0 1 2 3 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); - assertEmptyData(parser); - assertCommandStart(parser, "b", true, Variant::mapType{}, 17, 18); - assertData(parser, "test", 20, 24); - ASSERT_FALSE(logger.hasError()); - ASSERT_THROW(parser.parse(), LoggableException); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + assertCommandStart(parser, "a", true, Variant::mapType{}, 7, 8); + assertEmptyData(parser); + assertCommandStart(parser, "b", true, Variant::mapType{}, 17, 18); + assertData(parser, "test", 20, 24); + ASSERT_FALSE(logger.hasError()); + ASSERT_THROW(parser.parse(), LoggableException); + ASSERT_TRUE(logger.hasError()); } TEST(OsmlStreamParser, commandWithNSSep) { - const char *testString = "\\test1:test2"; - // 012345678901 - CharReader charReader(testString); + const char *testString = "\\test1:test2"; + // 012345678901 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(parser, "test1:test2", 0, 12); - assertEnd(parser, 12, 12); + assertCommand(parser, "test1:test2", 0, 12); + assertEnd(parser, 12, 12); } 
TEST(OsmlStreamParser, beginEndWithNSSep) { - const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; - // 0123456789012345678 90123456789012345 - // 0 1 2 3 - CharReader charReader(testString); + const char *testString = "\\begin{test1:test2}\\end{test1:test2}"; + // 0123456789012345678 90123456789012345 + // 0 1 2 3 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommandStart(parser, "test1:test2", true, Variant::mapType{}, 7, 18); - assertRangeEnd(parser, 24, 35); - assertEnd(parser, 36, 36); + assertCommandStart(parser, "test1:test2", true, Variant::mapType{}, 7, 18); + assertRangeEnd(parser, 24, 35); + assertEnd(parser, 36, 36); } TEST(OsmlStreamParser, errorBeginNSSep) { - const char *testString = "\\begin:test{blub}\\end{blub}"; - CharReader charReader(testString); + const char *testString = "\\begin:test{blub}\\end{blub}"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommandStart(parser, "blub", true, Variant::mapType{}); - ASSERT_TRUE(logger.hasError()); - assertRangeEnd(parser); - assertEnd(parser); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommandStart(parser, "blub", true, Variant::mapType{}); + ASSERT_TRUE(logger.hasError()); + assertRangeEnd(parser); + assertEnd(parser); } TEST(OsmlStreamParser, errorEndNSSep) { - const char *testString = "\\begin{blub}\\end:test{blub}"; - CharReader charReader(testString); + const char *testString = "\\begin{blub}\\end:test{blub}"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommandStart(parser, "blub", true, Variant::mapType{}); - ASSERT_FALSE(logger.hasError()); - assertRangeEnd(parser); - ASSERT_TRUE(logger.hasError()); - assertEnd(parser); 
+ logger.reset(); + assertCommandStart(parser, "blub", true, Variant::mapType{}); + ASSERT_FALSE(logger.hasError()); + assertRangeEnd(parser); + ASSERT_TRUE(logger.hasError()); + assertEnd(parser); } TEST(OsmlStreamParser, errorEmptyNs) { - const char *testString = "\\test:"; - CharReader charReader(testString); + const char *testString = "\\test:"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(parser, "test"); - ASSERT_TRUE(logger.hasError()); - assertData(parser, ":"); - assertEnd(parser); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(parser, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(parser, ":"); + assertEnd(parser); } TEST(OsmlStreamParser, errorRepeatedNs) { - const char *testString = "\\test::"; - CharReader charReader(testString); + const char *testString = "\\test::"; + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommand(parser, "test"); - ASSERT_TRUE(logger.hasError()); - assertData(parser, "::"); - assertEnd(parser); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommand(parser, "test"); + ASSERT_TRUE(logger.hasError()); + assertData(parser, "::"); + assertEnd(parser); } TEST(OsmlStreamParser, explicitDefaultField) { - const char *testString = "\\a{!b}c"; - // 01234567 - CharReader charReader(testString); + const char *testString = "\\a{!b}c"; + // 01234567 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(parser, "a", 0, 2); - assertFieldStart(parser, true, 2, 4); - assertData(parser, "b", 4, 5); - assertFieldEnd(parser, 5, 6); - assertData(parser, "c", 6, 7); - assertEnd(parser, 7, 7); + assertCommand(parser, "a", 
0, 2); + assertFieldStart(parser, true, 2, 4); + assertData(parser, "b", 4, 5); + assertFieldEnd(parser, 5, 6); + assertData(parser, "c", 6, 7); + assertEnd(parser, 7, 7); } TEST(OsmlStreamParser, explicitDefaultFieldWithCommand) { - const char *testString = "\\a{!\\b}c"; - // 0123 4567 - CharReader charReader(testString); + const char *testString = "\\a{!\\b}c"; + // 0123 4567 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertCommand(parser, "a", 0, 2); - assertFieldStart(parser, true, 2, 4); - assertCommand(parser, "b", 4, 6); - assertFieldEnd(parser, 6, 7); - assertData(parser, "c", 7, 8); - assertEnd(parser, 8, 8); + assertCommand(parser, "a", 0, 2); + assertFieldStart(parser, true, 2, 4); + assertCommand(parser, "b", 4, 6); + assertFieldEnd(parser, 6, 7); + assertData(parser, "c", 7, 8); + assertEnd(parser, 8, 8); } TEST(OsmlStreamParser, fieldAfterExplicitDefaultField) { - const char *testString = "\\a{!\\b}{c}"; - // 0123 456789 - CharReader charReader(testString); + const char *testString = "\\a{!\\b}{c}"; + // 0123 456789 + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - assertCommand(parser, "a", 0, 2); - assertFieldStart(parser, true, 2, 4); - assertCommand(parser, "b", 4, 6); - assertFieldEnd(parser, 6, 7); - assertFieldStart(parser, false, 7, 8); - assertData(parser, "c", 8, 9); - assertFieldEnd(parser, 9, 10); - assertEnd(parser, 10, 10); + logger.reset(); + assertCommand(parser, "a", 0, 2); + assertFieldStart(parser, true, 2, 4); + assertCommand(parser, "b", 4, 6); + assertFieldEnd(parser, 6, 7); + assertFieldStart(parser, false, 7, 8); + assertData(parser, "c", 8, 9); + assertFieldEnd(parser, 9, 10); + assertEnd(parser, 10, 10); } TEST(OsmlStreamParser, annotationStart) { - const char *testString = "<\\a"; - // 0 12 + const char *testString = "<\\a"; + // 0 12 - 
CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); - assertEnd(parser, 3, 3); + assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); + assertEnd(parser, 3, 3); } TEST(OsmlStreamParser, annotationStartWithName) { - const char *testString = "<\\annotationWithName#aName"; - // 0 1234567890123456789012345 - // 0 1 2 + const char *testString = "<\\annotationWithName#aName"; + // 0 1234567890123456789012345 + // 0 1 2 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(parser, "annotationWithName", - Variant::mapType{{"name", "aName"}}, 0, 20); - assertEnd(parser, 26, 26); + assertAnnotationStart(parser, "annotationWithName", + Variant::mapType{{"name", "aName"}}, 0, 20); + assertEnd(parser, 26, 26); } TEST(OsmlStreamParser, annotationStartWithArguments) { - const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; - // 0 1234567890123456789012345678901234 - // 0 1 2 3 + const char *testString = "<\\annotationWithName#aName[a=1,b=2]"; + // 0 1234567890123456789012345678901234 + // 0 1 2 3 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart( - parser, "annotationWithName", - Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); - assertEnd(parser, 35, 35); + assertAnnotationStart( + parser, "annotationWithName", + Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20); + assertEnd(parser, 35, 35); } TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd) { - const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; - // 0 123456789012345678901234567 89012345 67 - // 0 1 2 3 + 
const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>"; + // 0 123456789012345678901234567 89012345 67 + // 0 1 2 3 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart( - parser, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, - 10); - ASSERT_TRUE(parser.inRangeCommand()); - assertData(parser, "a", 26, 27); - assertRangeEnd(parser, 33, 35); - assertAnnotationEnd(parser, "", "", 36, 38); - assertEnd(parser, 38, 38); + assertAnnotationStart( + parser, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8, + 10); + ASSERT_TRUE(parser.inRangeCommand()); + assertData(parser, "a", 26, 27); + assertRangeEnd(parser, 33, 35); + assertAnnotationEnd(parser, "", "", 36, 38); + assertEnd(parser, 38, 38); } TEST(OsmlStreamParser, annotationEnd) { - const char *testString = "\\a>"; - // 012 + const char *testString = "\\a>"; + // 012 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(parser, "a", "", 0, 2); - assertEnd(parser, 3, 3); + assertAnnotationEnd(parser, "a", "", 0, 2); + assertEnd(parser, 3, 3); } TEST(OsmlStreamParser, annotationEndWithName) { - const char *testString = "\\a#name>"; - // 01234567 + const char *testString = "\\a#name>"; + // 01234567 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(parser, "a", "name", 0, 2); - assertEnd(parser, 8, 8); + assertAnnotationEnd(parser, "a", "name", 0, 2); + assertEnd(parser, 8, 8); } TEST(OsmlStreamParser, annotationEndWithNameAsArgs) { - const char *testString = "\\a[name=name]>"; - // 01234567890123 + const char *testString = "\\a[name=name]>"; + // 01234567890123 - 
CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationEnd(parser, "a", "name", 0, 2); - assertEnd(parser, 14, 14); + assertAnnotationEnd(parser, "a", "name", 0, 2); + assertEnd(parser, 14, 14); } TEST(OsmlStreamParser, errorAnnotationEndWithArguments) { - const char *testString = "\\a[foo=bar]>"; - // 012345678901 - // 0 1 + const char *testString = "\\a[foo=bar]>"; + // 012345678901 + // 0 1 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - logger.reset(); - ASSERT_FALSE(logger.hasError()); - assertCommandStart(parser, "a", false, Variant::mapType{{"foo", "bar"}}, 0, 2); - ASSERT_TRUE(logger.hasError()); - assertData(parser, ">", 11, 12); - assertEnd(parser, 12, 12); + logger.reset(); + ASSERT_FALSE(logger.hasError()); + assertCommandStart(parser, "a", false, Variant::mapType{{"foo", "bar"}}, 0, + 2); + ASSERT_TRUE(logger.hasError()); + assertData(parser, ">", 11, 12); + assertEnd(parser, 12, 12); } TEST(OsmlStreamParser, closingAnnotation) { - const char *testString = "<\\a>"; - // 0 123 + const char *testString = "<\\a>"; + // 0 123 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); - assertData(parser, ">", 3, 4); - assertEnd(parser, 4, 4); + assertAnnotationStart(parser, "a", Variant::mapType{}, 0, 3); + assertData(parser, ">", 3, 4); + assertEnd(parser, 4, 4); } TEST(OsmlStreamParser, annotationWithFields) { - const char *testString = "a <\\b{c}{d}{!e} f \\> g"; - // 012 345678901234567 8901 - // 0 1 2 + const char *testString = "a <\\b{c}{d}{!e} f \\> g"; + // 012 345678901234567 8901 + // 0 1 2 - CharReader charReader(testString); + 
CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(parser, "a", 0, 1); - assertAnnotationStart(parser, "b", Variant::mapType{}, 2, 5); - assertFieldStart(parser, false, 5, 6); - assertData(parser, "c", 6, 7); - assertFieldEnd(parser, 7, 8); - assertFieldStart(parser, false, 8, 9); - assertData(parser, "d", 9, 10); - assertFieldEnd(parser, 10, 11); - assertFieldStart(parser, true, 11, 13); - assertData(parser, "e", 13, 14); - assertFieldEnd(parser, 14, 15); - assertData(parser, "f", 16, 17); - assertAnnotationEnd(parser, "", "", 18, 20); - assertData(parser, "g", 21, 22); - assertEnd(parser, 22, 22); + assertData(parser, "a", 0, 1); + assertAnnotationStart(parser, "b", Variant::mapType{}, 2, 5); + assertFieldStart(parser, false, 5, 6); + assertData(parser, "c", 6, 7); + assertFieldEnd(parser, 7, 8); + assertFieldStart(parser, false, 8, 9); + assertData(parser, "d", 9, 10); + assertFieldEnd(parser, 10, 11); + assertFieldStart(parser, true, 11, 13); + assertData(parser, "e", 13, 14); + assertFieldEnd(parser, 14, 15); + assertData(parser, "f", 16, 17); + assertAnnotationEnd(parser, "", "", 18, 20); + assertData(parser, "g", 21, 22); + assertEnd(parser, 22, 22); } TEST(OsmlStreamParser, annotationStartEscape) { - const char *testString = "<\\%test"; - // 0 123456 - // 0 + const char *testString = "<\\%test"; + // 0 123456 + // 0 - CharReader charReader(testString); + CharReader charReader(testString); - OsmlStreamParser parser(charReader, logger); + OsmlStreamParser parser(charReader, logger); - assertData(parser, "<%test", 0, 7); - assertEnd(parser, 7, 7); + assertData(parser, "<%test", 0, 7); + assertEnd(parser, 7, 7); } +TEST(OsmlStreamParser, userDefinedTokens) +{ + const char *testString = "<>, the *old man* said."; + // 0123456789012345678901234567890123456789 + // 0 1 2 3 + + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + TokenId 
tSpeechStart = parser.registerToken("<<"); + TokenId tSpeechEnd = parser.registerToken(">>"); + TokenId tStar = parser.registerToken("*"); + + ASSERT_TRUE(tSpeechStart != Tokens::Empty); + ASSERT_TRUE(tSpeechEnd != Tokens::Empty); + ASSERT_TRUE(tStar != Tokens::Empty); + + TokenSet tokens{tSpeechStart, tSpeechEnd, tStar}; + + ASSERT_EQ(OsmlStreamParser::State::DATA, parser.parse()); + TokenizedDataReader reader = parser.getData().reader(); + + assertToken(reader, tSpeechStart, "<<", tokens, WhitespaceMode::PRESERVE, 0, 2); + assertText(reader, "My dear fellows", tokens, WhitespaceMode::PRESERVE, 2, 17); + assertToken(reader, tSpeechEnd, ">>", tokens, WhitespaceMode::PRESERVE, 17, 19); + assertText(reader, ", the ", tokens, WhitespaceMode::PRESERVE, 19, 25); + assertToken(reader, tStar, "*", tokens, WhitespaceMode::PRESERVE, 25, 26); + assertText(reader, "old man", tokens, WhitespaceMode::PRESERVE, 26, 33); + assertToken(reader, tStar, "*", tokens, WhitespaceMode::PRESERVE, 33, 34); + assertText(reader, " said.", tokens, WhitespaceMode::PRESERVE, 34, 40); + assertEnd(reader); +} } -- cgit v1.2.3 From e2fd79ac8c85ac6191f6ed895fa5cdff091f7551 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 1 Mar 2015 16:28:35 +0100 Subject: Implemented TokenRegistry class and corresponding simple test case --- CMakeLists.txt | 2 + src/core/parser/stack/TokenRegistry.cpp | 72 +++++++++++++++++++++++ src/core/parser/stack/TokenRegistry.hpp | 87 ++++++++++++++++++++++++++++ test/core/parser/stack/TokenRegistryTest.cpp | 78 +++++++++++++++++++++++++ 4 files changed, 239 insertions(+) create mode 100644 src/core/parser/stack/TokenRegistry.cpp create mode 100644 src/core/parser/stack/TokenRegistry.hpp create mode 100644 test/core/parser/stack/TokenRegistryTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e2d7f7..6e021fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -188,6 +188,7 @@ ADD_LIBRARY(ousia_core # src/core/parser/stack/ImportIncludeHandler 
src/core/parser/stack/State # src/core/parser/stack/Stack + src/core/parser/stack/TokenRegistry # src/core/parser/stack/TypesystemHandler src/core/parser/utils/SourceOffsetVector src/core/parser/utils/TokenizedData @@ -325,6 +326,7 @@ IF(TEST) test/core/parser/ParserScopeTest # test/core/parser/stack/StackTest test/core/parser/stack/StateTest + test/core/parser/stack/TokenRegistryTest test/core/parser/utils/SourceOffsetVectorTest test/core/parser/utils/TokenizedDataTest test/core/parser/utils/TokenizerTest diff --git a/src/core/parser/stack/TokenRegistry.cpp b/src/core/parser/stack/TokenRegistry.cpp new file mode 100644 index 0000000..21ae109 --- /dev/null +++ b/src/core/parser/stack/TokenRegistry.cpp @@ -0,0 +1,72 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "TokenRegistry.hpp" + +namespace ousia { +namespace parser_stack { + +TokenId TokenRegistry::registerToken(const std::string &token) +{ + // Check whether the given token is already registered + auto it = tokens.find(token); + if (it != tokens.end()) { + // Increment the reference count + size_t &refCount = it->second.second; + refCount++; + + // Return the token id + return it->second.first; + } + + // Register the token in the parser + TokenId id = parser.registerToken(token); + tokens[token] = std::pair(id, 1); + tokenIds[id] = token; + return id; +} + +void TokenRegistry::unregisterToken(TokenId id) +{ + // Lookup the token corresponding to the given token id + auto tokenIt = tokenIds.find(id); + if (tokenIt != tokenIds.end()) { + const std::string &token = tokenIt->second; + // Lookup the reference count for the corresponding token + auto idIt = tokens.find(token); + if (idIt != tokens.end()) { + // Decrement the reference count, abort if the refCount is larger + // than zero + size_t &refCount = idIt->second.second; + refCount--; + if (refCount > 0) { + return; + } + + // Unregister the token from the parser + parser.unregisterToken(id); + + // Unregister the token from the internal tokens map + tokens.erase(token); + } + // Unregister the token from the internal id map + tokenIds.erase(id); + } +} +} +} diff --git a/src/core/parser/stack/TokenRegistry.hpp b/src/core/parser/stack/TokenRegistry.hpp new file mode 100644 index 0000000..21c36b5 --- /dev/null +++ b/src/core/parser/stack/TokenRegistry.hpp @@ -0,0 +1,87 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file TokenRegistry.hpp + * + * Contains the TokenRegistry class used for registering all possible tokens + * during the parsing process. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_TOKEN_REGISTRY_HPP_ +#define _OUSIA_PARSER_STACK_TOKEN_REGISTRY_HPP_ + +#include +#include + +#include "Callbacks.hpp" + +namespace ousia { +namespace parser_stack { + +/** + * The TokenRegistry class is used for registering all possible tokens during + * the Parsing process. The TokenRegistry class acts as an adapter between the + * parser which allocates TokenId for each unique token and the Handler classes + * which may register tokens multiple times and expect the same TokenId to be + * returned for the same token. + */ +class TokenRegistry : public ParserCallbacks { +private: + /** + * Reference at the ParserCallback instance the tokens are relayed to. + */ + ParserCallbacks &parser; + + /** + * Store containing all TokenId instances for all registered tokens. The map + * maps from the token strings to the corresponding TokenId and a reference + * count. + */ + std::unordered_map> tokens; + + /** + * Reverse map containing the string corresponding to a TokenId. + */ + std::unordered_map tokenIds; + +public: + /** + * Constructor of the TokenRegistry class. + * + * @param parser is the underlying parser implementing the ParserCallbacks + * interface to which all calls are relayed. 
+ */ + TokenRegistry(ParserCallbacks &parser) : parser(parser) {} + + /* No copy construction */ + TokenRegistry(const TokenRegistry &) = delete; + + /* No assignment */ + TokenRegistry &operator=(const TokenRegistry &) = delete; + + TokenId registerToken(const std::string &token) override; + void unregisterToken(TokenId id) override; +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_TOKEN_REGISTRY_HPP_ */ + diff --git a/test/core/parser/stack/TokenRegistryTest.cpp b/test/core/parser/stack/TokenRegistryTest.cpp new file mode 100644 index 0000000..390851e --- /dev/null +++ b/test/core/parser/stack/TokenRegistryTest.cpp @@ -0,0 +1,78 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +#include + +namespace ousia { +namespace parser_stack { + +class ParserCallbacksProxy : public ParserCallbacks { +public: + size_t registerTokenCount = 0; + size_t unregisterTokenCount = 0; + + TokenId registerToken(const std::string &token) override + { + registerTokenCount++; + return registerTokenCount; + } + + void unregisterToken(TokenId id) override { unregisterTokenCount++; } +}; + +TEST(TokenRegistry, simple) +{ + ParserCallbacksProxy parser; + TokenRegistry registry(parser); + + ASSERT_EQ(0U, parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + ASSERT_EQ(1U, registry.registerToken("test")); + ASSERT_EQ(1U, registry.registerToken("test")); + ASSERT_EQ(2U, registry.registerToken("test2")); + ASSERT_EQ(2U, registry.registerToken("test2")); + ASSERT_EQ(2U, parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(2U, parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(2U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(2U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(2); + ASSERT_EQ(2U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(2); + ASSERT_EQ(2U, parser.registerTokenCount); + ASSERT_EQ(2U, parser.unregisterTokenCount); +} + +} +} + -- cgit v1.2.3 From 3bdc30e0798d6b356782da430e93b72b4303e963 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:32:34 +0100 Subject: Adapted OsxmlParser to new Stack interface, enabled Osxml code in CMakeLists again --- CMakeLists.txt | 22 ++++++++-------- src/formats/osxml/OsxmlEventParser.cpp | 6 ++--- src/formats/osxml/OsxmlEventParser.hpp | 11 +++----- src/formats/osxml/OsxmlParser.cpp | 13 ++++----- test/formats/osxml/OsxmlEventParserTest.cpp | 41 
++++++++++++++++------------- 5 files changed, 48 insertions(+), 45 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e021fd..2a09b54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -225,7 +225,7 @@ TARGET_LINK_LIBRARIES(ousia_osml ADD_LIBRARY(ousia_osxml src/formats/osxml/OsxmlAttributeLocator src/formats/osxml/OsxmlEventParser -# src/formats/osxml/OsxmlParser + src/formats/osxml/OsxmlParser ) TARGET_LINK_LIBRARIES(ousia_osxml @@ -397,17 +397,17 @@ IF(TEST) ousia_filesystem ) -# ADD_EXECUTABLE(ousia_test_osxml -# test/formats/osxml/OsxmlEventParserTest + ADD_EXECUTABLE(ousia_test_osxml + test/formats/osxml/OsxmlEventParserTest # test/formats/osxml/OsxmlParserTest -# ) + ) -# TARGET_LINK_LIBRARIES(ousia_test_osxml -# ${GTEST_LIBRARIES} -# ousia_core -# ousia_osxml -# ousia_filesystem -# ) + TARGET_LINK_LIBRARIES(ousia_test_osxml + ${GTEST_LIBRARIES} + ousia_core + ousia_osxml + ousia_filesystem + ) ADD_EXECUTABLE(ousia_test_xml test/plugins/xml/XmlOutputTest @@ -426,7 +426,7 @@ IF(TEST) ADD_TEST(ousia_test_html ousia_test_html) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) ADD_TEST(ousia_test_osml ousia_test_osml) -# ADD_TEST(ousia_test_osxml ousia_test_osxml) + ADD_TEST(ousia_test_osxml ousia_test_osxml) ADD_TEST(ousia_test_xml ousia_test_xml) ENDIF() diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index 855f80d..83c16f0 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -323,7 +323,7 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // Just issue a "commandStart" event in any other case Variant nameVar = Variant::fromString(nameStr); nameVar.setLocation(nameLoc); - parser->getEvents().command(nameVar, args); + parser->getEvents().commandStart(nameVar, args); } } @@ -358,8 +358,8 @@ static void xmlEndElementHandler(void *ref, const XML_Char *name) return; } - // Issue the "fieldEnd" event - parser->getEvents().fieldEnd(); + // Issue 
the "rangeEnd" event + parser->getEvents().rangeEnd(); } static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index e3fd5d4..7a8c96d 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -59,7 +59,8 @@ public: * @param args is a map containing the arguments that were given to the * command. */ - virtual void command(const Variant &name, const Variant::mapType &args) = 0; + virtual void commandStart(const Variant &name, + const Variant::mapType &args) = 0; /** * Called whenever an annotation starts. Note that this implicitly always @@ -88,13 +89,9 @@ public: const Variant &elementName) = 0; /** - * Called whenever the default field which was implicitly started by - * commandStart or annotationStart ends. Note that this does not end the - * range of an annotation, but the default field of the annotation. To - * signal the end of the annotation this, the annotationEnd method will be - * invoked. + * Called whenever the command or annotation tags end. */ - virtual void fieldEnd() = 0; + virtual void rangeEnd() = 0; /** * Called whenever string data is found. diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp index c216855..924d11b 100644 --- a/src/formats/osxml/OsxmlParser.cpp +++ b/src/formats/osxml/OsxmlParser.cpp @@ -16,6 +16,8 @@ along with this program. If not, see . 
*/ +#include +#include #include #include #include @@ -63,17 +65,16 @@ public: */ void parse() { parser.parse(); } - void command(const Variant &name, const Variant::mapType &args) override + void commandStart(const Variant &name, + const Variant::mapType &args) override { - stack.command(name, args); - stack.fieldStart(true); + stack.commandStart(name, args, true); } void annotationStart(const Variant &name, const Variant::mapType &args) override { - stack.annotationStart(name, args); - stack.fieldStart(true); + stack.annotationStart(name, args, true); } void annotationEnd(const Variant &className, @@ -82,7 +83,7 @@ public: stack.annotationEnd(className, elementName); } - void fieldEnd() override { stack.fieldEnd(); } + void rangeEnd() override { stack.rangeEnd(); } void data(const Variant &data) override { stack.data(data); } }; diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index 6942166..b24a43d 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -32,10 +32,10 @@ static TerminalLogger logger(std::cerr, true); namespace { enum class OsxmlEvent { - COMMAND, + COMMAND_START, ANNOTATION_START, ANNOTATION_END, - FIELD_END, + RANGE_END, DATA }; @@ -43,9 +43,10 @@ class TestOsxmlEventListener : public OsxmlEvents { public: std::vector> events; - void command(const Variant &name, const Variant::mapType &args) override + void commandStart(const Variant &name, + const Variant::mapType &args) override { - events.emplace_back(OsxmlEvent::COMMAND, + events.emplace_back(OsxmlEvent::COMMAND_START, Variant::arrayType{name, args}); } @@ -63,9 +64,9 @@ public: Variant::arrayType{className, elementName}); } - void fieldEnd() override + void rangeEnd() override { - events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{}); + events.emplace_back(OsxmlEvent::RANGE_END, Variant::arrayType{}); } void data(const Variant &data) override @@ -92,11 +93,11 @@ 
TEST(OsxmlEventParser, simpleCommandWithArgs) // 0 1 2 3 std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, + {OsxmlEvent::COMMAND_START, Variant::arrayType{ "a", Variant::mapType{ {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); @@ -132,10 +133,12 @@ TEST(OsxmlEventParser, magicTopLevelTag) const char *testString = ""; std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}, - {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"b", Variant::mapType{}}}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); @@ -146,11 +149,12 @@ TEST(OsxmlEventParser, magicTopLevelTagInside) const char *testString = ""; std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}}, - {OsxmlEvent::COMMAND, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{{"a", Variant::mapType{}}}}, + {OsxmlEvent::COMMAND_START, Variant::arrayType{{"ousia", Variant::mapType{}}}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}, + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); @@ -163,9 +167,10 @@ TEST(OsxmlEventParser, commandWithData) // 0 1 2 std::vector> expectedEvents{ - {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}}, + {OsxmlEvent::COMMAND_START, + Variant::arrayType{"a", 
Variant::mapType{}}}, {OsxmlEvent::DATA, Variant::arrayType{" hello \n world "}}, - {OsxmlEvent::FIELD_END, Variant::arrayType{}}}; + {OsxmlEvent::RANGE_END, Variant::arrayType{}}}; auto events = parseXml(testString); ASSERT_EQ(expectedEvents, events); -- cgit v1.2.3 From 8197dc488926e8645efb47e60d0988a6a65fc15f Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:33:32 +0100 Subject: Adapted OsmlParser to new Stack interface, reenabled OsmlParser code in CMakeLists --- CMakeLists.txt | 2 +- src/formats/osml/OsmlParser.cpp | 28 +++++++++++++--------------- src/formats/osml/OsmlStreamParser.cpp | 10 +++++----- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a09b54..f6a7257 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,7 +214,7 @@ ADD_LIBRARY(ousia_core #) ADD_LIBRARY(ousia_osml -# src/formats/osml/OsmlParser + src/formats/osml/OsmlParser src/formats/osml/OsmlStreamParser ) diff --git a/src/formats/osml/OsmlParser.cpp b/src/formats/osml/OsmlParser.cpp index a24f091..c25974f 100644 --- a/src/formats/osml/OsmlParser.cpp +++ b/src/formats/osml/OsmlParser.cpp @@ -88,7 +88,7 @@ public: OsmlStreamParser::State state = parser.parse(); logger.setDefaultLocation(parser.getLocation()); switch (state) { - case OsmlStreamParser::State::COMMAND: { + case OsmlStreamParser::State::COMMAND_START: { // Implicitly create a "document" element if the first // command is not any other top-level command if (needsDocument) { @@ -96,23 +96,23 @@ public: parser.getCommandName().asString(); if (cmd != "typesystem" && cmd != "document" && cmd != "domain") { - stack.command("document", Variant::mapType{}); + stack.commandStart("document", Variant::mapType{}, + false); } needsDocument = false; } - stack.command(parser.getCommandName(), - parser.getCommandArguments().asMap()); + stack.commandStart(parser.getCommandName(), + parser.getCommandArguments().asMap(), + parser.inRangeCommand()); break; } - case 
OsmlStreamParser::State::DATA: - stack.data(parser.getData()); - break; - case OsmlStreamParser::State::ENTITY: - // TODO + case OsmlStreamParser::State::RANGE_END: + stack.rangeEnd(); break; case OsmlStreamParser::State::ANNOTATION_START: stack.annotationStart(parser.getCommandName(), - parser.getCommandArguments().asMap()); + parser.getCommandArguments().asMap(), + parser.inRangeCommand()); break; case OsmlStreamParser::State::ANNOTATION_END: { Variant elementName = Variant::fromString(std::string{}); @@ -130,11 +130,9 @@ public: case OsmlStreamParser::State::FIELD_END: stack.fieldEnd(); break; - case OsmlStreamParser::State::NONE: - case OsmlStreamParser::State::ERROR: - // Internally used in OsmlStreamParser, these states should - // never occur. Just contiunue. - continue; + case OsmlStreamParser::State::DATA: + stack.data(parser.getData()); + break; case OsmlStreamParser::State::END: return; } diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index 823075a..64a489d 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -409,7 +409,7 @@ public: State parse(); TokenId registerToken(const std::string &token); - void unregisterToken(TokenId token); + void unregisterToken(TokenId id); const TokenizedData &getData() const { return data; } const Variant &getCommandName() const { return cmd().getName(); } @@ -928,9 +928,9 @@ TokenId OsmlStreamParserImpl::registerToken(const std::string &token) return tokenizer.registerToken(token, false); } -void OsmlStreamParserImpl::unregisterToken(TokenId token) +void OsmlStreamParserImpl::unregisterToken(TokenId id) { - assert(tokenizer.unregisterToken(token)); + assert(tokenizer.unregisterToken(id)); } /* Class OsmlStreamParser */ @@ -979,8 +979,8 @@ TokenId OsmlStreamParser::registerToken(const std::string &token) return impl->registerToken(token); } -void OsmlStreamParser::unregisterToken(TokenId token) +void 
OsmlStreamParser::unregisterToken(TokenId id) { - impl->unregisterToken(token); + impl->unregisterToken(id); } } -- cgit v1.2.3 From 596fdab71b8bd116e20e33647d68f1d7a567696e Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:34:15 +0100 Subject: Wrote isUserDefinedToken function which checks whether a token is a valid user defined token and added unit tests --- src/core/common/Utils.cpp | 24 ++++++++++++++++++++++++ src/core/common/Utils.hpp | 19 +++++++++++++++++++ test/core/common/UtilsTest.cpp | 31 ++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 85d2c28..219b437 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -118,5 +118,29 @@ bool Utils::endsWith(const std::string &s, const std::string &suffix) return suffix.size() <= s.size() && s.substr(s.size() - suffix.size(), suffix.size()) == suffix; } + +bool Utils::isUserDefinedToken(const std::string &token) +{ + // Make sure the token meets is neither empty, nor starts or ends with an + // alphanumeric character + const size_t len = token.size(); + if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) { + return false; + } + + // Make sure the token is not any special OSML token + if (token == "\\" || token == "%" || token == "%{" || token == "}%" || + token == "{!" || token == "<\\" || token == "\\>") { + return false; + } + + // Make sure the token contains other characters but { and } + for (char c: token) { + if (c != '{' && c != '}') { + return true; + } + } + return false; +} } diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 82a8f8c..25a4de5 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -102,6 +102,25 @@ public: */ static bool isNamespacedIdentifier(const std::string &name); + /** + * Returns true if the given characters form a valid user-defined token. 
+ * This function returns true under the following circumstances: + *
    + *
  • The given token is not empty
  • + *
  • The given token starts and ends with a non-alphanumeric character + *
  • + *
  • The token is none of the following character sequences (which are + * special in OSML): + *
      + *
    • '{', '}' or any combined repetition of these characters
    • + *
    • '\', '{!', '<\', '\>'
    • + *
    • '%', '%{', '}%'
    • + *
    + *
  • + *
+ */ + static bool isUserDefinedToken(const std::string &token); + /** * Returns true if the given character is a linebreak character. */ diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 4bf1587..54890ee 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -131,4 +131,33 @@ TEST(Utils, collapse) ASSERT_EQ("long test", Utils::collapse(" long test ")); } -} \ No newline at end of file +TEST(Utils, isUserDefinedToken) +{ + EXPECT_FALSE(Utils::isUserDefinedToken("")); + EXPECT_FALSE(Utils::isUserDefinedToken("a")); + EXPECT_TRUE(Utils::isUserDefinedToken(":")); + EXPECT_TRUE(Utils::isUserDefinedToken("::")); + EXPECT_TRUE(Utils::isUserDefinedToken("!?")); + EXPECT_TRUE(Utils::isUserDefinedToken(".")); + EXPECT_TRUE(Utils::isUserDefinedToken("<<")); + EXPECT_TRUE(Utils::isUserDefinedToken(">>")); + EXPECT_TRUE(Utils::isUserDefinedToken("''")); + EXPECT_TRUE(Utils::isUserDefinedToken("``")); + EXPECT_TRUE(Utils::isUserDefinedToken("´´")); + EXPECT_TRUE(Utils::isUserDefinedToken("´")); + EXPECT_TRUE(Utils::isUserDefinedToken("`")); + EXPECT_TRUE(Utils::isUserDefinedToken("<")); + EXPECT_TRUE(Utils::isUserDefinedToken(">")); + EXPECT_FALSE(Utils::isUserDefinedToken("a:")); + EXPECT_FALSE(Utils::isUserDefinedToken("a:a")); + EXPECT_FALSE(Utils::isUserDefinedToken(":a")); + EXPECT_FALSE(Utils::isUserDefinedToken("{")); + EXPECT_FALSE(Utils::isUserDefinedToken("{{")); + EXPECT_FALSE(Utils::isUserDefinedToken("}}")); + EXPECT_FALSE(Utils::isUserDefinedToken("{{}{}")); + EXPECT_FALSE(Utils::isUserDefinedToken("<\\")); + EXPECT_FALSE(Utils::isUserDefinedToken("\\>")); + EXPECT_FALSE(Utils::isUserDefinedToken("{!")); +} + +} -- cgit v1.2.3 From 231f426708babe0964495ac28a54f0f2835c084a Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:35:36 +0100 Subject: Unregistering all registered tokens when TokenRegistry is destroyed --- src/core/parser/stack/TokenRegistry.cpp | 8 ++++ 
src/core/parser/stack/TokenRegistry.hpp | 47 +++++++++++++++---- test/core/parser/stack/TokenRegistryTest.cpp | 68 +++++++++++++++------------- 3 files changed, 81 insertions(+), 42 deletions(-) diff --git a/src/core/parser/stack/TokenRegistry.cpp b/src/core/parser/stack/TokenRegistry.cpp index 21ae109..c135b98 100644 --- a/src/core/parser/stack/TokenRegistry.cpp +++ b/src/core/parser/stack/TokenRegistry.cpp @@ -16,11 +16,19 @@ along with this program. If not, see . */ +#include "Callbacks.hpp" #include "TokenRegistry.hpp" namespace ousia { namespace parser_stack { +TokenRegistry::~TokenRegistry() +{ + for (const auto &tid: tokenIds) { + parser.unregisterToken(tid.first); + } +} + TokenId TokenRegistry::registerToken(const std::string &token) { // Check whether the given token is already registered diff --git a/src/core/parser/stack/TokenRegistry.hpp b/src/core/parser/stack/TokenRegistry.hpp index 21c36b5..545db39 100644 --- a/src/core/parser/stack/TokenRegistry.hpp +++ b/src/core/parser/stack/TokenRegistry.hpp @@ -19,7 +19,7 @@ /** * @file TokenRegistry.hpp * - * Contains the TokenRegistry class used for registering all possible tokens + * Contains the TokenRegistry class used for registering all user defined tokens * during the parsing process. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) @@ -31,19 +31,22 @@ #include #include -#include "Callbacks.hpp" +#include namespace ousia { namespace parser_stack { +// Forward declarations +class ParserCallbacks; + /** - * The TokenRegistry class is used for registering all possible tokens during - * the Parsing process. The TokenRegistry class acts as an adapter between the - * parser which allocates TokenId for each unique token and the Handler classes - * which may register tokens multiple times and expect the same TokenId to be - * returned for the same token. + * The TokenRegistry class is used for registering all user defined tokens + * during the Parsing process. 
The TokenRegistry class acts as an adapter + * between the parser which allocates a TokenId for each unique token and the + * Handler classes which may register the same token multiple times and expect + * the same TokenId to be returned for the same token. */ -class TokenRegistry : public ParserCallbacks { +class TokenRegistry { private: /** * Reference at the ParserCallback instance the tokens are relayed to. @@ -71,14 +74,38 @@ public: */ TokenRegistry(ParserCallbacks &parser) : parser(parser) {} + /** + * Destructor of the TokenRegistry class, removes all registered tokens from + * the parser. + */ + ~TokenRegistry(); + /* No copy construction */ TokenRegistry(const TokenRegistry &) = delete; /* No assignment */ TokenRegistry &operator=(const TokenRegistry &) = delete; - TokenId registerToken(const std::string &token) override; - void unregisterToken(TokenId id) override; + /** + * Registers the given string token in the underlying parser and returns the + * TokenId of that token. If the same token string is given multiple times, + * the same TokenId is returned. The token is only registered once in the + * parser. + * + * @param token is the token that should be registered. + * @return the TokenId associated with this token. + */ + TokenId registerToken(const std::string &token); + + /** + * Unregisters the token with the given TokenId from the parser. Note that + * the token will only be unregistered if unregisterToken() has been called + * as many times as registerToken() for the same token. + * + * @param id is the id of the token returned by registerToken() that should + * be unregistered. 
+ */ + void unregisterToken(TokenId id); }; } } diff --git a/test/core/parser/stack/TokenRegistryTest.cpp b/test/core/parser/stack/TokenRegistryTest.cpp index 390851e..20d6cd0 100644 --- a/test/core/parser/stack/TokenRegistryTest.cpp +++ b/test/core/parser/stack/TokenRegistryTest.cpp @@ -18,6 +18,7 @@ #include +#include #include namespace ousia { @@ -40,39 +41,42 @@ public: TEST(TokenRegistry, simple) { ParserCallbacksProxy parser; - TokenRegistry registry(parser); - - ASSERT_EQ(0U, parser.registerTokenCount); - ASSERT_EQ(0U, parser.unregisterTokenCount); - - ASSERT_EQ(1U, registry.registerToken("test")); - ASSERT_EQ(1U, registry.registerToken("test")); - ASSERT_EQ(2U, registry.registerToken("test2")); - ASSERT_EQ(2U, registry.registerToken("test2")); - ASSERT_EQ(2U, parser.registerTokenCount); - ASSERT_EQ(0U, parser.unregisterTokenCount); - - registry.unregisterToken(1); - ASSERT_EQ(2U, parser.registerTokenCount); - ASSERT_EQ(0U, parser.unregisterTokenCount); - - registry.unregisterToken(1); - ASSERT_EQ(2U, parser.registerTokenCount); - ASSERT_EQ(1U, parser.unregisterTokenCount); - - registry.unregisterToken(1); - ASSERT_EQ(2U, parser.registerTokenCount); - ASSERT_EQ(1U, parser.unregisterTokenCount); - - registry.unregisterToken(2); - ASSERT_EQ(2U, parser.registerTokenCount); - ASSERT_EQ(1U, parser.unregisterTokenCount); - - registry.unregisterToken(2); - ASSERT_EQ(2U, parser.registerTokenCount); - ASSERT_EQ(2U, parser.unregisterTokenCount); + { + TokenRegistry registry(parser); + + ASSERT_EQ(0U, parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + ASSERT_EQ(1U, registry.registerToken("test")); + ASSERT_EQ(1U, registry.registerToken("test")); + ASSERT_EQ(2U, registry.registerToken("test2")); + ASSERT_EQ(2U, registry.registerToken("test2")); + ASSERT_EQ(3U, registry.registerToken("test3")); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(3U, 
parser.registerTokenCount); + ASSERT_EQ(0U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(1); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(2); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(1U, parser.unregisterTokenCount); + + registry.unregisterToken(2); + ASSERT_EQ(3U, parser.registerTokenCount); + ASSERT_EQ(2U, parser.unregisterTokenCount); + } + ASSERT_EQ(3U, parser.unregisterTokenCount); } - } } -- cgit v1.2.3 From 7a8b4eb8b9d943959b919076596ec96ef0c4c03c Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:36:18 +0100 Subject: Adapted Callbacks interface and Handlers --- CMakeLists.txt | 2 +- src/core/parser/stack/Callbacks.cpp | 10 ++++ src/core/parser/stack/Callbacks.hpp | 68 ++++++++++++++++----------- src/core/parser/stack/Handler.cpp | 52 +++++++++++---------- src/core/parser/stack/Handler.hpp | 93 ++++++++++++++++++++++--------------- 5 files changed, 134 insertions(+), 91 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f6a7257..1e81822 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -184,7 +184,7 @@ ADD_LIBRARY(ousia_core # src/core/parser/stack/DocumentHandler # src/core/parser/stack/DomainHandler # src/core/parser/stack/GenericParserStates -# src/core/parser/stack/Handler + src/core/parser/stack/Handler # src/core/parser/stack/ImportIncludeHandler src/core/parser/stack/State # src/core/parser/stack/Stack diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp index 6ebc549..44b31c6 100644 --- a/src/core/parser/stack/Callbacks.cpp +++ b/src/core/parser/stack/Callbacks.cpp @@ -19,5 +19,15 @@ #include "Callbacks.hpp" namespace ousia { +namespace parser_stack { + +/* Class ParserCallbacks */ + +ParserCallbacks::~ParserCallbacks() +{ + // Do nothing here +} + +} } diff 
--git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index 9c61000..d7b2547 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -30,66 +30,78 @@ #define _OUSIA_PARSER_STACK_CALLBACKS_HPP_ #include +#include #include +#include namespace ousia { + +// Forward declarations +class Variant; + namespace parser_stack { /** - * Interface defining a set of callback functions that act as a basis for the - * StateStackCallbacks and the ParserCallbacks. + * Interface between the Stack class and the underlying parser used for + * registering and unregistering tokens. */ -class Callbacks { +class ParserCallbacks { public: /** * Virtual descructor. */ - virtual ~Callbacks() {}; - - /** - * Sets the whitespace mode that specifies how string data should be - * processed. - * - * @param whitespaceMode specifies one of the three WhitespaceMode constants - * PRESERVE, TRIM or COLLAPSE. - */ - virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0; + virtual ~ParserCallbacks(); /** * Registers the given token as token that should be reported to the handler * using the "token" function. * * @param token is the token string that should be reported. + * @return the token id with which the token will be reported. Should return + * Tokens::Empty if the given token could not be registered. */ - virtual void registerToken(const std::string &token) = 0; + virtual TokenId registerToken(const std::string &token) = 0; /** * Unregisters the given token, it will no longer be reported to the handler * using the "token" function. * - * @param token is the token string that should be unregistered. + * @param id is the token id of the token that should be unregistered. */ - virtual void unregisterToken(const std::string &token) = 0; + virtual void unregisterToken(TokenId id) = 0; }; /** - * Interface defining the callback functions that can be passed from a - * StateStack to the underlying parser. 
+ * Interface defining a set of callback functions that act as a basis for the + * StateStackCallbacks and the ParserCallbacks. */ -class ParserCallbacks : public Callbacks { +class HandlerCallbacks: public ParserCallbacks { +public: /** - * Checks whether the given token is supported by the parser. The parser - * returns true, if the token is supported, false if this token cannot be - * registered. Note that parsers that do not support the registration of - * tokens at all should always return "true". + * Reads a string variant form the current input stream. This function must + * be called from the data() method. * - * @param token is the token that should be checked for support. - * @return true if the token is generally supported (or the parser does not - * support registering tokens at all), false if the token is not supported, - * because e.g. it is a reserved token or it interferes with other tokens. + * @return a string variant containing the current text data. The return + * value depends on the currently set whitespace mode and the tokens that + * were enabled using the enableTokens callback method. + */ + Variant readData(); + + /** + * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. + * The tokens described in the token list are the tokens that are currently + * enabled. + * + * @param tokens is a list of TokenSyntaxDescriptor instances that should be + * stored on the stack. + */ + void pushTokens(const std::vector &tokens); + + /** + * Removes the previously pushed list of tokens from the stack. 
*/ - virtual bool supportsToken(const std::string &token) = 0; + void popTokens(); }; } diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 3d413e8..734976a 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -30,11 +31,11 @@ namespace parser_stack { /* Class HandlerData */ -HandlerData::HandlerData(ParserContext &ctx, /*Callbacks &callbacks,*/ +HandlerData::HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks, const std::string &name, const State &state, const SourceLocation &location) : ctx(ctx), - /*callbacks(callbacks),*/ + callbacks(callbacks), name(name), state(state), location(location) @@ -68,19 +69,29 @@ const SourceLocation &Handler::location() const { return handlerData.location; } const std::string &Handler::name() const { return handlerData.name; } -void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode) +Variant Handler::readData() { - /*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/ + return handlerData.callbacks.readData(); } -void Handler::registerToken(const std::string &token) +void Handler::pushTokens(const std::vector &tokens) { - /*handlerData.callbacks.registerToken(token);*/ + handlerData.callbacks.pushTokens(tokens); } -void Handler::unregisterToken(const std::string &token) +void Handler::popTokens() { - /*handlerData.callbacks.unregisterToken(token);*/ + handlerData.callbacks.popTokens(); +} + +TokenId Handler::registerToken(const std::string &token) +{ + return handlerData.callbacks.registerToken(token); +} + +void Handler::unregisterToken(TokenId id) +{ + handlerData.callbacks.unregisterToken(id); } const std::string &Handler::getName() const { return name(); } @@ -131,7 +142,7 @@ bool EmptyHandler::annotationEnd(const Variant &className, return true; } -bool EmptyHandler::data(TokenizedData &data) +bool EmptyHandler::data() { // Support any data return true; @@ -185,13 
+196,10 @@ bool StaticHandler::annotationEnd(const Variant &className, return false; } -bool StaticHandler::data(TokenizedData &data) +bool StaticHandler::data() { - if (data.text(WhitespaceMode::TRIM) != nullptr) { - logger().error("Did not expect any data here", data); - return false; - } - return true; + logger().error("Did not expect any data here", readData()); + return false; } /* Class StaticFieldHandler */ @@ -231,19 +239,15 @@ void StaticFieldHandler::end() } } -bool StaticFieldHandler::data(TokenizedData &data) +bool StaticFieldHandler::data() { - Variant text = data.text(WhitespaceMode::TRIM); - if (text == nullptr) { - // Providing no data here is ok as long as the "doHandle" callback - // function has already been called - return handled; - } + // Fetch the actual text data + Variant stringData = readData(); // Call the doHandle function if this has not been done before if (!handled) { handled = true; - doHandle(text, args); + doHandle(stringData, args); return true; } @@ -251,7 +255,7 @@ bool StaticFieldHandler::data(TokenizedData &data) logger().error( std::string("Found data, but the corresponding argument \"") + argName + std::string("\" was already specified"), - text); + stringData); // Print the location at which the attribute was originally specified auto it = args.find(argName); diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 929466d..848d395 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -32,6 +32,7 @@ class ParserScope; class ParserContext; class Logger; class TokenizedData; +class Variant; namespace parser_stack { @@ -52,11 +53,11 @@ public: ParserContext &ctx; /** - * Reference at an instance of the Callbacks class, used for - * modifying the behaviour of the parser (like registering tokens, setting - * the data type or changing the whitespace handling mode). 
+ * Reference at a class implementing the HandlerCallbacks interface, used + * for modifying the behaviour of the parser (like registering tokens, + * setting the data type or changing the whitespace handling mode). */ - // Callbacks &callbacks; + HandlerCallbacks &callbacks; /** * Contains the name of the command that is being handled. @@ -83,9 +84,9 @@ public: * @param state is the state this handler was called for. * @param location is the location at which the handler is created. */ - HandlerData(ParserContext &ctx, - /*Callbacks &callbacks,*/ const std::string &name, - const State &state, const SourceLocation &location); + HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks, + const std::string &name, const State &state, + const SourceLocation &location); }; /** @@ -159,6 +160,17 @@ protected: */ const std::string &name() const; + /** + * Calls the corresponding method in the HandlerCallbacks instance. Reads a + * string variant form the current input stream. This function must be + * called from the data() method. + * + * @return a string variant containing the current text data. The return + * value depends on the currently set whitespace mode and the tokens that + * were enabled using the enableTokens callback method. + */ + Variant readData(); + /** * Calls the corresponding function in the Callbacks instance. Sets the * whitespace mode that specifies how string data should be processed. The @@ -170,7 +182,7 @@ protected: * @param whitespaceMode specifies one of the three WhitespaceMode constants * PRESERVE, TRIM or COLLAPSE. */ - void pushWhitespaceMode(WhitespaceMode whitespaceMode); + // void pushWhitespaceMode(WhitespaceMode whitespaceMode); /** * Pops a previously pushed whitespace mode. Calls to this function should @@ -178,38 +190,45 @@ protected: * can only undo pushs that were performed by the pushWhitespaceMode() * method of the same handler. 
*/ - void popWhitespaceMode(); + // void popWhitespaceMode(); /** - * Calls the corresponding function in the Callbacks instance. Sets the - * whitespace mode that specifies how string data should be processed. The - * calls to this function are placed on a stack by the underlying Stack - * class. This function should be called from the "fieldStart" callback and - * the "start" callback. If no whitespace mode is pushed in the "start" - * method the whitespace mode "TRIM" is implicitly assumed. + * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. + * The tokens described in the token list are the tokens that are currently + * enabled. * - * @param tokens is a list of tokens that should be reported to this handler - * instance via the "token" method. + * @param tokens is a list of TokenSyntaxDescriptor instances that should be + * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** - * Pops a previously pushed whitespace mode. Calls to this function should - * occur in the "end" callback and the "fieldEnd" callback. This function - * can only undo pushs that were performed by the pushWhitespaceMode() - * method of the same handler. + * Calls the corresponding function in the HandlerCallbacks instance. + * Removes the previously pushed list of tokens from the stack. */ - void popWhitespaceMode(); + void popTokens(); + /** + * Calls the corresponding function in the HandlerCallbacks instance. This + * method registers the given tokens as tokens that are generally available, + * tokens must be explicitly enabled using the "pushTokens" and "popTokens" + * method. Tokens that have not been registered are not guaranteed to be + * reported (except for special tokens, these do not have to be registerd). + * + * @param token is the token string that should be made available. + * @return the TokenId that will be used to refer to the token. 
+ */ + TokenId registerToken(const std::string &token); /** - * Calls the corresponding function in the Callbacks instance. This method - * registers the given tokens as tokens that are generally available, tokens - * must be explicitly enabled using the "pushTokens" and "popTokens" method. - * Tokens that have not been registered are not guaranteed to be reported, - * even though they are + * Calls the corresponding function in the HandlerCallbacks instance. This + * method unregisters the given token. Note that for a token to be no longer + * reported, this function has to be called as many times as registerToken() + * for the corresponding token. + * + * @param id is the id of the Token that should be unregistered. */ - void registerTokens(const std::vector &tokens); + void unregisterToken(TokenId id); public: /** @@ -321,13 +340,12 @@ public: /** * Called whenever raw data (int the form of a string) is available for the * Handler instance. Should return true if the data could be handled, false - * otherwise. + * otherwise. The actual data variant must be retrieved using the "text()" + * callback. * - * @param data is an instance of TokenizedData containing the segmented - * character data and its location. * @return true if the data could be handled, false otherwise. */ - virtual bool data(TokenizedData &data) = 0; + virtual bool data() = 0; }; /** @@ -357,7 +375,7 @@ public: Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(TokenizedData &data) override; + bool data() override; /** * Creates an instance of the EmptyHandler class. @@ -383,7 +401,7 @@ public: Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(TokenizedData &data) override; + bool data() override; }; /** @@ -430,13 +448,12 @@ protected: * @param fieldData is the captured field data. 
* @param args are the arguments that were given in the "start" function. */ - virtual void doHandle(const Variant &fieldData, - Variant::mapType &args) = 0; + virtual void doHandle(const Variant &fieldData, Variant::mapType &args) = 0; public: bool start(Variant::mapType &args) override; void end() override; - bool data(TokenizedData &data) override; + bool data() override; }; } } -- cgit v1.2.3 From 2807dc44b0555c19944f2520852d242eacc30b20 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:36:44 +0100 Subject: Added first (not final) version of the TokenStack class which will be used internally by the Stack class --- CMakeLists.txt | 1 + src/core/parser/stack/TokenStack.cpp | 45 ++++++++++++++ src/core/parser/stack/TokenStack.hpp | 111 +++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 src/core/parser/stack/TokenStack.cpp create mode 100644 src/core/parser/stack/TokenStack.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e81822..b206458 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,6 +189,7 @@ ADD_LIBRARY(ousia_core src/core/parser/stack/State # src/core/parser/stack/Stack src/core/parser/stack/TokenRegistry + src/core/parser/stack/TokenStack # src/core/parser/stack/TypesystemHandler src/core/parser/utils/SourceOffsetVector src/core/parser/utils/TokenizedData diff --git a/src/core/parser/stack/TokenStack.cpp b/src/core/parser/stack/TokenStack.cpp new file mode 100644 index 0000000..6afeaed --- /dev/null +++ b/src/core/parser/stack/TokenStack.cpp @@ -0,0 +1,45 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "TokenStack.hpp" + +namespace ousia { +namespace parser_stack { + +void TokenStack::pushTokens(const std::vector &tokens) +{ + stack.push_back(tokens); +} + +void TokenStack::popTokens() { stack.pop_back(); } + +TokenSet TokenStack::tokens() const +{ + if (stack.empty() && parentStack != nullptr) { + return parentStack->tokens(); + } + + TokenSet res; + for (const TokenSyntaxDescriptor &descr : stack.back()) { + descr.insertIntoTokenSet(res); + } + return res; +} +} +} + diff --git a/src/core/parser/stack/TokenStack.hpp b/src/core/parser/stack/TokenStack.hpp new file mode 100644 index 0000000..9669f50 --- /dev/null +++ b/src/core/parser/stack/TokenStack.hpp @@ -0,0 +1,111 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file TokenStack.hpp + * + * Contains the TokenStack class used for collecting the currently enabled user + * defined tokens on a per-field basis. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_PARSER_STACK_TOKEN_STACK_HPP_ +#define _OUSIA_PARSER_STACK_TOKEN_STACK_HPP_ + +#include +#include + +#include + +namespace ousia { +namespace parser_stack { + +/** + * The TokenStack class is used by the Stack class to collect all currently + * enabled user defined tokens. + */ +class TokenStack { +private: + /** + * Shared pointer at the parent TokenStack instance. May be nullptr, in + * which case no parent TokenStack instance exists. + */ + const TokenStack *parentStack; + + /** + * Stack containing vectors of TokenSyntaxDescriptor instances as given by + * the user. + */ + std::vector> stack; + + /** + * Constructor of the TokenStack class. + * + * @param parentStack is a pointer at the underlying parentStack instance + * to which calls should be forwarded if no data has been pushed onto this + * stack instance. + */ + TokenStack(const TokenStack *parentStack) : parentStack(parentStack) {} + +public: + /** + * Default constructor of the TokenStack class with no reference at a parent + * stack. + */ + TokenStack() : TokenStack(nullptr) {} + + /** + * Constructor of the TokenStack class with a reference at a parent + * TokenStack instance. + * + * @param parentStack is a reference at a parent TokenStack instance. If no + * data has yet been pushed onto this instance, calls will be forwarded to + * the parent stack. + */ + TokenStack(const TokenStack &parentStack) : TokenStack(&parentStack) {} + + /** + * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. + * + * @param tokens is a list of TokenSyntaxDescriptor instances that should be + * stored on the stack. + */ + void pushTokens(const std::vector &tokens); + + /** + * Removes the previously pushed list of tokens from the stack. + */ + void popTokens(); + + /** + * Returns a set containing all currently enabled tokens. 
The set of enabled + * tokens are those tokens that were pushed last onto the stack. This set + * has to be passed to the TokenizedData instance in order to gather all + * tokens that are currently possible. + * + * @return a set of tokens containing all the Tokens that are currently + * possible. + */ + TokenSet tokens() const; +}; +} +} + +#endif /* _OUSIA_PARSER_STACK_TOKEN_STACK_HPP_ */ + -- cgit v1.2.3 From 95f0ade7c19d7c6c451025e9a76d66ffb64e1f70 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:41:15 +0100 Subject: Adapted Stack interface (Stack.cpp is a mess right now and does not compile) --- src/core/parser/stack/Stack.cpp | 202 +++++++++++++++++++++++++++++- src/core/parser/stack/Stack.hpp | 270 ++++++---------------------------------- 2 files changed, 238 insertions(+), 234 deletions(-) diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 309c9a0..292e7e2 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -32,6 +32,96 @@ namespace parser_stack { /* Class HandlerInfo */ +/** + * The HandlerInfo class is used internally by the stack to associate additional + * (mutable) data with a handler instance. + */ +class HandlerInfo { +public: + /** + * Pointer pointing at the actual handler instance. + */ + std::shared_ptr handler; + + /** + * Next field index to be passed to the "fieldStart" function of the Handler + * class. + */ + size_t fieldIdx; + + /** + * Set to true if the handler is valid (which is the case if the "start" + * method has returned true). If the handler is invalid, no more calls are + * directed at it until it can be removed from the stack. + */ + bool valid : 1; + + /** + * Set to true if this is an implicit handler, that was created when the + * current stack state was deduced. + */ + bool implicit : 1; + + /** + * Set to true if the handler currently is in a field. 
+ */ + bool inField : 1; + + /** + * Set to true if the handler currently is in the default field. + */ + bool inDefaultField : 1; + + /** + * Set to true if the handler currently is in an implicitly started default + * field. + */ + bool inImplicitDefaultField : 1; + + /** + * Set to false if this field is only opened pro-forma and does not accept + * any data. Otherwise set to true. + */ + bool inValidField : 1; + + /** + * Set to true, if the default field was already started. + */ + bool hadDefaultField : 1; + + /** + * Default constructor of the HandlerInfo class. + */ + HandlerInfo(); + /** + * Constructor of the HandlerInfo class, allows to set all flags manually. + */ + HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField, + bool inImplicitDefaultField, bool inValidField); + + /** + * Constructor of the HandlerInfo class, taking a shared_ptr to the handler + * to which additional information should be attached. + */ + HandlerInfo(std::shared_ptr handler); + + /** + * Destructor of the HandlerInfo class (to allow Handler to be forward + * declared). + */ + ~HandlerInfo(); + + /** + * Updates the "field" flags according to a "fieldStart" event. + */ + void fieldStart(bool isDefault, bool isImplicit, bool isValid); + + /** + * Updates the "fields" flags according to a "fieldEnd" event. + */ + void fieldEnd(); +}; + HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {} HandlerInfo::HandlerInfo(std::shared_ptr handler) @@ -117,6 +207,113 @@ static LoggableException buildInvalidCommandException( } } +/* Class StackImpl */ + +class StackImpl { + +private: + /** + * Reference at the parser context. + */ + ParserContext &ctx; + + /** + * Map containing all registered command names and the corresponding + * state descriptors. + */ + const std::multimap &states; + + /** + * Internal stack used for managing the currently active Handler instances. 
+ */ + std::vector stack; + + /** + * Return the reference in the Logger instance stored within the context. + */ + Logger &logger(); + + /** + * Used internally to get all expected command names for the current state. + * This function is used to build error messages. + * + * @return a set of strings containing the names of the expected commands. + */ + std::set expectedCommands(); + + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetState(const std::string &name); + + /** + * Returns the targetState for a command with the given name that can be + * reached from the current state, also including the wildcard "*" state. + * Throws an exception if the given target state is not a valid identifier. + * + * @param name is the name of the requested command. + * @return nullptr if no target state was found, a pointer at the target + * state otherwise. + */ + const State *findTargetStateOrWildcard(const std::string &name); + + /** + * Tries to reconstruct the parser state from the Scope instance of the + * ParserContext given in the constructor. This functionality is needed for + * including files,as the Parser of the included file needs to be brought to + * an equivalent state as the one in the including file. + */ + void deduceState(); + + /** + * Returns a reference at the current HandlerInfo instance (or a stub + * HandlerInfo instance if the stack is empty). + */ + HandlerInfo ¤tInfo(); + + /** + * Returns a reference at the last HandlerInfo instance (or a stub + * HandlerInfo instance if the stack has only one element). + */ + HandlerInfo &lastInfo(); + + /** + * Ends all handlers that currently are not inside a field and already had + * a default field. 
This method is called whenever the data() and command() + * events are reached. + */ + void endOverdueHandlers(); + + /** + * Ends the current handler and removes the corresponding element from the + * stack. + */ + void endCurrentHandler(); + + /** + * Tries to start a default field for the current handler, if currently the + * handler is not inside a field and did not have a default field yet. + * + * @return true if the handler is inside a field, false if no field could + * be started. + */ + bool ensureHandlerIsInField(); + + /** + * Returns true if all handlers on the stack are currently valid, or false + * if at least one handler is invalid. + * + * @return true if all handlers on the stack are valid. + */ + bool handlersValid(); +}; + + /* Class Stack */ Stack::Stack(ParserContext &ctx, @@ -611,10 +808,5 @@ void Stack::annotationEnd(const Variant &className, const Variant &elementName) { // TODO } - -void Stack::token(Variant token) -{ - // TODO -} } } diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index cd29b28..e1173d0 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -29,226 +29,35 @@ #ifndef _OUSIA_PARSER_STACK_STACK_HPP_ #define _OUSIA_PARSER_STACK_STACK_HPP_ -#include - #include #include -#include -#include - -#include -#include namespace ousia { // Forward declarations class ParserContext; -class Logger; class TokenizedData; +class Variant; namespace parser_stack { // Forward declarations -class Handler; +class StackImpl; class State; -/** - * The HandlerInfo class is used internally by the stack to associate additional - * (mutable) data with a handler instance. - */ -class HandlerInfo { -public: - /** - * Pointer pointing at the actual handler instance. - */ - std::shared_ptr handler; - - /** - * Next field index to be passed to the "fieldStart" function of the Handler - * class. 
- */ - size_t fieldIdx; - - /** - * Set to true if the handler is valid (which is the case if the "start" - * method has returned true). If the handler is invalid, no more calls are - * directed at it until it can be removed from the stack. - */ - bool valid : 1; - - /** - * Set to true if this is an implicit handler, that was created when the - * current stack state was deduced. - */ - bool implicit : 1; - - /** - * Set to true if the handler currently is in a field. - */ - bool inField : 1; - - /** - * Set to true if the handler currently is in the default field. - */ - bool inDefaultField : 1; - - /** - * Set to true if the handler currently is in an implicitly started default - * field. - */ - bool inImplicitDefaultField : 1; - - /** - * Set to false if this field is only opened pro-forma and does not accept - * any data. Otherwise set to true. - */ - bool inValidField : 1; - - /** - * Set to true, if the default field was already started. - */ - bool hadDefaultField : 1; - - /** - * Default constructor of the HandlerInfo class. - */ - HandlerInfo(); - /** - * Constructor of the HandlerInfo class, allows to set all flags manually. - */ - HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField, - bool inImplicitDefaultField, bool inValidField); - - /** - * Constructor of the HandlerInfo class, taking a shared_ptr to the handler - * to which additional information should be attached. - */ - HandlerInfo(std::shared_ptr handler); - - /** - * Destructor of the HandlerInfo class (to allow Handler to be forward - * declared). - */ - ~HandlerInfo(); - - /** - * Updates the "field" flags according to a "fieldStart" event. - */ - void fieldStart(bool isDefault, bool isImplicit, bool isValid); - - /** - * Updates the "fields" flags according to a "fieldEnd" event. - */ - void fieldEnd(); -}; - /** * The Stack class is a pushdown automaton responsible for turning a command * stream into a tree of Node instances. 
It does so by following a state * transition graph and creating a set of Handler instances, which are placed - * on the stack. + * on the stack. Additionally it is responsible for the normalization of + * Annotations and for handling tokens. */ class Stack { private: /** - * Reference at the parser context. - */ - ParserContext &ctx; - - /** - * Map containing all registered command names and the corresponding - * state descriptors. + * Pointer at the internal implementation */ - const std::multimap &states; - - /** - * Internal stack used for managing the currently active Handler instances. - */ - std::vector stack; - - /** - * Return the reference in the Logger instance stored within the context. - */ - Logger &logger(); - - /** - * Used internally to get all expected command names for the current state. - * This function is used to build error messages. - * - * @return a set of strings containing the names of the expected commands. - */ - std::set expectedCommands(); - - /** - * Returns the targetState for a command with the given name that can be - * reached from the current state. - * - * @param name is the name of the requested command. - * @return nullptr if no target state was found, a pointer at the target - * state otherwise. - */ - const State *findTargetState(const std::string &name); - - /** - * Returns the targetState for a command with the given name that can be - * reached from the current state, also including the wildcard "*" state. - * Throws an exception if the given target state is not a valid identifier. - * - * @param name is the name of the requested command. - * @return nullptr if no target state was found, a pointer at the target - * state otherwise. - */ - const State *findTargetStateOrWildcard(const std::string &name); - - /** - * Tries to reconstruct the parser state from the Scope instance of the - * ParserContext given in the constructor. 
This functionality is needed for - * including files,as the Parser of the included file needs to be brought to - * an equivalent state as the one in the including file. - */ - void deduceState(); - - /** - * Returns a reference at the current HandlerInfo instance (or a stub - * HandlerInfo instance if the stack is empty). - */ - HandlerInfo ¤tInfo(); - - /** - * Returns a reference at the last HandlerInfo instance (or a stub - * HandlerInfo instance if the stack has only one element). - */ - HandlerInfo &lastInfo(); - - /** - * Ends all handlers that currently are not inside a field and already had - * a default field. This method is called whenever the data() and command() - * events are reached. - */ - void endOverdueHandlers(); - - /** - * Ends the current handler and removes the corresponding element from the - * stack. - */ - void endCurrentHandler(); - - /** - * Tries to start a default field for the current handler, if currently the - * handler is not inside a field and did not have a default field yet. - * - * @return true if the handler is inside a field, false if no field could - * be started. - */ - bool ensureHandlerIsInField(); - - /** - * Returns true if all handlers on the stack are currently valid, or false - * if at least one handler is invalid. - * - * @return true if all handlers on the stack are valid. - */ - bool handlersValid(); + std::unique_ptr impl; public: /** @@ -269,8 +78,8 @@ public: /** * Returns the state the Stack instance currently is in. * - * @return the state of the currently active Handler instance or STATE_NONE - * if no handler is on the stack. + * @return the state of the currently active Handler instance or + * States::None if no handler is on the stack. */ const State ¤tState(); @@ -289,28 +98,36 @@ public: * separator ':') and its corresponding location. Must be a string variant. * @param args is a map containing the arguments that were passed to the * command. 
+ * @param range if true, the started command has an explicit range. */ - void command(const Variant &name, const Variant::mapType &args); + void commandStart(const Variant &name, const Variant::mapType &args, + bool range); /** - * Function that should be called whenever character data is found in the - * input stream. May only be called if the currently is a command on the - * stack. + * Function that should be called whenever an annotation starts. * - * @param data is a TokenizedData instance containing the pre-segmented data - * that should be read. + * @param name is the name of the annotation class. + * @param args is a map variant containing the arguments that were passed + * to the annotation. + * @param range if true, the annotation fields have an explicit range. */ - void data(TokenizedData data); + void annotationStart(const Variant &className, const Variant &args, + bool range); /** - * Function that shuold be called whenever character data is found in the - * input stream. The given string variant is converted into a TokenizedData - * instance internally. + * Function that should be called whenever an annotation ends. * - * @param stringData is a string variant containing the data that has been - * found. + * @param name is the name of the annotation class that was ended. + * @param annotationName is the name of the annotation that was ended. */ - void data(const Variant &stringData); + void annotationEnd(const Variant &className, const Variant &elementName); + + /** + * Function the should be called whenever a ranged command or annotation + * ends. Must be called if the range parameter range was set to true when + * annotationStart() or commandStart() were called. + */ + void rangeEnd(); /** * Function that should be called whenever a new field starts. Fields of the @@ -329,29 +146,24 @@ public: void fieldEnd(); /** - * Function that should be called whenever an annotation starts. - * - * @param name is the name of the annotation class. 
- * @param args is a map variant containing the arguments that were passed - * to the annotation. - */ - void annotationStart(const Variant &className, const Variant &args); - - /** - * Function that should be called whenever an annotation ends. + * Function that should be called whenever character data is found in the + * input stream. May only be called if the currently is a command on the + * stack. * - * @param name is the name of the annotation class that was ended. - * @param annotationName is the name of the annotation that was ended. + * @param data is a TokenizedData instance containing the pre-segmented data + * that should be read. */ - void annotationEnd(const Variant &className, const Variant &elementName); + void data(const TokenizedData &data); /** - * Function that should be called whenever a previously registered token - * is found in the input stream. + * Function that shuold be called whenever character data is found in the + * input stream. The given string variant is converted into a TokenizedData + * instance internally. * - * @param token is string variant containing the token that was encountered. + * @param stringData is a string variant containing the data that has been + * found. 
*/ - void token(Variant token); + void data(const Variant &stringData); }; } } -- cgit v1.2.3 From 88afbcc2a4c4cb9956e4459cf1c5aa08e349835e Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:41:35 +0100 Subject: Implemented TokenSyntaxDescriptor structure --- src/core/common/Token.cpp | 16 ++++++++++- src/core/common/Token.hpp | 72 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 6 deletions(-) diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp index 8bcdbb5..e454ae4 100644 --- a/src/core/common/Token.cpp +++ b/src/core/common/Token.cpp @@ -19,6 +19,20 @@ #include "Token.hpp" namespace ousia { -// Stub to make sure Tokens.hpp is valid + +/* Class TokenSyntaxDescriptor */ + +void TokenSyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ + if (start != Tokens::Empty) { + set.insert(start); + } + if (end != Tokens::Empty) { + set.insert(end); + } + if (shortForm != Tokens::Empty) { + set.insert(shortForm); + } +} } diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp index 0cf56b0..f89a0ce 100644 --- a/src/core/common/Token.hpp +++ b/src/core/common/Token.hpp @@ -134,10 +134,7 @@ struct Token { * @param location is the location of the extracted string content in the * source file. */ - Token(SourceLocation location) - : id(Tokens::Data), location(location) - { - } + Token(SourceLocation location) : id(Tokens::Data), location(location) {} /** * Constructor of the Token struct. @@ -165,7 +162,7 @@ struct Token { * @return true if the TokenId indicates that this token is a "special" * token. */ - bool isSpecial() const {return id > Tokens::MaxTokenId;} + bool isSpecial() const { return id > Tokens::MaxTokenId; } /** * The getLocation function allows the tokens to be directly passed as @@ -175,6 +172,71 @@ struct Token { */ const SourceLocation &getLocation() const { return location; } }; + +/** + * Class describing the user defined syntax for a single field or annotation. 
+ */ +struct TokenSyntaxDescriptor { + /** + * Possible start token or Tokens::Empty if no token is set. + */ + TokenId start; + + /** + * Possible end token or Tokens::Empty if no token is set. + */ + TokenId end; + + /** + * Possible representation token or Tokens::Empty if no token is set. + */ + TokenId shortForm; + + /** + * Flag specifying whether this TokenSyntaxDescriptor describes an + * annotation. + */ + bool isAnnotation; + + /** + * Default constructor, sets all token ids to Tokens::Empty and isAnnotation + * to false. + */ + TokenSyntaxDescriptor() + : start(Tokens::Empty), + end(Tokens::Empty), + shortForm(Tokens::Empty), + isAnnotation(false) + { + } + + /** + * Member initializer constructor. + * + * @param start is a possible start token. + * @param end is a possible end token. + * @param shortForm is a possible short form token. + * @param isAnnotation is set to true if this syntax descriptor describes an + * annotation. + */ + TokenSyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + bool isAnnotation) + : start(start), + end(end), + shortForm(shortForm), + isAnnotation(isAnnotation) + { + } + + /** + * Inserts all tokens referenced in this TokenSyntaxDescriptor into the + * given TokenSet. Skips token ids set to Tokens::Empty. + * + * @param set is the TokenSet instance into which the Tokens should be + * inserted. 
+ */ + void insertIntoTokenSet(TokenSet &set) const; +}; } #endif /* _OUSIA_TOKENS_HPP_ */ -- cgit v1.2.3 From 11ee669f29e426effaf4a1e0d82baa978219e92f Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 10:35:22 +0100 Subject: OsxmlEventParser also fills a TokenizedData instance now --- src/formats/osxml/OsxmlEventParser.cpp | 87 +++++++++-------------------- src/formats/osxml/OsxmlEventParser.hpp | 10 ++-- src/formats/osxml/OsxmlParser.cpp | 2 +- test/formats/osxml/OsxmlEventParserTest.cpp | 11 +++- 4 files changed, 41 insertions(+), 69 deletions(-) diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp index 83c16f0..79a8dbe 100644 --- a/src/formats/osxml/OsxmlEventParser.cpp +++ b/src/formats/osxml/OsxmlEventParser.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include "OsxmlAttributeLocator.hpp" #include "OsxmlEventParser.hpp" @@ -38,6 +39,11 @@ namespace ousia { */ class OsxmlEventParserData { public: + /** + * Current character data buffer. + */ + TokenizedData data; + /** * Contains the current depth of the parsing process. */ @@ -51,24 +57,13 @@ public: ssize_t annotationEndTagDepth; /** - * Current character data buffer. - */ - std::vector textBuf; - - /** - * Current character data start. - */ - size_t textStart; - - /** - * Current character data end. - */ - size_t textEnd; - - /** - * Default constructor. + * Constructor taking the sourceId of the file from which the XML is being + * parsed. + * + * @param sourceId is the source if of the XML file from which the data is + * currently being parsed. */ - OsxmlEventParserData(); + OsxmlEventParserData(SourceId sourceId); /** * Increments the depth. @@ -91,14 +86,6 @@ public: * @return true if character data is available. */ bool hasText(); - - /** - * Returns a Variant containing the character data and its location. - * - * @return a string variant containing the text data and the character - * location. 
- */ - Variant getText(SourceId sourceId); }; /* Class GuardedExpatXmlParser */ @@ -156,7 +143,7 @@ public: static const std::string TOP_LEVEL_TAG{"ousia"}; /** - * Prefix used to indicate the start of an annoation (note the trailing colon) + * Prefix used to indicate the start of an annoation (note the trailing colon). */ static const std::string ANNOTATION_START_PREFIX{"a:start:"}; @@ -203,8 +190,9 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name, // If there is any text data in the buffer, issue that first if (parser->getData().hasText()) { - parser->getEvents().data( - parser->getData().getText(parser->getReader().getSourceId())); + TokenizedData &data = parser->getData().data; + parser->getEvents().data(data); + data.clear(); } // Read the argument locations -- this is only a stupid and slow hack, @@ -348,8 +336,9 @@ static void xmlEndElementHandler(void *ref, const XML_Char *name) // If there is any text data in the buffer, issue that first if (parser->getData().hasText()) { - parser->getEvents().data( - parser->getData().getText(parser->getReader().getSourceId())); + TokenizedData &data = parser->getData().data; + parser->getEvents().data(data); + data.clear(); } // Abort if the special ousia tag ends here @@ -381,18 +370,8 @@ static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) // Synchronize the logger position SourceLocation loc = xmlSyncLoggerPosition(p, ulen); - // Fetch some variables for convenience - OsxmlEventParserData &data = parser->getData(); - std::vector &textBuf = data.textBuf; - - // Update start and end position - if (textBuf.empty()) { - data.textStart = loc.getStart(); - } - data.textEnd = loc.getEnd(); - - // Insert the data into the text buffer - textBuf.insert(textBuf.end(), &s[0], &s[ulen]); + // Append the data to the buffer + parser->getData().data.append(std::string(s, ulen), loc.getStart()); } /* Class OsxmlEvents */ @@ -401,8 +380,8 @@ OsxmlEvents::~OsxmlEvents() {} /* Class 
OsxmlEventParser */ -OsxmlEventParserData::OsxmlEventParserData() - : depth(0), annotationEndTagDepth(-1), textStart(0), textEnd(0) +OsxmlEventParserData::OsxmlEventParserData(SourceId sourceId) + : data(sourceId), depth(0), annotationEndTagDepth(-1) { } @@ -423,23 +402,7 @@ bool OsxmlEventParserData::inAnnotationEndTag() return (annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth); } -bool OsxmlEventParserData::hasText() { return !textBuf.empty(); } - -Variant OsxmlEventParserData::getText(SourceId sourceId) -{ - // Create a variant containing the string data and the location - Variant var = - Variant::fromString(std::string{textBuf.data(), textBuf.size()}); - var.setLocation({sourceId, textStart, textEnd}); - - // Reset the text buffers - textBuf.clear(); - textStart = 0; - textEnd = 0; - - // Return the variant - return var; -} +bool OsxmlEventParserData::hasText() { return !data.empty(); } /* Class OsxmlEventParser */ @@ -448,7 +411,7 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events, : reader(reader), events(events), logger(logger), - data(new OsxmlEventParserData()) + data(new OsxmlEventParserData(reader.getSourceId())) { } diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp index 7a8c96d..4c5a485 100644 --- a/src/formats/osxml/OsxmlEventParser.hpp +++ b/src/formats/osxml/OsxmlEventParser.hpp @@ -96,10 +96,10 @@ public: /** * Called whenever string data is found. * - * @param data is a Variant containing the string data that was found in the - * XML file. + * @param data is a TokenizedData instance containing the string data that + * was found in the XML file. */ - virtual void data(const Variant &data) = 0; + virtual void data(const TokenizedData &data) = 0; }; /** @@ -179,7 +179,9 @@ public: OsxmlEvents &getEvents() const; /** - * Returns a reference at the internal data. + * Used internally to fetch a reference at the internal data. 
+ * + * @return a reference at the internal OsxmlEventParserData structure. */ OsxmlEventParserData &getData() const; }; diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp index 924d11b..afe0dc6 100644 --- a/src/formats/osxml/OsxmlParser.cpp +++ b/src/formats/osxml/OsxmlParser.cpp @@ -85,7 +85,7 @@ public: void rangeEnd() override { stack.rangeEnd(); } - void data(const Variant &data) override { stack.data(data); } + void data(const TokenizedData &data) override { stack.data(data); } }; /* Class OsxmlParser */ diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp index b24a43d..d4e9443 100644 --- a/test/formats/osxml/OsxmlEventParserTest.cpp +++ b/test/formats/osxml/OsxmlEventParserTest.cpp @@ -69,9 +69,16 @@ public: events.emplace_back(OsxmlEvent::RANGE_END, Variant::arrayType{}); } - void data(const Variant &data) override + void data(const TokenizedData &data) override { - events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data}); + Token token; + Variant text; + TokenizedDataReader reader = data.reader(); + reader.read(token, TokenSet{}, WhitespaceMode::PRESERVE); + EXPECT_EQ(Tokens::Data, token.id); + text = Variant::fromString(token.content); + text.setLocation(token.getLocation()); + events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{text}); } }; -- cgit v1.2.3 From f65e7af0dd0028ec481360eeaa16c4ff95ce253b Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 10:59:50 +0100 Subject: Got all handlers compling again --- CMakeLists.txt | 12 +-- src/core/parser/stack/DocumentHandler.cpp | 18 ++--- src/core/parser/stack/DocumentHandler.hpp | 4 +- src/core/parser/stack/Handler.hpp | 3 +- src/core/parser/stack/Stack.cpp | 123 +++++++++++++++++++++++------- src/core/parser/stack/Stack.hpp | 14 +--- 6 files changed, 112 insertions(+), 62 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b206458..cef1e31 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -181,16 +181,16 @@ ADD_LIBRARY(ousia_core src/core/parser/ParserContext src/core/parser/ParserScope src/core/parser/stack/Callbacks -# src/core/parser/stack/DocumentHandler -# src/core/parser/stack/DomainHandler -# src/core/parser/stack/GenericParserStates + src/core/parser/stack/DocumentHandler + src/core/parser/stack/DomainHandler + src/core/parser/stack/GenericParserStates src/core/parser/stack/Handler -# src/core/parser/stack/ImportIncludeHandler + src/core/parser/stack/ImportIncludeHandler src/core/parser/stack/State # src/core/parser/stack/Stack src/core/parser/stack/TokenRegistry src/core/parser/stack/TokenStack -# src/core/parser/stack/TypesystemHandler + src/core/parser/stack/TypesystemHandler src/core/parser/utils/SourceOffsetVector src/core/parser/utils/TokenizedData src/core/parser/utils/Tokenizer @@ -215,8 +215,8 @@ ADD_LIBRARY(ousia_core #) ADD_LIBRARY(ousia_osml - src/formats/osml/OsmlParser src/formats/osml/OsmlStreamParser + src/formats/osml/OsmlParser ) TARGET_LINK_LIBRARIES(ousia_osml diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index d44176a..714ab1b 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -246,8 +246,6 @@ bool DocumentChildHandler::start(Variant::mapType &args) parent->getDescriptor()->getFieldDescriptorIndex(); } // create the entity for the new element at last. 
- // TODO: REMOVE - strct_name = strct->getName(); entity = parent->createChildStructuredEntity(strct, lastFieldIdx, args, nameAttr); } @@ -373,15 +371,8 @@ bool DocumentChildHandler::convertData(Handle field, return valid && scope().resolveValue(data, type, logger); } -bool DocumentChildHandler::data(TokenizedData &data) +bool DocumentChildHandler::data() { - // TODO: Handle this correctly - Variant text = data.text(WhitespaceMode::TRIM); - if (text == nullptr) { - // For now, except "no data" as success - return true; - } - // We're past the region in which explicit fields can be defined in the // parent structure element scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true); @@ -401,6 +392,7 @@ bool DocumentChildHandler::data(TokenizedData &data) // If it is a primitive field directly, try to parse the content. if (field->isPrimitive()) { // Add it as primitive content. + Variant text = readData(); if (!convertData(field, text, logger())) { return false; } @@ -419,6 +411,10 @@ bool DocumentChildHandler::data(TokenizedData &data) for (auto primitiveField : defaultFields) { // Then try to parse the content using the type specification. forks.emplace_back(logger().fork()); + + // TODO: Actually the data has to be read after the path has been + // created (as createPath may push more tokens onto the stack) + Variant text = readData(); if (!convertData(primitiveField, text, forks.back())) { continue; } @@ -428,7 +424,6 @@ bool DocumentChildHandler::data(TokenizedData &data) // Construct the necessary path NodeVector path = field->pathTo(primitiveField, logger()); - // TODO: Create methods with indices instead of names. 
createPath(fieldIdx, path, parent); // Then create the primitive element @@ -439,6 +434,7 @@ bool DocumentChildHandler::data(TokenizedData &data) // No field was found that might take the data -- dump the error messages // from the loggers -- or, if there were no primitive fields, clearly state // this fact + Variant text = readData(); if (defaultFields.empty()) { logger().error("Got data, but structure \"" + name() + "\" does not have any primitive field", diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index dda7d8b..c51c188 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -93,8 +93,6 @@ public: class DocumentChildHandler : public Handler { private: bool isExplicitField = false; - //TODO: REMOVE - std::string strct_name; /** * Code shared by both the start(), fieldStart() and the data() method. @@ -167,7 +165,7 @@ public: bool start(Variant::mapType &args) override; void end() override; - bool data(TokenizedData &data) override; + bool data() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 848d395..377a214 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace ousia { @@ -37,7 +38,7 @@ class Variant; namespace parser_stack { // More forward declarations -class Callbacks; +class HandlerCallbacks; class State; /** diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 292e7e2..ff03a6b 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -210,7 +210,6 @@ static LoggableException buildInvalidCommandException( /* Class StackImpl */ class StackImpl { - private: /** * Reference at the parser context. 
@@ -231,7 +230,7 @@ private: /** * Return the reference in the Logger instance stored within the context. */ - Logger &logger(); + Logger &logger() {return ctx.getLogger();} /** * Used internally to get all expected command names for the current state. @@ -311,12 +310,28 @@ private: * @return true if all handlers on the stack are valid. */ bool handlersValid(); -}; +public: + StackImpl(ParserContext &ctx, + const std::multimap &states); + + ~StackImpl(); -/* Class Stack */ + const State ¤tState() const; + std::string currentCommandName() const; -Stack::Stack(ParserContext &ctx, + void commandStart(const Variant &name, const Variant::mapType &args, + bool range); + void annotationStart(const Variant &className, const Variant &args, + bool range); + void annotationEnd(const Variant &className, const Variant &elementName); + void rangeEnd(); + void fieldStart(bool isDefault); + void fieldEnd(); + void data(const TokenizedData &data); +}; + +StackImpl::StackImpl(ParserContext &ctx, const std::multimap &states) : ctx(ctx), states(states) { @@ -327,7 +342,7 @@ Stack::Stack(ParserContext &ctx, } } -Stack::~Stack() +StackImpl::~StackImpl() { while (!stack.empty()) { // Fetch the topmost stack element @@ -351,7 +366,7 @@ Stack::~Stack() } } -void Stack::deduceState() +void StackImpl::deduceState() { // Assemble all states std::vector states; @@ -384,7 +399,7 @@ void Stack::deduceState() info.fieldStart(true, false, true); } -std::set Stack::expectedCommands() +std::set StackImpl::expectedCommands() { const State *currentState = &(this->currentState()); std::set res; @@ -396,17 +411,17 @@ std::set Stack::expectedCommands() return res; } -const State &Stack::currentState() +const State &StackImpl::currentState() { return stack.empty() ? States::None : stack.back().handler->getState(); } -std::string Stack::currentCommandName() +std::string StackImpl::currentCommandName() { return stack.empty() ? 
std::string{} : stack.back().handler->getName(); } -const State *Stack::findTargetState(const std::string &name) +const State *StackImpl::findTargetState(const std::string &name) { const State *currentState = &(this->currentState()); auto range = states.equal_range(name); @@ -420,7 +435,7 @@ const State *Stack::findTargetState(const std::string &name) return nullptr; } -const State *Stack::findTargetStateOrWildcard(const std::string &name) +const State *StackImpl::findTargetStateOrWildcard(const std::string &name) { // Try to find the target state with the given name, if none is found, try // find a matching "*" state. @@ -431,16 +446,16 @@ const State *Stack::findTargetStateOrWildcard(const std::string &name) return targetState; } -HandlerInfo &Stack::currentInfo() +HandlerInfo &StackImpl::currentInfo() { return stack.empty() ? EmptyHandlerInfo : stack.back(); } -HandlerInfo &Stack::lastInfo() +HandlerInfo &StackImpl::lastInfo() { return stack.size() < 2U ? EmptyHandlerInfo : stack[stack.size() - 2]; } -void Stack::endCurrentHandler() +void StackImpl::endCurrentHandler() { if (!stack.empty()) { // Fetch the handler info for the current top-level element @@ -467,7 +482,7 @@ void Stack::endCurrentHandler() } } -void Stack::endOverdueHandlers() +void StackImpl::endOverdueHandlers() { if (!stack.empty()) { // Fetch the handler info for the current top-level element @@ -483,7 +498,7 @@ void Stack::endOverdueHandlers() } } -bool Stack::ensureHandlerIsInField() +bool StackImpl::ensureHandlerIsInField() { // If the current handler is not in a field (and actually has a handler) // try to start a default field @@ -507,7 +522,7 @@ bool Stack::ensureHandlerIsInField() return true; } -bool Stack::handlersValid() +bool StackImpl::handlersValid() { for (auto it = stack.crbegin(); it != stack.crend(); it++) { if (!it->valid) { @@ -517,9 +532,7 @@ bool Stack::handlersValid() return true; } -Logger &Stack::logger() { return ctx.getLogger(); } - -void Stack::command(const Variant 
&name, const Variant::mapType &args) +void StackImpl::commandStart(const Variant &name, const Variant::mapType &args) { // End handlers that already had a default field and are currently not // active. @@ -611,7 +624,22 @@ void Stack::command(const Variant &name, const Variant::mapType &args) } } -void Stack::data(TokenizedData data) +void StackImpl::annotationStart(const Variant &className, const Variant &args) +{ + // TODO +} + +void StackImpl::annotationEnd(const Variant &className, const Variant &elementName) +{ + // TODO +} + +void StackImpl::rangeEnd() +{ + // TODO +} + +void StackImpl::data(TokenizedData data) { // TODO: Rewrite this function for token handling // TODO: This loop needs to be refactored out @@ -626,7 +654,8 @@ void Stack::data(TokenizedData data) // make sure the data actually is data if (stack.empty()) { if (hasNonWhitespaceText) { - throw LoggableException("No command here to receive data.", data); + throw LoggableException("No command here to receive data.", + data); } return; } @@ -699,7 +728,7 @@ void Stack::data(TokenizedData data) } } -void Stack::data(const Variant &stringData) +void StackImpl::data(const Variant &stringData) { // Fetch the SourceLocation of the given stringData variant SourceLocation loc = stringData.getLocation(); @@ -712,7 +741,7 @@ void Stack::data(const Variant &stringData) data(tokenizedData); } -void Stack::fieldStart(bool isDefault) +void StackImpl::fieldStart(bool isDefault) { // Make sure the current handler stack is not empty if (stack.empty()) { @@ -764,7 +793,7 @@ void Stack::fieldStart(bool isDefault) info.fieldStart(defaultField, false, valid); } -void Stack::fieldEnd() +void StackImpl::fieldEnd() { // Unroll the stack until the next explicitly open field while (!stack.empty()) { @@ -799,14 +828,50 @@ void Stack::fieldEnd() info.fieldEnd(); } -void Stack::annotationStart(const Variant &className, const Variant &args) +/* Class Stack */ + +Stack::Stack(ParserContext &ctx, + const std::multimap &states) + 
: impl(new StackImpl(ctx, states)) +{ +} + +Stack::~Stack() { - // TODO + // Do nothing here, stub needed because StackImpl is incomplete in hpp +} + +const State &Stack::currentState() const { return impl->currentState(); } + +std::string Stack::currentCommandName() const +{ + return impl->currentCommandName(); +} + +void Stack::commandStart(const Variant &name, const Variant::mapType &args, + bool range) +{ + impl->commandStart(name, args, range); +} + +void Stack::annotationStart(const Variant &className, const Variant &args, + bool range) +{ + impl->annotationStart(className, args, range); } void Stack::annotationEnd(const Variant &className, const Variant &elementName) { - // TODO + impl->annotationEnd(className, elementName); } + +void Stack::rangeEnd() { impl->rangeEnd(); } + +void Stack::fieldStart(bool isDefault) { impl->fieldStart(isDefault); } + +void Stack::fieldEnd() { impl->fieldEnd(); } + +void Stack::data(const TokenizedData &data) { impl->data(data); } +}; } } diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index e1173d0..1d87b9c 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -81,7 +81,7 @@ public: * @return the state of the currently active Handler instance or * States::None if no handler is on the stack. */ - const State ¤tState(); + const State ¤tState() const; /** * Returns the command name that is currently being handled. @@ -89,7 +89,7 @@ public: * @return the name of the command currently being handled by the active * Handler instance or an empty string if no handler is currently active. */ - std::string currentCommandName(); + std::string currentCommandName() const; /** * Function that should be called whenever a new command is reached. @@ -154,16 +154,6 @@ public: * that should be read. */ void data(const TokenizedData &data); - - /** - * Function that shuold be called whenever character data is found in the - * input stream. 
The given string variant is converted into a TokenizedData - * instance internally. - * - * @param stringData is a string variant containing the data that has been - * found. - */ - void data(const Variant &stringData); }; } } -- cgit v1.2.3 From 5b81f755a5303c3eab05c605711ecca32c071b6d Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 11:46:47 +0100 Subject: Got Stack compiling again --- src/core/parser/stack/Callbacks.hpp | 28 ++-- src/core/parser/stack/Handler.hpp | 75 ++++----- src/core/parser/stack/Stack.cpp | 297 ++++++++++++++++++++++-------------- src/core/parser/stack/Stack.hpp | 5 +- src/formats/osml/OsmlParser.cpp | 2 +- src/formats/osxml/OsxmlParser.cpp | 15 +- 6 files changed, 251 insertions(+), 171 deletions(-) diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index d7b2547..8acc02d 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -76,18 +76,8 @@ public: * Interface defining a set of callback functions that act as a basis for the * StateStackCallbacks and the ParserCallbacks. */ -class HandlerCallbacks: public ParserCallbacks { +class HandlerCallbacks : public ParserCallbacks { public: - /** - * Reads a string variant form the current input stream. This function must - * be called from the data() method. - * - * @return a string variant containing the current text data. The return - * value depends on the currently set whitespace mode and the tokens that - * were enabled using the enableTokens callback method. - */ - Variant readData(); - /** * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. * The tokens described in the token list are the tokens that are currently @@ -96,14 +86,24 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. 
*/ - void pushTokens(const std::vector &tokens); + virtual void pushTokens( + const std::vector &tokens) = 0; /** * Removes the previously pushed list of tokens from the stack. */ - void popTokens(); -}; + virtual void popTokens() = 0; + /** + * Reads a string variant form the current input stream. This function must + * be called from the data() method. + * + * @return a string variant containing the current text data. The return + * value depends on the currently set whitespace mode and the tokens that + * were enabled using the enableTokens callback method. + */ + virtual Variant readData() = 0; +}; } } diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 377a214..19c3d65 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -161,6 +161,44 @@ protected: */ const std::string &name() const; + /** + * Calls the corresponding function in the HandlerCallbacks instance. This + * method registers the given tokens as tokens that are generally available, + * tokens must be explicitly enabled using the "pushTokens" and "popTokens" + * method. Tokens that have not been registered are not guaranteed to be + * reported (except for special tokens, these do not have to be registerd). + * + * @param token is the token string that should be made available. + * @return the TokenId that will be used to refer to the token. + */ + TokenId registerToken(const std::string &token); + + /** + * Calls the corresponding function in the HandlerCallbacks instance. This + * method unregisters the given token. Note that for a token to be no longer + * reported, this function has to be called as many times as registerToken() + * for the corresponding token. + * + * @param id is the id of the Token that should be unregistered. + */ + void unregisterToken(TokenId id); + + /** + * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. 
+ * The tokens described in the token list are the tokens that are currently + * enabled. + * + * @param tokens is a list of TokenSyntaxDescriptor instances that should be + * stored on the stack. + */ + void pushTokens(const std::vector &tokens); + + /** + * Calls the corresponding function in the HandlerCallbacks instance. + * Removes the previously pushed list of tokens from the stack. + */ + void popTokens(); + /** * Calls the corresponding method in the HandlerCallbacks instance. Reads a * string variant form the current input stream. This function must be @@ -193,43 +231,6 @@ protected: */ // void popWhitespaceMode(); - /** - * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. - * The tokens described in the token list are the tokens that are currently - * enabled. - * - * @param tokens is a list of TokenSyntaxDescriptor instances that should be - * stored on the stack. - */ - void pushTokens(const std::vector &tokens); - - /** - * Calls the corresponding function in the HandlerCallbacks instance. - * Removes the previously pushed list of tokens from the stack. - */ - void popTokens(); - - /** - * Calls the corresponding function in the HandlerCallbacks instance. This - * method registers the given tokens as tokens that are generally available, - * tokens must be explicitly enabled using the "pushTokens" and "popTokens" - * method. Tokens that have not been registered are not guaranteed to be - * reported (except for special tokens, these do not have to be registerd). - * - * @param token is the token string that should be made available. - * @return the TokenId that will be used to refer to the token. - */ - TokenId registerToken(const std::string &token); - - /** - * Calls the corresponding function in the HandlerCallbacks instance. This - * method unregisters the given token. Note that for a token to be no longer - * reported, this function has to be called as many times as registerToken() - * for the corresponding token. 
- * - * @param id is the id of the Token that should be unregistered. - */ - void unregisterToken(TokenId id); public: /** diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index ff03a6b..a556999 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -23,9 +23,12 @@ #include #include +#include "Callbacks.hpp" #include "Handler.hpp" #include "Stack.hpp" #include "State.hpp" +#include "TokenRegistry.hpp" +#include "TokenStack.hpp" namespace ousia { namespace parser_stack { @@ -209,8 +212,14 @@ static LoggableException buildInvalidCommandException( /* Class StackImpl */ -class StackImpl { +class StackImpl : public HandlerCallbacks { private: + /** + * Reference at an implementation of the ParserCallbacks instance to which + * certain handler callbacks are directed. + */ + ParserCallbacks &parser; + /** * Reference at the parser context. */ @@ -222,6 +231,18 @@ private: */ const std::multimap &states; + /** + * Registry responsible for registering the tokens proposed by the + * Handlers in the parser. + */ + TokenRegistry tokenRegistry; + + /** + * Pointer at a TokenizedDataReader instance from which the data should + * currently be read. + */ + TokenizedDataReader *dataReader; + /** * Internal stack used for managing the currently active Handler instances. */ @@ -230,7 +251,7 @@ private: /** * Return the reference in the Logger instance stored within the context. */ - Logger &logger() {return ctx.getLogger();} + Logger &logger() { return ctx.getLogger(); } /** * Used internally to get all expected command names for the current state. 
@@ -312,8 +333,8 @@ private: bool handlersValid(); public: - StackImpl(ParserContext &ctx, - const std::multimap &states); + StackImpl(ParserCallbacks &parser, ParserContext &ctx, + const std::multimap &states); ~StackImpl(); @@ -329,11 +350,22 @@ public: void fieldStart(bool isDefault); void fieldEnd(); void data(const TokenizedData &data); + + TokenId registerToken(const std::string &token) override; + void unregisterToken(TokenId id) override; + Variant readData() override; + bool hasData(); + void pushTokens(const std::vector &tokens) override; + void popTokens() override; }; -StackImpl::StackImpl(ParserContext &ctx, - const std::multimap &states) - : ctx(ctx), states(states) +StackImpl::StackImpl(ParserCallbacks &parser, ParserContext &ctx, + const std::multimap &states) + : parser(parser), + ctx(ctx), + states(states), + tokenRegistry(parser), + dataReader(nullptr) { // If the scope instance is not empty we need to deduce the current parser // state @@ -389,8 +421,8 @@ void StackImpl::deduceState() HandlerConstructor ctor = state.elementHandler ? state.elementHandler : EmptyHandler::create; - std::shared_ptr handler = - std::shared_ptr{ctor({ctx, "", state, SourceLocation{}})}; + std::shared_ptr handler = std::shared_ptr{ + ctor({ctx, *this, "", state, SourceLocation{}})}; stack.emplace_back(handler); // Set the correct flags for this implicit handler @@ -411,12 +443,12 @@ std::set StackImpl::expectedCommands() return res; } -const State &StackImpl::currentState() +const State &StackImpl::currentState() const { return stack.empty() ? States::None : stack.back().handler->getState(); } -std::string StackImpl::currentCommandName() +std::string StackImpl::currentCommandName() const { return stack.empty() ? 
std::string{} : stack.back().handler->getName(); } @@ -532,7 +564,8 @@ bool StackImpl::handlersValid() return true; } -void StackImpl::commandStart(const Variant &name, const Variant::mapType &args) +void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, + bool range) { // End handlers that already had a default field and are currently not // active. @@ -575,8 +608,8 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args) HandlerConstructor ctor = targetState->elementHandler ? targetState->elementHandler : EmptyHandler::create; - std::shared_ptr handler{ - ctor({ctx, name.asString(), *targetState, name.getLocation()})}; + std::shared_ptr handler{ctor( + {ctx, *this, name.asString(), *targetState, name.getLocation()})}; stack.emplace_back(handler); // Fetch the HandlerInfo for the parent element and the current element @@ -624,12 +657,14 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args) } } -void StackImpl::annotationStart(const Variant &className, const Variant &args) +void StackImpl::annotationStart(const Variant &className, const Variant &args, + bool range) { // TODO } -void StackImpl::annotationEnd(const Variant &className, const Variant &elementName) +void StackImpl::annotationEnd(const Variant &className, + const Variant &elementName) { // TODO } @@ -639,106 +674,93 @@ void StackImpl::rangeEnd() // TODO } -void StackImpl::data(TokenizedData data) +void StackImpl::data(const TokenizedData &data) { // TODO: Rewrite this function for token handling // TODO: This loop needs to be refactored out - while (!data.atEnd()) { - // End handlers that already had a default field and are currently not - // active. 
- endOverdueHandlers(); - - const bool hasNonWhitespaceText = data.hasNonWhitespaceText(); - - // Check whether there is any command the data can be sent to -- if not, - // make sure the data actually is data - if (stack.empty()) { - if (hasNonWhitespaceText) { - throw LoggableException("No command here to receive data.", - data); - } - return; - } - - // Fetch the current command handler information - HandlerInfo &info = currentInfo(); - - // Make sure the current handler has an open field - if (!ensureHandlerIsInField()) { - endCurrentHandler(); - continue; - } - - // If this field should not get any data, log an error and do not call - // the "data" handler - if (!info.inValidField) { - // If the "hadDefaultField" flag is set, we already issued an error - // message - if (!info.hadDefaultField) { - if (hasNonWhitespaceText) { - logger().error("Did not expect any data here", data); - } - return; - } - } - - if (handlersValid() && info.inValidField) { - // Fork the logger and set it as temporary logger for the "start" - // method. We only want to keep error messages if this was not a try - // to implicitly open a default field. 
- LoggerFork loggerFork = logger().fork(); - info.handler->setLogger(loggerFork); - - // Pass the data to the current Handler instance - bool valid = false; - try { - // Create a fork of the TokenizedData and let the handler work - // on it - TokenizedData dataFork = data; - valid = info.handler->data(dataFork); - - // If the data was validly handled by the handler, commit the - // change - if (valid) { - data = dataFork; - } - } - catch (LoggableException ex) { - loggerFork.log(ex); - } - - // Reset the logger instance as soon as possible - info.handler->resetLogger(); - - // If placing the data here failed and we're currently in an - // implicitly opened field, just unroll the stack to the next field - // and try again - if (!valid && info.inImplicitDefaultField) { - endCurrentHandler(); - continue; - } - - // Commit the content of the logger fork. Do not change the valid - // flag. - loggerFork.commit(); - } - - // There was no reason to unroll the stack any further, so continue - return; - } -} - -void StackImpl::data(const Variant &stringData) -{ - // Fetch the SourceLocation of the given stringData variant - SourceLocation loc = stringData.getLocation(); - - // Create a TokenizedData instance and feed the given string data into it - TokenizedData tokenizedData(loc.getSourceId()); - tokenizedData.append(stringData.asString(), loc.getStart()); - - // Call the actual "data" method - data(tokenizedData); + /*while (!data.atEnd()) { + // End handlers that already had a default field and are currently not + // active. 
+ endOverdueHandlers(); + + const bool hasNonWhitespaceText = data.hasNonWhitespaceText(); + + // Check whether there is any command the data can be sent to -- if not, + // make sure the data actually is data + if (stack.empty()) { + if (hasNonWhitespaceText) { + throw LoggableException("No command here to receive data.", + data); + } + return; + } + + // Fetch the current command handler information + HandlerInfo &info = currentInfo(); + + // Make sure the current handler has an open field + if (!ensureHandlerIsInField()) { + endCurrentHandler(); + continue; + } + + // If this field should not get any data, log an error and do not call + // the "data" handler + if (!info.inValidField) { + // If the "hadDefaultField" flag is set, we already issued an error + // message + if (!info.hadDefaultField) { + if (hasNonWhitespaceText) { + logger().error("Did not expect any data here", data); + } + return; + } + } + + if (handlersValid() && info.inValidField) { + // Fork the logger and set it as temporary logger for the "start" + // method. We only want to keep error messages if this was not a try + // to implicitly open a default field. 
+ LoggerFork loggerFork = logger().fork(); + info.handler->setLogger(loggerFork); + + // Pass the data to the current Handler instance + bool valid = false; + try { + // Create a fork of the TokenizedData and let the handler work + // on it + TokenizedData dataFork = data; + valid = info.handler->data(dataFork); + + // If the data was validly handled by the handler, commit the + // change + if (valid) { + data = dataFork; + } + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + + // Reset the logger instance as soon as possible + info.handler->resetLogger(); + + // If placing the data here failed and we're currently in an + // implicitly opened field, just unroll the stack to the next field + // and try again + if (!valid && info.inImplicitDefaultField) { + endCurrentHandler(); + continue; + } + + // Commit the content of the logger fork. Do not change the valid + // flag. + loggerFork.commit(); + } + + // There was no reason to unroll the stack any further, so continue + return; + }*/ } void StackImpl::fieldStart(bool isDefault) @@ -828,11 +850,55 @@ void StackImpl::fieldEnd() info.fieldEnd(); } +TokenId StackImpl::registerToken(const std::string &token) +{ + return tokenRegistry.registerToken(token); +} + +void StackImpl::unregisterToken(TokenId id) +{ + tokenRegistry.unregisterToken(id); +} + +void StackImpl::pushTokens(const std::vector &tokens) +{ + // TODO +} + +void StackImpl::popTokens() +{ + // TODO +} + +Variant StackImpl::readData() +{ + if (dataReader != nullptr) { + TokenizedDataReaderFork dataReaderFork = dataReader->fork(); + Token token; + + // TODO: Use correct token set + TokenSet tokens; + + // TODO: Use correct whitespace mode + WhitespaceMode mode = WhitespaceMode::COLLAPSE; + + dataReaderFork.read(token, tokens, mode); + if (token.id == Tokens::Data) { + Variant res = Variant::fromString(token.content); + res.setLocation(token.getLocation()); + return res; + } + } + return Variant{}; +} + +bool StackImpl::hasData() { return 
readData() != nullptr; } + /* Class Stack */ -Stack::Stack(ParserContext &ctx, +Stack::Stack(ParserCallbacks &parser, ParserContext &ctx, const std::multimap &states) - : impl(new StackImpl(ctx, states)) + : impl(new StackImpl(parser, ctx, states)) { } @@ -872,6 +938,5 @@ void Stack::fieldStart(bool isDefault) { impl->fieldStart(isDefault); } void Stack::fieldEnd() { impl->fieldEnd(); } void Stack::data(const TokenizedData &data) { impl->data(data); } -}; } } diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index 1d87b9c..de281d4 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -42,6 +42,7 @@ class Variant; namespace parser_stack { // Forward declarations +class ParserCallbacks; class StackImpl; class State; @@ -63,11 +64,13 @@ public: /** * Creates a new instance of the Stack class. * + * @param parser is an implementation of the ParserCallbacks instance to + * which certain calls are directed. * @param ctx is the parser context the parser stack is working on. * @param states is a map containing the command names and pointers at the * corresponding State instances. */ - Stack(ParserContext &ctx, + Stack(ParserCallbacks &parser, ParserContext &ctx, const std::multimap &states); /** diff --git a/src/formats/osml/OsmlParser.cpp b/src/formats/osml/OsmlParser.cpp index c25974f..36ef2b6 100644 --- a/src/formats/osml/OsmlParser.cpp +++ b/src/formats/osml/OsmlParser.cpp @@ -73,7 +73,7 @@ public: : logger(ctx.getLogger()), ctx(ctx), parser(reader, logger), - stack(ctx, GenericParserStates) + stack(parser, ctx, GenericParserStates) { } diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp index afe0dc6..10cc77a 100644 --- a/src/formats/osxml/OsxmlParser.cpp +++ b/src/formats/osxml/OsxmlParser.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -32,7 +33,7 @@ using namespace parser_stack; /** * Class containing the actual OsxmlParser implementation. 
*/ -class OsxmlParserImplementation : public OsxmlEvents { +class OsxmlParserImplementation : public OsxmlEvents, ParserCallbacks { private: /** * Actual xml parser -- converts the xml stream into a set of events. @@ -56,7 +57,7 @@ public: */ OsxmlParserImplementation(CharReader &reader, ParserContext &ctx) : parser(reader, *this, ctx.getLogger()), - stack(ctx, GenericParserStates) + stack(*this, ctx, GenericParserStates) { } @@ -86,6 +87,16 @@ public: void rangeEnd() override { stack.rangeEnd(); } void data(const TokenizedData &data) override { stack.data(data); } + + TokenId registerToken(const std::string &token) override + { + return Tokens::Empty; + } + + void unregisterToken(TokenId id) override + { + // Do nothing here + } }; /* Class OsxmlParser */ -- cgit v1.2.3 From 072992a634d816fc7061b7eee5fd0cabe4242de4 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 13:45:24 +0100 Subject: Added supportsTokens method to StateBuilder --- src/core/parser/stack/State.cpp | 15 ++++++++++++--- src/core/parser/stack/State.hpp | 33 ++++++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/core/parser/stack/State.cpp b/src/core/parser/stack/State.cpp index d72f533..0feeed6 100644 --- a/src/core/parser/stack/State.cpp +++ b/src/core/parser/stack/State.cpp @@ -23,17 +23,19 @@ namespace parser_stack { /* Class State */ -State::State() : elementHandler(nullptr) {} +State::State() : elementHandler(nullptr), supportsAnnotations(false), supportsTokens(false) {} State::State(StateSet parents, Arguments arguments, RttiSet createdNodeTypes, HandlerConstructor elementHandler, - bool supportsAnnotations) + bool supportsAnnotations, + bool supportsTokens) : parents(parents), arguments(arguments), createdNodeTypes(createdNodeTypes), elementHandler(elementHandler), - supportsAnnotations(supportsAnnotations) + supportsAnnotations(supportsAnnotations), + supportsTokens(supportsTokens) { } @@ -93,6 +95,13 @@ StateBuilder 
&StateBuilder::supportsAnnotations(bool supportsAnnotations) return *this; } +StateBuilder &StateBuilder::supportsTokens(bool supportsTokens) +{ + state.supportsTokens = supportsTokens; + return *this; +} + + const State &StateBuilder::build() const { return state; } /* Class StateDeductor */ diff --git a/src/core/parser/stack/State.hpp b/src/core/parser/stack/State.hpp index 4766235..011ccd6 100644 --- a/src/core/parser/stack/State.hpp +++ b/src/core/parser/stack/State.hpp @@ -82,13 +82,21 @@ struct State { /** * Set to true if this handler does support annotations. This is almost - * always false (e.g. all description handlers), except for document + * always false (e.g. all description handlers), except for document * element handlers. */ - bool supportsAnnotations; + bool supportsAnnotations : 1; /** - * Default constructor, initializes the handlers with nullptr. + * Set to true if this handler does support tokens. This is almost + * always false (e.g. all description handlers), except for document + * element handlers. + */ + bool supportsTokens : 1; + + /** + * Default constructor, initializes the handlers with nullptr and the + * supportsAnnotations and supportsTokens flags with false. */ State(); @@ -108,11 +116,12 @@ struct State { * be nullptr in which case no handler instance is created. * @param supportsAnnotations specifies whether annotations are supported * here at all. + * @param supportsTokens specified whether tokens are supported here at all. */ State(StateSet parents, Arguments arguments = Arguments{}, - RttiSet createdNodeTypes = RttiSet{}, - HandlerConstructor elementHandler = nullptr, - bool supportsAnnotations = false); + RttiSet createdNodeTypes = RttiSet{}, + HandlerConstructor elementHandler = nullptr, + bool supportsAnnotations = false, bool supportsTokens = false); /** * Creates this State from the given StateBuilder instance. 
@@ -219,6 +228,16 @@ public: */ StateBuilder &supportsAnnotations(bool supportsAnnotations); + /** + * Sets the state of the "supportsTokens" flag (default value is false). + * + * @param supportsTokens should be set to true, if the elementHandler + * registered for this state is capable of handling tokens. + * @return a reference at this StateBuilder instance for method + * chaining. + */ + StateBuilder &supportsTokens(bool supportsTokens); + /** * Returns a reference at the internal State instance that was built * using the StateBuilder. @@ -275,7 +294,7 @@ public: * @param states is a list of states that should be checked. */ StateDeductor(std::vector signature, - std::vector states); + std::vector states); /** * Selects all active states from the given states. Only considers those -- cgit v1.2.3 From 5d6ee07995c7f59e66e0df558c8ebe7d2a8d1f68 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:52:13 +0100 Subject: refactored SyntaxDescriptor to Token.hpp and added TokenDescriptor class. 
--- CMakeLists.txt | 1 + src/core/common/Token.cpp | 14 --- src/core/common/Token.hpp | 67 +----------- src/core/model/Syntax.cpp | 58 +++++++++++ src/core/model/Syntax.hpp | 196 +++++++++++++++++++++++++++++++++++ src/core/parser/stack/Callbacks.hpp | 3 +- src/core/parser/stack/Handler.cpp | 2 +- src/core/parser/stack/Handler.hpp | 3 +- src/core/parser/stack/TokenStack.cpp | 4 +- src/core/parser/stack/TokenStack.hpp | 5 +- 10 files changed, 266 insertions(+), 87 deletions(-) create mode 100644 src/core/model/Syntax.cpp create mode 100644 src/core/model/Syntax.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b206458..13de9ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,6 +176,7 @@ ADD_LIBRARY(ousia_core src/core/model/Project src/core/model/RootNode src/core/model/Style + src/core/model/Syntax src/core/model/Typesystem src/core/parser/Parser src/core/parser/ParserContext diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp index e454ae4..17ce03e 100644 --- a/src/core/common/Token.cpp +++ b/src/core/common/Token.cpp @@ -20,19 +20,5 @@ namespace ousia { -/* Class TokenSyntaxDescriptor */ - -void TokenSyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const -{ - if (start != Tokens::Empty) { - set.insert(start); - } - if (end != Tokens::Empty) { - set.insert(end); - } - if (shortForm != Tokens::Empty) { - set.insert(shortForm); - } -} } diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp index f89a0ce..f37151f 100644 --- a/src/core/common/Token.hpp +++ b/src/core/common/Token.hpp @@ -173,71 +173,6 @@ struct Token { const SourceLocation &getLocation() const { return location; } }; -/** - * Class describing the user defined syntax for a single field or annotation. - */ -struct TokenSyntaxDescriptor { - /** - * Possible start token or Tokens::Empty if no token is set. - */ - TokenId start; - - /** - * Possible end token or Tokens::Empty if no token is set. 
- */ - TokenId end; - - /** - * Possible representation token or Tokens::Empty if no token is set. - */ - TokenId shortForm; - - /** - * Flag specifying whether this TokenSyntaxDescriptor describes an - * annotation. - */ - bool isAnnotation; - - /** - * Default constructor, sets all token ids to Tokens::Empty and isAnnotation - * to false. - */ - TokenSyntaxDescriptor() - : start(Tokens::Empty), - end(Tokens::Empty), - shortForm(Tokens::Empty), - isAnnotation(false) - { - } - - /** - * Member initializer constructor. - * - * @param start is a possible start token. - * @param end is a possible end token. - * @param shortForm is a possible short form token. - * @param isAnnotation is set to true if this syntax descriptor describes an - * annotation. - */ - TokenSyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, - bool isAnnotation) - : start(start), - end(end), - shortForm(shortForm), - isAnnotation(isAnnotation) - { - } - - /** - * Inserts all tokens referenced in this TokenSyntaxDescriptor into the - * given TokenSet. Skips token ids set to Tokens::Empty. - * - * @param set is the TokenSet instance into which the Tokens should be - * inserted. - */ - void insertIntoTokenSet(TokenSet &set) const; -}; } -#endif /* _OUSIA_TOKENS_HPP_ */ - +#endif /* _OUSIA_TOKENS_HPP_ */ \ No newline at end of file diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp new file mode 100644 index 0000000..9dbaccc --- /dev/null +++ b/src/core/model/Syntax.cpp @@ -0,0 +1,58 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "Syntax.hpp" + +#include "Domain.hpp" + +namespace ousia { + +/* Class TokenSyntaxDescriptor */ + +bool SyntaxDescriptor::isAnnotation() const +{ + return descriptor->isa(&RttiTypes::AnnotationClass); +} +bool SyntaxDescriptor::isFieldDescriptor() const +{ + return descriptor->isa(&RttiTypes::FieldDescriptor); +} +bool SyntaxDescriptor::isStruct() const +{ + return descriptor->isa(&RttiTypes::StructuredClass); +} + +void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ + if (start != Tokens::Empty) { + set.insert(start); + } + if (end != Tokens::Empty) { + set.insert(end); + } + if (shortForm != Tokens::Empty) { + set.insert(shortForm); + } +} + +bool SyntaxDescriptor::isEmpty() const +{ + return start == Tokens::Empty && end == Tokens::Empty && + shortForm == Tokens::Empty; +} +} \ No newline at end of file diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp new file mode 100644 index 0000000..4da3408 --- /dev/null +++ b/src/core/model/Syntax.hpp @@ -0,0 +1,196 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Syntax.hpp + * + * This header contains the Descriptor classes for user definable syntax for + * Document entities or fields. These classes are referenced in Ontology.hpp. + */ + +#ifndef _OUSIA_MODEL_SYNTAX_HPP_ +#define _OUSIA_MODEL_SYNTAX_HPP_ + +#include +#include "Node.hpp" + +namespace ousia { + +/** + * Class to describe a single token that shall be used as user-defined syntax. + */ +struct TokenDescriptor { + /** + * The string content of this token, if it is not a special one. + */ + std::string token; + /** + * A flag to be set true if this TokenDescriptor uses a special token. + */ + bool special; + /** + * An id to uniquely identify this token. + */ + TokenId id; + + /** + * Constructor for non-special tokens. The special flag is set to false and + * the id to Tokens::Empty. + * + * @param token The string content of this token, if it is not a special + * one. + */ + TokenDescriptor(std::string token = std::string()) + : token(std::move(token)), special(false), id(Tokens::Empty) + { + } + + /** + * Constructor for special tokens. The token is set to an empty string and + * the special flag to true. + * + * @param id the id of the special token. + */ + TokenDescriptor(TokenId id) : special(true), id(id) {} + + /** + * Returns true if and only if neither a string nor an ID is given. + * + * @return true if and only if neither a string nor an ID is given. + */ + bool isEmpty() const { return token.empty() && id == Tokens::Empty; } +}; + +/** + * Class describing the user defined syntax for a StructuredClass, + * AnnotationClass or FieldDescriptor. + * + * This class is used during parsing of a Document. It is used to describe + * the tokens relevant for one Descriptor that could be created at this point + * during parsing. + */ +struct SyntaxDescriptor { + /** + * Possible start token or Tokens::Empty if no token is set. 
+ */ + TokenId start; + + /** + * Possible end token or Tokens::Empty if no token is set. + */ + TokenId end; + + /** + * Possible representation token or Tokens::Empty if no token is set. + */ + TokenId shortForm; + + /* + * The Descriptor this SyntaxDescriptor belongs to. As this may be + * a FieldDescriptor as well as a class Descriptor (StructuredClass or + * AnnotationClass) we can only use the class Node as inner argument here. + */ + Rooted descriptor; + /* + * Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + ssize_t depth; + + /** + * Default constructor, sets all token ids to Tokens::Empty and the + * descriptor handle to nullptr. + */ + SyntaxDescriptor() + : start(Tokens::Empty), + end(Tokens::Empty), + shortForm(Tokens::Empty), + descriptor(nullptr), + depth(-1) + { + } + + /** + * Member initializer constructor. + * + * @param start is a possible start token. + * @param end is a possible end token. + * @param shortForm is a possible short form token. + * @param descriptor The Descriptor this SyntaxDescriptor belongs to. + * @param depth Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + Handle descriptor, ssize_t depth) + : start(start), + end(end), + shortForm(shortForm), + descriptor(descriptor), + depth(depth) + { + } + + /** + * Inserts all tokens referenced in this SyntaxDescriptor into the + * given TokenSet. Skips token ids set to Tokens::Empty. + * + * @param set is the TokenSet instance into which the Tokens should be + * inserted. 
+ */ + void insertIntoTokenSet(TokenSet &set) const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + * + * @return true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + */ + bool isAnnotation() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + */ + bool isStruct() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + */ + bool isFieldDescriptor() const; + + /** + * Returns true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + * + * @return true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + */ + bool isEmpty() const; +}; +} +#endif \ No newline at end of file diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index d7b2547..e471881 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -34,6 +34,7 @@ #include #include +#include namespace ousia { @@ -96,7 +97,7 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Removes the previously pushed list of tokens from the stack. 
diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 734976a..12df0fd 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -74,7 +74,7 @@ Variant Handler::readData() return handlerData.callbacks.readData(); } -void Handler::pushTokens(const std::vector &tokens) +void Handler::pushTokens(const std::vector &tokens) { handlerData.callbacks.pushTokens(tokens); } diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 848d395..19660d0 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace ousia { @@ -200,7 +201,7 @@ protected: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Calls the corresponding function in the HandlerCallbacks instance. diff --git a/src/core/parser/stack/TokenStack.cpp b/src/core/parser/stack/TokenStack.cpp index 6afeaed..ac1d94e 100644 --- a/src/core/parser/stack/TokenStack.cpp +++ b/src/core/parser/stack/TokenStack.cpp @@ -21,7 +21,7 @@ namespace ousia { namespace parser_stack { -void TokenStack::pushTokens(const std::vector &tokens) +void TokenStack::pushTokens(const std::vector &tokens) { stack.push_back(tokens); } @@ -35,7 +35,7 @@ TokenSet TokenStack::tokens() const } TokenSet res; - for (const TokenSyntaxDescriptor &descr : stack.back()) { + for (const SyntaxDescriptor &descr : stack.back()) { descr.insertIntoTokenSet(res); } return res; diff --git a/src/core/parser/stack/TokenStack.hpp b/src/core/parser/stack/TokenStack.hpp index 9669f50..af734bb 100644 --- a/src/core/parser/stack/TokenStack.hpp +++ b/src/core/parser/stack/TokenStack.hpp @@ -32,6 +32,7 @@ #include #include +#include namespace ousia { namespace parser_stack { @@ -52,7 +53,7 @@ private: * Stack containing vectors of 
TokenSyntaxDescriptor instances as given by * the user. */ - std::vector> stack; + std::vector> stack; /** * Constructor of the TokenStack class. @@ -86,7 +87,7 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Removes the previously pushed list of tokens from the stack. -- cgit v1.2.3 From 522580cfdfc9e6dc3448240448c29533e68f240f Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:52:34 +0100 Subject: added check for witespace characters in Utils::isUserDefinedToken --- src/core/common/Utils.cpp | 15 +++++++++++---- src/core/common/Utils.hpp | 1 + test/core/common/UtilsTest.cpp | 2 ++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 219b437..a87ff6d 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -124,7 +124,8 @@ bool Utils::isUserDefinedToken(const std::string &token) // Make sure the token meets is neither empty, nor starts or ends with an // alphanumeric character const size_t len = token.size(); - if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) { + if (len == 0 || isAlphanumeric(token[0]) || + isAlphanumeric(token[len - 1])) { return false; } @@ -134,13 +135,19 @@ bool Utils::isUserDefinedToken(const std::string &token) return false; } + // Make sure the token does not contain any whitespaces. 
+ for (char c : token) { + if (isWhitespace(c)) { + return false; + } + } + // Make sure the token contains other characters but { and } - for (char c: token) { + for (char c : token) { if (c != '{' && c != '}') { return true; } } return false; } -} - +} \ No newline at end of file diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 25a4de5..d9e26da 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -117,6 +117,7 @@ public: *
  • '%', '%{', '}%'
  • * * + *
  • The token does not contain any whitespaces.
  • * */ static bool isUserDefinedToken(const std::string &token); diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 54890ee..2aaa430 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -148,6 +148,7 @@ TEST(Utils, isUserDefinedToken) EXPECT_TRUE(Utils::isUserDefinedToken("`")); EXPECT_TRUE(Utils::isUserDefinedToken("<")); EXPECT_TRUE(Utils::isUserDefinedToken(">")); + EXPECT_TRUE(Utils::isUserDefinedToken("<+>")); EXPECT_FALSE(Utils::isUserDefinedToken("a:")); EXPECT_FALSE(Utils::isUserDefinedToken("a:a")); EXPECT_FALSE(Utils::isUserDefinedToken(":a")); @@ -158,6 +159,7 @@ TEST(Utils, isUserDefinedToken) EXPECT_FALSE(Utils::isUserDefinedToken("<\\")); EXPECT_FALSE(Utils::isUserDefinedToken("\\>")); EXPECT_FALSE(Utils::isUserDefinedToken("{!")); + EXPECT_FALSE(Utils::isUserDefinedToken("< + >")); } } -- cgit v1.2.3 From ee943c5e9b60cf577ff236a694df180db89b0972 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:53:20 +0100 Subject: integrated syntax tokens in Domain. --- src/core/model/Domain.cpp | 193 +++++++++++++++++++++++--- src/core/model/Domain.hpp | 297 +++++++++++++++++++++++++++++++++++++---- test/core/model/DomainTest.cpp | 165 ++++++++++++++++++++++- 3 files changed, 607 insertions(+), 48 deletions(-) diff --git a/src/core/model/Domain.cpp b/src/core/model/Domain.cpp index 8255401..587a382 100644 --- a/src/core/model/Domain.cpp +++ b/src/core/model/Domain.cpp @@ -20,8 +20,9 @@ #include #include -#include #include +#include +#include #include "Domain.hpp" @@ -169,52 +170,60 @@ static NodeVector pathTo(const Node *start, Logger &logger, return shortest; } +struct CollectState { + Node *n; + size_t depth; + + CollectState(Node *n, size_t depth) : n(n), depth(depth) {} +}; + template static NodeVector collect(const Node *start, F match) { // result NodeVector res; // queue for breadth-first search of graph. 
- std::queue> q; + std::queue q; // put the initial node on the stack. - q.push(const_cast(start)); + q.push(CollectState(const_cast(start), 0)); // set of visited nodes. std::unordered_set visited; while (!q.empty()) { - Rooted n = q.front(); + CollectState state = q.front(); q.pop(); // do not proceed if this node was already visited. - if (!visited.insert(n.get()).second) { + if (!visited.insert(state.n).second) { continue; } - if (n->isa(&RttiTypes::StructuredClass)) { - Rooted strct = n.cast(); + if (state.n->isa(&RttiTypes::Descriptor)) { + Rooted strct{static_cast(state.n)}; // look through all fields. NodeVector fields = strct->getFieldDescriptors(); for (auto fd : fields) { // note matches. - if (match(fd)) { + if (match(fd, state.depth)) { res.push_back(fd); } // only continue in the TREE field. if (fd->getFieldType() == FieldDescriptor::FieldType::TREE) { - q.push(fd); + q.push(CollectState(fd.get(), state.depth)); } } } else { // otherwise this is a FieldDescriptor. - Rooted field = n.cast(); + Rooted field{ + static_cast(state.n)}; // and we proceed by visiting all permitted children. for (auto c : field->getChildrenWithSubclasses()) { // note matches. - if (match(c)) { + if (match(c, state.depth)) { res.push_back(c); } // We only continue our search via transparent children. if (c->isTransparent()) { - q.push(c); + q.push(CollectState(c.get(), state.depth + 1)); } } } @@ -222,28 +231,59 @@ static NodeVector collect(const Node *start, F match) return res; } +static std::vector collectPermittedTokens( + const Node *start, Handle domain) +{ + // gather SyntaxDescriptors for structure children first. + std::vector res; + collect(start, [&res](Handle n, size_t depth) { + SyntaxDescriptor stx; + if (n->isa(&RttiTypes::FieldDescriptor)) { + stx = n.cast()->getSyntaxDescriptor(depth); + } else { + stx = n.cast()->getSyntaxDescriptor(depth); + } + // do not add trivial SyntaxDescriptors. 
+ if (!stx.isEmpty()) { + res.push_back(stx); + } + return false; + }); + // gather SyntaxDescriptors for AnnotationClasses. + for (auto a : domain->getAnnotationClasses()) { + SyntaxDescriptor stx = a->getSyntaxDescriptor(); + if (!stx.isEmpty()) { + res.push_back(stx); + } + } + return res; +} + /* Class FieldDescriptor */ FieldDescriptor::FieldDescriptor(Manager &mgr, Handle primitiveType, Handle parent, FieldType fieldType, - std::string name, bool optional) + std::string name, bool optional, + WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), primitiveType(acquire(primitiveType)), optional(optional), - primitive(true) + primitive(true), + whitespaceMode(whitespaceMode) { } FieldDescriptor::FieldDescriptor(Manager &mgr, Handle parent, FieldType fieldType, std::string name, - bool optional) + bool optional, WhitespaceMode whitespaceMode) : Node(mgr, std::move(name), parent), children(this), fieldType(fieldType), optional(optional), - primitive(false) + primitive(false), + whitespaceMode(whitespaceMode) { } @@ -272,6 +312,25 @@ bool FieldDescriptor::doValidate(Logger &logger) const } else { valid = valid & validateName(logger); } + // check start and end token. + if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + // TODO: Correct error message. + logger.error(std::string("Field \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } // check consistency of FieldType with the rest of the FieldDescriptor. 
if (primitive) { @@ -325,7 +384,7 @@ bool FieldDescriptor::doValidate(Logger &logger) const } static void gatherSubclasses( - std::unordered_set& visited, + std::unordered_set &visited, NodeVector &res, Handle strct) { // this check is to prevent cycles. @@ -334,7 +393,7 @@ static void gatherSubclasses( } for (auto sub : strct->getSubclasses()) { // this check is to prevent cycles. - if(visited.count(sub.get())){ + if (visited.count(sub.get())) { continue; } res.push_back(sub); @@ -381,7 +440,7 @@ NodeVector FieldDescriptor::pathTo(Handle field, NodeVector FieldDescriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector nodes = collect(this, [](Handle n) { + NodeVector nodes = collect(this, [](Handle n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -396,6 +455,16 @@ NodeVector FieldDescriptor::getDefaultFields() const return res; } +std::vector FieldDescriptor::getPermittedTokens() const +{ + if (getParent() == nullptr || + getParent().cast()->getParent() == nullptr) { + return std::vector(); + } + return collectPermittedTokens( + this, getParent().cast()->getParent().cast()); +} + /* Class Descriptor */ void Descriptor::doResolve(ResolutionState &state) @@ -443,6 +512,25 @@ bool Descriptor::doValidate(Logger &logger) const } valid = valid & attributesDescriptor->validate(logger); } + + // check start and end token. 
+ if (!startToken.special && !startToken.token.empty() && + !Utils::isUserDefinedToken(startToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom start token: " + + startToken.token, + *this); + valid = false; + } + if (!endToken.special && !endToken.token.empty() && + !Utils::isUserDefinedToken(endToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom end token: " + + endToken.token, + *this); + valid = false; + } + // check that only one FieldDescriptor is of type TREE. auto fds = Descriptor::getFieldDescriptors(); bool hasTREE = false; @@ -483,7 +571,7 @@ std::pair, bool> Descriptor::pathTo( NodeVector Descriptor::getDefaultFields() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector nodes = collect(this, [](Handle n) { + NodeVector nodes = collect(this, [](Handle n, size_t depth) { if (!n->isa(&RttiTypes::FieldDescriptor)) { return false; } @@ -501,7 +589,7 @@ NodeVector Descriptor::getDefaultFields() const NodeVector Descriptor::getPermittedChildren() const { // TODO: In principle a cast would be nicer here, but for now we copy. - NodeVector nodes = collect(this, [](Handle n) { + NodeVector nodes = collect(this, [](Handle n, size_t depth) { return n->isa(&RttiTypes::StructuredClass); }); NodeVector res; @@ -669,6 +757,14 @@ std::pair, bool> Descriptor::createFieldDescriptor( return std::make_pair(fd, sorted); } +std::vector Descriptor::getPermittedTokens() const +{ + if (getParent() == nullptr) { + return std::vector(); + } + return collectPermittedTokens(this, getParent().cast()); +} + /* Class StructuredClass */ StructuredClass::StructuredClass(Manager &mgr, std::string name, @@ -709,6 +805,16 @@ bool StructuredClass::doValidate(Logger &logger) const logger.error(cardinality.toString() + " is not a cardinality!", *this); valid = false; } + + // check short token. 
+ if (!shortToken.special && !shortToken.token.empty() && + !Utils::isUserDefinedToken(shortToken.token)) { + logger.error(std::string("Descriptor \"") + getName() + + "\" has an invalid custom short form token: " + + shortToken.token, + *this); + valid = false; + } // check the validity of this superclass. if (superclass != nullptr) { valid = valid & superclass->validate(logger); @@ -961,6 +1067,51 @@ Rooted Domain::createAnnotationClass(std::string name) new AnnotationClass(getManager(), std::move(name), this)}; } +static void gatherTokenDescriptors( + Handle desc, std::vector &res, + std::unordered_set &visited) +{ + // add the TokenDescriptors for the Descriptor itself. + if (!desc->getStartToken().isEmpty()) { + res.push_back(desc->getStartTokenPointer()); + } + if (!desc->getEndToken().isEmpty()) { + res.push_back(desc->getEndTokenPointer()); + } + // add the TokenDescriptors for its FieldDescriptors. + for (auto fd : desc->getFieldDescriptors()) { + if (!visited.insert(fd.get()).second) { + continue; + } + if (!fd->getStartToken().isEmpty()) { + res.push_back(fd->getStartTokenPointer()); + } + if (!fd->getEndToken().isEmpty()) { + res.push_back(fd->getEndTokenPointer()); + } + } +} + +std::vector Domain::getAllTokenDescriptors() const +{ + std::vector res; + // note all fields that are already visited because FieldReferences might + // lead to doubled fields. + std::unordered_set visited; + // add the TokenDescriptors for the StructuredClasses (and their fields). + for (auto s : structuredClasses) { + if (!s->getShortToken().isEmpty()) { + res.push_back(s->getShortTokenPointer()); + } + gatherTokenDescriptors(s, res, visited); + } + // add the TokenDescriptors for the AnnotationClasses (and their fields). 
+ for (auto a : annotationClasses) { + gatherTokenDescriptors(a, res, visited); + } + return res; +} + /* Type registrations */ namespace RttiTypes { diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 7e10d91..e984ed9 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -167,11 +167,13 @@ #ifndef _OUSIA_MODEL_DOMAIN_HPP_ #define _OUSIA_MODEL_DOMAIN_HPP_ +#include #include #include #include "Node.hpp" #include "RootNode.hpp" +#include "Syntax.hpp" #include "Typesystem.hpp" namespace ousia { @@ -225,6 +227,9 @@ private: Owned primitiveType; bool optional; bool primitive; + TokenDescriptor startToken; + TokenDescriptor endToken; + WhitespaceMode whitespaceMode; protected: bool doValidate(Logger &logger) const override; @@ -233,39 +238,46 @@ public: /** * This is the constructor for primitive fields. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param primitiveType is a handle to some Type in some Typesystem of which - * one instance is allowed to fill this field. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param primitiveType is a handle to some Type in some Typesystem of + *which + * one instance is allowed to fill this field. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. 
*/ FieldDescriptor(Manager &mgr, Handle primitiveType, Handle parent, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * This is the constructor for non-primitive fields. You have to provide * children here later on. * - * @param mgr is the global Manager instance. - * @param parent is a handle of the Descriptor node that has this - * FieldDescriptor. - * @param fieldType is the FieldType of this FieldDescriptor, either - * TREE for the main or default structure or SUBTREE - * for supporting structures. - * @param name is the name of this field. - * @param optional should be set to 'false' is this field needs to be - * filled in order for an instance of the parent - * Descriptor to be valid. + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param fieldType is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * @param whitespaceMode the WhitespaceMode to be used when an instance of + * this FieldDescriptor is parsed. */ FieldDescriptor(Manager &mgr, Handle parent = nullptr, FieldType fieldType = FieldType::TREE, - std::string name = "", bool optional = false); + std::string name = "", bool optional = false, + WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE); /** * Returns a const reference to the NodeVector of StructuredClasses whose @@ -437,6 +449,109 @@ public: * children of an instance of this Descriptor. */ NodeVector getDefaultFields() const; + + /** + * Returns a pointer to the start TokenDescriptor. 
This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * Note that this does not invalidate the FieldDescriptor. So use with + * care. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @return the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. 
+ */ + WhitespaceMode getWhitespaceMode() const { return whitespaceMode; } + + /** + * Sets the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + * + * @param wm the WhitespaceMode to be used when an instance of this + * FieldDescriptor is parsed. + */ + WhitespaceMode setWhitespaceMode(WhitespaceMode wm) + { + return whitespaceMode = wm; + } + + /** + * Returns the SyntaxDescriptor for this FieldDescriptor. + * + * @return the SyntaxDescriptor for this FieldDescriptor. + */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this FieldDescriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this FieldDescriptor + */ + std::vector getPermittedTokens() const; }; /** @@ -460,7 +575,10 @@ public: * * \endcode * - * key="value" inside the A-node would be an attribute, while value + * key="value" inside the A-node would be an attribute, while + * \code{.xml} + * value + * \endcode * would be a primitive field. While equivalent in XML the semantics are * different: An attribute describes indeed attributes, features of one single * node whereas a primitive field describes the _content_ of a node. @@ -472,6 +590,8 @@ class Descriptor : public Node { private: Owned attributesDescriptor; NodeVector fieldDescriptors; + TokenDescriptor startToken; + TokenDescriptor endToken; bool addAndSortFieldDescriptor(Handle fd, Logger &logger); @@ -720,6 +840,85 @@ public: * of an instance of this Descriptor in the structure tree. */ NodeVector getPermittedChildren() const; + + /** + * Returns a pointer to the start TokenDescriptor. 
This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a pointer to the start TokenDescriptor. + */ + TokenDescriptor *getStartTokenPointer() { return &startToken; } + + /** + * Returns a copy of the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @return a copy of the start TokenDescriptor. + */ + TokenDescriptor getStartToken() const { return startToken; } + + /** + * Sets the start TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor starts. + * + * @param st the new start TokenDescriptor. + */ + void setStartToken(TokenDescriptor st) + { + invalidate(); + startToken = st; + } + + /** + * Returns a pointer to the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a pointer to the end TokenDescriptor. + */ + TokenDescriptor *getEndTokenPointer() { return &endToken; } + + /** + * Returns a copy of the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @return a copy of the end TokenDescriptor. + */ + TokenDescriptor getEndToken() const { return endToken; } + + /** + * Sets the end TokenDescriptor. This Token is used as a + * signifier during parsing that an instance of this FieldDescriptor ends. + * + * @param e the new end TokenDescriptor. + */ + void setEndToken(TokenDescriptor e) + { + invalidate(); + endToken = e; + } + + /** + * Returns the SyntaxDescriptor for this Descriptor. + * + * @return the SyntaxDescriptor for this Descriptor. 
+ */ + virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) + { + SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + const_cast(this), depth}; + return stx; + } + + /** + * Returns a vector of SyntaxDescriptors, one for each Descriptor + * (StructuredClasses, AnnotationClasses or FieldDescriptors) that is + * permitted as child of this Descriptor. This also makes use + * of transparency. + * + * @return a vector of SyntaxDescriptors, one for each Descriptor that is + * permitted as child of this Descriptor. + */ + std::vector getPermittedTokens() const; }; /* * TODO: We should discuss Cardinalities one more time. Is it smart to define @@ -806,6 +1005,7 @@ private: NodeVector subclasses; bool transparent; bool root; + TokenDescriptor shortToken; /** * Helper method for getFieldDescriptors. @@ -963,6 +1163,50 @@ public: invalidate(); root = std::move(r); } + + /** + * Returns a pointer to the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a pointer to the short TokenDescriptor. + */ + TokenDescriptor *getShortTokenPointer() { return &shortToken; } + + /** + * Returns a copy of the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @return a copy of the short TokenDescriptor. + */ + TokenDescriptor getShortToken() const { return shortToken; } + + /** + * Sets the short TokenDescriptor. During parsing an + * occurence of this token will be translated to an empty instance of this + * StructuredClass. + * + * @param s the new short TokenDescriptor. + */ + void setShortToken(TokenDescriptor s) + { + invalidate(); + shortToken = s; + } + + /** + * Returns the SyntaxDescriptor for this StructuredClass. + * + * @return the SyntaxDescriptor for this StructuredClass. 
+ */ + SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override + { + SyntaxDescriptor stx{getStartToken().id, getEndToken().id, + shortToken.id, const_cast(this), + depth}; + return stx; + } }; /** @@ -1188,6 +1432,13 @@ public: { domains.insert(domains.end(), ds.begin(), ds.end()); } + + /** + * Returns all TokenDescriptors of classes and fields in this Ontology. + * + * @return all TokenDescriptors of classes and fields in this Ontology. + */ + std::vector getAllTokenDescriptors() const; }; namespace RttiTypes { @@ -1200,4 +1451,4 @@ extern const Rtti Domain; } } -#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ +#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ \ No newline at end of file diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp index 6bbf26d..f59e745 100644 --- a/test/core/model/DomainTest.cpp +++ b/test/core/model/DomainTest.cpp @@ -82,9 +82,7 @@ TEST(Domain, testDomainResolving) } // i use this wrapper due to the strange behaviour of GTEST. -static void assertFalse(bool b){ - ASSERT_FALSE(b); -} +static void assertFalse(bool b) { ASSERT_FALSE(b); } static Rooted createUnsortedPrimitiveField( Handle strct, Handle type, Logger &logger, bool tree, @@ -170,7 +168,6 @@ TEST(StructuredClass, getFieldDescriptors) } } - TEST(StructuredClass, getFieldDescriptorsCycles) { Logger logger; @@ -523,6 +520,91 @@ TEST(Descriptor, getPermittedChildrenCycles) ASSERT_EQ(A, children[0]); } +TEST(Descriptor, getSyntaxDescriptor) +{ + // build an ontology with some custom syntax. 
+ Manager mgr{1}; + Logger logger; + Rooted sys{new SystemTypesystem(mgr)}; + // Construct the domain + Rooted domain{new Domain(mgr, sys, "ontology")}; + Rooted A{new StructuredClass( + mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // check the SyntaxDescriptor + SyntaxDescriptor stx = A->getSyntaxDescriptor(); + ASSERT_EQ(Tokens::Indent, stx.start); + ASSERT_EQ(Tokens::Dedent, stx.end); + ASSERT_EQ(1, stx.shortForm); + ASSERT_EQ(A, stx.descriptor); + ASSERT_TRUE(stx.isStruct()); + ASSERT_FALSE(stx.isAnnotation()); + ASSERT_FALSE(stx.isFieldDescriptor()); +} + +TEST(Descriptor, getPermittedTokens) +{ + // build an ontology with some custom syntax. + Manager mgr{1}; + Logger logger; + Rooted sys{new SystemTypesystem(mgr)}; + // Construct the domain + Rooted domain{new Domain(mgr, sys, "ontology")}; + // add one StructuredClass with all tokens set. + Rooted A{new StructuredClass( + mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // add a field with one token set. + Rooted A_field = A->createFieldDescriptor(logger).first; + A_field->setEndToken(TokenDescriptor(Tokens::Newline)); + A_field->addChild(A); + // add an annotation with start and end set. + Rooted A_anno = domain->createAnnotationClass("A"); + { + TokenDescriptor start{"<"}; + start.id = 7; + A_anno->setStartToken(start); + } + { + TokenDescriptor end{">"}; + end.id = 8; + A_anno->setEndToken(end); + } + // add a trivial annotation, which should not be returned. + Rooted B_anno = domain->createAnnotationClass("B"); + ASSERT_TRUE(domain->validate(logger)); + + // check result. 
+ std::vector stxs = A->getPermittedTokens(); + ASSERT_EQ(3, stxs.size()); + // the field should be first, because A itself should not be collected + // directly. + ASSERT_EQ(A_field, stxs[0].descriptor); + ASSERT_EQ(Tokens::Empty, stxs[0].start); + ASSERT_EQ(Tokens::Newline, stxs[0].end); + ASSERT_EQ(Tokens::Empty, stxs[0].shortForm); + ASSERT_EQ(A, stxs[1].descriptor); + ASSERT_EQ(Tokens::Indent, stxs[1].start); + ASSERT_EQ(Tokens::Dedent, stxs[1].end); + ASSERT_EQ(1, stxs[1].shortForm); + ASSERT_EQ(A_anno, stxs[2].descriptor); + ASSERT_EQ(7, stxs[2].start); + ASSERT_EQ(8, stxs[2].end); + ASSERT_EQ(Tokens::Empty, stxs[2].shortForm); +} + TEST(StructuredClass, isSubclassOf) { // create an inheritance hierarchy. @@ -629,6 +711,14 @@ TEST(Domain, validate) base_field->setPrimitiveType(sys->getStringType()); ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); ASSERT_TRUE(domain->validate(logger)); + // add an invalid start token. + base_field->setStartToken(TokenDescriptor("< + >")); + ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); + ASSERT_FALSE(domain->validate(logger)); + // make it valid. + base_field->setStartToken(TokenDescriptor("<")); + ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); + ASSERT_TRUE(domain->validate(logger)); // add a subclass for our base class. Rooted sub{new StructuredClass(mgr, "sub", domain)}; // this should be valid in itself. @@ -686,4 +776,71 @@ TEST(Domain, validate) ASSERT_TRUE(domain->validate(logger)); } } + +TEST(Domain, getAllTokenDescriptors) +{ + // build an ontology with some custom syntax. + Manager mgr{1}; + Logger logger; + Rooted sys{new SystemTypesystem(mgr)}; + // Construct the domain + Rooted domain{new Domain(mgr, sys, "ontology")}; + // add one StructuredClass with all tokens set. 
+ Rooted A{new StructuredClass( + mgr, "A", domain, Cardinality::any(), {nullptr}, true, true)}; + A->setStartToken(TokenDescriptor(Tokens::Indent)); + A->setEndToken(TokenDescriptor(Tokens::Dedent)); + { + TokenDescriptor sh{"<+>"}; + sh.id = 1; + A->setShortToken(sh); + } + // add a field with one token set. + Rooted A_field = A->createFieldDescriptor(logger).first; + A_field->setEndToken(TokenDescriptor(Tokens::Newline)); + A_field->addChild(A); + // add an annotation with start and end set. + Rooted A_anno = domain->createAnnotationClass("A"); + { + TokenDescriptor start{"<"}; + start.id = 7; + A_anno->setStartToken(start); + } + { + TokenDescriptor end{">"}; + end.id = 8; + A_anno->setEndToken(end); + } + // add a trivial annotation, which should not be returned. + Rooted B_anno = domain->createAnnotationClass("B"); + ASSERT_TRUE(domain->validate(logger)); + + // check the result. + std::vector tks = domain->getAllTokenDescriptors(); + + // A short token + ASSERT_EQ("<+>", tks[0]->token); + ASSERT_EQ(1, tks[0]->id); + ASSERT_FALSE(tks[0]->special); + // A start token + ASSERT_EQ("", tks[1]->token); + ASSERT_EQ(Tokens::Indent, tks[1]->id); + ASSERT_TRUE(tks[1]->special); + // A end token + ASSERT_EQ("", tks[2]->token); + ASSERT_EQ(Tokens::Dedent, tks[2]->id); + ASSERT_TRUE(tks[2]->special); + // A field end token + ASSERT_EQ("", tks[3]->token); + ASSERT_EQ(Tokens::Newline, tks[3]->id); + ASSERT_TRUE(tks[3]->special); + // A anno start token + ASSERT_EQ("<", tks[4]->token); + ASSERT_EQ(7, tks[4]->id); + ASSERT_FALSE(tks[4]->special); + // A anno end token + ASSERT_EQ(">", tks[5]->token); + ASSERT_EQ(8, tks[5]->id); + ASSERT_FALSE(tks[5]->special); +} } \ No newline at end of file -- cgit v1.2.3 From 4b5f37d07e4e691848b243ae795bb59893a6379c Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:55:41 +0100 Subject: added another domain test case for invalid syntax tokens. 
--- test/core/model/DomainTest.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp index f59e745..b3c5771 100644 --- a/test/core/model/DomainTest.cpp +++ b/test/core/model/DomainTest.cpp @@ -701,6 +701,14 @@ TEST(Domain, validate) base->setName("myClass"); ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); ASSERT_TRUE(domain->validate(logger)); + // add an invalid short token. + base->setShortToken(TokenDescriptor("bla")); + ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); + ASSERT_FALSE(domain->validate(logger)); + // make it valid. + base->setShortToken(TokenDescriptor("!bla!")); + ASSERT_EQ(ValidationState::UNKNOWN, domain->getValidationState()); + ASSERT_TRUE(domain->validate(logger)); // Let's add a primitive field (without a primitive type at first) Rooted base_field = base->createPrimitiveFieldDescriptor(nullptr, logger).first; -- cgit v1.2.3 From 3cc6ebf406c53b0c82a52f0daf1ce14c62f7b521 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 16:30:51 +0100 Subject: Implemented new "start" methods in the Handler instances --- CMakeLists.txt | 6 +- src/core/parser/stack/DocumentHandler.cpp | 133 +++++++++++++++--------- src/core/parser/stack/DocumentHandler.hpp | 69 ++++++++++--- src/core/parser/stack/DomainHandler.cpp | 76 ++++++++------ src/core/parser/stack/DomainHandler.hpp | 22 ++-- src/core/parser/stack/Handler.cpp | 102 +++++++++---------- src/core/parser/stack/Handler.hpp | 150 +++++++++++++++------------- src/core/parser/stack/Stack.cpp | 24 +++-- src/core/parser/stack/TypesystemHandler.cpp | 29 +++--- src/core/parser/stack/TypesystemHandler.hpp | 15 ++- 10 files changed, 375 insertions(+), 251 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cef1e31..45310a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,7 +187,7 @@ ADD_LIBRARY(ousia_core src/core/parser/stack/Handler 
src/core/parser/stack/ImportIncludeHandler src/core/parser/stack/State -# src/core/parser/stack/Stack + src/core/parser/stack/Stack src/core/parser/stack/TokenRegistry src/core/parser/stack/TokenStack src/core/parser/stack/TypesystemHandler @@ -387,7 +387,7 @@ IF(TEST) # ) ADD_EXECUTABLE(ousia_test_osml -# test/formats/osml/OsmlParserTest + test/formats/osml/OsmlParserTest test/formats/osml/OsmlStreamParserTest ) @@ -400,7 +400,7 @@ IF(TEST) ADD_EXECUTABLE(ousia_test_osxml test/formats/osxml/OsxmlEventParserTest -# test/formats/osxml/OsxmlParserTest + test/formats/osxml/OsxmlParserTest ) TARGET_LINK_LIBRARIES(ousia_test_osxml diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index 714ab1b..de6e367 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -37,7 +37,8 @@ namespace parser_stack { /* DocumentHandler */ -bool DocumentHandler::start(Variant::mapType &args) +bool DocumentHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { Rooted document = context().getProject()->createDocument(args["name"].asString()); @@ -52,6 +53,25 @@ void DocumentHandler::end() { scope().pop(logger()); } /* DocumentChildHandler */ +DocumentChildHandler::DocumentChildHandler(const HandlerData &handlerData) + : Handler(handlerData), mode(Mode::STRUCT) +{ +} + +void DocumentChildHandler::setMode(Mode mode, const std::string &name) +{ + this->mode = mode; + this->name = name; + this->token = Token(); +} + +void DocumentChildHandler::setMode(Mode mode, const Token &token) +{ + this->mode = mode; + this->name = token.content; + this->token = token; +} + void DocumentChildHandler::preamble(Rooted &parentNode, size_t &fieldIdx, DocumentEntity *&parent) { @@ -122,10 +142,14 @@ void DocumentChildHandler::createPath(const size_t &firstFieldIdx, scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, false); } -bool DocumentChildHandler::start(Variant::mapType &args) +bool 
DocumentChildHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { - // extract the special "name" attribute from the input arguments. - // the remaining attributes will be forwarded to the newly constructed + // Set the internal mode to STRUCT and copy the name + setMode(Mode::STRUCT, name); + + // Extract the special "name" attribute from the input arguments. + // The remaining attributes will be forwarded to the newly constructed // element. std::string nameAttr; { @@ -152,11 +176,11 @@ bool DocumentChildHandler::start(Variant::mapType &args) return false; } Rooted strct = scope().resolve( - Utils::split(name(), ':'), logger()); + Utils::split(name, ':'), logger()); if (strct == nullptr) { // if we could not resolve the name, throw an exception. throw LoggableException( - std::string("\"") + name() + "\" could not be resolved.", + std::string("\"") + name + "\" could not be resolved.", location()); } entity = parentNode.cast()->createRootStructuredEntity( @@ -169,13 +193,6 @@ bool DocumentChildHandler::start(Variant::mapType &args) preamble(parentNode, fieldIdx, parent); - // TODO: REMOVE - std::string thisName = name(); - std::string parentClassName; - if (parent != nullptr) { - parentClassName = parent->getDescriptor()->getName(); - } - /* * Try to find a FieldDescriptor for the given tag if we are not in * a field already. 
This does _not_ try to construct transparent @@ -183,7 +200,7 @@ bool DocumentChildHandler::start(Variant::mapType &args) */ { ssize_t newFieldIdx = - parent->getDescriptor()->getFieldDescriptorIndex(name()); + parent->getDescriptor()->getFieldDescriptorIndex(name); if (newFieldIdx != -1) { // Check whether explicit fields are allowed here, if not if (scope().getFlag(ParserFlag::POST_EXPLICIT_FIELDS)) { @@ -191,17 +208,17 @@ bool DocumentChildHandler::start(Variant::mapType &args) std::string( "Data or structure commands have already been " "given, command \"") + - name() + std::string( - "\" is not interpreted explicit " - "field. Move explicit field " - "references to the beginning."), + name + std::string( + "\" is not interpreted explicit " + "field. Move explicit field " + "references to the beginning."), location()); } else { Rooted field{new DocumentField( manager(), parentNode, newFieldIdx, false)}; field->setLocation(location()); scope().push(field); - isExplicitField = true; + setMode(Mode::EXPLICIT_FIELD, name); return true; } } @@ -210,11 +227,11 @@ bool DocumentChildHandler::start(Variant::mapType &args) // Otherwise create a new StructuredEntity // TODO: Consider Anchors and AnnotationEntities Rooted strct = scope().resolve( - Utils::split(name(), ':'), logger()); + Utils::split(name, ':'), logger()); if (strct == nullptr) { // if we could not resolve the name, throw an exception. 
throw LoggableException( - std::string("\"") + name() + "\" could not be resolved.", + std::string("\"") + name + "\" could not be resolved.", location()); } @@ -261,24 +278,56 @@ bool DocumentChildHandler::start(Variant::mapType &args) } } +bool DocumentChildHandler::startAnnotation(const std::string &name, + Variant::mapType &args, + AnnotationType annotationType) +{ + // Set the internal mode and name correctly + if (annotationType == AnnotationType::START) { + setMode(Mode::ANNOTATION_START, name); + } else { + setMode(Mode::ANNOTATION_END, name); + } + + // TODO: Handle annotation + return false; +} + +bool DocumentChildHandler::startToken(const Token &token, Handle node) +{ + // Set the internal mode correctly + setMode(Mode::TOKEN, token); + + // TODO: Handle token start + return false; +} + +DocumentChildHandler::EndTokenResult DocumentChildHandler::endToken( + const Token &token, Handle node) +{ + // TODO: Handle token end + return EndTokenResult::ENDED_NONE; +} + void DocumentChildHandler::end() { - // in case of explicit fields we do not want to pop something from the + // In case of explicit fields we do not want to pop something from the // stack. - if (isExplicitField) { - return; + if (mode == Mode::STRUCT) { + // pop the "main" element. + scope().pop(logger()); } - // pop the "main" element. - scope().pop(logger()); } bool DocumentChildHandler::fieldStart(bool &isDefault, size_t fieldIdx) { - if (isExplicitField) { + // TODO: Handle other cases + if (mode == Mode::EXPLICIT_FIELD) { // In case of explicit fields we do not want to create another field. 
isDefault = true; return fieldIdx == 0; } + Rooted parentNode = scope().getLeaf(); assert(parentNode->isa(&RttiTypes::StructuredEntity) || parentNode->isa(&RttiTypes::AnnotationEntity)); @@ -291,7 +340,7 @@ bool DocumentChildHandler::fieldStart(bool &isDefault, size_t fieldIdx) parent->getDescriptor()->getFieldDescriptors(); if (isDefault) { - if(fields.empty()){ + if (fields.empty()) { return false; } fieldIdx = fields.size() - 1; @@ -317,33 +366,19 @@ void DocumentChildHandler::fieldEnd() { assert(scope().getLeaf()->isa(&RttiTypes::DocumentField)); - // pop the field from the stack. + // Pop the field from the stack. scope().pop(logger()); - // pop all remaining transparent elements. + // Pop all remaining transparent elements. while (scope().getLeaf()->isa(&RttiTypes::StructuredEntity) && scope().getLeaf().cast()->isTransparent()) { - // pop the transparent element. + // Pop the transparent element. scope().pop(logger()); - // pop the transparent field. + // Pop the transparent field. 
scope().pop(logger()); } } -bool DocumentChildHandler::annotationStart(const Variant &className, - Variant::mapType &args) -{ - // TODO: Implement - return false; -} - -bool DocumentChildHandler::annotationEnd(const Variant &className, - const Variant &elementName) -{ - // TODO: Implement - return false; -} - bool DocumentChildHandler::convertData(Handle field, Variant &data, Logger &logger) { @@ -436,7 +471,7 @@ bool DocumentChildHandler::data() // this fact Variant text = readData(); if (defaultFields.empty()) { - logger().error("Got data, but structure \"" + name() + + logger().error("Got data, but structure \"" + name + "\" does not have any primitive field", text); } else { @@ -467,7 +502,9 @@ const State DocumentChild = StateBuilder() .createdNodeTypes({&RttiTypes::StructureNode, &RttiTypes::AnnotationEntity, &RttiTypes::DocumentField}) - .elementHandler(DocumentChildHandler::create); + .elementHandler(DocumentChildHandler::create) + .supportsAnnotations(true) + .supportsTokens(true); } } diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index c51c188..9a41508 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -53,7 +53,8 @@ class DocumentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; void end() override; /** @@ -91,8 +92,55 @@ public: * defined elements in an Ousía document. */ class DocumentChildHandler : public Handler { +public: + /** + * Enum type used to represent the mode of the DocumentChildHandler. + * TODO: Having to have such a type is actually quite stupid, it would be + * nicer to have separate handler classes for each of these cases. But this + * is a story for a different day. 
+ */ + enum class Mode { + STRUCT, + EXPLICIT_FIELD, + ANNOTATION_START, + ANNOTATION_END, + TOKEN + }; + private: - bool isExplicitField = false; + /** + * Internal Mode of the DocumentChildHandler. + */ + Mode mode; + + /** + * Contains the name of the command or the annotation that is represented + * by this DocumentChildHandler. + */ + std::string name; + + /** + * Token represented by the document child handler. + */ + Token token; + + /** + * Switches the mode to the given mode and copies the given name. Resets the + * token. + * + * @param mode is the new mode. + * @param name is the new name. + */ + void setMode(Mode mode, const std::string &name); + + /** + * Switches the mode to the given mode and copies the given token, sets the + * name to the content of the token. + * + * @param mode is the new mode. + * @param token is the new token. + */ + void setMode(Mode mode, const Token &token); /** * Code shared by both the start(), fieldStart() and the data() method. @@ -161,22 +209,19 @@ private: Logger &logger); public: - using Handler::Handler; + DocumentChildHandler(const HandlerData &handlerData); - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; + bool startAnnotation(const std::string &name, Variant::mapType &args, + AnnotationType annotationType) override; + bool startToken(const Token &token, Handle node) override; + EndTokenResult endToken(const Token &token, Handle node) override; void end() override; bool data() override; - bool fieldStart(bool &isDefault, size_t fieldIdx) override; - void fieldEnd() override; - bool annotationStart(const Variant &className, - Variant::mapType &args) override; - - bool annotationEnd(const Variant &className, - const Variant &elementName) override; - /** * Creates a new instance of the DocumentChildHandler. 
* diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index aa18faa..5ca4f5b 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -33,7 +33,8 @@ namespace parser_stack { /* DomainHandler */ -bool DomainHandler::start(Variant::mapType &args) +bool DomainHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { // Create the Domain node Rooted domain = @@ -57,7 +58,8 @@ void DomainHandler::end() { scope().pop(logger()); } /* DomainStructHandler */ -bool DomainStructHandler::start(Variant::mapType &args) +bool DomainStructHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -88,7 +90,8 @@ bool DomainStructHandler::start(Variant::mapType &args) void DomainStructHandler::end() { scope().pop(logger()); } /* DomainAnnotationHandler */ -bool DomainAnnotationHandler::start(Variant::mapType &args) +bool DomainAnnotationHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -106,7 +109,8 @@ void DomainAnnotationHandler::end() { scope().pop(logger()); } /* DomainAttributesHandler */ -bool DomainAttributesHandler::start(Variant::mapType &args) +bool DomainAttributesHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { // Fetch the current typesystem and create the struct node Rooted parent = scope().selectOrThrow(); @@ -122,7 +126,8 @@ void DomainAttributesHandler::end() { scope().pop(logger()); } /* DomainFieldHandler */ -bool DomainFieldHandler::start(Variant::mapType &args) +bool DomainFieldHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { FieldDescriptor::FieldType type; if (args["isSubtree"].asBool()) { @@ -152,15 +157,16 @@ void DomainFieldHandler::end() { scope().pop(logger()); } /* DomainFieldRefHandler */ -bool 
DomainFieldRefHandler::start(Variant::mapType &args) +bool DomainFieldRefHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { Rooted parent = scope().selectOrThrow(); - const std::string &name = args["ref"].asString(); + const std::string &ref = args["ref"].asString(); auto loc = location(); - scope().resolveFieldDescriptor(name, parent, logger(), + scope().resolveFieldDescriptor(ref, parent, logger(), [loc](Handle field, Handle parent, Logger &logger) { if (field != nullptr) { @@ -182,7 +188,8 @@ void DomainFieldRefHandler::end() {} /* DomainPrimitiveHandler */ -bool DomainPrimitiveHandler::start(Variant::mapType &args) +bool DomainPrimitiveHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { Rooted parent = scope().selectOrThrow(); @@ -222,7 +229,8 @@ void DomainPrimitiveHandler::end() { scope().pop(logger()); } /* DomainChildHandler */ -bool DomainChildHandler::start(Variant::mapType &args) +bool DomainChildHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { Rooted field = scope().selectOrThrow(); @@ -240,7 +248,8 @@ bool DomainChildHandler::start(Variant::mapType &args) /* DomainParentHandler */ -bool DomainParentHandler::start(Variant::mapType &args) +bool DomainParentHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { Rooted strct = scope().selectOrThrow(); @@ -255,7 +264,8 @@ void DomainParentHandler::end() { scope().pop(logger()); } /* DomainParentFieldHandler */ -bool DomainParentFieldHandler::start(Variant::mapType &args) +bool DomainParentFieldHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { Rooted parentNameNode = scope().selectOrThrow(); FieldDescriptor::FieldType type; @@ -265,7 +275,7 @@ bool DomainParentFieldHandler::start(Variant::mapType &args) type = FieldDescriptor::FieldType::TREE; } - const std::string &name = args["name"].asString(); + const std::string &fieldName = args["name"].asString(); 
const bool optional = args["optional"].asBool(); Rooted strct = parentNameNode->getParent().cast(); @@ -274,12 +284,12 @@ bool DomainParentFieldHandler::start(Variant::mapType &args) // StructuredClass as child to it. scope().resolve( parentNameNode->getName(), strct, logger(), - [type, name, optional](Handle parent, Handle strct, - Logger &logger) { + [type, fieldName, optional](Handle parent, Handle strct, + Logger &logger) { if (parent != nullptr) { Rooted field = (parent.cast()->createFieldDescriptor( - logger, type, name, optional)).first; + logger, type, fieldName, optional)).first; field->addChild(strct.cast()); } }); @@ -288,32 +298,32 @@ bool DomainParentFieldHandler::start(Variant::mapType &args) /* DomainParentFieldRefHandler */ -bool DomainParentFieldRefHandler::start(Variant::mapType &args) +bool DomainParentFieldRefHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { Rooted parentNameNode = scope().selectOrThrow(); - const std::string &name = args["ref"].asString(); + const std::string &ref = args["ref"].asString(); Rooted strct = parentNameNode->getParent().cast(); auto loc = location(); // resolve the parent, get the referenced field and add the declared // StructuredClass as child to it. 
- scope().resolve( - parentNameNode->getName(), strct, logger(), - [name, loc](Handle parent, Handle strct, Logger &logger) { - if (parent != nullptr) { - Rooted field = - parent.cast()->getFieldDescriptor(name); - if (field == nullptr) { - logger.error( - std::string("Could not find referenced field ") + name, - loc); - return; - } - field->addChild(strct.cast()); - } - }); + scope().resolve(parentNameNode->getName(), strct, logger(), + [ref, loc](Handle parent, + Handle strct, Logger &logger) { + if (parent != nullptr) { + Rooted field = + parent.cast()->getFieldDescriptor(ref); + if (field == nullptr) { + logger.error( + std::string("Could not find referenced field ") + ref, loc); + return; + } + field->addChild(strct.cast()); + } + }); return true; } diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp index 76172d6..4116919 100644 --- a/src/core/parser/stack/DomainHandler.hpp +++ b/src/core/parser/stack/DomainHandler.hpp @@ -46,7 +46,7 @@ class DomainHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -59,7 +59,7 @@ class DomainStructHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -72,7 +72,7 @@ class DomainAnnotationHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -85,7 +85,7 @@ class DomainAttributesHandler : public StaticHandler { 
public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -98,7 +98,7 @@ class DomainFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -111,7 +111,7 @@ class DomainFieldRefHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -124,7 +124,7 @@ class DomainPrimitiveHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -137,7 +137,7 @@ class DomainChildHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -154,7 +154,7 @@ class DomainParentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -167,7 +167,7 @@ class DomainParentFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool 
startCommand(const std::string &name, Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -179,7 +179,7 @@ class DomainParentFieldRefHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &name, Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 734976a..f9cefc2 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -32,13 +32,8 @@ namespace parser_stack { /* Class HandlerData */ HandlerData::HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks, - const std::string &name, const State &state, - const SourceLocation &location) - : ctx(ctx), - callbacks(callbacks), - name(name), - state(state), - location(location) + const State &state, const SourceLocation &location) + : ctx(ctx), callbacks(callbacks), state(state), location(location) { } @@ -67,22 +62,14 @@ Logger &Handler::logger() const SourceLocation &Handler::location() const { return handlerData.location; } -const std::string &Handler::name() const { return handlerData.name; } - -Variant Handler::readData() -{ - return handlerData.callbacks.readData(); -} +Variant Handler::readData() { return handlerData.callbacks.readData(); } void Handler::pushTokens(const std::vector &tokens) { handlerData.callbacks.pushTokens(tokens); } -void Handler::popTokens() -{ - handlerData.callbacks.popTokens(); -} +void Handler::popTokens() { handlerData.callbacks.popTokens(); } TokenId Handler::registerToken(const std::string &token) { @@ -94,8 +81,6 @@ void Handler::unregisterToken(TokenId id) handlerData.callbacks.unregisterToken(id); } -const std::string &Handler::getName() const { return name(); } - const State &Handler::getState() const { return handlerData.state; } void Handler::setLogger(Logger &logger) { 
internalLogger = &logger; } @@ -106,42 +91,51 @@ const SourceLocation &Handler::getLocation() const { return location(); } /* Class EmptyHandler */ -bool EmptyHandler::start(Variant::mapType &args) +bool EmptyHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { - // Just accept anything + // Well, we'll support any command we get, don't we? return true; } -void EmptyHandler::end() +bool EmptyHandler::startAnnotation(const std::string &name, + Variant::mapType &args, + Handler::AnnotationType annotationType) { - // Do nothing if a command ends + // Do not support annotations. Annotations are too complicated for poor + // EmptyHandler. + return false; } -bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex) +bool EmptyHandler::startToken(const Token &token, Handle node) { - // Accept any field - return true; + // EmptyHandler does not support tokens. + return false; } -void EmptyHandler::fieldEnd() +Handler::EndTokenResult EmptyHandler::endToken(const Token &token, + Handle node) { - // Do not handle fields + // There are no tokens to end here. 
+ return EndTokenResult::ENDED_NONE; } -bool EmptyHandler::annotationStart(const Variant &className, - Variant::mapType &args) +void EmptyHandler::end() { - // Accept any data - return true; + // Do nothing if a command ends } -bool EmptyHandler::annotationEnd(const Variant &className, - const Variant &elementName) +bool EmptyHandler::fieldStart(bool &isDefaultField, size_t fieldIndex) { - // Accept any annotation + // Accept any field return true; } +void EmptyHandler::fieldEnd() +{ + // Do not handle field ends +} + bool EmptyHandler::data() { // Support any data @@ -155,12 +149,31 @@ Handler *EmptyHandler::create(const HandlerData &handlerData) /* Class StaticHandler */ -bool StaticHandler::start(Variant::mapType &args) +bool StaticHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { // Do nothing in the default implementation, accept anything return true; } +bool StaticHandler::startAnnotation(const std::string &name, + Variant::mapType &args, + Handler::AnnotationType annotationType) +{ + return false; +} + +bool StaticHandler::startToken(const Token &token, Handle node) +{ + return false; +} + +Handler::EndTokenResult StaticHandler::endToken(const Token &token, + Handle node) +{ + return EndTokenResult::ENDED_NONE; +} + void StaticHandler::end() { // Do nothing here @@ -182,20 +195,6 @@ void StaticHandler::fieldEnd() // Do nothing here } -bool StaticHandler::annotationStart(const Variant &className, - Variant::mapType &args) -{ - // No annotations supported - return false; -} - -bool StaticHandler::annotationEnd(const Variant &className, - const Variant &elementName) -{ - // No annotations supported - return false; -} - bool StaticHandler::data() { logger().error("Did not expect any data here", readData()); @@ -210,7 +209,8 @@ StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData, { } -bool StaticFieldHandler::start(Variant::mapType &args) +bool StaticFieldHandler::startCommand(const std::string &commandName, + 
Variant::mapType &args) { if (!argName.empty()) { auto it = args.find(argName); diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 19c3d65..f0968e7 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace ousia { @@ -60,11 +61,6 @@ public: */ HandlerCallbacks &callbacks; - /** - * Contains the name of the command that is being handled. - */ - std::string name; - /** * Contains the current state of the state machine. */ @@ -81,13 +77,11 @@ public: * @param ctx is the parser context the handler should be executed in. * @param callbacks is an instance of Callbacks used to notify * the parser about certain state changes. - * @param name is the name of the string. * @param state is the state this handler was called for. * @param location is the location at which the handler is created. */ HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks, - const std::string &name, const State &state, - const SourceLocation &location); + const State &state, const SourceLocation &location); }; /** @@ -154,13 +148,6 @@ protected: */ const SourceLocation &location() const; - /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. - */ - const std::string &name() const; - /** * Calls the corresponding function in the HandlerCallbacks instance. This * method registers the given tokens as tokens that are generally available, @@ -231,19 +218,23 @@ protected: */ // void popWhitespaceMode(); - public: /** - * Virtual destructor. + * Enum representing the type of the annotation a Handle instance handles. + * It may either handle the start of an annotation or the end of an + * annotation. */ - virtual ~Handler(); + enum class AnnotationType { START, END }; /** - * Returns the command name for which the handler was created. - * - * @return a const reference at the command name. 
+ * Enum type representing the possible outcomes of the endToken() method. */ - const std::string &getName() const; + enum class EndTokenResult { ENDED_THIS, ENDED_HIDDEN, ENDED_NONE }; + + /** + * Virtual destructor. + */ + virtual ~Handler(); /** * Reference at the State descriptor for which this Handler was created. @@ -274,14 +265,63 @@ public: const SourceLocation &getLocation() const; /** - * Called when the command that was specified in the constructor is - * instanciated. + * Called whenever the handler should handle the start of a command. This + * method (or any other of the "start" methods) is called exactly once, + * after the constructor. * + * @param name is the name of the command that is started here. * @param args is a map from strings to variants (argument name and value). - * @return true if the handler was successful in starting the element it - * represents, false otherwise. + * @return true if the handler was successful in starting an element with + * the given name represents, false otherwise. */ - virtual bool start(Variant::mapType &args) = 0; + virtual bool startCommand(const std::string &commandName, + Variant::mapType &args) = 0; + + /** + * Called whenever the handler should handle the start of an annotation. + * This method (or any other of the "start" methods) is called exactly once, + * after the constructor. This method is only called if the + * "supportsAnnotations" flag of the State instance referencing this Handler + * is set to true. + * + * @param name is the name of the annotation that is started here. + * @param args is a map from strings to variants (argument name and value). + * @param type specifies whether this handler should handle the start of an + * annotation or the end of an annotation. + */ + virtual bool startAnnotation(const std::string &name, + Variant::mapType &args, + AnnotationType annotationType) = 0; + + /** + * Called whenever the handler should handle the start of a token. 
This + * method (or any other of the "start" methods) is called exactly once, + * after the constructor. This method is only called if the "supportsTokens" + * flag of the State instance referencing this Handler is set to true. + * + * @param token is the Token for which the handler should be started. + * @param node is the node for which this token was registered. + */ + virtual bool startToken(const Token &token, Handle node) = 0; + + /** + * Called whenever a token is marked as "end" token and this handler happens + * to be the currently active handler. This operation may have three + * outcomes: + *
      + *
    1. The token marks the end of the complete handler and the calling + * code should call the "end" method.
    2. + *
    3. The token marks the end of some element that is unknown the calling + * code. So the operation itself was a success, but the calling code + * should not call the "end" method. + *
    4. The token did not anything in this context. Basically this shuold + * never happen, but who knows.
    5. + *
    + * + * @param id is the Token for which the handler should be started. + * @param node is the node for which this token was registered. + */ + virtual EndTokenResult endToken(const Token &token, Handle node) = 0; /** * Called before the command for which this handler is defined ends (is @@ -310,35 +350,6 @@ public: */ virtual void fieldEnd() = 0; - /** - * Called whenever an annotation starts while this handler is active. The - * function should return true if starting the annotation was successful, - * false otherwise. - * - * @param className is a string variant containing the name of the - * annotation class and the location of the name in the source code. - * @param args is a map from strings to variants (argument name and value). - * @return true if the mentioned annotation could be started here, false - * if an error occurred. - */ - virtual bool annotationStart(const Variant &className, - Variant::mapType &args) = 0; - - /** - * Called whenever an annotation ends while this handler is active. The - * function should return true if ending the annotation was successful, - * false otherwise. - * - * @param className is a string variant containing the name of the - * annotation class and the location of the class name in the source code. - * @param elementName is a string variant containing the name of the - * annotation class and the location of the element name in the source code. - * @return true if the mentioned annotation could be started here, false if - * an error occurred. - */ - virtual bool annotationEnd(const Variant &className, - const Variant &elementName) = 0; - /** * Called whenever raw data (int the form of a string) is available for the * Handler instance. 
Should return true if the data could be handled, false @@ -369,14 +380,15 @@ protected: using Handler::Handler; public: - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; + bool startAnnotation(const std::string &name, Variant::mapType &args, + AnnotationType annotationType) override; + bool startToken(const Token &token, Handle node) override; + EndTokenResult endToken(const Token &token, Handle node) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; void fieldEnd() override; - bool annotationStart(const Variant &className, - Variant::mapType &args) override; - bool annotationEnd(const Variant &className, - const Variant &elementName) override; bool data() override; /** @@ -395,14 +407,15 @@ protected: using Handler::Handler; public: - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; + bool startAnnotation(const std::string &name, Variant::mapType &args, + AnnotationType annotationType) override; + bool startToken(const Token &token, Handle node) override; + EndTokenResult endToken(const Token &token, Handle node) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; void fieldEnd() override; - bool annotationStart(const Variant &className, - Variant::mapType &args) override; - bool annotationEnd(const Variant &className, - const Variant &elementName) override; bool data() override; }; @@ -453,9 +466,10 @@ protected: virtual void doHandle(const Variant &fieldData, Variant::mapType &args) = 0; public: - bool start(Variant::mapType &args) override; - void end() override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; bool data() override; + void end() override; }; } } diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index a556999..3545c37 100644 --- 
a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -32,7 +32,7 @@ namespace ousia { namespace parser_stack { - +namespace { /* Class HandlerInfo */ /** @@ -41,6 +41,11 @@ namespace parser_stack { */ class HandlerInfo { public: + /** + * Name of the command or the token sequence. + */ + std::string name; + /** * Pointer pointing at the actual handler instance. */ @@ -96,6 +101,7 @@ public: * Default constructor of the HandlerInfo class. */ HandlerInfo(); + /** * Constructor of the HandlerInfo class, allows to set all flags manually. */ @@ -182,6 +188,7 @@ void HandlerInfo::fieldEnd() * Stub instance of HandlerInfo containing no handler information. */ static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true}; +} /* Helper functions */ @@ -387,7 +394,7 @@ StackImpl::~StackImpl() !info.inImplicitDefaultField) { logger().error( std::string("Reached end of stream, but command \"") + - info.handler->getName() + + info.name + "\" has not ended yet. Command was started here:", info.handler->getLocation()); } @@ -421,8 +428,8 @@ void StackImpl::deduceState() HandlerConstructor ctor = state.elementHandler ? state.elementHandler : EmptyHandler::create; - std::shared_ptr handler = std::shared_ptr{ - ctor({ctx, *this, "", state, SourceLocation{}})}; + std::shared_ptr handler = + std::shared_ptr{ctor({ctx, *this, state, SourceLocation{}})}; stack.emplace_back(handler); // Set the correct flags for this implicit handler @@ -450,7 +457,7 @@ const State &StackImpl::currentState() const std::string StackImpl::currentCommandName() const { - return stack.empty() ? std::string{} : stack.back().handler->getName(); + return stack.empty() ? std::string{} : stack.back().name; } const State *StackImpl::findTargetState(const std::string &name) @@ -608,8 +615,8 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, HandlerConstructor ctor = targetState->elementHandler ? 
targetState->elementHandler : EmptyHandler::create; - std::shared_ptr handler{ctor( - {ctx, *this, name.asString(), *targetState, name.getLocation()})}; + std::shared_ptr handler{ + ctor({ctx, *this, *targetState, name.getLocation()})}; stack.emplace_back(handler); // Fetch the HandlerInfo for the parent element and the current element @@ -631,7 +638,8 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, handler->setLogger(loggerFork); try { - info.valid = handler->start(canonicalArgs); + info.valid = + handler->startCommand(name.asString(), canonicalArgs); } catch (LoggableException ex) { loggerFork.log(ex); diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index de8ee49..110c56f 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -32,7 +32,8 @@ namespace parser_stack { /* TypesystemHandler */ -bool TypesystemHandler::start(Variant::mapType &args) +bool TypesystemHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { // Create the typesystem instance Rooted typesystem = @@ -63,7 +64,8 @@ void TypesystemHandler::end() { scope().pop(logger()); } /* TypesystemEnumHandler */ -bool TypesystemEnumHandler::start(Variant::mapType &args) +bool TypesystemEnumHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -91,17 +93,18 @@ void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData, /* TypesystemStructHandler */ -bool TypesystemStructHandler::start(Variant::mapType &args) +bool TypesystemStructHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); // Fetch the arguments used for creating this type - const std::string &name = args["name"].asString(); + const std::string &structNmae = args["name"].asString(); const std::string &parent = 
args["parent"].asString(); // Fetch the current typesystem and create the struct node Rooted typesystem = scope().selectOrThrow(); - Rooted structType = typesystem->createStructType(name); + Rooted structType = typesystem->createStructType(structNmae); structType->setLocation(location()); // Try to resolve the parent type and set it as parent structure @@ -124,18 +127,19 @@ void TypesystemStructHandler::end() { scope().pop(logger()); } /* TypesystemStructFieldHandler */ -bool TypesystemStructFieldHandler::start(Variant::mapType &args) +bool TypesystemStructFieldHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { // Read the argument values - const std::string &name = args["name"].asString(); + const std::string &fieldName = args["name"].asString(); const std::string &type = args["type"].asString(); const Variant &defaultValue = args["default"]; const bool optional = !(defaultValue.isObject() && defaultValue.asObject() == nullptr); Rooted structType = scope().selectOrThrow(); - Rooted attribute = - structType->createAttribute(name, defaultValue, optional, logger()); + Rooted attribute = structType->createAttribute( + fieldName, defaultValue, optional, logger()); attribute->setLocation(location()); // Try to resolve the type and default value @@ -163,17 +167,18 @@ bool TypesystemStructFieldHandler::start(Variant::mapType &args) /* TypesystemConstantHandler */ -bool TypesystemConstantHandler::start(Variant::mapType &args) +bool TypesystemConstantHandler::startCommand(const std::string &commandName, + Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); // Read the argument values - const std::string &name = args["name"].asString(); + const std::string &constantName = args["name"].asString(); const std::string &type = args["type"].asString(); const Variant &value = args["value"]; Rooted typesystem = scope().selectOrThrow(); - Rooted constant = typesystem->createConstant(name, value); + Rooted constant = 
typesystem->createConstant(constantName, value); constant->setLocation(location()); // Try to resolve the type diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp index 85494f1..75cba01 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -43,7 +43,8 @@ class TypesystemHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; void end() override; /** @@ -67,7 +68,8 @@ class TypesystemEnumHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; void end() override; /** @@ -114,7 +116,8 @@ class TypesystemStructHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; void end() override; /** @@ -139,7 +142,8 @@ class TypesystemStructFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; /** * Creates a new instance of the TypesystemStructFieldHandler. @@ -162,7 +166,8 @@ class TypesystemConstantHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool start(Variant::mapType &args) override; + bool startCommand(const std::string &commandName, + Variant::mapType &args) override; /** * Creates a new instance of the TypesystemConstantHandler. 
-- cgit v1.2.3 From e31968c9e073c64cf718fbcaebbc83ee2bee48c8 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 18:09:34 +0100 Subject: Added additional constructor to Token --- src/core/common/Token.hpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp index f37151f..4b56f1a 100644 --- a/src/core/common/Token.hpp +++ b/src/core/common/Token.hpp @@ -134,7 +134,9 @@ struct Token { * @param location is the location of the extracted string content in the * source file. */ - Token(SourceLocation location) : id(Tokens::Data), location(location) {} + Token(const SourceLocation &location) : id(Tokens::Data), location(location) + { + } /** * Constructor of the Token struct. @@ -144,11 +146,25 @@ struct Token { * @param location is the location of the extracted string content in the * source file. */ - Token(TokenId id, const std::string &content, SourceLocation location) + Token(TokenId id, const std::string &content, + const SourceLocation &location) : id(id), content(content), location(location) { } + /** + * Constructor of the a "data" Token with the given string data and + * location. + * + * @param content is the string content that should be stored in the token. + * @param location is the location of the content within the source file. 
+ */ + Token(const std::string &content, + const SourceLocation &location = SourceLocation{}) + : id(Tokens::Data), content(content), location(location) + { + } + /** * Constructor of the Token struct, only initializes the token id * @@ -172,7 +188,6 @@ struct Token { */ const SourceLocation &getLocation() const { return location; } }; - } -#endif /* _OUSIA_TOKENS_HPP_ */ \ No newline at end of file +#endif /* _OUSIA_TOKENS_HPP_ */ -- cgit v1.2.3 From e0b9f6ef6692ee8c37386c23f721dc6a57f69ae6 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 18:10:28 +0100 Subject: Storing type and name in the HandlerData once again, using a Token --- src/core/parser/stack/Callbacks.hpp | 3 +- src/core/parser/stack/DocumentHandler.cpp | 61 +++++-------------- src/core/parser/stack/DocumentHandler.hpp | 59 +++---------------- src/core/parser/stack/DomainHandler.cpp | 55 +++++++---------- src/core/parser/stack/DomainHandler.hpp | 22 +++---- src/core/parser/stack/Handler.cpp | 44 +++++++------- src/core/parser/stack/Stack.cpp | 91 +++++++++++++++++------------ src/core/parser/stack/TokenStack.hpp | 4 +- src/core/parser/stack/TypesystemHandler.cpp | 15 ++--- src/core/parser/stack/TypesystemHandler.hpp | 15 ++--- 10 files changed, 148 insertions(+), 221 deletions(-) diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index 092664a..dfe41fc 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -87,7 +87,8 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + virtual void pushTokens(const std::vector &tokens) = 0; + /** * Removes the previously pushed list of tokens from the stack. 
*/ diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index de6e367..e931d8d 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -37,8 +37,7 @@ namespace parser_stack { /* DocumentHandler */ -bool DocumentHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DocumentHandler::startCommand(Variant::mapType &args) { Rooted document = context().getProject()->createDocument(args["name"].asString()); @@ -54,24 +53,10 @@ void DocumentHandler::end() { scope().pop(logger()); } /* DocumentChildHandler */ DocumentChildHandler::DocumentChildHandler(const HandlerData &handlerData) - : Handler(handlerData), mode(Mode::STRUCT) + : Handler(handlerData), isExplicitField(false) { } -void DocumentChildHandler::setMode(Mode mode, const std::string &name) -{ - this->mode = mode; - this->name = name; - this->token = Token(); -} - -void DocumentChildHandler::setMode(Mode mode, const Token &token) -{ - this->mode = mode; - this->name = token.content; - this->token = token; -} - void DocumentChildHandler::preamble(Rooted &parentNode, size_t &fieldIdx, DocumentEntity *&parent) { @@ -142,12 +127,8 @@ void DocumentChildHandler::createPath(const size_t &firstFieldIdx, scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, false); } -bool DocumentChildHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DocumentChildHandler::startCommand(Variant::mapType &args) { - // Set the internal mode to STRUCT and copy the name - setMode(Mode::STRUCT, name); - // Extract the special "name" attribute from the input arguments. // The remaining attributes will be forwarded to the newly constructed // element. 
@@ -176,11 +157,11 @@ bool DocumentChildHandler::startCommand(const std::string &commandName, return false; } Rooted strct = scope().resolve( - Utils::split(name, ':'), logger()); + Utils::split(name(), ':'), logger()); if (strct == nullptr) { // if we could not resolve the name, throw an exception. throw LoggableException( - std::string("\"") + name + "\" could not be resolved.", + std::string("\"") + name() + "\" could not be resolved.", location()); } entity = parentNode.cast()->createRootStructuredEntity( @@ -200,7 +181,7 @@ bool DocumentChildHandler::startCommand(const std::string &commandName, */ { ssize_t newFieldIdx = - parent->getDescriptor()->getFieldDescriptorIndex(name); + parent->getDescriptor()->getFieldDescriptorIndex(name()); if (newFieldIdx != -1) { // Check whether explicit fields are allowed here, if not if (scope().getFlag(ParserFlag::POST_EXPLICIT_FIELDS)) { @@ -208,7 +189,7 @@ bool DocumentChildHandler::startCommand(const std::string &commandName, std::string( "Data or structure commands have already been " "given, command \"") + - name + std::string( + name() + std::string( "\" is not interpreted explicit " "field. Move explicit field " "references to the beginning."), @@ -218,7 +199,7 @@ bool DocumentChildHandler::startCommand(const std::string &commandName, manager(), parentNode, newFieldIdx, false)}; field->setLocation(location()); scope().push(field); - setMode(Mode::EXPLICIT_FIELD, name); + isExplicitField = true; return true; } } @@ -227,11 +208,11 @@ bool DocumentChildHandler::startCommand(const std::string &commandName, // Otherwise create a new StructuredEntity // TODO: Consider Anchors and AnnotationEntities Rooted strct = scope().resolve( - Utils::split(name, ':'), logger()); + Utils::split(name(), ':'), logger()); if (strct == nullptr) { // if we could not resolve the name, throw an exception. 
throw LoggableException( - std::string("\"") + name + "\" could not be resolved.", + std::string("\"") + name() + "\" could not be resolved.", location()); } @@ -278,26 +259,15 @@ bool DocumentChildHandler::startCommand(const std::string &commandName, } } -bool DocumentChildHandler::startAnnotation(const std::string &name, - Variant::mapType &args, +bool DocumentChildHandler::startAnnotation(Variant::mapType &args, AnnotationType annotationType) { - // Set the internal mode and name correctly - if (annotationType == AnnotationType::START) { - setMode(Mode::ANNOTATION_START, name); - } else { - setMode(Mode::ANNOTATION_END, name); - } - // TODO: Handle annotation return false; } -bool DocumentChildHandler::startToken(const Token &token, Handle node) +bool DocumentChildHandler::startToken(Handle node) { - // Set the internal mode correctly - setMode(Mode::TOKEN, token); - // TODO: Handle token start return false; } @@ -313,7 +283,7 @@ void DocumentChildHandler::end() { // In case of explicit fields we do not want to pop something from the // stack. - if (mode == Mode::STRUCT) { + if (!isExplicitField) { // pop the "main" element. scope().pop(logger()); } @@ -321,8 +291,7 @@ void DocumentChildHandler::end() bool DocumentChildHandler::fieldStart(bool &isDefault, size_t fieldIdx) { - // TODO: Handle other cases - if (mode == Mode::EXPLICIT_FIELD) { + if (isExplicitField) { // In case of explicit fields we do not want to create another field. 
isDefault = true; return fieldIdx == 0; @@ -471,7 +440,7 @@ bool DocumentChildHandler::data() // this fact Variant text = readData(); if (defaultFields.empty()) { - logger().error("Got data, but structure \"" + name + + logger().error("Got data, but structure \"" + name() + "\" does not have any primitive field", text); } else { diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 9a41508..d34c020 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -53,8 +53,7 @@ class DocumentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; /** @@ -92,55 +91,11 @@ public: * defined elements in an Ousía document. */ class DocumentChildHandler : public Handler { -public: - /** - * Enum type used to represent the mode of the DocumentChildHandler. - * TODO: Having to have such a type is actually quite stupid, it would be - * nicer to have separate handler classes for each of these cases. But this - * is a story for a different day. - */ - enum class Mode { - STRUCT, - EXPLICIT_FIELD, - ANNOTATION_START, - ANNOTATION_END, - TOKEN - }; - private: /** - * Internal Mode of the DocumentChildHandler. - */ - Mode mode; - - /** - * Contains the name of the command or the annotation that is represented - * by this DocumentChildHandler. - */ - std::string name; - - /** - * Token represented by the document child handler. + * If set to true, this handler represents an explicit field. */ - Token token; - - /** - * Switches the mode to the given mode and copies the given name. Resets the - * token. - * - * @param mode is the new mode. - * @param name is the new name. 
- */ - void setMode(Mode mode, const std::string &name); - - /** - * Switches the mode to the given mode and copies the given token, sets the - * name to the content of the token. - * - * @param mode is the new mode. - * @param token is the new token. - */ - void setMode(Mode mode, const Token &token); + bool isExplicitField; /** * Code shared by both the start(), fieldStart() and the data() method. @@ -211,11 +166,10 @@ private: public: DocumentChildHandler(const HandlerData &handlerData); - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; - bool startAnnotation(const std::string &name, Variant::mapType &args, + bool startCommand(Variant::mapType &args) override; + bool startAnnotation(Variant::mapType &args, AnnotationType annotationType) override; - bool startToken(const Token &token, Handle node) override; + bool startToken(Handle node) override; EndTokenResult endToken(const Token &token, Handle node) override; void end() override; bool data() override; @@ -257,3 +211,4 @@ extern const Rtti DocumentField; } #endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ + diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp index 5ca4f5b..aef5b47 100644 --- a/src/core/parser/stack/DomainHandler.cpp +++ b/src/core/parser/stack/DomainHandler.cpp @@ -33,8 +33,7 @@ namespace parser_stack { /* DomainHandler */ -bool DomainHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainHandler::startCommand(Variant::mapType &args) { // Create the Domain node Rooted domain = @@ -58,8 +57,7 @@ void DomainHandler::end() { scope().pop(logger()); } /* DomainStructHandler */ -bool DomainStructHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainStructHandler::startCommand(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -90,8 +88,7 @@ bool DomainStructHandler::startCommand(const std::string &commandName, void 
DomainStructHandler::end() { scope().pop(logger()); } /* DomainAnnotationHandler */ -bool DomainAnnotationHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainAnnotationHandler::startCommand(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -109,8 +106,7 @@ void DomainAnnotationHandler::end() { scope().pop(logger()); } /* DomainAttributesHandler */ -bool DomainAttributesHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainAttributesHandler::startCommand(Variant::mapType &args) { // Fetch the current typesystem and create the struct node Rooted parent = scope().selectOrThrow(); @@ -126,8 +122,7 @@ void DomainAttributesHandler::end() { scope().pop(logger()); } /* DomainFieldHandler */ -bool DomainFieldHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainFieldHandler::startCommand(Variant::mapType &args) { FieldDescriptor::FieldType type; if (args["isSubtree"].asBool()) { @@ -157,16 +152,15 @@ void DomainFieldHandler::end() { scope().pop(logger()); } /* DomainFieldRefHandler */ -bool DomainFieldRefHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainFieldRefHandler::startCommand(Variant::mapType &args) { Rooted parent = scope().selectOrThrow(); - const std::string &ref = args["ref"].asString(); + const std::string &name = args["ref"].asString(); auto loc = location(); - scope().resolveFieldDescriptor(ref, parent, logger(), + scope().resolveFieldDescriptor(name, parent, logger(), [loc](Handle field, Handle parent, Logger &logger) { if (field != nullptr) { @@ -188,8 +182,7 @@ void DomainFieldRefHandler::end() {} /* DomainPrimitiveHandler */ -bool DomainPrimitiveHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainPrimitiveHandler::startCommand(Variant::mapType &args) { Rooted parent = scope().selectOrThrow(); @@ -229,14 +222,13 @@ void 
DomainPrimitiveHandler::end() { scope().pop(logger()); } /* DomainChildHandler */ -bool DomainChildHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainChildHandler::startCommand(Variant::mapType &args) { Rooted field = scope().selectOrThrow(); - const std::string &ref = args["ref"].asString(); + const std::string &name = args["ref"].asString(); scope().resolve( - ref, field, logger(), + name, field, logger(), [](Handle child, Handle field, Logger &logger) { if (child != nullptr) { field.cast()->addChild( @@ -248,8 +240,7 @@ bool DomainChildHandler::startCommand(const std::string &commandName, /* DomainParentHandler */ -bool DomainParentHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainParentHandler::startCommand(Variant::mapType &args) { Rooted strct = scope().selectOrThrow(); @@ -264,8 +255,7 @@ void DomainParentHandler::end() { scope().pop(logger()); } /* DomainParentFieldHandler */ -bool DomainParentFieldHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainParentFieldHandler::startCommand(Variant::mapType &args) { Rooted parentNameNode = scope().selectOrThrow(); FieldDescriptor::FieldType type; @@ -275,7 +265,7 @@ bool DomainParentFieldHandler::startCommand(const std::string &commandName, type = FieldDescriptor::FieldType::TREE; } - const std::string &fieldName = args["name"].asString(); + const std::string &name = args["name"].asString(); const bool optional = args["optional"].asBool(); Rooted strct = parentNameNode->getParent().cast(); @@ -284,12 +274,12 @@ bool DomainParentFieldHandler::startCommand(const std::string &commandName, // StructuredClass as child to it. 
scope().resolve( parentNameNode->getName(), strct, logger(), - [type, fieldName, optional](Handle parent, Handle strct, + [type, name, optional](Handle parent, Handle strct, Logger &logger) { if (parent != nullptr) { Rooted field = (parent.cast()->createFieldDescriptor( - logger, type, fieldName, optional)).first; + logger, type, name, optional)).first; field->addChild(strct.cast()); } }); @@ -298,12 +288,11 @@ bool DomainParentFieldHandler::startCommand(const std::string &commandName, /* DomainParentFieldRefHandler */ -bool DomainParentFieldRefHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool DomainParentFieldRefHandler::startCommand(Variant::mapType &args) { Rooted parentNameNode = scope().selectOrThrow(); - const std::string &ref = args["ref"].asString(); + const std::string &name = args["ref"].asString(); Rooted strct = parentNameNode->getParent().cast(); auto loc = location(); @@ -311,14 +300,14 @@ bool DomainParentFieldRefHandler::startCommand(const std::string &commandName, // resolve the parent, get the referenced field and add the declared // StructuredClass as child to it. 
scope().resolve(parentNameNode->getName(), strct, logger(), - [ref, loc](Handle parent, + [name, loc](Handle parent, Handle strct, Logger &logger) { if (parent != nullptr) { Rooted field = - parent.cast()->getFieldDescriptor(ref); + parent.cast()->getFieldDescriptor(name); if (field == nullptr) { logger.error( - std::string("Could not find referenced field ") + ref, loc); + std::string("Could not find referenced field ") + name, loc); return; } field->addChild(strct.cast()); diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp index 4116919..f12d863 100644 --- a/src/core/parser/stack/DomainHandler.hpp +++ b/src/core/parser/stack/DomainHandler.hpp @@ -46,7 +46,7 @@ class DomainHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -59,7 +59,7 @@ class DomainStructHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -72,7 +72,7 @@ class DomainAnnotationHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -85,7 +85,7 @@ class DomainAttributesHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -98,7 
+98,7 @@ class DomainFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -111,7 +111,7 @@ class DomainFieldRefHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -124,7 +124,7 @@ class DomainPrimitiveHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -137,7 +137,7 @@ class DomainChildHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -154,7 +154,7 @@ class DomainParentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; static Handler *create(const HandlerData &handlerData) @@ -167,7 +167,7 @@ class DomainParentFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { @@ -179,7 +179,7 @@ class DomainParentFieldRefHandler : public StaticHandler { public: using 
StaticHandler::StaticHandler; - bool startCommand(const std::string &name, Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; static Handler *create(const HandlerData &handlerData) { diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 006e521..c01e74c 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -32,8 +32,9 @@ namespace parser_stack { /* Class HandlerData */ HandlerData::HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks, - const State &state, const SourceLocation &location) - : ctx(ctx), callbacks(callbacks), state(state), location(location) + const State &state, const Token &token, + HandlerType type) + : ctx(ctx), callbacks(callbacks), state(state), token(token), type(type) { } @@ -60,7 +61,20 @@ Logger &Handler::logger() return handlerData.ctx.getLogger(); } -const SourceLocation &Handler::location() const { return handlerData.location; } +const std::string &Handler::name() const { return handlerData.token.content; } + +TokenId Handler::tokenId() const { return handlerData.token.id; } + +const Token &Handler::token() const { return handlerData.token; } + +const SourceLocation &Handler::location() const +{ + return handlerData.token.location; +} + +HandlerType Handler::type() const { return handlerData.type; } + +const State &Handler::state() const { return handlerData.state; } Variant Handler::readData() { return handlerData.callbacks.readData(); } @@ -81,8 +95,6 @@ void Handler::unregisterToken(TokenId id) handlerData.callbacks.unregisterToken(id); } -const State &Handler::getState() const { return handlerData.state; } - void Handler::setLogger(Logger &logger) { internalLogger = &logger; } void Handler::resetLogger() { internalLogger = nullptr; } @@ -91,15 +103,13 @@ const SourceLocation &Handler::getLocation() const { return location(); } /* Class EmptyHandler */ -bool EmptyHandler::startCommand(const std::string &commandName, - 
Variant::mapType &args) +bool EmptyHandler::startCommand(Variant::mapType &args) { // Well, we'll support any command we get, don't we? return true; } -bool EmptyHandler::startAnnotation(const std::string &name, - Variant::mapType &args, +bool EmptyHandler::startAnnotation(Variant::mapType &args, Handler::AnnotationType annotationType) { // Do not support annotations. Annotations are too complicated for poor @@ -107,7 +117,7 @@ bool EmptyHandler::startAnnotation(const std::string &name, return false; } -bool EmptyHandler::startToken(const Token &token, Handle node) +bool EmptyHandler::startToken(Handle node) { // EmptyHandler does not support tokens. return false; @@ -149,24 +159,19 @@ Handler *EmptyHandler::create(const HandlerData &handlerData) /* Class StaticHandler */ -bool StaticHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool StaticHandler::startCommand(Variant::mapType &args) { // Do nothing in the default implementation, accept anything return true; } -bool StaticHandler::startAnnotation(const std::string &name, - Variant::mapType &args, +bool StaticHandler::startAnnotation(Variant::mapType &args, Handler::AnnotationType annotationType) { return false; } -bool StaticHandler::startToken(const Token &token, Handle node) -{ - return false; -} +bool StaticHandler::startToken(Handle node) { return false; } Handler::EndTokenResult StaticHandler::endToken(const Token &token, Handle node) @@ -209,8 +214,7 @@ StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData, { } -bool StaticFieldHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool StaticFieldHandler::startCommand(Variant::mapType &args) { if (!argName.empty()) { auto it = args.find(argName); diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 3545c37..cad4078 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -41,11 +41,6 @@ namespace { */ class HandlerInfo { 
public: - /** - * Name of the command or the token sequence. - */ - std::string name; - /** * Pointer pointing at the actual handler instance. */ @@ -362,7 +357,7 @@ public: void unregisterToken(TokenId id) override; Variant readData() override; bool hasData(); - void pushTokens(const std::vector &tokens) override; + void pushTokens(const std::vector &tokens) override; void popTokens() override; }; @@ -394,7 +389,7 @@ StackImpl::~StackImpl() !info.inImplicitDefaultField) { logger().error( std::string("Reached end of stream, but command \"") + - info.name + + currentCommandName() + "\" has not ended yet. Command was started here:", info.handler->getLocation()); } @@ -428,8 +423,8 @@ void StackImpl::deduceState() HandlerConstructor ctor = state.elementHandler ? state.elementHandler : EmptyHandler::create; - std::shared_ptr handler = - std::shared_ptr{ctor({ctx, *this, state, SourceLocation{}})}; + std::shared_ptr handler = std::shared_ptr{ + ctor({ctx, *this, state, SourceLocation{}, HandlerType::COMMAND})}; stack.emplace_back(handler); // Set the correct flags for this implicit handler @@ -452,12 +447,12 @@ std::set StackImpl::expectedCommands() const State &StackImpl::currentState() const { - return stack.empty() ? States::None : stack.back().handler->getState(); + return stack.empty() ? States::None : stack.back().handler->state(); } std::string StackImpl::currentCommandName() const { - return stack.empty() ? std::string{} : stack.back().name; + return stack.empty() ? std::string{} : stack.back().handler->name(); } const State *StackImpl::findTargetState(const std::string &name) @@ -616,21 +611,29 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, ? 
targetState->elementHandler : EmptyHandler::create; std::shared_ptr handler{ - ctor({ctx, *this, *targetState, name.getLocation()})}; + ctor({ctx, + *this, + *targetState, + {name.asString(), name.getLocation()}, + HandlerType::COMMAND})}; stack.emplace_back(handler); - // Fetch the HandlerInfo for the parent element and the current element + // Fetch the HandlerInfo for the parent element and the current + // element HandlerInfo &parentInfo = lastInfo(); HandlerInfo &info = currentInfo(); - // Call the "start" method of the handler, store the result of the start - // method as the validity of the handler -- do not call the start method + // Call the "start" method of the handler, store the result of the + // start + // method as the validity of the handler -- do not call the start + // method // if the stack is currently invalid (as this may cause further, // unwanted errors) bool validStack = handlersValid(); info.valid = false; if (validStack) { - // Canonicalize the arguments (if this has not already been done), + // Canonicalize the arguments (if this has not already been + // done), // allow additional arguments and numeric indices Variant::mapType canonicalArgs = args; targetState->arguments.validateMap(canonicalArgs, loggerFork, true, @@ -638,8 +641,7 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, handler->setLogger(loggerFork); try { - info.valid = - handler->startCommand(name.asString(), canonicalArgs); + info.valid = handler->startCommand(canonicalArgs); } catch (LoggableException ex) { loggerFork.log(ex); @@ -647,8 +649,10 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, handler->resetLogger(); } - // We started the command within an implicit default field and it is not - // valid -- remove both the new handler and the parent field from the + // We started the command within an implicit default field and it is + // not + // valid -- remove both the new handler and the parent field 
from + // the // stack if (!info.valid && parentInfo.inImplicitDefaultField) { endCurrentHandler(); @@ -656,7 +660,8 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, continue; } - // If we ended up here, starting the command may or may not have worked, + // If we ended up here, starting the command may or may not have + // worked, // but after all, we cannot unroll the stack any further. Update the // "valid" flag, commit any potential error messages and return. info.valid = parentInfo.valid && info.valid; @@ -687,13 +692,15 @@ void StackImpl::data(const TokenizedData &data) // TODO: Rewrite this function for token handling // TODO: This loop needs to be refactored out /*while (!data.atEnd()) { - // End handlers that already had a default field and are currently not + // End handlers that already had a default field and are currently + not // active. endOverdueHandlers(); const bool hasNonWhitespaceText = data.hasNonWhitespaceText(); - // Check whether there is any command the data can be sent to -- if not, + // Check whether there is any command the data can be sent to -- if + not, // make sure the data actually is data if (stack.empty()) { if (hasNonWhitespaceText) { @@ -712,10 +719,12 @@ void StackImpl::data(const TokenizedData &data) continue; } - // If this field should not get any data, log an error and do not call + // If this field should not get any data, log an error and do not + call // the "data" handler if (!info.inValidField) { - // If the "hadDefaultField" flag is set, we already issued an error + // If the "hadDefaultField" flag is set, we already issued an + error // message if (!info.hadDefaultField) { if (hasNonWhitespaceText) { @@ -726,8 +735,10 @@ void StackImpl::data(const TokenizedData &data) } if (handlersValid() && info.inValidField) { - // Fork the logger and set it as temporary logger for the "start" - // method. 
We only want to keep error messages if this was not a try + // Fork the logger and set it as temporary logger for the + "start" + // method. We only want to keep error messages if this was not a + try // to implicitly open a default field. LoggerFork loggerFork = logger().fork(); info.handler->setLogger(loggerFork); @@ -735,12 +746,14 @@ void StackImpl::data(const TokenizedData &data) // Pass the data to the current Handler instance bool valid = false; try { - // Create a fork of the TokenizedData and let the handler work + // Create a fork of the TokenizedData and let the handler + work // on it TokenizedData dataFork = data; valid = info.handler->data(dataFork); - // If the data was validly handled by the handler, commit the + // If the data was validly handled by the handler, commit + the // change if (valid) { data = dataFork; @@ -754,14 +767,16 @@ void StackImpl::data(const TokenizedData &data) info.handler->resetLogger(); // If placing the data here failed and we're currently in an - // implicitly opened field, just unroll the stack to the next field + // implicitly opened field, just unroll the stack to the next + field // and try again if (!valid && info.inImplicitDefaultField) { endCurrentHandler(); continue; } - // Commit the content of the logger fork. Do not change the valid + // Commit the content of the logger fork. Do not change the + valid // flag. 
loggerFork.commit(); } @@ -783,12 +798,14 @@ void StackImpl::fieldStart(bool isDefault) HandlerInfo &info = currentInfo(); if (info.inField) { logger().error( - "Got field start, but there is no command for which to start the " + "Got field start, but there is no command for which to start " + "the " "field."); return; } - // If the handler already had a default field we cannot start a new field + // If the handler already had a default field we cannot start a new + // field // (the default field always is the last field) -- mark the command as // invalid if (info.hadDefaultField) { @@ -797,7 +814,8 @@ void StackImpl::fieldStart(bool isDefault) std::string("\" does not have any more fields")); } - // Copy the isDefault flag to a local variable, the fieldStart method will + // Copy the isDefault flag to a local variable, the fieldStart method + // will // write into this variable bool defaultField = isDefault; @@ -843,7 +861,8 @@ void StackImpl::fieldEnd() return; } - // Only continue if the current handler stack is in a valid state, do not + // Only continue if the current handler stack is in a valid state, do + // not // call the fieldEnd function if something went wrong before if (handlersValid() && !info.hadDefaultField && info.inValidField) { try { @@ -868,7 +887,7 @@ void StackImpl::unregisterToken(TokenId id) tokenRegistry.unregisterToken(id); } -void StackImpl::pushTokens(const std::vector &tokens) +void StackImpl::pushTokens(const std::vector &tokens) { // TODO } diff --git a/src/core/parser/stack/TokenStack.hpp b/src/core/parser/stack/TokenStack.hpp index af734bb..f2e7edc 100644 --- a/src/core/parser/stack/TokenStack.hpp +++ b/src/core/parser/stack/TokenStack.hpp @@ -82,9 +82,9 @@ public: TokenStack(const TokenStack &parentStack) : TokenStack(&parentStack) {} /** - * Pushes a list of TokenSyntaxDescriptor instances onto the internal stack. + * Pushes a list of SyntaxDescriptor instances onto the internal stack. 
* - * @param tokens is a list of TokenSyntaxDescriptor instances that should be + * @param tokens is a list of SyntaxDescriptor instances that should be * stored on the stack. */ void pushTokens(const std::vector &tokens); diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp index 110c56f..3fa641a 100644 --- a/src/core/parser/stack/TypesystemHandler.cpp +++ b/src/core/parser/stack/TypesystemHandler.cpp @@ -32,8 +32,7 @@ namespace parser_stack { /* TypesystemHandler */ -bool TypesystemHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool TypesystemHandler::startCommand(Variant::mapType &args) { // Create the typesystem instance Rooted typesystem = @@ -64,8 +63,7 @@ void TypesystemHandler::end() { scope().pop(logger()); } /* TypesystemEnumHandler */ -bool TypesystemEnumHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool TypesystemEnumHandler::startCommand(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -93,8 +91,7 @@ void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData, /* TypesystemStructHandler */ -bool TypesystemStructHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool TypesystemStructHandler::startCommand(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); @@ -127,8 +124,7 @@ void TypesystemStructHandler::end() { scope().pop(logger()); } /* TypesystemStructFieldHandler */ -bool TypesystemStructFieldHandler::startCommand(const std::string &commandName, - Variant::mapType &args) +bool TypesystemStructFieldHandler::startCommand(Variant::mapType &args) { // Read the argument values const std::string &fieldName = args["name"].asString(); @@ -167,8 +163,7 @@ bool TypesystemStructFieldHandler::startCommand(const std::string &commandName, /* TypesystemConstantHandler */ -bool TypesystemConstantHandler::startCommand(const std::string &commandName, - 
Variant::mapType &args) +bool TypesystemConstantHandler::startCommand(Variant::mapType &args) { scope().setFlag(ParserFlag::POST_HEAD, true); diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp index 75cba01..0773a3a 100644 --- a/src/core/parser/stack/TypesystemHandler.hpp +++ b/src/core/parser/stack/TypesystemHandler.hpp @@ -43,8 +43,7 @@ class TypesystemHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; /** @@ -68,8 +67,7 @@ class TypesystemEnumHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; /** @@ -116,8 +114,7 @@ class TypesystemStructHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; void end() override; /** @@ -142,8 +139,7 @@ class TypesystemStructFieldHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; /** * Creates a new instance of the TypesystemStructFieldHandler. @@ -166,8 +162,7 @@ class TypesystemConstantHandler : public StaticHandler { public: using StaticHandler::StaticHandler; - bool startCommand(const std::string &commandName, - Variant::mapType &args) override; + bool startCommand(Variant::mapType &args) override; /** * Creates a new instance of the TypesystemConstantHandler. 
-- cgit v1.2.3 From cdae062d0cbc19ce605df24b2fff5e3808f21ca6 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 18:33:46 +0100 Subject: Added range flag to HandlerInfo --- src/core/parser/stack/Stack.cpp | 60 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index cad4078..e5bd224 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -65,6 +65,11 @@ public: */ bool implicit : 1; + /** + * Set to true if the handled command or annotation has a range. + */ + bool range : 1; + /** * Set to true if the handler currently is in a field. */ @@ -100,8 +105,9 @@ public: /** * Constructor of the HandlerInfo class, allows to set all flags manually. */ - HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField, - bool inImplicitDefaultField, bool inValidField); + HandlerInfo(bool valid, bool implicit, bool range, bool inField, + bool inDefaultField, bool inImplicitDefaultField, + bool inValidField); /** * Constructor of the HandlerInfo class, taking a shared_ptr to the handler @@ -124,6 +130,30 @@ public: * Updates the "fields" flags according to a "fieldEnd" event. */ void fieldEnd(); + + /** + * Returns the name of the referenced handler or an empty string if no + * handler is present. + * + * @return the current handler name. + */ + std::string name() const; + + /** + * Returns the type of the referenced handler or COMMAND if no handler is + * present. + * + * @return the current handler type. + */ + HandlerType type() const; + + /** + * Returns the current state the handler is on or States::None if no handler + * is present. + * + * @return the current state machine state. 
+ */ + const State &state() const; }; HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {} @@ -133,6 +163,7 @@ HandlerInfo::HandlerInfo(std::shared_ptr handler) fieldIdx(0), valid(true), implicit(false), + range(false), inField(false), inDefaultField(false), inImplicitDefaultField(false), @@ -141,13 +172,14 @@ HandlerInfo::HandlerInfo(std::shared_ptr handler) { } -HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField, +HandlerInfo::HandlerInfo(bool valid, bool implicit, bool range, bool inField, bool inDefaultField, bool inImplicitDefaultField, bool inValidField) : handler(nullptr), fieldIdx(0), valid(valid), implicit(implicit), + range(range), inField(inField), inDefaultField(inDefaultField), inImplicitDefaultField(inImplicitDefaultField), @@ -156,6 +188,21 @@ HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField, { } +std::string HandlerInfo::name() const +{ + return handler == nullptr ? std::string{} : handler->name(); +} + +HandlerType HandlerInfo::type() const +{ + return handler == nullptr ? HandlerType::COMMAND : handler->type(); +} + +const State &HandlerInfo::state() const +{ + return handler == nullptr ? States::None : handler->state(); +} + HandlerInfo::~HandlerInfo() { // Do nothing @@ -182,7 +229,7 @@ void HandlerInfo::fieldEnd() /** * Stub instance of HandlerInfo containing no handler information. */ -static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true}; +static HandlerInfo EmptyHandlerInfo{true, true, false, true, true, false, true}; } /* Helper functions */ @@ -447,12 +494,12 @@ std::set StackImpl::expectedCommands() const State &StackImpl::currentState() const { - return stack.empty() ? States::None : stack.back().handler->state(); + return stack.empty() ? States::None : stack.back().state(); } std::string StackImpl::currentCommandName() const { - return stack.empty() ? std::string{} : stack.back().handler->name(); + return stack.empty() ? 
std::string{} : stack.back().name(); } const State *StackImpl::findTargetState(const std::string &name) @@ -665,6 +712,7 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, // but after all, we cannot unroll the stack any further. Update the // "valid" flag, commit any potential error messages and return. info.valid = parentInfo.valid && info.valid; + info.range = range; loggerFork.commit(); return; } -- cgit v1.2.3 From 1c33913ebb5d9202575d3ca99bd17366d30f2261 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Tue, 3 Mar 2015 00:30:38 +0100 Subject: Started restructuring and adapting Stack class, reenabled unit tests (does not compile right now) --- CMakeLists.txt | 2 +- src/core/parser/stack/Stack.cpp | 447 ++++++++------- src/core/parser/stack/Stack.hpp | 13 +- src/core/parser/utils/TokenizedData.cpp | 10 +- src/core/parser/utils/TokenizedData.hpp | 12 + test/core/parser/stack/StackTest.cpp | 959 ++++++++++++++++---------------- 6 files changed, 744 insertions(+), 699 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f99c212..c7ad7a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -326,7 +326,7 @@ IF(TEST) test/core/model/StyleTest test/core/model/TypesystemTest test/core/parser/ParserScopeTest -# test/core/parser/stack/StackTest + test/core/parser/stack/StackTest test/core/parser/stack/StateTest test/core/parser/stack/TokenRegistryTest test/core/parser/utils/SourceOffsetVectorTest diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index e5bd224..89217ea 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -86,6 +86,12 @@ public: */ bool inImplicitDefaultField : 1; + /** + * Set to true if the handler current is in an implicitly started range + * field. + */ + bool inImplicitRangeField: 1; + /** * Set to false if this field is only opened pro-forma and does not accept * any data. Otherwise set to true. 
@@ -230,6 +236,18 @@ void HandlerInfo::fieldEnd() * Stub instance of HandlerInfo containing no handler information. */ static HandlerInfo EmptyHandlerInfo{true, true, false, true, true, false, true}; + +/** + * Small helper class makeing sure the reference at some variable is reset once + * the scope is left. + */ +template +struct GuardedTemporaryPointer { + T **ptr; + GuardedTemporaryPointer(T *ref, T **ptr) : ptr(ptr) { *ptr = ref; } + + ~GuardedTemporaryPointer() { *ptr = nullptr; } +}; } /* Helper functions */ @@ -352,11 +370,18 @@ private: HandlerInfo &lastInfo(); /** - * Ends all handlers that currently are not inside a field and already had - * a default field. This method is called whenever the data() and command() - * events are reached. + * Returns a set containing the tokens that should currently be processed + * by the TokenizedData instance. + * + * @return a TokenSet instance containing all tokens that should currently + * be processed. */ - void endOverdueHandlers(); + TokenSet currentTokens() const; + + /** + * Returns the whitespace mode defined by the current command. + */ + WhitespaceMode currentWhitespaceMode() const; /** * Ends the current handler and removes the corresponding element from the @@ -365,13 +390,14 @@ private: void endCurrentHandler(); /** - * Tries to start a default field for the current handler, if currently the - * handler is not inside a field and did not have a default field yet. - * - * @return true if the handler is inside a field, false if no field could - * be started. + * Ends all handlers that currently are not inside a field and already had + * a default field. Tries to start a default field for the current handler, + * if currently the handler is not inside a field and did not have a default + * field yet. This method is called whenever the data(), startAnnotation(), + * startToken(), startCommand(), annotationStart() or annotationEnd() events + * are reached. 
*/ - bool ensureHandlerIsInField(); + void prepareCurrentHandler(); /** * Returns true if all handlers on the stack are currently valid, or false @@ -381,6 +407,30 @@ private: */ bool handlersValid(); + /** + * Called whenever there is an actual data pending on the current + * TokenizedDataReader. Tries to feed this data to the current handler. + */ + void handleData(); + + /** + * Called whenever there is a token waiting to be processed. If possible + * tries to end a current handler with this token or to start a new handler + * with the token. + * + * @param token is the token that should be handled. + */ + void handleToken(const Token &token); + + /** + * Called by the rangeEnd() and fieldEnd() methods to end the current ranged + * command. + * + * @param rangeCommand specifies whether this should end the range of a + * command with range. + */ + void handleFieldEnd(bool rangeCommand); + public: StackImpl(ParserCallbacks &parser, ParserContext &ctx, const std::multimap &states); @@ -403,7 +453,6 @@ public: TokenId registerToken(const std::string &token) override; void unregisterToken(TokenId id) override; Variant readData() override; - bool hasData(); void pushTokens(const std::vector &tokens) override; void popTokens() override; }; @@ -492,16 +541,6 @@ std::set StackImpl::expectedCommands() return res; } -const State &StackImpl::currentState() const -{ - return stack.empty() ? States::None : stack.back().state(); -} - -std::string StackImpl::currentCommandName() const -{ - return stack.empty() ? std::string{} : stack.back().name(); -} - const State *StackImpl::findTargetState(const std::string &name) { const State *currentState = &(this->currentState()); @@ -527,6 +566,28 @@ const State *StackImpl::findTargetStateOrWildcard(const std::string &name) return targetState; } +const State &StackImpl::currentState() const +{ + return stack.empty() ? States::None : stack.back().state(); +} + +std::string StackImpl::currentCommandName() const +{ + return stack.empty() ? 
std::string{} : stack.back().name(); +} + +TokenSet StackImpl::currentTokens() const +{ + // TODO: Implement + return Tokens{}; +} + +WhitespaceMode currentWhitespaceMode() const +{ + // TODO: Implement + return WhitespaceMode::COLLAPSE; +} + HandlerInfo &StackImpl::currentInfo() { return stack.empty() ? EmptyHandlerInfo : stack.back(); @@ -536,6 +597,8 @@ HandlerInfo &StackImpl::lastInfo() return stack.size() < 2U ? EmptyHandlerInfo : stack[stack.size() - 2]; } +/* Stack helper functions */ + void StackImpl::endCurrentHandler() { if (!stack.empty()) { @@ -563,44 +626,37 @@ void StackImpl::endCurrentHandler() } } -void StackImpl::endOverdueHandlers() +void StackImpl::prepareCurrentHandler() { - if (!stack.empty()) { - // Fetch the handler info for the current top-level element - HandlerInfo &info = stack.back(); + // Repeat until a valid handler is found on the stack + while (true) { + // Fetch the handler for the current top-level element + HandlerInfo &info = currentInfo(); - // Abort if this handler currently is inside a field - if (info.inField || (!info.hadDefaultField && info.valid)) { + // If the current Handler is in a field, there is nothing to be done, + // abort + if (info.inField) { return; } - // Otherwise end the current handler - endCurrentHandler(); - } -} - -bool StackImpl::ensureHandlerIsInField() -{ - // If the current handler is not in a field (and actually has a handler) - // try to start a default field - HandlerInfo &info = currentInfo(); - if (!info.inField && info.handler != nullptr) { - // Abort if the element already had a default field or the handler is - // not valid + // If the current field already had a default field or is not valid, + // end it and repeat if (info.hadDefaultField || !info.valid) { - return false; + endCurrentHandler(); + continue; } // Try to start a new default field, abort if this did not work bool isDefault = true; if (!info.handler->fieldStart(isDefault, info.fieldIdx)) { - return false; + endCurrentHandler(); + 
continue; } - // Mark the field as started - info.fieldStart(true, true, true); + // Mark the field as started and return -- the field should be marked + // is implicit if this is not a field with range + info.fieldStart(true, !info.range, true, info.range); } - return true; } bool StackImpl::handlersValid() @@ -613,13 +669,105 @@ bool StackImpl::handlersValid() return true; } +void StackImpl::handleData() +{ + // Repeat until we found some handle willingly consuming the data + while (true) { + // Prepare the stack -- make sure all overdue handlers are ended and + // we currently are in an open field + prepareCurrentHandler(); + + // Fetch the current handler information + HandlerInfo &info = currentInfo(); + + // If this field should not get any data, log an error and do not + // call the "data" handler + if (!info.inValidField) { + if (!info.hadDefaultField) { + logger().error("Did not expect any data here", data); + } + return; + } + + // If we're currently in an invalid subtree, just eat the data and abort + if (!handlersValid()) { + return; + } + + // Fork the logger and set it as temporary logger for the "data" + // method. We only want to keep error messages if this was not a + // try to implicitly open a default field. + LoggerFork loggerFork = logger().fork(); + info.handler->setLogger(loggerFork); + + // Pass the data to the current Handler instance + bool valid = false; + try { + valid = info.handler->data(); + } + catch (LoggableException ex) { + loggerFork.log(ex); + } + + // Reset the logger instance of the handler as soon as possible + info.handler->resetLogger(); + + // If placing the data here failed and we're currently in an + // implicitly opened field, just unroll the stack to the next field + // and try again + if (!valid && info.inImplicitDefaultField) { + endCurrentHandler(); + continue; + } + + // Commit the content of the logger fork. Do not change the valid flag. 
+ loggerFork.commit(); + } +} + +void StackImpl::handleToken(const Token &token) { + // TODO: Implement + // Just eat them for now +} + +void StackImpl::handleFieldEnd(bool rangedCommand) +{ + // Throw away all overdue handlers, start the default field at least once + // if this has not been done yet (this is important for range commands) + prepareStack(); + + // Close all implicit default fields + while (!stack.empty()) { + HandlerInfo &info = currentInfo(); + if (!info.inImplicitDefaultField) { + break; + } + endCurrentHandler(); + } + + // Fetch the information attached to the current handler + HandlerInfo &info = currentInfo(); + if (!info.inField || stack.empty()) { + logger().error("Got field end, but there is no field here to end"); + return; + } + + // Only continue if the current handler stack is in a valid state, do not + // call the fieldEnd function if something went wrong before + if (handlersValid()) { + if (info.range && info.inDefaultField) + info.handler->fieldEnd(); + } + + // This command no longer is in a field + info.fieldEnd(); +} + +/* Class StackImpl public functions */ + void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, bool range) { - // End handlers that already had a default field and are currently not - // active. 
- endOverdueHandlers(); - // Make sure the given identifier is valid (preventing "*" from being // malicously passed to this function) if (!Utils::isNamespacedIdentifier(name.asString())) { @@ -629,6 +777,10 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, } while (true) { + // Prepare the stack -- make sure all overdue handlers are ended and + // we currently are in an open field + prepareCurrentHandler(); + // Try to find a target state for the given command, if none can be // found and the current command does not have an open field, then try // to create an empty default field, otherwise this is an exception @@ -644,12 +796,6 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, } } - // Make sure we're currently inside a field - if (!ensureHandlerIsInField()) { - endCurrentHandler(); - continue; - } - // Fork the logger. We do not want any validation errors to skip LoggerFork loggerFork = logger().fork(); @@ -670,17 +816,14 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, HandlerInfo &parentInfo = lastInfo(); HandlerInfo &info = currentInfo(); - // Call the "start" method of the handler, store the result of the - // start - // method as the validity of the handler -- do not call the start - // method + // Call the "start" method of the handler, store the result of the start + // method as the validity of the handler -- do not call the start method // if the stack is currently invalid (as this may cause further, // unwanted errors) bool validStack = handlersValid(); info.valid = false; if (validStack) { - // Canonicalize the arguments (if this has not already been - // done), + // Canonicalize the arguments (if this has not already been done), // allow additional arguments and numeric indices Variant::mapType canonicalArgs = args; targetState->arguments.validateMap(canonicalArgs, loggerFork, true, @@ -697,10 +840,8 @@ void StackImpl::commandStart(const Variant 
&name, const Variant::mapType &args, } // We started the command within an implicit default field and it is - // not - // valid -- remove both the new handler and the parent field from - // the - // stack + // not valid -- remove both the new handler and the parent field from + // the stack if (!info.valid && parentInfo.inImplicitDefaultField) { endCurrentHandler(); endCurrentHandler(); @@ -708,9 +849,8 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, } // If we ended up here, starting the command may or may not have - // worked, - // but after all, we cannot unroll the stack any further. Update the - // "valid" flag, commit any potential error messages and return. + // worked, but after all, we cannot unroll the stack any further. Update + // the "valid" flag, commit any potential error messages and return. info.valid = parentInfo.valid && info.valid; info.range = range; loggerFork.commit(); @@ -732,106 +872,31 @@ void StackImpl::annotationEnd(const Variant &className, void StackImpl::rangeEnd() { - // TODO + handleFieldEnd(true); } void StackImpl::data(const TokenizedData &data) { - // TODO: Rewrite this function for token handling - // TODO: This loop needs to be refactored out - /*while (!data.atEnd()) { - // End handlers that already had a default field and are currently - not - // active. 
- endOverdueHandlers(); - - const bool hasNonWhitespaceText = data.hasNonWhitespaceText(); - - // Check whether there is any command the data can be sent to -- if - not, - // make sure the data actually is data - if (stack.empty()) { - if (hasNonWhitespaceText) { - throw LoggableException("No command here to receive data.", - data); - } - return; - } - - // Fetch the current command handler information - HandlerInfo &info = currentInfo(); - - // Make sure the current handler has an open field - if (!ensureHandlerIsInField()) { - endCurrentHandler(); - continue; - } - - // If this field should not get any data, log an error and do not - call - // the "data" handler - if (!info.inValidField) { - // If the "hadDefaultField" flag is set, we already issued an - error - // message - if (!info.hadDefaultField) { - if (hasNonWhitespaceText) { - logger().error("Did not expect any data here", data); - } - return; - } - } - - if (handlersValid() && info.inValidField) { - // Fork the logger and set it as temporary logger for the - "start" - // method. We only want to keep error messages if this was not a - try - // to implicitly open a default field. 
- LoggerFork loggerFork = logger().fork(); - info.handler->setLogger(loggerFork); - - // Pass the data to the current Handler instance - bool valid = false; - try { - // Create a fork of the TokenizedData and let the handler - work - // on it - TokenizedData dataFork = data; - valid = info.handler->data(dataFork); - - // If the data was validly handled by the handler, commit - the - // change - if (valid) { - data = dataFork; - } - } - catch (LoggableException ex) { - loggerFork.log(ex); - } - - // Reset the logger instance as soon as possible - info.handler->resetLogger(); - - // If placing the data here failed and we're currently in an - // implicitly opened field, just unroll the stack to the next - field - // and try again - if (!valid && info.inImplicitDefaultField) { - endCurrentHandler(); - continue; - } - - // Commit the content of the logger fork. Do not change the - valid - // flag. - loggerFork.commit(); - } - - // There was no reason to unroll the stack any further, so continue - return; - }*/ + // Fetch a reader for the given tokenized data instance. + TokenizedDataReader reader = data.reader(); + + // Use the GuardedTemporaryPointer to make sure that the member variable + // dataReader is resetted to nullptr once this scope is left. 
+ GuardedTemporaryPointer ptr(&reader, &dataReader); + + // Peek a token from the reader, repeat until all tokens have been read + Token token; + while (reader.peek(token, currentTokens(), currentWhitespaceMode())) { + // Handle the token as text data or as actual token + if (token.id == Tokens::Data) { + handleData(); + } else { + handleToken(token); + } + + // Consume the peeked token + reader.consumePeek(); + } } void StackImpl::fieldStart(bool isDefault) @@ -853,8 +918,7 @@ void StackImpl::fieldStart(bool isDefault) } // If the handler already had a default field we cannot start a new - // field - // (the default field always is the last field) -- mark the command as + // field (the default field always is the last field) -- mark the command as // invalid if (info.hadDefaultField) { logger().error(std::string("Got field start, but command \"") + @@ -862,8 +926,7 @@ void StackImpl::fieldStart(bool isDefault) std::string("\" does not have any more fields")); } - // Copy the isDefault flag to a local variable, the fieldStart method - // will + // Copy the isDefault flag to a local variable, the fieldStart method will // write into this variable bool defaultField = isDefault; @@ -891,40 +954,11 @@ void StackImpl::fieldStart(bool isDefault) void StackImpl::fieldEnd() { - // Unroll the stack until the next explicitly open field - while (!stack.empty()) { - HandlerInfo &info = currentInfo(); - if (info.inField && !info.inImplicitDefaultField) { - break; - } - endCurrentHandler(); - } - - // Fetch the information attached to the current handler - HandlerInfo &info = currentInfo(); - if (!info.inField || info.inImplicitDefaultField || stack.empty()) { - logger().error( - "Got field end, but there is no command for which to end the " - "field."); - return; - } - - // Only continue if the current handler stack is in a valid state, do - // not - // call the fieldEnd function if something went wrong before - if (handlersValid() && !info.hadDefaultField && info.inValidField) 
{ - try { - info.handler->fieldEnd(); - } - catch (LoggableException ex) { - logger().log(ex); - } - } - - // This command no longer is in a field - info.fieldEnd(); + handleFieldEnd(false); } +/* Class StackImpl HandlerCallbacks */ + TokenId StackImpl::registerToken(const std::string &token) { return tokenRegistry.registerToken(token); @@ -950,14 +984,7 @@ Variant StackImpl::readData() if (dataReader != nullptr) { TokenizedDataReaderFork dataReaderFork = dataReader->fork(); Token token; - - // TODO: Use correct token set - TokenSet tokens; - - // TODO: Use correct whitespace mode - WhitespaceMode mode = WhitespaceMode::COLLAPSE; - - dataReaderFork.read(token, tokens, mode); + dataReaderFork.read(token, currentTokens(), currentWhitespaceMode()); if (token.id == Tokens::Data) { Variant res = Variant::fromString(token.content); res.setLocation(token.getLocation()); @@ -967,8 +994,6 @@ Variant StackImpl::readData() return Variant{}; } -bool StackImpl::hasData() { return readData() != nullptr; } - /* Class Stack */ Stack::Stack(ParserCallbacks &parser, ParserContext &ctx, @@ -1013,5 +1038,7 @@ void Stack::fieldStart(bool isDefault) { impl->fieldStart(isDefault); } void Stack::fieldEnd() { impl->fieldEnd(); } void Stack::data(const TokenizedData &data) { impl->data(data); } + +void Stack::data(const std::string &str) { data(TokenizedData(str)); } } } diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index de281d4..1de7cff 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -150,13 +150,24 @@ public: /** * Function that should be called whenever character data is found in the - * input stream. May only be called if the currently is a command on the + * input stream. May only be called if there currently is a command on the * stack. * * @param data is a TokenizedData instance containing the pre-segmented data * that should be read. 
*/ void data(const TokenizedData &data); + + /** + * Function that may be called whenever character data is found in the + * input stream. May only be called if the currently is a command on the + * stack. This method is mainly intended for unit testing. Pass a + * TokenizedData instance to the + * + * @param str is a string containing the data that should be passed to the + * tokenizer. + */ + void data(const std::string &str); }; } } diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index c3c4f98..d8a8b37 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -29,8 +29,7 @@ namespace ousia { /** * Maximum token length. */ -constexpr TokenLength MaxTokenLength = - std::numeric_limits::max(); +constexpr TokenLength MaxTokenLength = std::numeric_limits::max(); namespace { /** @@ -510,6 +509,13 @@ TokenizedData::TokenizedData(SourceId sourceId) { } +TokenizedData::TokenizedData(const std::string &data, SourceOffset offsStart, + SourceId sourceId) + : TokenizedData(sourceId) +{ + append(data, offsStart); +} + TokenizedData::~TokenizedData() {} size_t TokenizedData::append(const std::string &data, SourceOffset offsStart, diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp index b72ca02..bc937f2 100644 --- a/src/core/parser/utils/TokenizedData.hpp +++ b/src/core/parser/utils/TokenizedData.hpp @@ -95,6 +95,18 @@ public: */ TokenizedData(SourceId sourceId); + /** + * Creates a new instance of TokenizedData, takes a SourceId and an initial + * string buffer. + * + * @param data is the string that should be appended to the buffer. + * @param offsStart is the start offset in bytes in the input file. + * @param sourceId is the source identifier that should be used for + * constructing the location when returning tokens. 
+ */ + TokenizedData(const std::string &data, SourceOffset offsStart = 0, + SourceId sourceId = InvalidSourceId); + /** * Destructor. Needs to be defined explicitly for freeing a shared pointer * of the incomplete TokenizedDataImpl type. diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index 83966d5..8f6c4df 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -38,70 +39,69 @@ static StandaloneEnvironment env(logger); namespace { +class Parser : public ParserCallbacks { + TokenId registerToken(const std::string &token) override + { + return Tokens::Empty; + } + + void unregisterToken(TokenId id) override + { + // Do nothing here + } +}; + +static Parser parser; + struct Tracker { - int startCount; + int startCommandCount; + int startAnnotationCount; + int startTokenCount; + int endTokenCount; int endCount; int fieldStartCount; int fieldEndCount; - int annotationStartCount; - int annotationEndCount; int dataCount; - Variant::mapType startArgs; - bool fieldStartIsDefault; - size_t fieldStartIdx; - Variant annotationStartClassName; - Variant::mapType annotationStartArgs; - Variant annotationEndClassName; - Variant annotationEndElementName; - TokenizedData dataData; - - bool startResult; - bool fieldStartSetIsDefault; + bool startCommandResult; + bool startAnnotationResult; + bool startTokenResult; + Handler::EndTokenResult endTokenResult; bool fieldStartResult; - bool annotationStartResult; - bool annotationEndResult; bool dataResult; Tracker() { reset(); } void reset() { - startCount = 0; + startCommandCount = 0; + startAnnotationCount = 0; + startTokenCount = 0; + endTokenCount = 0; endCount = 0; fieldStartCount = 0; fieldEndCount = 0; - annotationStartCount = 0; - annotationEndCount = 0; dataCount = 0; - startArgs = Variant::mapType{}; - fieldStartIsDefault = false; - fieldStartIdx = 0; - 
annotationStartClassName = Variant::fromString(std::string{}); - annotationStartArgs = Variant::mapType{}; - annotationEndClassName = Variant::fromString(std::string{}); - annotationEndElementName = Variant::fromString(std::string{}); - dataData = TokenizedData(); - - startResult = true; - fieldStartSetIsDefault = false; + startCommandResult = true; + startAnnotationResult = true; + startTokenResult = true; + endTokenResult = Handler::EndTokenResult::ENDED_THIS; fieldStartResult = true; - annotationStartResult = true; - annotationEndResult = true; dataResult = true; } - void expect(int startCount, int endCount, int fieldStartCount, - int fieldEndCount, int annotationStartCount, - int annotationEndCount, int dataCount) + void expect(int startCommandCount, int endCount, int fieldStartCount, + int fieldEndCount, int dataCount, int startAnnotationCount = 0, + int startTokenCount = 0, int endTokenCount = 0) { - EXPECT_EQ(startCount, this->startCount); + EXPECT_EQ(startCommandCount, this->startCommandCount); + EXPECT_EQ(startAnnotationCount, this->startAnnotationCount); + EXPECT_EQ(startTokenCount, this->startTokenCount); + EXPECT_EQ(endTokenCount, this->endTokenCount); EXPECT_EQ(endCount, this->endCount); EXPECT_EQ(fieldStartCount, this->fieldStartCount); EXPECT_EQ(fieldEndCount, this->fieldEndCount); - EXPECT_EQ(annotationStartCount, this->annotationStartCount); - EXPECT_EQ(annotationEndCount, this->annotationEndCount); EXPECT_EQ(dataCount, this->dataCount); } }; @@ -113,55 +113,44 @@ private: TestHandler(const HandlerData &handlerData) : Handler(handlerData) {} public: - bool start(Variant::mapType &args) override + bool startCommand(Variant::mapType &args) override { - tracker.startCount++; - tracker.startArgs = args; - if (!tracker.startResult) { - logger().error( - "The TestHandler was told not to allow a field start. So it " - "doesn't. 
The TestHandler always obeys its master."); - } - return tracker.startResult; + tracker.startCommandCount++; + return tracker.startCommandResult; } - void end() override { tracker.endCount++; } - - bool fieldStart(bool &isDefault, size_t fieldIdx) override + bool startAnnotation(Variant::mapType &args, + AnnotationType annotationType) override { - tracker.fieldStartCount++; - tracker.fieldStartIsDefault = isDefault; - tracker.fieldStartIdx = fieldIdx; - if (tracker.fieldStartSetIsDefault) { - isDefault = true; - } - return tracker.fieldStartResult; + tracker.startAnnotationCount++; + return tracker.startAnnotationResult; } - void fieldEnd() override { tracker.fieldEndCount++; } + bool startToken(Handle node) override + { + tracker.startTokenCount++; + return tracker.startTokenResult; + } - bool annotationStart(const Variant &className, - Variant::mapType &args) override + EndTokenResult endToken(const Token &token, Handle node) override { - tracker.annotationStartCount++; - tracker.annotationStartClassName = className; - tracker.annotationStartArgs = args; - return tracker.annotationStartResult; + tracker.endTokenCount++; + return tracker.endTokenResult; } - bool annotationEnd(const Variant &className, - const Variant &elementName) override + void end() override { tracker.endCount++; } + + bool fieldStart(bool &isDefault, size_t fieldIdx) override { - tracker.annotationEndCount++; - tracker.annotationEndClassName = className; - tracker.annotationEndElementName = elementName; - return tracker.annotationEndResult; + tracker.fieldStartCount++; + return tracker.fieldStartResult; } - bool data(TokenizedData &data) override + void fieldEnd() override { tracker.fieldEndCount++; } + + bool data() override { tracker.dataCount++; - tracker.dataData = data; return tracker.dataResult; } @@ -205,544 +194,544 @@ TEST(Stack, basicTest) tracker.reset(); logger.reset(); { - Stack s{env.context, States::TestHandlers}; + Stack s{parser, env.context, States::TestHandlers}; 
EXPECT_EQ("", s.currentCommandName()); EXPECT_EQ(&States::None, &s.currentState()); - s.command("document", {}); + s.commandStart("document", {}, true); s.fieldStart(true); s.data("test1"); EXPECT_EQ("document", s.currentCommandName()); EXPECT_EQ(&States::Document, &s.currentState()); - tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fse, dc, sac, stc, etc - s.command("body", {}); + s.commandStart("body", {}, true); s.fieldStart(true); s.data("test2"); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(2, 0, 2, 0, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 2, 0, 2); // scc, ec, fsc, fse, dc, sac, stc, etc - s.command("inner", {}); + s.commandStart("inner", {}, true); s.fieldStart(true); EXPECT_EQ("inner", s.currentCommandName()); EXPECT_EQ(&States::BodyChildren, &s.currentState()); s.fieldEnd(); - tracker.expect(3, 0, 3, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(3, 0, 3, 1, 2); // scc, ec, fsc, fse, dc, sac, stc, etc s.fieldEnd(); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(3, 1, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(3, 1, 3, 2, 2); // scc, ec, fsc, fse, dc, sac, stc, etc - s.command("body", {}); + s.commandStart("body", {}, true); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(4, 2, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(4, 2, 3, 2, 2); // scc, ec, fsc, fse, dc, sac, stc, etc s.fieldStart(true); s.data("test3"); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); s.fieldEnd(); - tracker.expect(4, 2, 4, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(4, 2, 4, 3, 3); // scc, ec, fsc, fse, dc, sac, stc, etc EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, 
&s.currentState()); s.fieldEnd(); - tracker.expect(4, 3, 4, 4, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(4, 3, 4, 4, 3); // scc, ec, fsc, fse, dc, sac, stc, etc EXPECT_EQ("document", s.currentCommandName()); EXPECT_EQ(&States::Document, &s.currentState()); } - tracker.expect(4, 4, 4, 4, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(4, 4, 4, 4, 3); // scc, ec, fsc, fse, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } - +/* TEST(Stack, errorInvalidCommands) { - Stack s{env.context, States::TestHandlers}; - tracker.reset(); - EXPECT_THROW(s.command("body", {}), LoggableException); - s.command("document", {}); - s.fieldStart(true); - EXPECT_THROW(s.command("document", {}), LoggableException); - s.command("empty", {}); - s.fieldStart(true); - EXPECT_THROW(s.command("body", {}), LoggableException); - s.command("special", {}); - s.fieldStart(true); - s.fieldEnd(); - s.fieldEnd(); - s.fieldEnd(); - - logger.reset(); - s.fieldEnd(); - ASSERT_TRUE(logger.hasError()); - - EXPECT_THROW(s.data("test"), LoggableException); - EXPECT_EQ(&States::None, &s.currentState()); + Stack s{env.context, States::TestHandlers}; + tracker.reset(); + EXPECT_THROW(s.command("body", {}), LoggableException); + s.command("document", {}); + s.fieldStart(true); + EXPECT_THROW(s.command("document", {}), LoggableException); + s.command("empty", {}); + s.fieldStart(true); + EXPECT_THROW(s.command("body", {}), LoggableException); + s.command("special", {}); + s.fieldStart(true); + s.fieldEnd(); + s.fieldEnd(); + s.fieldEnd(); + + logger.reset(); + s.fieldEnd(); + ASSERT_TRUE(logger.hasError()); + + EXPECT_THROW(s.data("test"), LoggableException); + EXPECT_EQ(&States::None, &s.currentState()); } TEST(Stack, validation) { - Stack s{env.context, States::TestHandlers}; - tracker.reset(); - logger.reset(); - - s.command("arguments", {}); - EXPECT_TRUE(logger.hasError()); - s.fieldStart(true); - s.fieldEnd(); - - logger.reset(); - s.command("arguments", {{"a", 5}}); - 
EXPECT_TRUE(logger.hasError()); - s.fieldStart(true); - s.fieldEnd(); - - logger.reset(); - s.command("arguments", {{"a", 5}, {"b", "test"}}); - EXPECT_FALSE(logger.hasError()); - s.fieldStart(true); - s.fieldEnd(); + Stack s{env.context, States::TestHandlers}; + tracker.reset(); + logger.reset(); + + s.command("arguments", {}); + EXPECT_TRUE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); + + logger.reset(); + s.command("arguments", {{"a", 5}}); + EXPECT_TRUE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); + + logger.reset(); + s.command("arguments", {{"a", 5}, {"b", "test"}}); + EXPECT_FALSE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); } TEST(Stack, invalidCommandName) { - tracker.reset(); - logger.reset(); - - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.command("a_", {}); - tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(2, 1, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.command("a_:b", {}); - tracker.expect(3, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(3, 2, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - ASSERT_THROW(s.command("_a", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - ASSERT_THROW(s.command("a:", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - ASSERT_THROW(s.command("a:_b", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.reset(); + logger.reset(); + + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc 
+ s.fieldStart(true); + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("a_", {}); + tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(2, 1, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("a_:b", {}); + tracker.expect(3, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(3, 2, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + ASSERT_THROW(s.command("_a", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + ASSERT_THROW(s.command("a:", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + ASSERT_THROW(s.command("a:_b", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc } TEST(Stack, multipleFields) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - - s.command("a", {{"a", false}}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("a", s.currentCommandName()); - EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs); - - s.fieldStart(false); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_FALSE(tracker.fieldStartIsDefault); - EXPECT_EQ(0U, tracker.fieldStartIdx); - - s.data("test"); - tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test", tracker.dataData.text().asString()); - - s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldStart(false); - tracker.expect(1, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_FALSE(tracker.fieldStartIsDefault); - EXPECT_EQ(1U, tracker.fieldStartIdx); - - s.data("test2"); - tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test2", 
tracker.dataData.text().asString()); - - s.fieldEnd(); - tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldStart(true); - tracker.expect(1, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_TRUE(tracker.fieldStartIsDefault); - EXPECT_EQ(2U, tracker.fieldStartIdx); - - s.data("test3"); - tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test3", tracker.dataData.text().asString()); - - s.fieldEnd(); - tracker.expect(1, 0, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {{"a", false}}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("a", s.currentCommandName()); + EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs); + + s.fieldStart(false); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_FALSE(tracker.fieldStartIsDefault); + EXPECT_EQ(0U, tracker.fieldStartIdx); + + s.data("test"); + tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test", tracker.dataData.text().asString()); + + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + tracker.expect(1, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_FALSE(tracker.fieldStartIsDefault); + EXPECT_EQ(1U, tracker.fieldStartIdx); + + s.data("test2"); + tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test2", tracker.dataData.text().asString()); + + s.fieldEnd(); + tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(true); + tracker.expect(1, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_TRUE(tracker.fieldStartIsDefault); + EXPECT_EQ(2U, tracker.fieldStartIdx); + 
+ s.data("test3"); + tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + EXPECT_EQ("test3", tracker.dataData.text().asString()); + + s.fieldEnd(); + tracker.expect(1, 0, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, implicitDefaultFieldOnNewCommand) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("b", {}); + tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, implicitDefaultFieldOnNewCommandWithExplicitDefaultField) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); - - s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(2, 0, 2, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - } - tracker.expect(2, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); 
+ tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + s.command("b", {}); + tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(2, 0, 2, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, noImplicitDefaultFieldOnIncompatibleCommand) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); - - tracker.fieldStartResult = false; - s.command("b", {}); - tracker.expect(2, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - } - tracker.expect(2, 2, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + tracker.fieldStartResult = false; + s.command("b", {}); + tracker.expect(2, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); - s.fieldStart(true); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", 
s.currentCommandName()); - s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); - - s.command("b", {}); - tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - } - tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + s.fieldStart(true); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + s.command("b", {}); + tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, noEndIfStartFails) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); - - tracker.startResult = false; - s.command("b", {}); - tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - } - tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_TRUE(logger.hasError()); + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("a", s.currentCommandName()); + + tracker.startResult = false; + s.command("b", {}); + tracker.expect(3, 1, 1, 
1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_TRUE(logger.hasError()); } TEST(Stack, implicitDefaultFieldOnData) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.data("test"); - tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + { + Stack s{env.context, States::AnyHandlers}; + + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.data("test"); + tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, autoFieldEnd) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, autoImplicitFieldEnd) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - s.command("b", {}); - s.command("c", {}); - s.command("d", {}); - s.command("e", {}); - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(5, 0, 5, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - 
tracker.expect(5, 5, 5, 5, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + s.command("b", {}); + s.command("c", {}); + s.command("d", {}); + s.command("e", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(5, 0, 5, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(5, 5, 5, 5, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, invalidDefaultField) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.fieldStartResult = false; - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, errorInvalidDefaultFieldData) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.fieldStartResult = false; - s.fieldStart(true); - ASSERT_FALSE(logger.hasError()); - s.data("test"); - ASSERT_TRUE(logger.hasError()); - s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + s.fieldStart(true); + ASSERT_FALSE(logger.hasError()); + s.data("test"); + 
ASSERT_TRUE(logger.hasError()); + s.fieldEnd(); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc } TEST(Stack, errorInvalidFieldData) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.fieldStartResult = false; - ASSERT_FALSE(logger.hasError()); - s.fieldStart(false); - ASSERT_TRUE(logger.hasError()); - s.data("test"); - s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.fieldStartResult = false; + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + s.data("test"); + s.fieldEnd(); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc } TEST(Stack, errorFieldStartNoCommand) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - Stack s{env.context, States::AnyHandlers}; - ASSERT_THROW(s.fieldStart(false), LoggableException); - ASSERT_THROW(s.fieldStart(true), LoggableException); - tracker.expect(0, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{env.context, States::AnyHandlers}; + ASSERT_THROW(s.fieldStart(false), LoggableException); + ASSERT_THROW(s.fieldStart(true), LoggableException); + tracker.expect(0, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc } TEST(Stack, errorMultipleFieldStarts) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldStart(false); - ASSERT_FALSE(logger.hasError()); - s.fieldStart(false); - ASSERT_TRUE(logger.hasError()); - 
tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc } TEST(Stack, errorMultipleFieldEnds) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldStart(false); - s.fieldEnd(); - ASSERT_FALSE(logger.hasError()); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldEnd(); - ASSERT_TRUE(logger.hasError()); - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + s.fieldEnd(); + ASSERT_FALSE(logger.hasError()); + tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc } TEST(Stack, errorOpenField) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, 
States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldStart(false); - ASSERT_FALSE(logger.hasError()); - } - ASSERT_TRUE(logger.hasError()); - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + ASSERT_FALSE(logger.hasError()); + } + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc } TEST(Stack, fieldEndWhenImplicitDefaultFieldOpen) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - s.fieldStart(true); - s.command("b", {}); - s.data("test"); - s.fieldEnd(); - tracker.expect(2, 1, 2, 2, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(2, 2, 2, 2, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + s.fieldStart(true); + s.command("b", {}); + s.data("test"); + s.fieldEnd(); + tracker.expect(2, 1, 2, 2, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(2, 2, 2, 2, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, fieldAfterDefaultField) { - tracker.reset(); - logger.reset(); - - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldStart(true); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldStart(false); - tracker.expect(2, 0, 2, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.data("f1"); - tracker.expect(2, 0, 2, 0, 
0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - s.fieldEnd(); - tracker.expect(2, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - tracker.fieldStartSetIsDefault = true; - - s.fieldStart(false); - tracker.fieldStartSetIsDefault = false; - tracker.expect(2, 0, 3, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - s.data("f2"); - tracker.expect(2, 0, 3, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - s.fieldEnd(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - - ASSERT_FALSE(logger.hasError()); - s.fieldStart(false); - ASSERT_TRUE(logger.hasError()); - logger.reset(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - s.data("f3"); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - s.fieldEnd(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - - s.fieldEnd(); - tracker.expect(2, 1, 3, 3, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(2, 2, 3, 3, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); -} + tracker.reset(); + logger.reset(); + + { + Stack s{env.context, States::AnyHandlers}; + s.command("a", {}); + tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldStart(true); + tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.command("b", {}); + tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldStart(false); + tracker.expect(2, 0, 2, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.data("f1"); + tracker.expect(2, 0, 2, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + tracker.expect(2, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.fieldStartSetIsDefault = true; + + s.fieldStart(false); + tracker.fieldStartSetIsDefault = false; + tracker.expect(2, 0, 3, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + s.data("f2"); + tracker.expect(2, 0, 3, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + 
tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + logger.reset(); + tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + s.data("f3"); + tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + + s.fieldEnd(); + tracker.expect(2, 1, 3, 3, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + } + tracker.expect(2, 2, 3, 3, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + ASSERT_FALSE(logger.hasError()); +}*/ } } -- cgit v1.2.3 From 21aa94db203c0b1bcab18bc4858edcdb2afc894d Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Tue, 3 Mar 2015 14:33:55 +0100 Subject: Reactivated main program --- CMakeLists.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7ad7a3..f6807f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -276,19 +276,19 @@ TARGET_LINK_LIBRARIES(ousia_xml # Command line interface -#ADD_EXECUTABLE(ousia -# src/cli/Main -#) +ADD_EXECUTABLE(ousia + src/cli/Main +) -#TARGET_LINK_LIBRARIES(ousia -# ousia_core -# ousia_filesystem -# ousia_html -# ousia_xml -# ousia_osml -# ousia_osxml -# ${Boost_LIBRARIES} -#) +TARGET_LINK_LIBRARIES(ousia + ousia_core + ousia_filesystem + ousia_html + ousia_xml + ousia_osml + ousia_osxml + ${Boost_LIBRARIES} +) # If testing is enabled, build the unit tests IF(TEST) -- cgit v1.2.3 From fb8d4cdf01909b61e4e5d0806ec6de178ff0058c Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Tue, 3 Mar 2015 14:34:14 +0100 Subject: Finished stack and adapted all unit tests --- src/core/parser/stack/Stack.cpp | 218 +++++++--- src/core/parser/stack/Stack.hpp | 4 +- test/core/parser/stack/StackTest.cpp | 772 ++++++++++++++++++++--------------- 3 files changed, 595 insertions(+), 399 deletions(-) diff --git 
a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 89217ea..f341f1d 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -30,9 +30,15 @@ #include "TokenRegistry.hpp" #include "TokenStack.hpp" +#define STACK_DEBUG_OUTPUT 0 +#if STACK_DEBUG_OUTPUT +#include +#endif + namespace ousia { namespace parser_stack { namespace { + /* Class HandlerInfo */ /** @@ -86,12 +92,6 @@ public: */ bool inImplicitDefaultField : 1; - /** - * Set to true if the handler current is in an implicitly started range - * field. - */ - bool inImplicitRangeField: 1; - /** * Set to false if this field is only opened pro-forma and does not accept * any data. Otherwise set to true. @@ -109,11 +109,10 @@ public: HandlerInfo(); /** - * Constructor of the HandlerInfo class, allows to set all flags manually. + * Constructor of the HandlerInfo class, allows to set some flags manually. */ - HandlerInfo(bool valid, bool implicit, bool range, bool inField, - bool inDefaultField, bool inImplicitDefaultField, - bool inValidField); + HandlerInfo(bool implicit, bool inField, bool inDefaultField, + bool inImplicitDefaultField); /** * Constructor of the HandlerInfo class, taking a shared_ptr to the handler @@ -178,18 +177,17 @@ HandlerInfo::HandlerInfo(std::shared_ptr handler) { } -HandlerInfo::HandlerInfo(bool valid, bool implicit, bool range, bool inField, - bool inDefaultField, bool inImplicitDefaultField, - bool inValidField) +HandlerInfo::HandlerInfo(bool implicit, bool inField, bool inDefaultField, + bool inImplicitDefaultField) : handler(nullptr), fieldIdx(0), - valid(valid), + valid(true), implicit(implicit), - range(range), + range(false), inField(inField), inDefaultField(inDefaultField), inImplicitDefaultField(inImplicitDefaultField), - inValidField(inValidField), + inValidField(true), hadDefaultField(false) { } @@ -235,7 +233,7 @@ void HandlerInfo::fieldEnd() /** * Stub instance of HandlerInfo containing no handler information. 
*/ -static HandlerInfo EmptyHandlerInfo{true, true, false, true, true, false, true}; +static HandlerInfo EmptyHandlerInfo{true, true, true, true}; /** * Small helper class makeing sure the reference at some variable is reset once @@ -386,8 +384,10 @@ private: /** * Ends the current handler and removes the corresponding element from the * stack. + * + * @return true if a command was ended, false otherwise. */ - void endCurrentHandler(); + bool endCurrentHandler(); /** * Ends all handlers that currently are not inside a field and already had @@ -396,8 +396,10 @@ private: * field yet. This method is called whenever the data(), startAnnotation(), * startToken(), startCommand(), annotationStart() or annotationEnd() events * are reached. + * + * @return true if the current command is in a valid field. */ - void prepareCurrentHandler(); + bool prepareCurrentHandler(bool startImplicitDefaultField = true); /** * Returns true if all handlers on the stack are currently valid, or false @@ -413,23 +415,23 @@ private: */ void handleData(); - /** - * Called whenever there is a token waiting to be processed. If possible - * tries to end a current handler with this token or to start a new handler - * with the token. - * - * @param token is the token that should be handled. - */ - void handleToken(const Token &token); + /** + * Called whenever there is a token waiting to be processed. If possible + * tries to end a current handler with this token or to start a new handler + * with the token. + * + * @param token is the token that should be handled. + */ + void handleToken(const Token &token); /** * Called by the rangeEnd() and fieldEnd() methods to end the current ranged * command. * - * @param rangeCommand specifies whether this should end the range of a + * @param endRange specifies whether this should end the range of a * command with range. 
*/ - void handleFieldEnd(bool rangeCommand); + void handleFieldEnd(bool endRange); public: StackImpl(ParserCallbacks &parser, ParserContext &ctx, @@ -579,10 +581,10 @@ std::string StackImpl::currentCommandName() const TokenSet StackImpl::currentTokens() const { // TODO: Implement - return Tokens{}; + return TokenSet{}; } -WhitespaceMode currentWhitespaceMode() const +WhitespaceMode StackImpl::currentWhitespaceMode() const { // TODO: Implement return WhitespaceMode::COLLAPSE; @@ -599,7 +601,7 @@ HandlerInfo &StackImpl::lastInfo() /* Stack helper functions */ -void StackImpl::endCurrentHandler() +bool StackImpl::endCurrentHandler() { if (!stack.empty()) { // Fetch the handler info for the current top-level element @@ -623,29 +625,43 @@ void StackImpl::endCurrentHandler() // Remove the element from the stack stack.pop_back(); + return true; } + return false; } -void StackImpl::prepareCurrentHandler() +bool StackImpl::prepareCurrentHandler(bool startImplicitDefaultField) { // Repeat until a valid handler is found on the stack - while (true) { + while (!stack.empty()) { // Fetch the handler for the current top-level element HandlerInfo &info = currentInfo(); // If the current Handler is in a field, there is nothing to be done, // abort if (info.inField) { - return; + return true; } // If the current field already had a default field or is not valid, // end it and repeat - if (info.hadDefaultField || !info.valid) { + if ((info.hadDefaultField || !startImplicitDefaultField) || + !info.valid) { + // We cannot end the command if it is marked as "range" command + if (info.range) { + return false; + } + + // End the current handler endCurrentHandler(); continue; } + // Abort if starting new default fields is not allowed here + if (!startImplicitDefaultField) { + return false; + } + // Try to start a new default field, abort if this did not work bool isDefault = true; if (!info.handler->fieldStart(isDefault, info.fieldIdx)) { @@ -655,8 +671,10 @@ void 
StackImpl::prepareCurrentHandler() // Mark the field as started and return -- the field should be marked // is implicit if this is not a field with range - info.fieldStart(true, !info.range, true, info.range); + info.fieldStart(true, !info.range, true); + return true; } + return false; } bool StackImpl::handlersValid() @@ -675,7 +693,9 @@ void StackImpl::handleData() while (true) { // Prepare the stack -- make sure all overdue handlers are ended and // we currently are in an open field - prepareCurrentHandler(); + if (stack.empty() || !prepareCurrentHandler()) { + throw LoggableException("Did not expect any data here"); + } // Fetch the current handler information HandlerInfo &info = currentInfo(); @@ -684,7 +704,7 @@ void StackImpl::handleData() // call the "data" handler if (!info.inValidField) { if (!info.hadDefaultField) { - logger().error("Did not expect any data here", data); + logger().error("Did not expect any data here"); } return; } @@ -722,24 +742,25 @@ void StackImpl::handleData() // Commit the content of the logger fork. Do not change the valid flag. 
loggerFork.commit(); + return; } } -void StackImpl::handleToken(const Token &token) { +void StackImpl::handleToken(const Token &token) +{ // TODO: Implement // Just eat them for now } -void StackImpl::handleFieldEnd(bool rangedCommand) +void StackImpl::handleFieldEnd(bool endRange) { - // Throw away all overdue handlers, start the default field at least once - // if this has not been done yet (this is important for range commands) - prepareStack(); + // Throw away all overdue handlers + prepareCurrentHandler(false); // Close all implicit default fields while (!stack.empty()) { HandlerInfo &info = currentInfo(); - if (!info.inImplicitDefaultField) { + if (!info.inImplicitDefaultField || info.range) { break; } endCurrentHandler(); @@ -747,16 +768,37 @@ void StackImpl::handleFieldEnd(bool rangedCommand) // Fetch the information attached to the current handler HandlerInfo &info = currentInfo(); - if (!info.inField || stack.empty()) { - logger().error("Got field end, but there is no field here to end"); + if (stack.empty() || (!info.inField && !endRange) || + (!info.range && endRange)) { + if (endRange) { + logger().error( + "Got end of range, but there is no command here to end"); + } else { + logger().error("Got field end, but there is no field here to end"); + } return; } // Only continue if the current handler stack is in a valid state, do not // call the fieldEnd function if something went wrong before if (handlersValid()) { - if (info.range && info.inDefaultField) - info.handler->fieldEnd(); + // End the current field if it is valid + if (info.inValidField) { + info.handler->fieldEnd(); + info.fieldEnd(); + } + + // End the complete command if this is a range command, start the + // default field for once if range command did not have a default field + if (info.range && endRange) { + if (!info.hadDefaultField) { + bool isDefault = true; + info.handler->fieldStart(isDefault, true); + info.fieldStart(true, true, true); + } + endCurrentHandler(); + return; + } } // 
This command no longer is in a field @@ -768,6 +810,9 @@ void StackImpl::handleFieldEnd(bool rangedCommand) void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, bool range) { + // Call prepareCurrentHandler once to end all overdue commands + prepareCurrentHandler(); + // Make sure the given identifier is valid (preventing "*" from being // malicously passed to this function) if (!Utils::isNamespacedIdentifier(name.asString())) { @@ -787,8 +832,8 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, const State *targetState = findTargetStateOrWildcard(name.asString()); if (targetState == nullptr) { HandlerInfo &info = currentInfo(); - if (info.inImplicitDefaultField || !info.inField) { - endCurrentHandler(); + if ((info.inImplicitDefaultField || !info.inField) && + endCurrentHandler()) { continue; } else { throw buildInvalidCommandException(name.asString(), @@ -843,9 +888,10 @@ void StackImpl::commandStart(const Variant &name, const Variant::mapType &args, // not valid -- remove both the new handler and the parent field from // the stack if (!info.valid && parentInfo.inImplicitDefaultField) { - endCurrentHandler(); - endCurrentHandler(); - continue; + // Only continue if the parent handler could actually be removed + if (endCurrentHandler() && endCurrentHandler()) { + continue; + } } // If we ended up here, starting the command may or may not have @@ -870,10 +916,7 @@ void StackImpl::annotationEnd(const Variant &className, // TODO } -void StackImpl::rangeEnd() -{ - handleFieldEnd(true); -} +void StackImpl::rangeEnd() { handleFieldEnd(true); } void StackImpl::data(const TokenizedData &data) { @@ -882,7 +925,7 @@ void StackImpl::data(const TokenizedData &data) // Use the GuardedTemporaryPointer to make sure that the member variable // dataReader is resetted to nullptr once this scope is left. 
- GuardedTemporaryPointer ptr(&reader, &dataReader); + GuardedTemporaryPointer ptr(&reader, &dataReader); // Peek a token from the reader, repeat until all tokens have been read Token token; @@ -952,10 +995,7 @@ void StackImpl::fieldStart(bool isDefault) info.fieldStart(defaultField, false, valid); } -void StackImpl::fieldEnd() -{ - handleFieldEnd(false); -} +void StackImpl::fieldEnd() { handleFieldEnd(false); } /* Class StackImpl HandlerCallbacks */ @@ -1017,28 +1057,70 @@ std::string Stack::currentCommandName() const void Stack::commandStart(const Variant &name, const Variant::mapType &args, bool range) { +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: commandStart " << name << " " << args << " " << range + << std::endl; +#endif impl->commandStart(name, args, range); } void Stack::annotationStart(const Variant &className, const Variant &args, bool range) { +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: annotationStart " << className << " " << args << " " + << range << std::endl; +#endif impl->annotationStart(className, args, range); } void Stack::annotationEnd(const Variant &className, const Variant &elementName) { +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: annotationEnd " << className << " " << elementName + << std::endl; +#endif impl->annotationEnd(className, elementName); } -void Stack::rangeEnd() { impl->rangeEnd(); } +void Stack::rangeEnd() +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: rangeEnd" << std::endl; +#endif + impl->rangeEnd(); +} -void Stack::fieldStart(bool isDefault) { impl->fieldStart(isDefault); } +void Stack::fieldStart(bool isDefault) +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: fieldStart " << isDefault << std::endl; +#endif + impl->fieldStart(isDefault); +} -void Stack::fieldEnd() { impl->fieldEnd(); } +void Stack::fieldEnd() +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: fieldEnd" << std::endl; +#endif + impl->fieldEnd(); +} -void Stack::data(const TokenizedData &data) { impl->data(data); } +void Stack::data(const 
TokenizedData &data) +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: data" << std::endl; +#endif + impl->data(data); +} -void Stack::data(const std::string &str) { data(TokenizedData(str)); } +void Stack::data(const std::string &str) +{ +#if STACK_DEBUG_OUTPUT + std::cout << "STACK: data (string) " << str << std::endl; +#endif + data(TokenizedData(str)); +} } } diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index 1de7cff..6d42f10 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -104,7 +104,7 @@ public: * @param range if true, the started command has an explicit range. */ void commandStart(const Variant &name, const Variant::mapType &args, - bool range); + bool range = false); /** * Function that should be called whenever an annotation starts. @@ -115,7 +115,7 @@ public: * @param range if true, the annotation fields have an explicit range. */ void annotationStart(const Variant &className, const Variant &args, - bool range); + bool range = false); /** * Function that should be called whenever an annotation ends. 
diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index 8f6c4df..a831c32 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -70,6 +70,16 @@ struct Tracker { bool fieldStartResult; bool dataResult; + Variant::mapType startCommandArgs; + Variant::mapType startAnnotationArgs; + + bool fieldStartReturnValue; + size_t fieldStartIdx; + bool fieldStartIsDefault; + bool fieldStartSetIsDefault; + + Variant dataData; + Tracker() { reset(); } void reset() @@ -89,6 +99,15 @@ struct Tracker { endTokenResult = Handler::EndTokenResult::ENDED_THIS; fieldStartResult = true; dataResult = true; + + startCommandArgs = Variant::mapType{}; + startAnnotationArgs = Variant::mapType{}; + + fieldStartIdx = 0; + fieldStartIsDefault = false; + fieldStartSetIsDefault = false; + + dataData = Variant{}; } void expect(int startCommandCount, int endCount, int fieldStartCount, @@ -115,13 +134,20 @@ private: public: bool startCommand(Variant::mapType &args) override { + tracker.startCommandArgs = args; tracker.startCommandCount++; + if (!tracker.startCommandResult) { + logger().error( + "TestHandler was told not to allow a command start. 
" + "TestHandler always obeys its master."); + } return tracker.startCommandResult; } bool startAnnotation(Variant::mapType &args, AnnotationType annotationType) override { + tracker.startAnnotationArgs = args; tracker.startAnnotationCount++; return tracker.startAnnotationResult; } @@ -142,6 +168,11 @@ public: bool fieldStart(bool &isDefault, size_t fieldIdx) override { + tracker.fieldStartIsDefault = isDefault; + tracker.fieldStartIdx = fieldIdx; + if (tracker.fieldStartSetIsDefault) { + isDefault = true; + } tracker.fieldStartCount++; return tracker.fieldStartResult; } @@ -150,6 +181,7 @@ public: bool data() override { + tracker.dataData = readData(); tracker.dataCount++; return tracker.dataResult; } @@ -199,456 +231,518 @@ TEST(Stack, basicTest) EXPECT_EQ("", s.currentCommandName()); EXPECT_EQ(&States::None, &s.currentState()); - s.commandStart("document", {}, true); + s.commandStart("document", {}); s.fieldStart(true); s.data("test1"); EXPECT_EQ("document", s.currentCommandName()); EXPECT_EQ(&States::Document, &s.currentState()); - tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fse, dc, sac, stc, etc + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc - s.commandStart("body", {}, true); + s.commandStart("body", {}); s.fieldStart(true); s.data("test2"); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(2, 0, 2, 0, 2); // scc, ec, fsc, fse, dc, sac, stc, etc + tracker.expect(2, 0, 2, 0, 2); // scc, ec, fsc, fec, dc, sac, stc, etc - s.commandStart("inner", {}, true); + s.commandStart("inner", {}); s.fieldStart(true); EXPECT_EQ("inner", s.currentCommandName()); EXPECT_EQ(&States::BodyChildren, &s.currentState()); s.fieldEnd(); - tracker.expect(3, 0, 3, 1, 2); // scc, ec, fsc, fse, dc, sac, stc, etc + tracker.expect(3, 0, 3, 1, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldEnd(); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(3, 
1, 3, 2, 2); // scc, ec, fsc, fse, dc, sac, stc, etc + tracker.expect(3, 1, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc - s.commandStart("body", {}, true); + s.commandStart("body", {}); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); - tracker.expect(4, 2, 3, 2, 2); // scc, ec, fsc, fse, dc, sac, stc, etc + tracker.expect(4, 2, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(true); s.data("test3"); EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); s.fieldEnd(); - tracker.expect(4, 2, 4, 3, 3); // scc, ec, fsc, fse, dc, sac, stc, etc + tracker.expect(4, 2, 4, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc EXPECT_EQ("body", s.currentCommandName()); EXPECT_EQ(&States::Body, &s.currentState()); s.fieldEnd(); - tracker.expect(4, 3, 4, 4, 3); // scc, ec, fsc, fse, dc, sac, stc, etc + tracker.expect(4, 3, 4, 4, 3); // scc, ec, fsc, fec, dc, sac, stc, etc EXPECT_EQ("document", s.currentCommandName()); EXPECT_EQ(&States::Document, &s.currentState()); } - tracker.expect(4, 4, 4, 4, 3); // scc, ec, fsc, fse, dc, sac, stc, etc + tracker.expect(4, 4, 4, 4, 3); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } -/* + +TEST(Stack, basicTestRangeCommands) +{ + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::TestHandlers}; + + EXPECT_EQ("", s.currentCommandName()); + EXPECT_EQ(&States::None, &s.currentState()); + + s.commandStart("document", {}, true); + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.data("test1"); + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.commandStart("body", {}, true); + tracker.expect(2, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + s.data("test2"); + tracker.expect(2, 0, 2, 0, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + 
EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + + s.commandStart("inner", {}, true); + tracker.expect(3, 0, 2, 0, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("inner", s.currentCommandName()); + EXPECT_EQ(&States::BodyChildren, &s.currentState()); + s.rangeEnd(); + tracker.expect(3, 1, 3, 1, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + s.rangeEnd(); + tracker.expect(3, 2, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.commandStart("body", {}, true); + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + tracker.expect(4, 2, 3, 2, 2); // scc, ec, fsc, fse, dc, sac, stc, etc + s.fieldStart(true); + tracker.expect(4, 2, 4, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + s.data("test3"); + tracker.expect(4, 2, 4, 2, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + s.fieldEnd(); + tracker.expect(4, 2, 4, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("body", s.currentCommandName()); + EXPECT_EQ(&States::Body, &s.currentState()); + s.rangeEnd(); + tracker.expect(4, 3, 4, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + + EXPECT_EQ("document", s.currentCommandName()); + EXPECT_EQ(&States::Document, &s.currentState()); + s.rangeEnd(); + tracker.expect(4, 4, 4, 4, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(4, 4, 4, 4, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); +} + TEST(Stack, errorInvalidCommands) { - Stack s{env.context, States::TestHandlers}; - tracker.reset(); - EXPECT_THROW(s.command("body", {}), LoggableException); - s.command("document", {}); - s.fieldStart(true); - EXPECT_THROW(s.command("document", {}), LoggableException); - s.command("empty", {}); - s.fieldStart(true); - EXPECT_THROW(s.command("body", {}), 
LoggableException); - s.command("special", {}); - s.fieldStart(true); - s.fieldEnd(); - s.fieldEnd(); - s.fieldEnd(); + Stack s{parser, env.context, States::TestHandlers}; + tracker.reset(); + EXPECT_THROW(s.commandStart("body", {}), LoggableException); + s.commandStart("document", {}); + s.fieldStart(true); + EXPECT_THROW(s.commandStart("document", {}), LoggableException); + s.commandStart("empty", {}); + s.fieldStart(true); + EXPECT_THROW(s.commandStart("body", {}), LoggableException); + s.commandStart("special", {}); + s.fieldStart(true); + s.fieldEnd(); + s.fieldEnd(); + s.fieldEnd(); - logger.reset(); - s.fieldEnd(); - ASSERT_TRUE(logger.hasError()); + logger.reset(); + s.fieldEnd(); + ASSERT_TRUE(logger.hasError()); - EXPECT_THROW(s.data("test"), LoggableException); - EXPECT_EQ(&States::None, &s.currentState()); + EXPECT_THROW(s.data("test"), LoggableException); + EXPECT_EQ(&States::None, &s.currentState()); } TEST(Stack, validation) { - Stack s{env.context, States::TestHandlers}; - tracker.reset(); - logger.reset(); + Stack s{parser, env.context, States::TestHandlers}; + tracker.reset(); + logger.reset(); - s.command("arguments", {}); - EXPECT_TRUE(logger.hasError()); - s.fieldStart(true); - s.fieldEnd(); + s.commandStart("arguments", {}); + EXPECT_TRUE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); - logger.reset(); - s.command("arguments", {{"a", 5}}); - EXPECT_TRUE(logger.hasError()); - s.fieldStart(true); - s.fieldEnd(); + logger.reset(); + s.commandStart("arguments", {{"a", 5}}, false); + EXPECT_TRUE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); - logger.reset(); - s.command("arguments", {{"a", 5}, {"b", "test"}}); - EXPECT_FALSE(logger.hasError()); - s.fieldStart(true); - s.fieldEnd(); + logger.reset(); + s.commandStart("arguments", {{"a", 5}, {"b", "test"}}, false); + EXPECT_FALSE(logger.hasError()); + s.fieldStart(true); + s.fieldEnd(); } TEST(Stack, invalidCommandName) { - tracker.reset(); - logger.reset(); + tracker.reset(); 
+ logger.reset(); - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.command("a_", {}); - tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(2, 1, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - s.command("a_:b", {}); - tracker.expect(3, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(3, 2, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - ASSERT_THROW(s.command("_a", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - ASSERT_THROW(s.command("a:", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - - ASSERT_THROW(s.command("a:_b", {}), LoggableException); - tracker.expect(3, 3, 3, 3, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.commandStart("a_", {}); + tracker.expect(2, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(2, 1, 2, 2, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + s.commandStart("a_:b", {}); + tracker.expect(3, 2, 2, 2, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(3, 2, 3, 3, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + ASSERT_THROW(s.commandStart("_a", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + ASSERT_THROW(s.commandStart("a:", {}), LoggableException); + tracker.expect(3, 3, 
3, 3, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + + ASSERT_THROW(s.commandStart("a:_b", {}), LoggableException); + tracker.expect(3, 3, 3, 3, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, multipleFields) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {{"a", false}}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("a", s.currentCommandName()); - EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs); + s.commandStart("a", {{"a", false}}, false); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("a", s.currentCommandName()); + EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startCommandArgs); - s.fieldStart(false); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_FALSE(tracker.fieldStartIsDefault); - EXPECT_EQ(0U, tracker.fieldStartIdx); + s.fieldStart(false); + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_FALSE(tracker.fieldStartIsDefault); + EXPECT_EQ(0U, tracker.fieldStartIdx); - s.data("test"); - tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test", tracker.dataData.text().asString()); + s.data("test"); + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("test", tracker.dataData.asString()); - s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc - s.fieldStart(false); - tracker.expect(1, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_FALSE(tracker.fieldStartIsDefault); - EXPECT_EQ(1U, tracker.fieldStartIdx); + s.fieldStart(false); + tracker.expect(1, 0, 2, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_FALSE(tracker.fieldStartIsDefault); 
+ EXPECT_EQ(1U, tracker.fieldStartIdx); - s.data("test2"); - tracker.expect(1, 0, 2, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test2", tracker.dataData.text().asString()); + s.data("test2"); + tracker.expect(1, 0, 2, 1, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("test2", tracker.dataData.asString()); - s.fieldEnd(); - tracker.expect(1, 0, 2, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + tracker.expect(1, 0, 2, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc - s.fieldStart(true); - tracker.expect(1, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_TRUE(tracker.fieldStartIsDefault); - EXPECT_EQ(2U, tracker.fieldStartIdx); + s.fieldStart(true); + tracker.expect(1, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_TRUE(tracker.fieldStartIsDefault); + EXPECT_EQ(2U, tracker.fieldStartIdx); - s.data("test3"); - tracker.expect(1, 0, 3, 2, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - EXPECT_EQ("test3", tracker.dataData.text().asString()); + s.data("test3"); + tracker.expect(1, 0, 3, 2, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ("test3", tracker.dataData.asString()); - s.fieldEnd(); - tracker.expect(1, 0, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 3, 3, 0, 0, 3); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + s.fieldEnd(); + tracker.expect(1, 0, 3, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(1, 1, 3, 3, 3); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, implicitDefaultFieldOnNewCommand) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - 
s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + s.commandStart("b", {}); + tracker.expect(2, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(2, 2, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, implicitDefaultFieldOnNewCommandWithExplicitDefaultField) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("a", s.currentCommandName()); - s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(2, 0, 2, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - } - tracker.expect(2, 2, 2, 2, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + s.commandStart("b", {}); + tracker.expect(2, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("b", s.currentCommandName()); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(2, 0, 2, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 2, 2, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, noImplicitDefaultFieldOnIncompatibleCommand) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::AnyHandlers}; 
- s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("a", s.currentCommandName()); - tracker.fieldStartResult = false; - s.command("b", {}); - tracker.expect(2, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - } - tracker.expect(2, 2, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + tracker.fieldStartResult = false; + s.commandStart("b", {}); + tracker.expect(2, 1, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); - s.fieldStart(true); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); - s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("a", s.currentCommandName()); + s.fieldStart(true); + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("a", s.currentCommandName()); + s.fieldEnd(); + tracker.expect(1, 0, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("a", s.currentCommandName()); - s.command("b", {}); - tracker.expect(2, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, 
dc - ASSERT_EQ("b", s.currentCommandName()); - } - tracker.expect(2, 2, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + s.commandStart("b", {}); + tracker.expect(2, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("b", s.currentCommandName()); + } + tracker.expect(2, 2, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, noEndIfStartFails) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("a", s.currentCommandName()); + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_EQ("a", s.currentCommandName()); - tracker.startResult = false; - s.command("b", {}); - tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_EQ("b", s.currentCommandName()); - } - tracker.expect(3, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_TRUE(logger.hasError()); + tracker.startCommandResult = false; + s.commandStart("b", {}); + tracker.expect(3, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + EXPECT_EQ(&States::None, &s.currentState()); + } + tracker.expect(3, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_TRUE(logger.hasError()); } TEST(Stack, implicitDefaultFieldOnData) { - tracker.reset(); - logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; + tracker.reset(); + logger.reset(); + { + Stack s{parser, env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - s.data("test"); - tracker.expect(1, 0, 1, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - } - 
tracker.expect(1, 1, 1, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + s.data("test"); + tracker.expect(1, 0, 1, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(1, 1, 1, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, autoFieldEnd) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(1, 1, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, autoImplicitFieldEnd) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - s.command("b", {}); - s.command("c", {}); - s.command("d", {}); - s.command("e", {}); - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(5, 0, 5, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(5, 5, 5, 5, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + s.commandStart("b", {}); + s.commandStart("c", {}); + s.commandStart("d", {}); + s.commandStart("e", {}); + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(5, 0, 5, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(5, 5, 5, 5, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, invalidDefaultField) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - { - Stack s{env.context, 
States::AnyHandlers}; - s.command("a", {}); - tracker.fieldStartResult = false; - s.fieldStart(true); - s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - ASSERT_FALSE(logger.hasError()); + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.fieldStartResult = false; + s.fieldStart(true); + s.fieldEnd(); + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(1, 1, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); } TEST(Stack, errorInvalidDefaultFieldData) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.fieldStartResult = false; - s.fieldStart(true); - ASSERT_FALSE(logger.hasError()); - s.data("test"); - ASSERT_TRUE(logger.hasError()); - s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.fieldStartResult = false; + s.fieldStart(true); + ASSERT_FALSE(logger.hasError()); + s.data("test"); + ASSERT_TRUE(logger.hasError()); + s.fieldEnd(); + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(1, 1, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorInvalidFieldData) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.fieldStartResult = false; - ASSERT_FALSE(logger.hasError()); - s.fieldStart(false); - ASSERT_TRUE(logger.hasError()); - s.data("test"); - s.fieldEnd(); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 0, 0, 
0, 0); // sc, ec, fsc, fse, asc, aec, dc + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.fieldStartResult = false; + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + s.data("test"); + s.fieldEnd(); + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(1, 1, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorFieldStartNoCommand) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - Stack s{env.context, States::AnyHandlers}; - ASSERT_THROW(s.fieldStart(false), LoggableException); - ASSERT_THROW(s.fieldStart(true), LoggableException); - tracker.expect(0, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{parser, env.context, States::AnyHandlers}; + ASSERT_THROW(s.fieldStart(false), LoggableException); + ASSERT_THROW(s.fieldStart(true), LoggableException); + tracker.expect(0, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorMultipleFieldStarts) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - s.fieldStart(false); - ASSERT_FALSE(logger.hasError()); - s.fieldStart(false); - ASSERT_TRUE(logger.hasError()); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldStart(false); + ASSERT_FALSE(logger.hasError()); + s.fieldStart(false); + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - s.fieldEnd(); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldEnd(); + 
tracker.expect(1, 0, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorMultipleFieldEnds) { - tracker.reset(); - logger.reset(); + tracker.reset(); + logger.reset(); - { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + { + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - s.fieldStart(false); - s.fieldEnd(); - ASSERT_FALSE(logger.hasError()); - tracker.expect(1, 0, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - s.fieldEnd(); - ASSERT_TRUE(logger.hasError()); - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc - } - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.fieldStart(false); + s.fieldEnd(); + ASSERT_FALSE(logger.hasError()); + tracker.expect(1, 0, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.fieldEnd(); + ASSERT_TRUE(logger.hasError()); + tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, errorOpenField) @@ -657,15 +751,15 @@ TEST(Stack, errorOpenField) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(false); ASSERT_FALSE(logger.hasError()); } ASSERT_TRUE(logger.hasError()); - tracker.expect(1, 1, 1, 1, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 1, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc } TEST(Stack, fieldEndWhenImplicitDefaultFieldOpen) @@ -674,15 +768,15 @@ TEST(Stack, 
fieldEndWhenImplicitDefaultFieldOpen) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); s.fieldStart(true); - s.command("b", {}); + s.commandStart("b", {}); s.data("test"); s.fieldEnd(); - tracker.expect(2, 1, 2, 2, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 1, 2, 2, 1); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(2, 2, 2, 2, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 2, 2, 2, 1); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); } @@ -692,46 +786,66 @@ TEST(Stack, fieldAfterDefaultField) logger.reset(); { - Stack s{env.context, States::AnyHandlers}; - s.command("a", {}); - tracker.expect(1, 0, 0, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + Stack s{parser, env.context, States::AnyHandlers}; + s.commandStart("a", {}); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(true); - tracker.expect(1, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(1, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc - s.command("b", {}); - tracker.expect(2, 0, 1, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + s.commandStart("b", {}); + tracker.expect(2, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldStart(false); - tracker.expect(2, 0, 2, 0, 0, 0, 0); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 2, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc s.data("f1"); - tracker.expect(2, 0, 2, 0, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 2, 0, 1); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldEnd(); - tracker.expect(2, 0, 2, 1, 0, 0, 1); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 2, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc tracker.fieldStartSetIsDefault = true; s.fieldStart(false); tracker.fieldStartSetIsDefault = false; - tracker.expect(2, 0, 3, 1, 0, 0, 1); // sc, ec, 
fsc, fse, asc, aec, dc + tracker.expect(2, 0, 3, 1, 1); // scc, ec, fsc, fec, dc, sac, stc, etc s.data("f2"); - tracker.expect(2, 0, 3, 1, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 3, 1, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldEnd(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); s.fieldStart(false); ASSERT_TRUE(logger.hasError()); logger.reset(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.data("f3"); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldEnd(); - tracker.expect(2, 0, 3, 2, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 0, 3, 2, 2); // scc, ec, fsc, fec, dc, sac, stc, etc s.fieldEnd(); - tracker.expect(2, 1, 3, 3, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 1, 3, 3, 2); // scc, ec, fsc, fec, dc, sac, stc, etc } - tracker.expect(2, 2, 3, 3, 0, 0, 2); // sc, ec, fsc, fse, asc, aec, dc + tracker.expect(2, 2, 3, 3, 2); // scc, ec, fsc, fec, dc, sac, stc, etc ASSERT_FALSE(logger.hasError()); -}*/ +} + +TEST(Stack, rangeCommandUnranged) +{ + tracker.reset(); + logger.reset(); + + { + Stack s{parser, env.context, States::AnyHandlers}; + tracker.expect(0, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.commandStart("a", {}, true); + tracker.expect(1, 0, 0, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.commandStart("b", {}); + tracker.expect(2, 0, 1, 0, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + s.rangeEnd(); + tracker.expect(2, 2, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + } + tracker.expect(2, 2, 1, 1, 0); // scc, ec, fsc, fec, dc, sac, stc, etc + ASSERT_FALSE(logger.hasError()); +} + } } -- cgit v1.2.3