From efe60ac3c3a8725ac71329c0bb19fa9d9c58f399 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:42:05 +0100
Subject: Moved specific file format parsers to formats/ folder, moved old
 tokenizer to css code (this is the only place where it is actually used)

---
 test/core/CodeTokenizerTest.cpp        | 100 -----------
 test/core/TokenizerTest.cpp            | 118 -------------
 test/formats/osdmx/OsdmxParserTest.cpp | 314 +++++++++++++++++++++++++++++++++
 test/plugins/css/CodeTokenizerTest.cpp | 100 +++++++++++
 test/plugins/css/TokenizerTest.cpp     | 118 +++++++++++++
 test/plugins/xml/XmlParserTest.cpp     | 314 ---------------------------------
 6 files changed, 532 insertions(+), 532 deletions(-)
 delete mode 100644 test/core/CodeTokenizerTest.cpp
 delete mode 100644 test/core/TokenizerTest.cpp
 create mode 100644 test/formats/osdmx/OsdmxParserTest.cpp
 create mode 100644 test/plugins/css/CodeTokenizerTest.cpp
 create mode 100644 test/plugins/css/TokenizerTest.cpp
 delete mode 100644 test/plugins/xml/XmlParserTest.cpp

(limited to 'test')
diff --git a/test/core/CodeTokenizerTest.cpp b/test/core/CodeTokenizerTest.cpp
deleted file mode 100644
index 2d4d5a7..0000000
--- a/test/core/CodeTokenizerTest.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <core/CodeTokenizer.hpp>
-
-namespace ousia {
-
-static const int BLOCK_COMMENT = 30;
-static const int LINE_COMMENT = 31;
-static const int STRING = 20;
-static const int ESCAPE = 21;
-static const int LINEBREAK = 21;
-static const int CURLY_OPEN = 40;
-static const int CURLY_CLOSE = 41;
-
-TEST(CodeTokenizer, testTokenizer)
-{
-	CharReader reader{
-	    "/**\n"                                 // 1
-	    " * Some Block Comment\n"               // 2
-	    " */\n"                                 // 3
-	    "var my_string = 'My \\'String\\'';\n"  // 4
-	    "// and a line comment\n"               // 5
-	    "var my_obj = { a = 4;}", 0};              // 6
-	//   123456789012345678901234567890123456789
-	//   0        1         2         3
-	TokenTreeNode root{{{"/*", 1},
-	                    {"*/", 2},
-	                    {"//", 3},
-	                    {"'", 4},
-	                    {"\\", 5},
-	                    {"{", CURLY_OPEN},
-	                    {"}", CURLY_CLOSE},
-	                    {"\n", 6}}};
-	std::map<int, CodeTokenDescriptor> descriptors{
-	    // the block comment start Token has the id 1 and if the Tokenizer
-	    // returns a Block Comment Token that should have the id 10.
-	    {1, {CodeTokenMode::BLOCK_COMMENT_START, BLOCK_COMMENT}},
-	    {2, {CodeTokenMode::BLOCK_COMMENT_END, BLOCK_COMMENT}},
-	    {3, {CodeTokenMode::LINE_COMMENT, LINE_COMMENT}},
-	    {4, {CodeTokenMode::STRING_START_END, STRING}},
-	    {5, {CodeTokenMode::ESCAPE, ESCAPE}},
-	    {6, {CodeTokenMode::LINEBREAK, LINEBREAK}}};
-
-	std::vector<Token> expected = {
-	    {BLOCK_COMMENT, "*\n * Some Block Comment\n ", SourceLocation{0, 0, 29}},
-	    {LINEBREAK, "\n", SourceLocation{0, 29, 30}},
-	    {TOKEN_TEXT, "var", SourceLocation{0, 30, 33}},
-	    {TOKEN_TEXT, "my_string", SourceLocation{0, 34, 43}},
-	    {TOKEN_TEXT, "=", SourceLocation{0, 44, 45}},
-	    {STRING, "My 'String'", SourceLocation{0, 46, 61}},
-	    {TOKEN_TEXT, ";", SourceLocation{0, 61, 62}},
-	    {LINEBREAK, "\n", SourceLocation{0, 62, 63}},
-	    // this is slightly counter-intuitive but makes sense if you think about
-	    // it: As a line comment is ended by a line break the line break is
-	    // technically still a part of the line comment and thus the ending
-	    // is in the next line.
-	    {LINE_COMMENT, " and a line comment", SourceLocation{0, 63, 85}},
-	    {TOKEN_TEXT, "var", SourceLocation{0, 85, 88}},
-	    {TOKEN_TEXT, "my_obj", SourceLocation{0, 89, 95}},
-	    {TOKEN_TEXT, "=", SourceLocation{0, 96, 97}},
-	    {CURLY_OPEN, "{", SourceLocation{0, 98, 99}},
-	    {TOKEN_TEXT, "a", SourceLocation{0, 100, 101}},
-	    {TOKEN_TEXT, "=", SourceLocation{0, 102, 103}},
-	    {TOKEN_TEXT, "4;", SourceLocation{0, 104, 106}},
-	    {CURLY_CLOSE, "}", SourceLocation{0, 106, 107}},
-	};
-
-	CodeTokenizer tokenizer{reader, root, descriptors};
-
-	Token t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.next(t));
-		EXPECT_EQ(te.tokenId, t.tokenId);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.next(t));
-}
-}
-
diff --git a/test/core/TokenizerTest.cpp b/test/core/TokenizerTest.cpp
deleted file mode 100644
index c53f93d..0000000
--- a/test/core/TokenizerTest.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <core/common/CharReader.hpp>
-
-#include <core/Tokenizer.hpp>
-
-namespace ousia {
-TEST(TokenTreeNode, testConstructor)
-{
-	TokenTreeNode root{{{"a", 1}, {"aab", 2}, {"aac", 3}, {"abd", 4}}};
-
-	ASSERT_EQ(-1, root.tokenId);
-	ASSERT_EQ(1U, root.children.size());
-	ASSERT_TRUE(root.children.find('a') != root.children.end());
-
-	const TokenTreeNode &a = root.children.at('a');
-	ASSERT_EQ(1, a.tokenId);
-	ASSERT_EQ(2U, a.children.size());
-	ASSERT_TRUE(a.children.find('a') != a.children.end());
-	ASSERT_TRUE(a.children.find('b') != a.children.end());
-
-	const TokenTreeNode &aa = a.children.at('a');
-	ASSERT_EQ(-1, aa.tokenId);
-	ASSERT_EQ(2U, aa.children.size());
-	ASSERT_TRUE(aa.children.find('b') != aa.children.end());
-	ASSERT_TRUE(aa.children.find('c') != aa.children.end());
-
-	const TokenTreeNode &aab = aa.children.at('b');
-	ASSERT_EQ(2, aab.tokenId);
-	ASSERT_EQ(0U, aab.children.size());
-
-	const TokenTreeNode &aac = aa.children.at('c');
-	ASSERT_EQ(3, aac.tokenId);
-	ASSERT_EQ(0U, aac.children.size());
-
-	const TokenTreeNode &ab = a.children.at('b');
-	ASSERT_EQ(-1, ab.tokenId);
-	ASSERT_EQ(1U, ab.children.size());
-	ASSERT_TRUE(ab.children.find('d') != ab.children.end());
-
-	const TokenTreeNode &abd = ab.children.at('d');
-	ASSERT_EQ(4, abd.tokenId);
-	ASSERT_EQ(0U, abd.children.size());
-}
-
-TEST(Tokenizer, testTokenization)
-{
-	TokenTreeNode root{{{"/", 1}, {"/*", 2}, {"*/", 3}}};
-
-	CharReader reader{"Test/Test /* Block Comment */", 0};
-	//                 012345678901234567890123456789
-	//                 0        1         2
-
-	std::vector<Token> expected = {
-	    {TOKEN_TEXT, "Test", SourceLocation{0, 0, 4}},
-	    {1, "/", SourceLocation{0, 4, 5}},
-	    {TOKEN_TEXT, "Test ", SourceLocation{0, 5, 10}},
-	    {2, "/*", SourceLocation{0, 10, 12}},
-	    {TOKEN_TEXT, " Block Comment ", SourceLocation{0, 12, 27}},
-	    {3, "*/", SourceLocation{0, 27, 29}}};
-
-	Tokenizer tokenizer{reader, root};
-
-	Token t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.next(t));
-		EXPECT_EQ(te.tokenId, t.tokenId);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.next(t));
-}
-
-TEST(Tokenizer, testIncompleteTokens)
-{
-	TokenTreeNode root{{{"ab", 1}, {"c", 2}}};
-
-	CharReader reader{"ac", 0};
-
-	std::vector<Token> expected = {
-	    {TOKEN_TEXT, "a", SourceLocation{0, 0, 1}},
-	    {2, "c", SourceLocation{0, 1, 2}}};
-
-	Tokenizer tokenizer{reader, root};
-
-	Token t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.next(t));
-		EXPECT_EQ(te.tokenId, t.tokenId);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.next(t));
-}
-}
-
diff --git a/test/formats/osdmx/OsdmxParserTest.cpp b/test/formats/osdmx/OsdmxParserTest.cpp
new file mode 100644
index 0000000..af1ef56
--- /dev/null
+++ b/test/formats/osdmx/OsdmxParserTest.cpp
@@ -0,0 +1,314 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/SourceContextReader.hpp>
+#include <core/model/Domain.hpp>
+#include <core/model/Node.hpp>
+#include <core/model/Project.hpp>
+#include <core/frontend/TerminalLogger.hpp>
+#include <core/StandaloneEnvironment.hpp>
+
+#include <plugins/filesystem/FileLocator.hpp>
+#include <plugins/xml/XmlParser.hpp>
+
+namespace ousia {
+
+namespace RttiTypes {
+extern const Rtti Document;
+extern const Rtti Domain;
+extern const Rtti Typesystem;
+}
+
+struct XmlStandaloneEnvironment : public StandaloneEnvironment {
+	XmlParser xmlParser;
+	FileLocator fileLocator;
+
+	XmlStandaloneEnvironment(ConcreteLogger &logger)
+	    : StandaloneEnvironment(logger)
+	{
+		fileLocator.addDefaultSearchPaths();
+		fileLocator.addUnittestSearchPath("xmlparser");
+
+		registry.registerDefaultExtensions();
+		registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"},
+		                        {&RttiTypes::Node}, &xmlParser);
+		registry.registerResourceLocator(&fileLocator);
+	}
+};
+
+static TerminalLogger logger(std::cerr, true);
+
+TEST(XmlParser, mismatchedTag)
+{
+	XmlStandaloneEnvironment env(logger);
+	env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document});
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(XmlParser, generic)
+{
+	XmlStandaloneEnvironment env(logger);
+	env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node});
+#ifdef MANAGER_GRAPHVIZ_EXPORT
+	env.manager.exportGraphviz("xmlDocument.dot");
+#endif
+}
+
+static void checkAttributes(Handle<StructType> expected,
+                            Handle<Descriptor> desc)
+{
+	if (expected == nullptr) {
+		ASSERT_TRUE(desc->getAttributesDescriptor()->getAttributes().empty());
+	} else {
+		ASSERT_EQ(expected->getName(),
+		          desc->getAttributesDescriptor()->getName());
+		auto &attrs_exp = expected->getAttributes();
+		auto &attrs = desc->getAttributesDescriptor()->getAttributes();
+		ASSERT_EQ(attrs_exp.size(), attrs.size());
+		for (size_t i = 0; i < attrs_exp.size(); i++) {
+			ASSERT_EQ(attrs_exp[i]->getName(), attrs[i]->getName());
+			ASSERT_EQ(attrs_exp[i]->getType(), attrs[i]->getType());
+			ASSERT_EQ(attrs_exp[i]->isOptional(), attrs[i]->isOptional());
+			ASSERT_EQ(attrs_exp[i]->getDefaultValue(),
+			          attrs[i]->getDefaultValue());
+		}
+	}
+}
+
+static void checkStructuredClass(
+    Handle<Node> n, const std::string &name, Handle<Domain> domain,
+    Variant cardinality = Cardinality::any(),
+    Handle<StructType> attributesDescriptor = nullptr,
+    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
+    bool root = false)
+{
+	ASSERT_FALSE(n == nullptr);
+	Handle<StructuredClass> sc = n.cast<StructuredClass>();
+	ASSERT_FALSE(sc == nullptr);
+	ASSERT_EQ(name, sc->getName());
+	ASSERT_EQ(domain, sc->getParent());
+	ASSERT_EQ(cardinality, sc->getCardinality());
+	ASSERT_EQ(transparent, sc->isTransparent());
+	ASSERT_EQ(root, sc->hasRootPermission());
+	checkAttributes(attributesDescriptor, sc);
+}
+
+static Rooted<StructuredClass> checkStructuredClass(
+    const std::string &resolve, const std::string &name, Handle<Domain> domain,
+    Variant cardinality = Cardinality::any(),
+    Handle<StructType> attributesDescriptor = nullptr,
+    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
+    bool root = false)
+{
+	auto res = domain->resolve(&RttiTypes::StructuredClass, resolve);
+	if (res.size() != 1) {
+		throw OusiaException("resolution error!");
+	}
+	Handle<StructuredClass> sc = res[0].node.cast<StructuredClass>();
+	checkStructuredClass(sc, name, domain, cardinality, attributesDescriptor,
+	                     superclass, transparent, root);
+	return sc;
+}
+
+static void checkAnnotationClass(
+    Handle<Node> n, const std::string &name, Handle<Domain> domain,
+    Handle<StructType> attributesDescriptor = nullptr)
+{
+	ASSERT_FALSE(n == nullptr);
+	Handle<AnnotationClass> ac = n.cast<AnnotationClass>();
+	ASSERT_FALSE(ac == nullptr);
+	ASSERT_EQ(name, ac->getName());
+	ASSERT_EQ(domain, ac->getParent());
+	checkAttributes(attributesDescriptor, ac);
+}
+
+static Rooted<AnnotationClass> checkAnnotationClass(
+    const std::string &resolve, const std::string &name, Handle<Domain> domain,
+    Handle<StructType> attributesDescriptor = nullptr)
+{
+	auto res = domain->resolve(&RttiTypes::AnnotationClass, resolve);
+	if (res.size() != 1) {
+		throw OusiaException("resolution error!");
+	}
+	Handle<AnnotationClass> ac = res[0].node.cast<AnnotationClass>();
+	checkAnnotationClass(ac, name, domain, attributesDescriptor);
+	return ac;
+}
+
+static void checkFieldDescriptor(
+    Handle<Node> n, const std::string &name, Handle<Descriptor> parent,
+    NodeVector<StructuredClass> children,
+    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
+    Handle<Type> primitiveType = nullptr, bool optional = false)
+{
+	ASSERT_FALSE(n == nullptr);
+	Handle<FieldDescriptor> field = n.cast<FieldDescriptor>();
+	ASSERT_FALSE(field.isNull());
+	ASSERT_EQ(name, field->getName());
+	ASSERT_EQ(parent, field->getParent());
+	ASSERT_EQ(type, field->getFieldType());
+	ASSERT_EQ(primitiveType, field->getPrimitiveType());
+	ASSERT_EQ(optional, field->isOptional());
+	// check the children.
+	ASSERT_EQ(children.size(), field->getChildren().size());
+	for (unsigned int c = 0; c < children.size(); c++) {
+		ASSERT_EQ(children[c], field->getChildren()[c]);
+	}
+}
+
+static void checkFieldDescriptor(
+    Handle<Descriptor> desc, Handle<Descriptor> parent,
+    NodeVector<StructuredClass> children,
+    const std::string &name = DEFAULT_FIELD_NAME,
+    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
+    Handle<Type> primitiveType = nullptr, bool optional = false)
+{
+	auto res = desc->resolve(&RttiTypes::FieldDescriptor, name);
+	ASSERT_EQ(1, res.size());
+	checkFieldDescriptor(res[0].node, name, parent, children, type,
+	                     primitiveType, optional);
+}
+
+static void checkFieldDescriptor(
+    Handle<Descriptor> desc, NodeVector<StructuredClass> children,
+    const std::string &name = DEFAULT_FIELD_NAME,
+    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
+    Handle<Type> primitiveType = nullptr, bool optional = false)
+{
+	checkFieldDescriptor(desc, desc, children, name, type, primitiveType,
+	                     optional);
+}
+
+TEST(XmlParser, domainParsing)
+{
+	XmlStandaloneEnvironment env(logger);
+	Rooted<Node> book_domain_node =
+	    env.parse("book_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
+	ASSERT_FALSE(book_domain_node == nullptr);
+	ASSERT_FALSE(logger.hasError());
+	// check the domain node.
+	Rooted<Domain> book_domain = book_domain_node.cast<Domain>();
+	ASSERT_EQ("book", book_domain->getName());
+	// get the book struct node.
+	Cardinality single;
+	single.merge({1});
+	Rooted<StructType> bookAuthor{
+	    new StructType(book_domain->getManager(), "", nullptr)};
+	bookAuthor->addAttribute(
+	    {new Attribute(book_domain->getManager(), "author",
+	                   env.project->getSystemTypesystem()->getStringType(),
+	                   "")},
+	    logger);
+	Rooted<StructuredClass> book = checkStructuredClass(
+	    "book", "book", book_domain, single, bookAuthor, nullptr, false, true);
+	// get the chapter struct node.
+	Rooted<StructuredClass> chapter =
+	    checkStructuredClass("chapter", "chapter", book_domain);
+	Rooted<StructuredClass> section =
+	    checkStructuredClass("section", "section", book_domain);
+	Rooted<StructuredClass> subsection =
+	    checkStructuredClass("subsection", "subsection", book_domain);
+	Rooted<StructuredClass> paragraph =
+	    checkStructuredClass("paragraph", "paragraph", book_domain,
+	                         Cardinality::any(), nullptr, nullptr, true, false);
+	Rooted<StructuredClass> text =
+	    checkStructuredClass("text", "text", book_domain, Cardinality::any(),
+	                         nullptr, nullptr, true, false);
+
+	// check the FieldDescriptors.
+	checkFieldDescriptor(book, {chapter, paragraph});
+	checkFieldDescriptor(chapter, {section, paragraph});
+	checkFieldDescriptor(section, {subsection, paragraph});
+	checkFieldDescriptor(subsection, {paragraph});
+	checkFieldDescriptor(paragraph, {text});
+	checkFieldDescriptor(
+	    text, {}, DEFAULT_FIELD_NAME, FieldDescriptor::FieldType::PRIMITIVE,
+	    env.project->getSystemTypesystem()->getStringType(), false);
+
+	// check parent handling using the headings domain.
+	Rooted<Node> headings_domain_node =
+	    env.parse("headings_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
+	ASSERT_FALSE(headings_domain_node == nullptr);
+	ASSERT_FALSE(logger.hasError());
+	Rooted<Domain> headings_domain = headings_domain_node.cast<Domain>();
+	// now there should be a heading struct.
+	Rooted<StructuredClass> heading =
+	    checkStructuredClass("heading", "heading", headings_domain, single,
+	                         nullptr, nullptr, true, false);
+	// which should be a reference to the paragraph descriptor.
+	checkFieldDescriptor(heading, paragraph, {text});
+	// and each struct in the book domain (except for text) should have a
+	// heading field now.
+	checkFieldDescriptor(book, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+	checkFieldDescriptor(chapter, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+	checkFieldDescriptor(section, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+	checkFieldDescriptor(subsection, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+	checkFieldDescriptor(paragraph, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+
+	// check annotation handling using the comments domain.
+	Rooted<Node> comments_domain_node =
+	    env.parse("comments_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
+	ASSERT_FALSE(comments_domain_node == nullptr);
+	ASSERT_FALSE(logger.hasError());
+	Rooted<Domain> comments_domain = comments_domain_node.cast<Domain>();
+	// now we should be able to find a comment annotation.
+	Rooted<AnnotationClass> comment_anno =
+	    checkAnnotationClass("comment", "comment", comments_domain);
+	// as well as a comment struct
+	Rooted<StructuredClass> comment =
+	    checkStructuredClass("comment", "comment", comments_domain);
+	// and a reply struct
+	Rooted<StructuredClass> reply =
+	    checkStructuredClass("reply", "reply", comments_domain);
+	// check the fields for each of them.
+	{
+		std::vector<Rooted<Descriptor>> descs{comment_anno, comment, reply};
+		for (auto &d : descs) {
+			checkFieldDescriptor(d, {paragraph}, "content",
+			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
+			                     false);
+			checkFieldDescriptor(d, {reply}, "replies",
+			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
+			                     false);
+		}
+	}
+	// paragraph should have comment as child now as well.
+	checkFieldDescriptor(paragraph, {text, comment});
+	// as should heading, because it references the paragraph default field.
+	checkFieldDescriptor(heading, paragraph, {text, comment});
+}
+
+TEST(XmlParser, documentParsing)
+{
+	XmlStandaloneEnvironment env(logger);
+	Rooted<Node> book_domain_node =
+	    env.parse("simple_book.oxd", "", "", RttiSet{&RttiTypes::Document});
+	//TODO: Check result
+}
+}
+
diff --git a/test/plugins/css/CodeTokenizerTest.cpp b/test/plugins/css/CodeTokenizerTest.cpp
new file mode 100644
index 0000000..2d4d5a7
--- /dev/null
+++ b/test/plugins/css/CodeTokenizerTest.cpp
@@ -0,0 +1,100 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/CodeTokenizer.hpp>
+
+namespace ousia {
+
+static const int BLOCK_COMMENT = 30;
+static const int LINE_COMMENT = 31;
+static const int STRING = 20;
+static const int ESCAPE = 21;
+static const int LINEBREAK = 21;
+static const int CURLY_OPEN = 40;
+static const int CURLY_CLOSE = 41;
+
+TEST(CodeTokenizer, testTokenizer)
+{
+	CharReader reader{
+	    "/**\n"                                 // 1
+	    " * Some Block Comment\n"               // 2
+	    " */\n"                                 // 3
+	    "var my_string = 'My \\'String\\'';\n"  // 4
+	    "// and a line comment\n"               // 5
+	    "var my_obj = { a = 4;}", 0};              // 6
+	//   123456789012345678901234567890123456789
+	//   0        1         2         3
+	TokenTreeNode root{{{"/*", 1},
+	                    {"*/", 2},
+	                    {"//", 3},
+	                    {"'", 4},
+	                    {"\\", 5},
+	                    {"{", CURLY_OPEN},
+	                    {"}", CURLY_CLOSE},
+	                    {"\n", 6}}};
+	std::map<int, CodeTokenDescriptor> descriptors{
+	    // the block comment start Token has the id 1 and if the Tokenizer
+	    // returns a Block Comment Token that should have the id 10.
+	    {1, {CodeTokenMode::BLOCK_COMMENT_START, BLOCK_COMMENT}},
+	    {2, {CodeTokenMode::BLOCK_COMMENT_END, BLOCK_COMMENT}},
+	    {3, {CodeTokenMode::LINE_COMMENT, LINE_COMMENT}},
+	    {4, {CodeTokenMode::STRING_START_END, STRING}},
+	    {5, {CodeTokenMode::ESCAPE, ESCAPE}},
+	    {6, {CodeTokenMode::LINEBREAK, LINEBREAK}}};
+
+	std::vector<Token> expected = {
+	    {BLOCK_COMMENT, "*\n * Some Block Comment\n ", SourceLocation{0, 0, 29}},
+	    {LINEBREAK, "\n", SourceLocation{0, 29, 30}},
+	    {TOKEN_TEXT, "var", SourceLocation{0, 30, 33}},
+	    {TOKEN_TEXT, "my_string", SourceLocation{0, 34, 43}},
+	    {TOKEN_TEXT, "=", SourceLocation{0, 44, 45}},
+	    {STRING, "My 'String'", SourceLocation{0, 46, 61}},
+	    {TOKEN_TEXT, ";", SourceLocation{0, 61, 62}},
+	    {LINEBREAK, "\n", SourceLocation{0, 62, 63}},
+	    // this is slightly counter-intuitive but makes sense if you think about
+	    // it: As a line comment is ended by a line break the line break is
+	    // technically still a part of the line comment and thus the ending
+	    // is in the next line.
+	    {LINE_COMMENT, " and a line comment", SourceLocation{0, 63, 85}},
+	    {TOKEN_TEXT, "var", SourceLocation{0, 85, 88}},
+	    {TOKEN_TEXT, "my_obj", SourceLocation{0, 89, 95}},
+	    {TOKEN_TEXT, "=", SourceLocation{0, 96, 97}},
+	    {CURLY_OPEN, "{", SourceLocation{0, 98, 99}},
+	    {TOKEN_TEXT, "a", SourceLocation{0, 100, 101}},
+	    {TOKEN_TEXT, "=", SourceLocation{0, 102, 103}},
+	    {TOKEN_TEXT, "4;", SourceLocation{0, 104, 106}},
+	    {CURLY_CLOSE, "}", SourceLocation{0, 106, 107}},
+	};
+
+	CodeTokenizer tokenizer{reader, root, descriptors};
+
+	Token t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.next(t));
+		EXPECT_EQ(te.tokenId, t.tokenId);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.next(t));
+}
+}
+
diff --git a/test/plugins/css/TokenizerTest.cpp b/test/plugins/css/TokenizerTest.cpp
new file mode 100644
index 0000000..c53f93d
--- /dev/null
+++ b/test/plugins/css/TokenizerTest.cpp
@@ -0,0 +1,118 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/common/CharReader.hpp>
+
+#include <core/Tokenizer.hpp>
+
+namespace ousia {
+TEST(TokenTreeNode, testConstructor)
+{
+	TokenTreeNode root{{{"a", 1}, {"aab", 2}, {"aac", 3}, {"abd", 4}}};
+
+	ASSERT_EQ(-1, root.tokenId);
+	ASSERT_EQ(1U, root.children.size());
+	ASSERT_TRUE(root.children.find('a') != root.children.end());
+
+	const TokenTreeNode &a = root.children.at('a');
+	ASSERT_EQ(1, a.tokenId);
+	ASSERT_EQ(2U, a.children.size());
+	ASSERT_TRUE(a.children.find('a') != a.children.end());
+	ASSERT_TRUE(a.children.find('b') != a.children.end());
+
+	const TokenTreeNode &aa = a.children.at('a');
+	ASSERT_EQ(-1, aa.tokenId);
+	ASSERT_EQ(2U, aa.children.size());
+	ASSERT_TRUE(aa.children.find('b') != aa.children.end());
+	ASSERT_TRUE(aa.children.find('c') != aa.children.end());
+
+	const TokenTreeNode &aab = aa.children.at('b');
+	ASSERT_EQ(2, aab.tokenId);
+	ASSERT_EQ(0U, aab.children.size());
+
+	const TokenTreeNode &aac = aa.children.at('c');
+	ASSERT_EQ(3, aac.tokenId);
+	ASSERT_EQ(0U, aac.children.size());
+
+	const TokenTreeNode &ab = a.children.at('b');
+	ASSERT_EQ(-1, ab.tokenId);
+	ASSERT_EQ(1U, ab.children.size());
+	ASSERT_TRUE(ab.children.find('d') != ab.children.end());
+
+	const TokenTreeNode &abd = ab.children.at('d');
+	ASSERT_EQ(4, abd.tokenId);
+	ASSERT_EQ(0U, abd.children.size());
+}
+
+TEST(Tokenizer, testTokenization)
+{
+	TokenTreeNode root{{{"/", 1}, {"/*", 2}, {"*/", 3}}};
+
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 012345678901234567890123456789
+	//                 0        1         2
+
+	std::vector<Token> expected = {
+	    {TOKEN_TEXT, "Test", SourceLocation{0, 0, 4}},
+	    {1, "/", SourceLocation{0, 4, 5}},
+	    {TOKEN_TEXT, "Test ", SourceLocation{0, 5, 10}},
+	    {2, "/*", SourceLocation{0, 10, 12}},
+	    {TOKEN_TEXT, " Block Comment ", SourceLocation{0, 12, 27}},
+	    {3, "*/", SourceLocation{0, 27, 29}}};
+
+	Tokenizer tokenizer{reader, root};
+
+	Token t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.next(t));
+		EXPECT_EQ(te.tokenId, t.tokenId);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.next(t));
+}
+
+TEST(Tokenizer, testIncompleteTokens)
+{
+	TokenTreeNode root{{{"ab", 1}, {"c", 2}}};
+
+	CharReader reader{"ac", 0};
+
+	std::vector<Token> expected = {
+	    {TOKEN_TEXT, "a", SourceLocation{0, 0, 1}},
+	    {2, "c", SourceLocation{0, 1, 2}}};
+
+	Tokenizer tokenizer{reader, root};
+
+	Token t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.next(t));
+		EXPECT_EQ(te.tokenId, t.tokenId);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.next(t));
+}
+}
+
diff --git a/test/plugins/xml/XmlParserTest.cpp b/test/plugins/xml/XmlParserTest.cpp
deleted file mode 100644
index af1ef56..0000000
--- a/test/plugins/xml/XmlParserTest.cpp
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <iostream>
-
-#include <gtest/gtest.h>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/SourceContextReader.hpp>
-#include <core/model/Domain.hpp>
-#include <core/model/Node.hpp>
-#include <core/model/Project.hpp>
-#include <core/frontend/TerminalLogger.hpp>
-#include <core/StandaloneEnvironment.hpp>
-
-#include <plugins/filesystem/FileLocator.hpp>
-#include <plugins/xml/XmlParser.hpp>
-
-namespace ousia {
-
-namespace RttiTypes {
-extern const Rtti Document;
-extern const Rtti Domain;
-extern const Rtti Typesystem;
-}
-
-struct XmlStandaloneEnvironment : public StandaloneEnvironment {
-	XmlParser xmlParser;
-	FileLocator fileLocator;
-
-	XmlStandaloneEnvironment(ConcreteLogger &logger)
-	    : StandaloneEnvironment(logger)
-	{
-		fileLocator.addDefaultSearchPaths();
-		fileLocator.addUnittestSearchPath("xmlparser");
-
-		registry.registerDefaultExtensions();
-		registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"},
-		                        {&RttiTypes::Node}, &xmlParser);
-		registry.registerResourceLocator(&fileLocator);
-	}
-};
-
-static TerminalLogger logger(std::cerr, true);
-
-TEST(XmlParser, mismatchedTag)
-{
-	XmlStandaloneEnvironment env(logger);
-	env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document});
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(XmlParser, generic)
-{
-	XmlStandaloneEnvironment env(logger);
-	env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node});
-#ifdef MANAGER_GRAPHVIZ_EXPORT
-	env.manager.exportGraphviz("xmlDocument.dot");
-#endif
-}
-
-static void checkAttributes(Handle<StructType> expected,
-                            Handle<Descriptor> desc)
-{
-	if (expected == nullptr) {
-		ASSERT_TRUE(desc->getAttributesDescriptor()->getAttributes().empty());
-	} else {
-		ASSERT_EQ(expected->getName(),
-		          desc->getAttributesDescriptor()->getName());
-		auto &attrs_exp = expected->getAttributes();
-		auto &attrs = desc->getAttributesDescriptor()->getAttributes();
-		ASSERT_EQ(attrs_exp.size(), attrs.size());
-		for (size_t i = 0; i < attrs_exp.size(); i++) {
-			ASSERT_EQ(attrs_exp[i]->getName(), attrs[i]->getName());
-			ASSERT_EQ(attrs_exp[i]->getType(), attrs[i]->getType());
-			ASSERT_EQ(attrs_exp[i]->isOptional(), attrs[i]->isOptional());
-			ASSERT_EQ(attrs_exp[i]->getDefaultValue(),
-			          attrs[i]->getDefaultValue());
-		}
-	}
-}
-
-static void checkStructuredClass(
-    Handle<Node> n, const std::string &name, Handle<Domain> domain,
-    Variant cardinality = Cardinality::any(),
-    Handle<StructType> attributesDescriptor = nullptr,
-    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
-    bool root = false)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<StructuredClass> sc = n.cast<StructuredClass>();
-	ASSERT_FALSE(sc == nullptr);
-	ASSERT_EQ(name, sc->getName());
-	ASSERT_EQ(domain, sc->getParent());
-	ASSERT_EQ(cardinality, sc->getCardinality());
-	ASSERT_EQ(transparent, sc->isTransparent());
-	ASSERT_EQ(root, sc->hasRootPermission());
-	checkAttributes(attributesDescriptor, sc);
-}
-
-static Rooted<StructuredClass> checkStructuredClass(
-    const std::string &resolve, const std::string &name, Handle<Domain> domain,
-    Variant cardinality = Cardinality::any(),
-    Handle<StructType> attributesDescriptor = nullptr,
-    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
-    bool root = false)
-{
-	auto res = domain->resolve(&RttiTypes::StructuredClass, resolve);
-	if (res.size() != 1) {
-		throw OusiaException("resolution error!");
-	}
-	Handle<StructuredClass> sc = res[0].node.cast<StructuredClass>();
-	checkStructuredClass(sc, name, domain, cardinality, attributesDescriptor,
-	                     superclass, transparent, root);
-	return sc;
-}
-
-static void checkAnnotationClass(
-    Handle<Node> n, const std::string &name, Handle<Domain> domain,
-    Handle<StructType> attributesDescriptor = nullptr)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<AnnotationClass> ac = n.cast<AnnotationClass>();
-	ASSERT_FALSE(ac == nullptr);
-	ASSERT_EQ(name, ac->getName());
-	ASSERT_EQ(domain, ac->getParent());
-	checkAttributes(attributesDescriptor, ac);
-}
-
-static Rooted<AnnotationClass> checkAnnotationClass(
-    const std::string &resolve, const std::string &name, Handle<Domain> domain,
-    Handle<StructType> attributesDescriptor = nullptr)
-{
-	auto res = domain->resolve(&RttiTypes::AnnotationClass, resolve);
-	if (res.size() != 1) {
-		throw OusiaException("resolution error!");
-	}
-	Handle<AnnotationClass> ac = res[0].node.cast<AnnotationClass>();
-	checkAnnotationClass(ac, name, domain, attributesDescriptor);
-	return ac;
-}
-
-static void checkFieldDescriptor(
-    Handle<Node> n, const std::string &name, Handle<Descriptor> parent,
-    NodeVector<StructuredClass> children,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<FieldDescriptor> field = n.cast<FieldDescriptor>();
-	ASSERT_FALSE(field.isNull());
-	ASSERT_EQ(name, field->getName());
-	ASSERT_EQ(parent, field->getParent());
-	ASSERT_EQ(type, field->getFieldType());
-	ASSERT_EQ(primitiveType, field->getPrimitiveType());
-	ASSERT_EQ(optional, field->isOptional());
-	// check the children.
-	ASSERT_EQ(children.size(), field->getChildren().size());
-	for (unsigned int c = 0; c < children.size(); c++) {
-		ASSERT_EQ(children[c], field->getChildren()[c]);
-	}
-}
-
-static void checkFieldDescriptor(
-    Handle<Descriptor> desc, Handle<Descriptor> parent,
-    NodeVector<StructuredClass> children,
-    const std::string &name = DEFAULT_FIELD_NAME,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	auto res = desc->resolve(&RttiTypes::FieldDescriptor, name);
-	ASSERT_EQ(1, res.size());
-	checkFieldDescriptor(res[0].node, name, parent, children, type,
-	                     primitiveType, optional);
-}
-
-static void checkFieldDescriptor(
-    Handle<Descriptor> desc, NodeVector<StructuredClass> children,
-    const std::string &name = DEFAULT_FIELD_NAME,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	checkFieldDescriptor(desc, desc, children, name, type, primitiveType,
-	                     optional);
-}
-
-TEST(XmlParser, domainParsing)
-{
-	XmlStandaloneEnvironment env(logger);
-	Rooted<Node> book_domain_node =
-	    env.parse("book_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(book_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	// check the domain node.
-	Rooted<Domain> book_domain = book_domain_node.cast<Domain>();
-	ASSERT_EQ("book", book_domain->getName());
-	// get the book struct node.
-	Cardinality single;
-	single.merge({1});
-	Rooted<StructType> bookAuthor{
-	    new StructType(book_domain->getManager(), "", nullptr)};
-	bookAuthor->addAttribute(
-	    {new Attribute(book_domain->getManager(), "author",
-	                   env.project->getSystemTypesystem()->getStringType(),
-	                   "")},
-	    logger);
-	Rooted<StructuredClass> book = checkStructuredClass(
-	    "book", "book", book_domain, single, bookAuthor, nullptr, false, true);
-	// get the chapter struct node.
-	Rooted<StructuredClass> chapter =
-	    checkStructuredClass("chapter", "chapter", book_domain);
-	Rooted<StructuredClass> section =
-	    checkStructuredClass("section", "section", book_domain);
-	Rooted<StructuredClass> subsection =
-	    checkStructuredClass("subsection", "subsection", book_domain);
-	Rooted<StructuredClass> paragraph =
-	    checkStructuredClass("paragraph", "paragraph", book_domain,
-	                         Cardinality::any(), nullptr, nullptr, true, false);
-	Rooted<StructuredClass> text =
-	    checkStructuredClass("text", "text", book_domain, Cardinality::any(),
-	                         nullptr, nullptr, true, false);
-
-	// check the FieldDescriptors.
-	checkFieldDescriptor(book, {chapter, paragraph});
-	checkFieldDescriptor(chapter, {section, paragraph});
-	checkFieldDescriptor(section, {subsection, paragraph});
-	checkFieldDescriptor(subsection, {paragraph});
-	checkFieldDescriptor(paragraph, {text});
-	checkFieldDescriptor(
-	    text, {}, DEFAULT_FIELD_NAME, FieldDescriptor::FieldType::PRIMITIVE,
-	    env.project->getSystemTypesystem()->getStringType(), false);
-
-	// check parent handling using the headings domain.
-	Rooted<Node> headings_domain_node =
-	    env.parse("headings_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(headings_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	Rooted<Domain> headings_domain = headings_domain_node.cast<Domain>();
-	// now there should be a heading struct.
-	Rooted<StructuredClass> heading =
-	    checkStructuredClass("heading", "heading", headings_domain, single,
-	                         nullptr, nullptr, true, false);
-	// which should be a reference to the paragraph descriptor.
-	checkFieldDescriptor(heading, paragraph, {text});
-	// and each struct in the book domain (except for text) should have a
-	// heading field now.
-	checkFieldDescriptor(book, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(chapter, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(section, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(subsection, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(paragraph, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-
-	// check annotation handling using the comments domain.
-	Rooted<Node> comments_domain_node =
-	    env.parse("comments_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(comments_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	Rooted<Domain> comments_domain = comments_domain_node.cast<Domain>();
-	// now we should be able to find a comment annotation.
-	Rooted<AnnotationClass> comment_anno =
-	    checkAnnotationClass("comment", "comment", comments_domain);
-	// as well as a comment struct
-	Rooted<StructuredClass> comment =
-	    checkStructuredClass("comment", "comment", comments_domain);
-	// and a reply struct
-	Rooted<StructuredClass> reply =
-	    checkStructuredClass("reply", "reply", comments_domain);
-	// check the fields for each of them.
-	{
-		std::vector<Rooted<Descriptor>> descs{comment_anno, comment, reply};
-		for (auto &d : descs) {
-			checkFieldDescriptor(d, {paragraph}, "content",
-			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
-			                     false);
-			checkFieldDescriptor(d, {reply}, "replies",
-			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
-			                     false);
-		}
-	}
-	// paragraph should have comment as child now as well.
-	checkFieldDescriptor(paragraph, {text, comment});
-	// as should heading, because it references the paragraph default field.
-	checkFieldDescriptor(heading, paragraph, {text, comment});
-}
-
-TEST(XmlParser, documentParsing)
-{
-	XmlStandaloneEnvironment env(logger);
-	Rooted<Node> book_domain_node =
-	    env.parse("simple_book.oxd", "", "", RttiSet{&RttiTypes::Document});
-	//TODO: Check result
-}
-}
-
-- 
cgit v1.2.3


From ce4fd84a714d80859aa01bbca32a81302b93c4d7 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:43:32 +0100
Subject: Moved code for handling whitespaces to own header, including the
 "WhitespaceMode" enum

---
 src/core/common/Utils.cpp             |   7 --
 src/core/common/Utils.hpp             |  57 +--------
 src/core/common/Whitespace.cpp        |  38 ++++++
 src/core/common/Whitespace.hpp        | 120 ++++++++++++++++++
 src/core/common/WhitespaceHandler.hpp | 223 ++++++++++++++++++++++++++++++++++
 test/core/common/UtilsTest.cpp        |   8 --
 test/core/common/Whitespace.cpp       |  41 +++++++
 7 files changed, 428 insertions(+), 66 deletions(-)
 create mode 100644 src/core/common/Whitespace.cpp
 create mode 100644 src/core/common/Whitespace.hpp
 create mode 100644 src/core/common/WhitespaceHandler.hpp
 create mode 100644 test/core/common/Whitespace.cpp

(limited to 'test')

diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
index 563fe2a..4005143 100644
--- a/src/core/common/Utils.cpp
+++ b/src/core/common/Utils.cpp
@@ -18,19 +18,12 @@
 
 #include <algorithm>
 #include <cctype>
-#include <limits>
 #include <string>
 
 #include "Utils.hpp"
 
 namespace ousia {
 
-std::string Utils::trim(const std::string &s)
-{
-	std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace);
-	return s.substr(bounds.first, bounds.second - bounds.first);
-}
-
 bool Utils::isIdentifier(const std::string &name)
 {
 	bool first = true;
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 2c8a5b3..af7a773 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -78,12 +78,17 @@ public:
 	 */
 	static bool isIdentifier(const std::string &name);
 
+	/**
+	 * Returns true if the given character is a linebreak character.
+	 */
+	static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); }
+
 	/**
 	 * Returns true if the given character is a whitespace character.
 	 */
 	static bool isWhitespace(const char c)
 	{
-		return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
+		return (c == ' ') || (c == '\t') || isLinebreak(c);
 	}
 
 	/**
@@ -94,56 +99,6 @@ public:
 	 */
 	static bool hasNonWhitepaceChar(const std::string &s);
 
-	/**
-	 * Returns true if the given character is a whitespace character.
-	 */
-	static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); }
-
-	/**
-	 * Removes whitespace at the beginning and the end of the given string.
-	 *
-	 * @param s is the string that should be trimmed.
-	 * @return a trimmed copy of s.
-	 */
-	static std::string trim(const std::string &s);
-
-	/**
-	 * Trims the given string or vector of chars by returning the start and end
-	 * index.
-	 *
-	 * @param s is the container that should be trimmed.
-	 * @param f is a function that returns true for values that should be
-	 * removed.
-	 * @return start and end index. Note that "end" points at the character
-	 * beyond the end, thus "end" minus "start"
-	 */
-	template <class T, class Filter>
-	static std::pair<size_t, size_t> trim(const T &s, Filter f)
-	{
-		size_t start = 0;
-		for (size_t i = 0; i < s.size(); i++) {
-			if (!f(s[i])) {
-				start = i;
-				break;
-			}
-		}
-
-		size_t end = 0;
-		for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) {
-			if (!f(s[i])) {
-				end = i + 1;
-				break;
-			}
-		}
-
-		if (end < start) {
-			start = 0;
-			end = 0;
-		}
-
-		return std::pair<size_t, size_t>{start, end};
-	}
-
 	/**
 	 * Turns the elements of a collection into a string separated by the
 	 * given delimiter.
diff --git a/src/core/common/Whitespace.cpp b/src/core/common/Whitespace.cpp
new file mode 100644
index 0000000..4d7c01a
--- /dev/null
+++ b/src/core/common/Whitespace.cpp
@@ -0,0 +1,38 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "Whitespace.hpp"
+#include "WhitespaceHandler.hpp"
+
+namespace ousia {
+
+std::string Utils::trim(const std::string &s)
+{
+	std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace);
+	return s.substr(bounds.first, bounds.second - bounds.first);
+}
+
+std::string Utils::collapse(const std::string &s)
+{
+	CollapsingWhitespaceHandler h;
+	appendToWhitespaceHandler(h, s, 0);
+	return h.toString();
+}
+
+}
+
diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp
new file mode 100644
index 0000000..1e9f36a
--- /dev/null
+++ b/src/core/common/Whitespace.hpp
@@ -0,0 +1,120 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Whitespace.hpp
+ *
+ * Contains the WhitespaceMode enum used in various places, as well es functions
+ * for trimming and collapsing whitespaces.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_WHITESPACE_HPP_
+#define _OUSIA_WHITESPACE_HPP_
+
+#include <string>
+#include <utility>
+
+namespace ousia {
+
+/**
+ * Enum specifying the whitespace handling mode of the tokenizer and the
+ * parsers.
+ */
+enum class WhitespaceMode {
+	/**
+     * Preserves all whitespaces as they are found in the source file.
+     */
+	PRESERVE,
+
+	/**
+     * Trims whitespace at the beginning and the end of the found text.
+     */
+	TRIM,
+
+	/**
+     * Whitespaces are trimmed and collapsed, multiple whitespace characters
+     * are replaced by a single space character.
+     */
+	COLLAPSE
+};
+
+/**
+ * Collection of functions for trimming or collapsing whitespace.
+ */
+class Whitespace {
+	/**
+	 * Removes whitespace at the beginning and the end of the given string.
+	 *
+	 * @param s is the string that should be trimmed.
+	 * @return a trimmed copy of s.
+	 */
+	static std::string trim(const std::string &s);
+
+	/**
+	 * Trims the given string or vector of chars by returning the start and end
+	 * index.
+	 *
+	 * @param s is the container that should be trimmed.
+	 * @param f is a function that returns true for values that should be
+	 * removed.
+	 * @return start and end index. Note that "end" points at the character
+	 * beyond the end, thus "end" minus "start"
+	 */
+	template <class T, class Filter>
+	static std::pair<size_t, size_t> trim(const T &s, Filter f)
+	{
+		size_t start = 0;
+		for (size_t i = 0; i < s.size(); i++) {
+			if (!f(s[i])) {
+				start = i;
+				break;
+			}
+		}
+
+		size_t end = 0;
+		for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) {
+			if (!f(s[i])) {
+				end = i + 1;
+				break;
+			}
+		}
+
+		if (end < start) {
+			start = 0;
+			end = 0;
+		}
+
+		return std::pair<size_t, size_t>{start, end};
+	}
+
+	/**
+	 * Collapses the whitespaces in the given string (trims the string and
+	 * replaces all whitespace characters by a single one).
+	 *
+	 * @param s is the string in which the whitespace should be collapsed.
+	 * @return a copy of s with collapsed whitespace.
+	 */
+	static std::string collapse(const std::string &s);
+};
+
+}
+
+#endif /* _OUSIA_WHITESPACE_HPP_ */
+
diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp
new file mode 100644
index 0000000..1935c24
--- /dev/null
+++ b/src/core/common/WhitespaceHandler.hpp
@@ -0,0 +1,223 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file WhitespaceHandler.hpp
+ *
+ * Contains the WhitespaceHandler classes which are used in multiple places to
+ * trim, compact or preserve whitespaces while at the same time maintaining the
+ * position information associated with the input strings.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_
+#define _OUSIA_WHITESPACE_HANDLER_HPP_
+
+#include <string>
+#include <vector>
+
+#include "WhitespaceHandler.hpp"
+
+namespace ousia {
+
+/**
+ * WhitespaceHandler is a based class that can be used to collect text on a
+ * character-by-character basis. Note that this class and its descendants are
+ * hoped to be inlined by the compiler (and used in conjunction with templates),
+ * thus they are fully defined inside this header.
+ */
+class WhitespaceHandler {
+public:
+	/**
+	 * Start position of the extracted text.
+	 */
+	size_t textStart;
+
+	/**
+	 * End position of the extracted text.
+	 */
+	size_t textEnd;
+
+	/**
+	 * Buffer containing the extracted text.
+	 */
+	std::vector<char> textBuf;
+
+	/**
+	 * Constructor of the TextHandlerBase base class. Initializes the start and
+	 * end position with zeros.
+	 */
+	WhitespaceHandler() : textStart(0), textEnd(0) {}
+
+	/**
+	 * Returns true if this whitespace handler has found any text and a text
+	 * token could be emitted.
+	 *
+	 * @return true if the internal data buffer is non-empty.
+	 */
+	bool hasText() { return !textBuf.empty(); }
+
+	/**
+	 * Returns the content of the WhitespaceHandler as string.
+	 */
+	std::string toString()
+	{
+		return std::string(textBuf.data(), textBuf.size());
+	}
+};
+
+/**
+ * The PreservingWhitespaceHandler class preserves all characters unmodified,
+ * including whitepace characters.
+ */
+class PreservingWhitespaceHandler : public WhitespaceHandler {
+public:
+	/**
+	 * Appends the given character to the internal text buffer, does not
+	 * eliminate whitespace.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+		textBuf.push_back(c);
+	}
+};
+
+/**
+ * The TrimmingTextHandler class trims all whitespace characters at the begin
+ * and the end of a text section but leaves all other characters unmodified,
+ * including whitepace characters.
+ */
+class TrimmingWhitespaceHandler : public WhitespaceHandler {
+public:
+	/**
+	 * Buffer used internally to temporarily store all whitespace characters.
+	 * They are only added to the output buffer if another non-whitespace
+	 * character is reached.
+	 */
+	std::vector<char> whitespaceBuf;
+
+	/**
+	 * Appends the given character to the internal text buffer, eliminates
+	 * whitespace characters at the begin and end of the text.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		// Handle whitespace characters
+		if (Utils::isWhitespace(c)) {
+			if (!textBuf.empty()) {
+				whitespaceBuf.push_back(c);
+			}
+			return;
+		}
+
+		// Set the start and end offset correctly
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+
+		// Store the character
+		if (!whitespaceBuf.empty()) {
+			textBuf.insert(textBuf.end(), whitespaceBuf.begin(),
+			               whitespaceBuf.end());
+			whitespaceBuf.clear();
+		}
+		textBuf.push_back(c);
+	}
+};
+
+/**
+ * The CollapsingTextHandler trims characters at the beginning and end of the
+ * text and reduced multiple whitespace characters to a single blank.
+ */
+class CollapsingWhitespaceHandler : public WhitespaceHandler {
+public:
+	/**
+	 * Flag set to true if a whitespace character was reached.
+	 */
+	bool hasWhitespace = false;
+
+	/**
+	 * Appends the given character to the internal text buffer, eliminates
+	 * redundant whitespace characters.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		// Handle whitespace characters
+		if (Utils::isWhitespace(c)) {
+			if (!textBuf.empty()) {
+				hasWhitespace = true;
+			}
+			return;
+		}
+
+		// Set the start and end offset correctly
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+
+		// Store the character
+		if (hasWhitespace) {
+			textBuf.push_back(' ');
+			hasWhitespace = false;
+		}
+		textBuf.push_back(c);
+	}
+};
+
+/**
+ * Function that can be used to append the given buffer (e.g. a string or a
+ * vector) to the whitespace handler.
+ *
+ * @tparam WhitespaceHandler is one of the WhitespaceHandler classes.
+ * @tparam Buffer is an iterable type.
+ * @param handler is the handler to which the characters of the Buffer should be
+ * appended.
+ * @param buf is the buffer from which the characters should be read.
+ * @param start is the start byte offset. Each character is counted as one byte.
+ */
+template <typename WhitespaceHandler, typename Buffer>
+inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf,
+                                      size_t start)
+{
+	for (auto elem : buf) {
+		handler.append(elem, start++);
+	}
+}
+}
+
+#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */
+
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp
index 917f45c..6b8a916 100644
--- a/test/core/common/UtilsTest.cpp
+++ b/test/core/common/UtilsTest.cpp
@@ -32,14 +32,6 @@ TEST(Utils, isIdentifier)
 	ASSERT_FALSE(Utils::isIdentifier("invalid key"));
 }
 
-TEST(Utils, trim)
-{
-	ASSERT_EQ("hello world", Utils::trim("\t hello world   \n\r\t"));
-	ASSERT_EQ("hello world", Utils::trim("hello world   \n\r\t"));
-	ASSERT_EQ("hello world", Utils::trim("   hello world"));
-	ASSERT_EQ("hello world", Utils::trim("hello world"));
-}
-
 TEST(Utils, split)
 {
 	ASSERT_EQ(std::vector<std::string>({"ab"}), Utils::split("ab", '.'));
diff --git a/test/core/common/Whitespace.cpp b/test/core/common/Whitespace.cpp
new file mode 100644
index 0000000..d6df8b7
--- /dev/null
+++ b/test/core/common/Whitespace.cpp
@@ -0,0 +1,41 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/common/Whitespace.hpp>
+
+namespace ousia {
+
+TEST(Whitespace, trim)
+{
+	ASSERT_EQ("hello world", Whitespace::trim("\t hello world   \n\r\t"));
+	ASSERT_EQ("hello world", Whitespace::trim("hello world   \n\r\t"));
+	ASSERT_EQ("hello world", Whitespace::trim("   hello world"));
+	ASSERT_EQ("hello world", Whitespace::trim("hello world"));
+}
+
+TEST(Whitespace, collapse)
+{
+	ASSERT("hello world", Whitespace::collapse(" hello \n\t\r  world  \n\r\t"));
+	ASSERT("hello world", Whitespace::collapse("hello \n\t\r  world   \n\r\t"));
+	ASSERT("hello world", Whitespace::collapse("hello \n\t\r     world"));
+	ASSERT("hello world", Whitespace::collapse("hello world"));
+}
+}
+
-- 
cgit v1.2.3


From fec6ac1d65aee3e4e5c948b0f7cbdec7ceb6cb45 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:46:46 +0100
Subject: Added testcase for Osdmx Parser

---
 test/formats/osdmx/OsdmxParserTest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'test')

diff --git a/test/formats/osdmx/OsdmxParserTest.cpp b/test/formats/osdmx/OsdmxParserTest.cpp
index af1ef56..c0fb50d 100644
--- a/test/formats/osdmx/OsdmxParserTest.cpp
+++ b/test/formats/osdmx/OsdmxParserTest.cpp
@@ -29,7 +29,7 @@
 #include <core/StandaloneEnvironment.hpp>
 
 #include <plugins/filesystem/FileLocator.hpp>
-#include <plugins/xml/XmlParser.hpp>
+#include <formats/osdmx/OsdmxParser.hpp>
 
 namespace ousia {
 
-- 
cgit v1.2.3


From 65bbbd778f6e0a3668c859b0e22cced7075a726d Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:47:11 +0100
Subject: Moved DynamicTokenizer and TokenTrie to parser/utils

---
 src/core/parser/utils/TokenTrie.cpp        | 119 +++++++++
 src/core/parser/utils/TokenTrie.hpp        | 150 +++++++++++
 src/core/parser/utils/Tokenizer.cpp        | 381 ++++++++++++++++++++++++++
 src/core/parser/utils/Tokenizer.hpp        | 231 ++++++++++++++++
 src/formats/osdm/DynamicTokenizer.cpp      | 381 --------------------------
 src/formats/osdm/DynamicTokenizer.hpp      | 231 ----------------
 src/formats/osdm/TokenTrie.cpp             | 119 ---------
 src/formats/osdm/TokenTrie.hpp             | 150 -----------
 test/core/parser/utils/TokenTrieTest.cpp   |  92 +++++++
 test/core/parser/utils/TokenizerTest.cpp   | 415 +++++++++++++++++++++++++++++
 test/formats/osdm/DynamicTokenizerTest.cpp | 415 -----------------------------
 test/formats/osdm/TokenTrieTest.cpp        |  92 -------
 12 files changed, 1388 insertions(+), 1388 deletions(-)
 create mode 100644 src/core/parser/utils/TokenTrie.cpp
 create mode 100644 src/core/parser/utils/TokenTrie.hpp
 create mode 100644 src/core/parser/utils/Tokenizer.cpp
 create mode 100644 src/core/parser/utils/Tokenizer.hpp
 delete mode 100644 src/formats/osdm/DynamicTokenizer.cpp
 delete mode 100644 src/formats/osdm/DynamicTokenizer.hpp
 delete mode 100644 src/formats/osdm/TokenTrie.cpp
 delete mode 100644 src/formats/osdm/TokenTrie.hpp
 create mode 100644 test/core/parser/utils/TokenTrieTest.cpp
 create mode 100644 test/core/parser/utils/TokenizerTest.cpp
 delete mode 100644 test/formats/osdm/DynamicTokenizerTest.cpp
 delete mode 100644 test/formats/osdm/TokenTrieTest.cpp

(limited to 'test')

diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp
new file mode 100644
index 0000000..4a0430b
--- /dev/null
+++ b/src/core/parser/utils/TokenTrie.cpp
@@ -0,0 +1,119 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "TokenTrie.hpp"
+
+namespace ousia {
+
+/* Class DynamicTokenTree::Node */
+
+TokenTrie::Node::Node() : type(EmptyToken) {}
+
+/* Class DynamicTokenTree */
+
+bool TokenTrie::registerToken(const std::string &token,
+                              TokenTypeId type) noexcept
+{
+	// Abort if the token is empty -- this would taint the root node
+	if (token.empty()) {
+		return false;
+	}
+
+	// Iterate over each character in the given string and insert them as
+	// (new) nodes
+	Node *node = &root;
+	for (size_t i = 0; i < token.size(); i++) {
+		// Insert a new node if this one does not exist
+		const char c = token[i];
+		auto it = node->children.find(c);
+		if (it == node->children.end()) {
+			it = node->children.emplace(c, std::make_shared<Node>()).first;
+		}
+		node = it->second.get();
+	}
+
+	// If the resulting node already has a type set, we're screwed.
+	if (node->type != EmptyToken) {
+		return false;
+	}
+
+	// Otherwise just set the type to the given type.
+	node->type = type;
+	return true;
+}
+
+bool TokenTrie::unregisterToken(const std::string &token) noexcept
+{
+	// We cannot remove empty tokens as we need to access the fist character
+	// upfront
+	if (token.empty()) {
+		return false;
+	}
+
+	// First pass -- search the node in the path that can be deleted
+	Node *subtreeRoot = &root;
+	char subtreeKey = token[0];
+	Node *node = &root;
+	for (size_t i = 0; i < token.size(); i++) {
+		// Go to the next node, abort if the tree ends unexpectedly
+		auto it = node->children.find(token[i]);
+		if (it == node->children.end()) {
+			return false;
+		}
+
+		// Reset the subtree handler if this node has another type
+		node = it->second.get();
+		if ((node->type != EmptyToken || node->children.size() > 1) &&
+		    (i + 1 != token.size())) {
+			subtreeRoot = node;
+			subtreeKey = token[i + 1];
+		}
+	}
+
+	// If the node type is already EmptyToken, we cannot do anything here
+	if (node->type == EmptyToken) {
+		return false;
+	}
+
+	// If the target node has children, we cannot delete the subtree. Set the
+	// type to EmptyToken instead
+	if (!node->children.empty()) {
+		node->type = EmptyToken;
+		return true;
+	}
+
+	// If we end up here, we can safely delete the complete subtree
+	subtreeRoot->children.erase(subtreeKey);
+	return true;
+}
+
+TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept
+{
+	Node const *node = &root;
+	for (size_t i = 0; i < token.size(); i++) {
+		const char c = token[i];
+		auto it = node->children.find(c);
+		if (it == node->children.end()) {
+			return EmptyToken;
+		}
+		node = it->second.get();
+	}
+	return node->type;
+}
+}
+
diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp
new file mode 100644
index 0000000..36c2ffa
--- /dev/null
+++ b/src/core/parser/utils/TokenTrie.hpp
@@ -0,0 +1,150 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file TokenTrie.hpp
+ *
+ * Class representing a token trie that can be updated dynamically.
+ *
+ * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de)
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_TOKEN_TRIE_HPP_
+#define _OUSIA_TOKEN_TRIE_HPP_
+
+#include <cstdint>
+#include <memory>
+#include <limits>
+#include <unordered_map>
+
+namespace ousia {
+
+/**
+ * The TokenTypeId is used to give each token type a unique id.
+ */
+using TokenTypeId = uint32_t;
+
+/**
+ * Token which is not a token.
+ */
+constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max();
+
+/**
+ * Token which represents a text token.
+ */
+constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1;
+
+/**
+ * The Tokenizer internally uses a TokenTrie to be efficiently able to identify
+ * the longest consecutive token in the text. This is equivalent to a prefix
+ * trie.
+ *
+ * A token trie is a construct that structures all special tokens a Tokenizer
+ * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and
+ * three. Then the token tree would look like this:
+ *
+ * \code{*.txt}
+ *        ~ (0)
+ *       /     \
+ *      a (2)  b (0)
+ *      |      |
+ *      a (0)  a (0)
+ *      |      |
+ *      b (1)  c (0)
+ * \endcode
+ *
+ * Where the number indicates the corresponding token descriptor identifier.
+ */
+class TokenTrie {
+public:
+	/**
+	 * Structure used to build the node tree.
+	 */
+	struct Node {
+		/**
+		 * Type used for the child map.
+		 */
+		using ChildMap = std::unordered_map<char, std::shared_ptr<Node>>;
+
+		/**
+		 * Map from single characters at the corresponding child nodes.
+		 */
+		ChildMap children;
+
+		/**
+		 * Reference at the corresponding token descriptor. Set to nullptr if
+		 * no token is attached to this node.
+		 */
+		TokenTypeId type;
+
+		/**
+		 * Default constructor, initializes the descriptor with nullptr.
+		 */
+		Node();
+	};
+
+private:
+	/**
+	 * Root node of the internal token tree.
+	 */
+	Node root;
+
+public:
+	/**
+	 * Registers a token containing the given string. Returns false if the
+	 * token already exists, true otherwise.
+	 *
+	 * @param token is the character sequence that should be registered as
+	 * token.
+	 * @param type is the descriptor that should be set for this token.
+	 * @return true if the operation is successful, false otherwise.
+	 */
+	bool registerToken(const std::string &token, TokenTypeId type) noexcept;
+
+	/**
+	 * Unregisters the token from the token tree. Returns true if the token was
+	 * unregistered successfully, false otherwise.
+	 *
+	 * @param token is the character sequence that should be unregistered.
+	 * @return true if the operation was successful, false otherwise.
+	 */
+	bool unregisterToken(const std::string &token) noexcept;
+
+	/**
+	 * Returns true, if the given token exists within the TokenTree. This
+	 * function is mostly thought for debugging and unit testing.
+	 *
+	 * @param token is the character sequence that should be searched.
+	 * @return the attached token descriptor or nullptr if the given token is
+	 * not found.
+	 */
+	TokenTypeId hasToken(const std::string &token) const noexcept;
+
+	/**
+	 * Returns a reference at the root node to be used for traversing the token
+	 * tree.
+	 *
+	 * @return a reference at the root node.
+	 */
+	const Node *getRoot() const noexcept { return &root; }
+};
+}
+
+#endif /* _OUSIA_TOKEN_TRIE_HPP_ */
+
diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp
new file mode 100644
index 0000000..1fac25a
--- /dev/null
+++ b/src/core/parser/utils/Tokenizer.cpp
@@ -0,0 +1,381 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <memory>
+#include <vector>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Exceptions.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/WhitespaceHandler.hpp>
+
+#include "DynamicTokenizer.hpp"
+
+namespace ousia {
+
+namespace {
+
+/* Internal class TokenMatch */
+
+/**
+ * Contains information about a matching token.
+ */
+struct TokenMatch {
+	/**
+	 * Token that was matched.
+	 */
+	DynamicToken token;
+
+	/**
+	 * Current length of the data within the text handler. The text buffer needs
+	 * to be trimmed to this length if this token matches.
+	 */
+	size_t textLength;
+
+	/**
+	 * End location of the current text handler. This location needs to be used
+	 * for the text token that is emitted before the actual token.
+	 */
+	size_t textEnd;
+
+	/**
+	 * Constructor of the TokenMatch class.
+	 */
+	TokenMatch() : textLength(0), textEnd(0) {}
+
+	/**
+	 * Returns true if this TokenMatch instance actually represents a match.
+	 */
+	bool hasMatch() { return token.type != EmptyToken; }
+};
+
+/* Internal class TokenLookup */
+
+/**
+ * The TokenLookup class is used to represent a thread in a running token
+ * lookup.
+ */
+class TokenLookup {
+private:
+	/**
+	 * Current node within the token trie.
+	 */
+	TokenTrie::Node const *node;
+
+	/**
+	 * Start offset within the source file.
+	 */
+	size_t start;
+
+	/**
+	 * Current length of the data within the text handler. The text buffer needs
+	 * to be trimmed to this length if this token matches.
+	 */
+	size_t textLength;
+
+	/**
+	 * End location of the current text handler. This location needs to be used
+	 * for the text token that is emitted before the actual token.
+	 */
+	size_t textEnd;
+
+public:
+	/**
+	 * Constructor of the TokenLookup class.
+	 *
+	 * @param node is the current node.
+	 * @param start is the start position.
+	 * @param textLength is the text buffer length of the previous text token.
+	 * @param textEnd is the current end location of the previous text token.
+	 */
+	TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength,
+	            size_t textEnd)
+	    : node(node), start(start), textLength(textLength), textEnd(textEnd)
+	{
+	}
+
+	/**
+	 * Tries to extend the current path in the token trie with the given
+	 * character. If a complete token is matched, stores this match in the
+	 * tokens list (in case it is longer than any previous token).
+	 *
+	 * @param c is the character that should be appended to the current prefix.
+	 * @param lookups is a list to which new TokeLookup instances are added --
+	 * which could potentially be expanded in the next iteration.
+	 * @param match is the DynamicToken instance to which the matching token
+	 * should be written.
+	 * @param tokens is a reference at the internal token list of the
+	 * DynamicTokenizer.
+	 * @param end is the end byte offset of the current character.
+	 * @param sourceId is the source if of this file.
+	 */
+	void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match,
+	             const std::vector<std::string> &tokens, SourceOffset end,
+	             SourceId sourceId)
+	{
+		// Check whether we can continue the current token path with the given
+		// character without visiting an already visited node
+		auto it = node->children.find(c);
+		if (it == node->children.end()) {
+			return;
+		}
+
+		// Check whether the new node represents a complete token a whether it
+		// is longer than the current token. If yes, replace the current token.
+		node = it->second.get();
+		if (node->type != EmptyToken) {
+			const std::string &str = tokens[node->type];
+			size_t len = str.size();
+			if (len > match.token.content.size()) {
+				match.token =
+				    DynamicToken{node->type, str, {sourceId, start, end}};
+				match.textLength = textLength;
+				match.textEnd = textEnd;
+			}
+		}
+
+		// If this state can possibly be advanced, store it in the states list.
+		if (!node->children.empty()) {
+			lookups.emplace_back(*this);
+		}
+	}
+};
+
+/**
+ * Transforms the given token into a text token containing the extracted
+ * text.
+ *
+ * @param handler is the WhitespaceHandler containing the collected data.
+ * @param token is the output token to which the text should be written.
+ * @param sourceId is the source id of the underlying file.
+ */
+static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
+                           SourceId sourceId)
+{
+	if (match.hasMatch()) {
+		match.token.content =
+		    std::string{handler.textBuf.data(), match.textLength};
+		match.token.location =
+		    SourceLocation{sourceId, handler.textStart, match.textEnd};
+	} else {
+		match.token.content = handler.toString();
+		match.token.location =
+		    SourceLocation{sourceId, handler.textStart, handler.textEnd};
+	}
+	match.token.type = TextToken;
+}
+}
+
+/* Class DynamicTokenizer */
+
+DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
+    : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
+{
+}
+
+template <typename TextHandler, bool read>
+bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
+{
+	// If we're in the read mode, reset the char reader peek position to the
+	// current read position
+	if (read) {
+		reader.resetPeek();
+	}
+
+	// Prepare the lookups in the token trie
+	const TokenTrie::Node *root = trie.getRoot();
+	TokenMatch match;
+	std::vector<TokenLookup> lookups;
+	std::vector<TokenLookup> nextLookups;
+
+	// Instantiate the text handler
+	TextHandler textHandler;
+
+	// Peek characters from the reader and try to advance the current token tree
+	// cursor
+	char c;
+	size_t charStart = reader.getPeekOffset();
+	const SourceId sourceId = reader.getSourceId();
+	while (reader.peek(c)) {
+		const size_t charEnd = reader.getPeekOffset();
+		const size_t textLength = textHandler.textBuf.size();
+		const size_t textEnd = textHandler.textEnd;
+
+		// If we do not have a match yet, start a new lookup from the root
+		if (!match.hasMatch()) {
+			TokenLookup{root, charStart, textLength, textEnd}.advance(
+			    c, nextLookups, match, tokens, charEnd, sourceId);
+		}
+
+		// Try to advance all other lookups with the new character
+		for (TokenLookup &lookup : lookups) {
+			lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId);
+		}
+
+		// We have found a token and there are no more states to advance or the
+		// text handler has found something -- abort to return the new token
+		if (match.hasMatch()) {
+			if ((nextLookups.empty() || textHandler.hasText())) {
+				break;
+			}
+		} else {
+			// Record all incomming characters
+			textHandler.append(c, charStart, charEnd);
+		}
+
+		// Swap the lookups and the nextLookups list
+		lookups = std::move(nextLookups);
+		nextLookups.clear();
+
+		// Advance the offset
+		charStart = charEnd;
+	}
+
+	// If we found text, emit that text
+	if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) {
+		buildTextToken(textHandler, match, sourceId);
+	}
+
+	// Move the read/peek cursor to the end of the token, abort if an error
+	// happens while doing so
+	if (match.hasMatch()) {
+		// Make sure we have a valid location
+		if (match.token.location.getEnd() == InvalidSourceOffset) {
+			throw OusiaException{"Token end position offset out of range"};
+		}
+
+		// Seek to the end of the current token
+		const size_t end = match.token.location.getEnd();
+		if (read) {
+			reader.seek(end);
+		} else {
+			reader.seekPeekCursor(end);
+		}
+		token = match.token;
+	} else {
+		token = DynamicToken{};
+	}
+	return match.hasMatch();
+}
+
+bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token)
+{
+	switch (whitespaceMode) {
+		case WhitespaceMode::PRESERVE:
+			return next<PreservingWhitespaceHandler, true>(reader, token);
+		case WhitespaceMode::TRIM:
+			return next<TrimmingWhitespaceHandler, true>(reader, token);
+		case WhitespaceMode::COLLAPSE:
+			return next<CollapsingWhitespaceHandler, true>(reader, token);
+	}
+	return false;
+}
+
+bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token)
+{
+	switch (whitespaceMode) {
+		case WhitespaceMode::PRESERVE:
+			return next<PreservingWhitespaceHandler, false>(reader, token);
+		case WhitespaceMode::TRIM:
+			return next<TrimmingWhitespaceHandler, false>(reader, token);
+		case WhitespaceMode::COLLAPSE:
+			return next<CollapsingWhitespaceHandler, false>(reader, token);
+	}
+	return false;
+}
+
+TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
+{
+	// Abort if an empty token should be registered
+	if (token.empty()) {
+		return EmptyToken;
+	}
+
+	// Search for a new slot in the tokens list
+	TokenTypeId type = EmptyToken;
+	for (size_t i = nextTokenTypeId; i < tokens.size(); i++) {
+		if (tokens[i].empty()) {
+			tokens[i] = token;
+			type = i;
+			break;
+		}
+	}
+
+	// No existing slot was found, add a new one -- make sure we do not
+	// override the special token type handles
+	if (type == EmptyToken) {
+		type = tokens.size();
+		if (type == TextToken || type == EmptyToken) {
+			throw OusiaException{"Token type ids depleted!"};
+		}
+		tokens.emplace_back(token);
+	}
+	nextTokenTypeId = type + 1;
+
+	// Try to register the token in the trie -- if this fails, remove it
+	// from the tokens list
+	if (!trie.registerToken(token, type)) {
+		tokens[type] = std::string{};
+		nextTokenTypeId = type;
+		return EmptyToken;
+	}
+	return type;
+}
+
+bool DynamicTokenizer::unregisterToken(TokenTypeId type)
+{
+	// Unregister the token from the trie, abort if an invalid type is given
+	if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
+		tokens[type] = std::string{};
+		nextTokenTypeId = type;
+		return true;
+	}
+	return false;
+}
+
+std::string DynamicTokenizer::getTokenString(TokenTypeId type)
+{
+	if (type < tokens.size()) {
+		return tokens[type];
+	}
+	return std::string{};
+}
+
+void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode)
+{
+	whitespaceMode = mode;
+}
+
+WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
+
+/* Explicitly instantiate all possible instantiations of the "next" member
+   function */
+template bool DynamicTokenizer::next<PreservingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<PreservingWhitespaceHandler, true>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, true>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, true>(
+    CharReader &reader, DynamicToken &token);
+}
+
diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp
new file mode 100644
index 0000000..3e5aeb3
--- /dev/null
+++ b/src/core/parser/utils/Tokenizer.hpp
@@ -0,0 +1,231 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file DynamicTokenizer.hpp
+ *
+ * Tokenizer that can be reconfigured at runtime used for parsing the plain
+ * text format.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_
+#define _OUSIA_DYNAMIC_TOKENIZER_HPP_
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include <core/common/Location.hpp>
+#include <core/common/Whitespace.hpp>
+
+#include "TokenTrie.hpp"
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+
+/**
+ * The DynamicToken structure describes a token discovered by the Tokenizer.
+ */
+struct DynamicToken {
+	/**
+	 * Id of the type of this token.
+	 */
+	TokenTypeId type;
+
+	/**
+	 * String that was matched.
+	 */
+	std::string content;
+
+	/**
+	 * Location from which the string was extracted.
+	 */
+	SourceLocation location;
+
+	/**
+	 * Default constructor.
+	 */
+	DynamicToken() : type(EmptyToken) {}
+
+	/**
+	 * Constructor of the DynamicToken struct.
+	 *
+	 * @param id represents the token type.
+	 * @param content is the string content that has been extracted.
+	 * @param location is the location of the extracted string content in the
+	 * source file.
+	 */
+	DynamicToken(TokenTypeId type, const std::string &content,
+	             SourceLocation location)
+	    : type(type), content(content), location(location)
+	{
+	}
+
+	/**
+	 * Constructor of the DynamicToken struct, only initializes the token type
+	 *
+	 * @param type is the id corresponding to the type of the token.
+	 */
+	DynamicToken(TokenTypeId type) : type(type) {}
+
+	/**
+	 * The getLocation function allows the tokens to be directly passed as
+	 * parameter to Logger or LoggableException instances.
+	 *
+	 * @return a reference at the location field
+	 */
+	const SourceLocation &getLocation() const { return location; }
+};
+
+/**
+ * The DynamicTokenizer is used to extract tokens and chunks of text from a
+ * CharReader. It allows to register and unregister tokens while parsing and
+ * to modify the handling of whitespace characters. Note that the
+ * DynamicTokenizer always tries to extract the longest possible token from the
+ * tokenizer.
+ */
+class DynamicTokenizer {
+private:
+	/**
+	 * Internally used token trie. This object holds all registered tokens.
+	 */
+	TokenTrie trie;
+
+	/**
+	 * Flag defining whether whitespaces should be preserved or not.
+	 */
+	WhitespaceMode whitespaceMode;
+
+	/**
+	 * Vector containing all registered token types.
+	 */
+	std::vector<std::string> tokens;
+
+	/**
+	 * Next index in the tokens list where to search for a new token id.
+	 */
+	size_t nextTokenTypeId;
+
+	/**
+	 * Templated function used internally to read the current token. The
+	 * function is templated in order to force code generation for all six
+	 * combiations of whitespace modes and reading/peeking.
+	 *
+	 * @tparam TextHandler is the type to be used for the textHandler instance.
+	 * @tparam read specifies whether the function should start from and advance
+	 * the read pointer of the char reader.
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is the token structure into which the token information
+	 * should be written.
+	 * @return false if the end of the stream has been reached, true otherwise.
+	 */
+	template <typename TextHandler, bool read>
+	bool next(CharReader &reader, DynamicToken &token);
+
+public:
+	/**
+	 * Constructor of the DynamicTokenizer class.
+	 *
+	 * @param whitespaceMode specifies how whitespace should be handled.
+	 */
+	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
+
+	/**
+	 * Registers the given string as a token. Returns a const pointer at a
+	 * TokenDescriptor that will be used to reference the newly created token.
+	 *
+	 * @param token is the token string that should be registered.
+	 * @return a unique identifier for the registered token or EmptyToken if
+	 * an error occured.
+	 */
+	TokenTypeId registerToken(const std::string &token);
+
+	/**
+	 * Unregisters the token belonging to the given TokenTypeId.
+	 *
+	 * @param type is the token type that should be unregistered. The
+	 *TokenTypeId
+	 * must have been returned by registerToken.
+	 * @return true if the operation was successful, false otherwise (e.g.
+	 * because the given TokenDescriptor was already unregistered).
+	 */
+	bool unregisterToken(TokenTypeId type);
+
+	/**
+	 * Returns the token that was registered under the given TokenTypeId id or
+	 *an
+	 * empty string if an invalid TokenTypeId id is given.
+	 *
+	 * @param type is the TokenTypeId id for which the corresponding token
+	 *string
+	 * should be returned.
+	 * @return the registered token string or an empty string if the given type
+	 * was invalid.
+	 */
+	std::string getTokenString(TokenTypeId type);
+
+	/**
+	 * Sets the whitespace mode.
+	 *
+	 * @param whitespaceMode defines how whitespace should be treated in text
+	 * tokens.
+	 */
+	void setWhitespaceMode(WhitespaceMode mode);
+
+	/**
+	 * Returns the current value of the whitespace mode.
+	 *
+	 * @return the whitespace mode.
+	 */
+	WhitespaceMode getWhitespaceMode();
+
+	/**
+	 * Reads a new token from the CharReader and stores it in the given
+	 * DynamicToken instance.
+	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is a reference at the token instance into which the Token
+	 * information should be written.
+	 * @return true if a token could be read, false if the end of the stream
+	 * has been reached.
+	 */
+	bool read(CharReader &reader, DynamicToken &token);
+
+	/**
+	 * The peek method does not advance the read position of the char reader,
+	 * but reads the next token from the current char reader peek position.
+	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is a reference at the token instance into which the Token
+	 * information should be written.
+	 * @return true if a token could be read, false if the end of the stream
+	 * has been reached.
+	 */
+	bool peek(CharReader &reader, DynamicToken &token);
+};
+}
+
+#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */
+
diff --git a/src/formats/osdm/DynamicTokenizer.cpp b/src/formats/osdm/DynamicTokenizer.cpp
deleted file mode 100644
index 1fac25a..0000000
--- a/src/formats/osdm/DynamicTokenizer.cpp
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <memory>
-#include <vector>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/Exceptions.hpp>
-#include <core/common/Utils.hpp>
-#include <core/common/WhitespaceHandler.hpp>
-
-#include "DynamicTokenizer.hpp"
-
-namespace ousia {
-
-namespace {
-
-/* Internal class TokenMatch */
-
-/**
- * Contains information about a matching token.
- */
-struct TokenMatch {
-	/**
-	 * Token that was matched.
-	 */
-	DynamicToken token;
-
-	/**
-	 * Current length of the data within the text handler. The text buffer needs
-	 * to be trimmed to this length if this token matches.
-	 */
-	size_t textLength;
-
-	/**
-	 * End location of the current text handler. This location needs to be used
-	 * for the text token that is emitted before the actual token.
-	 */
-	size_t textEnd;
-
-	/**
-	 * Constructor of the TokenMatch class.
-	 */
-	TokenMatch() : textLength(0), textEnd(0) {}
-
-	/**
-	 * Returns true if this TokenMatch instance actually represents a match.
-	 */
-	bool hasMatch() { return token.type != EmptyToken; }
-};
-
-/* Internal class TokenLookup */
-
-/**
- * The TokenLookup class is used to represent a thread in a running token
- * lookup.
- */
-class TokenLookup {
-private:
-	/**
-	 * Current node within the token trie.
-	 */
-	TokenTrie::Node const *node;
-
-	/**
-	 * Start offset within the source file.
-	 */
-	size_t start;
-
-	/**
-	 * Current length of the data within the text handler. The text buffer needs
-	 * to be trimmed to this length if this token matches.
-	 */
-	size_t textLength;
-
-	/**
-	 * End location of the current text handler. This location needs to be used
-	 * for the text token that is emitted before the actual token.
-	 */
-	size_t textEnd;
-
-public:
-	/**
-	 * Constructor of the TokenLookup class.
-	 *
-	 * @param node is the current node.
-	 * @param start is the start position.
-	 * @param textLength is the text buffer length of the previous text token.
-	 * @param textEnd is the current end location of the previous text token.
-	 */
-	TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength,
-	            size_t textEnd)
-	    : node(node), start(start), textLength(textLength), textEnd(textEnd)
-	{
-	}
-
-	/**
-	 * Tries to extend the current path in the token trie with the given
-	 * character. If a complete token is matched, stores this match in the
-	 * tokens list (in case it is longer than any previous token).
-	 *
-	 * @param c is the character that should be appended to the current prefix.
-	 * @param lookups is a list to which new TokeLookup instances are added --
-	 * which could potentially be expanded in the next iteration.
-	 * @param match is the DynamicToken instance to which the matching token
-	 * should be written.
-	 * @param tokens is a reference at the internal token list of the
-	 * DynamicTokenizer.
-	 * @param end is the end byte offset of the current character.
-	 * @param sourceId is the source if of this file.
-	 */
-	void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match,
-	             const std::vector<std::string> &tokens, SourceOffset end,
-	             SourceId sourceId)
-	{
-		// Check whether we can continue the current token path with the given
-		// character without visiting an already visited node
-		auto it = node->children.find(c);
-		if (it == node->children.end()) {
-			return;
-		}
-
-		// Check whether the new node represents a complete token a whether it
-		// is longer than the current token. If yes, replace the current token.
-		node = it->second.get();
-		if (node->type != EmptyToken) {
-			const std::string &str = tokens[node->type];
-			size_t len = str.size();
-			if (len > match.token.content.size()) {
-				match.token =
-				    DynamicToken{node->type, str, {sourceId, start, end}};
-				match.textLength = textLength;
-				match.textEnd = textEnd;
-			}
-		}
-
-		// If this state can possibly be advanced, store it in the states list.
-		if (!node->children.empty()) {
-			lookups.emplace_back(*this);
-		}
-	}
-};
-
-/**
- * Transforms the given token into a text token containing the extracted
- * text.
- *
- * @param handler is the WhitespaceHandler containing the collected data.
- * @param token is the output token to which the text should be written.
- * @param sourceId is the source id of the underlying file.
- */
-static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
-                           SourceId sourceId)
-{
-	if (match.hasMatch()) {
-		match.token.content =
-		    std::string{handler.textBuf.data(), match.textLength};
-		match.token.location =
-		    SourceLocation{sourceId, handler.textStart, match.textEnd};
-	} else {
-		match.token.content = handler.toString();
-		match.token.location =
-		    SourceLocation{sourceId, handler.textStart, handler.textEnd};
-	}
-	match.token.type = TextToken;
-}
-}
-
-/* Class DynamicTokenizer */
-
-DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
-    : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
-{
-}
-
-template <typename TextHandler, bool read>
-bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
-{
-	// If we're in the read mode, reset the char reader peek position to the
-	// current read position
-	if (read) {
-		reader.resetPeek();
-	}
-
-	// Prepare the lookups in the token trie
-	const TokenTrie::Node *root = trie.getRoot();
-	TokenMatch match;
-	std::vector<TokenLookup> lookups;
-	std::vector<TokenLookup> nextLookups;
-
-	// Instantiate the text handler
-	TextHandler textHandler;
-
-	// Peek characters from the reader and try to advance the current token tree
-	// cursor
-	char c;
-	size_t charStart = reader.getPeekOffset();
-	const SourceId sourceId = reader.getSourceId();
-	while (reader.peek(c)) {
-		const size_t charEnd = reader.getPeekOffset();
-		const size_t textLength = textHandler.textBuf.size();
-		const size_t textEnd = textHandler.textEnd;
-
-		// If we do not have a match yet, start a new lookup from the root
-		if (!match.hasMatch()) {
-			TokenLookup{root, charStart, textLength, textEnd}.advance(
-			    c, nextLookups, match, tokens, charEnd, sourceId);
-		}
-
-		// Try to advance all other lookups with the new character
-		for (TokenLookup &lookup : lookups) {
-			lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId);
-		}
-
-		// We have found a token and there are no more states to advance or the
-		// text handler has found something -- abort to return the new token
-		if (match.hasMatch()) {
-			if ((nextLookups.empty() || textHandler.hasText())) {
-				break;
-			}
-		} else {
-			// Record all incomming characters
-			textHandler.append(c, charStart, charEnd);
-		}
-
-		// Swap the lookups and the nextLookups list
-		lookups = std::move(nextLookups);
-		nextLookups.clear();
-
-		// Advance the offset
-		charStart = charEnd;
-	}
-
-	// If we found text, emit that text
-	if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) {
-		buildTextToken(textHandler, match, sourceId);
-	}
-
-	// Move the read/peek cursor to the end of the token, abort if an error
-	// happens while doing so
-	if (match.hasMatch()) {
-		// Make sure we have a valid location
-		if (match.token.location.getEnd() == InvalidSourceOffset) {
-			throw OusiaException{"Token end position offset out of range"};
-		}
-
-		// Seek to the end of the current token
-		const size_t end = match.token.location.getEnd();
-		if (read) {
-			reader.seek(end);
-		} else {
-			reader.seekPeekCursor(end);
-		}
-		token = match.token;
-	} else {
-		token = DynamicToken{};
-	}
-	return match.hasMatch();
-}
-
-bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token)
-{
-	switch (whitespaceMode) {
-		case WhitespaceMode::PRESERVE:
-			return next<PreservingWhitespaceHandler, true>(reader, token);
-		case WhitespaceMode::TRIM:
-			return next<TrimmingWhitespaceHandler, true>(reader, token);
-		case WhitespaceMode::COLLAPSE:
-			return next<CollapsingWhitespaceHandler, true>(reader, token);
-	}
-	return false;
-}
-
-bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token)
-{
-	switch (whitespaceMode) {
-		case WhitespaceMode::PRESERVE:
-			return next<PreservingWhitespaceHandler, false>(reader, token);
-		case WhitespaceMode::TRIM:
-			return next<TrimmingWhitespaceHandler, false>(reader, token);
-		case WhitespaceMode::COLLAPSE:
-			return next<CollapsingWhitespaceHandler, false>(reader, token);
-	}
-	return false;
-}
-
-TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
-{
-	// Abort if an empty token should be registered
-	if (token.empty()) {
-		return EmptyToken;
-	}
-
-	// Search for a new slot in the tokens list
-	TokenTypeId type = EmptyToken;
-	for (size_t i = nextTokenTypeId; i < tokens.size(); i++) {
-		if (tokens[i].empty()) {
-			tokens[i] = token;
-			type = i;
-			break;
-		}
-	}
-
-	// No existing slot was found, add a new one -- make sure we do not
-	// override the special token type handles
-	if (type == EmptyToken) {
-		type = tokens.size();
-		if (type == TextToken || type == EmptyToken) {
-			throw OusiaException{"Token type ids depleted!"};
-		}
-		tokens.emplace_back(token);
-	}
-	nextTokenTypeId = type + 1;
-
-	// Try to register the token in the trie -- if this fails, remove it
-	// from the tokens list
-	if (!trie.registerToken(token, type)) {
-		tokens[type] = std::string{};
-		nextTokenTypeId = type;
-		return EmptyToken;
-	}
-	return type;
-}
-
-bool DynamicTokenizer::unregisterToken(TokenTypeId type)
-{
-	// Unregister the token from the trie, abort if an invalid type is given
-	if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
-		tokens[type] = std::string{};
-		nextTokenTypeId = type;
-		return true;
-	}
-	return false;
-}
-
-std::string DynamicTokenizer::getTokenString(TokenTypeId type)
-{
-	if (type < tokens.size()) {
-		return tokens[type];
-	}
-	return std::string{};
-}
-
-void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode)
-{
-	whitespaceMode = mode;
-}
-
-WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
-
-/* Explicitly instantiate all possible instantiations of the "next" member
-   function */
-template bool DynamicTokenizer::next<PreservingWhitespaceHandler, false>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, false>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, false>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<PreservingWhitespaceHandler, true>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, true>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, true>(
-    CharReader &reader, DynamicToken &token);
-}
-
diff --git a/src/formats/osdm/DynamicTokenizer.hpp b/src/formats/osdm/DynamicTokenizer.hpp
deleted file mode 100644
index 3e5aeb3..0000000
--- a/src/formats/osdm/DynamicTokenizer.hpp
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file DynamicTokenizer.hpp
- *
- * Tokenizer that can be reconfigured at runtime used for parsing the plain
- * text format.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_
-#define _OUSIA_DYNAMIC_TOKENIZER_HPP_
-
-#include <set>
-#include <string>
-#include <vector>
-
-#include <core/common/Location.hpp>
-#include <core/common/Whitespace.hpp>
-
-#include "TokenTrie.hpp"
-
-namespace ousia {
-
-// Forward declarations
-class CharReader;
-
-/**
- * The DynamicToken structure describes a token discovered by the Tokenizer.
- */
-struct DynamicToken {
-	/**
-	 * Id of the type of this token.
-	 */
-	TokenTypeId type;
-
-	/**
-	 * String that was matched.
-	 */
-	std::string content;
-
-	/**
-	 * Location from which the string was extracted.
-	 */
-	SourceLocation location;
-
-	/**
-	 * Default constructor.
-	 */
-	DynamicToken() : type(EmptyToken) {}
-
-	/**
-	 * Constructor of the DynamicToken struct.
-	 *
-	 * @param id represents the token type.
-	 * @param content is the string content that has been extracted.
-	 * @param location is the location of the extracted string content in the
-	 * source file.
-	 */
-	DynamicToken(TokenTypeId type, const std::string &content,
-	             SourceLocation location)
-	    : type(type), content(content), location(location)
-	{
-	}
-
-	/**
-	 * Constructor of the DynamicToken struct, only initializes the token type
-	 *
-	 * @param type is the id corresponding to the type of the token.
-	 */
-	DynamicToken(TokenTypeId type) : type(type) {}
-
-	/**
-	 * The getLocation function allows the tokens to be directly passed as
-	 * parameter to Logger or LoggableException instances.
-	 *
-	 * @return a reference at the location field
-	 */
-	const SourceLocation &getLocation() const { return location; }
-};
-
-/**
- * The DynamicTokenizer is used to extract tokens and chunks of text from a
- * CharReader. It allows to register and unregister tokens while parsing and
- * to modify the handling of whitespace characters. Note that the
- * DynamicTokenizer always tries to extract the longest possible token from the
- * tokenizer.
- */
-class DynamicTokenizer {
-private:
-	/**
-	 * Internally used token trie. This object holds all registered tokens.
-	 */
-	TokenTrie trie;
-
-	/**
-	 * Flag defining whether whitespaces should be preserved or not.
-	 */
-	WhitespaceMode whitespaceMode;
-
-	/**
-	 * Vector containing all registered token types.
-	 */
-	std::vector<std::string> tokens;
-
-	/**
-	 * Next index in the tokens list where to search for a new token id.
-	 */
-	size_t nextTokenTypeId;
-
-	/**
-	 * Templated function used internally to read the current token. The
-	 * function is templated in order to force code generation for all six
-	 * combiations of whitespace modes and reading/peeking.
-	 *
-	 * @tparam TextHandler is the type to be used for the textHandler instance.
-	 * @tparam read specifies whether the function should start from and advance
-	 * the read pointer of the char reader.
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is the token structure into which the token information
-	 * should be written.
-	 * @return false if the end of the stream has been reached, true otherwise.
-	 */
-	template <typename TextHandler, bool read>
-	bool next(CharReader &reader, DynamicToken &token);
-
-public:
-	/**
-	 * Constructor of the DynamicTokenizer class.
-	 *
-	 * @param whitespaceMode specifies how whitespace should be handled.
-	 */
-	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
-
-	/**
-	 * Registers the given string as a token. Returns a const pointer at a
-	 * TokenDescriptor that will be used to reference the newly created token.
-	 *
-	 * @param token is the token string that should be registered.
-	 * @return a unique identifier for the registered token or EmptyToken if
-	 * an error occured.
-	 */
-	TokenTypeId registerToken(const std::string &token);
-
-	/**
-	 * Unregisters the token belonging to the given TokenTypeId.
-	 *
-	 * @param type is the token type that should be unregistered. The
-	 *TokenTypeId
-	 * must have been returned by registerToken.
-	 * @return true if the operation was successful, false otherwise (e.g.
-	 * because the given TokenDescriptor was already unregistered).
-	 */
-	bool unregisterToken(TokenTypeId type);
-
-	/**
-	 * Returns the token that was registered under the given TokenTypeId id or
-	 *an
-	 * empty string if an invalid TokenTypeId id is given.
-	 *
-	 * @param type is the TokenTypeId id for which the corresponding token
-	 *string
-	 * should be returned.
-	 * @return the registered token string or an empty string if the given type
-	 * was invalid.
-	 */
-	std::string getTokenString(TokenTypeId type);
-
-	/**
-	 * Sets the whitespace mode.
-	 *
-	 * @param whitespaceMode defines how whitespace should be treated in text
-	 * tokens.
-	 */
-	void setWhitespaceMode(WhitespaceMode mode);
-
-	/**
-	 * Returns the current value of the whitespace mode.
-	 *
-	 * @return the whitespace mode.
-	 */
-	WhitespaceMode getWhitespaceMode();
-
-	/**
-	 * Reads a new token from the CharReader and stores it in the given
-	 * DynamicToken instance.
-	 *
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is a reference at the token instance into which the Token
-	 * information should be written.
-	 * @return true if a token could be read, false if the end of the stream
-	 * has been reached.
-	 */
-	bool read(CharReader &reader, DynamicToken &token);
-
-	/**
-	 * The peek method does not advance the read position of the char reader,
-	 * but reads the next token from the current char reader peek position.
-	 *
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is a reference at the token instance into which the Token
-	 * information should be written.
-	 * @return true if a token could be read, false if the end of the stream
-	 * has been reached.
-	 */
-	bool peek(CharReader &reader, DynamicToken &token);
-};
-}
-
-#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */
-
diff --git a/src/formats/osdm/TokenTrie.cpp b/src/formats/osdm/TokenTrie.cpp
deleted file mode 100644
index 4a0430b..0000000
--- a/src/formats/osdm/TokenTrie.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "TokenTrie.hpp"
-
-namespace ousia {
-
-/* Class DynamicTokenTree::Node */
-
-TokenTrie::Node::Node() : type(EmptyToken) {}
-
-/* Class DynamicTokenTree */
-
-bool TokenTrie::registerToken(const std::string &token,
-                              TokenTypeId type) noexcept
-{
-	// Abort if the token is empty -- this would taint the root node
-	if (token.empty()) {
-		return false;
-	}
-
-	// Iterate over each character in the given string and insert them as
-	// (new) nodes
-	Node *node = &root;
-	for (size_t i = 0; i < token.size(); i++) {
-		// Insert a new node if this one does not exist
-		const char c = token[i];
-		auto it = node->children.find(c);
-		if (it == node->children.end()) {
-			it = node->children.emplace(c, std::make_shared<Node>()).first;
-		}
-		node = it->second.get();
-	}
-
-	// If the resulting node already has a type set, we're screwed.
-	if (node->type != EmptyToken) {
-		return false;
-	}
-
-	// Otherwise just set the type to the given type.
-	node->type = type;
-	return true;
-}
-
-bool TokenTrie::unregisterToken(const std::string &token) noexcept
-{
-	// We cannot remove empty tokens as we need to access the fist character
-	// upfront
-	if (token.empty()) {
-		return false;
-	}
-
-	// First pass -- search the node in the path that can be deleted
-	Node *subtreeRoot = &root;
-	char subtreeKey = token[0];
-	Node *node = &root;
-	for (size_t i = 0; i < token.size(); i++) {
-		// Go to the next node, abort if the tree ends unexpectedly
-		auto it = node->children.find(token[i]);
-		if (it == node->children.end()) {
-			return false;
-		}
-
-		// Reset the subtree handler if this node has another type
-		node = it->second.get();
-		if ((node->type != EmptyToken || node->children.size() > 1) &&
-		    (i + 1 != token.size())) {
-			subtreeRoot = node;
-			subtreeKey = token[i + 1];
-		}
-	}
-
-	// If the node type is already EmptyToken, we cannot do anything here
-	if (node->type == EmptyToken) {
-		return false;
-	}
-
-	// If the target node has children, we cannot delete the subtree. Set the
-	// type to EmptyToken instead
-	if (!node->children.empty()) {
-		node->type = EmptyToken;
-		return true;
-	}
-
-	// If we end up here, we can safely delete the complete subtree
-	subtreeRoot->children.erase(subtreeKey);
-	return true;
-}
-
-TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept
-{
-	Node const *node = &root;
-	for (size_t i = 0; i < token.size(); i++) {
-		const char c = token[i];
-		auto it = node->children.find(c);
-		if (it == node->children.end()) {
-			return EmptyToken;
-		}
-		node = it->second.get();
-	}
-	return node->type;
-}
-}
-
diff --git a/src/formats/osdm/TokenTrie.hpp b/src/formats/osdm/TokenTrie.hpp
deleted file mode 100644
index 36c2ffa..0000000
--- a/src/formats/osdm/TokenTrie.hpp
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file TokenTrie.hpp
- *
- * Class representing a token trie that can be updated dynamically.
- *
- * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de)
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_TOKEN_TRIE_HPP_
-#define _OUSIA_TOKEN_TRIE_HPP_
-
-#include <cstdint>
-#include <memory>
-#include <limits>
-#include <unordered_map>
-
-namespace ousia {
-
-/**
- * The TokenTypeId is used to give each token type a unique id.
- */
-using TokenTypeId = uint32_t;
-
-/**
- * Token which is not a token.
- */
-constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max();
-
-/**
- * Token which represents a text token.
- */
-constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1;
-
-/**
- * The Tokenizer internally uses a TokenTrie to be efficiently able to identify
- * the longest consecutive token in the text. This is equivalent to a prefix
- * trie.
- *
- * A token trie is a construct that structures all special tokens a Tokenizer
- * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and
- * three. Then the token tree would look like this:
- *
- * \code{*.txt}
- *        ~ (0)
- *       /     \
- *      a (2)  b (0)
- *      |      |
- *      a (0)  a (0)
- *      |      |
- *      b (1)  c (0)
- * \endcode
- *
- * Where the number indicates the corresponding token descriptor identifier.
- */
-class TokenTrie {
-public:
-	/**
-	 * Structure used to build the node tree.
-	 */
-	struct Node {
-		/**
-		 * Type used for the child map.
-		 */
-		using ChildMap = std::unordered_map<char, std::shared_ptr<Node>>;
-
-		/**
-		 * Map from single characters at the corresponding child nodes.
-		 */
-		ChildMap children;
-
-		/**
-		 * Reference at the corresponding token descriptor. Set to nullptr if
-		 * no token is attached to this node.
-		 */
-		TokenTypeId type;
-
-		/**
-		 * Default constructor, initializes the descriptor with nullptr.
-		 */
-		Node();
-	};
-
-private:
-	/**
-	 * Root node of the internal token tree.
-	 */
-	Node root;
-
-public:
-	/**
-	 * Registers a token containing the given string. Returns false if the
-	 * token already exists, true otherwise.
-	 *
-	 * @param token is the character sequence that should be registered as
-	 * token.
-	 * @param type is the descriptor that should be set for this token.
-	 * @return true if the operation is successful, false otherwise.
-	 */
-	bool registerToken(const std::string &token, TokenTypeId type) noexcept;
-
-	/**
-	 * Unregisters the token from the token tree. Returns true if the token was
-	 * unregistered successfully, false otherwise.
-	 *
-	 * @param token is the character sequence that should be unregistered.
-	 * @return true if the operation was successful, false otherwise.
-	 */
-	bool unregisterToken(const std::string &token) noexcept;
-
-	/**
-	 * Returns true, if the given token exists within the TokenTree. This
-	 * function is mostly thought for debugging and unit testing.
-	 *
-	 * @param token is the character sequence that should be searched.
-	 * @return the attached token descriptor or nullptr if the given token is
-	 * not found.
-	 */
-	TokenTypeId hasToken(const std::string &token) const noexcept;
-
-	/**
-	 * Returns a reference at the root node to be used for traversing the token
-	 * tree.
-	 *
-	 * @return a reference at the root node.
-	 */
-	const Node *getRoot() const noexcept { return &root; }
-};
-}
-
-#endif /* _OUSIA_TOKEN_TRIE_HPP_ */
-
diff --git a/test/core/parser/utils/TokenTrieTest.cpp b/test/core/parser/utils/TokenTrieTest.cpp
new file mode 100644
index 0000000..aacd6c0
--- /dev/null
+++ b/test/core/parser/utils/TokenTrieTest.cpp
@@ -0,0 +1,92 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <formats/osdm/TokenTrie.hpp>
+
+namespace ousia {
+
+static const TokenTypeId t1 = 0;
+static const TokenTypeId t2 = 1;
+static const TokenTypeId t3 = 2;
+static const TokenTypeId t4 = 3;
+
+TEST(TokenTrie, registerToken)
+{
+	TokenTrie tree;
+
+	ASSERT_TRUE(tree.registerToken("a", t1));
+	ASSERT_TRUE(tree.registerToken("ab", t2));
+	ASSERT_TRUE(tree.registerToken("b", t3));
+	ASSERT_TRUE(tree.registerToken("hello", t4));
+
+	ASSERT_FALSE(tree.registerToken("", t1));
+	ASSERT_FALSE(tree.registerToken("a", t4));
+	ASSERT_FALSE(tree.registerToken("ab", t4));
+	ASSERT_FALSE(tree.registerToken("b", t4));
+	ASSERT_FALSE(tree.registerToken("hello", t4));
+
+	ASSERT_EQ(t1, tree.hasToken("a"));
+	ASSERT_EQ(t2, tree.hasToken("ab"));
+	ASSERT_EQ(t3, tree.hasToken("b"));
+	ASSERT_EQ(t4, tree.hasToken("hello"));
+	ASSERT_EQ(EmptyToken, tree.hasToken(""));
+	ASSERT_EQ(EmptyToken, tree.hasToken("abc"));
+}
+
+TEST(TokenTrie, unregisterToken)
+{
+	TokenTrie tree;
+
+	ASSERT_TRUE(tree.registerToken("a", t1));
+	ASSERT_FALSE(tree.registerToken("a", t4));
+
+	ASSERT_TRUE(tree.registerToken("ab", t2));
+	ASSERT_FALSE(tree.registerToken("ab", t4));
+
+	ASSERT_TRUE(tree.registerToken("b", t3));
+	ASSERT_FALSE(tree.registerToken("b", t4));
+
+	ASSERT_EQ(t1, tree.hasToken("a"));
+	ASSERT_EQ(t2, tree.hasToken("ab"));
+	ASSERT_EQ(t3, tree.hasToken("b"));
+
+	ASSERT_TRUE(tree.unregisterToken("a"));
+	ASSERT_FALSE(tree.unregisterToken("a"));
+
+	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
+	ASSERT_EQ(t2, tree.hasToken("ab"));
+	ASSERT_EQ(t3, tree.hasToken("b"));
+
+	ASSERT_TRUE(tree.unregisterToken("b"));
+	ASSERT_FALSE(tree.unregisterToken("b"));
+
+	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
+	ASSERT_EQ(t2, tree.hasToken("ab"));
+	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
+
+	ASSERT_TRUE(tree.unregisterToken("ab"));
+	ASSERT_FALSE(tree.unregisterToken("ab"));
+
+	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
+	ASSERT_EQ(EmptyToken, tree.hasToken("ab"));
+	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
+}
+}
+
diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp
new file mode 100644
index 0000000..c1f8785
--- /dev/null
+++ b/test/core/parser/utils/TokenizerTest.cpp
@@ -0,0 +1,415 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/common/CharReader.hpp>
+#include <formats/osdm/DynamicTokenizer.hpp>
+
+namespace ousia {
+
+TEST(DynamicTokenizer, tokenRegistration)
+{
+	DynamicTokenizer tokenizer;
+
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
+
+	ASSERT_EQ(0U, tokenizer.registerToken("a"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("a"));
+	ASSERT_EQ("a", tokenizer.getTokenString(0U));
+
+	ASSERT_EQ(1U, tokenizer.registerToken("b"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("b"));
+	ASSERT_EQ("b", tokenizer.getTokenString(1U));
+
+	ASSERT_EQ(2U, tokenizer.registerToken("c"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("c"));
+	ASSERT_EQ("c", tokenizer.getTokenString(2U));
+
+	ASSERT_TRUE(tokenizer.unregisterToken(1U));
+	ASSERT_FALSE(tokenizer.unregisterToken(1U));
+	ASSERT_EQ("", tokenizer.getTokenString(1U));
+
+	ASSERT_EQ(1U, tokenizer.registerToken("d"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("d"));
+	ASSERT_EQ("d", tokenizer.getTokenString(1U));
+}
+
+TEST(DynamicTokenizer, textTokenPreserveWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(36U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+}
+
+TEST(DynamicTokenizer, textTokenTrimWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(1U, loc.getStart());
+		ASSERT_EQ(33U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+}
+
+TEST(DynamicTokenizer, textTokenCollapseWhitespace)
+{
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this is only a test text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(1U, loc.getStart());
+		ASSERT_EQ(33U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this is only a test text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+}
+
+TEST(DynamicTokenizer, simpleReadToken)
+{
+	CharReader reader{"test1:test2"};
+	DynamicTokenizer tokenizer;
+
+	const TokenTypeId tid = tokenizer.registerToken(":");
+	ASSERT_EQ(0U, tid);
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+
+		char c;
+		ASSERT_TRUE(reader.peek(c));
+		ASSERT_EQ(':', c);
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+
+		char c;
+		ASSERT_TRUE(reader.peek(c));
+		ASSERT_EQ('t', c);
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+
+		char c;
+		ASSERT_FALSE(reader.peek(c));
+	}
+}
+
+TEST(DynamicTokenizer, simplePeekToken)
+{
+	CharReader reader{"test1:test2"};
+	DynamicTokenizer tokenizer;
+
+	const TokenTypeId tid = tokenizer.registerToken(":");
+	ASSERT_EQ(0U, tid);
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(5U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(reader, token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(6U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(11U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+		ASSERT_EQ(5U, reader.getOffset());
+		ASSERT_EQ(5U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+		ASSERT_EQ(6U, reader.getOffset());
+		ASSERT_EQ(6U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+		ASSERT_EQ(11U, reader.getOffset());
+		ASSERT_EQ(11U, reader.getPeekOffset());
+	}
+}
+
+TEST(DynamicTokenizer, ambiguousTokens)
+{
+	CharReader reader{"abc"};
+	DynamicTokenizer tokenizer;
+
+	TokenTypeId t1 = tokenizer.registerToken("abd");
+	TokenTypeId t2 = tokenizer.registerToken("bc");
+
+	ASSERT_EQ(0U, t1);
+	ASSERT_EQ(1U, t2);
+
+	DynamicToken token;
+	ASSERT_TRUE(tokenizer.read(reader, token));
+
+	ASSERT_EQ(TextToken, token.type);
+	ASSERT_EQ("a", token.content);
+
+	SourceLocation loc = token.location;
+	ASSERT_EQ(0U, loc.getStart());
+	ASSERT_EQ(1U, loc.getEnd());
+
+	ASSERT_TRUE(tokenizer.read(reader, token));
+
+	ASSERT_EQ(t2, token.type);
+	ASSERT_EQ("bc", token.content);
+
+	loc = token.location;
+	ASSERT_EQ(1U, loc.getStart());
+	ASSERT_EQ(3U, loc.getEnd());
+
+	ASSERT_FALSE(tokenizer.read(reader, token));
+}
+
+TEST(DynamicTokenizer, commentTestWhitespacePreserve)
+{
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 012345678901234567890123456789
+	//                 0        1         2
+	DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE);
+
+	const TokenTypeId t1 = tokenizer.registerToken("/");
+	const TokenTypeId t2 = tokenizer.registerToken("/*");
+	const TokenTypeId t3 = tokenizer.registerToken("*/");
+
+	std::vector<DynamicToken> expected = {
+	    {TextToken, "Test", SourceLocation{0, 0, 4}},
+	    {t1, "/", SourceLocation{0, 4, 5}},
+	    {TextToken, "Test ", SourceLocation{0, 5, 10}},
+	    {t2, "/*", SourceLocation{0, 10, 12}},
+	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
+	    {t3, "*/", SourceLocation{0, 27, 29}}};
+
+	DynamicToken t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.read(reader, t));
+		EXPECT_EQ(te.type, t.type);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.read(reader, t));
+}
+
+TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
+{
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 012345678901234567890123456789
+	//                 0        1         2
+	DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE);
+
+	const TokenTypeId t1 = tokenizer.registerToken("/");
+	const TokenTypeId t2 = tokenizer.registerToken("/*");
+	const TokenTypeId t3 = tokenizer.registerToken("*/");
+
+	std::vector<DynamicToken> expected = {
+	    {TextToken, "Test", SourceLocation{0, 0, 4}},
+	    {t1, "/", SourceLocation{0, 4, 5}},
+	    {TextToken, "Test", SourceLocation{0, 5, 9}},
+	    {t2, "/*", SourceLocation{0, 10, 12}},
+	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
+	    {t3, "*/", SourceLocation{0, 27, 29}}};
+
+	DynamicToken t;
+	for (auto &te : expected) {
+		EXPECT_TRUE(tokenizer.read(reader, t));
+		EXPECT_EQ(te.type, t.type);
+		EXPECT_EQ(te.content, t.content);
+		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
+		EXPECT_EQ(te.location.getStart(), t.location.getStart());
+		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.read(reader, t));
+}
+
+}
+
diff --git a/test/formats/osdm/DynamicTokenizerTest.cpp b/test/formats/osdm/DynamicTokenizerTest.cpp
deleted file mode 100644
index c1f8785..0000000
--- a/test/formats/osdm/DynamicTokenizerTest.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <core/common/CharReader.hpp>
-#include <formats/osdm/DynamicTokenizer.hpp>
-
-namespace ousia {
-
-TEST(DynamicTokenizer, tokenRegistration)
-{
-	DynamicTokenizer tokenizer;
-
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
-
-	ASSERT_EQ(0U, tokenizer.registerToken("a"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("a"));
-	ASSERT_EQ("a", tokenizer.getTokenString(0U));
-
-	ASSERT_EQ(1U, tokenizer.registerToken("b"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("b"));
-	ASSERT_EQ("b", tokenizer.getTokenString(1U));
-
-	ASSERT_EQ(2U, tokenizer.registerToken("c"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("c"));
-	ASSERT_EQ("c", tokenizer.getTokenString(2U));
-
-	ASSERT_TRUE(tokenizer.unregisterToken(1U));
-	ASSERT_FALSE(tokenizer.unregisterToken(1U));
-	ASSERT_EQ("", tokenizer.getTokenString(1U));
-
-	ASSERT_EQ(1U, tokenizer.registerToken("d"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("d"));
-	ASSERT_EQ("d", tokenizer.getTokenString(1U));
-}
-
-TEST(DynamicTokenizer, textTokenPreserveWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(36U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, textTokenTrimWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(1U, loc.getStart());
-		ASSERT_EQ(33U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, textTokenCollapseWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this is only a test text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(1U, loc.getStart());
-		ASSERT_EQ(33U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this is only a test text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, simpleReadToken)
-{
-	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer;
-
-	const TokenTypeId tid = tokenizer.registerToken(":");
-	ASSERT_EQ(0U, tid);
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-
-		char c;
-		ASSERT_TRUE(reader.peek(c));
-		ASSERT_EQ(':', c);
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-
-		char c;
-		ASSERT_TRUE(reader.peek(c));
-		ASSERT_EQ('t', c);
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-
-		char c;
-		ASSERT_FALSE(reader.peek(c));
-	}
-}
-
-TEST(DynamicTokenizer, simplePeekToken)
-{
-	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer;
-
-	const TokenTypeId tid = tokenizer.registerToken(":");
-	ASSERT_EQ(0U, tid);
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(5U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(6U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(11U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-		ASSERT_EQ(5U, reader.getOffset());
-		ASSERT_EQ(5U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-		ASSERT_EQ(6U, reader.getOffset());
-		ASSERT_EQ(6U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-		ASSERT_EQ(11U, reader.getOffset());
-		ASSERT_EQ(11U, reader.getPeekOffset());
-	}
-}
-
-TEST(DynamicTokenizer, ambiguousTokens)
-{
-	CharReader reader{"abc"};
-	DynamicTokenizer tokenizer;
-
-	TokenTypeId t1 = tokenizer.registerToken("abd");
-	TokenTypeId t2 = tokenizer.registerToken("bc");
-
-	ASSERT_EQ(0U, t1);
-	ASSERT_EQ(1U, t2);
-
-	DynamicToken token;
-	ASSERT_TRUE(tokenizer.read(reader, token));
-
-	ASSERT_EQ(TextToken, token.type);
-	ASSERT_EQ("a", token.content);
-
-	SourceLocation loc = token.location;
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(1U, loc.getEnd());
-
-	ASSERT_TRUE(tokenizer.read(reader, token));
-
-	ASSERT_EQ(t2, token.type);
-	ASSERT_EQ("bc", token.content);
-
-	loc = token.location;
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(3U, loc.getEnd());
-
-	ASSERT_FALSE(tokenizer.read(reader, token));
-}
-
-TEST(DynamicTokenizer, commentTestWhitespacePreserve)
-{
-	CharReader reader{"Test/Test /* Block Comment */", 0};
-	//                 012345678901234567890123456789
-	//                 0        1         2
-	DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE);
-
-	const TokenTypeId t1 = tokenizer.registerToken("/");
-	const TokenTypeId t2 = tokenizer.registerToken("/*");
-	const TokenTypeId t3 = tokenizer.registerToken("*/");
-
-	std::vector<DynamicToken> expected = {
-	    {TextToken, "Test", SourceLocation{0, 0, 4}},
-	    {t1, "/", SourceLocation{0, 4, 5}},
-	    {TextToken, "Test ", SourceLocation{0, 5, 10}},
-	    {t2, "/*", SourceLocation{0, 10, 12}},
-	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
-	    {t3, "*/", SourceLocation{0, 27, 29}}};
-
-	DynamicToken t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(reader, t));
-		EXPECT_EQ(te.type, t.type);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.read(reader, t));
-}
-
-TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
-{
-	CharReader reader{"Test/Test /* Block Comment */", 0};
-	//                 012345678901234567890123456789
-	//                 0        1         2
-	DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE);
-
-	const TokenTypeId t1 = tokenizer.registerToken("/");
-	const TokenTypeId t2 = tokenizer.registerToken("/*");
-	const TokenTypeId t3 = tokenizer.registerToken("*/");
-
-	std::vector<DynamicToken> expected = {
-	    {TextToken, "Test", SourceLocation{0, 0, 4}},
-	    {t1, "/", SourceLocation{0, 4, 5}},
-	    {TextToken, "Test", SourceLocation{0, 5, 9}},
-	    {t2, "/*", SourceLocation{0, 10, 12}},
-	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
-	    {t3, "*/", SourceLocation{0, 27, 29}}};
-
-	DynamicToken t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(reader, t));
-		EXPECT_EQ(te.type, t.type);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.read(reader, t));
-}
-
-}
-
diff --git a/test/formats/osdm/TokenTrieTest.cpp b/test/formats/osdm/TokenTrieTest.cpp
deleted file mode 100644
index aacd6c0..0000000
--- a/test/formats/osdm/TokenTrieTest.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <formats/osdm/TokenTrie.hpp>
-
-namespace ousia {
-
-static const TokenTypeId t1 = 0;
-static const TokenTypeId t2 = 1;
-static const TokenTypeId t3 = 2;
-static const TokenTypeId t4 = 3;
-
-TEST(TokenTrie, registerToken)
-{
-	TokenTrie tree;
-
-	ASSERT_TRUE(tree.registerToken("a", t1));
-	ASSERT_TRUE(tree.registerToken("ab", t2));
-	ASSERT_TRUE(tree.registerToken("b", t3));
-	ASSERT_TRUE(tree.registerToken("hello", t4));
-
-	ASSERT_FALSE(tree.registerToken("", t1));
-	ASSERT_FALSE(tree.registerToken("a", t4));
-	ASSERT_FALSE(tree.registerToken("ab", t4));
-	ASSERT_FALSE(tree.registerToken("b", t4));
-	ASSERT_FALSE(tree.registerToken("hello", t4));
-
-	ASSERT_EQ(t1, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-	ASSERT_EQ(t4, tree.hasToken("hello"));
-	ASSERT_EQ(EmptyToken, tree.hasToken(""));
-	ASSERT_EQ(EmptyToken, tree.hasToken("abc"));
-}
-
-TEST(TokenTrie, unregisterToken)
-{
-	TokenTrie tree;
-
-	ASSERT_TRUE(tree.registerToken("a", t1));
-	ASSERT_FALSE(tree.registerToken("a", t4));
-
-	ASSERT_TRUE(tree.registerToken("ab", t2));
-	ASSERT_FALSE(tree.registerToken("ab", t4));
-
-	ASSERT_TRUE(tree.registerToken("b", t3));
-	ASSERT_FALSE(tree.registerToken("b", t4));
-
-	ASSERT_EQ(t1, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("a"));
-	ASSERT_FALSE(tree.unregisterToken("a"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("b"));
-	ASSERT_FALSE(tree.unregisterToken("b"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("ab"));
-	ASSERT_FALSE(tree.unregisterToken("ab"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("ab"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
-}
-}
-
-- 
cgit v1.2.3


From 919552bad0f3f4db20419d3d3771c724c2ab997f Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:47:25 +0100
Subject: Removed Whitespace file again

---
 src/core/common/Whitespace.cpp  | 38 --------------------------------------
 test/core/common/Whitespace.cpp | 41 -----------------------------------------
 2 files changed, 79 deletions(-)
 delete mode 100644 src/core/common/Whitespace.cpp
 delete mode 100644 test/core/common/Whitespace.cpp

(limited to 'test')

diff --git a/src/core/common/Whitespace.cpp b/src/core/common/Whitespace.cpp
deleted file mode 100644
index 4d7c01a..0000000
--- a/src/core/common/Whitespace.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "Whitespace.hpp"
-#include "WhitespaceHandler.hpp"
-
-namespace ousia {
-
-std::string Utils::trim(const std::string &s)
-{
-	std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace);
-	return s.substr(bounds.first, bounds.second - bounds.first);
-}
-
-std::string Utils::collapse(const std::string &s)
-{
-	CollapsingWhitespaceHandler h;
-	appendToWhitespaceHandler(h, s, 0);
-	return h.toString();
-}
-
-}
-
diff --git a/test/core/common/Whitespace.cpp b/test/core/common/Whitespace.cpp
deleted file mode 100644
index d6df8b7..0000000
--- a/test/core/common/Whitespace.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <core/common/Whitespace.hpp>
-
-namespace ousia {
-
-TEST(Whitespace, trim)
-{
-	ASSERT_EQ("hello world", Whitespace::trim("\t hello world   \n\r\t"));
-	ASSERT_EQ("hello world", Whitespace::trim("hello world   \n\r\t"));
-	ASSERT_EQ("hello world", Whitespace::trim("   hello world"));
-	ASSERT_EQ("hello world", Whitespace::trim("hello world"));
-}
-
-TEST(Whitespace, collapse)
-{
-	ASSERT("hello world", Whitespace::collapse(" hello \n\t\r  world  \n\r\t"));
-	ASSERT("hello world", Whitespace::collapse("hello \n\t\r  world   \n\r\t"));
-	ASSERT("hello world", Whitespace::collapse("hello \n\t\r     world"));
-	ASSERT("hello world", Whitespace::collapse("hello world"));
-}
-}
-
-- 
cgit v1.2.3


From 295783320ea3855a14123f9cea163f8f5f689e07 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:50:11 +0100
Subject: Moved some of the whitespace functionality back to Utils

---
 src/core/common/Utils.cpp             | 25 ++++++++++++
 src/core/common/Utils.hpp             | 72 +++++++++++++++++++++++++++++++++++
 src/core/common/Whitespace.hpp        | 62 +-----------------------------
 src/core/common/WhitespaceHandler.hpp |  7 ++--
 test/core/common/UtilsTest.cpp        | 17 +++++++++
 5 files changed, 119 insertions(+), 64 deletions(-)

(limited to 'test')

diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
index 4005143..3739c61 100644
--- a/src/core/common/Utils.cpp
+++ b/src/core/common/Utils.cpp
@@ -21,6 +21,7 @@
 #include <string>
 
 #include "Utils.hpp"
+#include "WhitespaceHandler.hpp"
 
 namespace ousia {
 
@@ -87,5 +88,29 @@ std::string Utils::extractFileExtension(const std::string &filename)
 	}
 	return std::string{};
 }
+
+std::string Utils::trim(const std::string &s)
+{
+	std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace);
+	return s.substr(bounds.first, bounds.second - bounds.first);
+}
+
+std::string Utils::collapse(const std::string &s)
+{
+	CollapsingWhitespaceHandler h;
+	appendToWhitespaceHandler(h, s, 0);
+	return h.toString();
+}
+
+bool Utils::startsWith(const std::string &s, const std::string &prefix)
+{
+	return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix;
+}
+
+bool Utils::endsWith(const std::string &s, const std::string &suffix)
+{
+	return suffix.size() <= s.size() &&
+	       s.substr(s.size() - suffix.size(), suffix.size()) == suffix;
+}
 }
 
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index af7a773..16a9136 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -99,6 +99,60 @@ public:
 	 */
 	static bool hasNonWhitepaceChar(const std::string &s);
 
+	/**
+	 * Removes whitespace at the beginning and the end of the given string.
+	 *
+	 * @param s is the string that should be trimmed.
+	 * @return a trimmed copy of s.
+	 */
+	static std::string trim(const std::string &s);
+
+	/**
+	 * Trims the given string or vector of chars by returning the start and end
+	 * index.
+	 *
+	 * @param s is the container that should be trimmed.
+	 * @param f is a function that returns true for values that should be
+	 * removed.
+	 * @return start and end index. Note that "end" points at the character
+	 * beyond the end, thus "end" minus "start"
+	 */
+	template <class T, class Filter>
+	static std::pair<size_t, size_t> trim(const T &s, Filter f)
+	{
+		size_t start = 0;
+		for (size_t i = 0; i < s.size(); i++) {
+			if (!f(s[i])) {
+				start = i;
+				break;
+			}
+		}
+
+		size_t end = 0;
+		for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) {
+			if (!f(s[i])) {
+				end = i + 1;
+				break;
+			}
+		}
+
+		if (end < start) {
+			start = 0;
+			end = 0;
+		}
+
+		return std::pair<size_t, size_t>{start, end};
+	}
+
+	/**
+	 * Collapses the whitespaces in the given string (trims the string and
+	 * replaces all whitespace characters by a single one).
+	 *
+	 * @param s is the string in which the whitespace should be collapsed.
+	 * @return a copy of s with collapsed whitespace.
+	 */
+	static std::string collapse(const std::string &s);
+
 	/**
 	 * Turns the elements of a collection into a string separated by the
 	 * given delimiter.
@@ -159,6 +213,24 @@ public:
 	 */
 	static std::string extractFileExtension(const std::string &filename);
 
+	/**
+	 * Checks whether the given string starts with the given prefix.
+	 *
+	 * @param s is the string.
+	 * @param prefix is the string which should be checked for being a prefix of
+	 * s.
+	 */
+	static bool startsWith(const std::string &s, const std::string &prefix);
+
+	/**
+	 * Checks whether the given string ends with the given suffix.
+	 *
+	 * @param s is the string.
+	 * @param suffix is the string which should be checked for being a suffix of
+	 * s.
+	 */
+	static bool endsWith(const std::string &s, const std::string &suffix);
+
 	/**
 	 * Hash functional to be used for enum classes.
 	 * See http://stackoverflow.com/a/24847480/2188211
diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp
index 1e9f36a..72a2291 100644
--- a/src/core/common/Whitespace.hpp
+++ b/src/core/common/Whitespace.hpp
@@ -19,8 +19,7 @@
 /**
  * @file Whitespace.hpp
  *
- * Contains the WhitespaceMode enum used in various places, as well es functions
- * for trimming and collapsing whitespaces.
+ * Contains the WhitespaceMode enum used in various places.
  *
  * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
  */
@@ -55,65 +54,6 @@ enum class WhitespaceMode {
 	COLLAPSE
 };
 
-/**
- * Collection of functions for trimming or collapsing whitespace.
- */
-class Whitespace {
-	/**
-	 * Removes whitespace at the beginning and the end of the given string.
-	 *
-	 * @param s is the string that should be trimmed.
-	 * @return a trimmed copy of s.
-	 */
-	static std::string trim(const std::string &s);
-
-	/**
-	 * Trims the given string or vector of chars by returning the start and end
-	 * index.
-	 *
-	 * @param s is the container that should be trimmed.
-	 * @param f is a function that returns true for values that should be
-	 * removed.
-	 * @return start and end index. Note that "end" points at the character
-	 * beyond the end, thus "end" minus "start"
-	 */
-	template <class T, class Filter>
-	static std::pair<size_t, size_t> trim(const T &s, Filter f)
-	{
-		size_t start = 0;
-		for (size_t i = 0; i < s.size(); i++) {
-			if (!f(s[i])) {
-				start = i;
-				break;
-			}
-		}
-
-		size_t end = 0;
-		for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) {
-			if (!f(s[i])) {
-				end = i + 1;
-				break;
-			}
-		}
-
-		if (end < start) {
-			start = 0;
-			end = 0;
-		}
-
-		return std::pair<size_t, size_t>{start, end};
-	}
-
-	/**
-	 * Collapses the whitespaces in the given string (trims the string and
-	 * replaces all whitespace characters by a single one).
-	 *
-	 * @param s is the string in which the whitespace should be collapsed.
-	 * @return a copy of s with collapsed whitespace.
-	 */
-	static std::string collapse(const std::string &s);
-};
-
 }
 
 #endif /* _OUSIA_WHITESPACE_HPP_ */
diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp
index 1935c24..79e0518 100644
--- a/src/core/common/WhitespaceHandler.hpp
+++ b/src/core/common/WhitespaceHandler.hpp
@@ -32,7 +32,7 @@
 #include <string>
 #include <vector>
 
-#include "WhitespaceHandler.hpp"
+#include "Utils.hpp"
 
 namespace ousia {
 
@@ -76,7 +76,7 @@ public:
 	/**
 	 * Returns the content of the WhitespaceHandler as string.
 	 */
-	std::string toString()
+	std::string toString() const
 	{
 		return std::string(textBuf.data(), textBuf.size());
 	}
@@ -214,7 +214,8 @@ inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf,
                                       size_t start)
 {
 	for (auto elem : buf) {
-		handler.append(elem, start++);
+		handler.append(elem, start, start + 1);
+		start++;
 	}
 }
 }
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp
index 6b8a916..a4bf4b2 100644
--- a/test/core/common/UtilsTest.cpp
+++ b/test/core/common/UtilsTest.cpp
@@ -65,5 +65,22 @@ TEST(Utils, extractFileExtension)
 	ASSERT_EQ("ext", Utils::extractFileExtension("foo.bar/test.EXT"));
 }
 
+TEST(Utils, startsWith)
+{
+	ASSERT_TRUE(Utils::startsWith("foobar", "foo"));
+	ASSERT_TRUE(Utils::startsWith("foo", "foo"));
+	ASSERT_FALSE(Utils::startsWith("foo", "foobar"));
+	ASSERT_FALSE(Utils::startsWith("foobar", "bar"));
+	ASSERT_TRUE(Utils::startsWith("foo", ""));
+}
+
+TEST(Utils, endsWith)
+{
+	ASSERT_FALSE(Utils::endsWith("foobar", "foo"));
+	ASSERT_TRUE(Utils::endsWith("foo", "foo"));
+	ASSERT_FALSE(Utils::endsWith("foo", "foobar"));
+	ASSERT_TRUE(Utils::endsWith("foobar", "bar"));
+	ASSERT_TRUE(Utils::endsWith("foo", ""));
+}
 }
 
-- 
cgit v1.2.3


From 98f43328e566b3a77b75808892246a295adb0eb0 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:59:43 +0100
Subject: Renamed osdm to osml and osdmx to osxml

---
 src/formats/osml/OsmlStreamParser.cpp      |  32 +-
 src/formats/osml/OsmlStreamParser.hpp      |  22 +-
 src/formats/osxml/OsxmlEventParser.cpp     | 524 ++++++++++++++++
 src/formats/osxml/OsxmlEventParser.hpp     | 205 ++++++
 src/formats/osxml/OsxmlParser.cpp          | 337 ----------
 test/formats/osdm/OsdmStreamParserTest.cpp | 973 -----------------------------
 test/formats/osdmx/OsdmxParserTest.cpp     | 314 ----------
 7 files changed, 756 insertions(+), 1651 deletions(-)
 create mode 100644 src/formats/osxml/OsxmlEventParser.cpp
 create mode 100644 src/formats/osxml/OsxmlEventParser.hpp
 delete mode 100644 test/formats/osdm/OsdmStreamParserTest.cpp
 delete mode 100644 test/formats/osdmx/OsdmxParserTest.cpp

(limited to 'test')

diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 6a55f12..6b00eef 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -21,7 +21,7 @@
 #include <core/common/Utils.hpp>
 #include <core/common/VariantReader.hpp>
 
-#include "OsdmStreamParser.hpp"
+#include "OsmlStreamParser.hpp"
 
 namespace ousia {
 
@@ -160,14 +160,14 @@ public:
 	}
 };
 
-OsdmStreamParser::OsdmStreamParser(CharReader &reader, Logger &logger)
+OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger)
     : reader(reader), logger(logger), tokenizer(Tokens)
 {
 	// Place an intial command representing the complete file on the stack
 	commands.push(Command{"", Variant::mapType{}, true, true, true});
 }
 
-Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep)
+Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep)
 {
 	bool first = true;
 	bool hasCharSiceNSSep = false;
@@ -210,7 +210,7 @@ Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep)
 	return res;
 }
 
-OsdmStreamParser::State OsdmStreamParser::parseBeginCommand()
+OsmlStreamParser::State OsmlStreamParser::parseBeginCommand()
 {
 	// Expect a '{' after the command
 	reader.consumeWhitespace();
@@ -251,7 +251,7 @@ OsdmStreamParser::State OsdmStreamParser::parseBeginCommand()
 	return State::COMMAND;
 }
 
-static bool checkStillInField(const OsdmStreamParser::Command &cmd,
+static bool checkStillInField(const OsmlStreamParser::Command &cmd,
                               const Variant &endName, Logger &logger)
 {
 	if (cmd.inField && !cmd.inRangeField) {
@@ -264,7 +264,7 @@ static bool checkStillInField(const OsdmStreamParser::Command &cmd,
 	return false;
 }
 
-OsdmStreamParser::State OsdmStreamParser::parseEndCommand()
+OsmlStreamParser::State OsmlStreamParser::parseEndCommand()
 {
 	// Expect a '{' after the command
 	if (!reader.expect('{')) {
@@ -327,7 +327,7 @@ OsdmStreamParser::State OsdmStreamParser::parseEndCommand()
 	return cmd.inRangeField ? State::FIELD_END : State::NONE;
 }
 
-Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName)
+Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName)
 {
 	// Parse the arguments using the universal VariantReader
 	Variant commandArguments;
@@ -353,7 +353,7 @@ Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName)
 	return commandArguments;
 }
 
-void OsdmStreamParser::pushCommand(Variant commandName,
+void OsmlStreamParser::pushCommand(Variant commandName,
                                    Variant commandArguments, bool hasRange)
 {
 	// Store the location on the stack
@@ -368,7 +368,7 @@ void OsdmStreamParser::pushCommand(Variant commandName,
 	                      hasRange, false, false});
 }
 
-OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)
+OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
 {
 	// Parse the commandName as a first identifier
 	Variant commandName = parseIdentifier(start, true);
@@ -416,7 +416,7 @@ OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)
 	return State::COMMAND;
 }
 
-void OsdmStreamParser::parseBlockComment()
+void OsmlStreamParser::parseBlockComment()
 {
 	Token token;
 	size_t depth = 1;
@@ -436,7 +436,7 @@ void OsdmStreamParser::parseBlockComment()
 	logger.error("File ended while being in a block comment", reader);
 }
 
-void OsdmStreamParser::parseLineComment()
+void OsmlStreamParser::parseLineComment()
 {
 	char c;
 	while (reader.read(c)) {
@@ -446,7 +446,7 @@ void OsdmStreamParser::parseLineComment()
 	}
 }
 
-bool OsdmStreamParser::checkIssueData(DataHandler &handler)
+bool OsmlStreamParser::checkIssueData(DataHandler &handler)
 {
 	if (!handler.isEmpty()) {
 		data = handler.toVariant(reader.getSourceId());
@@ -457,7 +457,7 @@ bool OsdmStreamParser::checkIssueData(DataHandler &handler)
 	return false;
 }
 
-bool OsdmStreamParser::checkIssueFieldStart()
+bool OsmlStreamParser::checkIssueFieldStart()
 {
 	// Fetch the current command, and check whether we're currently inside a
 	// field of this command
@@ -482,7 +482,7 @@ bool OsdmStreamParser::checkIssueFieldStart()
 	return false;
 }
 
-OsdmStreamParser::State OsdmStreamParser::parse()
+OsmlStreamParser::State OsmlStreamParser::parse()
 {
 	// Handler for incomming data
 	DataHandler handler;
@@ -627,12 +627,12 @@ OsdmStreamParser::State OsdmStreamParser::parse()
 	return State::END;
 }
 
-const Variant &OsdmStreamParser::getCommandName()
+const Variant &OsmlStreamParser::getCommandName()
 {
 	return commands.top().name;
 }
 
-const Variant &OsdmStreamParser::getCommandArguments()
+const Variant &OsmlStreamParser::getCommandArguments()
 {
 	return commands.top().arguments;
 }
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index 84674c0..1508012 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -17,17 +17,17 @@
 */
 
 /**
- * @file OsdmStreamParser.hpp
+ * @file OsmlStreamParser.hpp
  *
- * Provides classes for low-level classes for reading the TeX-esque osdm
+ * Provides classes for low-level classes for reading the TeX-esque osml
  * format. The class provided here does not build any model objects and does not
  * implement the Parser interface.
  *
  * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
  */
 
-#ifndef _OUSIA_OSDM_STREAM_PARSER_HPP_
-#define _OUSIA_OSDM_STREAM_PARSER_HPP_
+#ifndef _OUSIA_OSML_STREAM_PARSER_HPP_
+#define _OUSIA_OSML_STREAM_PARSER_HPP_
 
 #include <stack>
 
@@ -42,7 +42,7 @@ class Logger;
 class DataHandler;
 
 /**
- * The OsdmStreamParser class provides a low-level reader for the TeX-esque osdm
+ * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml
  * format. The parser is constructed around a "parse" function, which reads data
  * from the underlying CharReader until a new state is reached and indicates
  * this state in a return value. The calling code then has to pull corresponding
@@ -52,10 +52,10 @@ class DataHandler;
  * fields, as this would lead to too many consecutive errors) a
  * LoggableException is thrown.
  */
-class OsdmStreamParser {
+class OsmlStreamParser {
 public:
 	/**
-	 * Enum used to indicate which state the OsdmStreamParser class is in
+	 * Enum used to indicate which state the OsmlStreamParser class is in
 	 * after calling the "parse" function.
 	 */
 	enum class State {
@@ -291,14 +291,14 @@ private:
 
 public:
 	/**
-	 * Constructor of the OsdmStreamParser class. Attaches the new
-	 * OsdmStreamParser to the given CharReader and Logger instances.
+	 * Constructor of the OsmlStreamParser class. Attaches the new
+	 * OsmlStreamParser to the given CharReader and Logger instances.
 	 *
 	 * @param reader is the reader instance from which incomming characters
 	 * should be read.
 	 * @param logger is the logger instance to which errors should be written.
 	 */
-	OsdmStreamParser(CharReader &reader, Logger &logger);
+	OsmlStreamParser(CharReader &reader, Logger &logger);
 
 	/**
 	 * Continues parsing. Returns one of the states defined in the State enum.
@@ -346,5 +346,5 @@ public:
 };
 }
 
-#endif /* _OUSIA_OSDM_STREAM_PARSER_HPP_ */
+#endif /* _OUSIA_OSML_STREAM_PARSER_HPP_ */
 
diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp
new file mode 100644
index 0000000..2ef170e
--- /dev/null
+++ b/src/formats/osxml/OsxmlEventParser.cpp
@@ -0,0 +1,524 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <expat.h>
+
+#include <core/common/Logger.hpp>
+#include <core/common/Variant.hpp>
+#include <core/common/Utils.hpp>
+
+#include "OsxmlEventParser.hpp"
+
+namespace ousia {
+
+/**
+ * Class containing data used by the internal functions.
+ */
+class OsxmlEventParserData {
+public:
+	/**
+	 * Contains the current depth of the parsing process.
+	 */
+	ssize_t depth;
+
+	/**
+	 * Set to a value larger or equal to zero if the parser is currently inside
+	 * an annotation end tag -- the value represents the depth in which the
+	 * tag was opened.
+	 */
+	ssize_t annotationEndTagDepth;
+
+	/**
+	 * Default constructor.
+	 */
+	OsxmlEventParserData() : depth(0), annotationEndTagDepth(-1) {}
+
+	/**
+	 * Increments the depth.
+	 */
+	void incrDepth() { depth++; }
+
+	/**
+	 * Decrement the depth and reset the annotationEndTagDepth flag.
+	 */
+	void decrDepth()
+	{
+		if (depth > 0) {
+			depth--;
+		}
+		if (depth < annotationEndTagDepth) {
+			annotationEndTagDepth = -1;
+		}
+	}
+
+	/**
+	 * Returns true if we're currently inside an end tag.
+	 */
+	bool inAnnotationEndTag() { depth >= annotationEndTagDepth; }
+};
+
+namespace {
+/**
+ * Wrapper class around the XML_Parser pointer which safely frees it whenever
+ * the scope is left (e.g. because an exception was thrown).
+ */
+class ScopedExpatXmlParser {
+private:
+	/**
+	 * Internal pointer to the XML_Parser instance.
+	 */
+	XML_Parser parser;
+
+public:
+	/**
+	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS
+	 * from the expat library. Throws a parser exception if the XML parser
+	 * cannot be initialized.
+	 *
+	 * @param encoding is the protocol-defined encoding passed to expat (or
+	 * nullptr if expat should determine the encoding by itself).
+	 */
+	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
+	{
+		parser = XML_ParserCreate(encoding);
+		if (!parser) {
+			throw LoggableException{
+			    "Internal error: Could not create expat XML parser!"};
+		}
+	}
+
+	/**
+	 * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance.
+	 */
+	~ScopedExpatXmlParser()
+	{
+		if (parser) {
+			XML_ParserFree(parser);
+			parser = nullptr;
+		}
+	}
+
+	/**
+	 * Returns the XML_Parser pointer.
+	 */
+	XML_Parser operator&() { return parser; }
+};
+
+/**
+ * Enum used internally in the statemachine of the micro-xml argument parser.
+ */
+enum class XmlAttributeState {
+	IN_TAG_NAME,
+	SEARCH_ATTR,
+	IN_ATTR_NAME,
+	HAS_ATTR_NAME,
+	HAS_ATTR_EQUALS,
+	IN_ATTR_DATA
+};
+
+/**
+ * Function used to reconstruct the location of the attributes of a XML tag in
+ * the source code. This is necessary, as the xml parser only returns an offset
+ * to the begining of a tag and not to the position of the individual arguments.
+ *
+ * @param reader is the char reader from which the character data should be
+ * read.
+ * @param offs is a byte offset in the xml file pointing at the "<" character of
+ * the tag.
+ * @return a map from attribute keys to the corresponding location (including
+ * range) of the atribute. Also contains the location of the tagname in the
+ * form of the virtual attribute "$tag".
+ */
+static std::map<std::string, SourceLocation> xmlReconstructAttributeOffsets(
+    CharReader &reader, size_t offs)
+{
+	std::map<std::string, SourceLocation> res;
+
+	// Fork the reader, we don't want to mess up the XML parsing process, do we?
+	CharReaderFork readerFork = reader.fork();
+
+	// Move the read cursor to the start location, abort if this does not work
+	if (!location.isValid() || offs != readerFork.seek(offs)) {
+		return res;
+	}
+
+	// Now all we need to do is to implement one half of an XML parser. As this
+	// is inherently complicated we'll totaly fail at it. Don't care. All we
+	// want to get is those darn offsets for pretty error messages... (and we
+	// can assume the XML is valid as it was already read by expat)
+	XmlAttributeState state = XmlAttributeState::IN_TAG_NAME;
+	char c;
+	std::stringstream attrName;
+	while (readerFork.read(c)) {
+		// Abort at the end of the tag
+		if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) {
+			return res;
+		}
+
+		// One state machine to rule them all, one state machine to find them,
+		// One state machine to bring them all and in the darkness bind them
+		// (the byte offsets)
+		switch (state) {
+			case XmlAttributeState::IN_TAG_NAME:
+				if (Utils::isWhitespace(c)) {
+					res.emplace("$tag",
+					            SourceLocation{reader.getSourceId(), offs + 1,
+					                           readerFork.getOffset() - 1});
+					state = XmlAttributeState::SEARCH_ATTR;
+				}
+				break;
+			case XmlAttributeState::SEARCH_ATTR:
+				if (!Utils::isWhitespace(c)) {
+					state = XmlAttributeState::IN_ATTR_NAME;
+					attrName << c;
+				}
+				break;
+			case XmlAttributeState::IN_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					state = XmlAttributeState::HAS_ATTR_NAME;
+				} else if (c == '=') {
+					state = XmlAttributeState::HAS_ATTR_EQUALS;
+				} else {
+					attrName << c;
+				}
+				break;
+			case XmlAttributeState::HAS_ATTR_NAME:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '=') {
+						state = XmlAttributeState::HAS_ATTR_EQUALS;
+						break;
+					}
+					// Well, this is a strange XML file... We expected to
+					// see a '=' here! Try to continue with the
+					// "HAS_ATTR_EQUALS" state as this state will hopefully
+					// inlcude some error recovery
+				} else {
+					// Skip whitespace here
+					break;
+				}
+			// Fallthrough
+			case XmlAttributeState::HAS_ATTR_EQUALS:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '"') {
+						// Here we are! We have found the beginning of an
+						// attribute. Let's quickly lock the current offset away
+						// in the result map
+						res.emplace(attrName.str(),
+						            SourceLocation{reader.getSourceId(),
+						                           readerFork.getOffset()});
+						state = XmlAttributeState::IN_ATTR_DATA;
+					} else {
+						// No, this XML file is not well formed. Assume we're in
+						// an attribute name once again
+						attrName.str(std::string{&c, 1});
+						state = XmlAttributeState::IN_ATTR_NAME;
+					}
+				}
+				break;
+			case XmlAttributeState::IN_ATTR_DATA:
+				if (c == '"') {
+					// We're at the end of the attribute data, set the end
+					// location
+					auto it = res.find(attrName.str());
+					if (it != res.end()) {
+						it->second.setEnd(readerFork.getOffset() - 1);
+					}
+
+					// Reset the attribute name and restart the search
+					attrName.str(std::string{});
+					state = XmlAttributeState::SEARCH_ATTR;
+				}
+				break;
+		}
+	}
+	return res;
+}
+
+/**
+ * Synchronizes the position of the xml parser with the default location of the
+ * logger instance.
+ *
+ * @param p is a pointer at the xml parser instance.
+ * @param len is the length of the string that should be refered to.
+ * @return the SourceLocation that has been set in the logger.
+ */
+static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0)
+{
+	// Fetch the OsxmlEventParser instance
+	OsxmlEventParser *parser =
+	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
+
+	// Fetch the current location in the XML file and set the default location
+	// in the logger
+	size_t offs = XML_GetCurrentByteIndex(p);
+	SourceLocation loc =
+	    SourceLocation{parser->getReader().getSourceId(), offs, offs + len};
+	parser->getLogger().setDefaultLocation(location);
+
+	// Return the fetched location
+	return loc;
+}
+
+/**
+ * Prefix used to indicate the start of an annoation,
+ */
+static const std::string ANNOTATION_START_PREFIX{"a:start:"};
+
+/**
+ * Prefix used to indicate the end of an annotation.
+ */
+static const std::string ANNOTATION_END_PREFIX{"a:end"};
+
+/**
+ * Callback called by eXpat whenever a start handler is reached.
+ */
+static void xmlStartElementHandler(void *ref, const XML_Char *name,
+                                   const XML_Char **attrs)
+{
+	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
+	XML_Parser p = static_cast<XML_Parser>(ref);
+	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
+
+	// Read the argument locations -- this is only a stupid and slow hack,
+	// but it is necessary, as expat doesn't give use the byte offset of the
+	// arguments.
+	std::map<std::string, SourceLocation> attributeOffsets =
+	    xmlReconstructXMLAttributeOffsets(*userData->reader,
+	                                      XML_GetCurrentByteIndex(p));
+
+	// Update the logger position
+	SourceLocation loc = xmlSyncLoggerPosition(p);
+
+	// Fetch the location of the name
+	SourceLocation nameLoc = loc;
+	auto it = attributeOffsets.find("$tag");
+	if (it != attributeOffsets.end()) {
+		nameLoc = it->second;
+	}
+	// Increment the current depth
+	parser->getData().incrDepth();
+
+	// Make sure we're currently not inside an annotation end tag -- this would
+	// be highly illegal!
+	if (parser->getData().inAnnotationEndTag()) {
+		logger.error("No tags allowed inside an annotation end tag", nameLoc);
+		return;
+	}
+
+	// Assemble the arguments
+	Variant::mapType args;
+	const XML_Char **attr = attrs;
+	while (*attr) {
+		// Convert the C string to a std::string
+		const std::string key{*(attr++)};
+
+		// Search the location of the key
+		SourceLocation keyLoc;
+		auto it = attributeOffsets.find(key);
+		if (it != attributeOffsets.end()) {
+			keyLoc = it->second;
+		}
+
+		// Parse the string, pass the location of the key
+		std::pair<bool, Variant> value = VariantReader::parseGenericString(
+		    *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(),
+		    keyLoc.getStart());
+
+		// Set the overall location of the parsed element to the attribute
+		// location
+		value.second->setLocation(keyLoc);
+
+		// Store the
+		if (!args.emplace(key, value.second).second) {
+			parser->getLogger().warning(
+			    std::string("Attribute \"") + key +
+			        "\" defined multiple times, only using first definition",
+			    keyLoc);
+		}
+	}
+
+	// Fetch the name of the tag, check for special tags
+	std::string nameStr(name);
+	if (nameStr == "ousia" && parser->getData().depth == 1) {
+		// We're in the top-level and the magic "ousia" tag is reached -- just
+		// ignore it and issue a warning for each argument that has been given
+		for (const auto &arg : args) {
+			parser->getLogger().warning(
+			    std::string("Ignoring attribute \"") + arg.first +
+			        std::string("\" for magic tag \"ousia\""),
+			    arg.second);
+		}
+	} else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) {
+		// Assemble a name variant containing the name minus the prefix
+		Variant nameVar = nameStr.substr(ANNOTATION_START_PREFIX.size());
+		nameVar.setLocation(nameLoc);
+
+		// Issue the "annotationStart" event
+		parser->getEvents().annotationStart(nameVar, args);
+	} else if (Utils::startsWith(nameStr, ANNOTATION_END_PREFIX)) {
+		// Assemble a name variant containing the name minus the prefix
+		nameStr = nameStr.substr(ANNOTATION_END_PREFIX.size());
+
+		// Discard a potentially leading colon
+		if (!nameStr.empty() && nameStr[0] == ':') {
+			nameStr = nameStr.substr(1);
+		}
+
+		// Assemble the variant containing the name and its location
+		Variant nameVar = Variant::fromString(nameStr);
+		nameVar.setLocation(nameLoc);
+
+		// Check whether a "name" attribute was given
+		Variant elementName;
+		for (const auto &arg : args) {
+			if (arg.first == "name") {
+				elementName = arg.second;
+			} else {
+				parser->getLogger().warning(
+				    std::string("Ignoring attribute \"") + arg.first +
+				        "\" in annotation end tag",
+				    arg.second);
+			}
+		}
+
+		// Set the annotationEndTagDepth to disallow any further tags to be
+		// opened inside the annotation end tag.
+		parser->getData().annotationEndTagDepth = parser->getData().depth;
+
+		// Issue the "annotationEnd" event
+		parser->getEvents().annotationEnd(nameVar, args);
+	} else {
+		// Just issue a "commandStart" event in any other case
+		Variant nameVar = Variant::fromString(nameStr);
+		nameVar.setLocation(nameLoc);
+		parser->getEvents().commandStart(nameVar, args);
+	}
+}
+
+static void xmlEndElementHandler(void *p, const XML_Char *name)
+{
+	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
+	XML_Parser p = static_cast<XML_Parser>(ref);
+	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
+
+	// Synchronize the position of the logger with teh position
+	xmlSyncLoggerPosition(parser);
+
+	// Decrement the current depth
+	parser->getData().decrDepth();
+
+	// Abort as long as we're in an annotation end tag
+	if (parser->getData().inAnnotationEndTag()) {
+		return;
+	}
+
+	// Abort if the special ousia tag ends here
+	if (nameStr == "ousia" && parser->getData().depth == 0) {
+		return;
+	}
+
+	// Issue the "fieldEnd" event
+	parser->getEvents().fieldEnd();
+}
+
+static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len)
+{
+	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
+	XML_Parser p = static_cast<XML_Parser>(ref);
+	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
+
+	// TODO
+/*	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
+	syncLoggerPosition(parser, ulen);
+	const std::string data = Utils::trim(std::string{s, ulen});
+	if (!data.empty()) {
+		stack->data(data);
+	}*/
+}
+}
+
+/* Class OsxmlEventParser */
+
+OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events,
+                                   Logger &logger)
+    : reader(reader),
+      events(events),
+      logger(logger),
+      whitespaceMode(WhitespaceMode::COLLAPSE),
+      data(new OsxmlEventParserData())
+{
+}
+
+void OsxmlEventParser::parse(CharReader &reader)
+{
+	// Create the parser object
+	ScopedExpatXmlParser p{"UTF-8"};
+
+	// Reset the depth
+	depth = 0;
+
+	// Pass the reference to the ParserStack to the XML handler
+	XMLUserData data(&stack, &reader);
+	XML_SetUserData(&p, this);
+	XML_UseParserAsHandlerArg(&p);
+
+	// Set the callback functions
+	XML_SetStartElementHandler(&p, xmlStartElementHandler);
+	XML_SetEndElementHandler(&p, xmlEndElementHandler);
+	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler);
+
+	// Feed data into expat while there is data to process
+	constexpr size_t BUFFER_SIZE = 64 * 1024;
+	while (true) {
+		// Fetch a buffer from expat for the input data
+		char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE));
+		if (!buf) {
+			throw OusiaException{"Internal error: XML parser out of memory!"};
+		}
+
+		// Read into the buffer
+		size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE);
+
+		// Parse the data and handle any XML error as exception
+		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) {
+			throw LoggableException{
+			    "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))},
+			    xmlSyncLoggerPosition(p)};
+		}
+
+		// Abort once there are no more bytes in the stream
+		if (bytesRead == 0) {
+			break;
+		}
+	}
+}
+
+void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode)
+{
+	this->whitespaceMode = whitespaceMode;
+}
+
+CharReader &OsxmlEventParser::getCharReader() { return charReader; }
+
+Logger &OsxmlEventParser::getLogger() { return logger; }
+
+OsxmlEvents &OsxmlEventParser::getEvents() { return events; }
+
+OsxmlEventParserData &OsxmlEventParser::getData() { return *data; }
+}
+
diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp
new file mode 100644
index 0000000..5319ca6
--- /dev/null
+++ b/src/formats/osxml/OsxmlEventParser.hpp
@@ -0,0 +1,205 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsxmlEventParser.hpp
+ *
+ * The OsxmlEventParser class is responsible for parsing an XML file and calling
+ * the corresponding event handler functions if an XML item is found. Event
+ * handling is performed using a listener interface.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OSXML_EVENT_PARSER_HPP_
+#define _OSXML_EVENT_PARSER_HPP_
+
+#include <memory>
+#include <string>
+
+#include <core/common/Whitespace.hpp>
+
+namespace ousia {
+
+// Forward declarations
+class Logger;
+class Variant;
+class OsxmlEventParserData;
+
+/**
+ * Interface which defines the callback functions which are called by the 
+ * OsxmlEventParser whenever an event occurs.
+ */
+class OsxmlEvents {
+public:
+	/**
+	 * Virtual destructor.
+	 */
+	virtual ~OsxmlEvents() {}
+
+	/**
+	 * Called whenever a command starts. Note that this implicitly always starts
+	 * the default field of the command.
+	 *
+	 * @param name is a string variant containing name and location of the 
+	 * command.
+	 * @param args is a map variant containing the arguments that were given
+	 * to the command.
+	 */
+	virtual void commandStart(Variant name, Variant args) = 0;
+
+	/**
+	 * Called whenever an annotation starts. Note that this implicitly always
+	 * starts the default field of the annotation.
+	 *
+	 * @param name is a string variant containing the name of the annotation 
+	 * class and the location of the annotation definition.
+	 * @param args is a map variant containing the arguments that were given
+	 * to the annotation definition.
+	 */
+	virtual void annotationStart(Variant name, Variant args);
+
+	/**
+	 * Called whenever the range of an annotation ends. The callee must
+	 * disambiguate the actual annotation that is finished here.
+	 *
+	 * @param name is a string variant containing the name of the annotation
+	 * class that should end here. May be empty (or nullptr), if no elementName
+	 * has been specified at the end of the annotation.
+	 * @param elementName is the name of the annotation element that should be
+	 * ended here. May be empty (or nullptr), if no elementName has been
+	 * specified at the end of the annotation.
+	 */
+	virtual void annotationEnd(Variant name, Variant elementName);
+
+	/**
+	 * Called whenever the default field which was implicitly started by 
+	 * commandStart or annotationStart ends. Note that this does not end the
+	 * range of an annotation, but the default field of the annotation. To 
+	 * signal the end of the annotation this, the annotationEnd method will be
+	 * invoked.
+	 */
+	virtual void fieldEnd() = 0;
+
+	/**
+	 * Called whenever data is found. Whitespace data is handled as specified
+	 * and the data has been parsed to the specified variant type. This function
+	 * is not called if the parsing failed, the parser prints an error message
+	 * instead.
+	 *
+	 * @param data is the already parsed data that should be passed to the 
+	 * handler.
+	 */
+	virtual void data(Variant data) = 0;
+
+};
+
+/**
+ * The OsxmlEventParser class is a wrapper around eXpat which implements the
+ * specialities of the osxml formats class (like annotation ranges). It notifies
+ * a specified event handler whenever a command, annotation or data has been
+ * reached.
+ */
+class OsxmlEventParser {
+private:
+	/**
+	 * Reference at the internal CharReader instance.
+	 */
+	CharReader &reader;
+
+	/**
+	 * Set of callback functions to be called whenever an event is triggered.
+	 */
+	OsxmlEvents &events;
+
+	/**
+	 * Reference at the Logger object to which error messages or warnings should
+	 * be logged.
+	 */
+	Logger &logger;
+
+	/**
+	 * Current whitespace mode.
+	 */
+	WhitespaceMode whitespaceMode;
+
+	/**
+	 * Data to be used by the internal functions.
+	 */
+	std::unique_ptr<OsxmlEventParserData> data;
+
+public:
+	/**
+	 * Constructor fo the OsxmlEventParser. Takes a reference at the OsxmlEvents
+	 * of which the callback functions are called.
+	 *
+	 * @param reader is a reference to the CharReader instance from which the 
+	 * XML should be read.
+	 * @param events is a refence at an instance of the OsxmlEvents class. All
+	 * events are forwarded to this class.
+	 * @param logger is the Logger instance to which log messages should be
+	 * written.
+	 */
+	OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger);
+
+	/**
+	 * Performs the actual parsing. Reads the XML using eXpat and calles the
+	 * callbacks in the event listener instance whenever something interesting
+	 * happens.
+	 */
+	void parse();
+
+	/**
+	 * Sets the whitespace handling mode.
+	 *
+	 * @param whitespaceMode defines how whitespace in the data should be 
+	 * handled.
+	 */
+	void setWhitespaceMode(WhitespaceMode whitespaceMode);
+
+	/**
+	 * Returns the internal CharReader reference.
+	 *
+	 * @return the CharReader reference.
+	 */
+	CharReader &getCharReader();
+
+	/**
+	 * Returns the internal Logger reference.
+	 *
+	 * @return the internal Logger reference.
+	 */
+	Logger &getLogger();
+
+	/**
+	 * Returns the internal OsxmlEvents reference.
+	 *
+	 * @return the internal OsxmlEvents reference.
+	 */
+	OsxmlEvents &getEvents();
+
+	/**
+	 * Returns a reference at the internal data.
+	 */
+	OsxmlEventParserData &getData();
+};
+
+}
+
+#endif /* _OSXML_EVENT_PARSER_HPP_ */
+
diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp
index c46d9de..4f6503c 100644
--- a/src/formats/osxml/OsxmlParser.cpp
+++ b/src/formats/osxml/OsxmlParser.cpp
@@ -1093,343 +1093,6 @@ static const std::multimap<std::string, const ParserState *> XmlStates{
     {"include", &Include}};
 }
 
-/**
- * Structue containing the private data that is being passed to the
- * XML-Handlers.
- */
-struct XMLUserData {
-	/**
-	 * Containing the depth of the current XML file
-	 */
-	size_t depth;
-
-	/**
-	 * Reference at the ParserStack instance.
-	 */
-	ParserStack *stack;
-
-	/**
-	 * Reference at the CharReader instance.
-	 */
-	CharReader *reader;
-
-	/**
-	 * Constructor of the XMLUserData struct.
-	 *
-	 * @param stack is a pointer at the ParserStack instance.
-	 * @param reader is a pointer at the CharReader instance.
-	 */
-	XMLUserData(ParserStack *stack, CharReader *reader)
-	    : depth(0), stack(stack), reader(reader)
-	{
-	}
-};
-
-/**
- * Wrapper class around the XML_Parser pointer which safely frees it whenever
- * the scope is left (e.g. because an exception was thrown).
- */
-class ScopedExpatXmlParser {
-private:
-	/**
-	 * Internal pointer to the XML_Parser instance.
-	 */
-	XML_Parser parser;
-
-public:
-	/**
-	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS
-	 * from the expat library. Throws a parser exception if the XML parser
-	 * cannot be initialized.
-	 *
-	 * @param encoding is the protocol-defined encoding passed to expat (or
-	 * nullptr if expat should determine the encoding by itself).
-	 */
-	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
-	{
-		parser = XML_ParserCreate(encoding);
-		if (!parser) {
-			throw LoggableException{
-			    "Internal error: Could not create expat XML parser!"};
-		}
-	}
-
-	/**
-	 * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance.
-	 */
-	~ScopedExpatXmlParser()
-	{
-		if (parser) {
-			XML_ParserFree(parser);
-			parser = nullptr;
-		}
-	}
-
-	/**
-	 * Returns the XML_Parser pointer.
-	 */
-	XML_Parser operator&() { return parser; }
-};
-
-/* Adapter Expat -> ParserStack */
-
-static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0)
-{
-	// Fetch the parser stack and the associated user data
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	// Fetch the current location in the XML file
-	size_t offs = XML_GetCurrentByteIndex(p);
-
-	// Build the source location and update the default location of the
-	// current
-	// logger instance
-	SourceLocation loc{stack->getContext().getSourceId(), offs, offs + len};
-	stack->getContext().getLogger().setDefaultLocation(loc);
-	return loc;
-}
-
-enum class XMLAttributeState {
-	IN_TAG_NAME,
-	SEARCH_ATTR,
-	IN_ATTR_NAME,
-	HAS_ATTR_NAME,
-	HAS_ATTR_EQUALS,
-	IN_ATTR_DATA
-};
-
-static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets(
-    CharReader &reader, SourceLocation location)
-{
-	std::map<std::string, SourceLocation> res;
-
-	// Fork the reader, we don't want to mess up the XML parsing process, do we?
-	CharReaderFork readerFork = reader.fork();
-
-	// Move the read cursor to the start location, abort if this does not work
-	size_t offs = location.getStart();
-	if (!location.isValid() || offs != readerFork.seek(offs)) {
-		return res;
-	}
-
-	// Now all we need to do is to implement one half of an XML parser. As this
-	// is inherently complicated we'll totaly fail at it. Don't care. All we
-	// want to get is those darn offsets for pretty error messages... (and we
-	// can assume the XML is valid as it was already read by expat)
-	XMLAttributeState state = XMLAttributeState::IN_TAG_NAME;
-	char c;
-	std::stringstream attrName;
-	while (readerFork.read(c)) {
-		// Abort at the end of the tag
-		if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) {
-			return res;
-		}
-
-		// One state machine to rule them all, one state machine to find them,
-		// One state machine to bring them all and in the darkness bind them
-		// (the byte offsets)
-		switch (state) {
-			case XMLAttributeState::IN_TAG_NAME:
-				if (Utils::isWhitespace(c)) {
-					state = XMLAttributeState::SEARCH_ATTR;
-				}
-				break;
-			case XMLAttributeState::SEARCH_ATTR:
-				if (!Utils::isWhitespace(c)) {
-					state = XMLAttributeState::IN_ATTR_NAME;
-					attrName << c;
-				}
-				break;
-			case XMLAttributeState::IN_ATTR_NAME:
-				if (Utils::isWhitespace(c)) {
-					state = XMLAttributeState::HAS_ATTR_NAME;
-				} else if (c == '=') {
-					state = XMLAttributeState::HAS_ATTR_EQUALS;
-				} else {
-					attrName << c;
-				}
-				break;
-			case XMLAttributeState::HAS_ATTR_NAME:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '=') {
-						state = XMLAttributeState::HAS_ATTR_EQUALS;
-						break;
-					}
-					// Well, this is a strange XML file... We expected to
-					// see a '=' here! Try to continue with the
-					// "HAS_ATTR_EQUALS" state as this state will hopefully
-					// inlcude some error recovery
-				} else {
-					// Skip whitespace here
-					break;
-				}
-			// Fallthrough
-			case XMLAttributeState::HAS_ATTR_EQUALS:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '"') {
-						// Here we are! We have found the beginning of an
-						// attribute. Let's quickly lock the current offset away
-						// in the result map
-						res.emplace(attrName.str(),
-						            SourceLocation{reader.getSourceId(),
-						                           readerFork.getOffset()});
-						attrName.str(std::string{});
-						state = XMLAttributeState::IN_ATTR_DATA;
-					} else {
-						// No, this XML file is not well formed. Assume we're in
-						// an attribute name once again
-						attrName.str(std::string{&c, 1});
-						state = XMLAttributeState::IN_ATTR_NAME;
-					}
-				}
-				break;
-			case XMLAttributeState::IN_ATTR_DATA:
-				if (c == '"') {
-					// We're at the end of the attribute data, start anew
-					state = XMLAttributeState::SEARCH_ATTR;
-				}
-				break;
-		}
-	}
-	return res;
-}
-
-static void xmlStartElementHandler(void *p, const XML_Char *name,
-                                   const XML_Char **attrs)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	SourceLocation loc = syncLoggerPosition(parser);
-
-	// Read the argument locations -- this is only a stupid and slow hack,
-	// but it is necessary, as expat doesn't give use the byte offset of the
-	// arguments.
-	std::map<std::string, SourceLocation> offs =
-	    reconstructXMLAttributeOffsets(*userData->reader, loc);
-
-	// Assemble the arguments
-	Variant::mapType args;
-
-	const XML_Char **attr = attrs;
-	while (*attr) {
-		// Convert the C string to a std::string
-		const std::string key{*(attr++)};
-
-		// Search the location of the key
-		SourceLocation keyLoc;
-		auto it = offs.find(key);
-		if (it != offs.end()) {
-			keyLoc = it->second;
-		}
-
-		// Parse the string, pass the location of the key
-		std::pair<bool, Variant> value = VariantReader::parseGenericString(
-		    *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(),
-		    keyLoc.getStart());
-		args.emplace(key, value.second);
-	}
-
-	// Call the start function
-	std::string nameStr(name);
-	if (nameStr != "ousia" || userData->depth > 0) {
-		stack->start(std::string(name), args, loc);
-	}
-
-	// Increment the current depth
-	userData->depth++;
-}
-
-static void xmlEndElementHandler(void *p, const XML_Char *name)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	syncLoggerPosition(parser);
-
-	// Decrement the current depth
-	userData->depth--;
-
-	// Call the end function
-	std::string nameStr(name);
-	if (nameStr != "ousia" || userData->depth > 0) {
-		stack->end();
-	}
-}
 
-static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
-	syncLoggerPosition(parser, ulen);
-	const std::string data = Utils::trim(std::string{s, ulen});
-	if (!data.empty()) {
-		stack->data(data);
-	}
-}
-
-/* Class XmlParser */
-
-void XmlParser::doParse(CharReader &reader, ParserContext &ctx)
-{
-	// Create the parser object
-	ScopedExpatXmlParser p{"UTF-8"};
-
-	// Create the parser stack instance, if we're starting on a non-empty scope,
-	// try to deduce the parser state
-	ParserStack stack(ctx, ParserStates::XmlStates);
-	if (!ctx.getScope().isEmpty()) {
-		if (!stack.deduceState()) {
-			return;
-		}
-	}
-
-	// Pass the reference to the ParserStack to the XML handler
-	XMLUserData data(&stack, &reader);
-	XML_SetUserData(&p, &data);
-	XML_UseParserAsHandlerArg(&p);
-
-	// Set the callback functions
-	XML_SetStartElementHandler(&p, xmlStartElementHandler);
-	XML_SetEndElementHandler(&p, xmlEndElementHandler);
-	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler);
-
-	// Feed data into expat while there is data to process
-	constexpr size_t BUFFER_SIZE = 64 * 1024;
-	while (true) {
-		// Fetch a buffer from expat for the input data
-		char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE));
-		if (!buf) {
-			throw LoggableException{
-			    "Internal error: XML parser out of memory!"};
-		}
-
-		// Read into the buffer
-		size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE);
-
-		// Parse the data and handle any XML error
-		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) {
-			// Fetch the xml parser byte offset
-			size_t offs = XML_GetCurrentByteIndex(&p);
-
-			// Throw a corresponding exception
-			XML_Error code = XML_GetErrorCode(&p);
-			std::string msg = std::string{XML_ErrorString(code)};
-			throw LoggableException{"XML: " + msg,
-			                        SourceLocation{ctx.getSourceId(), offs}};
-		}
-
-		// Abort once there are no more bytes in the stream
-		if (bytesRead == 0) {
-			break;
-		}
-	}
-}
 }
 
diff --git a/test/formats/osdm/OsdmStreamParserTest.cpp b/test/formats/osdm/OsdmStreamParserTest.cpp
deleted file mode 100644
index 46f4cf6..0000000
--- a/test/formats/osdm/OsdmStreamParserTest.cpp
+++ /dev/null
@@ -1,973 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <iostream>
-
-#include <core/common/CharReader.hpp>
-#include <core/frontend/TerminalLogger.hpp>
-
-#include <formats/osdm/OsdmStreamParser.hpp>
-
-namespace ousia {
-
-static TerminalLogger logger(std::cerr, true);
-
-TEST(OsdmStreamParser, empty)
-{
-	const char *testString = "";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, oneCharacter)
-{
-	const char *testString = "a";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("a", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(1U, loc.getEnd());
-}
-
-TEST(OsdmStreamParser, whitespaceElimination)
-{
-	const char *testString = " hello \t world ";
-	//                        0123456 78901234
-	//                        0          1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(14U, loc.getEnd());
-}
-
-TEST(OsdmStreamParser, whitespaceEliminationWithLinebreak)
-{
-	const char *testString = " hello \n world ";
-	//                        0123456 78901234
-	//                        0          1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(14U, loc.getEnd());
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, escapeWhitespace)
-{
-	const char *testString = " hello\\ \\ world ";
-	//                        012345 67 89012345
-	//                        0           1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello  world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(15U, loc.getEnd());
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-static void testEscapeSpecialCharacter(const std::string &c)
-{
-	CharReader charReader(std::string("\\") + c);
-	OsdmStreamParser reader(charReader, logger);
-	EXPECT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	EXPECT_EQ(c, reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	EXPECT_EQ(0U, loc.getStart());
-	EXPECT_EQ(1U + c.size(), loc.getEnd());
-}
-
-TEST(OsdmStreamParser, escapeSpecialCharacters)
-{
-	testEscapeSpecialCharacter("\\");
-	testEscapeSpecialCharacter("{");
-	testEscapeSpecialCharacter("}");
-	testEscapeSpecialCharacter("<");
-	testEscapeSpecialCharacter(">");
-}
-
-TEST(OsdmStreamParser, simpleSingleLineComment)
-{
-	const char *testString = "% This is a single line comment";
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, singleLineComment)
-{
-	const char *testString = "a% This is a single line comment\nb";
-	//                        01234567890123456789012345678901 23
-	//                        0         1         2         3
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("a", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(1U, loc.getEnd());
-	}
-
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("b", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(33U, loc.getStart());
-		ASSERT_EQ(34U, loc.getEnd());
-	}
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, multilineComment)
-{
-	const char *testString = "a%{ This is a\n\n multiline line comment}%b";
-	//                        0123456789012 3 456789012345678901234567890
-	//                        0         1           2         3         4
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("a", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(1U, loc.getEnd());
-	}
-
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("b", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(40U, loc.getStart());
-		ASSERT_EQ(41U, loc.getEnd());
-	}
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, nestedMultilineComment)
-{
-	const char *testString = "a%{%{Another\n\n}%multiline line comment}%b";
-	//                        0123456789012 3 456789012345678901234567890
-	//                        0         1           2         3         4
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("a", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(1U, loc.getEnd());
-	}
-
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("b", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(40U, loc.getStart());
-		ASSERT_EQ(41U, loc.getEnd());
-	}
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, simpleCommand)
-{
-	const char *testString = "\\test";
-	//                        0 12345
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-
-	Variant commandName = reader.getCommandName();
-	ASSERT_EQ("test", commandName.asString());
-
-	SourceLocation loc = commandName.getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(5U, loc.getEnd());
-
-	ASSERT_EQ(0U, reader.getCommandArguments().asMap().size());
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, simpleCommandWithName)
-{
-	const char *testString = "\\test#bla";
-	//                        0 12345678
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-
-	Variant commandName = reader.getCommandName();
-	ASSERT_EQ("test", commandName.asString());
-	SourceLocation loc = commandName.getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(5U, loc.getEnd());
-
-	Variant commandArguments = reader.getCommandArguments();
-	ASSERT_TRUE(commandArguments.isMap());
-	ASSERT_EQ(1U, commandArguments.asMap().size());
-	ASSERT_EQ(1U, commandArguments.asMap().count("name"));
-	ASSERT_EQ("bla", commandArguments.asMap()["name"].asString());
-
-	loc = commandArguments.asMap()["name"].getLocation();
-	ASSERT_EQ(5U, loc.getStart());
-	ASSERT_EQ(9U, loc.getEnd());
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, simpleCommandWithArguments)
-{
-	const char *testString = "\\test[a=1,b=2,c=\"test\"]";
-	//                        0 123456789012345 678901 2
-	//                        0          1          2
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-
-	Variant commandName = reader.getCommandName();
-	ASSERT_EQ("test", commandName.asString());
-	SourceLocation loc = commandName.getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(5U, loc.getEnd());
-
-	Variant commandArguments = reader.getCommandArguments();
-	ASSERT_TRUE(commandArguments.isMap());
-	ASSERT_EQ(3U, commandArguments.asMap().size());
-	ASSERT_EQ(1U, commandArguments.asMap().count("a"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("b"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("c"));
-	ASSERT_EQ(1, commandArguments.asMap()["a"].asInt());
-	ASSERT_EQ(2, commandArguments.asMap()["b"].asInt());
-	ASSERT_EQ("test", commandArguments.asMap()["c"].asString());
-
-	loc = commandArguments.asMap()["a"].getLocation();
-	ASSERT_EQ(8U, loc.getStart());
-	ASSERT_EQ(9U, loc.getEnd());
-
-	loc = commandArguments.asMap()["b"].getLocation();
-	ASSERT_EQ(12U, loc.getStart());
-	ASSERT_EQ(13U, loc.getEnd());
-
-	loc = commandArguments.asMap()["c"].getLocation();
-	ASSERT_EQ(16U, loc.getStart());
-	ASSERT_EQ(22U, loc.getEnd());
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, simpleCommandWithArgumentsAndName)
-{
-	const char *testString = "\\test#bla[a=1,b=2,c=\"test\"]";
-	//                        0 1234567890123456789 01234 56
-	//                        0          1          2
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-
-	Variant commandName = reader.getCommandName();
-	ASSERT_EQ("test", commandName.asString());
-	SourceLocation loc = commandName.getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(5U, loc.getEnd());
-
-	Variant commandArguments = reader.getCommandArguments();
-	ASSERT_TRUE(commandArguments.isMap());
-	ASSERT_EQ(4U, commandArguments.asMap().size());
-	ASSERT_EQ(1U, commandArguments.asMap().count("a"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("b"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("c"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("name"));
-	ASSERT_EQ(1, commandArguments.asMap()["a"].asInt());
-	ASSERT_EQ(2, commandArguments.asMap()["b"].asInt());
-	ASSERT_EQ("test", commandArguments.asMap()["c"].asString());
-	ASSERT_EQ("bla", commandArguments.asMap()["name"].asString());
-
-	loc = commandArguments.asMap()["a"].getLocation();
-	ASSERT_EQ(12U, loc.getStart());
-	ASSERT_EQ(13U, loc.getEnd());
-
-	loc = commandArguments.asMap()["b"].getLocation();
-	ASSERT_EQ(16U, loc.getStart());
-	ASSERT_EQ(17U, loc.getEnd());
-
-	loc = commandArguments.asMap()["c"].getLocation();
-	ASSERT_EQ(20U, loc.getStart());
-	ASSERT_EQ(26U, loc.getEnd());
-
-	loc = commandArguments.asMap()["name"].getLocation();
-	ASSERT_EQ(5U, loc.getStart());
-	ASSERT_EQ(9U, loc.getEnd());
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-static void assertCommand(OsdmStreamParser &reader, const std::string &name,
-                          SourceOffset start = InvalidSourceOffset,
-                          SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-	EXPECT_EQ(name, reader.getCommandName().asString());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertCommand(OsdmStreamParser &reader, const std::string &name,
-                          const Variant::mapType &args,
-                          SourceOffset start = InvalidSourceOffset,
-                          SourceOffset end = InvalidSourceOffset)
-{
-	assertCommand(reader, name, start, end);
-	EXPECT_EQ(args, reader.getCommandArguments());
-}
-
-static void assertData(OsdmStreamParser &reader, const std::string &data,
-                       SourceOffset start = InvalidSourceOffset,
-                       SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	EXPECT_EQ(data, reader.getData().asString());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getData().getLocation().getStart());
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getData().getLocation().getEnd());
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertFieldStart(OsdmStreamParser &reader,
-                             SourceOffset start = InvalidSourceOffset,
-                             SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::FIELD_START, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertFieldEnd(OsdmStreamParser &reader,
-                           SourceOffset start = InvalidSourceOffset,
-                           SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::FIELD_END, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertEnd(OsdmStreamParser &reader,
-                      SourceOffset start = InvalidSourceOffset,
-                      SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-TEST(OsdmStreamParser, fields)
-{
-	const char *testString = "\\test{a}{b}{c}";
-	//                         01234567890123
-	//                         0         1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertData(reader, "a", 6, 7);
-	assertFieldEnd(reader, 7, 8);
-
-	assertFieldStart(reader, 8, 9);
-	assertData(reader, "b", 9, 10);
-	assertFieldEnd(reader, 10, 11);
-
-	assertFieldStart(reader, 11, 12);
-	assertData(reader, "c", 12, 13);
-	assertFieldEnd(reader, 13, 14);
-	assertEnd(reader, 14, 14);
-}
-
-TEST(OsdmStreamParser, dataOutsideField)
-{
-	const char *testString = "\\test{a}{b} c";
-	//                         0123456789012
-	//                         0         1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertData(reader, "a", 6, 7);
-	assertFieldEnd(reader, 7, 8);
-
-	assertFieldStart(reader, 8, 9);
-	assertData(reader, "b", 9, 10);
-	assertFieldEnd(reader, 10, 11);
-
-	assertData(reader, "c", 12, 13);
-	assertEnd(reader, 13, 13);
-}
-
-TEST(OsdmStreamParser, nestedCommand)
-{
-	const char *testString = "\\test{a}{\\test2{b} c} d";
-	//                         012345678 90123456789012
-	//                         0          1         2
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-
-	assertFieldStart(reader, 5, 6);
-	assertData(reader, "a", 6, 7);
-	assertFieldEnd(reader, 7, 8);
-
-	assertFieldStart(reader, 8, 9);
-	{
-		assertCommand(reader, "test2", 9, 15);
-		assertFieldStart(reader, 15, 16);
-		assertData(reader, "b", 16, 17);
-		assertFieldEnd(reader, 17, 18);
-	}
-	assertData(reader, "c", 19, 20);
-	assertFieldEnd(reader, 20, 21);
-	assertData(reader, "d", 22, 23);
-	assertEnd(reader, 23, 23);
-}
-
-TEST(OsdmStreamParser, nestedCommandImmediateEnd)
-{
-	const char *testString = "\\test{\\test2{b}} d";
-	//                         012345 678901234567
-	//                         0          1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	{
-		assertCommand(reader, "test2", 6, 12);
-		assertFieldStart(reader, 12, 13);
-		assertData(reader, "b", 13, 14);
-		assertFieldEnd(reader, 14, 15);
-	}
-	assertFieldEnd(reader, 15, 16);
-	assertData(reader, "d", 17, 18);
-	assertEnd(reader, 18, 18);
-}
-
-TEST(OsdmStreamParser, nestedCommandNoData)
-{
-	const char *testString = "\\test{\\test2}";
-	//                         012345 6789012
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertCommand(reader, "test2", 6, 12);
-	assertFieldEnd(reader, 12, 13);
-	assertEnd(reader, 13, 13);
-}
-
-TEST(OsdmStreamParser, multipleCommands)
-{
-	const char *testString = "\\a \\b \\c \\d";
-	//                         012 345 678 90
-	//                         0            1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "a", 0, 2);
-	assertCommand(reader, "b", 3, 5);
-	assertCommand(reader, "c", 6, 8);
-	assertCommand(reader, "d", 9, 11);
-	assertEnd(reader, 11, 11);
-}
-
-TEST(OsdmStreamParser, fieldsWithSpaces)
-{
-	const char *testString = "\\a {\\b \\c}   \n\n {\\d}";
-	//                         0123 456 789012 3 456 789
-	//                         0           1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "a", 0, 2);
-	assertFieldStart(reader, 3, 4);
-	assertCommand(reader, "b", 4, 6);
-	assertCommand(reader, "c", 7, 9);
-	assertFieldEnd(reader, 9, 10);
-	assertFieldStart(reader, 16, 17);
-	assertCommand(reader, "d", 17, 19);
-	assertFieldEnd(reader, 19, 20);
-	assertEnd(reader, 20, 20);
-}
-
-TEST(OsdmStreamParser, errorNoFieldToStart)
-{
-	const char *testString = "\\a b {";
-	//                         012345
-	//                         0
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "a", 0, 2);
-	assertData(reader, "b", 3, 4);
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader, 6, 6);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorNoFieldToEnd)
-{
-	const char *testString = "\\a b }";
-	//                         012345
-	//                         0
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "a", 0, 2);
-	assertData(reader, "b", 3, 4);
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader, 6, 6);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorNoFieldEndNested)
-{
-	const char *testString = "\\test{\\test2{}}}";
-	//                         012345 6789012345
-	//                         0          1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertCommand(reader, "test2", 6, 12);
-	assertFieldStart(reader, 12, 13);
-	assertFieldEnd(reader, 13, 14);
-	assertFieldEnd(reader, 14, 15);
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader, 16, 16);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorNoFieldEndNestedData)
-{
-	const char *testString = "\\test{\\test2{}}a}";
-	//                         012345 67890123456
-	//                         0          1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertCommand(reader, "test2", 6, 12);
-	assertFieldStart(reader, 12, 13);
-	assertFieldEnd(reader, 13, 14);
-	assertFieldEnd(reader, 14, 15);
-	assertData(reader, "a", 15, 16);
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader, 17, 17);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, beginEnd)
-{
-	const char *testString = "\\begin{book}\\end{book}";
-	//                         012345678901 2345678901
-	//                         0         1          2
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
-	assertFieldEnd(reader, 17, 21);
-	assertEnd(reader, 22, 22);
-}
-
-TEST(OsdmStreamParser, beginEndWithName)
-{
-	const char *testString = "\\begin{book#a}\\end{book}";
-	//                         01234567890123 4567890123
-	//                         0         1          2
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book", {{"name", "a"}}, 7, 11);
-	assertFieldStart(reader, 14, 15);
-	assertFieldEnd(reader, 19, 23);
-	assertEnd(reader, 24, 24);
-}
-
-TEST(OsdmStreamParser, beginEndWithNameAndArgs)
-{
-	const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}";
-	//                         0123456789012345678901234 56789 01 2345678901
-	//                         0         1         2           3          4
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book",
-	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
-	assertFieldStart(reader, 32, 33);
-	assertFieldEnd(reader, 37, 41);
-	assertEnd(reader, 42, 42);
-}
-
-TEST(OsdmStreamParser, beginEndWithNameAndArgsMultipleFields)
-{
-	const char *testString =
-	    "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}";
-	//    0123456789012345678901234 56789 01234 567890123 45678901 2345678901
-	//    0         1         2           3          4          5          6
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book",
-	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
-	assertFieldStart(reader, 32, 33);
-	assertData(reader, "a", 33, 34);
-	assertCommand(reader, "test", Variant::mapType{}, 35, 40);
-	assertFieldEnd(reader, 40, 41);
-	assertFieldStart(reader, 41, 42);
-	assertData(reader, "b", 42, 43);
-	assertCommand(reader, "test", Variant::mapType{}, 44, 49);
-	assertFieldStart(reader, 49, 50);
-	assertFieldEnd(reader, 50, 51);
-	assertFieldEnd(reader, 51, 52);
-	assertFieldStart(reader, 52, 53);
-	assertFieldEnd(reader, 57, 61);
-	assertEnd(reader, 62, 62);
-}
-
-TEST(OsdmStreamParser, beginEndWithData)
-{
-	const char *testString = "\\begin{book}a\\end{book}";
-	//                         0123456789012 3456789012
-	//                         0         1          2
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
-	assertData(reader, "a", 12, 13);
-	assertFieldEnd(reader, 18, 22);
-	assertEnd(reader, 23, 23);
-}
-
-TEST(OsdmStreamParser, beginEndWithCommand)
-{
-	const char *testString = "\\begin{book}\\a{test}\\end{book}";
-	//                         012345678901 23456789 0123456789
-	//                         0         1           2
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
-	assertCommand(reader, "a", 12, 14);
-	assertFieldStart(reader, 14, 15);
-	assertData(reader, "test", 15, 19);
-	assertFieldEnd(reader, 19, 20);
-	assertFieldEnd(reader, 25, 29);
-	assertEnd(reader, 30, 30);
-}
-
-TEST(OsdmStreamParser, errorBeginNoBraceOpen)
-{
-	const char *testString = "\\begin a";
-	//                         01234567
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertData(reader, "a", 7, 8);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorBeginNoIdentifier)
-{
-	const char *testString = "\\begin{!";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorBeginNoBraceClose)
-{
-	const char *testString = "\\begin{a";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorBeginNoName)
-{
-	const char *testString = "\\begin{a#}";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertCommand(reader, "a");
-	ASSERT_TRUE(logger.hasError());
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorEndNoBraceOpen)
-{
-	const char *testString = "\\end a";
-	//                         012345
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertData(reader, "a", 5, 6);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorEndNoIdentifier)
-{
-	const char *testString = "\\end{!";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorEndNoBraceClose)
-{
-	const char *testString = "\\end{a";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorEndNoBegin)
-{
-	const char *testString = "\\end{a}";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorBeginEndMismatch)
-{
-	const char *testString = "\\begin{a} \\begin{b} test \\end{a}";
-	//                         0123456789 012345678901234 5678901
-	//                         0          1         2          3
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "a", 7, 8);
-	assertFieldStart(reader, 10, 11);
-	assertCommand(reader, "b", 17, 18);
-	assertFieldStart(reader, 20, 24);
-	assertData(reader, "test", 20, 24);
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, commandWithNSSep)
-{
-	const char *testString = "\\test1:test2";
-	//                         012345678901
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test1:test2", 0, 12);
-	assertEnd(reader, 12, 12);
-}
-
-TEST(OsdmStreamParser, beginEndWithNSSep)
-{
-	const char *testString = "\\begin{test1:test2}\\end{test1:test2}";
-	//                         0123456789012345678 90123456789012345
-	//                         0         1          2         3
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test1:test2", 7, 18);
-	assertFieldStart(reader, 19, 20);
-	assertFieldEnd(reader, 24, 35);
-	assertEnd(reader, 36, 36);
-}
-
-TEST(OsdmStreamParser, errorBeginNSSep)
-{
-	const char *testString = "\\begin:test{blub}\\end{blub}";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertCommand(reader, "blub");
-	ASSERT_TRUE(logger.hasError());
-	assertFieldStart(reader);
-	assertFieldEnd(reader);
-	assertEnd(reader);
-}
-
-TEST(OsdmStreamParser, errorEndNSSep)
-{
-	const char *testString = "\\begin{blub}\\end:test{blub}";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "blub");
-	assertFieldStart(reader);
-	ASSERT_FALSE(logger.hasError());
-	assertFieldEnd(reader);
-	ASSERT_TRUE(logger.hasError());
-	assertEnd(reader);
-}
-
-TEST(OsdmStreamParser, errorEmptyNs)
-{
-	const char *testString = "\\test:";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertCommand(reader, "test");
-	ASSERT_TRUE(logger.hasError());
-	assertData(reader, ":");
-	assertEnd(reader);
-}
-
-TEST(OsdmStreamParser, errorRepeatedNs)
-{
-	const char *testString = "\\test::";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertCommand(reader, "test");
-	ASSERT_TRUE(logger.hasError());
-	assertData(reader, "::");
-	assertEnd(reader);
-}
-}
-
diff --git a/test/formats/osdmx/OsdmxParserTest.cpp b/test/formats/osdmx/OsdmxParserTest.cpp
deleted file mode 100644
index c0fb50d..0000000
--- a/test/formats/osdmx/OsdmxParserTest.cpp
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <iostream>
-
-#include <gtest/gtest.h>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/SourceContextReader.hpp>
-#include <core/model/Domain.hpp>
-#include <core/model/Node.hpp>
-#include <core/model/Project.hpp>
-#include <core/frontend/TerminalLogger.hpp>
-#include <core/StandaloneEnvironment.hpp>
-
-#include <plugins/filesystem/FileLocator.hpp>
-#include <formats/osdmx/OsdmxParser.hpp>
-
-namespace ousia {
-
-namespace RttiTypes {
-extern const Rtti Document;
-extern const Rtti Domain;
-extern const Rtti Typesystem;
-}
-
-struct XmlStandaloneEnvironment : public StandaloneEnvironment {
-	XmlParser xmlParser;
-	FileLocator fileLocator;
-
-	XmlStandaloneEnvironment(ConcreteLogger &logger)
-	    : StandaloneEnvironment(logger)
-	{
-		fileLocator.addDefaultSearchPaths();
-		fileLocator.addUnittestSearchPath("xmlparser");
-
-		registry.registerDefaultExtensions();
-		registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"},
-		                        {&RttiTypes::Node}, &xmlParser);
-		registry.registerResourceLocator(&fileLocator);
-	}
-};
-
-static TerminalLogger logger(std::cerr, true);
-
-TEST(XmlParser, mismatchedTag)
-{
-	XmlStandaloneEnvironment env(logger);
-	env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document});
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(XmlParser, generic)
-{
-	XmlStandaloneEnvironment env(logger);
-	env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node});
-#ifdef MANAGER_GRAPHVIZ_EXPORT
-	env.manager.exportGraphviz("xmlDocument.dot");
-#endif
-}
-
-static void checkAttributes(Handle<StructType> expected,
-                            Handle<Descriptor> desc)
-{
-	if (expected == nullptr) {
-		ASSERT_TRUE(desc->getAttributesDescriptor()->getAttributes().empty());
-	} else {
-		ASSERT_EQ(expected->getName(),
-		          desc->getAttributesDescriptor()->getName());
-		auto &attrs_exp = expected->getAttributes();
-		auto &attrs = desc->getAttributesDescriptor()->getAttributes();
-		ASSERT_EQ(attrs_exp.size(), attrs.size());
-		for (size_t i = 0; i < attrs_exp.size(); i++) {
-			ASSERT_EQ(attrs_exp[i]->getName(), attrs[i]->getName());
-			ASSERT_EQ(attrs_exp[i]->getType(), attrs[i]->getType());
-			ASSERT_EQ(attrs_exp[i]->isOptional(), attrs[i]->isOptional());
-			ASSERT_EQ(attrs_exp[i]->getDefaultValue(),
-			          attrs[i]->getDefaultValue());
-		}
-	}
-}
-
-static void checkStructuredClass(
-    Handle<Node> n, const std::string &name, Handle<Domain> domain,
-    Variant cardinality = Cardinality::any(),
-    Handle<StructType> attributesDescriptor = nullptr,
-    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
-    bool root = false)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<StructuredClass> sc = n.cast<StructuredClass>();
-	ASSERT_FALSE(sc == nullptr);
-	ASSERT_EQ(name, sc->getName());
-	ASSERT_EQ(domain, sc->getParent());
-	ASSERT_EQ(cardinality, sc->getCardinality());
-	ASSERT_EQ(transparent, sc->isTransparent());
-	ASSERT_EQ(root, sc->hasRootPermission());
-	checkAttributes(attributesDescriptor, sc);
-}
-
-static Rooted<StructuredClass> checkStructuredClass(
-    const std::string &resolve, const std::string &name, Handle<Domain> domain,
-    Variant cardinality = Cardinality::any(),
-    Handle<StructType> attributesDescriptor = nullptr,
-    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
-    bool root = false)
-{
-	auto res = domain->resolve(&RttiTypes::StructuredClass, resolve);
-	if (res.size() != 1) {
-		throw OusiaException("resolution error!");
-	}
-	Handle<StructuredClass> sc = res[0].node.cast<StructuredClass>();
-	checkStructuredClass(sc, name, domain, cardinality, attributesDescriptor,
-	                     superclass, transparent, root);
-	return sc;
-}
-
-static void checkAnnotationClass(
-    Handle<Node> n, const std::string &name, Handle<Domain> domain,
-    Handle<StructType> attributesDescriptor = nullptr)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<AnnotationClass> ac = n.cast<AnnotationClass>();
-	ASSERT_FALSE(ac == nullptr);
-	ASSERT_EQ(name, ac->getName());
-	ASSERT_EQ(domain, ac->getParent());
-	checkAttributes(attributesDescriptor, ac);
-}
-
-static Rooted<AnnotationClass> checkAnnotationClass(
-    const std::string &resolve, const std::string &name, Handle<Domain> domain,
-    Handle<StructType> attributesDescriptor = nullptr)
-{
-	auto res = domain->resolve(&RttiTypes::AnnotationClass, resolve);
-	if (res.size() != 1) {
-		throw OusiaException("resolution error!");
-	}
-	Handle<AnnotationClass> ac = res[0].node.cast<AnnotationClass>();
-	checkAnnotationClass(ac, name, domain, attributesDescriptor);
-	return ac;
-}
-
-static void checkFieldDescriptor(
-    Handle<Node> n, const std::string &name, Handle<Descriptor> parent,
-    NodeVector<StructuredClass> children,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<FieldDescriptor> field = n.cast<FieldDescriptor>();
-	ASSERT_FALSE(field.isNull());
-	ASSERT_EQ(name, field->getName());
-	ASSERT_EQ(parent, field->getParent());
-	ASSERT_EQ(type, field->getFieldType());
-	ASSERT_EQ(primitiveType, field->getPrimitiveType());
-	ASSERT_EQ(optional, field->isOptional());
-	// check the children.
-	ASSERT_EQ(children.size(), field->getChildren().size());
-	for (unsigned int c = 0; c < children.size(); c++) {
-		ASSERT_EQ(children[c], field->getChildren()[c]);
-	}
-}
-
-static void checkFieldDescriptor(
-    Handle<Descriptor> desc, Handle<Descriptor> parent,
-    NodeVector<StructuredClass> children,
-    const std::string &name = DEFAULT_FIELD_NAME,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	auto res = desc->resolve(&RttiTypes::FieldDescriptor, name);
-	ASSERT_EQ(1, res.size());
-	checkFieldDescriptor(res[0].node, name, parent, children, type,
-	                     primitiveType, optional);
-}
-
-static void checkFieldDescriptor(
-    Handle<Descriptor> desc, NodeVector<StructuredClass> children,
-    const std::string &name = DEFAULT_FIELD_NAME,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	checkFieldDescriptor(desc, desc, children, name, type, primitiveType,
-	                     optional);
-}
-
-TEST(XmlParser, domainParsing)
-{
-	XmlStandaloneEnvironment env(logger);
-	Rooted<Node> book_domain_node =
-	    env.parse("book_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(book_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	// check the domain node.
-	Rooted<Domain> book_domain = book_domain_node.cast<Domain>();
-	ASSERT_EQ("book", book_domain->getName());
-	// get the book struct node.
-	Cardinality single;
-	single.merge({1});
-	Rooted<StructType> bookAuthor{
-	    new StructType(book_domain->getManager(), "", nullptr)};
-	bookAuthor->addAttribute(
-	    {new Attribute(book_domain->getManager(), "author",
-	                   env.project->getSystemTypesystem()->getStringType(),
-	                   "")},
-	    logger);
-	Rooted<StructuredClass> book = checkStructuredClass(
-	    "book", "book", book_domain, single, bookAuthor, nullptr, false, true);
-	// get the chapter struct node.
-	Rooted<StructuredClass> chapter =
-	    checkStructuredClass("chapter", "chapter", book_domain);
-	Rooted<StructuredClass> section =
-	    checkStructuredClass("section", "section", book_domain);
-	Rooted<StructuredClass> subsection =
-	    checkStructuredClass("subsection", "subsection", book_domain);
-	Rooted<StructuredClass> paragraph =
-	    checkStructuredClass("paragraph", "paragraph", book_domain,
-	                         Cardinality::any(), nullptr, nullptr, true, false);
-	Rooted<StructuredClass> text =
-	    checkStructuredClass("text", "text", book_domain, Cardinality::any(),
-	                         nullptr, nullptr, true, false);
-
-	// check the FieldDescriptors.
-	checkFieldDescriptor(book, {chapter, paragraph});
-	checkFieldDescriptor(chapter, {section, paragraph});
-	checkFieldDescriptor(section, {subsection, paragraph});
-	checkFieldDescriptor(subsection, {paragraph});
-	checkFieldDescriptor(paragraph, {text});
-	checkFieldDescriptor(
-	    text, {}, DEFAULT_FIELD_NAME, FieldDescriptor::FieldType::PRIMITIVE,
-	    env.project->getSystemTypesystem()->getStringType(), false);
-
-	// check parent handling using the headings domain.
-	Rooted<Node> headings_domain_node =
-	    env.parse("headings_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(headings_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	Rooted<Domain> headings_domain = headings_domain_node.cast<Domain>();
-	// now there should be a heading struct.
-	Rooted<StructuredClass> heading =
-	    checkStructuredClass("heading", "heading", headings_domain, single,
-	                         nullptr, nullptr, true, false);
-	// which should be a reference to the paragraph descriptor.
-	checkFieldDescriptor(heading, paragraph, {text});
-	// and each struct in the book domain (except for text) should have a
-	// heading field now.
-	checkFieldDescriptor(book, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(chapter, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(section, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(subsection, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(paragraph, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-
-	// check annotation handling using the comments domain.
-	Rooted<Node> comments_domain_node =
-	    env.parse("comments_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(comments_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	Rooted<Domain> comments_domain = comments_domain_node.cast<Domain>();
-	// now we should be able to find a comment annotation.
-	Rooted<AnnotationClass> comment_anno =
-	    checkAnnotationClass("comment", "comment", comments_domain);
-	// as well as a comment struct
-	Rooted<StructuredClass> comment =
-	    checkStructuredClass("comment", "comment", comments_domain);
-	// and a reply struct
-	Rooted<StructuredClass> reply =
-	    checkStructuredClass("reply", "reply", comments_domain);
-	// check the fields for each of them.
-	{
-		std::vector<Rooted<Descriptor>> descs{comment_anno, comment, reply};
-		for (auto &d : descs) {
-			checkFieldDescriptor(d, {paragraph}, "content",
-			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
-			                     false);
-			checkFieldDescriptor(d, {reply}, "replies",
-			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
-			                     false);
-		}
-	}
-	// paragraph should have comment as child now as well.
-	checkFieldDescriptor(paragraph, {text, comment});
-	// as should heading, because it references the paragraph default field.
-	checkFieldDescriptor(heading, paragraph, {text, comment});
-}
-
-TEST(XmlParser, documentParsing)
-{
-	XmlStandaloneEnvironment env(logger);
-	Rooted<Node> book_domain_node =
-	    env.parse("simple_book.oxd", "", "", RttiSet{&RttiTypes::Document});
-	//TODO: Check result
-}
-}
-
-- 
cgit v1.2.3


From fde9997a9d321823ba6a2685e20769f5a10982cd Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:00:06 +0100
Subject: Moved TokenTrieTest to new directory

---
 test/core/parser/utils/TokenTrieTest.cpp |  2 +-
 test/core/parser/utils/TokenizerTest.cpp | 85 ++++++++++++++++----------------
 2 files changed, 43 insertions(+), 44 deletions(-)

(limited to 'test')

diff --git a/test/core/parser/utils/TokenTrieTest.cpp b/test/core/parser/utils/TokenTrieTest.cpp
index aacd6c0..087e6e6 100644
--- a/test/core/parser/utils/TokenTrieTest.cpp
+++ b/test/core/parser/utils/TokenTrieTest.cpp
@@ -18,7 +18,7 @@
 
 #include <gtest/gtest.h>
 
-#include <formats/osdm/TokenTrie.hpp>
+#include <core/parser/utils/TokenTrie.hpp>
 
 namespace ousia {
 
diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp
index c1f8785..8565057 100644
--- a/test/core/parser/utils/TokenizerTest.cpp
+++ b/test/core/parser/utils/TokenizerTest.cpp
@@ -19,13 +19,13 @@
 #include <gtest/gtest.h>
 
 #include <core/common/CharReader.hpp>
-#include <formats/osdm/DynamicTokenizer.hpp>
+#include <core/parser/utils/Tokenizer.hpp>
 
 namespace ousia {
 
-TEST(DynamicTokenizer, tokenRegistration)
+TEST(Tokenizer, tokenRegistration)
 {
-	DynamicTokenizer tokenizer;
+	Tokenizer tokenizer;
 
 	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
 
@@ -50,15 +50,15 @@ TEST(DynamicTokenizer, tokenRegistration)
 	ASSERT_EQ("d", tokenizer.getTokenString(1U));
 }
 
-TEST(DynamicTokenizer, textTokenPreserveWhitespace)
+TEST(Tokenizer, textTokenPreserveWhitespace)
 {
 	{
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
+		Tokenizer tokenizer{WhitespaceMode::PRESERVE};
 
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
@@ -74,9 +74,9 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
+		Tokenizer tokenizer{WhitespaceMode::PRESERVE};
 
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
@@ -89,15 +89,15 @@ TEST(DynamicTokenizer, textTokenPreserveWhitespace)
 	}
 }
 
-TEST(DynamicTokenizer, textTokenTrimWhitespace)
+TEST(Tokenizer, textTokenTrimWhitespace)
 {
 	{
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
+		Tokenizer tokenizer{WhitespaceMode::TRIM};
 
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
@@ -113,9 +113,9 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
+		Tokenizer tokenizer{WhitespaceMode::TRIM};
 
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
@@ -128,15 +128,15 @@ TEST(DynamicTokenizer, textTokenTrimWhitespace)
 	}
 }
 
-TEST(DynamicTokenizer, textTokenCollapseWhitespace)
+TEST(Tokenizer, textTokenCollapseWhitespace)
 {
 	{
 		CharReader reader{" this \t is only a  \n\n test   text   "};
 		//                 012345 6789012345678 9 0123456789012345
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
+		Tokenizer tokenizer{WhitespaceMode::COLLAPSE};
 
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this is only a test text", token.content);
@@ -152,9 +152,9 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
 		CharReader reader{"this \t is only a  \n\n test   text"};
 		//                 01234 5678901234567 8 9012345678901
 		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
+		Tokenizer tokenizer{WhitespaceMode::COLLAPSE};
 
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 		ASSERT_EQ(TextToken, token.type);
 		ASSERT_EQ("this is only a test text", token.content);
@@ -167,16 +167,16 @@ TEST(DynamicTokenizer, textTokenCollapseWhitespace)
 	}
 }
 
-TEST(DynamicTokenizer, simpleReadToken)
+TEST(Tokenizer, simpleReadToken)
 {
 	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer;
+	Tokenizer tokenizer;
 
 	const TokenTypeId tid = tokenizer.registerToken(":");
 	ASSERT_EQ(0U, tid);
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
@@ -192,7 +192,7 @@ TEST(DynamicTokenizer, simpleReadToken)
 	}
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(tid, token.type);
@@ -208,7 +208,7 @@ TEST(DynamicTokenizer, simpleReadToken)
 	}
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
@@ -223,16 +223,16 @@ TEST(DynamicTokenizer, simpleReadToken)
 	}
 }
 
-TEST(DynamicTokenizer, simplePeekToken)
+TEST(Tokenizer, simplePeekToken)
 {
 	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer;
+	Tokenizer tokenizer;
 
 	const TokenTypeId tid = tokenizer.registerToken(":");
 	ASSERT_EQ(0U, tid);
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
@@ -246,7 +246,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 	}
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(tid, token.type);
@@ -260,7 +260,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 	}
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.peek(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
@@ -274,7 +274,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 	}
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
@@ -288,7 +288,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 	}
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(tid, token.type);
@@ -302,7 +302,7 @@ TEST(DynamicTokenizer, simplePeekToken)
 	}
 
 	{
-		DynamicToken token;
+		Token token;
 		ASSERT_TRUE(tokenizer.read(reader, token));
 
 		ASSERT_EQ(TextToken, token.type);
@@ -316,10 +316,10 @@ TEST(DynamicTokenizer, simplePeekToken)
 	}
 }
 
-TEST(DynamicTokenizer, ambiguousTokens)
+TEST(Tokenizer, ambiguousTokens)
 {
 	CharReader reader{"abc"};
-	DynamicTokenizer tokenizer;
+	Tokenizer tokenizer;
 
 	TokenTypeId t1 = tokenizer.registerToken("abd");
 	TokenTypeId t2 = tokenizer.registerToken("bc");
@@ -327,7 +327,7 @@ TEST(DynamicTokenizer, ambiguousTokens)
 	ASSERT_EQ(0U, t1);
 	ASSERT_EQ(1U, t2);
 
-	DynamicToken token;
+	Token token;
 	ASSERT_TRUE(tokenizer.read(reader, token));
 
 	ASSERT_EQ(TextToken, token.type);
@@ -349,18 +349,18 @@ TEST(DynamicTokenizer, ambiguousTokens)
 	ASSERT_FALSE(tokenizer.read(reader, token));
 }
 
-TEST(DynamicTokenizer, commentTestWhitespacePreserve)
+TEST(Tokenizer, commentTestWhitespacePreserve)
 {
 	CharReader reader{"Test/Test /* Block Comment */", 0};
 	//                 012345678901234567890123456789
 	//                 0        1         2
-	DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE);
+	Tokenizer tokenizer(WhitespaceMode::PRESERVE);
 
 	const TokenTypeId t1 = tokenizer.registerToken("/");
 	const TokenTypeId t2 = tokenizer.registerToken("/*");
 	const TokenTypeId t3 = tokenizer.registerToken("*/");
 
-	std::vector<DynamicToken> expected = {
+	std::vector<Token> expected = {
 	    {TextToken, "Test", SourceLocation{0, 0, 4}},
 	    {t1, "/", SourceLocation{0, 4, 5}},
 	    {TextToken, "Test ", SourceLocation{0, 5, 10}},
@@ -368,7 +368,7 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)
 	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
 	    {t3, "*/", SourceLocation{0, 27, 29}}};
 
-	DynamicToken t;
+	Token t;
 	for (auto &te : expected) {
 		EXPECT_TRUE(tokenizer.read(reader, t));
 		EXPECT_EQ(te.type, t.type);
@@ -380,18 +380,18 @@ TEST(DynamicTokenizer, commentTestWhitespacePreserve)
 	ASSERT_FALSE(tokenizer.read(reader, t));
 }
 
-TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
+TEST(Tokenizer, commentTestWhitespaceCollapse)
 {
 	CharReader reader{"Test/Test /* Block Comment */", 0};
 	//                 012345678901234567890123456789
 	//                 0        1         2
-	DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE);
+	Tokenizer tokenizer(WhitespaceMode::COLLAPSE);
 
 	const TokenTypeId t1 = tokenizer.registerToken("/");
 	const TokenTypeId t2 = tokenizer.registerToken("/*");
 	const TokenTypeId t3 = tokenizer.registerToken("*/");
 
-	std::vector<DynamicToken> expected = {
+	std::vector<Token> expected = {
 	    {TextToken, "Test", SourceLocation{0, 0, 4}},
 	    {t1, "/", SourceLocation{0, 4, 5}},
 	    {TextToken, "Test", SourceLocation{0, 5, 9}},
@@ -399,7 +399,7 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
 	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
 	    {t3, "*/", SourceLocation{0, 27, 29}}};
 
-	DynamicToken t;
+	Token t;
 	for (auto &te : expected) {
 		EXPECT_TRUE(tokenizer.read(reader, t));
 		EXPECT_EQ(te.type, t.type);
@@ -410,6 +410,5 @@ TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
 	}
 	ASSERT_FALSE(tokenizer.read(reader, t));
 }
-
 }
 
-- 
cgit v1.2.3


From 974afd3fdc54380a43445a180263fb162e1ff2c0 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:00:23 +0100
Subject: Applied renaming to unit tests and added forgotten CMakeLists.txt

---
 CMakeLists.txt                             | 177 +++---
 test/formats/osml/OsmlStreamParserTest.cpp | 973 +++++++++++++++++++++++++++++
 test/formats/osxml/OsxmlParserTest.cpp     | 314 ++++++++++
 3 files changed, 1377 insertions(+), 87 deletions(-)
 create mode 100644 test/formats/osml/OsmlStreamParserTest.cpp
 create mode 100644 test/formats/osxml/OsxmlParserTest.cpp

(limited to 'test')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4458d1b..6e3b90f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -134,7 +134,6 @@ ADD_LIBRARY(ousia_core
 	src/core/common/VariantConverter
 	src/core/common/VariantReader
 	src/core/common/VariantWriter
-	src/core/common/Whitespace
 	src/core/frontend/Terminal
 	src/core/frontend/TerminalLogger
 	src/core/managed/Events
@@ -148,11 +147,15 @@ ADD_LIBRARY(ousia_core
 	src/core/model/RootNode
 	src/core/model/Style
 	src/core/model/Typesystem
-	src/core/parser/Parser
-	src/core/parser/ParserContext
-	src/core/parser/ParserScope
-	src/core/parser/generic/ParserStateStack
-	src/core/parser/generic/ParserState
+#	src/core/parser/Parser
+#	src/core/parser/ParserContext
+#	src/core/parser/ParserScope
+#	src/core/parser/generic/ParserState
+#	src/core/parser/generic/ParserStateCallbacks
+#	src/core/parser/generic/ParserStateHandler
+#	src/core/parser/generic/ParserStateStack
+	src/core/parser/utils/Tokenizer
+	src/core/parser/utils/TokenTrie
 	src/core/resource/Resource
 	src/core/resource/ResourceLocator
 	src/core/resource/ResourceManager
@@ -160,6 +163,8 @@ ADD_LIBRARY(ousia_core
 #	src/core/script/ScriptEngine
 )
 
+# Format libraries
+
 #ADD_LIBRARY(ousia_css
 #	src/plugins/css/CodeTokenizer
 #	src/plugins/css/Tokenizer
@@ -170,43 +175,44 @@ ADD_LIBRARY(ousia_core
 #	ousia_core
 #)
 
-ADD_LIBRARY(ousia_filesystem
-	src/plugins/filesystem/FileLocator
-	src/plugins/filesystem/SpecialPaths
+ADD_LIBRARY(ousia_osml
+	src/formats/osml/OsmlStreamParser
 )
 
-TARGET_LINK_LIBRARIES(ousia_filesystem
+TARGET_LINK_LIBRARIES(ousia_osml
 	ousia_core
-	${Boost_LIBRARIES}
 )
 
-ADD_LIBRARY(ousia_html
-	src/plugins/html/DemoOutput
-)
+#ADD_LIBRARY(ousia_osxml
+#	src/formats/osxml/osxmlParser
+#)
 
-TARGET_LINK_LIBRARIES(ousia_html
-	ousia_core
-)
+#TARGET_LINK_LIBRARIES(ousia_osxml
+#	ousia_core
+#	${EXPAT_LIBRARIES}
+#)
 
-ADD_LIBRARY(ousia_xml
-	src/plugins/xml/XmlParser
-)
+# Resource locators
 
-TARGET_LINK_LIBRARIES(ousia_xml
-	ousia_core
-	${EXPAT_LIBRARIES}
-)
+#ADD_LIBRARY(ousia_filesystem
+#	src/plugins/filesystem/FileLocator
+#	src/plugins/filesystem/SpecialPaths
+#)
 
-ADD_LIBRARY(ousia_osdm
-	src/formats/osdm/DynamicTokenizer
-	src/formats/osdm/TokenTrie
-	src/formats/osdm/OsdmStreamParser
-)
+#TARGET_LINK_LIBRARIES(ousia_filesystem
+#	ousia_core
+#	${Boost_LIBRARIES}
+#)
 
-TARGET_LINK_LIBRARIES(ousia_osdm
-	ousia_core
-)
+# Output libraries
 
+#ADD_LIBRARY(ousia_html
+#	src/plugins/html/DemoOutput
+#)
+
+#TARGET_LINK_LIBRARIES(ousia_html
+#	ousia_core
+#)
 
 #ADD_LIBRARY(ousia_mozjs
 #	src/plugins/mozjs/MozJsScriptEngine
@@ -219,17 +225,17 @@ TARGET_LINK_LIBRARIES(ousia_osdm
 
 # Command line interface
 
-ADD_EXECUTABLE(ousia
-	src/cli/Main
-)
+#ADD_EXECUTABLE(ousia
+#	src/cli/Main
+#)
 
-TARGET_LINK_LIBRARIES(ousia
-	ousia_core
-	ousia_filesystem
-	ousia_html
-	ousia_xml
-	${Boost_LIBRARIES}
-)
+#TARGET_LINK_LIBRARIES(ousia
+#	ousia_core
+#	ousia_filesystem
+#	ousia_html
+#	ousia_xml
+#	${Boost_LIBRARIES}
+#)
 
 # If testing is enabled, build the unit tests
 IF(TEST)
@@ -240,10 +246,8 @@ IF(TEST)
 	)
 
 	ADD_EXECUTABLE(ousia_test_core
-		test/core/CodeTokenizerTest
 		test/core/RangeSetTest
-		test/core/RegistryTest
-		test/core/TokenizerTest
+#		test/core/RegistryTest
 		test/core/XMLTest
 		test/core/common/ArgumentTest
 		test/core/common/CharReaderTest
@@ -257,7 +261,6 @@ IF(TEST)
 		test/core/common/VariantWriterTest
 		test/core/common/VariantTest
 		test/core/common/UtilsTest
-		test/core/common/WhitespaceTest
 		test/core/frontend/TerminalLoggerTest
 		test/core/managed/ManagedContainerTest
 		test/core/managed/ManagedTest
@@ -269,9 +272,11 @@ IF(TEST)
 		test/core/model/NodeTest
 		test/core/model/StyleTest
 		test/core/model/TypesystemTest
-		test/core/parser/ParserScopeTest
-		test/core/parser/ParserStackTest
-		test/core/parser/ParserStateTest
+#		test/core/parser/ParserScopeTest
+#		test/core/parser/ParserStackTest
+#		test/core/parser/ParserStateTest
+		test/core/parser/utils/TokenizerTest
+		test/core/parser/utils/TokenTrieTest
 		test/core/resource/ResourceLocatorTest
 		test/core/resource/ResourceRequestTest
 #		test/core/script/FunctionTest
@@ -284,15 +289,15 @@ IF(TEST)
 		ousia_core
 	)
 
-	ADD_EXECUTABLE(ousia_test_filesystem
-		test/plugins/filesystem/FileLocatorTest
-	)
+#	ADD_EXECUTABLE(ousia_test_filesystem
+#		test/plugins/filesystem/FileLocatorTest
+#	)
 
-	TARGET_LINK_LIBRARIES(ousia_test_filesystem
-		${GTEST_LIBRARIES}
-		ousia_core
-		ousia_filesystem
-	)
+#	TARGET_LINK_LIBRARIES(ousia_test_filesystem
+#		${GTEST_LIBRARIES}
+#		ousia_core
+#		ousia_filesystem
+#	)
 
 #	ADD_EXECUTABLE(ousia_test_css
 #		test/plugins/css/Tokenizer
@@ -306,38 +311,36 @@ IF(TEST)
 #		ousia_css
 #	)
 
-	ADD_EXECUTABLE(ousia_test_html
-		test/plugins/html/DemoOutputTest
-	)
+#	ADD_EXECUTABLE(ousia_test_html
+#		test/plugins/html/DemoOutputTest
+#	)
 
-	TARGET_LINK_LIBRARIES(ousia_test_html
-		${GTEST_LIBRARIES}
-		ousia_core
-		ousia_html
-	)
+#	TARGET_LINK_LIBRARIES(ousia_test_html
+#		${GTEST_LIBRARIES}
+#		ousia_core
+#		ousia_html
+#	)
 
-	ADD_EXECUTABLE(ousia_test_xml
-		test/plugins/xml/XmlParserTest
+	ADD_EXECUTABLE(ousia_test_osml
+		test/formats/osml/OsmlStreamParserTest
 	)
 
-	TARGET_LINK_LIBRARIES(ousia_test_xml
+	TARGET_LINK_LIBRARIES(ousia_test_osml
 		${GTEST_LIBRARIES}
 		ousia_core
-		ousia_xml
-		ousia_filesystem
+		ousia_osml
 	)
 
-	ADD_EXECUTABLE(ousia_test_osdm
-		test/formats/osdm/TokenTrieTest
-		test/formats/osdm/DynamicTokenizerTest
-		test/formats/osdm/OsdmStreamParserTest
-	)
+#	ADD_EXECUTABLE(ousia_test_osxml
+#		test/plugins/xml/XmlParserTest
+#	)
 
-	TARGET_LINK_LIBRARIES(ousia_test_osdm
-		${GTEST_LIBRARIES}
-		ousia_core
-		ousia_osdm
-	)
+#	TARGET_LINK_LIBRARIES(ousia_test_osxml
+#		${GTEST_LIBRARIES}
+#		ousia_core
+#		ousia_osml
+#		ousia_filesystem
+#	)
 
 #	ADD_EXECUTABLE(ousia_test_mozjs
 #		test/plugins/mozjs/MozJsScriptEngineTest
@@ -351,11 +354,11 @@ IF(TEST)
 
 	# Register the unit tests
 	ADD_TEST(ousia_test_core ousia_test_core)
-	ADD_TEST(ousia_test_filesystem ousia_test_filesystem)
+#	ADD_TEST(ousia_test_filesystem ousia_test_filesystem)
 #	ADD_TEST(ousia_test_css ousia_test_css)
-	ADD_TEST(ousia_test_html ousia_test_html)
-	ADD_TEST(ousia_test_xml ousia_test_xml)
-	ADD_TEST(ousia_test_osdm ousia_test_osdm)
+#	ADD_TEST(ousia_test_html ousia_test_html)
+	ADD_TEST(ousia_test_osml ousia_test_osml)
+#	ADD_TEST(ousia_test_osxml ousia_test_osxml)
 #	ADD_TEST(ousia_test_mozjs ousia_test_mozjs)
 ENDIF()
 
@@ -373,6 +376,6 @@ INSTALL(DIRECTORY data/ DESTINATION share/ousia
 				OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE
 )
 
-INSTALL(TARGETS ousia
-		RUNTIME DESTINATION bin
-)
+#INSTALL(TARGETS ousia
+#		RUNTIME DESTINATION bin
+#)
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
new file mode 100644
index 0000000..e5eff05
--- /dev/null
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -0,0 +1,973 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <iostream>
+
+#include <core/common/CharReader.hpp>
+#include <core/frontend/TerminalLogger.hpp>
+
+#include <formats/osml/OsmlStreamParser.hpp>
+
+namespace ousia {
+
+static TerminalLogger logger(std::cerr, true);
+
+TEST(OsmlStreamParser, empty)
+{
+	const char *testString = "";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, oneCharacter)
+{
+	const char *testString = "a";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+	ASSERT_EQ("a", reader.getData().asString());
+
+	SourceLocation loc = reader.getData().getLocation();
+	ASSERT_EQ(0U, loc.getStart());
+	ASSERT_EQ(1U, loc.getEnd());
+}
+
+TEST(OsmlStreamParser, whitespaceElimination)
+{
+	const char *testString = " hello \t world ";
+	//                        0123456 78901234
+	//                        0          1
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+	ASSERT_EQ("hello world", reader.getData().asString());
+
+	SourceLocation loc = reader.getData().getLocation();
+	ASSERT_EQ(1U, loc.getStart());
+	ASSERT_EQ(14U, loc.getEnd());
+}
+
+TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak)
+{
+	const char *testString = " hello \n world ";
+	//                        0123456 78901234
+	//                        0          1
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+	ASSERT_EQ("hello world", reader.getData().asString());
+
+	SourceLocation loc = reader.getData().getLocation();
+	ASSERT_EQ(1U, loc.getStart());
+	ASSERT_EQ(14U, loc.getEnd());
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, escapeWhitespace)
+{
+	const char *testString = " hello\\ \\ world ";
+	//                        012345 67 89012345
+	//                        0           1
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+	ASSERT_EQ("hello  world", reader.getData().asString());
+
+	SourceLocation loc = reader.getData().getLocation();
+	ASSERT_EQ(1U, loc.getStart());
+	ASSERT_EQ(15U, loc.getEnd());
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+static void testEscapeSpecialCharacter(const std::string &c)
+{
+	CharReader charReader(std::string("\\") + c);
+	OsmlStreamParser reader(charReader, logger);
+	EXPECT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+	EXPECT_EQ(c, reader.getData().asString());
+
+	SourceLocation loc = reader.getData().getLocation();
+	EXPECT_EQ(0U, loc.getStart());
+	EXPECT_EQ(1U + c.size(), loc.getEnd());
+}
+
+TEST(OsmlStreamParser, escapeSpecialCharacters)
+{
+	testEscapeSpecialCharacter("\\");
+	testEscapeSpecialCharacter("{");
+	testEscapeSpecialCharacter("}");
+	testEscapeSpecialCharacter("<");
+	testEscapeSpecialCharacter(">");
+}
+
+TEST(OsmlStreamParser, simpleSingleLineComment)
+{
+	const char *testString = "% This is a single line comment";
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, singleLineComment)
+{
+	const char *testString = "a% This is a single line comment\nb";
+	//                        01234567890123456789012345678901 23
+	//                        0         1         2         3
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	{
+		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+		ASSERT_EQ("a", reader.getData().asString());
+		SourceLocation loc = reader.getData().getLocation();
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(1U, loc.getEnd());
+	}
+
+	{
+		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+		ASSERT_EQ("b", reader.getData().asString());
+		SourceLocation loc = reader.getData().getLocation();
+		ASSERT_EQ(33U, loc.getStart());
+		ASSERT_EQ(34U, loc.getEnd());
+	}
+
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, multilineComment)
+{
+	const char *testString = "a%{ This is a\n\n multiline line comment}%b";
+	//                        0123456789012 3 456789012345678901234567890
+	//                        0         1           2         3         4
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	{
+		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+		ASSERT_EQ("a", reader.getData().asString());
+		SourceLocation loc = reader.getData().getLocation();
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(1U, loc.getEnd());
+	}
+
+	{
+		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+		ASSERT_EQ("b", reader.getData().asString());
+		SourceLocation loc = reader.getData().getLocation();
+		ASSERT_EQ(40U, loc.getStart());
+		ASSERT_EQ(41U, loc.getEnd());
+	}
+
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, nestedMultilineComment)
+{
+	const char *testString = "a%{%{Another\n\n}%multiline line comment}%b";
+	//                        0123456789012 3 456789012345678901234567890
+	//                        0         1           2         3         4
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	{
+		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+		ASSERT_EQ("a", reader.getData().asString());
+		SourceLocation loc = reader.getData().getLocation();
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(1U, loc.getEnd());
+	}
+
+	{
+		ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+		ASSERT_EQ("b", reader.getData().asString());
+		SourceLocation loc = reader.getData().getLocation();
+		ASSERT_EQ(40U, loc.getStart());
+		ASSERT_EQ(41U, loc.getEnd());
+	}
+
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, simpleCommand)
+{
+	const char *testString = "\\test";
+	//                        0 12345
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+
+	Variant commandName = reader.getCommandName();
+	ASSERT_EQ("test", commandName.asString());
+
+	SourceLocation loc = commandName.getLocation();
+	ASSERT_EQ(0U, loc.getStart());
+	ASSERT_EQ(5U, loc.getEnd());
+
+	ASSERT_EQ(0U, reader.getCommandArguments().asMap().size());
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, simpleCommandWithName)
+{
+	const char *testString = "\\test#bla";
+	//                        0 12345678
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+
+	Variant commandName = reader.getCommandName();
+	ASSERT_EQ("test", commandName.asString());
+	SourceLocation loc = commandName.getLocation();
+	ASSERT_EQ(0U, loc.getStart());
+	ASSERT_EQ(5U, loc.getEnd());
+
+	Variant commandArguments = reader.getCommandArguments();
+	ASSERT_TRUE(commandArguments.isMap());
+	ASSERT_EQ(1U, commandArguments.asMap().size());
+	ASSERT_EQ(1U, commandArguments.asMap().count("name"));
+	ASSERT_EQ("bla", commandArguments.asMap()["name"].asString());
+
+	loc = commandArguments.asMap()["name"].getLocation();
+	ASSERT_EQ(5U, loc.getStart());
+	ASSERT_EQ(9U, loc.getEnd());
+
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, simpleCommandWithArguments)
+{
+	const char *testString = "\\test[a=1,b=2,c=\"test\"]";
+	//                        0 123456789012345 678901 2
+	//                        0          1          2
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+
+	Variant commandName = reader.getCommandName();
+	ASSERT_EQ("test", commandName.asString());
+	SourceLocation loc = commandName.getLocation();
+	ASSERT_EQ(0U, loc.getStart());
+	ASSERT_EQ(5U, loc.getEnd());
+
+	Variant commandArguments = reader.getCommandArguments();
+	ASSERT_TRUE(commandArguments.isMap());
+	ASSERT_EQ(3U, commandArguments.asMap().size());
+	ASSERT_EQ(1U, commandArguments.asMap().count("a"));
+	ASSERT_EQ(1U, commandArguments.asMap().count("b"));
+	ASSERT_EQ(1U, commandArguments.asMap().count("c"));
+	ASSERT_EQ(1, commandArguments.asMap()["a"].asInt());
+	ASSERT_EQ(2, commandArguments.asMap()["b"].asInt());
+	ASSERT_EQ("test", commandArguments.asMap()["c"].asString());
+
+	loc = commandArguments.asMap()["a"].getLocation();
+	ASSERT_EQ(8U, loc.getStart());
+	ASSERT_EQ(9U, loc.getEnd());
+
+	loc = commandArguments.asMap()["b"].getLocation();
+	ASSERT_EQ(12U, loc.getStart());
+	ASSERT_EQ(13U, loc.getEnd());
+
+	loc = commandArguments.asMap()["c"].getLocation();
+	ASSERT_EQ(16U, loc.getStart());
+	ASSERT_EQ(22U, loc.getEnd());
+
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName)
+{
+	const char *testString = "\\test#bla[a=1,b=2,c=\"test\"]";
+	//                        0 1234567890123456789 01234 56
+	//                        0          1          2
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+
+	Variant commandName = reader.getCommandName();
+	ASSERT_EQ("test", commandName.asString());
+	SourceLocation loc = commandName.getLocation();
+	ASSERT_EQ(0U, loc.getStart());
+	ASSERT_EQ(5U, loc.getEnd());
+
+	Variant commandArguments = reader.getCommandArguments();
+	ASSERT_TRUE(commandArguments.isMap());
+	ASSERT_EQ(4U, commandArguments.asMap().size());
+	ASSERT_EQ(1U, commandArguments.asMap().count("a"));
+	ASSERT_EQ(1U, commandArguments.asMap().count("b"));
+	ASSERT_EQ(1U, commandArguments.asMap().count("c"));
+	ASSERT_EQ(1U, commandArguments.asMap().count("name"));
+	ASSERT_EQ(1, commandArguments.asMap()["a"].asInt());
+	ASSERT_EQ(2, commandArguments.asMap()["b"].asInt());
+	ASSERT_EQ("test", commandArguments.asMap()["c"].asString());
+	ASSERT_EQ("bla", commandArguments.asMap()["name"].asString());
+
+	loc = commandArguments.asMap()["a"].getLocation();
+	ASSERT_EQ(12U, loc.getStart());
+	ASSERT_EQ(13U, loc.getEnd());
+
+	loc = commandArguments.asMap()["b"].getLocation();
+	ASSERT_EQ(16U, loc.getStart());
+	ASSERT_EQ(17U, loc.getEnd());
+
+	loc = commandArguments.asMap()["c"].getLocation();
+	ASSERT_EQ(20U, loc.getStart());
+	ASSERT_EQ(26U, loc.getEnd());
+
+	loc = commandArguments.asMap()["name"].getLocation();
+	ASSERT_EQ(5U, loc.getStart());
+	ASSERT_EQ(9U, loc.getEnd());
+
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+}
+
+static void assertCommand(OsmlStreamParser &reader, const std::string &name,
+                          SourceOffset start = InvalidSourceOffset,
+                          SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+	EXPECT_EQ(name, reader.getCommandName().asString());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertCommand(OsmlStreamParser &reader, const std::string &name,
+                          const Variant::mapType &args,
+                          SourceOffset start = InvalidSourceOffset,
+                          SourceOffset end = InvalidSourceOffset)
+{
+	assertCommand(reader, name, start, end);
+	EXPECT_EQ(args, reader.getCommandArguments());
+}
+
+static void assertData(OsmlStreamParser &reader, const std::string &data,
+                       SourceOffset start = InvalidSourceOffset,
+                       SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+	EXPECT_EQ(data, reader.getData().asString());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getData().getLocation().getStart());
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getData().getLocation().getEnd());
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertFieldStart(OsmlStreamParser &reader,
+                             SourceOffset start = InvalidSourceOffset,
+                             SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertFieldEnd(OsmlStreamParser &reader,
+                           SourceOffset start = InvalidSourceOffset,
+                           SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertEnd(OsmlStreamParser &reader,
+                      SourceOffset start = InvalidSourceOffset,
+                      SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+TEST(OsmlStreamParser, fields)
+{
+	const char *testString = "\\test{a}{b}{c}";
+	//                         01234567890123
+	//                         0         1
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "test", 0, 5);
+	assertFieldStart(reader, 5, 6);
+	assertData(reader, "a", 6, 7);
+	assertFieldEnd(reader, 7, 8);
+
+	assertFieldStart(reader, 8, 9);
+	assertData(reader, "b", 9, 10);
+	assertFieldEnd(reader, 10, 11);
+
+	assertFieldStart(reader, 11, 12);
+	assertData(reader, "c", 12, 13);
+	assertFieldEnd(reader, 13, 14);
+	assertEnd(reader, 14, 14);
+}
+
+TEST(OsmlStreamParser, dataOutsideField)
+{
+	const char *testString = "\\test{a}{b} c";
+	//                         0123456789012
+	//                         0         1
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "test", 0, 5);
+	assertFieldStart(reader, 5, 6);
+	assertData(reader, "a", 6, 7);
+	assertFieldEnd(reader, 7, 8);
+
+	assertFieldStart(reader, 8, 9);
+	assertData(reader, "b", 9, 10);
+	assertFieldEnd(reader, 10, 11);
+
+	assertData(reader, "c", 12, 13);
+	assertEnd(reader, 13, 13);
+}
+
+TEST(OsmlStreamParser, nestedCommand)
+{
+	const char *testString = "\\test{a}{\\test2{b} c} d";
+	//                         012345678 90123456789012
+	//                         0          1         2
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "test", 0, 5);
+
+	assertFieldStart(reader, 5, 6);
+	assertData(reader, "a", 6, 7);
+	assertFieldEnd(reader, 7, 8);
+
+	assertFieldStart(reader, 8, 9);
+	{
+		assertCommand(reader, "test2", 9, 15);
+		assertFieldStart(reader, 15, 16);
+		assertData(reader, "b", 16, 17);
+		assertFieldEnd(reader, 17, 18);
+	}
+	assertData(reader, "c", 19, 20);
+	assertFieldEnd(reader, 20, 21);
+	assertData(reader, "d", 22, 23);
+	assertEnd(reader, 23, 23);
+}
+
+TEST(OsmlStreamParser, nestedCommandImmediateEnd)
+{
+	const char *testString = "\\test{\\test2{b}} d";
+	//                         012345 678901234567
+	//                         0          1
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "test", 0, 5);
+	assertFieldStart(reader, 5, 6);
+	{
+		assertCommand(reader, "test2", 6, 12);
+		assertFieldStart(reader, 12, 13);
+		assertData(reader, "b", 13, 14);
+		assertFieldEnd(reader, 14, 15);
+	}
+	assertFieldEnd(reader, 15, 16);
+	assertData(reader, "d", 17, 18);
+	assertEnd(reader, 18, 18);
+}
+
+TEST(OsmlStreamParser, nestedCommandNoData)
+{
+	const char *testString = "\\test{\\test2}";
+	//                         012345 6789012
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "test", 0, 5);
+	assertFieldStart(reader, 5, 6);
+	assertCommand(reader, "test2", 6, 12);
+	assertFieldEnd(reader, 12, 13);
+	assertEnd(reader, 13, 13);
+}
+
+TEST(OsmlStreamParser, multipleCommands)
+{
+	const char *testString = "\\a \\b \\c \\d";
+	//                         012 345 678 90
+	//                         0            1
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 0, 2);
+	assertCommand(reader, "b", 3, 5);
+	assertCommand(reader, "c", 6, 8);
+	assertCommand(reader, "d", 9, 11);
+	assertEnd(reader, 11, 11);
+}
+
+TEST(OsmlStreamParser, fieldsWithSpaces)
+{
+	const char *testString = "\\a {\\b \\c}   \n\n {\\d}";
+	//                         0123 456 789012 3 456 789
+	//                         0           1
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 0, 2);
+	assertFieldStart(reader, 3, 4);
+	assertCommand(reader, "b", 4, 6);
+	assertCommand(reader, "c", 7, 9);
+	assertFieldEnd(reader, 9, 10);
+	assertFieldStart(reader, 16, 17);
+	assertCommand(reader, "d", 17, 19);
+	assertFieldEnd(reader, 19, 20);
+	assertEnd(reader, 20, 20);
+}
+
+TEST(OsmlStreamParser, errorNoFieldToStart)
+{
+	const char *testString = "\\a b {";
+	//                         012345
+	//                         0
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	assertCommand(reader, "a", 0, 2);
+	assertData(reader, "b", 3, 4);
+	ASSERT_FALSE(logger.hasError());
+	assertEnd(reader, 6, 6);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorNoFieldToEnd)
+{
+	const char *testString = "\\a b }";
+	//                         012345
+	//                         0
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	assertCommand(reader, "a", 0, 2);
+	assertData(reader, "b", 3, 4);
+	ASSERT_FALSE(logger.hasError());
+	assertEnd(reader, 6, 6);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorNoFieldEndNested)
+{
+	const char *testString = "\\test{\\test2{}}}";
+	//                         012345 6789012345
+	//                         0          1
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	assertCommand(reader, "test", 0, 5);
+	assertFieldStart(reader, 5, 6);
+	assertCommand(reader, "test2", 6, 12);
+	assertFieldStart(reader, 12, 13);
+	assertFieldEnd(reader, 13, 14);
+	assertFieldEnd(reader, 14, 15);
+	ASSERT_FALSE(logger.hasError());
+	assertEnd(reader, 16, 16);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorNoFieldEndNestedData)
+{
+	const char *testString = "\\test{\\test2{}}a}";
+	//                         012345 67890123456
+	//                         0          1
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	assertCommand(reader, "test", 0, 5);
+	assertFieldStart(reader, 5, 6);
+	assertCommand(reader, "test2", 6, 12);
+	assertFieldStart(reader, 12, 13);
+	assertFieldEnd(reader, 13, 14);
+	assertFieldEnd(reader, 14, 15);
+	assertData(reader, "a", 15, 16);
+	ASSERT_FALSE(logger.hasError());
+	assertEnd(reader, 17, 17);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, beginEnd)
+{
+	const char *testString = "\\begin{book}\\end{book}";
+	//                         012345678901 2345678901
+	//                         0         1          2
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "book", 7, 11);
+	assertFieldStart(reader, 12, 13);
+	assertFieldEnd(reader, 17, 21);
+	assertEnd(reader, 22, 22);
+}
+
+TEST(OsmlStreamParser, beginEndWithName)
+{
+	const char *testString = "\\begin{book#a}\\end{book}";
+	//                         01234567890123 4567890123
+	//                         0         1          2
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "book", {{"name", "a"}}, 7, 11);
+	assertFieldStart(reader, 14, 15);
+	assertFieldEnd(reader, 19, 23);
+	assertEnd(reader, 24, 24);
+}
+
+TEST(OsmlStreamParser, beginEndWithNameAndArgs)
+{
+	const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}";
+	//                         0123456789012345678901234 56789 01 2345678901
+	//                         0         1         2           3          4
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "book",
+	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
+	assertFieldStart(reader, 32, 33);
+	assertFieldEnd(reader, 37, 41);
+	assertEnd(reader, 42, 42);
+}
+
+TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields)
+{
+	const char *testString =
+	    "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}";
+	//    0123456789012345678901234 56789 01234 567890123 45678901 2345678901
+	//    0         1         2           3          4          5          6
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "book",
+	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
+	assertFieldStart(reader, 32, 33);
+	assertData(reader, "a", 33, 34);
+	assertCommand(reader, "test", Variant::mapType{}, 35, 40);
+	assertFieldEnd(reader, 40, 41);
+	assertFieldStart(reader, 41, 42);
+	assertData(reader, "b", 42, 43);
+	assertCommand(reader, "test", Variant::mapType{}, 44, 49);
+	assertFieldStart(reader, 49, 50);
+	assertFieldEnd(reader, 50, 51);
+	assertFieldEnd(reader, 51, 52);
+	assertFieldStart(reader, 52, 53);
+	assertFieldEnd(reader, 57, 61);
+	assertEnd(reader, 62, 62);
+}
+
+TEST(OsmlStreamParser, beginEndWithData)
+{
+	const char *testString = "\\begin{book}a\\end{book}";
+	//                         0123456789012 3456789012
+	//                         0         1          2
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "book", 7, 11);
+	assertFieldStart(reader, 12, 13);
+	assertData(reader, "a", 12, 13);
+	assertFieldEnd(reader, 18, 22);
+	assertEnd(reader, 23, 23);
+}
+
+TEST(OsmlStreamParser, beginEndWithCommand)
+{
+	const char *testString = "\\begin{book}\\a{test}\\end{book}";
+	//                         012345678901 23456789 0123456789
+	//                         0         1           2
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "book", 7, 11);
+	assertFieldStart(reader, 12, 13);
+	assertCommand(reader, "a", 12, 14);
+	assertFieldStart(reader, 14, 15);
+	assertData(reader, "test", 15, 19);
+	assertFieldEnd(reader, 19, 20);
+	assertFieldEnd(reader, 25, 29);
+	assertEnd(reader, 30, 30);
+}
+
+TEST(OsmlStreamParser, errorBeginNoBraceOpen)
+{
+	const char *testString = "\\begin a";
+	//                         01234567
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertData(reader, "a", 7, 8);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorBeginNoIdentifier)
+{
+	const char *testString = "\\begin{!";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	ASSERT_THROW(reader.parse(), LoggableException);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorBeginNoBraceClose)
+{
+	const char *testString = "\\begin{a";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	ASSERT_THROW(reader.parse(), LoggableException);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorBeginNoName)
+{
+	const char *testString = "\\begin{a#}";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertCommand(reader, "a");
+	ASSERT_TRUE(logger.hasError());
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertEnd(reader);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorEndNoBraceOpen)
+{
+	const char *testString = "\\end a";
+	//                         012345
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertData(reader, "a", 5, 6);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorEndNoIdentifier)
+{
+	const char *testString = "\\end{!";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	ASSERT_THROW(reader.parse(), LoggableException);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorEndNoBraceClose)
+{
+	const char *testString = "\\end{a";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	ASSERT_THROW(reader.parse(), LoggableException);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorEndNoBegin)
+{
+	const char *testString = "\\end{a}";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	ASSERT_THROW(reader.parse(), LoggableException);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, errorBeginEndMismatch)
+{
+	const char *testString = "\\begin{a} \\begin{b} test \\end{a}";
+	//                         0123456789 012345678901234 5678901
+	//                         0          1         2          3
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	assertCommand(reader, "a", 7, 8);
+	assertFieldStart(reader, 10, 11);
+	assertCommand(reader, "b", 17, 18);
+	assertFieldStart(reader, 20, 24);
+	assertData(reader, "test", 20, 24);
+	ASSERT_FALSE(logger.hasError());
+	ASSERT_THROW(reader.parse(), LoggableException);
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(OsmlStreamParser, commandWithNSSep)
+{
+	const char *testString = "\\test1:test2";
+	//                         012345678901
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "test1:test2", 0, 12);
+	assertEnd(reader, 12, 12);
+}
+
+TEST(OsmlStreamParser, beginEndWithNSSep)
+{
+	const char *testString = "\\begin{test1:test2}\\end{test1:test2}";
+	//                         0123456789012345678 90123456789012345
+	//                         0         1          2         3
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "test1:test2", 7, 18);
+	assertFieldStart(reader, 19, 20);
+	assertFieldEnd(reader, 24, 35);
+	assertEnd(reader, 36, 36);
+}
+
+TEST(OsmlStreamParser, errorBeginNSSep)
+{
+	const char *testString = "\\begin:test{blub}\\end{blub}";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertCommand(reader, "blub");
+	ASSERT_TRUE(logger.hasError());
+	assertFieldStart(reader);
+	assertFieldEnd(reader);
+	assertEnd(reader);
+}
+
+TEST(OsmlStreamParser, errorEndNSSep)
+{
+	const char *testString = "\\begin{blub}\\end:test{blub}";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	assertCommand(reader, "blub");
+	assertFieldStart(reader);
+	ASSERT_FALSE(logger.hasError());
+	assertFieldEnd(reader);
+	ASSERT_TRUE(logger.hasError());
+	assertEnd(reader);
+}
+
+TEST(OsmlStreamParser, errorEmptyNs)
+{
+	const char *testString = "\\test:";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertCommand(reader, "test");
+	ASSERT_TRUE(logger.hasError());
+	assertData(reader, ":");
+	assertEnd(reader);
+}
+
+TEST(OsmlStreamParser, errorRepeatedNs)
+{
+	const char *testString = "\\test::";
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertCommand(reader, "test");
+	ASSERT_TRUE(logger.hasError());
+	assertData(reader, "::");
+	assertEnd(reader);
+}
+}
+
diff --git a/test/formats/osxml/OsxmlParserTest.cpp b/test/formats/osxml/OsxmlParserTest.cpp
new file mode 100644
index 0000000..c0fb50d
--- /dev/null
+++ b/test/formats/osxml/OsxmlParserTest.cpp
@@ -0,0 +1,314 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/SourceContextReader.hpp>
+#include <core/model/Domain.hpp>
+#include <core/model/Node.hpp>
+#include <core/model/Project.hpp>
+#include <core/frontend/TerminalLogger.hpp>
+#include <core/StandaloneEnvironment.hpp>
+
+#include <plugins/filesystem/FileLocator.hpp>
+#include <formats/osdmx/OsdmxParser.hpp>
+
+namespace ousia {
+
+namespace RttiTypes {
+extern const Rtti Document;
+extern const Rtti Domain;
+extern const Rtti Typesystem;
+}
+
+struct XmlStandaloneEnvironment : public StandaloneEnvironment {
+	XmlParser xmlParser;
+	FileLocator fileLocator;
+
+	XmlStandaloneEnvironment(ConcreteLogger &logger)
+	    : StandaloneEnvironment(logger)
+	{
+		fileLocator.addDefaultSearchPaths();
+		fileLocator.addUnittestSearchPath("xmlparser");
+
+		registry.registerDefaultExtensions();
+		registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"},
+		                        {&RttiTypes::Node}, &xmlParser);
+		registry.registerResourceLocator(&fileLocator);
+	}
+};
+
+static TerminalLogger logger(std::cerr, true);
+
+TEST(XmlParser, mismatchedTag)
+{
+	XmlStandaloneEnvironment env(logger);
+	env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document});
+	ASSERT_TRUE(logger.hasError());
+}
+
+TEST(XmlParser, generic)
+{
+	XmlStandaloneEnvironment env(logger);
+	env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node});
+#ifdef MANAGER_GRAPHVIZ_EXPORT
+	env.manager.exportGraphviz("xmlDocument.dot");
+#endif
+}
+
+static void checkAttributes(Handle<StructType> expected,
+                            Handle<Descriptor> desc)
+{
+	if (expected == nullptr) {
+		ASSERT_TRUE(desc->getAttributesDescriptor()->getAttributes().empty());
+	} else {
+		ASSERT_EQ(expected->getName(),
+		          desc->getAttributesDescriptor()->getName());
+		auto &attrs_exp = expected->getAttributes();
+		auto &attrs = desc->getAttributesDescriptor()->getAttributes();
+		ASSERT_EQ(attrs_exp.size(), attrs.size());
+		for (size_t i = 0; i < attrs_exp.size(); i++) {
+			ASSERT_EQ(attrs_exp[i]->getName(), attrs[i]->getName());
+			ASSERT_EQ(attrs_exp[i]->getType(), attrs[i]->getType());
+			ASSERT_EQ(attrs_exp[i]->isOptional(), attrs[i]->isOptional());
+			ASSERT_EQ(attrs_exp[i]->getDefaultValue(),
+			          attrs[i]->getDefaultValue());
+		}
+	}
+}
+
+static void checkStructuredClass(
+    Handle<Node> n, const std::string &name, Handle<Domain> domain,
+    Variant cardinality = Cardinality::any(),
+    Handle<StructType> attributesDescriptor = nullptr,
+    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
+    bool root = false)
+{
+	ASSERT_FALSE(n == nullptr);
+	Handle<StructuredClass> sc = n.cast<StructuredClass>();
+	ASSERT_FALSE(sc == nullptr);
+	ASSERT_EQ(name, sc->getName());
+	ASSERT_EQ(domain, sc->getParent());
+	ASSERT_EQ(cardinality, sc->getCardinality());
+	ASSERT_EQ(transparent, sc->isTransparent());
+	ASSERT_EQ(root, sc->hasRootPermission());
+	checkAttributes(attributesDescriptor, sc);
+}
+
+static Rooted<StructuredClass> checkStructuredClass(
+    const std::string &resolve, const std::string &name, Handle<Domain> domain,
+    Variant cardinality = Cardinality::any(),
+    Handle<StructType> attributesDescriptor = nullptr,
+    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
+    bool root = false)
+{
+	auto res = domain->resolve(&RttiTypes::StructuredClass, resolve);
+	if (res.size() != 1) {
+		throw OusiaException("resolution error!");
+	}
+	Handle<StructuredClass> sc = res[0].node.cast<StructuredClass>();
+	checkStructuredClass(sc, name, domain, cardinality, attributesDescriptor,
+	                     superclass, transparent, root);
+	return sc;
+}
+
+static void checkAnnotationClass(
+    Handle<Node> n, const std::string &name, Handle<Domain> domain,
+    Handle<StructType> attributesDescriptor = nullptr)
+{
+	ASSERT_FALSE(n == nullptr);
+	Handle<AnnotationClass> ac = n.cast<AnnotationClass>();
+	ASSERT_FALSE(ac == nullptr);
+	ASSERT_EQ(name, ac->getName());
+	ASSERT_EQ(domain, ac->getParent());
+	checkAttributes(attributesDescriptor, ac);
+}
+
+static Rooted<AnnotationClass> checkAnnotationClass(
+    const std::string &resolve, const std::string &name, Handle<Domain> domain,
+    Handle<StructType> attributesDescriptor = nullptr)
+{
+	auto res = domain->resolve(&RttiTypes::AnnotationClass, resolve);
+	if (res.size() != 1) {
+		throw OusiaException("resolution error!");
+	}
+	Handle<AnnotationClass> ac = res[0].node.cast<AnnotationClass>();
+	checkAnnotationClass(ac, name, domain, attributesDescriptor);
+	return ac;
+}
+
+static void checkFieldDescriptor(
+    Handle<Node> n, const std::string &name, Handle<Descriptor> parent,
+    NodeVector<StructuredClass> children,
+    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
+    Handle<Type> primitiveType = nullptr, bool optional = false)
+{
+	ASSERT_FALSE(n == nullptr);
+	Handle<FieldDescriptor> field = n.cast<FieldDescriptor>();
+	ASSERT_FALSE(field.isNull());
+	ASSERT_EQ(name, field->getName());
+	ASSERT_EQ(parent, field->getParent());
+	ASSERT_EQ(type, field->getFieldType());
+	ASSERT_EQ(primitiveType, field->getPrimitiveType());
+	ASSERT_EQ(optional, field->isOptional());
+	// check the children.
+	ASSERT_EQ(children.size(), field->getChildren().size());
+	for (unsigned int c = 0; c < children.size(); c++) {
+		ASSERT_EQ(children[c], field->getChildren()[c]);
+	}
+}
+
+static void checkFieldDescriptor(
+    Handle<Descriptor> desc, Handle<Descriptor> parent,
+    NodeVector<StructuredClass> children,
+    const std::string &name = DEFAULT_FIELD_NAME,
+    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
+    Handle<Type> primitiveType = nullptr, bool optional = false)
+{
+	auto res = desc->resolve(&RttiTypes::FieldDescriptor, name);
+	ASSERT_EQ(1, res.size());
+	checkFieldDescriptor(res[0].node, name, parent, children, type,
+	                     primitiveType, optional);
+}
+
+static void checkFieldDescriptor(
+    Handle<Descriptor> desc, NodeVector<StructuredClass> children,
+    const std::string &name = DEFAULT_FIELD_NAME,
+    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
+    Handle<Type> primitiveType = nullptr, bool optional = false)
+{
+	checkFieldDescriptor(desc, desc, children, name, type, primitiveType,
+	                     optional);
+}
+
+TEST(XmlParser, domainParsing)
+{
+	XmlStandaloneEnvironment env(logger);
+	Rooted<Node> book_domain_node =
+	    env.parse("book_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
+	ASSERT_FALSE(book_domain_node == nullptr);
+	ASSERT_FALSE(logger.hasError());
+	// check the domain node.
+	Rooted<Domain> book_domain = book_domain_node.cast<Domain>();
+	ASSERT_EQ("book", book_domain->getName());
+	// get the book struct node.
+	Cardinality single;
+	single.merge({1});
+	Rooted<StructType> bookAuthor{
+	    new StructType(book_domain->getManager(), "", nullptr)};
+	bookAuthor->addAttribute(
+	    {new Attribute(book_domain->getManager(), "author",
+	                   env.project->getSystemTypesystem()->getStringType(),
+	                   "")},
+	    logger);
+	Rooted<StructuredClass> book = checkStructuredClass(
+	    "book", "book", book_domain, single, bookAuthor, nullptr, false, true);
+	// get the chapter struct node.
+	Rooted<StructuredClass> chapter =
+	    checkStructuredClass("chapter", "chapter", book_domain);
+	Rooted<StructuredClass> section =
+	    checkStructuredClass("section", "section", book_domain);
+	Rooted<StructuredClass> subsection =
+	    checkStructuredClass("subsection", "subsection", book_domain);
+	Rooted<StructuredClass> paragraph =
+	    checkStructuredClass("paragraph", "paragraph", book_domain,
+	                         Cardinality::any(), nullptr, nullptr, true, false);
+	Rooted<StructuredClass> text =
+	    checkStructuredClass("text", "text", book_domain, Cardinality::any(),
+	                         nullptr, nullptr, true, false);
+
+	// check the FieldDescriptors.
+	checkFieldDescriptor(book, {chapter, paragraph});
+	checkFieldDescriptor(chapter, {section, paragraph});
+	checkFieldDescriptor(section, {subsection, paragraph});
+	checkFieldDescriptor(subsection, {paragraph});
+	checkFieldDescriptor(paragraph, {text});
+	checkFieldDescriptor(
+	    text, {}, DEFAULT_FIELD_NAME, FieldDescriptor::FieldType::PRIMITIVE,
+	    env.project->getSystemTypesystem()->getStringType(), false);
+
+	// check parent handling using the headings domain.
+	Rooted<Node> headings_domain_node =
+	    env.parse("headings_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
+	ASSERT_FALSE(headings_domain_node == nullptr);
+	ASSERT_FALSE(logger.hasError());
+	Rooted<Domain> headings_domain = headings_domain_node.cast<Domain>();
+	// now there should be a heading struct.
+	Rooted<StructuredClass> heading =
+	    checkStructuredClass("heading", "heading", headings_domain, single,
+	                         nullptr, nullptr, true, false);
+	// which should be a reference to the paragraph descriptor.
+	checkFieldDescriptor(heading, paragraph, {text});
+	// and each struct in the book domain (except for text) should have a
+	// heading field now.
+	checkFieldDescriptor(book, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+	checkFieldDescriptor(chapter, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+	checkFieldDescriptor(section, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+	checkFieldDescriptor(subsection, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+	checkFieldDescriptor(paragraph, {heading}, "heading",
+	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
+
+	// check annotation handling using the comments domain.
+	Rooted<Node> comments_domain_node =
+	    env.parse("comments_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
+	ASSERT_FALSE(comments_domain_node == nullptr);
+	ASSERT_FALSE(logger.hasError());
+	Rooted<Domain> comments_domain = comments_domain_node.cast<Domain>();
+	// now we should be able to find a comment annotation.
+	Rooted<AnnotationClass> comment_anno =
+	    checkAnnotationClass("comment", "comment", comments_domain);
+	// as well as a comment struct
+	Rooted<StructuredClass> comment =
+	    checkStructuredClass("comment", "comment", comments_domain);
+	// and a reply struct
+	Rooted<StructuredClass> reply =
+	    checkStructuredClass("reply", "reply", comments_domain);
+	// check the fields for each of them.
+	{
+		std::vector<Rooted<Descriptor>> descs{comment_anno, comment, reply};
+		for (auto &d : descs) {
+			checkFieldDescriptor(d, {paragraph}, "content",
+			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
+			                     false);
+			checkFieldDescriptor(d, {reply}, "replies",
+			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
+			                     false);
+		}
+	}
+	// paragraph should have comment as child now as well.
+	checkFieldDescriptor(paragraph, {text, comment});
+	// as should heading, because it references the paragraph default field.
+	checkFieldDescriptor(heading, paragraph, {text, comment});
+}
+
+TEST(XmlParser, documentParsing)
+{
+	XmlStandaloneEnvironment env(logger);
+	Rooted<Node> book_domain_node =
+	    env.parse("simple_book.oxd", "", "", RttiSet{&RttiTypes::Document});
+	//TODO: Check result
+}
+}
+
-- 
cgit v1.2.3


From 2659b4595d809cbd69a77e5ff7e2fc08d225f065 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:02:54 +0100
Subject: Tidied OsxmlEventParser up, implemented correct whitespace handling,
 started to write unit tests for the osxml parser

---
 CMakeLists.txt                              |  93 +++---
 src/core/common/Utils.hpp                   |  21 +-
 src/core/common/WhitespaceHandler.hpp       |  60 ++++
 src/formats/osxml/OsxmlAttributeLocator.cpp | 144 ++++++++++
 src/formats/osxml/OsxmlAttributeLocator.hpp |  67 +++++
 src/formats/osxml/OsxmlEventParser.cpp      | 425 +++++++++++++++-------------
 src/formats/osxml/OsxmlEventParser.hpp      |  44 +--
 test/formats/osml/OsmlStreamParserTest.cpp  |   1 +
 test/formats/osxml/OsxmlEventParserTest.cpp | 222 +++++++++++++++
 9 files changed, 811 insertions(+), 266 deletions(-)
 create mode 100644 src/formats/osxml/OsxmlAttributeLocator.cpp
 create mode 100644 src/formats/osxml/OsxmlAttributeLocator.hpp
 create mode 100644 test/formats/osxml/OsxmlEventParserTest.cpp

(limited to 'test')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6e3b90f..bdc9541 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -147,9 +147,9 @@ ADD_LIBRARY(ousia_core
 	src/core/model/RootNode
 	src/core/model/Style
 	src/core/model/Typesystem
-#	src/core/parser/Parser
-#	src/core/parser/ParserContext
-#	src/core/parser/ParserScope
+	src/core/parser/Parser
+	src/core/parser/ParserContext
+	src/core/parser/ParserScope
 #	src/core/parser/generic/ParserState
 #	src/core/parser/generic/ParserStateCallbacks
 #	src/core/parser/generic/ParserStateHandler
@@ -183,36 +183,37 @@ TARGET_LINK_LIBRARIES(ousia_osml
 	ousia_core
 )
 
-#ADD_LIBRARY(ousia_osxml
-#	src/formats/osxml/osxmlParser
-#)
+ADD_LIBRARY(ousia_osxml
+	src/formats/osxml/OsxmlAttributeLocator
+	src/formats/osxml/OsxmlEventParser
+)
 
-#TARGET_LINK_LIBRARIES(ousia_osxml
-#	ousia_core
-#	${EXPAT_LIBRARIES}
-#)
+TARGET_LINK_LIBRARIES(ousia_osxml
+	ousia_core
+	${EXPAT_LIBRARIES}
+)
 
 # Resource locators
 
-#ADD_LIBRARY(ousia_filesystem
-#	src/plugins/filesystem/FileLocator
-#	src/plugins/filesystem/SpecialPaths
-#)
+ADD_LIBRARY(ousia_filesystem
+	src/plugins/filesystem/FileLocator
+	src/plugins/filesystem/SpecialPaths
+)
 
-#TARGET_LINK_LIBRARIES(ousia_filesystem
-#	ousia_core
-#	${Boost_LIBRARIES}
-#)
+TARGET_LINK_LIBRARIES(ousia_filesystem
+	ousia_core
+	${Boost_LIBRARIES}
+)
 
 # Output libraries
 
-#ADD_LIBRARY(ousia_html
-#	src/plugins/html/DemoOutput
-#)
+ADD_LIBRARY(ousia_html
+	src/plugins/html/DemoOutput
+)
 
-#TARGET_LINK_LIBRARIES(ousia_html
-#	ousia_core
-#)
+TARGET_LINK_LIBRARIES(ousia_html
+	ousia_core
+)
 
 #ADD_LIBRARY(ousia_mozjs
 #	src/plugins/mozjs/MozJsScriptEngine
@@ -247,7 +248,7 @@ IF(TEST)
 
 	ADD_EXECUTABLE(ousia_test_core
 		test/core/RangeSetTest
-#		test/core/RegistryTest
+		test/core/RegistryTest
 		test/core/XMLTest
 		test/core/common/ArgumentTest
 		test/core/common/CharReaderTest
@@ -272,7 +273,7 @@ IF(TEST)
 		test/core/model/NodeTest
 		test/core/model/StyleTest
 		test/core/model/TypesystemTest
-#		test/core/parser/ParserScopeTest
+		test/core/parser/ParserScopeTest
 #		test/core/parser/ParserStackTest
 #		test/core/parser/ParserStateTest
 		test/core/parser/utils/TokenizerTest
@@ -311,15 +312,15 @@ IF(TEST)
 #		ousia_css
 #	)
 
-#	ADD_EXECUTABLE(ousia_test_html
-#		test/plugins/html/DemoOutputTest
-#	)
+	ADD_EXECUTABLE(ousia_test_html
+		test/plugins/html/DemoOutputTest
+	)
 
-#	TARGET_LINK_LIBRARIES(ousia_test_html
-#		${GTEST_LIBRARIES}
-#		ousia_core
-#		ousia_html
-#	)
+	TARGET_LINK_LIBRARIES(ousia_test_html
+		${GTEST_LIBRARIES}
+		ousia_core
+		ousia_html
+	)
 
 	ADD_EXECUTABLE(ousia_test_osml
 		test/formats/osml/OsmlStreamParserTest
@@ -331,16 +332,16 @@ IF(TEST)
 		ousia_osml
 	)
 
-#	ADD_EXECUTABLE(ousia_test_osxml
-#		test/plugins/xml/XmlParserTest
-#	)
+	ADD_EXECUTABLE(ousia_test_osxml
+		test/formats/osxml/OsxmlEventParserTest
+	)
 
-#	TARGET_LINK_LIBRARIES(ousia_test_osxml
-#		${GTEST_LIBRARIES}
-#		ousia_core
-#		ousia_osml
-#		ousia_filesystem
-#	)
+	TARGET_LINK_LIBRARIES(ousia_test_osxml
+		${GTEST_LIBRARIES}
+		ousia_core
+		ousia_osxml
+		ousia_filesystem
+	)
 
 #	ADD_EXECUTABLE(ousia_test_mozjs
 #		test/plugins/mozjs/MozJsScriptEngineTest
@@ -354,11 +355,11 @@ IF(TEST)
 
 	# Register the unit tests
 	ADD_TEST(ousia_test_core ousia_test_core)
-#	ADD_TEST(ousia_test_filesystem ousia_test_filesystem)
+	ADD_TEST(ousia_test_filesystem ousia_test_filesystem)
 #	ADD_TEST(ousia_test_css ousia_test_css)
-#	ADD_TEST(ousia_test_html ousia_test_html)
+	ADD_TEST(ousia_test_html ousia_test_html)
 	ADD_TEST(ousia_test_osml ousia_test_osml)
-#	ADD_TEST(ousia_test_osxml ousia_test_osxml)
+	ADD_TEST(ousia_test_osxml ousia_test_osxml)
 #	ADD_TEST(ousia_test_mozjs ousia_test_mozjs)
 ENDIF()
 
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 16a9136..8361973 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -119,9 +119,26 @@ public:
 	 */
 	template <class T, class Filter>
 	static std::pair<size_t, size_t> trim(const T &s, Filter f)
+	{
+		return trim(s, s.size(), f);
+	}
+
+	/**
+	 * Trims the given string or vector of chars by returning the start and end
+	 * index.
+	 *
+	 * @param s is the container that should be trimmed.
+	 * @param len is the number of elements in the container.
+	 * @param f is a function that returns true for values that should be
+	 * removed.
+	 * @return start and end index. Note that "end" points at the character
+	 * beyond the end, thus "end" minus "start"
+	 */
+	template <class T, class Filter>
+	static std::pair<size_t, size_t> trim(const T &s, size_t len, Filter f)
 	{
 		size_t start = 0;
-		for (size_t i = 0; i < s.size(); i++) {
+		for (size_t i = 0; i < len; i++) {
 			if (!f(s[i])) {
 				start = i;
 				break;
@@ -129,7 +146,7 @@ public:
 		}
 
 		size_t end = 0;
-		for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) {
+		for (ssize_t i = len - 1; i >= static_cast<ssize_t>(start); i--) {
 			if (!f(s[i])) {
 				end = i + 1;
 				break;
diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp
index 79e0518..ed52ea3 100644
--- a/src/core/common/WhitespaceHandler.hpp
+++ b/src/core/common/WhitespaceHandler.hpp
@@ -97,6 +97,25 @@ public:
 	 * @param end is the end byte offset of the given character.
 	 */
 	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd);
+	}
+
+	/**
+	 * Static version of PreservingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd)
 	{
 		if (textBuf.empty()) {
 			textStart = start;
@@ -129,6 +148,27 @@ public:
 	 * @param end is the end byte offset of the given character.
 	 */
 	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf);
+	}
+
+	/**
+	 * Static version of TrimmingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 * @param whitespaceBuf is a reference at the buffer for storing whitespace
+	 * characters.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd, std::vector<char> &whitespaceBuf)
 	{
 		// Handle whitespace characters
 		if (Utils::isWhitespace(c)) {
@@ -174,6 +214,26 @@ public:
 	 * @param end is the end byte offset of the given character.
 	 */
 	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd, hasWhitespace);
+	}
+
+	/**
+	 * Static version of CollapsingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 * @param hasWhitespace is a reference at the "hasWhitespace" flag.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd, bool &hasWhitespace)
 	{
 		// Handle whitespace characters
 		if (Utils::isWhitespace(c)) {
diff --git a/src/formats/osxml/OsxmlAttributeLocator.cpp b/src/formats/osxml/OsxmlAttributeLocator.cpp
new file mode 100644
index 0000000..e37446a
--- /dev/null
+++ b/src/formats/osxml/OsxmlAttributeLocator.cpp
@@ -0,0 +1,144 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <core/common/Location.hpp>
+#include <core/common/CharReader.hpp>
+#include <core/common/Utils.hpp>
+
+#include "OsxmlAttributeLocator.hpp"
+
+namespace ousia {
+
+/**
+ * Enum used internally in the statemachine of the xml argument parser.
+ */
+enum class XmlAttributeState {
+	IN_TAG_NAME,
+	SEARCH_ATTR,
+	IN_ATTR_NAME,
+	HAS_ATTR_NAME,
+	HAS_ATTR_EQUALS,
+	IN_ATTR_DATA
+};
+
+std::map<std::string, SourceLocation> OsxmlAttributeLocator::locate(
+    CharReader &reader, size_t offs)
+{
+	std::map<std::string, SourceLocation> res;
+
+	// Fork the reader, we don't want to mess up the XML parsing process, do we?
+	CharReaderFork readerFork = reader.fork();
+
+	// Move the read cursor to the start location, abort if this does not work
+	if (offs != readerFork.seek(offs)) {
+		return res;
+	}
+
+	// Now all we need to do is to implement one half of an XML parser. As this
+	// is inherently complicated we'll totaly fail at it. Don't care. All we
+	// want to get is those darn offsets for pretty error messages... (and we
+	// can assume the XML is valid as it was already read by expat)
+	XmlAttributeState state = XmlAttributeState::IN_TAG_NAME;
+	char c;
+	std::stringstream attrName;
+	while (readerFork.read(c)) {
+		// Abort at the end of the tag
+		if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) {
+			return res;
+		}
+
+		// One state machine to rule them all, one state machine to find them,
+		// One state machine to bring them all and in the darkness bind them
+		// (the byte offsets)
+		switch (state) {
+			case XmlAttributeState::IN_TAG_NAME:
+				if (Utils::isWhitespace(c)) {
+					res.emplace("$tag",
+					            SourceLocation{reader.getSourceId(), offs + 1,
+					                           readerFork.getOffset() - 1});
+					state = XmlAttributeState::SEARCH_ATTR;
+				}
+				break;
+			case XmlAttributeState::SEARCH_ATTR:
+				if (!Utils::isWhitespace(c)) {
+					state = XmlAttributeState::IN_ATTR_NAME;
+					attrName << c;
+				}
+				break;
+			case XmlAttributeState::IN_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					state = XmlAttributeState::HAS_ATTR_NAME;
+				} else if (c == '=') {
+					state = XmlAttributeState::HAS_ATTR_EQUALS;
+				} else {
+					attrName << c;
+				}
+				break;
+			case XmlAttributeState::HAS_ATTR_NAME:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '=') {
+						state = XmlAttributeState::HAS_ATTR_EQUALS;
+						break;
+					}
+					// Well, this is a strange XML file... We expected to
+					// see a '=' here! Try to continue with the
+					// "HAS_ATTR_EQUALS" state as this state will hopefully
+					// inlcude some error recovery
+				} else {
+					// Skip whitespace here
+					break;
+				}
+			// Fallthrough
+			case XmlAttributeState::HAS_ATTR_EQUALS:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '"') {
+						// Here we are! We have found the beginning of an
+						// attribute. Let's quickly lock the current offset away
+						// in the result map
+						res.emplace(attrName.str(),
+						            SourceLocation{reader.getSourceId(),
+						                           readerFork.getOffset()});
+						state = XmlAttributeState::IN_ATTR_DATA;
+					} else {
+						// No, this XML file is not well formed. Assume we're in
+						// an attribute name once again
+						attrName.str(std::string{&c, 1});
+						state = XmlAttributeState::IN_ATTR_NAME;
+					}
+				}
+				break;
+			case XmlAttributeState::IN_ATTR_DATA:
+				if (c == '"') {
+					// We're at the end of the attribute data, set the end
+					// location
+					auto it = res.find(attrName.str());
+					if (it != res.end()) {
+						it->second.setEnd(readerFork.getOffset() - 1);
+					}
+
+					// Reset the attribute name and restart the search
+					attrName.str(std::string{});
+					state = XmlAttributeState::SEARCH_ATTR;
+				}
+				break;
+		}
+	}
+	return res;
+}
+}
+
diff --git a/src/formats/osxml/OsxmlAttributeLocator.hpp b/src/formats/osxml/OsxmlAttributeLocator.hpp
new file mode 100644
index 0000000..f9a3437
--- /dev/null
+++ b/src/formats/osxml/OsxmlAttributeLocator.hpp
@@ -0,0 +1,67 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsxmlAttributeLocator.hpp
+ *
+ * Contains a class used for locating the byte offsets of the attributes given
+ * in a XML tag.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_
+#define _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_
+
+#include <map>
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+class SourceLocation;
+
+/**
+ * Class containing one static function for locating the byte offsets of the
+ * attributes in a XML tag. This are not retrieved by our xml parser, so we have
+ * to do this manually.
+ */
+class OsxmlAttributeLocator {
+public:
+	/**
+	 * Function used to reconstruct the location of the attributes of a XML tag
+	 * in the source code. This is necessary, as the xml parser only returns an
+	 * offset to the begining of a tag and not to the position of the individual
+	 * arguments.
+	 *
+	 * @param reader is the char reader from which the character data should be
+	 * read.
+	 * @param offs is a byte offset in the xml file pointing at the "<"
+	 * character of the tag.
+	 * @return a map from attribute keys to the corresponding location
+	 * (including range) of the atribute. Also contains the location of the
+	 * tagname in the form of the virtual attribute "$tag".
+	 */
+	static std::map<std::string, SourceLocation> locate(CharReader &reader,
+	                                                    size_t offs);
+};
+
+}
+
+#endif /* _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ */
+
diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp
index 2ef170e..b4aff77 100644
--- a/src/formats/osxml/OsxmlEventParser.cpp
+++ b/src/formats/osxml/OsxmlEventParser.cpp
@@ -18,14 +18,22 @@
 
 #include <expat.h>
 
+#include <vector>
+
+#include <core/common/CharReader.hpp>
 #include <core/common/Logger.hpp>
 #include <core/common/Variant.hpp>
+#include <core/common/VariantReader.hpp>
 #include <core/common/Utils.hpp>
+#include <core/common/WhitespaceHandler.hpp>
 
+#include "OsxmlAttributeLocator.hpp"
 #include "OsxmlEventParser.hpp"
 
 namespace ousia {
 
+/* Class OsxmlEventParser */
+
 /**
  * Class containing data used by the internal functions.
  */
@@ -43,41 +51,75 @@ public:
 	 */
 	ssize_t annotationEndTagDepth;
 
+	/**
+	 * Current character data buffer.
+	 */
+	std::vector<char> textBuf;
+
+	/**
+	 * Current whitespace buffer (for the trimming whitspace mode)
+	 */
+	std::vector<char> whitespaceBuf;
+
+	/**
+	 * Flag indicating whether a whitespace character was present (for the
+	 * collapsing whitespace mode).
+	 */
+	bool hasWhitespace;
+
+	/**
+	 * Current character data start.
+	 */
+	size_t textStart;
+
+	/**
+	 * Current character data end.
+	 */
+	size_t textEnd;
+
 	/**
 	 * Default constructor.
 	 */
-	OsxmlEventParserData() : depth(0), annotationEndTagDepth(-1) {}
+	OsxmlEventParserData();
 
 	/**
 	 * Increments the depth.
 	 */
-	void incrDepth() { depth++; }
+	void incrDepth();
 
 	/**
 	 * Decrement the depth and reset the annotationEndTagDepth flag.
 	 */
-	void decrDepth()
-	{
-		if (depth > 0) {
-			depth--;
-		}
-		if (depth < annotationEndTagDepth) {
-			annotationEndTagDepth = -1;
-		}
-	}
+	void decrDepth();
 
 	/**
 	 * Returns true if we're currently inside an end tag.
 	 */
-	bool inAnnotationEndTag() { depth >= annotationEndTagDepth; }
+	bool inAnnotationEndTag();
+
+	/**
+	 * Returns true if character data is available.
+	 *
+	 * @return true if character data is available.
+	 */
+	bool hasText();
+
+	/**
+	 * Returns a Variant containing the character data and its location.
+	 *
+	 * @return a string variant containing the text data and the character
+	 * location.
+	 */
+	Variant getText(SourceId sourceId);
 };
 
-namespace {
+/* Class GuardedExpatXmlParser */
+
 /**
  * Wrapper class around the XML_Parser pointer which safely frees it whenever
  * the scope is left (e.g. because an exception was thrown).
  */
-class ScopedExpatXmlParser {
+class GuardedExpatXmlParser {
 private:
 	/**
 	 * Internal pointer to the XML_Parser instance.
@@ -86,14 +128,14 @@ private:
 
 public:
 	/**
-	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS
+	 * Constructor of the GuardedExpatXmlParser class. Calls XML_ParserCreateNS
 	 * from the expat library. Throws a parser exception if the XML parser
 	 * cannot be initialized.
 	 *
 	 * @param encoding is the protocol-defined encoding passed to expat (or
 	 * nullptr if expat should determine the encoding by itself).
 	 */
-	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
+	GuardedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
 	{
 		parser = XML_ParserCreate(encoding);
 		if (!parser) {
@@ -103,9 +145,9 @@ public:
 	}
 
 	/**
-	 * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance.
+	 * Destuctor of the GuardedExpatXmlParser, frees the XML parser instance.
 	 */
-	~ScopedExpatXmlParser()
+	~GuardedExpatXmlParser()
 	{
 		if (parser) {
 			XML_ParserFree(parser);
@@ -120,134 +162,20 @@ public:
 };
 
 /**
- * Enum used internally in the statemachine of the micro-xml argument parser.
+ * Name of the special outer tag used for allowing multiple top-level elements
+ * in an xml file.
  */
-enum class XmlAttributeState {
-	IN_TAG_NAME,
-	SEARCH_ATTR,
-	IN_ATTR_NAME,
-	HAS_ATTR_NAME,
-	HAS_ATTR_EQUALS,
-	IN_ATTR_DATA
-};
+static const std::string TOP_LEVEL_TAG{"ousia"};
 
 /**
- * Function used to reconstruct the location of the attributes of a XML tag in
- * the source code. This is necessary, as the xml parser only returns an offset
- * to the begining of a tag and not to the position of the individual arguments.
- *
- * @param reader is the char reader from which the character data should be
- * read.
- * @param offs is a byte offset in the xml file pointing at the "<" character of
- * the tag.
- * @return a map from attribute keys to the corresponding location (including
- * range) of the atribute. Also contains the location of the tagname in the
- * form of the virtual attribute "$tag".
+ * Prefix used to indicate the start of an annoation (note the trailing colon)
  */
-static std::map<std::string, SourceLocation> xmlReconstructAttributeOffsets(
-    CharReader &reader, size_t offs)
-{
-	std::map<std::string, SourceLocation> res;
-
-	// Fork the reader, we don't want to mess up the XML parsing process, do we?
-	CharReaderFork readerFork = reader.fork();
-
-	// Move the read cursor to the start location, abort if this does not work
-	if (!location.isValid() || offs != readerFork.seek(offs)) {
-		return res;
-	}
-
-	// Now all we need to do is to implement one half of an XML parser. As this
-	// is inherently complicated we'll totaly fail at it. Don't care. All we
-	// want to get is those darn offsets for pretty error messages... (and we
-	// can assume the XML is valid as it was already read by expat)
-	XmlAttributeState state = XmlAttributeState::IN_TAG_NAME;
-	char c;
-	std::stringstream attrName;
-	while (readerFork.read(c)) {
-		// Abort at the end of the tag
-		if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) {
-			return res;
-		}
+static const std::string ANNOTATION_START_PREFIX{"a:start:"};
 
-		// One state machine to rule them all, one state machine to find them,
-		// One state machine to bring them all and in the darkness bind them
-		// (the byte offsets)
-		switch (state) {
-			case XmlAttributeState::IN_TAG_NAME:
-				if (Utils::isWhitespace(c)) {
-					res.emplace("$tag",
-					            SourceLocation{reader.getSourceId(), offs + 1,
-					                           readerFork.getOffset() - 1});
-					state = XmlAttributeState::SEARCH_ATTR;
-				}
-				break;
-			case XmlAttributeState::SEARCH_ATTR:
-				if (!Utils::isWhitespace(c)) {
-					state = XmlAttributeState::IN_ATTR_NAME;
-					attrName << c;
-				}
-				break;
-			case XmlAttributeState::IN_ATTR_NAME:
-				if (Utils::isWhitespace(c)) {
-					state = XmlAttributeState::HAS_ATTR_NAME;
-				} else if (c == '=') {
-					state = XmlAttributeState::HAS_ATTR_EQUALS;
-				} else {
-					attrName << c;
-				}
-				break;
-			case XmlAttributeState::HAS_ATTR_NAME:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '=') {
-						state = XmlAttributeState::HAS_ATTR_EQUALS;
-						break;
-					}
-					// Well, this is a strange XML file... We expected to
-					// see a '=' here! Try to continue with the
-					// "HAS_ATTR_EQUALS" state as this state will hopefully
-					// inlcude some error recovery
-				} else {
-					// Skip whitespace here
-					break;
-				}
-			// Fallthrough
-			case XmlAttributeState::HAS_ATTR_EQUALS:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '"') {
-						// Here we are! We have found the beginning of an
-						// attribute. Let's quickly lock the current offset away
-						// in the result map
-						res.emplace(attrName.str(),
-						            SourceLocation{reader.getSourceId(),
-						                           readerFork.getOffset()});
-						state = XmlAttributeState::IN_ATTR_DATA;
-					} else {
-						// No, this XML file is not well formed. Assume we're in
-						// an attribute name once again
-						attrName.str(std::string{&c, 1});
-						state = XmlAttributeState::IN_ATTR_NAME;
-					}
-				}
-				break;
-			case XmlAttributeState::IN_ATTR_DATA:
-				if (c == '"') {
-					// We're at the end of the attribute data, set the end
-					// location
-					auto it = res.find(attrName.str());
-					if (it != res.end()) {
-						it->second.setEnd(readerFork.getOffset() - 1);
-					}
-
-					// Reset the attribute name and restart the search
-					attrName.str(std::string{});
-					state = XmlAttributeState::SEARCH_ATTR;
-				}
-				break;
-		}
-	}
-	return res;
-}
+/**
+ * Prefix used to indicate the end of an annotation.
+ */
+static const std::string ANNOTATION_END_PREFIX{"a:end"};
 
 /**
  * Synchronizes the position of the xml parser with the default location of the
@@ -268,22 +196,12 @@ static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0)
 	size_t offs = XML_GetCurrentByteIndex(p);
 	SourceLocation loc =
 	    SourceLocation{parser->getReader().getSourceId(), offs, offs + len};
-	parser->getLogger().setDefaultLocation(location);
+	parser->getLogger().setDefaultLocation(loc);
 
 	// Return the fetched location
 	return loc;
 }
 
-/**
- * Prefix used to indicate the start of an annoation,
- */
-static const std::string ANNOTATION_START_PREFIX{"a:start:"};
-
-/**
- * Prefix used to indicate the end of an annotation.
- */
-static const std::string ANNOTATION_END_PREFIX{"a:end"};
-
 /**
  * Callback called by eXpat whenever a start handler is reached.
  */
@@ -292,14 +210,21 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 {
 	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
 	XML_Parser p = static_cast<XML_Parser>(ref);
-	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
+	OsxmlEventParser *parser =
+	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
+
+	// If there is any text data in the buffer, issue that first
+	if (parser->getData().hasText()) {
+		parser->getEvents().data(
+		    parser->getData().getText(parser->getReader().getSourceId()));
+	}
 
 	// Read the argument locations -- this is only a stupid and slow hack,
 	// but it is necessary, as expat doesn't give use the byte offset of the
 	// arguments.
 	std::map<std::string, SourceLocation> attributeOffsets =
-	    xmlReconstructXMLAttributeOffsets(*userData->reader,
-	                                      XML_GetCurrentByteIndex(p));
+	    OsxmlAttributeLocator::locate(parser->getReader(),
+	                                  XML_GetCurrentByteIndex(p));
 
 	// Update the logger position
 	SourceLocation loc = xmlSyncLoggerPosition(p);
@@ -316,7 +241,8 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 	// Make sure we're currently not inside an annotation end tag -- this would
 	// be highly illegal!
 	if (parser->getData().inAnnotationEndTag()) {
-		logger.error("No tags allowed inside an annotation end tag", nameLoc);
+		parser->getLogger().error(
+		    "No tags allowed inside an annotation end tag", nameLoc);
 		return;
 	}
 
@@ -336,36 +262,33 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 
 		// Parse the string, pass the location of the key
 		std::pair<bool, Variant> value = VariantReader::parseGenericString(
-		    *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(),
+		    *(attr++), parser->getLogger(), keyLoc.getSourceId(),
 		    keyLoc.getStart());
 
 		// Set the overall location of the parsed element to the attribute
 		// location
-		value.second->setLocation(keyLoc);
-
-		// Store the
-		if (!args.emplace(key, value.second).second) {
-			parser->getLogger().warning(
-			    std::string("Attribute \"") + key +
-			        "\" defined multiple times, only using first definition",
-			    keyLoc);
-		}
+		value.second.setLocation(keyLoc);
+
+		// Store the keys in the map
+		args.emplace(key, value.second).second;
 	}
 
 	// Fetch the name of the tag, check for special tags
 	std::string nameStr(name);
-	if (nameStr == "ousia" && parser->getData().depth == 1) {
-		// We're in the top-level and the magic "ousia" tag is reached -- just
+	if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 1) {
+		// We're in the top-level and the magic tag is reached -- just
 		// ignore it and issue a warning for each argument that has been given
 		for (const auto &arg : args) {
-			parser->getLogger().warning(
-			    std::string("Ignoring attribute \"") + arg.first +
-			        std::string("\" for magic tag \"ousia\""),
-			    arg.second);
+			parser->getLogger().warning(std::string("Ignoring attribute \"") +
+			                                arg.first +
+			                                std::string("\" for magic tag \"") +
+			                                TOP_LEVEL_TAG + std::string("\""),
+			                            arg.second);
 		}
 	} else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) {
 		// Assemble a name variant containing the name minus the prefix
-		Variant nameVar = nameStr.substr(ANNOTATION_START_PREFIX.size());
+		Variant nameVar =
+		    Variant::fromString(nameStr.substr(ANNOTATION_START_PREFIX.size()));
 		nameVar.setLocation(nameLoc);
 
 		// Issue the "annotationStart" event
@@ -410,25 +333,34 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 	}
 }
 
-static void xmlEndElementHandler(void *p, const XML_Char *name)
+static void xmlEndElementHandler(void *ref, const XML_Char *name)
 {
 	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
 	XML_Parser p = static_cast<XML_Parser>(ref);
-	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
+	OsxmlEventParser *parser =
+	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
 
 	// Synchronize the position of the logger with teh position
-	xmlSyncLoggerPosition(parser);
-
-	// Decrement the current depth
-	parser->getData().decrDepth();
+	xmlSyncLoggerPosition(p);
 
 	// Abort as long as we're in an annotation end tag
 	if (parser->getData().inAnnotationEndTag()) {
+		parser->getData().decrDepth();
 		return;
 	}
 
+	// Decrement the current depth
+	parser->getData().decrDepth();
+
+	// If there is any text data in the buffer, issue that first
+	if (parser->getData().hasText()) {
+		parser->getEvents().data(
+		    parser->getData().getText(parser->getReader().getSourceId()));
+	}
+
 	// Abort if the special ousia tag ends here
-	if (nameStr == "ousia" && parser->getData().depth == 0) {
+	std::string nameStr{name};
+	if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 0) {
 		return;
 	}
 
@@ -436,20 +368,105 @@ static void xmlEndElementHandler(void *p, const XML_Char *name)
 	parser->getEvents().fieldEnd();
 }
 
-static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len)
+static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len)
 {
 	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
 	XML_Parser p = static_cast<XML_Parser>(ref);
-	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
-
-	// TODO
-/*	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
-	syncLoggerPosition(parser, ulen);
-	const std::string data = Utils::trim(std::string{s, ulen});
-	if (!data.empty()) {
-		stack->data(data);
-	}*/
+	OsxmlEventParser *parser =
+	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
+
+	// Abort as long as we're in an annotation end tag
+	if (parser->getData().inAnnotationEndTag()) {
+		return;
+	}
+
+	// Convert the signed (smell the 90's C library here?) length to an usigned
+	// value
+	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
+
+	// Synchronize the logger position
+	SourceLocation loc = xmlSyncLoggerPosition(p, ulen);
+
+	// Fetch some variables for convenience
+	const WhitespaceMode mode = parser->getWhitespaceMode();
+	OsxmlEventParserData &data = parser->getData();
+	std::vector<char> &textBuf = data.textBuf;
+	std::vector<char> &whitespaceBuf = data.whitespaceBuf;
+	bool &hasWhitespace = data.hasWhitespace;
+	size_t &textStart = data.textStart;
+	size_t &textEnd = data.textEnd;
+
+	size_t pos = loc.getStart();
+	for (size_t i = 0; i < ulen; i++, pos++) {
+		switch (mode) {
+			case WhitespaceMode::PRESERVE:
+				PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf,
+				                                    textStart, textEnd);
+				break;
+			case WhitespaceMode::TRIM:
+				TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf,
+				                                  textStart, textEnd,
+				                                  whitespaceBuf);
+				break;
+			case WhitespaceMode::COLLAPSE:
+				CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf,
+				                                    textStart, textEnd,
+				                                    hasWhitespace);
+				break;
+		}
+	}
+}
+
+/* Class OsxmlEvents */
+
+OsxmlEvents::~OsxmlEvents() {}
+
+/* Class OsxmlEventParser */
+
+OsxmlEventParserData::OsxmlEventParserData()
+    : depth(0),
+      annotationEndTagDepth(-1),
+      hasWhitespace(false),
+      textStart(0),
+      textEnd(0)
+{
+}
+
+void OsxmlEventParserData::incrDepth() { depth++; }
+
+void OsxmlEventParserData::decrDepth()
+{
+	if (depth > 0) {
+		depth--;
+	}
+	if (depth < annotationEndTagDepth) {
+		annotationEndTagDepth = -1;
+	}
+}
+
+bool OsxmlEventParserData::inAnnotationEndTag()
+{
+	return (annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth);
 }
+
+bool OsxmlEventParserData::hasText() { return !textBuf.empty(); }
+
+Variant OsxmlEventParserData::getText(SourceId sourceId)
+{
+	// Create a variant containing the string data and the location
+	Variant var =
+	    Variant::fromString(std::string{textBuf.data(), textBuf.size()});
+	var.setLocation({sourceId, textStart, textEnd});
+
+	// Reset the text buffers
+	textBuf.clear();
+	whitespaceBuf.clear();
+	hasWhitespace = false;
+	textStart = 0;
+	textEnd = 0;
+
+	// Return the variant
+	return var;
 }
 
 /* Class OsxmlEventParser */
@@ -459,21 +476,22 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events,
     : reader(reader),
       events(events),
       logger(logger),
-      whitespaceMode(WhitespaceMode::COLLAPSE),
+      whitespaceMode(WhitespaceMode::TRIM),
       data(new OsxmlEventParserData())
 {
 }
 
-void OsxmlEventParser::parse(CharReader &reader)
+OsxmlEventParser::~OsxmlEventParser() {}
+
+void OsxmlEventParser::parse()
 {
 	// Create the parser object
-	ScopedExpatXmlParser p{"UTF-8"};
+	GuardedExpatXmlParser p{"UTF-8"};
 
 	// Reset the depth
-	depth = 0;
+	data->depth = 0;
 
-	// Pass the reference to the ParserStack to the XML handler
-	XMLUserData data(&stack, &reader);
+	// Pass the reference to this parser instance to the XML handler
 	XML_SetUserData(&p, this);
 	XML_UseParserAsHandlerArg(&p);
 
@@ -498,7 +516,7 @@ void OsxmlEventParser::parse(CharReader &reader)
 		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) {
 			throw LoggableException{
 			    "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))},
-			    xmlSyncLoggerPosition(p)};
+			    xmlSyncLoggerPosition(&p)};
 		}
 
 		// Abort once there are no more bytes in the stream
@@ -513,12 +531,17 @@ void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode)
 	this->whitespaceMode = whitespaceMode;
 }
 
-CharReader &OsxmlEventParser::getCharReader() { return charReader; }
+WhitespaceMode OsxmlEventParser::getWhitespaceMode() const
+{
+	return whitespaceMode;
+}
+
+CharReader &OsxmlEventParser::getReader() const { return reader; }
 
-Logger &OsxmlEventParser::getLogger() { return logger; }
+Logger &OsxmlEventParser::getLogger() const { return logger; }
 
-OsxmlEvents &OsxmlEventParser::getEvents() { return events; }
+OsxmlEvents &OsxmlEventParser::getEvents() const { return events; }
 
-OsxmlEventParserData &OsxmlEventParser::getData() { return *data; }
+OsxmlEventParserData &OsxmlEventParser::getData() const { return *data; }
 }
 
diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp
index 5319ca6..aa20ea9 100644
--- a/src/formats/osxml/OsxmlEventParser.hpp
+++ b/src/formats/osxml/OsxmlEventParser.hpp
@@ -42,7 +42,7 @@ class Variant;
 class OsxmlEventParserData;
 
 /**
- * Interface which defines the callback functions which are called by the 
+ * Interface which defines the callback functions which are called by the
  * OsxmlEventParser whenever an event occurs.
  */
 class OsxmlEvents {
@@ -50,13 +50,13 @@ public:
 	/**
 	 * Virtual destructor.
 	 */
-	virtual ~OsxmlEvents() {}
+	virtual ~OsxmlEvents();
 
 	/**
 	 * Called whenever a command starts. Note that this implicitly always starts
 	 * the default field of the command.
 	 *
-	 * @param name is a string variant containing name and location of the 
+	 * @param name is a string variant containing name and location of the
 	 * command.
 	 * @param args is a map variant containing the arguments that were given
 	 * to the command.
@@ -67,12 +67,12 @@ public:
 	 * Called whenever an annotation starts. Note that this implicitly always
 	 * starts the default field of the annotation.
 	 *
-	 * @param name is a string variant containing the name of the annotation 
+	 * @param name is a string variant containing the name of the annotation
 	 * class and the location of the annotation definition.
 	 * @param args is a map variant containing the arguments that were given
 	 * to the annotation definition.
 	 */
-	virtual void annotationStart(Variant name, Variant args);
+	virtual void annotationStart(Variant name, Variant args) = 0;
 
 	/**
 	 * Called whenever the range of an annotation ends. The callee must
@@ -85,12 +85,12 @@ public:
 	 * ended here. May be empty (or nullptr), if no elementName has been
 	 * specified at the end of the annotation.
 	 */
-	virtual void annotationEnd(Variant name, Variant elementName);
+	virtual void annotationEnd(Variant name, Variant elementName) = 0;
 
 	/**
-	 * Called whenever the default field which was implicitly started by 
+	 * Called whenever the default field which was implicitly started by
 	 * commandStart or annotationStart ends. Note that this does not end the
-	 * range of an annotation, but the default field of the annotation. To 
+	 * range of an annotation, but the default field of the annotation. To
 	 * signal the end of the annotation this, the annotationEnd method will be
 	 * invoked.
 	 */
@@ -102,11 +102,10 @@ public:
 	 * is not called if the parsing failed, the parser prints an error message
 	 * instead.
 	 *
-	 * @param data is the already parsed data that should be passed to the 
+	 * @param data is the already parsed data that should be passed to the
 	 * handler.
 	 */
 	virtual void data(Variant data) = 0;
-
 };
 
 /**
@@ -148,7 +147,7 @@ public:
 	 * Constructor fo the OsxmlEventParser. Takes a reference at the OsxmlEvents
 	 * of which the callback functions are called.
 	 *
-	 * @param reader is a reference to the CharReader instance from which the 
+	 * @param reader is a reference to the CharReader instance from which the
 	 * XML should be read.
 	 * @param events is a refence at an instance of the OsxmlEvents class. All
 	 * events are forwarded to this class.
@@ -157,6 +156,11 @@ public:
 	 */
 	OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger);
 
+	/**
+	 * Destructor of OsxmlEventParser (needed for unique_ptr to incomplete type)
+	 */
+	~OsxmlEventParser();
+
 	/**
 	 * Performs the actual parsing. Reads the XML using eXpat and calles the
 	 * callbacks in the event listener instance whenever something interesting
@@ -167,38 +171,44 @@ public:
 	/**
 	 * Sets the whitespace handling mode.
 	 *
-	 * @param whitespaceMode defines how whitespace in the data should be 
+	 * @param whitespaceMode defines how whitespace in the data should be
 	 * handled.
 	 */
 	void setWhitespaceMode(WhitespaceMode whitespaceMode);
 
+	/**
+	 * Returns the current whitespace handling mode.
+	 *
+	 * @return the currently set whitespace handling mode.
+	 */
+	WhitespaceMode getWhitespaceMode() const;
+
 	/**
 	 * Returns the internal CharReader reference.
 	 *
 	 * @return the CharReader reference.
 	 */
-	CharReader &getCharReader();
+	CharReader &getReader() const;
 
 	/**
 	 * Returns the internal Logger reference.
 	 *
 	 * @return the internal Logger reference.
 	 */
-	Logger &getLogger();
+	Logger &getLogger() const;
 
 	/**
 	 * Returns the internal OsxmlEvents reference.
 	 *
 	 * @return the internal OsxmlEvents reference.
 	 */
-	OsxmlEvents &getEvents();
+	OsxmlEvents &getEvents() const;
 
 	/**
 	 * Returns a reference at the internal data.
 	 */
-	OsxmlEventParserData &getData();
+	OsxmlEventParserData &getData() const;
 };
-
 }
 
 #endif /* _OSXML_EVENT_PARSER_HPP_ */
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index e5eff05..b944af8 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -28,6 +28,7 @@
 namespace ousia {
 
 static TerminalLogger logger(std::cerr, true);
+//static ConcreteLogger logger;
 
 TEST(OsmlStreamParser, empty)
 {
diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp
new file mode 100644
index 0000000..06c800f
--- /dev/null
+++ b/test/formats/osxml/OsxmlEventParserTest.cpp
@@ -0,0 +1,222 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/frontend/TerminalLogger.hpp>
+#include <core/common/CharReader.hpp>
+#include <core/common/Variant.hpp>
+
+#include <formats/osxml/OsxmlEventParser.hpp>
+
+namespace ousia {
+
+static TerminalLogger logger(std::cerr, true);
+// static ConcreteLogger logger;
+
+namespace {
+enum class OsxmlEvent {
+	COMMAND_START,
+	ANNOTATION_START,
+	ANNOTATION_END,
+	FIELD_END,
+	DATA
+};
+
+class TestOsxmlEventListener : public OsxmlEvents {
+public:
+	std::vector<std::pair<OsxmlEvent, Variant>> events;
+
+	void commandStart(Variant name, Variant args) override
+	{
+		events.emplace_back(OsxmlEvent::COMMAND_START,
+		                    Variant::arrayType{name, args});
+	}
+
+	void annotationStart(Variant name, Variant args) override
+	{
+		events.emplace_back(OsxmlEvent::ANNOTATION_START,
+		                    Variant::arrayType{name, args});
+	}
+
+	void annotationEnd(Variant name, Variant elementName) override
+	{
+		events.emplace_back(OsxmlEvent::ANNOTATION_END,
+		                    Variant::arrayType{name, elementName});
+	}
+
+	void fieldEnd() override
+	{
+		events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{});
+	}
+
+	void data(Variant data) override
+	{
+		events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data});
+	}
+};
+
+static std::vector<std::pair<OsxmlEvent, Variant>> parseXml(
+    const char *testString,
+    WhitespaceMode whitespaceMode = WhitespaceMode::TRIM)
+{
+	TestOsxmlEventListener listener;
+	CharReader reader(testString);
+	OsxmlEventParser parser(reader, listener, logger);
+	parser.setWhitespaceMode(whitespaceMode);
+	parser.parse();
+	return listener.events;
+}
+}
+
+TEST(OsxmlEventParser, simpleCommandWithArgs)
+{
+	const char *testString = "<a name=\"test\" a=\"1\" b=\"2\" c=\"blub\"/>";
+	//                        01234567 89012 3456 78 9012 34 5678 90123 456
+	//                        0          1            2            3
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{
+	         "a", Variant::mapType{
+	                  {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString);
+	ASSERT_EQ(expectedEvents, events);
+
+	// Check the locations (I'll do this one time and then just assume it works)
+	ASSERT_EQ(1U, events[0].second.asArray()[0].getLocation().getStart());
+	ASSERT_EQ(2U, events[0].second.asArray()[0].getLocation().getEnd());
+	ASSERT_EQ(
+	    9U,
+	    events[0].second.asArray()[1].asMap()["name"].getLocation().getStart());
+	ASSERT_EQ(
+	    13U,
+	    events[0].second.asArray()[1].asMap()["name"].getLocation().getEnd());
+	ASSERT_EQ(
+	    18U,
+	    events[0].second.asArray()[1].asMap()["a"].getLocation().getStart());
+	ASSERT_EQ(
+	    19U, events[0].second.asArray()[1].asMap()["a"].getLocation().getEnd());
+	ASSERT_EQ(
+	    24U,
+	    events[0].second.asArray()[1].asMap()["b"].getLocation().getStart());
+	ASSERT_EQ(
+	    25U, events[0].second.asArray()[1].asMap()["b"].getLocation().getEnd());
+	ASSERT_EQ(
+	    30U,
+	    events[0].second.asArray()[1].asMap()["c"].getLocation().getStart());
+	ASSERT_EQ(
+	    34U, events[0].second.asArray()[1].asMap()["c"].getLocation().getEnd());
+}
+
+TEST(OsxmlEventParser, magicTopLevelTag)
+{
+	const char *testString = "<ousia><a/><b/></ousia>";
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{{"a", Variant::mapType{}}}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}},
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{{"b", Variant::mapType{}}}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString);
+	ASSERT_EQ(expectedEvents, events);
+}
+
+TEST(OsxmlEventParser, magicTopLevelTagInside)
+{
+	const char *testString = "<a><ousia/></a>";
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{{"a", Variant::mapType{}}}},
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{{"ousia", Variant::mapType{}}}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString);
+	ASSERT_EQ(expectedEvents, events);
+}
+
+TEST(OsxmlEventParser, commandWithDataPreserveWhitespace)
+{
+	const char *testString = "<a>  hello  \n world </a>";
+	//                        012345678901 234567890123
+	//                        0         1          2
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::DATA, Variant::arrayType{"  hello  \n world "}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString, WhitespaceMode::PRESERVE);
+	ASSERT_EQ(expectedEvents, events);
+
+	// Check the location of the text
+	ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart());
+	ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd());
+}
+
+TEST(OsxmlEventParser, commandWithDataTrimWhitespace)
+{
+	const char *testString = "<a>  hello  \n world </a>";
+	//                        012345678901 234567890123
+	//                        0         1          2
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::DATA, Variant::arrayType{"hello  \n world"}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString, WhitespaceMode::TRIM);
+	ASSERT_EQ(expectedEvents, events);
+
+	// Check the location of the text
+	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart());
+	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd());
+}
+
+TEST(OsxmlEventParser, commandWithDataCollapseWhitespace)
+{
+	const char *testString = "<a>  hello  \n world </a>";
+	//                        012345678901 234567890123
+	//                        0         1          2
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::DATA, Variant::arrayType{"hello world"}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString, WhitespaceMode::COLLAPSE);
+	ASSERT_EQ(expectedEvents, events);
+
+	// Check the location of the text
+	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart());
+	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd());
+}
+
+}
+
-- 
cgit v1.2.3


From 9b4cdfabf6527440d6ffa499cc6b57a44daaeadb Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:05:42 +0100
Subject: Added code for the handling of explicit default fields and improved
 unit tests

---
 CMakeLists.txt                             |  16 +-
 src/formats/osml/OsmlStreamParser.cpp      |  78 +++++--
 src/formats/osml/OsmlStreamParser.hpp      |  45 +++-
 test/formats/osml/OsmlStreamParserTest.cpp | 340 +++++++++++++++++------------
 4 files changed, 302 insertions(+), 177 deletions(-)

(limited to 'test')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bdc9541..d311f7a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -290,15 +290,15 @@ IF(TEST)
 		ousia_core
 	)
 
-#	ADD_EXECUTABLE(ousia_test_filesystem
-#		test/plugins/filesystem/FileLocatorTest
-#	)
+	ADD_EXECUTABLE(ousia_test_filesystem
+		test/plugins/filesystem/FileLocatorTest
+	)
 
-#	TARGET_LINK_LIBRARIES(ousia_test_filesystem
-#		${GTEST_LIBRARIES}
-#		ousia_core
-#		ousia_filesystem
-#	)
+	TARGET_LINK_LIBRARIES(ousia_test_filesystem
+		${GTEST_LIBRARIES}
+		ousia_core
+		ousia_filesystem
+	)
 
 #	ADD_EXECUTABLE(ousia_test_css
 #		test/plugins/css/Tokenizer
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 6b00eef..6606120 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -60,6 +60,11 @@ public:
 	 */
 	TokenTypeId FieldEnd;
 
+	/**
+	 * Id of the default field start token.
+	 */
+	TokenTypeId DefaultFieldStart;
+
 	/**
 	 * Registers the plain format tokens in the internal tokenizer.
 	 */
@@ -71,6 +76,7 @@ public:
 		BlockCommentEnd = registerToken("}%");
 		FieldStart = registerToken("{");
 		FieldEnd = registerToken("}");
+		DefaultFieldStart = registerToken("{!");
 	}
 };
 
@@ -164,7 +170,7 @@ OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger)
     : reader(reader), logger(logger), tokenizer(Tokens)
 {
 	// Place an intial command representing the complete file on the stack
-	commands.push(Command{"", Variant::mapType{}, true, true, true});
+	commands.push(Command{"", Variant::mapType{}, true, true, true, false});
 }
 
 Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep)
@@ -365,7 +371,7 @@ void OsmlStreamParser::pushCommand(Variant commandName,
 		commands.pop();
 	}
 	commands.push(Command{std::move(commandName), std::move(commandArguments),
-	                      hasRange, false, false});
+	                      hasRange, false, false, false});
 }
 
 OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
@@ -482,6 +488,29 @@ bool OsmlStreamParser::checkIssueFieldStart()
 	return false;
 }
 
+bool OsmlStreamParser::closeField()
+{
+	// Try to end an open field of the current command -- if the current command
+	// is not inside an open field, end this command and try to close the next
+	// one
+	for (int i = 0; i < 2 && commands.size() > 1; i++) {
+		Command &cmd = commands.top();
+		if (!cmd.inRangeField) {
+			if (cmd.inField) {
+				cmd.inField = false;
+				if (cmd.inDefaultField) {
+					commands.pop();
+				}
+				return true;
+			}
+			commands.pop();
+		} else {
+			return false;
+		}
+	}
+	return false;
+}
+
 OsmlStreamParser::State OsmlStreamParser::parse()
 {
 	// Handler for incomming data
@@ -579,27 +608,29 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 			}
 			logger.error(
 			    "Got field start token \"{\", but no command for which to "
-			    "start the field. Did you mean \"\\{\"?",
+			    "start the field. Write \"\\{\" to insert this sequence as "
+			    "text.",
 			    token);
 		} else if (token.type == Tokens.FieldEnd) {
-			// Try to end an open field of the current command -- if the current
-			// command is not inside an open field, end this command and try to
-			// close the next one
-			for (int i = 0; i < 2 && commands.size() > 1; i++) {
-				Command &cmd = commands.top();
-				if (!cmd.inRangeField) {
-					if (cmd.inField) {
-						cmd.inField = false;
-						return State::FIELD_END;
-					}
-					commands.pop();
-				} else {
-					break;
-				}
+			if (closeField()) {
+				return State::FIELD_END;
+			}
+			logger.error(
+			    "Got field end token \"}\", but there is no field to end. "
+			    "Write \"\\}\" to insert this sequence as text.",
+			    token);
+		} else if (token.type == Tokens.DefaultFieldStart) {
+			// Try to start a default field the first time the token is reached
+			Command &topCmd = commands.top();
+			if (!topCmd.inField) {
+				topCmd.inField = true;
+				topCmd.inDefaultField = true;
+				return State::FIELD_START;
 			}
 			logger.error(
-			    "Got field end token \"}\", but there is no field to end. Did "
-			    "you mean \"\\}\"?",
+			    "Got default field start token \"{!\", but no command for "
+			    "which to start the field. Write \"\\{!\" to insert this "
+			    "sequence as text",
 			    token);
 		} else {
 			logger.error("Unexpected token \"" + token.content + "\"", token);
@@ -627,14 +658,19 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 	return State::END;
 }
 
-const Variant &OsmlStreamParser::getCommandName()
+const Variant &OsmlStreamParser::getCommandName() const
 {
 	return commands.top().name;
 }
 
-const Variant &OsmlStreamParser::getCommandArguments()
+const Variant &OsmlStreamParser::getCommandArguments() const
 {
 	return commands.top().arguments;
 }
+
+bool OsmlStreamParser::inDefaultField() const
+{
+	return commands.top().inRangeField || commands.top().inDefaultField;
+}
 }
 
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index 1508012..bb5db65 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -152,10 +152,16 @@ public:
 		 */
 		bool inRangeField;
 
+		/**
+		 * Set to true if we are currently in a field that has been especially
+		 * marked as default field (using the "|") syntax.
+		 */
+		bool inDefaultField;
+
 		/**
 		 * Default constructor.
 		 */
-		Command() : hasRange(false), inField(false), inRangeField(false) {}
+		Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {}
 
 		/**
 		 * Constructor of the Command class.
@@ -168,16 +174,19 @@ public:
 		 * explicit range.
 		 * @param inField is set to true if we currently are inside a field
 		 * of this command.
-		 * @param inRangeField is set to true if we currently inside the outer
-		 * field of the command.
+		 * @param inRangeField is set to true if we currently are inside the
+		 * outer field of a ranged command.
+		 * @param inDefaultField is set to true if we currently are in a
+		 * specially marked default field.
 		 */
 		Command(Variant name, Variant arguments, bool hasRange, bool inField,
-		        bool inRangeField)
+		        bool inRangeField, bool inDefaultField)
 		    : name(std::move(name)),
 		      arguments(std::move(arguments)),
 		      hasRange(hasRange),
 		      inField(inField),
-		      inRangeField(inRangeField)
+		      inRangeField(inRangeField),
+		      inDefaultField(inDefaultField)
 		{
 		}
 	};
@@ -289,6 +298,16 @@ private:
 	 */
 	bool checkIssueFieldStart();
 
+	/**
+	 * Closes a currently open field. Note that the command will be removed from
+	 * the internal command stack if the field that is being closed is a
+	 * field marked as default field.
+	 *
+	 * @return true if the field could be closed, false if there was no field
+	 * to close.
+	 */
+	bool closeField();
+
 public:
 	/**
 	 * Constructor of the OsmlStreamParser class. Attaches the new
@@ -317,7 +336,7 @@ public:
 	 * @return a reference at a variant containing the data parsed by the
 	 * "parse" function.
 	 */
-	const Variant &getData() { return data; }
+	const Variant &getData() const { return data; }
 
 	/**
 	 * Returns a reference at the internally stored command name. Only valid if
@@ -326,7 +345,7 @@ public:
 	 * @return a reference at a variant containing name and location of the
 	 * parsed command.
 	 */
-	const Variant &getCommandName();
+	const Variant &getCommandName() const;
 
 	/**
 	 * Returns a reference at the internally stored command name. Only valid if
@@ -335,14 +354,22 @@ public:
 	 * @return a reference at a variant containing arguments given to the
 	 * command.
 	 */
-	const Variant &getCommandArguments();
+	const Variant &getCommandArguments() const;
+
+	/**
+	 * Returns true if the current field is the "default" field. This is true if
+	 * the parser either is in the outer range of a range command or inside a
+	 * field that has been especially marked as "default" field (using the "|"
+	 * syntax).
+	 */
+	bool inDefaultField() const;
 
 	/**
 	 * Returns a reference at the char reader.
 	 *
 	 * @return the last internal token location.
 	 */
-	SourceLocation &getLocation() { return location; }
+	const SourceLocation &getLocation() const { return location; }
 };
 }
 
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index b944af8..da9fe8a 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -28,7 +28,88 @@
 namespace ousia {
 
 static TerminalLogger logger(std::cerr, true);
-//static ConcreteLogger logger;
+// static ConcreteLogger logger;
+
+static void assertCommand(OsmlStreamParser &reader, const std::string &name,
+                          SourceOffset start = InvalidSourceOffset,
+                          SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+	EXPECT_EQ(name, reader.getCommandName().asString());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertCommand(OsmlStreamParser &reader, const std::string &name,
+                          const Variant::mapType &args,
+                          SourceOffset start = InvalidSourceOffset,
+                          SourceOffset end = InvalidSourceOffset)
+{
+	assertCommand(reader, name, start, end);
+	EXPECT_EQ(args, reader.getCommandArguments());
+}
+
+static void assertData(OsmlStreamParser &reader, const std::string &data,
+                       SourceOffset start = InvalidSourceOffset,
+                       SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+	EXPECT_EQ(data, reader.getData().asString());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getData().getLocation().getStart());
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getData().getLocation().getEnd());
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertFieldStart(OsmlStreamParser &reader, bool defaultField,
+                             SourceOffset start = InvalidSourceOffset,
+                             SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse());
+	EXPECT_EQ(defaultField, reader.inDefaultField());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertFieldEnd(OsmlStreamParser &reader,
+                           SourceOffset start = InvalidSourceOffset,
+                           SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertEnd(OsmlStreamParser &reader,
+                      SourceOffset start = InvalidSourceOffset,
+                      SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
 
 TEST(OsmlStreamParser, empty)
 {
@@ -47,12 +128,7 @@ TEST(OsmlStreamParser, oneCharacter)
 
 	OsmlStreamParser reader(charReader, logger);
 
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("a", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(1U, loc.getEnd());
+	assertData(reader, "a", 0, 1);
 }
 
 TEST(OsmlStreamParser, whitespaceElimination)
@@ -64,12 +140,7 @@ TEST(OsmlStreamParser, whitespaceElimination)
 
 	OsmlStreamParser reader(charReader, logger);
 
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(14U, loc.getEnd());
+	assertData(reader, "hello world", 1, 14);
 }
 
 TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak)
@@ -81,13 +152,7 @@ TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak)
 
 	OsmlStreamParser reader(charReader, logger);
 
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(14U, loc.getEnd());
-	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+	assertData(reader, "hello world", 1, 14);
 }
 
 TEST(OsmlStreamParser, escapeWhitespace)
@@ -99,13 +164,7 @@ TEST(OsmlStreamParser, escapeWhitespace)
 
 	OsmlStreamParser reader(charReader, logger);
 
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello  world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(15U, loc.getEnd());
-	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+	assertData(reader, "hello  world", 1, 15);
 }
 
 static void testEscapeSpecialCharacter(const std::string &c)
@@ -127,6 +186,7 @@ TEST(OsmlStreamParser, escapeSpecialCharacters)
 	testEscapeSpecialCharacter("}");
 	testEscapeSpecialCharacter("<");
 	testEscapeSpecialCharacter(">");
+	testEscapeSpecialCharacter("|");
 }
 
 TEST(OsmlStreamParser, simpleSingleLineComment)
@@ -347,86 +407,6 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName)
 	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
 }
 
-static void assertCommand(OsmlStreamParser &reader, const std::string &name,
-                          SourceOffset start = InvalidSourceOffset,
-                          SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
-	EXPECT_EQ(name, reader.getCommandName().asString());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertCommand(OsmlStreamParser &reader, const std::string &name,
-                          const Variant::mapType &args,
-                          SourceOffset start = InvalidSourceOffset,
-                          SourceOffset end = InvalidSourceOffset)
-{
-	assertCommand(reader, name, start, end);
-	EXPECT_EQ(args, reader.getCommandArguments());
-}
-
-static void assertData(OsmlStreamParser &reader, const std::string &data,
-                       SourceOffset start = InvalidSourceOffset,
-                       SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	EXPECT_EQ(data, reader.getData().asString());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getData().getLocation().getStart());
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getData().getLocation().getEnd());
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertFieldStart(OsmlStreamParser &reader,
-                             SourceOffset start = InvalidSourceOffset,
-                             SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertFieldEnd(OsmlStreamParser &reader,
-                           SourceOffset start = InvalidSourceOffset,
-                           SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertEnd(OsmlStreamParser &reader,
-                      SourceOffset start = InvalidSourceOffset,
-                      SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
 TEST(OsmlStreamParser, fields)
 {
 	const char *testString = "\\test{a}{b}{c}";
@@ -436,15 +416,15 @@ TEST(OsmlStreamParser, fields)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertData(reader, "a", 6, 7);
 	assertFieldEnd(reader, 7, 8);
 
-	assertFieldStart(reader, 8, 9);
+	assertFieldStart(reader, false, 8, 9);
 	assertData(reader, "b", 9, 10);
 	assertFieldEnd(reader, 10, 11);
 
-	assertFieldStart(reader, 11, 12);
+	assertFieldStart(reader, false, 11, 12);
 	assertData(reader, "c", 12, 13);
 	assertFieldEnd(reader, 13, 14);
 	assertEnd(reader, 14, 14);
@@ -459,11 +439,11 @@ TEST(OsmlStreamParser, dataOutsideField)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertData(reader, "a", 6, 7);
 	assertFieldEnd(reader, 7, 8);
 
-	assertFieldStart(reader, 8, 9);
+	assertFieldStart(reader, false, 8, 9);
 	assertData(reader, "b", 9, 10);
 	assertFieldEnd(reader, 10, 11);
 
@@ -481,14 +461,14 @@ TEST(OsmlStreamParser, nestedCommand)
 
 	assertCommand(reader, "test", 0, 5);
 
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertData(reader, "a", 6, 7);
 	assertFieldEnd(reader, 7, 8);
 
-	assertFieldStart(reader, 8, 9);
+	assertFieldStart(reader, false, 8, 9);
 	{
 		assertCommand(reader, "test2", 9, 15);
-		assertFieldStart(reader, 15, 16);
+		assertFieldStart(reader, false, 15, 16);
 		assertData(reader, "b", 16, 17);
 		assertFieldEnd(reader, 17, 18);
 	}
@@ -507,10 +487,10 @@ TEST(OsmlStreamParser, nestedCommandImmediateEnd)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	{
 		assertCommand(reader, "test2", 6, 12);
-		assertFieldStart(reader, 12, 13);
+		assertFieldStart(reader, false, 12, 13);
 		assertData(reader, "b", 13, 14);
 		assertFieldEnd(reader, 14, 15);
 	}
@@ -527,7 +507,7 @@ TEST(OsmlStreamParser, nestedCommandNoData)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertCommand(reader, "test2", 6, 12);
 	assertFieldEnd(reader, 12, 13);
 	assertEnd(reader, 13, 13);
@@ -557,11 +537,11 @@ TEST(OsmlStreamParser, fieldsWithSpaces)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "a", 0, 2);
-	assertFieldStart(reader, 3, 4);
+	assertFieldStart(reader, false, 3, 4);
 	assertCommand(reader, "b", 4, 6);
 	assertCommand(reader, "c", 7, 9);
 	assertFieldEnd(reader, 9, 10);
-	assertFieldStart(reader, 16, 17);
+	assertFieldStart(reader, false, 16, 17);
 	assertCommand(reader, "d", 17, 19);
 	assertFieldEnd(reader, 19, 20);
 	assertEnd(reader, 20, 20);
@@ -612,9 +592,9 @@ TEST(OsmlStreamParser, errorNoFieldEndNested)
 
 	logger.reset();
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertCommand(reader, "test2", 6, 12);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, false, 12, 13);
 	assertFieldEnd(reader, 13, 14);
 	assertFieldEnd(reader, 14, 15);
 	ASSERT_FALSE(logger.hasError());
@@ -633,9 +613,9 @@ TEST(OsmlStreamParser, errorNoFieldEndNestedData)
 
 	logger.reset();
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertCommand(reader, "test2", 6, 12);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, false, 12, 13);
 	assertFieldEnd(reader, 13, 14);
 	assertFieldEnd(reader, 14, 15);
 	assertData(reader, "a", 15, 16);
@@ -654,7 +634,7 @@ TEST(OsmlStreamParser, beginEnd)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, true, 12, 13);
 	assertFieldEnd(reader, 17, 21);
 	assertEnd(reader, 22, 22);
 }
@@ -669,7 +649,7 @@ TEST(OsmlStreamParser, beginEndWithName)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "book", {{"name", "a"}}, 7, 11);
-	assertFieldStart(reader, 14, 15);
+	assertFieldStart(reader, true, 14, 15);
 	assertFieldEnd(reader, 19, 23);
 	assertEnd(reader, 24, 24);
 }
@@ -685,7 +665,7 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgs)
 
 	assertCommand(reader, "book",
 	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
-	assertFieldStart(reader, 32, 33);
+	assertFieldStart(reader, true, 32, 33);
 	assertFieldEnd(reader, 37, 41);
 	assertEnd(reader, 42, 42);
 }
@@ -702,17 +682,17 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields)
 
 	assertCommand(reader, "book",
 	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
-	assertFieldStart(reader, 32, 33);
+	assertFieldStart(reader, false, 32, 33);
 	assertData(reader, "a", 33, 34);
 	assertCommand(reader, "test", Variant::mapType{}, 35, 40);
 	assertFieldEnd(reader, 40, 41);
-	assertFieldStart(reader, 41, 42);
+	assertFieldStart(reader, false, 41, 42);
 	assertData(reader, "b", 42, 43);
 	assertCommand(reader, "test", Variant::mapType{}, 44, 49);
-	assertFieldStart(reader, 49, 50);
+	assertFieldStart(reader, false, 49, 50);
 	assertFieldEnd(reader, 50, 51);
 	assertFieldEnd(reader, 51, 52);
-	assertFieldStart(reader, 52, 53);
+	assertFieldStart(reader, true, 52, 53);
 	assertFieldEnd(reader, 57, 61);
 	assertEnd(reader, 62, 62);
 }
@@ -727,12 +707,45 @@ TEST(OsmlStreamParser, beginEndWithData)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, true, 12, 13);
 	assertData(reader, "a", 12, 13);
 	assertFieldEnd(reader, 18, 22);
 	assertEnd(reader, 23, 23);
 }
 
+TEST(OsmlStreamParser, beginEndNested)
+{
+	const char *testString =
+	    "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}";
+	//    012345678901234 5678901234567890 123456 7890123 4567890
+	//    0         1          2         3           4          5
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 7, 8);
+	assertFieldStart(reader, false, 9, 10);
+	assertData(reader, "b", 10, 11);
+	assertFieldEnd(reader, 11, 12);
+	assertFieldStart(reader, true, 13, 14);
+	assertData(reader, "c", 13, 14);
+	assertCommand(reader, "d", 22, 23);
+	assertFieldStart(reader, false, 24, 25);
+	assertData(reader, "e", 25, 26);
+	assertFieldEnd(reader, 26, 27);
+	assertFieldStart(reader, false, 27, 28);
+	assertData(reader, "f", 28, 29);
+	assertFieldEnd(reader, 29, 30);
+	assertFieldStart(reader, true, 31, 32);
+	assertCommand(reader, "g", 31, 33);
+	assertFieldStart(reader, false, 33, 34);
+	assertData(reader, "h", 34, 35);
+	assertFieldEnd(reader, 35, 36);
+	assertFieldEnd(reader, 42, 43);
+	assertFieldEnd(reader, 49, 50);
+	assertEnd(reader, 51, 51);
+}
+
 TEST(OsmlStreamParser, beginEndWithCommand)
 {
 	const char *testString = "\\begin{book}\\a{test}\\end{book}";
@@ -743,9 +756,9 @@ TEST(OsmlStreamParser, beginEndWithCommand)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, true, 12, 13);
 	assertCommand(reader, "a", 12, 14);
-	assertFieldStart(reader, 14, 15);
+	assertFieldStart(reader, false, 14, 15);
 	assertData(reader, "test", 15, 19);
 	assertFieldEnd(reader, 19, 20);
 	assertFieldEnd(reader, 25, 29);
@@ -873,9 +886,9 @@ TEST(OsmlStreamParser, errorBeginEndMismatch)
 
 	logger.reset();
 	assertCommand(reader, "a", 7, 8);
-	assertFieldStart(reader, 10, 11);
+	assertFieldStart(reader, true, 10, 11);
 	assertCommand(reader, "b", 17, 18);
-	assertFieldStart(reader, 20, 24);
+	assertFieldStart(reader, true, 20, 24);
 	assertData(reader, "test", 20, 24);
 	ASSERT_FALSE(logger.hasError());
 	ASSERT_THROW(reader.parse(), LoggableException);
@@ -904,7 +917,7 @@ TEST(OsmlStreamParser, beginEndWithNSSep)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test1:test2", 7, 18);
-	assertFieldStart(reader, 19, 20);
+	assertFieldStart(reader, true, 19, 20);
 	assertFieldEnd(reader, 24, 35);
 	assertEnd(reader, 36, 36);
 }
@@ -920,7 +933,7 @@ TEST(OsmlStreamParser, errorBeginNSSep)
 	ASSERT_FALSE(logger.hasError());
 	assertCommand(reader, "blub");
 	ASSERT_TRUE(logger.hasError());
-	assertFieldStart(reader);
+	assertFieldStart(reader, true);
 	assertFieldEnd(reader);
 	assertEnd(reader);
 }
@@ -934,7 +947,7 @@ TEST(OsmlStreamParser, errorEndNSSep)
 
 	logger.reset();
 	assertCommand(reader, "blub");
-	assertFieldStart(reader);
+	assertFieldStart(reader, true);
 	ASSERT_FALSE(logger.hasError());
 	assertFieldEnd(reader);
 	ASSERT_TRUE(logger.hasError());
@@ -970,5 +983,54 @@ TEST(OsmlStreamParser, errorRepeatedNs)
 	assertData(reader, "::");
 	assertEnd(reader);
 }
+
+TEST(OsmlStreamParser, explicitDefaultField)
+{
+	const char *testString = "\\a{!b}c";
+	//                         01234567
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 0, 2);
+	assertFieldStart(reader, true, 2, 4);
+	assertData(reader, "b", 4, 5);
+	assertFieldEnd(reader, 5, 6);
+	assertData(reader, "c", 6, 7);
+	assertEnd(reader, 7, 7);
+}
+
+TEST(OsmlStreamParser, explicitDefaultFieldWithCommand)
+{
+	const char *testString = "\\a{!\\b}c";
+	//                         0123 4567
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 0, 2);
+	assertFieldStart(reader, true, 2, 4);
+	assertCommand(reader, "b", 4, 6);
+	assertFieldEnd(reader, 6, 7);
+	assertData(reader, "c", 7, 8);
+	assertEnd(reader, 8, 8);
+}
+
+TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField)
+{
+	const char *testString = "\\a{!\\b}{c}";
+	//                         0123 4567
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 0, 2);
+	assertFieldStart(reader, true, 2, 4);
+	assertCommand(reader, "b", 4, 6);
+	assertFieldEnd(reader, 6, 7);
+	assertData(reader, "c", 7, 8);
+	assertEnd(reader, 8, 8);
+}
+
 }
 
-- 
cgit v1.2.3


From 856fa8298d55c07313d9638d9f8b8c0913202b2c Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:06:05 +0100
Subject: Fixed forgotten unit test

---
 test/formats/osml/OsmlStreamParserTest.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'test')

diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index da9fe8a..5f23822 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -1019,17 +1019,20 @@ TEST(OsmlStreamParser, explicitDefaultFieldWithCommand)
 TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField)
 {
 	const char *testString = "\\a{!\\b}{c}";
-	//                         0123 4567
+	//                         0123 456789
 	CharReader charReader(testString);
 
 	OsmlStreamParser reader(charReader, logger);
 
+	logger.reset();
 	assertCommand(reader, "a", 0, 2);
 	assertFieldStart(reader, true, 2, 4);
 	assertCommand(reader, "b", 4, 6);
 	assertFieldEnd(reader, 6, 7);
-	assertData(reader, "c", 7, 8);
-	assertEnd(reader, 8, 8);
+	ASSERT_FALSE(logger.hasError());
+	assertData(reader, "c", 8, 9);
+	ASSERT_TRUE(logger.hasError());
+	assertEnd(reader, 10, 10);
 }
 
 }
-- 
cgit v1.2.3


From 205810b44c980998958dcd857c2cb34a914dc760 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Thu, 12 Feb 2015 16:21:36 +0100
Subject: Implemented annotation start and end field

---
 contrib/test.osdm                          |  29 ----
 contrib/test.osml                          |  29 ++++
 src/formats/osml/OsmlStreamParser.cpp      | 116 ++++++++++++---
 src/formats/osml/OsmlStreamParser.hpp      |  16 +-
 test/formats/osml/OsmlStreamParserTest.cpp | 228 ++++++++++++++++++++++++++++-
 5 files changed, 363 insertions(+), 55 deletions(-)
 delete mode 100644 contrib/test.osdm
 create mode 100644 contrib/test.osml

(limited to 'test')

diff --git a/contrib/test.osdm b/contrib/test.osdm
deleted file mode 100644
index 100bc77..0000000
--- a/contrib/test.osdm
+++ /dev/null
@@ -1,29 +0,0 @@
-%{
-	We're currently inside a block comment.
-	%{
-		Note that block comments can be nested, easily allowing you to comment
-		out blocks which already contain comments.
-	}%
-}%
-
-% Well, line comments, as we know them from TeX also work
-
-\import{meta}
-\import{book}
-
-\domain#special_words{
-	\struct#latex
-	\struct#ousia
-}
-
-\book{
-	\include{chapters/chapter1}
-	\include{chapters/chapter2}
-
-	\begin{note}{Behaviour of "Include"}
-		Analogous to the `include` command in \latex, \ousia forces the included
-		file to be *complete* in a sense, that it must not have dangling open
-		commands.
-	\end{note}
-}
-
diff --git a/contrib/test.osml b/contrib/test.osml
new file mode 100644
index 0000000..100bc77
--- /dev/null
+++ b/contrib/test.osml
@@ -0,0 +1,29 @@
+%{
+	We're currently inside a block comment.
+	%{
+		Note that block comments can be nested, easily allowing you to comment
+		out blocks which already contain comments.
+	}%
+}%
+
+% Well, line comments, as we know them from TeX also work
+
+\import{meta}
+\import{book}
+
+\domain#special_words{
+	\struct#latex
+	\struct#ousia
+}
+
+\book{
+	\include{chapters/chapter1}
+	\include{chapters/chapter2}
+
+	\begin{note}{Behaviour of "Include"}
+		Analogous to the `include` command in \latex, \ousia forces the included
+		file to be *complete* in a sense, that it must not have dangling open
+		commands.
+	\end{note}
+}
+
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 6606120..0174fa4 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -65,6 +65,16 @@ public:
 	 */
 	TokenTypeId DefaultFieldStart;
 
+	/**
+	 * Id of the annotation start token.
+	 */
+	TokenTypeId AnnotationStart;
+
+	/**
+	 * Id of the annotation end token.
+	 */
+	TokenTypeId AnnotationEnd;
+
 	/**
 	 * Registers the plain format tokens in the internal tokenizer.
 	 */
@@ -77,6 +87,8 @@ public:
 		FieldStart = registerToken("{");
 		FieldEnd = registerToken("}");
 		DefaultFieldStart = registerToken("{!");
+		AnnotationStart = registerToken("<\\");
+		AnnotationEnd = registerToken("\\>");
 	}
 };
 
@@ -374,7 +386,8 @@ void OsmlStreamParser::pushCommand(Variant commandName,
 	                      hasRange, false, false, false});
 }
 
-OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
+OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start,
+                                                       bool isAnnotation)
 {
 	// Parse the commandName as a first identifier
 	Variant commandName = parseIdentifier(start, true);
@@ -388,6 +401,9 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
 	    Utils::split(commandName.asString(), ':');
 	const bool isBegin = commandNameComponents[0] == "begin";
 	const bool isEnd = commandNameComponents[0] == "end";
+
+	// Parse the begin or end command
+	State res = State::COMMAND;
 	if (isBegin || isEnd) {
 		if (commandNameComponents.size() > 1) {
 			logger.error(
@@ -396,30 +412,76 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
 			    commandName);
 		}
 		if (isBegin) {
-			return parseBeginCommand();
+			res = parseBeginCommand();
 		} else if (isEnd) {
-			return parseEndCommand();
+			res = parseEndCommand();
+		}
+	} else {
+		// Check whether the next character is a '#', indicating the start of
+		// the command name
+		Variant commandArgName;
+		start = reader.getOffset();
+		if (reader.expect('#')) {
+			commandArgName = parseIdentifier(start);
+			if (commandArgName.asString().empty()) {
+				logger.error("Expected identifier after \"#\"", commandArgName);
+			}
 		}
+
+		// Parse the arugments
+		Variant commandArguments =
+		    parseCommandArguments(std::move(commandArgName));
+
+		// Push the command onto the command stack
+		pushCommand(std::move(commandName), std::move(commandArguments), false);
 	}
 
-	// Check whether the next character is a '#', indicating the start of the
-	// command name
-	Variant commandArgName;
-	start = reader.getOffset();
-	if (reader.expect('#')) {
-		commandArgName = parseIdentifier(start);
-		if (commandArgName.asString().empty()) {
-			logger.error("Expected identifier after \"#\"", commandArgName);
+	// Check whether a ">" character is the next character that is to be read.
+	// In that case the current command could be an annotation end command!
+	char c;
+	if (reader.fetch(c) && c == '>') {
+		// Ignore the character after a begin or end command
+		if (isBegin || isEnd) {
+			logger.warning(
+			    "Ignoring annotation end character \">\" after special "
+			    "commands \"begin\" or \"end\". Write \"\\>\" to end a "
+			    "\"begin\"/\"end\" enclosed annotation.",
+			    reader);
+			return res;
 		}
-	}
 
-	// Parse the arugments
-	Variant commandArguments = parseCommandArguments(std::move(commandArgName));
+		// If this should be an annoation, ignore the character
+		if (isAnnotation) {
+			logger.warning(
+			    "Ignoring annotation end character \">\" after annotation "
+			    "start command. Write \"\\>\" to end the annotation.",
+			    reader);
+		} else {
+			// Make sure no arguments apart from the "name" argument are given
+			// to an annotation end
+			Variant::mapType &map = commands.top().arguments.asMap();
+			if (!map.empty()) {
+				if (map.count("name") == 0 || map.size() > 1U) {
+					logger.error(
+					    "An annotation end command may not have any arguments "
+					    "other than \"name\"");
+					return res;
+				}
+			}
 
-	// Push the command onto the command stack
-	pushCommand(std::move(commandName), std::move(commandArguments), false);
+			// If we got here, this is a valid ANNOTATION_END command, issue it
+			reader.peek(c);
+			reader.consumePeek();
+			return State::ANNOTATION_END;
+		}
+	}
 
-	return State::COMMAND;
+	// If we're starting an annotation, return the command as annotation start
+	// instead of command
+	if (isAnnotation && res == State::COMMAND) {
+		return State::ANNOTATION_START;
+	}
+	return res;
 }
 
 void OsmlStreamParser::parseBlockComment()
@@ -522,7 +584,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 		const TokenTypeId type = token.type;
 
 		// Special handling for Backslash and Text
-		if (type == Tokens.Backslash) {
+		if (type == Tokens.Backslash || type == Tokens.AnnotationStart) {
 			// Before appending anything to the output data or starting a new
 			// command, check whether FIELD_START has to be issued, as the
 			// current command is a command with range
@@ -548,7 +610,8 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 				}
 
 				// Parse the actual command
-				State res = parseCommand(token.location.getStart());
+				State res = parseCommand(token.location.getStart(),
+				                         type == Tokens.AnnotationStart);
 				switch (res) {
 					case State::ERROR:
 						throw LoggableException(
@@ -565,6 +628,14 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 			// to the data buffer, use the escape character start as start
 			// location and the peek offset as end location
 			reader.peek(c);  // Peek the previously fetched character
+
+			// If this was an annotation start token, add the parsed < to the
+			// output
+			if (type == Tokens.AnnotationStart) {
+				handler.append('<', token.location.getStart(),
+				               token.location.getStart() + 1);
+			}
+
 			handler.append(c, token.location.getStart(),
 			               reader.getPeekOffset());
 			reader.consumePeek();
@@ -632,6 +703,13 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 			    "which to start the field. Write \"\\{!\" to insert this "
 			    "sequence as text",
 			    token);
+		} else if (token.type == Tokens.AnnotationEnd) {
+			// We got a single annotation end token "\>" -- simply issue the
+			// ANNOTATION_END event
+			Variant annotationName = Variant::fromString("");
+			annotationName.setLocation(token.location);
+			pushCommand(annotationName, Variant::mapType{}, false);
+			return State::ANNOTATION_END;
 		} else {
 			logger.error("Unexpected token \"" + token.content + "\"", token);
 		}
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index bb5db65..3827118 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -161,7 +161,13 @@ public:
 		/**
 		 * Default constructor.
 		 */
-		Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {}
+		Command()
+		    : hasRange(false),
+		      inField(false),
+		      inRangeField(false),
+		      inDefaultField()
+		{
+		}
 
 		/**
 		 * Constructor of the Command class.
@@ -179,8 +185,8 @@ public:
 		 * @param inDefaultField is set to true if we currently are in a
 		 * specially marked default field.
 		 */
-		Command(Variant name, Variant arguments, bool hasRange, bool inField,
-		        bool inRangeField, bool inDefaultField)
+		Command(Variant name, Variant arguments, bool hasRange,
+		        bool inField, bool inRangeField, bool inDefaultField)
 		    : name(std::move(name)),
 		      arguments(std::move(arguments)),
 		      hasRange(hasRange),
@@ -266,9 +272,11 @@ private:
 	 *
 	 * @param start is the start byte offset of the command (including the
 	 * backslash)
+	 * @param isAnnotation if true, the command is not returned as command, but
+	 * as annotation start.
 	 * @return true if a command was actuall parsed, false otherwise.
 	 */
-	State parseCommand(size_t start);
+	State parseCommand(size_t start, bool isAnnotation);
 
 	/**
 	 * Function used internally to parse a block comment.
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index 5f23822..d52fa5b 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -98,6 +98,56 @@ static void assertFieldEnd(OsmlStreamParser &reader,
 	}
 }
 
+static void assertAnnotationStart(OsmlStreamParser &reader,
+                                  const std::string &name,
+                                  SourceOffset start = InvalidSourceOffset,
+                                  SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse());
+	EXPECT_EQ(name, reader.getCommandName().asString());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertAnnotationStart(OsmlStreamParser &reader,
+                                  const std::string &name,
+                                  const Variant::mapType &args,
+                                  SourceOffset start = InvalidSourceOffset,
+                                  SourceOffset end = InvalidSourceOffset)
+{
+	assertAnnotationStart(reader, name, start, end);
+	EXPECT_EQ(args, reader.getCommandArguments());
+}
+
+static void assertAnnotationEnd(OsmlStreamParser &reader,
+                                const std::string &name,
+                                const std::string &elementName,
+                                SourceOffset start = InvalidSourceOffset,
+                                SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse());
+	ASSERT_EQ(name, reader.getCommandName().asString());
+	if (!elementName.empty()) {
+		ASSERT_EQ(1U, reader.getCommandArguments().asMap().size());
+		ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name"));
+
+		auto it = reader.getCommandArguments().asMap().find("name");
+		ASSERT_EQ(elementName, it->second.asString());
+	}
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
 static void assertEnd(OsmlStreamParser &reader,
                       SourceOffset start = InvalidSourceOffset,
                       SourceOffset end = InvalidSourceOffset)
@@ -184,9 +234,6 @@ TEST(OsmlStreamParser, escapeSpecialCharacters)
 	testEscapeSpecialCharacter("\\");
 	testEscapeSpecialCharacter("{");
 	testEscapeSpecialCharacter("}");
-	testEscapeSpecialCharacter("<");
-	testEscapeSpecialCharacter(">");
-	testEscapeSpecialCharacter("|");
 }
 
 TEST(OsmlStreamParser, simpleSingleLineComment)
@@ -1035,5 +1082,180 @@ TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField)
 	assertEnd(reader, 10, 10);
 }
 
+TEST(OsmlStreamParser, annotationStart)
+{
+	const char *testString = "<\\a";
+	//                        0 12
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
+	assertEnd(reader, 3, 3);
+}
+
+TEST(OsmlStreamParser, annotationStartWithName)
+{
+	const char *testString = "<\\annotationWithName#aName";
+	//                        0 1234567890123456789012345
+	//                        0          1         2
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(reader, "annotationWithName",
+	                      Variant::mapType{{"name", "aName"}}, 0, 20);
+	assertEnd(reader, 26, 26);
+}
+
+TEST(OsmlStreamParser, annotationStartWithArguments)
+{
+	const char *testString = "<\\annotationWithName#aName[a=1,b=2]";
+	//                        0 1234567890123456789012345678901234
+	//                        0          1         2         3
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(
+	    reader, "annotationWithName",
+	    Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20);
+	assertEnd(reader, 35, 35);
+}
+
+TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd)
+{
+	const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>";
+	//                        0 123456789012345678901234567 89012345 67
+	//                        0          1         2          3
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(
+	    reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8,
+	    10);
+	assertFieldStart(reader, true, 26, 27);
+	assertData(reader, "a", 26, 27);
+	assertFieldEnd(reader, 33, 35);
+	assertAnnotationEnd(reader, "", "", 36, 38);
+	assertEnd(reader, 38, 38);
+}
+
+TEST(OsmlStreamParser, annotationEnd)
+{
+	const char *testString = "\\a>";
+	//                         012
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationEnd(reader, "a", "", 0, 2);
+	assertEnd(reader, 3, 3);
+}
+
+TEST(OsmlStreamParser, annotationEndWithName)
+{
+	const char *testString = "\\a#name>";
+	//                         01234567
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationEnd(reader, "a", "name", 0, 2);
+	assertEnd(reader, 8, 8);
+}
+
+TEST(OsmlStreamParser, annotationEndWithNameAsArgs)
+{
+	const char *testString = "\\a[name=name]>";
+	//                         01234567890123
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationEnd(reader, "a", "name", 0, 2);
+	assertEnd(reader, 14, 14);
+}
+
+TEST(OsmlStreamParser, errorAnnotationEndWithArguments)
+{
+	const char *testString = "\\a[foo=bar]>";
+	//                         012345678901
+	//                         0         1
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2);
+	ASSERT_TRUE(logger.hasError());
+	assertData(reader, ">", 11, 12);
+	assertEnd(reader, 12, 12);
+}
+
+TEST(OsmlStreamParser, closingAnnotation)
+{
+	const char *testString = "<\\a>";
+	//                        0 123
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
+	assertData(reader, ">", 3, 4);
+	assertEnd(reader, 4, 4);
+}
+
+TEST(OsmlStreamParser, annotationWithFields)
+{
+	const char *testString = "a <\\b{c}{d}{!e} f \\> g";
+	//                        012 345678901234567 8901
+	//                        0          1          2
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertData(reader, "a", 0, 1);
+	assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5);
+	assertFieldStart(reader, false, 5, 6);
+	assertData(reader, "c", 6, 7);
+	assertFieldEnd(reader, 7, 8);
+	assertFieldStart(reader, false, 8, 9);
+	assertData(reader, "d", 9, 10);
+	assertFieldEnd(reader, 10, 11);
+	assertFieldStart(reader, true, 11, 13);
+	assertData(reader, "e", 13, 14);
+	assertFieldEnd(reader, 14, 15);
+	assertData(reader, "f", 16, 17);
+	assertAnnotationEnd(reader, "", "", 18, 20);
+	assertData(reader, "g", 21, 22);
+	assertEnd(reader, 22, 22);
+}
+
+TEST(OsmlStreamParser, annotationStartEscape)
+{
+	const char *testString = "<\\%test";
+	//                        0 123456
+	//                        0
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertData(reader, "<%test", 0, 7);
+	assertEnd(reader, 7, 7);
+}
 }
 
-- 
cgit v1.2.3


From cc281d91def921b7bbf5d3d4a0fce53afc5a317b Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:07:58 +0100
Subject: Renamed parser/generic to parser/stack and made filenames much
 shorter

---
 src/core/parser/generic/ParserState.cpp          | 161 ------------
 src/core/parser/generic/ParserState.hpp          | 284 ---------------------
 src/core/parser/generic/ParserStateCallbacks.cpp |  26 --
 src/core/parser/generic/ParserStateCallbacks.hpp | 106 --------
 src/core/parser/generic/ParserStateHandler.cpp   | 104 --------
 src/core/parser/generic/ParserStateHandler.hpp   | 281 ---------------------
 src/core/parser/generic/ParserStateStack.cpp     | 187 --------------
 src/core/parser/generic/ParserStateStack.hpp     | 191 --------------
 src/core/parser/stack/Callbacks.cpp              |  23 ++
 src/core/parser/stack/Callbacks.hpp              |  99 ++++++++
 src/core/parser/stack/Handler.cpp                |  90 +++++++
 src/core/parser/stack/Handler.hpp                | 302 ++++++++++++++++++++++
 src/core/parser/stack/Stack.cpp                  | 188 ++++++++++++++
 src/core/parser/stack/Stack.hpp                  | 191 ++++++++++++++
 src/core/parser/stack/State.cpp                  | 171 +++++++++++++
 src/core/parser/stack/State.hpp                  | 307 +++++++++++++++++++++++
 test/core/parser/ParserStateTest.cpp             |  77 ------
 test/core/parser/stack/StateTest.cpp             |  79 ++++++
 18 files changed, 1450 insertions(+), 1417 deletions(-)
 delete mode 100644 src/core/parser/generic/ParserState.cpp
 delete mode 100644 src/core/parser/generic/ParserState.hpp
 delete mode 100644 src/core/parser/generic/ParserStateCallbacks.cpp
 delete mode 100644 src/core/parser/generic/ParserStateCallbacks.hpp
 delete mode 100644 src/core/parser/generic/ParserStateHandler.cpp
 delete mode 100644 src/core/parser/generic/ParserStateHandler.hpp
 delete mode 100644 src/core/parser/generic/ParserStateStack.cpp
 delete mode 100644 src/core/parser/generic/ParserStateStack.hpp
 create mode 100644 src/core/parser/stack/Callbacks.cpp
 create mode 100644 src/core/parser/stack/Callbacks.hpp
 create mode 100644 src/core/parser/stack/Handler.cpp
 create mode 100644 src/core/parser/stack/Handler.hpp
 create mode 100644 src/core/parser/stack/Stack.cpp
 create mode 100644 src/core/parser/stack/Stack.hpp
 create mode 100644 src/core/parser/stack/State.cpp
 create mode 100644 src/core/parser/stack/State.hpp
 delete mode 100644 test/core/parser/ParserStateTest.cpp
 create mode 100644 test/core/parser/stack/StateTest.cpp

(limited to 'test')

diff --git a/src/core/parser/generic/ParserState.cpp b/src/core/parser/generic/ParserState.cpp
deleted file mode 100644
index f635d86..0000000
--- a/src/core/parser/generic/ParserState.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "ParserState.hpp"
-
-namespace ousia {
-
-/* Class ParserState */
-
-ParserState::ParserState() : elementHandler(nullptr) {}
-
-ParserState::ParserState(ParserStateSet parents, Arguments arguments,
-                         RttiSet createdNodeTypes,
-                         HandlerConstructor elementHandler)
-    : parents(parents),
-      arguments(arguments),
-      createdNodeTypes(createdNodeTypes),
-      elementHandler(elementHandler)
-{
-}
-
-ParserState::ParserState(const ParserStateBuilder &builder)
-    : ParserState(builder.build())
-{
-}
-
-/* Class ParserStateBuilder */
-
-ParserStateBuilder &ParserStateBuilder::copy(const ParserState &state)
-{
-	this->state = state;
-	return *this;
-}
-
-ParserStateBuilder &ParserStateBuilder::parent(const ParserState *parent)
-{
-	state.parents = ParserStateSet{parent};
-	return *this;
-}
-
-ParserStateBuilder &ParserStateBuilder::parents(const ParserStateSet &parents)
-{
-	state.parents = parents;
-	return *this;
-}
-
-ParserStateBuilder &ParserStateBuilder::arguments(const Arguments &arguments)
-{
-	state.arguments = arguments;
-	return *this;
-}
-
-ParserStateBuilder &ParserStateBuilder::createdNodeType(const Rtti *type)
-{
-	state.createdNodeTypes = RttiSet{type};
-	return *this;
-}
-
-ParserStateBuilder &ParserStateBuilder::createdNodeTypes(const RttiSet &types)
-{
-	state.createdNodeTypes = types;
-	return *this;
-}
-
-ParserStateBuilder &ParserStateBuilder::elementHandler(
-    HandlerConstructor elementHandler)
-{
-	state.elementHandler = elementHandler;
-	return *this;
-}
-
-const ParserState &ParserStateBuilder::build() const { return state; }
-
-/* Class ParserStateDeductor */
-
-ParserStateDeductor::ParserStateDeductor(
-    std::vector<const Rtti *> signature,
-    std::vector<const ParserState *> states)
-    : tbl(signature.size()),
-      signature(std::move(signature)),
-      states(std::move(states))
-{
-}
-
-bool ParserStateDeductor::isActive(size_t d, const ParserState *s)
-{
-	// Lookup the "active" state of (d, s), if it was not already set
-	// (e.second is true) we'll have to calculate it
-	auto e = tbl[d].emplace(s, false);
-	bool &res = e.first->second;
-	if (!e.second) {
-		return res;
-	}
-
-	// Check whether this node is generative (may have produced the Node
-	// described by the current Signature element)
-	bool isGenerative = signature[d]->isOneOf(s->createdNodeTypes);
-
-	if (isGenerative && d == 0) {
-		// End of recursion -- the last signature element is reached and the
-		// node was generative
-		res = true;
-	} else {
-		// Try repetition of this node
-		if (isGenerative && isActive(d - 1, s)) {
-			res = true;
-		} else {
-			// Check whether any of the parent nodes were active -- either for
-			// the previous element (if this one is generative) or for the
-			// current element (assuming this node was not generative)
-			for (const ParserState *parent : s->parents) {
-				if ((isGenerative && isActive(d - 1, parent)) ||
-					isActive(d, parent)) {
-					res = true;
-					break;
-				}
-			}
-		}
-	}
-
-	return res;
-}
-
-std::vector<const ParserState *> ParserStateDeductor::deduce()
-{
-	std::vector<const ParserState *> res;
-	if (!signature.empty()) {
-		const size_t D = signature.size();
-		for (auto s : states) {
-			if (signature[D - 1]->isOneOf(s->createdNodeTypes) &&
-			    isActive(D - 1, s)) {
-				res.push_back(s);
-			}
-		}
-	}
-	return res;
-}
-
-/* Constant initializations */
-
-namespace ParserStates {
-const ParserState All;
-const ParserState None;
-}
-}
-
diff --git a/src/core/parser/generic/ParserState.hpp b/src/core/parser/generic/ParserState.hpp
deleted file mode 100644
index 6487fdd..0000000
--- a/src/core/parser/generic/ParserState.hpp
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file ParserState.hpp
- *
- * Defines the ParserState class used within the ParserStack pushdown
- * automaton and the ParserStateBuilder class for convenient construction of
- * such classes.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_PARSER_STATE_HPP_
-#define _OUSIA_PARSER_STATE_HPP_
-
-#include <unordered_set>
-
-#include <core/common/Rtti.hpp>
-#include <core/common/Argument.hpp>
-
-namespace ousia {
-
-// Forward declarations
-class ParserStateBuilder;
-class ParserState;
-class HandlerData;
-class Handler;
-using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
-
-/**
- * Set of pointers of parser states -- used for specifying a set of parent
- * states.
- */
-using ParserStateSet = std::unordered_set<const ParserState *>;
-
-/**
- * Class used for the complete specification of a ParserState. Stores possible
- * parent states, state handlers and arguments to be passed to that state.
- */
-struct ParserState {
-	/**
-	 * Vector containing all possible parent states.
-	 */
-	ParserStateSet parents;
-
-	/**
-	 * Descriptor of the arguments that should be passed to the handler.
-	 */
-	Arguments arguments;
-
-	/**
-	 * Set containing the types of the nodes that may be created in this
-	 * ParserState. This information is needed for Parsers to reconstruct the
-	 * current ParserState from a given ParserScope when a file is included.
-	 */
-	RttiSet createdNodeTypes;
-
-	/**
-	 * Pointer at a function which creates a new concrete Handler instance for
-	 * the elements described by this state. May be nullptr in which case no
-	 * handler instance is created.
-	 */
-	HandlerConstructor elementHandler;
-
-	/**
-	 * Default constructor, initializes the handlers with nullptr.
-	 */
-	ParserState();
-
-	/**
-	 * Constructor taking values for all fields. Use the ParserStateBuilder
-	 * class for a more convenient construction of ParserState instances.
-	 *
-	 * @param parents is a vector containing all possible parent states.
-	 * @param arguments is a descriptor of arguments that should be passed to
-	 * the handler.
-	 * @param createdNodeTypes is a set containing the types of the nodes tha
-	 * may be created in this ParserState. This information is needed for
-	 * Parsers to reconstruct the current ParserState from a given ParserScope
-	 * when a file is included.
-	 * @param elementHandler is a pointer at a function which creates a new
-	 * concrete Handler instance for the elements described by this state. May
-	 * be nullptr in which case no handler instance is created.
-	 */
-	ParserState(ParserStateSet parents, Arguments arguments = Arguments{},
-	            RttiSet createdNodeTypes = RttiSet{},
-	            HandlerConstructor elementHandler = nullptr);
-
-	/**
-	 * Creates this ParserState from the given ParserStateBuilder instance.
-	 */
-	ParserState(const ParserStateBuilder &builder);
-};
-
-/**
- * The ParserStateBuilder class is a class used for conveniently building new
- * ParserState instances.
- */
-class ParserStateBuilder {
-private:
-	/**
-	 * ParserState instance that is currently being built by the
-	 * ParserStateBuilder.
-	 */
-	ParserState state;
-
-public:
-	/**
-	 * Copies the ParserState instance and uses it as internal state. Overrides
-	 * all changes made by the ParserStateBuilder.
-	 *
-	 * @param state is the state that should be copied.
-	 * @return a reference at this ParserStateBuilder instance for method
-	 * chaining.
-	 */
-	ParserStateBuilder &copy(const ParserState &state);
-
-	/**
-	 * Sets the possible parent states to the single given parent element.
-	 *
-	 * @param parent is a pointer at the parent ParserState instance that should
-	 * be the possible parent state.
-	 * @return a reference at this ParserStateBuilder instance for method
-	 * chaining.
-	 */
-	ParserStateBuilder &parent(const ParserState *parent);
-
-	/**
-	 * Sets the ParserState instances in the given ParserStateSet as the list of
-	 * supported parent states.
-	 *
-	 * @param parents is a set of pointers at ParserState instances that should
-	 * be the possible parent states.
-	 * @return a reference at this ParserStateBuilder instance for method
-	 * chaining.
-	 */
-	ParserStateBuilder &parents(const ParserStateSet &parents);
-
-	/**
-	 * Sets the arguments that should be passed to the parser state handler to
-	 * those given as argument.
-	 *
-	 * @param arguments is the Arguments instance describing the Arguments that
-	 * should be parsed to a Handler for this ParserState.
-	 * @return a reference at this ParserStateBuilder instance for method
-	 * chaining.
-	 */
-	ParserStateBuilder &arguments(const Arguments &arguments);
-
-	/**
-	 * Sets the Node types this state may produce to the given Rtti descriptor.
-	 *
-	 * @param type is the Rtti descriptor of the Type that may be produced by
-	 * this state.
-	 * @return a reference at this ParserStateBuilder instance for method
-	 * chaining.
-	 */
-	ParserStateBuilder &createdNodeType(const Rtti *type);
-
-	/**
-	 * Sets the Node types this state may produce to the given Rtti descriptors.
-	 *
-	 * @param types is a set of Rtti descriptors of the Types that may be
-	 * produced by this state.
-	 * @return a reference at this ParserStateBuilder instance for method
-	 * chaining.
-	 */
-	ParserStateBuilder &createdNodeTypes(const RttiSet &types);
-
-	/**
-	 * Sets the constructor for the element handler. The constructor creates a
-	 * new concrete Handler instance for the elements described by this state.
-	 * May be nullptr in which case no handler instance is created (this is
-	 * the default value).
-	 *
-	 * @param elementHandler is the HandlerConstructor that should create a
-	 * new Handler instance.
-	 * @return a reference at this ParserStateBuilder instance for method
-	 * chaining.
-	 */
-	ParserStateBuilder &elementHandler(HandlerConstructor elementHandler);
-
-	/**
-	 * Returns a reference at the internal ParserState instance that was built
-	 * using the ParserStateBuilder.
-	 *
-	 * @return the built ParserState.
-	 */
-	const ParserState &build() const;
-};
-
-/**
- * Class used to deduce the ParserState a Parser is currently in based on the
- * types of the Nodes that currently are on the ParserStack. Uses dynamic
- * programming in order to solve this problem.
- */
-class ParserStateDeductor {
-public:
-	/**
-	 * Type containing the dynamic programming table.
-	 */
-	using Table = std::vector<std::unordered_map<const ParserState *, bool>>;
-
-private:
-	/**
-	 * Dynamic programming table.
-	 */
-	Table tbl;
-
-	/**
-	 * Signature given in the constructor.
-	 */
-	const std::vector<const Rtti *> signature;
-
-	/**
-	 * List of states that should be checked for being active.
-	 */
-	const std::vector<const ParserState *> states;
-
-	/**
-	 * Used internally to check whether the given parser stack s may have been
-	 * active for signature element d.
-	 *
-	 * @param d is the signature element.
-	 * @param s is the parser state.
-	 * @return true if the the given ParserState may have been active.
-	 */
-	bool isActive(size_t d, const ParserState *s);
-
-public:
-	/**
-	 * Constructor of the ParserStateDeductor class.
-	 *
-	 * @param signature a Node type signature describing the types of the nodes
-	 * which currently reside on e.g. the ParserScope stack.
-	 * @param states is a list of states that should be checked.
-	 */
-	ParserStateDeductor(std::vector<const Rtti *> signature,
-	                    std::vector<const ParserState *> states);
-
-	/**
-	 * Selects all active states from the given states. Only considers those
-	 * states that may have produced the last signature element.
-	 *
-	 * @return a list of states that may actually have been active.
-	 */
-	std::vector<const ParserState *> deduce();
-};
-
-/**
- * The ParserStates namespace contains all the global state constants used
- * in the ParserStack class.
- */
-namespace ParserStates {
-/**
- * State representing all states.
- */
-extern const ParserState All;
-
-/**
- * State representing the initial state.
- */
-extern const ParserState None;
-}
-}
-
-#endif /* _OUSIA_PARSER_STATE_HPP_ */
-
diff --git a/src/core/parser/generic/ParserStateCallbacks.cpp b/src/core/parser/generic/ParserStateCallbacks.cpp
deleted file mode 100644
index 50bac57..0000000
--- a/src/core/parser/generic/ParserStateCallbacks.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <ParserStateCallbacks.hpp>
-
-namespace ousia {
-
-/* Class ParserStateCallbacks */
-
-}
-
diff --git a/src/core/parser/generic/ParserStateCallbacks.hpp b/src/core/parser/generic/ParserStateCallbacks.hpp
deleted file mode 100644
index 7ec5264..0000000
--- a/src/core/parser/generic/ParserStateCallbacks.hpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file ParserStateCallbacks.hpp
- *
- * Contains an interface defining the callbacks that can be directed from a
- * ParserStateHandler to the ParserStateStack, and from the ParserStateStack to
- * the actual parser.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_PARSER_STATE_CALLBACKS_HPP_
-#define _OUSIA_PARSER_STATE_CALLBACKS_HPP_
-
-#include <string>
-
-#include <core/common/Whitespace.hpp>
-
-namespace ousia {
-
-/**
- * Interface defining a set of callback functions that act as a basis for the
- * ParserStateStackCallbacks and the ParserCallbacks.
- */
-class ParserStateCallbacks {
-public:
-	/**
-	 * Virtual descructor.
-	 */
-	virtual ~ParserStateCallbacks() {};
-
-	/**
-	 * Sets the whitespace mode that specifies how string data should be
-	 * processed.
-	 *
-	 * @param whitespaceMode specifies one of the three WhitespaceMode constants
-	 * PRESERVE, TRIM or COLLAPSE.
-	 */
-	virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0;
-
-	/**
-	 * Sets the type as which the variant data should be parsed.
-	 *
-	 * @param type is one of the VariantType constants, specifying with which
-	 * type the data that is passed to the ParserStateHandler in the "data"
-	 * function should be handled.
-	 */
-	virtual void setDataType(VariantType type) = 0;
-
-	/**
-	 * Registers the given token as token that should be reported to the handler
-	 * using the "token" function.
-	 *
-	 * @param token is the token string that should be reported.
-	 */
-	virtual void registerToken(const std::string &token) = 0;
-
-	/**
-	 * Unregisters the given token, it will no longer be reported to the handler
-	 * using the "token" function.
-	 *
-	 * @param token is the token string that should be unregistered.
-	 */
-	virtual void unregisterToken(const std::string &token) = 0;
-};
-
-/**
- * Interface defining the callback functions that can be passed from a
- * ParserStateStack to the underlying parser.
- */
-class ParserCallbacks : public ParserStateCallbacks {
-	/**
-	 * Checks whether the given token is supported by the parser. The parser
-	 * returns true, if the token is supported, false if this token cannot be
-	 * registered. Note that parsers that do not support the registration of
-	 * tokens at all should always return "true".
-	 *
-	 * @param token is the token that should be checked for support.
-	 * @return true if the token is generally supported (or the parser does not
-	 * support registering tokens at all), false if the token is not supported,
-	 * because e.g. it is a reserved token or it interferes with other tokens.
-	 */
-	virtual bool supportsToken(const std::string &token) = 0;
-}
-
-}
-
-#endif /* _OUSIA_PARSER_STATE_CALLBACKS_HPP_ */
-
diff --git a/src/core/parser/generic/ParserStateHandler.cpp b/src/core/parser/generic/ParserStateHandler.cpp
deleted file mode 100644
index 64e2bfa..0000000
--- a/src/core/parser/generic/ParserStateHandler.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <core/parser/ParserContext.hpp>
-
-#include "ParserStateHandler.hpp"
-
-namespace ousia {
-
-/* Class ParserStatedata */
-
-ParserStatedata::ParserStatedata(ParserContext &ctx,
-                                 ParserStateCallbacks &callbacks,
-                                 std::string name, const ParserState &state,
-                                 const ParserState &parentState,
-                                 const SourceLocation location)
-    : ctx(ctx),
-      callbacks(callbacks),
-      name(std::move(name)),
-      state(state),
-      parentState(parentState),
-      location(location){};
-
-/* Class ParserStateHandler */
-
-ParserStateHandler::ParserStateHandler(const ParserStatedata &data) : data(data)
-{
-}
-
-ParserContext &ParserStateHandler::context() { return data.ctx; }
-
-const std::string &ParserStateHandler::name() { return data.name; }
-
-ParserScope &ParserStateHandler::scope() { return data.ctx.getScope(); }
-
-Manager &ParserStateHandler::manager() { return data.ctx.getManager(); }
-
-Logger &ParserStateHandler::logger() { return data.ctx.getLogger(); }
-
-Rooted<Project> ParserStateHandler::project() { return data.ctx.getProject(); }
-
-const ParserState &ParserStateHandler::state() { return data.state; }
-
-SourceLocation ParserStateHandler::location() { return data.location; }
-
-void ParserStateHandler::setWhitespaceMode(WhitespaceMode whitespaceMode)
-{
-	data.callbacks.setWhitespaceMode(whitespaceMode);
-}
-
-void ParserStateHandler::setDataType(VariantType type)
-{
-	data.callbacks.setDataType(type);
-}
-
-bool ParserStateHandler::supportsToken(const std::string &token)
-{
-	return data.callbacks.supportsToken(token);
-}
-
-void ParserStateHandler::registerToken(const std::string &token)
-{
-	data.callbacks.registerToken(token);
-}
-
-void ParserStateHandler::unregisterToken(const std::string &token)
-{
-	data.callbacks.unregisterToken(token);
-}
-
-void ParserStateHandler::data(const std::string &data, int field)
-{
-	if (Utils::hasNonWhitepaceChar(data)) {
-		logger().error("Expected command but found character data.");
-	}
-}
-
-/* Class DefaultParserStateHandler */
-
-void DefaultParserStateHandler::start(Variant::mapType &args) {}
-
-void DefaultParserStateHandler::end() {}
-
-ParserStateHandler *DefaultParserStateHandler::create(const data &data)
-{
-	return new DefaultHandler{data};
-}
-}
-
diff --git a/src/core/parser/generic/ParserStateHandler.hpp b/src/core/parser/generic/ParserStateHandler.hpp
deleted file mode 100644
index f3c836e..0000000
--- a/src/core/parser/generic/ParserStateHandler.hpp
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _OUSIA_PARSER_STATE_HANDLER_HPP_
-#define _OUSIA_PARSER_STATE_HANDLER_HPP_
-
-#include <memory>
-#include <string>
-
-#include <core/utils/Location.hpp>
-
-namespace ousia {
-
-// Forward declarations
-class ParserContext;
-class ParserState;
-class ParserStateCallbacks;
-
-/**
- * Class collecting all the data that is being passed to a ParserStateHandler
- * instance.
- */
-class ParserStateHandlerData {
-public:
-	/**
-	 * Reference to the ParserContext instance that should be used to resolve
-	 * references to nodes in the Graph.
-	 */
-	ParserContext &ctx;
-
-	/**
-	 * Reference at an instance of the ParserStateCallbacks class, used for
-	 * modifying the behaviour of the parser (like registering tokens, setting
-	 * the data type or changing the whitespace handling mode).
-	 */
-	ParserStateCallbacks &callbacks;
-
-	/**
-	 * Contains the name of the command that is being handled.
-	 */
-	const std::string name;
-
-	/**
-	 * Contains the current state of the state machine.
-	 */
-	const ParserState &state;
-
-	/**
-	 * Contains the state of the state machine when the parent node was handled.
-	 */
-	const ParserState &parentState;
-
-	/**
-	 * Current source code location.
-	 */
-	const SourceLocation location;
-
-	/**
-	 * Constructor of the HandlerData class.
-	 *
-	 * @param ctx is the parser context the handler should be executed in.
-	 * @param callbacks is an instance of ParserStateCallbacks used to notify
-	 * the parser about certain state changes.
-	 * @param name is the name of the string.
-	 * @param state is the state this handler was called for.
-	 * @param parentState is the state of the parent command.
-	 * @param location is the location at which the handler is created.
-	 */
-	ParserStateHandlerData(ParserContext &ctx, ParserStateCallbacks &callbacks,
-	                       std::string name, const ParserState &state,
-	                       const ParserState &parentState,
-	                       const SourceLocation &location);
-};
-
-/**
- * The handler class provides a context for handling an XML tag. It has to be
- * overridden and registered in the StateStack class to form handlers for
- * concrete XML tags.
- */
-class ParserStateHandler {
-private:
-	/**
-	 * Structure containing the internal handler data.
-	 */
-	const ParserStateHandlerData data;
-
-protected:
-	/**
-	 * Constructor of the Handler class.
-	 *
-	 * @param data is a structure containing all data being passed to the
-	 * handler.
-	 */
-	ParserStateHandler(const ParserStateHandlerData &data){};
-
-public:
-	/**
-	 * Virtual destructor.
-	 */
-	virtual ~Handler(){};
-
-	/**
-	 * Returns a reference at the ParserContext.
-	 *
-	 * @return a reference at the ParserContext.
-	 */
-	ParserContext &context();
-
-	/**
-	 * Returns the command name for which the handler was created.
-	 *
-	 * @return a const reference at the command name.
-	 */
-	const std::string &name();
-
-	/**
-	 * Returns a reference at the ParserScope instance.
-	 *
-	 * @return a reference at the ParserScope instance.
-	 */
-	ParserScope &scope();
-
-	/**
-	 * Returns a reference at the Manager instance which manages all nodes.
-	 *
-	 * @return a referance at the Manager instance.
-	 */
-	Manager &manager();
-
-	/**
-	 * Returns a reference at the Logger instance used for logging error
-	 * messages.
-	 *
-	 * @return a reference at the Logger instance.
-	 */
-	Logger &logger();
-
-	/**
-	 * Returns a reference at the Project Node, representing the project into
-	 * which the file is currently being parsed.
-	 *
-	 * @return a referance at the Project Node.
-	 */
-	Rooted<Project> project();
-
-	/**
-	 * Reference at the ParserState descriptor for which this Handler was
-	 * created.
-	 *
-	 * @return a const reference at the constructing ParserState descriptor.
-	 */
-	const ParserState &state();
-
-	/**
-	 * Returns the current location in the source file.
-	 *
-	 * @return the current location in the source file.
-	 */
-	SourceLocation location();
-
-	/**
-	 * Calls the corresponding function in the ParserStateCallbacks instance.
-	 * Sets the whitespace mode that specifies how string data should be
-	 * processed.
-	 *
-	 * @param whitespaceMode specifies one of the three WhitespaceMode constants
-	 * PRESERVE, TRIM or COLLAPSE.
-	 */
-	void setWhitespaceMode(WhitespaceMode whitespaceMode);
-
-	/**
-	 * Calls the corresponding function in the ParserStateCallbacks instance.
-	 * Sets the type as which the variant data should be parsed.
-	 *
-	 * @param type is one of the VariantType constants, specifying with which
-	 * type the data that is passed to the ParserStateHandler in the "data"
-	 * function should be handled.
-	 */
-	void setDataType(VariantType type);
-
-	/**
-	 * Calls the corresponding function in the ParserStateCallbacks instance.
-	 * Checks whether the given token is supported by the parser. The parser
-	 * returns true, if the token is supported, false if this token cannot be
-	 * registered. Note that parsers that do not support the registration of
-	 * tokens at all should always return "true".
-	 *
-	 * @param token is the token that should be checked for support.
-	 * @return true if the token is generally supported (or the parser does not
-	 * support registering tokens at all), false if the token is not supported,
-	 * because e.g. it is a reserved token or it interferes with other tokens.
-	 */
-	bool supportsToken(const std::string &token);
-
-	/**
-	 * Calls the corresponding function in the ParserStateCallbacks instance.
-	 * Registers the given token as token that should be reported to the handler
-	 * using the "token" function.
-	 *
-	 * @param token is the token string that should be reported.
-	 */
-	void registerToken(const std::string &token);
-
-	/**
-	 * Calls the corresponding function in the ParserStateCallbacks instance.
-	 * Unregisters the given token, it will no longer be reported to the handler
-	 * using the "token" function.
-	 *
-	 * @param token is the token string that should be unregistered.
-	 */
-	void unregisterToken(const std::string &token);
-
-	/**
-	 * Called when the command that was specified in the constructor is
-	 * instanciated.
-	 *
-	 * @param args is a map from strings to variants (argument name and value).
-	 */
-	virtual void start(Variant::mapType &args) = 0;
-
-	/**
-	 * Called whenever the command for which this handler is defined ends.
-	 */
-	virtual void end() = 0;
-
-	/**
-	 * Called whenever raw data (int the form of a string) is available for the
-	 * Handler instance. In the default handler an exception is raised if the
-	 * received data contains non-whitespace characters.
-	 *
-	 * @param data is a pointer at the character data that is available for the
-	 * Handler instance.
-	 * @param field is the field number (the interpretation of this value
-	 * depends on the format that is being parsed).
-	 */
-	virtual void data(const std::string &data, int field);
-};
-
-/**
- * HandlerConstructor is a function pointer type used to create concrete
- * instances of the Handler class.
- *
- * @param handlerData is the data that should be passed to the new handler
- * instance.
- * @return a newly created handler instance.
- */
-using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
-
-/**
- * The DefaultHandler class is used in case no element handler is specified in
- * the ParserState descriptor.
- */
-class DefaultParserStateHandler : public ParserStateHandler {
-public:
-	using ParserStateHandler::ParserStateHandler;
-
-	void start(Variant::mapType &args) override;
-
-	void end() override;
-
-	static Handler *create(const HandlerData &handlerData);
-};
-}
-
-#endif /* _OUSIA_PARSER_STATE_HANDLER_HPP_ */
-
diff --git a/src/core/parser/generic/ParserStateStack.cpp b/src/core/parser/generic/ParserStateStack.cpp
deleted file mode 100644
index 8c32f17..0000000
--- a/src/core/parser/generic/ParserStateStack.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <sstream>
-
-#include <core/common/Utils.hpp>
-#include <core/common/Exceptions.hpp>
-#include <core/model/Project.hpp>
-
-#include "ParserScope.hpp"
-#include "ParserStateStack.hpp"
-
-namespace ousia {
-
-/* Class ParserStateStack */
-
-/**
- * Returns an Exception that should be thrown when a currently invalid command
- * is thrown.
- */
-static LoggableException InvalidCommand(const std::string &name,
-                                        const std::set<std::string> &expected)
-{
-	if (expected.empty()) {
-		return LoggableException{
-		    std::string{"No nested elements allowed, but got \""} + name +
-		    std::string{"\""}};
-	} else {
-		return LoggableException{
-		    std::string{"Expected "} +
-		    (expected.size() == 1 ? std::string{"\""}
-		                          : std::string{"one of \""}) +
-		    Utils::join(expected, "\", \"") + std::string{"\", but got \""} +
-		    name + std::string{"\""}};
-	}
-}
-
-ParserStateStack::ParserStateStack(
-    ParserContext &ctx,
-    const std::multimap<std::string, const ParserState *> &states)
-    : ctx(ctx), states(states)
-{
-}
-
-bool ParserStateStack::deduceState()
-{
-	// Assemble all states
-	std::vector<const ParserState *> states;
-	for (const auto &e : this->states) {
-		states.push_back(e.second);
-	}
-
-	// Fetch the type signature of the scope and derive all possible states,
-	// abort if no unique parser state was found
-	std::vector<const ParserState *> possibleStates =
-	    ParserStateDeductor(ctx.getScope().getStackTypeSignature(), states)
-	        .deduce();
-	if (possibleStates.size() != 1) {
-		ctx.getLogger().error(
-		    "Error while including file: Cannot deduce parser state.");
-		return false;
-	}
-
-	// Switch to this state by creating a dummy handler
-	const ParserState *state = possibleStates[0];
-	Handler *handler =
-	    DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}});
-	stack.emplace(handler);
-	return true;
-}
-
-std::set<std::string> ParserStateStack::expectedCommands()
-{
-	const ParserState *currentState = &(this->currentState());
-	std::set<std::string> res;
-	for (const auto &v : states) {
-		if (v.second->parents.count(currentState)) {
-			res.insert(v.first);
-		}
-	}
-	return res;
-}
-
-const ParserState &ParserStateStack::currentState()
-{
-	return stack.empty() ? ParserStates::None : stack.top()->state();
-}
-
-std::string ParserStateStack::currentCommandName()
-{
-	return stack.empty() ? std::string{} : stack.top()->name();
-}
-
-const ParserState *ParserStateStack::findTargetState(const std::string &name)
-{
-	const ParserState *currentState = &(this->currentState());
-	auto range = states.equal_range(name);
-	for (auto it = range.first; it != range.second; it++) {
-		const ParserStateSet &parents = it->second->parents;
-		if (parents.count(currentState) || parents.count(&ParserStates::All)) {
-			return it->second;
-		}
-	}
-
-	return nullptr;
-}
-
-void ParserStateStack::start(const std::string &name, Variant::mapType &args,
-                        const SourceLocation &location)
-{
-	ParserState const *targetState = findTargetState(name);
-// TODO: Andreas, please improve this.
-//	if (!Utils::isIdentifier(name)) {
-//		throw LoggableException(std::string("Invalid identifier \"") + name +
-//		                        std::string("\""));
-//	}
-
-	if (targetState == nullptr) {
-		targetState = findTargetState("*");
-	}
-	if (targetState == nullptr) {
-		throw InvalidCommand(name, expectedCommands());
-	}
-
-	// Fetch the associated constructor
-	HandlerConstructor ctor = targetState->elementHandler
-	                              ? targetState->elementHandler
-	                              : DefaultHandler::create;
-
-	// Canonicalize the arguments, allow additional arguments
-	targetState->arguments.validateMap(args, ctx.getLogger(), true);
-
-	// Instantiate the handler and call its start function
-	Handler *handler = ctor({ctx, name, *targetState, currentState(), location});
-	handler->start(args);
-	stack.emplace(handler);
-}
-
-void ParserStateStack::start(std::string name, const Variant::mapType &args,
-                        const SourceLocation &location)
-{
-	Variant::mapType argsCopy(args);
-	start(name, argsCopy);
-}
-
-void ParserStateStack::end()
-{
-	// Check whether the current command could be ended
-	if (stack.empty()) {
-		throw LoggableException{"No command to end."};
-	}
-
-	// Remove the current HandlerInstance from the stack
-	std::shared_ptr<Handler> inst{stack.top()};
-	stack.pop();
-
-	// Call the end function of the last Handler
-	inst->end();
-}
-
-void ParserStateStack::data(const std::string &data, int field)
-{
-	// Check whether there is any command the data can be sent to
-	if (stack.empty()) {
-		throw LoggableException{"No command to receive data."};
-	}
-
-	// Pass the data to the current Handler instance
-	stack.top()->data(data, field);
-}
-}
-
diff --git a/src/core/parser/generic/ParserStateStack.hpp b/src/core/parser/generic/ParserStateStack.hpp
deleted file mode 100644
index b106475..0000000
--- a/src/core/parser/generic/ParserStateStack.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file ParserStateStack.hpp
- *
- * Helper classes for document or description parsers. Contains the
- * ParserStateStack class, which is an pushdown automaton responsible for
- * accepting commands in the correct order and calling specified handlers.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_PARSER_STATE_STACK_HPP_
-#define _OUSIA_PARSER_STATE_STACK_HPP_
-
-#include <cstdint>
-
-#include <map>
-#include <memory>
-#include <set>
-#include <stack>
-#include <vector>
-
-#include <core/common/Variant.hpp>
-#include <core/common/Logger.hpp>
-#include <core/common/Argument.hpp>
-
-#include "Parser.hpp"
-#include "ParserContext.hpp"
-#include "ParserState.hpp"
-
-namespace ousia {
-
-/**
- * The ParserStateStack class is a pushdown automaton responsible for turning a
- * command stream into a tree of Node instances.
- */
-class ParserStateStack {
-private:
-	/**
-	 * Reference at the parser context.
-	 */
-	ParserContext &ctx;
-
-	/**
-	 * Map containing all registered command names and the corresponding
-	 * state descriptors.
-	 */
-	const std::multimap<std::string, const ParserState *> &states;
-
-	/**
-	 * Internal stack used for managing the currently active Handler instances.
-	 */
-	std::stack<std::shared_ptr<Handler>> stack;
-
-	/**
-	 * Used internally to get all expected command names for the current state.
-	 * This function is used to build error messages.
-	 *
-	 * @return a set of strings containing the names of the expected commands.
-	 */
-	std::set<std::string> expectedCommands();
-
-	/**
-	 * Returns the targetState for a command with the given name that can be
-	 * reached from for the current state.
-	 *
-	 * @param name is the name of the requested command.
-	 * @return nullptr if no target state was found, a pointer at the target
-	 *state
-	 * otherwise.
-	 */
-	const ParserState *findTargetState(const std::string &name);
-
-public:
-	/**
-	 * Creates a new instance of the ParserStateStack class.
-	 *
-	 * @param ctx is the parser context the parser stack is working on.
-	 * @param states is a map containing the command names and pointers at the
-	 * corresponding ParserState instances.
-	 */
-	ParserStateStack(
-	    ParserContext &ctx,
-	    const std::multimap<std::string, const ParserState *> &states);
-
-	/**
-	 * Tries to reconstruct the parser state from the Scope instance of the
-	 * ParserContext given in the constructor. This functionality is needed for
-	 * including files,as the Parser of the included file needs to be brought to
-	 + an equivalent state as the one in the including file.
-	 *
-	 * @param scope is the ParserScope instance from which the ParserState
-	 * should be reconstructed.
-	 * @param logger is the logger instance to which error messages should be
-	 * written.
-	 * @return true if the operation was sucessful, false otherwise.
-	 */
-	bool deduceState();
-
-	/**
-	 * Returns the state the ParserStateStack instance currently is in.
-	 *
-	 * @return the state of the currently active Handler instance or STATE_NONE
-	 * if no handler is on the stack.
-	 */
-	const ParserState &currentState();
-
-	/**
-	 * Returns the command name that is currently being handled.
-	 *
-	 * @return the name of the command currently being handled by the active
-	 * Handler instance or an empty string if no handler is currently active.
-	 */
-	std::string currentCommandName();
-
-	/**
-	 * Function that should be called whenever a new command is reached.
-	 *
-	 * @param name is the name of the command (including the namespace
-	 * separator ':') and its corresponding location. Must be a string variant.
-	 * @param args is a map variant containing the arguments that were passed to
-	 * the command.
-	 */
-	void command(Variant name, Variant args);
-
-	/**
-	 * Function that should be called whenever a new field starts. Fields of the
-	 * same command may not be separated by calls to 
-	 */
-	void fieldStart();
-
-	/**
-	 * Function that should be called whenever a field ends.
-	 */
-	void fieldEnd();
-
-	/**
-	 * Function that shuold be called whenever character data is found in the
-	 * input stream.
-	 *
-	 * @param data is a variant of any type containing the data that was parsed
-	 * as data.
-	 */
-	void data(Variant data);
-
-	/**
-	 * Function that should be called whenever an annotation starts.
-	 *
-	 * @param name is the name of the annotation class.
-	 * @param args is a map variant containing the arguments that were passed
-	 * to the annotation.
-	 */
-	void annotationStart(Variant name, Variant args);
-
-	/**
-	 * Function that should be called whenever an annotation ends.
-	 *
-	 * @param name is the name of the annotation class that was ended.
-	 * @param annotationName is the name of the annotation that was ended.
-	 */
-	void annotationEnd(Variant name, Variant annotationName);
-
-	/**
-	 * Function that should be called whenever a previously registered token
-	 * is found in the input stream.
-	 *
-	 * @param token is string variant containing the token that was encountered.
-	 */
-	void token(Variant token);
-};
-}
-
-#endif /* _OUSIA_PARSER_STATE_STACK_HPP_ */
-
diff --git a/src/core/parser/stack/Callbacks.cpp b/src/core/parser/stack/Callbacks.cpp
new file mode 100644
index 0000000..6ebc549
--- /dev/null
+++ b/src/core/parser/stack/Callbacks.cpp
@@ -0,0 +1,23 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "Callbacks.hpp"
+
+namespace ousia {
+}
+
diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp
new file mode 100644
index 0000000..bb56e44
--- /dev/null
+++ b/src/core/parser/stack/Callbacks.hpp
@@ -0,0 +1,99 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Callbacks.hpp
+ *
+ * Contains an interface defining the callbacks that can be directed from a
+ * StateHandler to the StateStack, and from the StateStack to
+ * the actual parser.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_PARSER_STATE_CALLBACKS_HPP_
+#define _OUSIA_PARSER_STATE_CALLBACKS_HPP_
+
+#include <string>
+
+#include <core/common/Whitespace.hpp>
+
+namespace ousia {
+namespace parser_stack {
+
+/**
+ * Interface defining a set of callback functions that act as a basis for the
+ * StateStackCallbacks and the ParserCallbacks.
+ */
+class Callbacks {
+public:
+	/**
+	 * Virtual descructor.
+	 */
+	virtual ~Callbacks() {};
+
+	/**
+	 * Sets the whitespace mode that specifies how string data should be
+	 * processed.
+	 *
+	 * @param whitespaceMode specifies one of the three WhitespaceMode constants
+	 * PRESERVE, TRIM or COLLAPSE.
+	 */
+	virtual void setWhitespaceMode(WhitespaceMode whitespaceMode) = 0;
+
+	/**
+	 * Registers the given token as token that should be reported to the handler
+	 * using the "token" function.
+	 *
+	 * @param token is the token string that should be reported.
+	 */
+	virtual void registerToken(const std::string &token) = 0;
+
+	/**
+	 * Unregisters the given token, it will no longer be reported to the handler
+	 * using the "token" function.
+	 *
+	 * @param token is the token string that should be unregistered.
+	 */
+	virtual void unregisterToken(const std::string &token) = 0;
+};
+
+/**
+ * Interface defining the callback functions that can be passed from a
+ * StateStack to the underlying parser.
+ */
+class ParserCallbacks : public Callbacks {
+	/**
+	 * Checks whether the given token is supported by the parser. The parser
+	 * returns true, if the token is supported, false if this token cannot be
+	 * registered. Note that parsers that do not support the registration of
+	 * tokens at all should always return "true".
+	 *
+	 * @param token is the token that should be checked for support.
+	 * @return true if the token is generally supported (or the parser does not
+	 * support registering tokens at all), false if the token is not supported,
+	 * because e.g. it is a reserved token or it interferes with other tokens.
+	 */
+	virtual bool supportsToken(const std::string &token) = 0;
+};
+
+}
+}
+
+#endif /* _OUSIA_PARSER_STATE_CALLBACKS_HPP_ */
+
diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp
new file mode 100644
index 0000000..66af2a4
--- /dev/null
+++ b/src/core/parser/stack/Handler.cpp
@@ -0,0 +1,90 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <core/parser/ParserContext.hpp>
+
+#include "Callbacks.hpp"
+#include "Handler.hpp"
+#include "State.hpp"
+
+namespace ousia {
+namespace parser_stack {
+
+/* Class HandlerData */
+
+HandlerData::HandlerData(ParserContext &ctx, Callbacks &callbacks,
+                         std::string name, const State &state,
+                         const SourceLocation &location)
+    : ctx(ctx),
+      callbacks(callbacks),
+      name(std::move(name)),
+      state(state),
+      location(location)
+{
+}
+
+/* Class Handler */
+
+Handler::Handler(const HandlerData &internalData) : internalData(internalData)
+{
+}
+
+Handler::~Handler() {}
+
+ParserContext &Handler::context() { return internalData.ctx; }
+
+const std::string &Handler::name() { return internalData.name; }
+
+ParserScope &Handler::scope() { return internalData.ctx.getScope(); }
+
+Manager &Handler::manager() { return internalData.ctx.getManager(); }
+
+Logger &Handler::logger() { return internalData.ctx.getLogger(); }
+
+const State &Handler::state() { return internalData.state; }
+
+SourceLocation Handler::location() { return internalData.location; }
+
+void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode)
+{
+	internalData.callbacks.setWhitespaceMode(whitespaceMode);
+}
+
+void Handler::registerToken(const std::string &token)
+{
+	internalData.callbacks.registerToken(token);
+}
+
+void Handler::unregisterToken(const std::string &token)
+{
+	internalData.callbacks.unregisterToken(token);
+}
+
+/* Class DefaultHandler */
+
+/*void DefaultHandler::start(Variant::mapType &args) {}
+
+void DefaultHandler::end() {}
+
+Handler *DefaultHandler::create(const data &data)
+{
+    return new DefaultHandler{data};
+}*/
+}
+}
+
diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp
new file mode 100644
index 0000000..0701343
--- /dev/null
+++ b/src/core/parser/stack/Handler.hpp
@@ -0,0 +1,302 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_PARSER_STATE_HANDLER_HPP_
+#define _OUSIA_PARSER_STATE_HANDLER_HPP_
+
+#include <memory>
+#include <string>
+
+#include <core/common/Location.hpp>
+#include <core/common/Variant.hpp>
+
+namespace ousia {
+
+// Forward declarations
+class ParserContext;
+class Callbacks;
+class Logger;
+class Project;
+
+namespace parser_stack {
+
+// More forward declarations
+class State;
+
+/**
+ * Class collecting all the data that is being passed to a Handler
+ * instance.
+ */
+class HandlerData {
+public:
+	/**
+	 * Reference to the ParserContext instance that should be used to resolve
+	 * references to nodes in the Graph.
+	 */
+	ParserContext &ctx;
+
+	/**
+	 * Reference at an instance of the Callbacks class, used for
+	 * modifying the behaviour of the parser (like registering tokens, setting
+	 * the data type or changing the whitespace handling mode).
+	 */
+	Callbacks &callbacks;
+
+	/**
+	 * Contains the name of the command that is being handled.
+	 */
+	std::string name;
+
+	/**
+	 * Contains the current state of the state machine.
+	 */
+	const State &state;
+
+	/**
+	 * Current source code location.
+	 */
+	SourceLocation location;
+
+	/**
+	 * Constructor of the HandlerData class.
+	 *
+	 * @param ctx is the parser context the handler should be executed in.
+	 * @param callbacks is an instance of Callbacks used to notify
+	 * the parser about certain state changes.
+	 * @param name is the name of the string.
+	 * @param state is the state this handler was called for.
+	 * @param location is the location at which the handler is created.
+	 */
+	HandlerData(ParserContext &ctx, Callbacks &callbacks, std::string name,
+	            const State &state, const SourceLocation &location);
+};
+
+/**
+ * The Handler class provides a context for handling a generic stack element.
+ * It has to beoverridden and registered in the StateStack class to form
+ * handlers for concrete XML tags.
+ */
+class Handler {
+private:
+	/**
+	 * Structure containing the internal handler data.
+	 */
+	const HandlerData internalData;
+
+protected:
+	/**
+	 * Constructor of the Handler class.
+	 *
+	 * @param data is a structure containing all data being passed to the
+	 * handler.
+	 */
+	Handler(const HandlerData &internalData);
+
+	/**
+	 * Returns a reference at the ParserContext.
+	 *
+	 * @return a reference at the ParserContext.
+	 */
+	ParserContext &context();
+
+	/**
+	 * Returns the command name for which the handler was created.
+	 *
+	 * @return a const reference at the command name.
+	 */
+	const std::string &name();
+
+	/**
+	 * Returns a reference at the ParserScope instance.
+	 *
+	 * @return a reference at the ParserScope instance.
+	 */
+	ParserScope &scope();
+
+	/**
+	 * Returns a reference at the Manager instance which manages all nodes.
+	 *
+	 * @return a referance at the Manager instance.
+	 */
+	Manager &manager();
+
+	/**
+	 * Returns a reference at the Logger instance used for logging error
+	 * messages.
+	 *
+	 * @return a reference at the Logger instance.
+	 */
+	Logger &logger();
+
+	/**
+	 * Reference at the State descriptor for which this Handler was created.
+	 *
+	 * @return a const reference at the constructing State descriptor.
+	 */
+	const State &state();
+
+	/**
+	 * Returns the current location in the source file.
+	 *
+	 * @return the current location in the source file.
+	 */
+	SourceLocation location();
+
+public:
+	/**
+	 * Virtual destructor.
+	 */
+	virtual ~Handler();
+
+	/**
+	 * Calls the corresponding function in the Callbacks instance. Sets the
+	 * whitespace mode that specifies how string data should be processed. The
+	 * calls to this function are placed on a stack by the underlying Stack
+	 * class.
+	 *
+	 * @param whitespaceMode specifies one of the three WhitespaceMode constants
+	 * PRESERVE, TRIM or COLLAPSE.
+	 */
+	void setWhitespaceMode(WhitespaceMode whitespaceMode);
+
+	/**
+	 * Calls the corresponding function in the Callbacks instance.
+	 * Registers the given token as token that should be reported to the handler
+	 * using the "token" function.
+	 *
+	 * @param token is the token string that should be reported.
+	 */
+	void registerToken(const std::string &token);
+
+	/**
+	 * Calls the corresponding function in the Callbacks instance.
+	 * Unregisters the given token, it will no longer be reported to the handler
+	 * using the "token" function.
+	 *
+	 * @param token is the token string that should be unregistered.
+	 */
+	void unregisterToken(const std::string &token);
+
+	/**
+	 * Called when the command that was specified in the constructor is
+	 * instanciated.
+	 *
+	 * @param args is a map from strings to variants (argument name and value).
+	 * @return true if the handler was successful in starting the element it
+	 * represents, false otherwise.
+	 */
+	virtual bool start(Variant::mapType &args) = 0;
+
+	/**
+	 * Called before the command for which this handler is defined ends (is
+	 * forever removed from the stack).
+	 */
+	virtual void end() = 0;
+
+	/**
+	 * Called when a new field starts, while the handler is active. This
+	 * function should return true if the field is supported, false otherwise.
+	 * No error should be logged if the field cannot be started, the caller will
+	 * take care of that (since it is always valid to start a default field,
+	 * even though the corresponding structure does not have a field, as long as
+	 * no data is fed into the field).
+	 *
+	 * @param isDefaultField is set to true if the field that is being started
+	 * is the default/tree field. The handler should set the value of this
+	 * variable to true if the referenced field is indeed the default field.
+	 * @param isImplicit is set to true if the field is implicitly being started
+	 * by the stack (this field always implies isDefaultField being set to
+	 * true).
+	 * @param fieldIndex is the numerical index of the field.
+	 */
+	virtual bool fieldStart(bool &isDefaultField, bool isImplicit,
+	                        size_t fieldIndex) = 0;
+
+	/**
+	 * Called when a previously opened field ends, while the handler is active.
+	 * Note that a "fieldStart" and "fieldEnd" are always called alternately.
+	 */
+	virtual void fieldEnd() = 0;
+
+	/**
+	 * Called whenever an annotation starts while this handler is active. The
+	 * function should return true if starting the annotation was successful,
+	 * false otherwise.
+	 *
+	 * @param className is a string variant containing the name of the
+	 * annotation class and the location of the name in the source code.
+	 * @param args is a map from strings to variants (argument name and value).
+	 * @return true if the mentioned annotation could be started here, false
+	 * if an error occurred.
+	 */
+	virtual bool annotationStart(Variant className, Variant::mapType &args) = 0;
+
+	/**
+	 * Called whenever an annotation ends while this handler is active. The 
+	 * function should return true if ending the annotation was successful,
+	 * false otherwise.
+	 *
+	 * @param className is a string variant containing the name of the
+	 * annotation class and the location of the class name in the source code.
+	 * @param elementName is a string variant containing the name of the
+	 * annotation class and the location of the element name in the source code.
+	 * @return true if the mentioned annotation could be started here, false if
+	 * an error occurred.
+	 */
+	virtual bool annotationEnd(Variant className, Variant elementName) = 0;
+
+	/**
+	 * Called whenever raw data (int the form of a string) is available for the
+	 * Handler instance.
+	 *
+	 * @param data is a string variant containing the character data and its
+	 * location.
+	 */
+	virtual void data(Variant data) = 0;
+};
+
+/**
+ * HandlerConstructor is a function pointer type used to create concrete
+ * instances of the Handler class.
+ *
+ * @param handlerData is the data that should be passed to the new handler
+ * instance.
+ * @return a newly created handler instance.
+ */
+using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
+
+/**
+ * The DefaultHandler class is used in case no element handler is specified in
+ * the State descriptor.
+ */
+/*class EmptyHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override;
+
+	void end() override;
+
+	static Handler *create(const HandlerData &handlerData);
+};*/
+
+}
+}
+
+#endif /* _OUSIA_PARSER_STATE_HANDLER_HPP_ */
+
diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp
new file mode 100644
index 0000000..1d83a68
--- /dev/null
+++ b/src/core/parser/stack/Stack.cpp
@@ -0,0 +1,188 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <sstream>
+
+#include <core/common/Utils.hpp>
+#include <core/common/Exceptions.hpp>
+#include <core/parser/ParserScope.hpp>
+
+#include "Stack.hpp"
+
+namespace ousia {
+namespace parser_stack {
+
+/* Class StateStack */
+
+/**
+ * Returns an Exception that should be thrown when a currently invalid command
+ * is thrown.
+ */
+static LoggableException InvalidCommand(const std::string &name,
+                                        const std::set<std::string> &expected)
+{
+	if (expected.empty()) {
+		return LoggableException{
+		    std::string{"No nested elements allowed, but got \""} + name +
+		    std::string{"\""}};
+	} else {
+		return LoggableException{
+		    std::string{"Expected "} +
+		    (expected.size() == 1 ? std::string{"\""}
+		                          : std::string{"one of \""}) +
+		    Utils::join(expected, "\", \"") + std::string{"\", but got \""} +
+		    name + std::string{"\""}};
+	}
+}
+
+StateStack::StateStack(
+    ParserContext &ctx,
+    const std::multimap<std::string, const State *> &states)
+    : ctx(ctx), states(states)
+{
+}
+
+bool StateStack::deduceState()
+{
+	// Assemble all states
+	std::vector<const State *> states;
+	for (const auto &e : this->states) {
+		states.push_back(e.second);
+	}
+
+	// Fetch the type signature of the scope and derive all possible states,
+	// abort if no unique parser state was found
+	std::vector<const State *> possibleStates =
+	    StateDeductor(ctx.getScope().getStackTypeSignature(), states)
+	        .deduce();
+	if (possibleStates.size() != 1) {
+		ctx.getLogger().error(
+		    "Error while including file: Cannot deduce parser state.");
+		return false;
+	}
+
+	// Switch to this state by creating a dummy handler
+	const State *state = possibleStates[0];
+	Handler *handler =
+	    DefaultHandler::create({ctx, "", *state, *state, SourceLocation{}});
+	stack.emplace(handler);
+	return true;
+}
+
+std::set<std::string> StateStack::expectedCommands()
+{
+	const State *currentState = &(this->currentState());
+	std::set<std::string> res;
+	for (const auto &v : states) {
+		if (v.second->parents.count(currentState)) {
+			res.insert(v.first);
+		}
+	}
+	return res;
+}
+
+const State &StateStack::currentState()
+{
+	return stack.empty() ? States::None : stack.top()->state();
+}
+
+std::string StateStack::currentCommandName()
+{
+	return stack.empty() ? std::string{} : stack.top()->name();
+}
+
+const State *StateStack::findTargetState(const std::string &name)
+{
+	const State *currentState = &(this->currentState());
+	auto range = states.equal_range(name);
+	for (auto it = range.first; it != range.second; it++) {
+		const StateSet &parents = it->second->parents;
+		if (parents.count(currentState) || parents.count(&States::All)) {
+			return it->second;
+		}
+	}
+
+	return nullptr;
+}
+
+void StateStack::start(const std::string &name, Variant::mapType &args,
+                        const SourceLocation &location)
+{
+	State const *targetState = findTargetState(name);
+// TODO: Andreas, please improve this.
+//	if (!Utils::isIdentifier(name)) {
+//		throw LoggableException(std::string("Invalid identifier \"") + name +
+//		                        std::string("\""));
+//	}
+
+	if (targetState == nullptr) {
+		targetState = findTargetState("*");
+	}
+	if (targetState == nullptr) {
+		throw InvalidCommand(name, expectedCommands());
+	}
+
+	// Fetch the associated constructor
+	HandlerConstructor ctor = targetState->elementHandler
+	                              ? targetState->elementHandler
+	                              : DefaultHandler::create;
+
+	// Canonicalize the arguments, allow additional arguments
+	targetState->arguments.validateMap(args, ctx.getLogger(), true);
+
+	// Instantiate the handler and call its start function
+	Handler *handler = ctor({ctx, name, *targetState, currentState(), location});
+	handler->start(args);
+	stack.emplace(handler);
+}
+
+void StateStack::start(std::string name, const Variant::mapType &args,
+                        const SourceLocation &location)
+{
+	Variant::mapType argsCopy(args);
+	start(name, argsCopy);
+}
+
+void StateStack::end()
+{
+	// Check whether the current command could be ended
+	if (stack.empty()) {
+		throw LoggableException{"No command to end."};
+	}
+
+	// Remove the current HandlerInstance from the stack
+	std::shared_ptr<Handler> inst{stack.top()};
+	stack.pop();
+
+	// Call the end function of the last Handler
+	inst->end();
+}
+
+void StateStack::data(const std::string &data, int field)
+{
+	// Check whether there is any command the data can be sent to
+	if (stack.empty()) {
+		throw LoggableException{"No command to receive data."};
+	}
+
+	// Pass the data to the current Handler instance
+	stack.top()->data(data, field);
+}
+}
+}
+
diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp
new file mode 100644
index 0000000..b106475
--- /dev/null
+++ b/src/core/parser/stack/Stack.hpp
@@ -0,0 +1,191 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file ParserStateStack.hpp
+ *
+ * Helper classes for document or description parsers. Contains the
+ * ParserStateStack class, which is an pushdown automaton responsible for
+ * accepting commands in the correct order and calling specified handlers.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_PARSER_STATE_STACK_HPP_
+#define _OUSIA_PARSER_STATE_STACK_HPP_
+
+#include <cstdint>
+
+#include <map>
+#include <memory>
+#include <set>
+#include <stack>
+#include <vector>
+
+#include <core/common/Variant.hpp>
+#include <core/common/Logger.hpp>
+#include <core/common/Argument.hpp>
+
+#include "Parser.hpp"
+#include "ParserContext.hpp"
+#include "ParserState.hpp"
+
+namespace ousia {
+
+/**
+ * The ParserStateStack class is a pushdown automaton responsible for turning a
+ * command stream into a tree of Node instances.
+ */
+class ParserStateStack {
+private:
+	/**
+	 * Reference at the parser context.
+	 */
+	ParserContext &ctx;
+
+	/**
+	 * Map containing all registered command names and the corresponding
+	 * state descriptors.
+	 */
+	const std::multimap<std::string, const ParserState *> &states;
+
+	/**
+	 * Internal stack used for managing the currently active Handler instances.
+	 */
+	std::stack<std::shared_ptr<Handler>> stack;
+
+	/**
+	 * Used internally to get all expected command names for the current state.
+	 * This function is used to build error messages.
+	 *
+	 * @return a set of strings containing the names of the expected commands.
+	 */
+	std::set<std::string> expectedCommands();
+
+	/**
+	 * Returns the targetState for a command with the given name that can be
+	 * reached from for the current state.
+	 *
+	 * @param name is the name of the requested command.
+	 * @return nullptr if no target state was found, a pointer at the target
+	 *state
+	 * otherwise.
+	 */
+	const ParserState *findTargetState(const std::string &name);
+
+public:
+	/**
+	 * Creates a new instance of the ParserStateStack class.
+	 *
+	 * @param ctx is the parser context the parser stack is working on.
+	 * @param states is a map containing the command names and pointers at the
+	 * corresponding ParserState instances.
+	 */
+	ParserStateStack(
+	    ParserContext &ctx,
+	    const std::multimap<std::string, const ParserState *> &states);
+
+	/**
+	 * Tries to reconstruct the parser state from the Scope instance of the
+	 * ParserContext given in the constructor. This functionality is needed for
+	 * including files,as the Parser of the included file needs to be brought to
+	 + an equivalent state as the one in the including file.
+	 *
+	 * @param scope is the ParserScope instance from which the ParserState
+	 * should be reconstructed.
+	 * @param logger is the logger instance to which error messages should be
+	 * written.
+	 * @return true if the operation was sucessful, false otherwise.
+	 */
+	bool deduceState();
+
+	/**
+	 * Returns the state the ParserStateStack instance currently is in.
+	 *
+	 * @return the state of the currently active Handler instance or STATE_NONE
+	 * if no handler is on the stack.
+	 */
+	const ParserState &currentState();
+
+	/**
+	 * Returns the command name that is currently being handled.
+	 *
+	 * @return the name of the command currently being handled by the active
+	 * Handler instance or an empty string if no handler is currently active.
+	 */
+	std::string currentCommandName();
+
+	/**
+	 * Function that should be called whenever a new command is reached.
+	 *
+	 * @param name is the name of the command (including the namespace
+	 * separator ':') and its corresponding location. Must be a string variant.
+	 * @param args is a map variant containing the arguments that were passed to
+	 * the command.
+	 */
+	void command(Variant name, Variant args);
+
+	/**
+	 * Function that should be called whenever a new field starts. Fields of the
+	 * same command may not be separated by calls to 
+	 */
+	void fieldStart();
+
+	/**
+	 * Function that should be called whenever a field ends.
+	 */
+	void fieldEnd();
+
+	/**
+	 * Function that shuold be called whenever character data is found in the
+	 * input stream.
+	 *
+	 * @param data is a variant of any type containing the data that was parsed
+	 * as data.
+	 */
+	void data(Variant data);
+
+	/**
+	 * Function that should be called whenever an annotation starts.
+	 *
+	 * @param name is the name of the annotation class.
+	 * @param args is a map variant containing the arguments that were passed
+	 * to the annotation.
+	 */
+	void annotationStart(Variant name, Variant args);
+
+	/**
+	 * Function that should be called whenever an annotation ends.
+	 *
+	 * @param name is the name of the annotation class that was ended.
+	 * @param annotationName is the name of the annotation that was ended.
+	 */
+	void annotationEnd(Variant name, Variant annotationName);
+
+	/**
+	 * Function that should be called whenever a previously registered token
+	 * is found in the input stream.
+	 *
+	 * @param token is string variant containing the token that was encountered.
+	 */
+	void token(Variant token);
+};
+}
+
+#endif /* _OUSIA_PARSER_STATE_STACK_HPP_ */
+
diff --git a/src/core/parser/stack/State.cpp b/src/core/parser/stack/State.cpp
new file mode 100644
index 0000000..d72f533
--- /dev/null
+++ b/src/core/parser/stack/State.cpp
@@ -0,0 +1,171 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "State.hpp"
+
+namespace ousia {
+namespace parser_stack {
+
+/* Class State */
+
+State::State() : elementHandler(nullptr) {}
+
+State::State(StateSet parents, Arguments arguments,
+                         RttiSet createdNodeTypes,
+                         HandlerConstructor elementHandler,
+                         bool supportsAnnotations)
+    : parents(parents),
+      arguments(arguments),
+      createdNodeTypes(createdNodeTypes),
+      elementHandler(elementHandler),
+      supportsAnnotations(supportsAnnotations)
+{
+}
+
+State::State(const StateBuilder &builder)
+    : State(builder.build())
+{
+}
+
+/* Class StateBuilder */
+
+StateBuilder &StateBuilder::copy(const State &state)
+{
+	this->state = state;
+	return *this;
+}
+
+StateBuilder &StateBuilder::parent(const State *parent)
+{
+	state.parents = StateSet{parent};
+	return *this;
+}
+
+StateBuilder &StateBuilder::parents(const StateSet &parents)
+{
+	state.parents = parents;
+	return *this;
+}
+
+StateBuilder &StateBuilder::arguments(const Arguments &arguments)
+{
+	state.arguments = arguments;
+	return *this;
+}
+
+StateBuilder &StateBuilder::createdNodeType(const Rtti *type)
+{
+	state.createdNodeTypes = RttiSet{type};
+	return *this;
+}
+
+StateBuilder &StateBuilder::createdNodeTypes(const RttiSet &types)
+{
+	state.createdNodeTypes = types;
+	return *this;
+}
+
+StateBuilder &StateBuilder::elementHandler(
+    HandlerConstructor elementHandler)
+{
+	state.elementHandler = elementHandler;
+	return *this;
+}
+
+StateBuilder &StateBuilder::supportsAnnotations(bool supportsAnnotations)
+{
+	state.supportsAnnotations = supportsAnnotations;
+	return *this;
+}
+
+const State &StateBuilder::build() const { return state; }
+
+/* Class StateDeductor */
+
+StateDeductor::StateDeductor(
+    std::vector<const Rtti *> signature,
+    std::vector<const State *> states)
+    : tbl(signature.size()),
+      signature(std::move(signature)),
+      states(std::move(states))
+{
+}
+
+bool StateDeductor::isActive(size_t d, const State *s)
+{
+	// Lookup the "active" state of (d, s), if it was not already set
+	// (e.second is true) we'll have to calculate it
+	auto e = tbl[d].emplace(s, false);
+	bool &res = e.first->second;
+	if (!e.second) {
+		return res;
+	}
+
+	// Check whether this node is generative (may have produced the Node
+	// described by the current Signature element)
+	bool isGenerative = signature[d]->isOneOf(s->createdNodeTypes);
+
+	if (isGenerative && d == 0) {
+		// End of recursion -- the last signature element is reached and the
+		// node was generative
+		res = true;
+	} else {
+		// Try repetition of this node
+		if (isGenerative && isActive(d - 1, s)) {
+			res = true;
+		} else {
+			// Check whether any of the parent nodes were active -- either for
+			// the previous element (if this one is generative) or for the
+			// current element (assuming this node was not generative)
+			for (const State *parent : s->parents) {
+				if ((isGenerative && isActive(d - 1, parent)) ||
+					isActive(d, parent)) {
+					res = true;
+					break;
+				}
+			}
+		}
+	}
+
+	return res;
+}
+
+std::vector<const State *> StateDeductor::deduce()
+{
+	std::vector<const State *> res;
+	if (!signature.empty()) {
+		const size_t D = signature.size();
+		for (auto s : states) {
+			if (signature[D - 1]->isOneOf(s->createdNodeTypes) &&
+			    isActive(D - 1, s)) {
+				res.push_back(s);
+			}
+		}
+	}
+	return res;
+}
+
+/* Constant initializations */
+
+namespace States {
+const State All;
+const State None;
+}
+}
+}
+
diff --git a/src/core/parser/stack/State.hpp b/src/core/parser/stack/State.hpp
new file mode 100644
index 0000000..ea326ec
--- /dev/null
+++ b/src/core/parser/stack/State.hpp
@@ -0,0 +1,307 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file State.hpp
+ *
+ * Defines the State class used within the ParserStack pushdown
+ * automaton and the StateBuilder class for convenient construction of
+ * such classes.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_PARSER_STATE_HPP_
+#define _OUSIA_PARSER_STATE_HPP_
+
+#include <unordered_set>
+
+#include <core/common/Rtti.hpp>
+#include <core/common/Argument.hpp>
+
+namespace ousia {
+namespace parser_stack {
+
+// Forward declarations
+class StateBuilder;
+class State;
+class HandlerData;
+class Handler;
+using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
+
+/**
+ * Set of pointers of parser states -- used for specifying a set of parent
+ * states.
+ */
+using StateSet = std::unordered_set<const State *>;
+
+/**
+ * Class used for the complete specification of a State. Stores possible
+ * parent states, state handlers and arguments to be passed to that state.
+ */
+struct State {
+	/**
+	 * Vector containing all possible parent states.
+	 */
+	StateSet parents;
+
+	/**
+	 * Descriptor of the arguments that should be passed to the handler.
+	 */
+	Arguments arguments;
+
+	/**
+	 * Set containing the types of the nodes that may be created in this
+	 * State. This information is needed for Parsers to reconstruct the
+	 * current State from a given ParserScope when a file is included.
+	 */
+	RttiSet createdNodeTypes;
+
+	/**
+	 * Pointer at a function which creates a new concrete Handler instance for
+	 * the elements described by this state. May be nullptr in which case no
+	 * handler instance is created.
+	 */
+	HandlerConstructor elementHandler;
+
+	/**
+	 * Set to true if this handler does support annotations. This is almost
+	 * always false (e.g. all description handlers), except for document 
+	 * element handlers.
+	 */
+	bool supportsAnnotations;
+
+	/**
+	 * Default constructor, initializes the handlers with nullptr.
+	 */
+	State();
+
+	/**
+	 * Constructor taking values for all fields. Use the StateBuilder
+	 * class for a more convenient construction of State instances.
+	 *
+	 * @param parents is a vector containing all possible parent states.
+	 * @param arguments is a descriptor of arguments that should be passed to
+	 * the handler.
+	 * @param createdNodeTypes is a set containing the types of the nodes tha
+	 * may be created in this State. This information is needed for
+	 * Parsers to reconstruct the current State from a given ParserScope
+	 * when a file is included.
+	 * @param elementHandler is a pointer at a function which creates a new
+	 * concrete Handler instance for the elements described by this state. May
+	 * be nullptr in which case no handler instance is created.
+	 * @param supportsAnnotations specifies whether annotations are supported
+	 * here at all.
+	 */
+	State(StateSet parents, Arguments arguments = Arguments{},
+	            RttiSet createdNodeTypes = RttiSet{},
+	            HandlerConstructor elementHandler = nullptr,
+	            bool supportsAnnotations = false);
+
+	/**
+	 * Creates this State from the given StateBuilder instance.
+	 */
+	State(const StateBuilder &builder);
+};
+
+/**
+ * The StateBuilder class is a class used for conveniently building new
+ * State instances.
+ */
+class StateBuilder {
+private:
+	/**
+	 * State instance that is currently being built by the
+	 * StateBuilder.
+	 */
+	State state;
+
+public:
+	/**
+	 * Copies the State instance and uses it as internal state. Overrides
+	 * all changes made by the StateBuilder.
+	 *
+	 * @param state is the state that should be copied.
+	 * @return a reference at this StateBuilder instance for method
+	 * chaining.
+	 */
+	StateBuilder &copy(const State &state);
+
+	/**
+	 * Sets the possible parent states to the single given parent element.
+	 *
+	 * @param parent is a pointer at the parent State instance that should
+	 * be the possible parent state.
+	 * @return a reference at this StateBuilder instance for method
+	 * chaining.
+	 */
+	StateBuilder &parent(const State *parent);
+
+	/**
+	 * Sets the State instances in the given StateSet as the list of
+	 * supported parent states.
+	 *
+	 * @param parents is a set of pointers at State instances that should
+	 * be the possible parent states.
+	 * @return a reference at this StateBuilder instance for method
+	 * chaining.
+	 */
+	StateBuilder &parents(const StateSet &parents);
+
+	/**
+	 * Sets the arguments that should be passed to the parser state handler to
+	 * those given as argument.
+	 *
+	 * @param arguments is the Arguments instance describing the Arguments that
+	 * should be parsed to a Handler for this State.
+	 * @return a reference at this StateBuilder instance for method
+	 * chaining.
+	 */
+	StateBuilder &arguments(const Arguments &arguments);
+
+	/**
+	 * Sets the Node types this state may produce to the given Rtti descriptor.
+	 *
+	 * @param type is the Rtti descriptor of the Type that may be produced by
+	 * this state.
+	 * @return a reference at this StateBuilder instance for method
+	 * chaining.
+	 */
+	StateBuilder &createdNodeType(const Rtti *type);
+
+	/**
+	 * Sets the Node types this state may produce to the given Rtti descriptors.
+	 *
+	 * @param types is a set of Rtti descriptors of the Types that may be
+	 * produced by this state.
+	 * @return a reference at this StateBuilder instance for method
+	 * chaining.
+	 */
+	StateBuilder &createdNodeTypes(const RttiSet &types);
+
+	/**
+	 * Sets the constructor for the element handler. The constructor creates a
+	 * new concrete Handler instance for the elements described by this state.
+	 * May be nullptr in which case no handler instance is created (this is
+	 * the default value).
+	 *
+	 * @param elementHandler is the HandlerConstructor that should create a
+	 * new Handler instance.
+	 * @return a reference at this StateBuilder instance for method
+	 * chaining.
+	 */
+	StateBuilder &elementHandler(HandlerConstructor elementHandler);
+
+	/**
+	 * Sets the state of the "supportsAnnotations" flags (default value is
+	 * false)
+	 *
+	 * @param supportsAnnotations should be set to true, if annotations are
+	 * supported for the handlers associated with this document.
+	 * @return a reference at this StateBuilder instance for method
+	 * chaining.
+	 */
+	StateBuilder &supportsAnnotations(bool supportsAnnotations);
+
+	/**
+	 * Returns a reference at the internal State instance that was built
+	 * using the StateBuilder.
+	 *
+	 * @return the built State.
+	 */
+	const State &build() const;
+};
+
+/**
+ * Class used to deduce the State a Parser is currently in based on the
+ * types of the Nodes that currently are on the ParserStack. Uses dynamic
+ * programming in order to solve this problem.
+ */
+class StateDeductor {
+public:
+	/**
+	 * Type containing the dynamic programming table.
+	 */
+	using Table = std::vector<std::unordered_map<const State *, bool>>;
+
+private:
+	/**
+	 * Dynamic programming table.
+	 */
+	Table tbl;
+
+	/**
+	 * Signature given in the constructor.
+	 */
+	const std::vector<const Rtti *> signature;
+
+	/**
+	 * List of states that should be checked for being active.
+	 */
+	const std::vector<const State *> states;
+
+	/**
+	 * Used internally to check whether the given parser stack s may have been
+	 * active for signature element d.
+	 *
+	 * @param d is the signature element.
+	 * @param s is the parser state.
+	 * @return true if the the given State may have been active.
+	 */
+	bool isActive(size_t d, const State *s);
+
+public:
+	/**
+	 * Constructor of the StateDeductor class.
+	 *
+	 * @param signature a Node type signature describing the types of the nodes
+	 * which currently reside on e.g. the ParserScope stack.
+	 * @param states is a list of states that should be checked.
+	 */
+	StateDeductor(std::vector<const Rtti *> signature,
+	                    std::vector<const State *> states);
+
+	/**
+	 * Selects all active states from the given states. Only considers those
+	 * states that may have produced the last signature element.
+	 *
+	 * @return a list of states that may actually have been active.
+	 */
+	std::vector<const State *> deduce();
+};
+
+/**
+ * The States namespace contains all the global state constants used
+ * in the ParserStack class.
+ */
+namespace States {
+/**
+ * State representing all states.
+ */
+extern const State All;
+
+/**
+ * State representing the initial state.
+ */
+extern const State None;
+}
+}
+}
+
+#endif /* _OUSIA_PARSER_STATE_HPP_ */
+
diff --git a/test/core/parser/ParserStateTest.cpp b/test/core/parser/ParserStateTest.cpp
deleted file mode 100644
index 91d8dcd..0000000
--- a/test/core/parser/ParserStateTest.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <core/common/Rtti.hpp>
-#include <core/parser/ParserState.hpp>
-
-namespace ousia {
-
-static const Rtti t1;
-static const Rtti t2;
-static const Rtti t3;
-static const Rtti t4;
-static const Rtti t5;
-
-static const ParserState s1 = ParserStateBuilder().createdNodeType(&t1);
-static const ParserState s2a =
-    ParserStateBuilder().parent(&s1).createdNodeType(&t2);
-static const ParserState s2b =
-    ParserStateBuilder().parent(&s1).createdNodeType(&t2);
-static const ParserState s3 =
-    ParserStateBuilder().parents({&s2a, &s1}).createdNodeType(&t3);
-static const ParserState s4 =
-    ParserStateBuilder().parent(&s3).createdNodeType(&t4);
-static const ParserState s5 =
-    ParserStateBuilder().parent(&s2b).createdNodeType(&t5);
-
-TEST(ParserStateDeductor, deduce)
-{
-	using Result = std::vector<const ParserState *>;
-	using Signature = std::vector<const Rtti *>;
-	std::vector<const ParserState *> states{&s1, &s2a, &s2b, &s3, &s4, &s5};
-
-	// Should not crash on empty signature
-	ASSERT_EQ(Result{}, ParserStateDeductor(Signature{}, states).deduce());
-
-	// Try repeating signature elements
-	ASSERT_EQ(Result({&s1}),
-	          ParserStateDeductor(Signature({&t1}), states).deduce());
-	ASSERT_EQ(Result({&s1}),
-	          ParserStateDeductor(Signature({&t1, &t1}), states).deduce());
-	ASSERT_EQ(Result({&s1}),
-	          ParserStateDeductor(Signature({&t1, &t1, &t1}), states).deduce());
-
-	// Go to another state
-	ASSERT_EQ(Result({&s2a, &s2b}),
-	          ParserStateDeductor(Signature({&t1, &t1, &t2}), states).deduce());
-	ASSERT_EQ(Result({&s4}),
-	          ParserStateDeductor(Signature({&t1, &t3, &t4}), states).deduce());
-
-	// Skip one state
-	ASSERT_EQ(Result({&s4}),
-	          ParserStateDeductor(Signature({&t2, &t4}), states).deduce());
-
-	// Impossible signature
-	ASSERT_EQ(Result({}),
-	          ParserStateDeductor(Signature({&t4, &t5}), states).deduce());
-
-}
-}
-
diff --git a/test/core/parser/stack/StateTest.cpp b/test/core/parser/stack/StateTest.cpp
new file mode 100644
index 0000000..e503d30
--- /dev/null
+++ b/test/core/parser/stack/StateTest.cpp
@@ -0,0 +1,79 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/common/Rtti.hpp>
+#include <core/parser/stack/State.hpp>
+
+namespace ousia {
+namespace parser_stack {
+
+static const Rtti t1;
+static const Rtti t2;
+static const Rtti t3;
+static const Rtti t4;
+static const Rtti t5;
+
+static const State s1 = StateBuilder().createdNodeType(&t1);
+static const State s2a =
+    StateBuilder().parent(&s1).createdNodeType(&t2);
+static const State s2b =
+    StateBuilder().parent(&s1).createdNodeType(&t2);
+static const State s3 =
+    StateBuilder().parents({&s2a, &s1}).createdNodeType(&t3);
+static const State s4 =
+    StateBuilder().parent(&s3).createdNodeType(&t4);
+static const State s5 =
+    StateBuilder().parent(&s2b).createdNodeType(&t5);
+
+TEST(StateDeductor, deduce)
+{
+	using Result = std::vector<const State *>;
+	using Signature = std::vector<const Rtti *>;
+	std::vector<const State *> states{&s1, &s2a, &s2b, &s3, &s4, &s5};
+
+	// Should not crash on empty signature
+	ASSERT_EQ(Result{}, StateDeductor(Signature{}, states).deduce());
+
+	// Try repeating signature elements
+	ASSERT_EQ(Result({&s1}),
+	          StateDeductor(Signature({&t1}), states).deduce());
+	ASSERT_EQ(Result({&s1}),
+	          StateDeductor(Signature({&t1, &t1}), states).deduce());
+	ASSERT_EQ(Result({&s1}),
+	          StateDeductor(Signature({&t1, &t1, &t1}), states).deduce());
+
+	// Go to another state
+	ASSERT_EQ(Result({&s2a, &s2b}),
+	          StateDeductor(Signature({&t1, &t1, &t2}), states).deduce());
+	ASSERT_EQ(Result({&s4}),
+	          StateDeductor(Signature({&t1, &t3, &t4}), states).deduce());
+
+	// Skip one state
+	ASSERT_EQ(Result({&s4}),
+	          StateDeductor(Signature({&t2, &t4}), states).deduce());
+
+	// Impossible signature
+	ASSERT_EQ(Result({}),
+	          StateDeductor(Signature({&t4, &t5}), states).deduce());
+
+}
+}
+}
+
-- 
cgit v1.2.3


From 9acab70815a0f62bdaf2c7f01e588066b818d330 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 22:45:19 +0100
Subject: Fixed isIdentifier and isNamespacedIdentifier, added and used
 isIdentifierOrEmpty for use in Node

---
 src/core/common/Utils.cpp      | 13 +++++++++----
 src/core/common/Utils.hpp      |  5 +++++
 src/core/model/Node.cpp        |  2 +-
 test/core/common/UtilsTest.cpp | 39 +++++++++++++++++++++++++++++++++------
 4 files changed, 48 insertions(+), 11 deletions(-)

(limited to 'test')

diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
index fc8ee00..f8b53c6 100644
--- a/src/core/common/Utils.cpp
+++ b/src/core/common/Utils.cpp
@@ -37,22 +37,27 @@ bool Utils::isIdentifier(const std::string &name)
 		}
 		first = false;
 	}
-	return true;
+	return !first;
 }
 
-bool Utils::isNamespaceIdentifier(const std::string &name)
+bool Utils::isIdentifierOrEmpty(const std::string &name)
+{
+	return name.empty() || isIdentifier(name);
+}
+
+bool Utils::isNamespacedIdentifier(const std::string &name)
 {
 	bool first = true;
 	for (char c : name) {
 		if (first && !isIdentifierStartCharacter(c)) {
 			return false;
 		}
-		if (!first && (!isIdentifierCharacter(c) || c == ':')) {
+		if (!first && (!isIdentifierCharacter(c) && c != ':')) {
 			return false;
 		}
 		first = (c == ':');
 	}
-	return true;
+	return !first;
 }
 
 bool Utils::hasNonWhitepaceChar(const std::string &s)
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index b5cd178..b5a54fc 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -85,6 +85,11 @@ public:
 	 */
 	static bool isIdentifier(const std::string &name);
 
+	/**
+	 * Returns true if the given string is an identifier or an empty string.
+	 */
+	static bool isIdentifierOrEmpty(const std::string &name);
+
 	/**
 	 * Returns true if the given string is in
 	 * \code{.txt}
diff --git a/src/core/model/Node.cpp b/src/core/model/Node.cpp
index 39ee2e4..ce15cad 100644
--- a/src/core/model/Node.cpp
+++ b/src/core/model/Node.cpp
@@ -448,7 +448,7 @@ bool Node::doValidate(Logger &logger) const { return true; }
 
 bool Node::validateName(Logger &logger) const
 {
-	if (!Utils::isIdentifier(name)) {
+	if (!Utils::isIdentifierOrEmpty(name)) {
 		logger.error(type()->name + std::string(" name \"") + name +
 		                 std::string("\" is not a valid identifier"),
 		             this);
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp
index a4bf4b2..7801296 100644
--- a/test/core/common/UtilsTest.cpp
+++ b/test/core/common/UtilsTest.cpp
@@ -24,14 +24,40 @@ namespace ousia {
 
 TEST(Utils, isIdentifier)
 {
-	ASSERT_TRUE(Utils::isIdentifier("test"));
-	ASSERT_TRUE(Utils::isIdentifier("t0-_est"));
-	ASSERT_FALSE(Utils::isIdentifier("_t0-_EST"));
-	ASSERT_FALSE(Utils::isIdentifier("-t0-_EST"));
-	ASSERT_FALSE(Utils::isIdentifier("0t-_EST"));
-	ASSERT_FALSE(Utils::isIdentifier("invalid key"));
+	EXPECT_TRUE(Utils::isIdentifier("test"));
+	EXPECT_TRUE(Utils::isIdentifier("t0-_est"));
+	EXPECT_FALSE(Utils::isIdentifier("_t0-_EST"));
+	EXPECT_FALSE(Utils::isIdentifier("-t0-_EST"));
+	EXPECT_FALSE(Utils::isIdentifier("0t-_EST"));
+	EXPECT_FALSE(Utils::isIdentifier("_A"));
+	EXPECT_FALSE(Utils::isIdentifier("invalid key"));
+	EXPECT_FALSE(Utils::isIdentifier(""));
 }
 
+
+TEST(Utils, isNamespacedIdentifier)
+{
+	EXPECT_TRUE(Utils::isNamespacedIdentifier("test"));
+	EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("_t0-_EST"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("-t0-_EST"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("0t-_EST"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("invalid key"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("_A"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier(""));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier(":"));
+	EXPECT_TRUE(Utils::isNamespacedIdentifier("test:a"));
+	EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:b"));
+	EXPECT_TRUE(Utils::isNamespacedIdentifier("test:test"));
+	EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:t0-_est"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("test:_A"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("test::a"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier(":test"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est:_t0-_EST"));
+	EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est: b"));
+}
+
+
 TEST(Utils, split)
 {
 	ASSERT_EQ(std::vector<std::string>({"ab"}), Utils::split("ab", '.'));
@@ -82,5 +108,6 @@ TEST(Utils, endsWith)
 	ASSERT_TRUE(Utils::endsWith("foobar", "bar"));
 	ASSERT_TRUE(Utils::endsWith("foo", ""));
 }
+
 }
 
-- 
cgit v1.2.3


From 02995f1f9b5a0905ed8f79a5149f4b6375a622bf Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 22:45:49 +0100
Subject: Fixed gcc 4.9 warnings

---
 test/core/RangeSetTest.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'test')

diff --git a/test/core/RangeSetTest.cpp b/test/core/RangeSetTest.cpp
index cbf8f59..446ee51 100644
--- a/test/core/RangeSetTest.cpp
+++ b/test/core/RangeSetTest.cpp
@@ -110,7 +110,7 @@ TEST(RangeSet, Merge)
 	s.merge(Range<int>(40, 50));
 	s.merge(Range<int>(60, 70));
 	{
-		ASSERT_EQ(ranges.size(), 4);
+		ASSERT_EQ(ranges.size(), 4U);
 
 		auto it = ranges.begin();
 		ASSERT_EQ((*it).start, 0);
@@ -132,7 +132,7 @@ TEST(RangeSet, Merge)
 	// Now insert an element which spans the second and third element
 	s.merge(Range<int>(15, 55));
 	{
-		ASSERT_EQ(ranges.size(), 3);
+		ASSERT_EQ(ranges.size(), 3U);
 
 		auto it = ranges.begin();
 		ASSERT_EQ((*it).start, 0);
@@ -150,7 +150,7 @@ TEST(RangeSet, Merge)
 	// Now insert an element which expands the first element
 	s.merge(Range<int>(-10, 11));
 	{
-		ASSERT_EQ(ranges.size(), 3);
+		ASSERT_EQ(ranges.size(), 3U);
 
 		auto it = ranges.begin();
 		ASSERT_EQ((*it).start, -10);
@@ -168,7 +168,7 @@ TEST(RangeSet, Merge)
 	// Now insert an element which merges the last two elements
 	s.merge(Range<int>(13, 70));
 	{
-		ASSERT_EQ(ranges.size(), 2);
+		ASSERT_EQ(ranges.size(), 2U);
 
 		auto it = ranges.begin();
 		ASSERT_EQ((*it).start, -10);
@@ -182,7 +182,7 @@ TEST(RangeSet, Merge)
 	// Now insert an element which merges the remaining elements
 	s.merge(Range<int>(-9, 12));
 	{
-		ASSERT_EQ(ranges.size(), 1);
+		ASSERT_EQ(ranges.size(), 1U);
 
 		auto it = ranges.begin();
 		ASSERT_EQ((*it).start, -10);
-- 
cgit v1.2.3


From 0a8a012850bb7c730ccac4c91c7aca5c88cbedc9 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:14:58 +0100
Subject: Implemented most of the desired behaviour of the Stack class, added
 unit tests

---
 src/core/parser/stack/Stack.cpp      | 437 ++++++++++++++++++++----
 src/core/parser/stack/Stack.hpp      |  85 ++++-
 test/core/parser/stack/StackTest.cpp | 639 +++++++++++++++++++++++++++++++++++
 3 files changed, 1075 insertions(+), 86 deletions(-)
 create mode 100644 test/core/parser/stack/StackTest.cpp

(limited to 'test')

diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp
index b0df39b..d84a19c 100644
--- a/src/core/parser/stack/Stack.cpp
+++ b/src/core/parser/stack/Stack.cpp
@@ -18,6 +18,7 @@
 
 #include <sstream>
 
+#include <core/common/Logger.hpp>
 #include <core/common/Utils.hpp>
 #include <core/common/Exceptions.hpp>
 #include <core/parser/ParserScope.hpp>
@@ -37,10 +38,28 @@ HandlerInfo::HandlerInfo() : HandlerInfo(nullptr) {}
 HandlerInfo::HandlerInfo(std::shared_ptr<Handler> handler)
     : handler(handler),
       fieldIdx(0),
+      valid(true),
+      implicit(false),
       inField(false),
       inDefaultField(false),
       inImplicitDefaultField(false),
-      hasDefaultField(false)
+      inValidField(false),
+      hadDefaultField(false)
+{
+}
+
+HandlerInfo::HandlerInfo(bool valid, bool implicit, bool inField,
+                         bool inDefaultField, bool inImplicitDefaultField,
+                         bool inValidField)
+    : handler(nullptr),
+      fieldIdx(0),
+      valid(valid),
+      implicit(implicit),
+      inField(inField),
+      inDefaultField(inDefaultField),
+      inImplicitDefaultField(inImplicitDefaultField),
+      inValidField(inValidField),
+      hadDefaultField(false)
 {
 }
 
@@ -55,7 +74,7 @@ void HandlerInfo::fieldStart(bool isDefault, bool isImplicit, bool isValid)
 	inDefaultField = isDefault || isImplicit;
 	inImplicitDefaultField = isImplicit;
 	inValidField = isValid;
-	hasDefaultField = hasDefaultField || inDefaultField;
+	hadDefaultField = hadDefaultField || inDefaultField;
 	fieldIdx++;
 }
 
@@ -65,11 +84,13 @@ void HandlerInfo::fieldEnd()
 	inDefaultField = false;
 	inImplicitDefaultField = false;
 	inValidField = false;
-	if (fieldIdx > 0) {
-		fieldIdx--;
-	}
 }
 
+/**
+ * Stub instance of HandlerInfo containing no handler information.
+ */
+static HandlerInfo EmptyHandlerInfo{true, true, true, true, false, true};
+
 /* Helper functions */
 
 /**
@@ -110,9 +131,31 @@ Stack::Stack(ParserContext &ctx,
 	}
 }
 
-Stack::~Stack() {}
+Stack::~Stack()
+{
+	while (!stack.empty()) {
+		// Fetch the topmost stack element
+		HandlerInfo &info = currentInfo();
+
+		// It is an error if we're still in a field of an element while the
+		// Stack instance is destroyed. Log that
+		if (handlersValid()) {
+			if (info.inField && !info.implicit &&
+			    !info.inImplicitDefaultField) {
+				logger().error(
+				    std::string("Reached end of stream, but command \"") +
+				        info.handler->getName() +
+				        "\" has not ended yet. Command was started here:",
+				    info.handler->getLocation());
+			}
+		}
 
-bool Stack::deduceState()
+		// Remove the command from the stack
+		endCurrentHandler();
+	}
+}
+
+void Stack::deduceState()
 {
 	// Assemble all states
 	std::vector<const State *> states;
@@ -125,23 +168,24 @@ bool Stack::deduceState()
 	std::vector<const State *> possibleStates =
 	    StateDeductor(ctx.getScope().getStackTypeSignature(), states).deduce();
 	if (possibleStates.size() != 1U) {
-		throw LoggableException{
-		    "Error while including file: Cannot deduce parser state."};
+		throw LoggableException(
+		    "Error while including file: Cannot deduce parser state.");
 	}
 
-	// Switch to this state by creating a dummy handler
-	const State *state = possibleStates[0];
-	stack.emplace(std::shared_ptr<Handler>{EmptyHandler::create({ctx, "", *state, *state, SourceLocation{}})});
-}
+	// Switch to this state by creating a handler, but do not call its start
+	// function
+	const State &state = *possibleStates[0];
+	HandlerConstructor ctor =
+	    state.elementHandler ? state.elementHandler : EmptyHandler::create;
 
-bool Stack::handlersValid()
-{
-	for (auto it = stack.crbegin(); it != stack.crend(); it++) {
-		if (!it->valid) {
-			return false;
-		}
-	}
-	return true;
+	std::shared_ptr<Handler> handler =
+	    std::shared_ptr<Handler>{ctor({ctx, "", state, SourceLocation{}})};
+	stack.emplace_back(handler);
+
+	// Set the correct flags for this implicit handler
+	HandlerInfo &info = currentInfo();
+	info.implicit = true;
+	info.fieldStart(true, false, true);
 }
 
 std::set<std::string> Stack::expectedCommands()
@@ -158,12 +202,12 @@ std::set<std::string> Stack::expectedCommands()
 
 const State &Stack::currentState()
 {
-	return stack.empty() ? States::None : stack.top()->state();
+	return stack.empty() ? States::None : stack.back().handler->getState();
 }
 
 std::string Stack::currentCommandName()
 {
-	return stack.empty() ? std::string{} : stack.top()->name();
+	return stack.empty() ? std::string{} : stack.back().handler->getName();
 }
 
 const State *Stack::findTargetState(const std::string &name)
@@ -180,77 +224,330 @@ const State *Stack::findTargetState(const std::string &name)
 	return nullptr;
 }
 
+const State *Stack::findTargetStateOrWildcard(const std::string &name)
+{
+	// Try to find the target state with the given name, if none is found, try
+	// find a matching "*" state.
+	State const *targetState = findTargetState(name);
+	if (targetState == nullptr) {
+		return findTargetState("*");
+	}
+	return targetState;
+}
+
+HandlerInfo &Stack::currentInfo()
+{
+	return stack.empty() ? EmptyHandlerInfo : stack.back();
+}
+HandlerInfo &Stack::lastInfo()
+{
+	return stack.size() < 2U ? EmptyHandlerInfo : stack[stack.size() - 2];
+}
+
+void Stack::endCurrentHandler()
+{
+	if (!stack.empty()) {
+		// Fetch the handler info for the current top-level element
+		HandlerInfo &info = stack.back();
+
+		// Do not call any callback functions while the stack is marked as
+		// invalid or this is an elment marked as "implicit"
+		if (!info.implicit && handlersValid()) {
+			// Make sure the fieldEnd handler is called if the element still
+			// is in a field
+			if (info.inField) {
+				info.handler->fieldEnd();
+				info.fieldEnd();
+			}
+
+			// Call the "end" function of the corresponding Handler instance
+			info.handler->end();
+		}
+
+		// Remove the element from the stack
+		stack.pop_back();
+	}
+}
+
+bool Stack::ensureHandlerIsInField()
+{
+	// If the current handler is not in a field (and actually has a handler)
+	// try to start a default field
+	HandlerInfo &info = currentInfo();
+	if (!info.inField && info.handler != nullptr) {
+		// Abort if the element already had a default field
+		if (info.hadDefaultField) {
+			return false;
+		}
+
+		// Try to start a new default field, abort if this did not work
+		bool isDefault = true;
+		if (!info.handler->fieldStart(isDefault, info.fieldIdx)) {
+			info.handler->fieldEnd();
+			endCurrentHandler();
+			return false;
+		}
+
+		// Mark the field as started
+		info.fieldStart(true, true, true);
+	}
+	return true;
+}
+
+bool Stack::handlersValid()
+{
+	for (auto it = stack.crbegin(); it != stack.crend(); it++) {
+		if (!it->valid) {
+			return false;
+		}
+	}
+	return true;
+}
+
+Logger &Stack::logger() { return ctx.getLogger(); }
+
 void Stack::command(const Variant &name, const Variant::mapType &args)
 {
-	// Make sure the given identifier is valid
+	// Make sure the given identifier is valid (preventing "*" from being
+	// malicously passed to this function)
 	if (!Utils::isNamespacedIdentifier(name.asString())) {
 		throw LoggableException(std::string("Invalid identifier \"") +
-		                        name.asString() + std::string("\""), name);
+		                            name.asString() + std::string("\""),
+		                        name);
 	}
 
-	// Try to find a target state for the given command
-	State const *targetState = findTargetState(name.asString());
+	State const *lastTargetState = nullptr;
+	Variant::mapType canonicalArgs;
+	while (true) {
+		// Try to find a target state for the given command, if none can be
+		// found and the current command does not have an open field, then try
+		// to create an empty default field, otherwise this is an exception
+		const State *targetState = findTargetStateOrWildcard(name.asString());
+		if (targetState == nullptr) {
+			if (!currentInfo().inField) {
+				endCurrentHandler();
+				continue;
+			} else {
+				throw buildInvalidCommandException(name.asString(),
+				                                   expectedCommands());
+			}
+		}
+
+		// Make sure we're currently inside a field
+		if (!ensureHandlerIsInField()) {
+			endCurrentHandler();
+			continue;
+		}
 
-	// No target state is found, try to find a wildcard handler for the current
-	// state
-	if (targetState == nullptr) {
-		targetState = findTargetState("*");
-	}
+		// Fork the logger. We do not want any validation errors to skip
+		LoggerFork loggerFork = logger().fork();
 
-	// No handler has been found at all,
-	if (targetState == nullptr) {
-		throw buildInvalidCommandException(name.asString(), expectedCommands());
+		// Canonicalize the arguments (if this has not already been done), allow
+		// additional arguments
+		if (lastTargetState != targetState) {
+			canonicalArgs = args;
+			targetState->arguments.validateMap(canonicalArgs, loggerFork, true);
+			lastTargetState = targetState;
+		}
+
+		// Instantiate the handler and push it onto the stack
+		HandlerConstructor ctor = targetState->elementHandler
+		                              ? targetState->elementHandler
+		                              : EmptyHandler::create;
+		std::shared_ptr<Handler> handler{
+		    ctor({ctx, name.asString(), *targetState, name.getLocation()})};
+		stack.emplace_back(handler);
+
+		// Fetch the HandlerInfo for the parent element and the current element
+		HandlerInfo &parentInfo = lastInfo();
+		HandlerInfo &info = currentInfo();
+
+		// Call the "start" method of the handler, store the result of the start
+		// method as the validity of the handler -- do not call the start method
+		// if the stack is currently invalid (as this may cause further,
+		// unwanted errors)
+		bool validStack = handlersValid();
+		info.valid = false;
+		if (validStack) {
+			handler->setLogger(loggerFork);
+			try {
+				info.valid = handler->start(canonicalArgs);
+			}
+			catch (LoggableException ex) {
+				loggerFork.log(ex);
+			}
+			handler->resetLogger();
+		}
+
+		// We started the command within an implicit default field and it is not
+		// valid -- remove both the new handler and the parent field from the
+		// stack
+		if (!info.valid && parentInfo.inImplicitDefaultField) {
+			endCurrentHandler();
+			endCurrentHandler();
+			continue;
+		}
+
+		// If we ended up here, starting the command may or may not have worked,
+		// but after all, we cannot unroll the stack any further. Update the
+		// "valid" flag, commit any potential error messages and return.
+		info.valid = parentInfo.valid && info.valid;
+		loggerFork.commit();
+		return;
 	}
+}
+
+void Stack::data(const Variant &data)
+{
+	while (true) {
+		// Check whether there is any command the data can be sent to
+		if (stack.empty()) {
+			throw LoggableException("No command here to receive data.");
+		}
+
+		// Fetch the current command handler information
+		HandlerInfo &info = currentInfo();
+
+		// Make sure the current handler has an open field
+		if (!ensureHandlerIsInField()) {
+			endCurrentHandler();
+			continue;
+		}
+
+		// If this field should not get any data, log an error and do not call
+		// the "data" handler
+		if (!info.inValidField) {
+			logger().error("Did not expect any data here", data);
+		}
+
+		if (handlersValid() && info.inValidField) {
+			// Fork the logger and set it as temporary logger for the "start"
+			// method. We only want to keep error messages if this was not a try
+			// to implicitly open a default field.
+			LoggerFork loggerFork = logger().fork();
+			info.handler->setLogger(loggerFork);
+
+			// Pass the data to the current Handler instance
+			bool valid = false;
+			try {
+				valid = info.handler->data(data);
+			}
+			catch (LoggableException ex) {
+				loggerFork.log(ex);
+			}
+
+			// Reset the logger instance as soon as possible
+			info.handler->resetLogger();
+
+			// If placing the data here failed and we're currently in an
+			// implicitly opened field, just unroll the stack to the next field
+			// and try again
+			if (!valid && info.inImplicitDefaultField) {
+				endCurrentHandler();
+				continue;
+			}
+
+			// Commit the content of the logger fork. Do not change the valid
+			// flag.
+			loggerFork.commit();
+		}
 
-	// Fetch the associated constructor
-	HandlerConstructor ctor = targetState->elementHandler
-	                              ? targetState->elementHandler
-	                              : DefaultHandler::create;
-
-	// Canonicalize the arguments, allow additional arguments
-	targetState->arguments.validateMap(args, ctx.getLogger(), true);
-
-	// Instantiate the handler and push it onto the stack
-	Handler *handler =
-	    ctor({ctx, name.asString(), *targetState, currentState(), name.getLocation()});
-	stack.emplace_back(std::shared_ptr<Handler>{handler});
-
-	// Call the "start" method of the handler, store the result of the start
-	// method as the validity of the handler -- do not call the start method
-	// if the stack is currently invalid (as this may cause further, unwanted
-	// errors)
-	try {
-		stack.back().valid = handlersValid() && handler->start(args);
-	} catch (LoggableException ex) {
-		stack.back().valid = false;
-		logger.log(ex, )
+		// There was no reason to unroll the stack any further, so continue
+		return;
 	}
 }
 
-void Stack::end()
+void Stack::fieldStart(bool isDefault)
 {
-	// Check whether the current command could be ended
+	// Make sure the current handler stack is not empty
 	if (stack.empty()) {
-		throw LoggableException{"No command to end."};
+		throw LoggableException(
+		    "No command for which a field could be started");
 	}
 
-	// Remove the current HandlerInstance from the stack
-	std::shared_ptr<Handler> inst{stack.top()};
-	stack.pop();
+	// Fetch the information attached to the current handler
+	HandlerInfo &info = currentInfo();
+	if (info.inField) {
+		logger().error(
+		    "Got field start, but there is no command for which to start the "
+		    "field.");
+		return;
+	}
+
+	// Copy the isDefault flag to a local variable, the fieldStart method will
+	// write into this variable
+	bool defaultField = isDefault;
+
+	// Do not call the "fieldStart" function if we're in an invalid subtree
+	bool valid = false;
+	if (handlersValid()) {
+		try {
+			valid = info.handler->fieldStart(defaultField, info.fieldIdx);
+		}
+		catch (LoggableException ex) {
+			logger().log(ex);
+		}
+		if (!valid && !defaultField) {
+			logger().error(
+			    std::string("Cannot start a new field here (index ") +
+			    std::to_string(info.fieldIdx + 1) +
+			    std::string("), field does not exist"));
+		}
+	}
 
-	// Call the end function of the last Handler
-	inst->end();
+	// Mark the field as started
+	info.fieldStart(defaultField, false, valid);
 }
 
-void Stack::data(const std::string &data, int field)
+void Stack::fieldEnd()
 {
-	// Check whether there is any command the data can be sent to
+	// Make sure the current handler stack is not empty
 	if (stack.empty()) {
-		throw LoggableException{"No command to receive data."};
+		throw LoggableException("No command for which a field could be ended");
 	}
 
-	// Pass the data to the current Handler instance
-	stack.top()->data(data, field);
+	// Fetch the information attached to the current handler
+	HandlerInfo &info = currentInfo();
+	if (!info.inField) {
+		logger().error(
+		    "Got field end, but there is no command for which to end the "
+		    "field.");
+		return;
+	}
+
+	// Only continue if the current handler stack is in a valid state, do not
+	// call the fieldEnd function if something went wrong before
+	if (handlersValid()) {
+		try {
+			info.handler->fieldEnd();
+		}
+		catch (LoggableException ex) {
+			logger().log(ex);
+		}
+	}
+
+	// This command no longer is in a field
+	info.fieldEnd();
+
+	// As soon as this command had a default field, remove it from the stack
+	if (info.hadDefaultField) {
+		endCurrentHandler();
+	}
+}
+
+void Stack::annotationStart(const Variant &className, const Variant &args)
+{
+	// TODO
+}
+
+void Stack::annotationEnd(const Variant &className, const Variant &elementName)
+{
+	// TODO
+}
+
+void Stack::token(Variant token)
+{
+	// TODO
 }
 }
 }
diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp
index 294f7ec..76eefd9 100644
--- a/src/core/parser/stack/Stack.hpp
+++ b/src/core/parser/stack/Stack.hpp
@@ -43,6 +43,7 @@ namespace ousia {
 
 // Forward declarations
 class ParserContext;
+class Logger;
 
 namespace parser_stack {
 
@@ -75,7 +76,13 @@ public:
 	bool valid : 1;
 
 	/**
-	 * Set to true if the handler currently is in a filed.
+	 * Set to true if this is an implicit handler, that was created when the
+	 * current stack state was deduced.
+	 */
+	bool implicit : 1;
+
+	/**
+	 * Set to true if the handler currently is in a field.
 	 */
 	bool inField : 1;
 
@@ -99,12 +106,17 @@ public:
 	/**
 	 * Set to true, if the default field was already started.
 	 */
-	bool hasDefaultField : 1;
+	bool hadDefaultField : 1;
 
 	/**
 	 * Default constructor of the HandlerInfo class.
 	 */
 	HandlerInfo();
+	/**
+	 * Constructor of the HandlerInfo class, allows to set all flags manually.
+	 */
+	HandlerInfo(bool valid, bool implicit, bool inField, bool inDefaultField,
+	            bool inImplicitDefaultField, bool inValidField);
 
 	/**
 	 * Constructor of the HandlerInfo class, taking a shared_ptr to the handler
@@ -129,7 +141,6 @@ public:
 	void fieldEnd();
 };
 
-
 /**
  * The Stack class is a pushdown automaton responsible for turning a command
  * stream into a tree of Node instances. It does so by following a state
@@ -154,6 +165,11 @@ private:
 	 */
 	std::vector<HandlerInfo> stack;
 
+	/**
+	 * Return the reference in the Logger instance stored within the context.
+	 */
+	Logger &logger();
+
 	/**
 	 * Used internally to get all expected command names for the current state.
 	 * This function is used to build error messages.
@@ -164,7 +180,7 @@ private:
 
 	/**
 	 * Returns the targetState for a command with the given name that can be
-	 * reached from for the current state.
+	 * reached from the current state.
 	 *
 	 * @param name is the name of the requested command.
 	 * @return nullptr if no target state was found, a pointer at the target
@@ -172,6 +188,17 @@ private:
 	 */
 	const State *findTargetState(const std::string &name);
 
+	/**
+	 * Returns the targetState for a command with the given name that can be
+	 * reached from the current state, also including the wildcard "*" state.
+	 * Throws an exception if the given target state is not a valid identifier.
+	 *
+	 * @param name is the name of the requested command.
+	 * @return nullptr if no target state was found, a pointer at the target
+	 * state otherwise.
+	 */
+	const State *findTargetStateOrWildcard(const std::string &name);
+
 	/**
 	 * Tries to reconstruct the parser state from the Scope instance of the
 	 * ParserContext given in the constructor. This functionality is needed for
@@ -180,6 +207,33 @@ private:
 	 */
 	void deduceState();
 
+	/**
+	 * Returns a reference at the current HandlerInfo instance (or a stub
+	 * HandlerInfo instance if the stack is empty).
+	 */
+	HandlerInfo &currentInfo();
+
+	/**
+	 * Returns a reference at the last HandlerInfo instance (or a stub
+	 * HandlerInfo instance if the stack has only one element).
+	 */
+	HandlerInfo &lastInfo();
+
+	/**
+	 * Ends the current handler and removes the corresponding element from the
+	 * stack.
+	 */
+	void endCurrentHandler();
+
+	/**
+	 * Tries to start a default field for the current handler, if currently the
+	 * handler is not inside a field and did not have a default field yet.
+	 *
+	 * @return true if the handler is inside a field, false if no field could
+	 * be started.
+	 */
+	bool ensureHandlerIsInField();
+
 	/**
 	 * Returns true if all handlers on the stack are currently valid, or false
 	 * if at least one handler is invalid.
@@ -196,9 +250,8 @@ public:
 	 * @param states is a map containing the command names and pointers at the
 	 * corresponding State instances.
 	 */
-	Stack(
-	    ParserContext &ctx,
-	    const std::multimap<std::string, const State *> &states);
+	Stack(ParserContext &ctx,
+	      const std::multimap<std::string, const State *> &states);
 
 	/**
 	 * Destructor of the Stack class.
@@ -231,6 +284,15 @@ public:
 	 */
 	void command(const Variant &name, const Variant::mapType &args);
 
+	/**
+	 * Function that shuold be called whenever character data is found in the
+	 * input stream. May only be called if the currently is a command on the
+	 * stack.
+	 *
+	 * @param data is a string variant containing the data that has been found.
+	 */
+	void data(const Variant &data);
+
 	/**
 	 * Function that should be called whenever a new field starts. Fields of the
 	 * same command may not be separated by calls to data or annotations. Doing
@@ -247,15 +309,6 @@ public:
 	 */
 	void fieldEnd();
 
-	/**
-	 * Function that shuold be called whenever character data is found in the
-	 * input stream. May only be called if the currently is a command on the
-	 * stack.
-	 *
-	 * @param data is a string variant containing the data that has been found.
-	 */
-	void data(const Variant &data);
-
 	/**
 	 * Function that should be called whenever an annotation starts.
 	 *
diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp
new file mode 100644
index 0000000..7cc8bc5
--- /dev/null
+++ b/test/core/parser/stack/StackTest.cpp
@@ -0,0 +1,639 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+#include <core/frontend/TerminalLogger.hpp>
+#include <core/parser/stack/Handler.hpp>
+#include <core/parser/stack/Stack.hpp>
+#include <core/parser/stack/State.hpp>
+
+#include <core/StandaloneEnvironment.hpp>
+
+namespace ousia {
+namespace parser_stack {
+
+// Build an instance of the StandaloneEnvironment used for this unit test
+static TerminalLogger logger(std::cerr, true);
+// static ConcreteLogger logger;
+static StandaloneEnvironment env(logger);
+
+namespace {
+
+struct Tracker {
+	int startCount;
+	int endCount;
+	int fieldStartCount;
+	int fieldEndCount;
+	int annotationStartCount;
+	int annotationEndCount;
+	int dataCount;
+
+	Variant::mapType startArgs;
+	bool fieldStartIsDefault;
+	size_t fieldStartIdx;
+	Variant annotationStartClassName;
+	Variant::mapType annotationStartArgs;
+	Variant annotationEndClassName;
+	Variant annotationEndElementName;
+	Variant dataData;
+
+	bool startResult;
+	bool fieldStartSetIsDefault;
+	bool fieldStartResult;
+	bool annotationStartResult;
+	bool annotationEndResult;
+	bool dataResult;
+
+	Tracker() { reset(); }
+
+	void reset()
+	{
+		startCount = 0;
+		endCount = 0;
+		fieldStartCount = 0;
+		fieldEndCount = 0;
+		annotationStartCount = 0;
+		annotationEndCount = 0;
+		dataCount = 0;
+
+		startArgs = Variant::mapType{};
+		fieldStartIsDefault = false;
+		fieldStartIdx = 0;
+		annotationStartClassName = Variant::fromString(std::string{});
+		annotationStartArgs = Variant::mapType{};
+		annotationEndClassName = Variant::fromString(std::string{});
+		annotationEndElementName = Variant::fromString(std::string{});
+		dataData = Variant::fromString(std::string{});
+
+		startResult = true;
+		fieldStartSetIsDefault = false;
+		fieldStartResult = true;
+		annotationStartResult = true;
+		annotationEndResult = true;
+		dataResult = true;
+	}
+
+	void expect(int startCount, int endCount, int fieldStartCount,
+	            int fieldEndCount, int annotationStartCount,
+	            int annotationEndCount, int dataCount)
+	{
+		EXPECT_EQ(startCount, this->startCount);
+		EXPECT_EQ(endCount, this->endCount);
+		EXPECT_EQ(fieldStartCount, this->fieldStartCount);
+		EXPECT_EQ(fieldEndCount, this->fieldEndCount);
+		EXPECT_EQ(annotationStartCount, this->annotationStartCount);
+		EXPECT_EQ(annotationEndCount, this->annotationEndCount);
+		EXPECT_EQ(dataCount, this->dataCount);
+	}
+};
+
+static Tracker tracker;
+
+class TestHandler : public Handler {
+private:
+	TestHandler(const HandlerData &handlerData) : Handler(handlerData) {}
+
+public:
+	bool start(const Variant::mapType &args)
+	{
+		tracker.startCount++;
+		tracker.startArgs = args;
+		return tracker.startResult;
+	}
+
+	void end() { tracker.endCount++; }
+
+	bool fieldStart(bool &isDefault, size_t fieldIdx)
+	{
+		tracker.fieldStartCount++;
+		tracker.fieldStartIsDefault = isDefault;
+		tracker.fieldStartIdx = fieldIdx;
+		if (tracker.fieldStartSetIsDefault) {
+			isDefault = true;
+		}
+		return tracker.fieldStartResult;
+	}
+
+	void fieldEnd() { tracker.fieldEndCount++; }
+
+	bool annotationStart(const Variant &className, const Variant::mapType &args)
+	{
+		tracker.annotationStartCount++;
+		tracker.annotationStartClassName = className;
+		tracker.annotationStartArgs = args;
+		return tracker.annotationStartResult;
+	}
+
+	bool annotationEnd(const Variant &className, const Variant &elementName)
+	{
+		tracker.annotationEndCount++;
+		tracker.annotationEndClassName = className;
+		tracker.annotationEndElementName = elementName;
+		return tracker.annotationEndResult;
+	}
+
+	bool data(const Variant &data)
+	{
+		tracker.dataCount++;
+		tracker.dataData = data;
+		return tracker.dataResult;
+	}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TestHandler(handlerData);
+	}
+};
+}
+
+namespace States {
+static const State Document =
+    StateBuilder().parent(&None).elementHandler(TestHandler::create);
+static const State Body =
+    StateBuilder().parent(&Document).elementHandler(TestHandler::create);
+static const State Empty =
+    StateBuilder().parent(&Document).elementHandler(TestHandler::create);
+static const State Special =
+    StateBuilder().parent(&All).elementHandler(TestHandler::create);
+static const State Arguments =
+    StateBuilder().parent(&None).elementHandler(TestHandler::create).arguments(
+        {Argument::Int("a"), Argument::String("b")});
+static const State BodyChildren =
+    StateBuilder().parent(&Body).elementHandler(TestHandler::create);
+static const State Any =
+    StateBuilder().parents({&None, &Any}).elementHandler(TestHandler::create);
+
+static const std::multimap<std::string, const State *> TestHandlers{
+    {"document", &Document},
+    {"body", &Body},
+    {"empty", &Empty},
+    {"special", &Special},
+    {"arguments", &Arguments},
+    {"*", &BodyChildren}};
+
+static const std::multimap<std::string, const State *> AnyHandlers{{"*", &Any}};
+}
+
+TEST(Stack, basicTest)
+{
+	tracker.reset();
+	logger.reset();
+	{
+		Stack s{env.context, States::TestHandlers};
+
+		EXPECT_EQ("", s.currentCommandName());
+		EXPECT_EQ(&States::None, &s.currentState());
+
+		s.command("document", {});
+		s.fieldStart(true);
+		s.data("test1");
+
+		EXPECT_EQ("document", s.currentCommandName());
+		EXPECT_EQ(&States::Document, &s.currentState());
+		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.command("body", {});
+		s.fieldStart(true);
+		s.data("test2");
+		EXPECT_EQ("body", s.currentCommandName());
+		EXPECT_EQ(&States::Body, &s.currentState());
+		tracker.expect(2, 0, 2, 0, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.command("inner", {});
+		s.fieldStart(true);
+		EXPECT_EQ("inner", s.currentCommandName());
+		EXPECT_EQ(&States::BodyChildren, &s.currentState());
+
+		s.fieldEnd();
+		tracker.expect(3, 1, 3, 1, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.fieldEnd();
+		EXPECT_EQ("document", s.currentCommandName());
+		EXPECT_EQ(&States::Document, &s.currentState());
+		tracker.expect(3, 2, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.command("body", {});
+		s.fieldStart(true);
+		s.data("test3");
+		EXPECT_EQ("body", s.currentCommandName());
+		EXPECT_EQ(&States::Body, &s.currentState());
+		s.fieldEnd();
+		tracker.expect(4, 3, 4, 3, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc
+
+		EXPECT_EQ("document", s.currentCommandName());
+		EXPECT_EQ(&States::Document, &s.currentState());
+
+		s.fieldEnd();
+		tracker.expect(4, 4, 4, 4, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc
+
+		EXPECT_EQ("", s.currentCommandName());
+		EXPECT_EQ(&States::None, &s.currentState());
+	}
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, errorInvalidCommands)
+{
+	Stack s{env.context, States::TestHandlers};
+	tracker.reset();
+	EXPECT_THROW(s.command("body", {}), LoggableException);
+	s.command("document", {});
+	s.fieldStart(true);
+	EXPECT_THROW(s.command("document", {}), LoggableException);
+	s.command("empty", {});
+	s.fieldStart(true);
+	EXPECT_THROW(s.command("body", {}), LoggableException);
+	s.command("special", {});
+	s.fieldStart(true);
+	s.fieldEnd();
+	s.fieldEnd();
+	s.fieldEnd();
+	EXPECT_EQ(&States::None, &s.currentState());
+	ASSERT_THROW(s.fieldEnd(), LoggableException);
+	ASSERT_THROW(s.data("test"), LoggableException);
+}
+
+TEST(Stack, validation)
+{
+	Stack s{env.context, States::TestHandlers};
+	tracker.reset();
+	logger.reset();
+
+	s.command("arguments", {});
+	EXPECT_TRUE(logger.hasError());
+	s.fieldStart(true);
+	s.fieldEnd();
+
+	logger.reset();
+	s.command("arguments", {{"a", 5}});
+	EXPECT_TRUE(logger.hasError());
+	s.fieldStart(true);
+	s.fieldEnd();
+
+	logger.reset();
+	s.command("arguments", {{"a", 5}, {"b", "test"}});
+	EXPECT_FALSE(logger.hasError());
+	s.fieldStart(true);
+	s.fieldEnd();
+}
+
+TEST(Stack, invalidCommandName)
+{
+	Stack s{env.context, States::AnyHandlers};
+	tracker.reset();
+	logger.reset();
+
+	s.command("a", {});
+	s.fieldStart(true);
+	s.fieldEnd();
+	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+	s.command("a_", {});
+	s.fieldStart(true);
+	s.fieldEnd();
+	tracker.expect(2, 2, 2, 2, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+	s.command("a_:b", {});
+	s.fieldStart(true);
+	s.fieldEnd();
+	tracker.expect(3, 3, 3, 3, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+	ASSERT_THROW(s.command("_a", {}), LoggableException);
+	ASSERT_THROW(s.command("a:", {}), LoggableException);
+	ASSERT_THROW(s.command("a:_b", {}), LoggableException);
+	tracker.expect(3, 3, 3, 3, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+}
+
+TEST(Stack, multipleFields)
+{
+	tracker.reset();
+	logger.reset();
+	{
+		Stack s{env.context, States::AnyHandlers};
+
+		s.command("a", {{"a", false}});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		EXPECT_EQ("a", s.currentCommandName());
+		EXPECT_EQ(Variant::mapType({{"a", false}}), tracker.startArgs);
+
+		s.fieldStart(false);
+		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		EXPECT_FALSE(tracker.fieldStartIsDefault);
+		EXPECT_EQ(0U, tracker.fieldStartIdx);
+
+		s.data("test");
+		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc
+		EXPECT_EQ("test", tracker.dataData);
+
+		s.fieldEnd();
+		tracker.expect(1, 0, 1, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.fieldStart(false);
+		tracker.expect(1, 0, 2, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc
+		EXPECT_FALSE(tracker.fieldStartIsDefault);
+		EXPECT_EQ(1U, tracker.fieldStartIdx);
+
+		s.data("test2");
+		tracker.expect(1, 0, 2, 1, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc
+		EXPECT_EQ("test2", tracker.dataData);
+
+		s.fieldEnd();
+		tracker.expect(1, 0, 2, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.fieldStart(true);
+		tracker.expect(1, 0, 3, 2, 0, 0, 2);  // sc, ec, fsc, fse, asc, aec, dc
+		EXPECT_TRUE(tracker.fieldStartIsDefault);
+		EXPECT_EQ(2U, tracker.fieldStartIdx);
+
+		s.data("test3");
+		tracker.expect(1, 0, 3, 2, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc
+		EXPECT_EQ("test3", tracker.dataData);
+
+		s.fieldEnd();
+		tracker.expect(1, 1, 3, 3, 0, 0, 3);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, implicitDefaultFieldOnNewCommand)
+{
+	tracker.reset();
+	logger.reset();
+	{
+		Stack s{env.context, States::AnyHandlers};
+
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.command("b", {});
+		tracker.expect(2, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(2, 2, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, implicitDefaultFieldOnNewCommandWithExplicitDefaultField)
+{
+	tracker.reset();
+	logger.reset();
+	{
+		Stack s{env.context, States::AnyHandlers};
+
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("a", s.currentCommandName());
+
+		s.command("b", {});
+		tracker.expect(2, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("b", s.currentCommandName());
+		s.fieldStart(true);
+		s.fieldEnd();
+		tracker.expect(2, 1, 2, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("a", s.currentCommandName());
+	}
+	tracker.expect(2, 2, 2, 2, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, noImplicitDefaultFieldOnIncompatibleCommand)
+{
+	tracker.reset();
+	logger.reset();
+	{
+		Stack s{env.context, States::AnyHandlers};
+
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("a", s.currentCommandName());
+
+		tracker.fieldStartResult = false;
+		s.command("b", {});
+		tracker.expect(2, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("b", s.currentCommandName());
+	}
+	tracker.expect(2, 2, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven)
+{
+	tracker.reset();
+	logger.reset();
+	{
+		Stack s{env.context, States::AnyHandlers};
+
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("a", s.currentCommandName());
+		s.fieldStart(true);
+		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("a", s.currentCommandName());
+		s.fieldEnd();
+		tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("", s.currentCommandName());
+
+		s.command("b", {});
+		tracker.expect(2, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("b", s.currentCommandName());
+	}
+	tracker.expect(2, 2, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, implicitDefaultFieldOnData)
+{
+	tracker.reset();
+	logger.reset();
+	{
+		Stack s{env.context, States::AnyHandlers};
+
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.data("test");
+		tracker.expect(1, 0, 1, 0, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(1, 1, 1, 1, 0, 0, 1);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, autoFieldEnd)
+{
+	tracker.reset();
+	logger.reset();
+
+	{
+		Stack s{env.context, States::AnyHandlers};
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(1, 1, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, autoImplicitFieldEnd)
+{
+	tracker.reset();
+	logger.reset();
+
+	{
+		Stack s{env.context, States::AnyHandlers};
+		s.command("a", {});
+		s.command("b", {});
+		s.command("c", {});
+		s.command("d", {});
+		s.command("e", {});
+		s.fieldStart(true);
+		s.fieldEnd();
+		tracker.expect(5, 1, 5, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(5, 5, 5, 5, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, invalidDefaultField)
+{
+	tracker.reset();
+	logger.reset();
+
+	{
+		Stack s{env.context, States::AnyHandlers};
+		s.command("a", {});
+		tracker.fieldStartResult = false;
+		s.fieldStart(true);
+		s.fieldEnd();
+		tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_FALSE(logger.hasError());
+}
+
+TEST(Stack, errorInvalidDefaultFieldData)
+{
+	tracker.reset();
+	logger.reset();
+
+	{
+		Stack s{env.context, States::AnyHandlers};
+		s.command("a", {});
+		tracker.fieldStartResult = false;
+		s.fieldStart(true);
+		ASSERT_FALSE(logger.hasError());
+		s.data("test");
+		ASSERT_TRUE(logger.hasError());
+		s.fieldEnd();
+		tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+}
+
+TEST(Stack, errorInvalidFieldData)
+{
+	tracker.reset();
+	logger.reset();
+
+	{
+		Stack s{env.context, States::AnyHandlers};
+		s.command("a", {});
+		tracker.fieldStartResult = false;
+		ASSERT_FALSE(logger.hasError());
+		s.fieldStart(false);
+		ASSERT_TRUE(logger.hasError());
+		s.data("test");
+		s.fieldEnd();
+		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+}
+
+TEST(Stack, errorFieldStartNoCommand)
+{
+	tracker.reset();
+	logger.reset();
+
+	Stack s{env.context, States::AnyHandlers};
+	ASSERT_THROW(s.fieldStart(false), LoggableException);
+	ASSERT_THROW(s.fieldStart(true), LoggableException);
+	tracker.expect(0, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+}
+
+TEST(Stack, errorMutlipleFieldStarts)
+{
+	tracker.reset();
+	logger.reset();
+
+	{
+		Stack s{env.context, States::AnyHandlers};
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.fieldStart(false);
+		ASSERT_FALSE(logger.hasError());
+		s.fieldStart(false);
+		ASSERT_TRUE(logger.hasError());
+		tracker.expect(1, 0, 1, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.fieldEnd();
+		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+}
+
+TEST(Stack, errorMutlipleFieldEnds)
+{
+	tracker.reset();
+	logger.reset();
+
+	{
+		Stack s{env.context, States::AnyHandlers};
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.fieldStart(false);
+		s.fieldEnd();
+		ASSERT_FALSE(logger.hasError());
+		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		s.fieldEnd();
+		ASSERT_TRUE(logger.hasError());
+		tracker.expect(1, 0, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	}
+	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+}
+
+TEST(Stack, errorOpenField)
+{
+	tracker.reset();
+	logger.reset();
+
+	{
+		Stack s{env.context, States::AnyHandlers};
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+
+		s.fieldStart(false);
+		ASSERT_FALSE(logger.hasError());
+	}
+	ASSERT_TRUE(logger.hasError());
+	tracker.expect(1, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+}
+}
+}
+
-- 
cgit v1.2.3


From 36b712c9f9af5c008fbd193392546fd472a35189 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:15:08 +0100
Subject: Added lonely comment to StandaloneEnvironment

---
 test/core/StandaloneEnvironment.hpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'test')

diff --git a/test/core/StandaloneEnvironment.hpp b/test/core/StandaloneEnvironment.hpp
index a9dcdce..790bad4 100644
--- a/test/core/StandaloneEnvironment.hpp
+++ b/test/core/StandaloneEnvironment.hpp
@@ -31,6 +31,10 @@
 
 namespace ousia {
 
+/**
+ * StandaloneEnvironment is a class used for quickly setting up an entire
+ * environment needed for running an Ousia instance.
+ */
 struct StandaloneEnvironment {
 	ConcreteLogger &logger;
 	Manager manager;
-- 
cgit v1.2.3


From b1aade072781b0eca9b4c2fd15c360ec7d3ed25f Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 14:53:34 +0100
Subject: Removed legacy test file

---
 test/core/parser/ParserStackTest.cpp | 177 -----------------------------------
 1 file changed, 177 deletions(-)
 delete mode 100644 test/core/parser/ParserStackTest.cpp

(limited to 'test')

diff --git a/test/core/parser/ParserStackTest.cpp b/test/core/parser/ParserStackTest.cpp
deleted file mode 100644
index 3a0decb..0000000
--- a/test/core/parser/ParserStackTest.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <iostream>
-
-#include <gtest/gtest.h>
-
-#include <core/parser/ParserStack.hpp>
-#include <core/StandaloneEnvironment.hpp>
-
-namespace ousia {
-
-ConcreteLogger logger;
-
-static int startCount = 0;
-static int endCount = 0;
-static int dataCount = 0;
-
-class TestHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override { startCount++; }
-
-	void end() override { endCount++; }
-
-	void data(const std::string &data, int field) override { dataCount++; }
-
-	static Handler *create(const HandlerData &data)
-	{
-		return new TestHandler(data);
-	}
-};
-
-namespace ParserStates {
-static const ParserState Document =
-    ParserStateBuilder().parent(&None).elementHandler(TestHandler::create);
-static const ParserState Body = ParserStateBuilder()
-                                    .parent(&Document)
-                                    .elementHandler(TestHandler::create);
-static const ParserState Empty =
-    ParserStateBuilder().parent(&Document).elementHandler(TestHandler::create);
-static const ParserState Special =
-    ParserStateBuilder().parent(&All).elementHandler(TestHandler::create);
-static const ParserState Arguments =
-    ParserStateBuilder()
-        .parent(&None)
-        .elementHandler(TestHandler::create)
-        .arguments({Argument::Int("a"), Argument::String("b")});
-static const ParserState BodyChildren =
-    ParserStateBuilder()
-        .parent(&Body)
-        .elementHandler(TestHandler::create);
-
-static const std::multimap<std::string, const ParserState *> TestHandlers{
-    {"document", &Document},
-    {"body", &Body},
-    {"empty", &Empty},
-    {"special", &Special},
-    {"arguments", &Arguments},
-    {"*", &BodyChildren}};
-}
-
-TEST(ParserStack, simpleTest)
-{
-	StandaloneEnvironment env(logger);
-	ParserStack s{env.context, ParserStates::TestHandlers};
-
-	startCount = 0;
-	endCount = 0;
-	dataCount = 0;
-
-	EXPECT_EQ("", s.currentCommandName());
-	EXPECT_EQ(&ParserStates::None, &s.currentState());
-
-	s.start("document", {});
-	s.data("test1");
-
-	EXPECT_EQ("document", s.currentCommandName());
-	EXPECT_EQ(&ParserStates::Document, &s.currentState());
-	EXPECT_EQ(1, startCount);
-	EXPECT_EQ(1, dataCount);
-
-	s.start("body", {});
-	s.data("test2");
-	EXPECT_EQ("body", s.currentCommandName());
-	EXPECT_EQ(&ParserStates::Body, &s.currentState());
-	EXPECT_EQ(2, startCount);
-	EXPECT_EQ(2, dataCount);
-
-	s.start("inner", {});
-	EXPECT_EQ("inner", s.currentCommandName());
-	EXPECT_EQ(&ParserStates::BodyChildren, &s.currentState());
-	s.end();
-	EXPECT_EQ(3, startCount);
-	EXPECT_EQ(1, endCount);
-
-	s.end();
-	EXPECT_EQ(2, endCount);
-
-	EXPECT_EQ("document", s.currentCommandName());
-	EXPECT_EQ(&ParserStates::Document, &s.currentState());
-
-	s.start("body", {});
-	s.data("test3");
-	EXPECT_EQ("body", s.currentCommandName());
-	EXPECT_EQ(&ParserStates::Body, &s.currentState());
-	s.end();
-	EXPECT_EQ(4, startCount);
-	EXPECT_EQ(3, dataCount);
-	EXPECT_EQ(3, endCount);
-
-	EXPECT_EQ("document", s.currentCommandName());
-	EXPECT_EQ(&ParserStates::Document, &s.currentState());
-
-	s.end();
-	EXPECT_EQ(4, endCount);
-
-	EXPECT_EQ("", s.currentCommandName());
-	EXPECT_EQ(&ParserStates::None, &s.currentState());
-}
-
-TEST(ParserStack, errorHandling)
-{
-	StandaloneEnvironment env(logger);
-	ParserStack s{env.context, ParserStates::TestHandlers};
-
-	EXPECT_THROW(s.start("body", {}), OusiaException);
-	s.start("document", {});
-	EXPECT_THROW(s.start("document", {}), OusiaException);
-	s.start("empty", {});
-	EXPECT_THROW(s.start("body", {}), OusiaException);
-	s.start("special", {});
-	s.end();
-	s.end();
-	s.end();
-	EXPECT_EQ(&ParserStates::None, &s.currentState());
-	ASSERT_THROW(s.end(), OusiaException);
-	ASSERT_THROW(s.data("test", 1), OusiaException);
-}
-
-TEST(ParserStack, validation)
-{
-	StandaloneEnvironment env(logger);
-	ParserStack s{env.context, ParserStates::TestHandlers};
-
-	logger.reset();
-	s.start("arguments", {});
-	EXPECT_TRUE(logger.hasError());
-	s.end();
-
-	s.start("arguments", {{"a", 5}});
-	EXPECT_TRUE(logger.hasError());
-	s.end();
-
-	logger.reset();
-	s.start("arguments", {{"a", 5}, {"b", "test"}});
-	EXPECT_FALSE(logger.hasError());
-	s.end();
-}
-}
-
-- 
cgit v1.2.3


From 53c92aea125a439858d03245a914e20f55e5bcba Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 14:53:50 +0100
Subject: Fixed GCC 4.9 warnings

---
 test/core/model/DomainTest.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'test')

diff --git a/test/core/model/DomainTest.cpp b/test/core/model/DomainTest.cpp
index 8fcbdf2..4cb4331 100644
--- a/test/core/model/DomainTest.cpp
+++ b/test/core/model/DomainTest.cpp
@@ -242,7 +242,7 @@ TEST(Descriptor, getDefaultFields)
 	    A->createPrimitiveFieldDescriptor(sys->getStringType(), logger);
 	// now we should find that.
 	auto fields = A->getDefaultFields();
-	ASSERT_EQ(1, fields.size());
+	ASSERT_EQ(1U, fields.size());
 	ASSERT_EQ(A_prim_field, fields[0]);
 
 	// remove that field from A and add it to another class.
@@ -258,7 +258,7 @@ TEST(Descriptor, getDefaultFields)
 	// but we should find it again if we set B as superclass of A.
 	A->setSuperclass(B, logger);
 	fields = A->getDefaultFields();
-	ASSERT_EQ(1, fields.size());
+	ASSERT_EQ(1U, fields.size());
 	ASSERT_EQ(A_prim_field, fields[0]);
 
 	// and we should not be able to find it if we override the field.
@@ -277,7 +277,7 @@ TEST(Descriptor, getDefaultFields)
 
 	// now we should find that.
 	fields = A->getDefaultFields();
-	ASSERT_EQ(1, fields.size());
+	ASSERT_EQ(1U, fields.size());
 	ASSERT_EQ(C_field, fields[0]);
 
 	// add another transparent child class to A with a daughter class that has
@@ -296,7 +296,7 @@ TEST(Descriptor, getDefaultFields)
 
 	// now we should find both primitive fields, but the C field first.
 	fields = A->getDefaultFields();
-	ASSERT_EQ(2, fields.size());
+	ASSERT_EQ(2U, fields.size());
 	ASSERT_EQ(C_field, fields[0]);
 	ASSERT_EQ(F_field, fields[1]);
 }
@@ -321,7 +321,7 @@ TEST(Descriptor, getPermittedChildren)
 	 * in between.
 	 */
 	NodeVector<StructuredClass> children = book->getPermittedChildren();
-	ASSERT_EQ(3, children.size());
+	ASSERT_EQ(3U, children.size());
 	ASSERT_EQ(section, children[0]);
 	ASSERT_EQ(paragraph, children[1]);
 	ASSERT_EQ(text, children[2]);
@@ -331,7 +331,7 @@ TEST(Descriptor, getPermittedChildren)
 	    mgr, "Subclass", domain, Cardinality::any(), text, true, false)};
 	// And that should be in the result list as well now.
 	children = book->getPermittedChildren();
-	ASSERT_EQ(4, children.size());
+	ASSERT_EQ(4U, children.size());
 	ASSERT_EQ(section, children[0]);
 	ASSERT_EQ(paragraph, children[1]);
 	ASSERT_EQ(text, children[2]);
-- 
cgit v1.2.3


From 69ebaddbeaea1aa651a0f0babbf9283240d9c07b Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 14:58:46 +0100
Subject: Slightly adapted Handler instances to new Handler, once again passing
 non-const references to data and start, using "parseGenericString" in
 DocumentHandler for resolving non-string values, added unit test for testing
 whether "end()" is not called if "start()" fails.

---
 src/core/parser/stack/DocumentHandler.cpp      | 141 +++++++++++++++----------
 src/core/parser/stack/DocumentHandler.hpp      |  96 ++++++++++++++---
 src/core/parser/stack/DomainHandler.cpp        |  51 +++++----
 src/core/parser/stack/DomainHandler.hpp        |  28 +++--
 src/core/parser/stack/Handler.cpp              |  20 ++--
 src/core/parser/stack/Handler.hpp              |  31 +++---
 src/core/parser/stack/ImportIncludeHandler.cpp |  54 ++--------
 src/core/parser/stack/ImportIncludeHandler.hpp |  13 ++-
 src/core/parser/stack/Stack.cpp                |  18 ++--
 src/core/parser/stack/TypesystemHandler.cpp    |  48 ++++-----
 src/core/parser/stack/TypesystemHandler.hpp    | 131 +++++++++++++++++------
 test/core/parser/stack/StackTest.cpp           |  41 +++++--
 12 files changed, 422 insertions(+), 250 deletions(-)

(limited to 'test')

diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp
index ba7430d..b28f0fb 100644
--- a/src/core/parser/stack/DocumentHandler.cpp
+++ b/src/core/parser/stack/DocumentHandler.cpp
@@ -22,22 +22,28 @@
 
 #include <core/common/RttiBuilder.hpp>
 #include <core/common/Utils.hpp>
+#include <core/common/VariantReader.hpp>
 #include <core/model/Document.hpp>
 #include <core/model/Domain.hpp>
+#include <core/model/Project.hpp>
 #include <core/model/Typesystem.hpp>
 #include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
 
 namespace ousia {
+namespace parser_stack {
 
 /* DocumentHandler */
 
-void DocumentHandler::start(Variant::mapType &args)
+bool DocumentHandler::start(Variant::mapType &args)
 {
 	Rooted<Document> document =
-	    project()->createDocument(args["name"].asString());
+	    context().getProject()->createDocument(args["name"].asString());
 	document->setLocation(location());
 	scope().push(document);
 	scope().setFlag(ParserFlag::POST_HEAD, false);
+
+	return true;
 }
 
 void DocumentHandler::end() { scope().pop(); }
@@ -48,7 +54,7 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,
                                     std::string &fieldName,
                                     DocumentEntity *&parent, bool &inField)
 {
-	// check if the parent in the structure tree was an explicit field
+	// Check if the parent in the structure tree was an explicit field
 	// reference.
 	inField = parentNode->isa(&RttiTypes::DocumentField);
 	if (inField) {
@@ -56,10 +62,11 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,
 		parentNode = scope().selectOrThrow(
 		    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity});
 	} else {
-		// if it wasn't an explicit reference, we use the default field.
+		// If it wasn't an explicit reference, we use the default field.
 		fieldName = DEFAULT_FIELD_NAME;
 	}
-	// reference the parent entity explicitly.
+
+	// Reference the parent entity explicitly.
 	parent = nullptr;
 	if (parentNode->isa(&RttiTypes::StructuredEntity)) {
 		parent = static_cast<DocumentEntity *>(
@@ -73,6 +80,8 @@ void DocumentChildHandler::preamble(Handle<Node> parentNode,
 void DocumentChildHandler::createPath(const NodeVector<Node> &path,
                                       DocumentEntity *&parent)
 {
+	// TODO (@benjamin): These should be pushed onto the scope and poped once
+	// the scope is left. Otherwise stuff may not be correclty resolved.
 	size_t S = path.size();
 	for (size_t p = 1; p < S; p = p + 2) {
 		parent = static_cast<DocumentEntity *>(
@@ -82,7 +91,7 @@ void DocumentChildHandler::createPath(const NodeVector<Node> &path,
 	}
 }
 
-void DocumentChildHandler::start(Variant::mapType &args)
+bool DocumentChildHandler::start(Variant::mapType &args)
 {
 	scope().setFlag(ParserFlag::POST_HEAD, true);
 	Rooted<Node> parentNode = scope().selectOrThrow(
@@ -95,7 +104,7 @@ void DocumentChildHandler::start(Variant::mapType &args)
 
 	preamble(parentNode, fieldName, parent, inField);
 
-	// try to find a FieldDescriptor for the given tag if we are not in a
+	// Try to find a FieldDescriptor for the given tag if we are not in a
 	// field already. This does _not_ try to construct transparent paths
 	// in between.
 	if (!inField && parent != nullptr &&
@@ -104,7 +113,7 @@ void DocumentChildHandler::start(Variant::mapType &args)
 		    new DocumentField(parentNode->getManager(), fieldName, parentNode)};
 		field->setLocation(location());
 		scope().push(field);
-		return;
+		return true;
 	}
 
 	// Otherwise create a new StructuredEntity
@@ -147,27 +156,39 @@ void DocumentChildHandler::start(Variant::mapType &args)
 	}
 	entity->setLocation(location());
 	scope().push(entity);
+	return true;
 }
 
 void DocumentChildHandler::end() { scope().pop(); }
 
-std::pair<bool, Variant> DocumentChildHandler::convertData(
-    Handle<FieldDescriptor> field, Logger &logger, const std::string &data)
+bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field,
+                                       Variant &data, Logger &logger)
 {
-	// if the content is supposed to be of type string, we can finish
-	// directly.
-	auto vts = field->getPrimitiveType()->getVariantTypes();
-	if (std::find(vts.begin(), vts.end(), VariantType::STRING) != vts.end()) {
-		return std::make_pair(true, Variant::fromString(data));
+	bool valid = true;
+	Rooted<Type> type = field->getPrimitiveType();
+
+	// If the content is supposed to be of type string, we only need to check
+	// for "magic" values -- otherwise just call the "parseGenericString"
+	// function on the string data
+	if (type->isa(&RttiTypes::StringType)) {
+		const std::string &str = data.asString();
+		// TODO: Referencing constants with "." separator should also work
+		if (Utils::isIdentifier(str)) {
+			data.markAsMagic();
+		}
+	} else {
+		// Parse the string as generic string, assign the result
+		auto res = VariantReader::parseGenericString(
+		    data.asString(), logger, data.getLocation().getSourceId(),
+		    data.getLocation().getStart());
+		data = res.second;
 	}
 
-	// then try to parse the content using the type specification.
-	auto res = field->getPrimitiveType()->read(
-	    data, logger, location().getSourceId(), location().getStart());
-	return res;
+	// Now try to resolve the value for the primitive type
+	return valid && scope().resolveValue(data, type, logger);
 }
 
-void DocumentChildHandler::data(const std::string &data, int fieldIdx)
+bool DocumentChildHandler::data(Variant &data)
 {
 	Rooted<Node> parentNode = scope().selectOrThrow(
 	    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity,
@@ -180,11 +201,10 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx)
 	preamble(parentNode, fieldName, parent, inField);
 
 	Rooted<Descriptor> desc = parent->getDescriptor();
-	/*
-	 * We distinguish two cases here: One for fields that are given.
-	 */
+
+	// We distinguish two cases here: One for fields that are given.
 	if (fieldName != DEFAULT_FIELD_NAME) {
-		// retrieve the actual FieldDescriptor
+		// Retrieve the actual FieldDescriptor
 		Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName);
 		if (field == nullptr) {
 			logger().error(
@@ -192,49 +212,57 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx)
 			        fieldName + "\" exists in descriptor\"" + desc->getName() +
 			        "\".",
 			    location());
-			return;
+			return false;
 		}
-		// if it is not primitive at all, we can't parse the content.
+		// If it is not primitive at all, we can't parse the content.
 		if (!field->isPrimitive()) {
 			logger().error(std::string("Can't handle data because field \"") +
 			                   fieldName + "\" of descriptor \"" +
 			                   desc->getName() + "\" is not primitive!",
 			               location());
-			return;
+			return false;
 		}
-		// then try to parse the content using the type specification.
-		auto res = convertData(field, logger(), data);
-		// add it as primitive content.
-		if (res.first) {
-			parent->createChildDocumentPrimitive(res.second, fieldName);
+
+		// Try to convert the data variable to the correct format, abort if this
+		// does not work
+		if (!convertData(field, data, logger())) {
+			return false;
 		}
+
+		// Add it as primitive content
+		parent->createChildDocumentPrimitive(data, fieldName);
+		return true;
 	} else {
-		/*
-		 * The second case is for primitive fields. Here we search through
-		 * all FieldDescriptors that allow primitive content at this point
-		 * and could be constructed via transparent intermediate entities.
-		 * We then try to parse the data using the type specified by the
-		 * respective field. If that does not work we proceed to the next
-		 * possible field.
-		 */
-		// retrieve all fields.
+		// The second case is for primitive fields. Here we search through
+		// all FieldDescriptors that allow primitive content at this point
+		// and could be constructed via transparent intermediate entities.
+		// We then try to parse the data using the type specified by the
+		// respective field. If that does not work we proceed to the next
+		// possible field.
 		NodeVector<FieldDescriptor> fields = desc->getDefaultFields();
 		std::vector<LoggerFork> forks;
 		for (auto field : fields) {
-			// then try to parse the content using the type specification.
+			// Then try to parse the content using the type specification
 			forks.emplace_back(logger().fork());
-			auto res = convertData(field, forks.back(), data);
-			if (res.first) {
-				forks.back().commit();
-				// if that worked, construct the necessary path.
-				auto pathRes = desc->pathTo(field, logger());
-				assert(pathRes.second);
-				NodeVector<Node> path = pathRes.first;
-				createPath(path, parent);
-				// then create the primitive element.
-				parent->createChildDocumentPrimitive(res.second, fieldName);
-				return;
+
+			// Try to convert the data variable to the correct format, abort if
+			// this does not work
+			if (!convertData(field, data, forks.back())) {
+				return false;
 			}
+
+			// Show possible warnings that were emitted by this type conversion
+			forks.back().commit();
+
+			// If that worked, construct the necessary path
+			auto pathRes = desc->pathTo(field, logger());
+			assert(pathRes.second);
+			NodeVector<Node> path = pathRes.first;
+			createPath(path, parent);
+
+			// Then create the primitive element
+			parent->createChildDocumentPrimitive(data, fieldName);
+			return true;
 		}
 		logger().error("Could not read data with any of the possible fields:");
 		for (size_t f = 0; f < fields.size(); f++) {
@@ -242,11 +270,14 @@ void DocumentChildHandler::data(const std::string &data, int fieldIdx)
 			              SourceLocation{}, MessageMode::NO_CONTEXT);
 			forks[f].commit();
 		}
+		return false;
 	}
+	return true;
+}
 }
 
 namespace RttiTypes {
-const Rtti DocumentField =
-    RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node);
+const Rtti DocumentField = RttiBuilder<ousia::parser_stack::DocumentField>(
+                               "DocumentField").parent(&Node);
 }
 }
diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp
index 475fe69..7dc4c86 100644
--- a/src/core/parser/stack/DocumentHandler.hpp
+++ b/src/core/parser/stack/DocumentHandler.hpp
@@ -19,13 +19,19 @@
 /**
  * @file DocumentHandler.hpp
  *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ * Contains the Handler instances used for parsing actual documents. This file
+ * declares to classes: The Document handler which parses the "document" command
+ * that introduces a new document and the "DocumentChildHandler" which parses
+ * the actual user defined tags.
+ *
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
  */
 
-#ifndef _OUSIA_DOCUMENT_HANDLER_HPP_
-#define _OUSIA_DOCUMENT_HANDLER_HPP_
+#ifndef _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_
+#define _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_
 
 #include <core/common/Variant.hpp>
+#include <core/model/Node.hpp>
 
 #include "Handler.hpp"
 
@@ -36,53 +42,117 @@ class Rtti;
 class DocumentEntity;
 class FieldDescriptor;
 
+namespace parser_stack {
+/**
+ * The DocumentHandler class parses the "document" tag that is used to introduce
+ * a new document. Note that this tag is not mandatory in osml files -- if the
+ * first command is not a typesystem, domain or any other declarative command,
+ * the DocumentHandler will be implicitly called.
+ */
 class DocumentHandler : public StaticHandler {
 public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
+	/**
+	 * Creates a new instance of the ImportHandler.
+	 *
+	 * @param handlerData is the data that is passed to the constructor of the
+	 * Handler base class and used there to e.g. access the ParserContext and
+	 * the Callbacks instance.
+	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new DocumentHandler{handlerData};
 	}
 };
 
+/**
+ * Temporary Node that is being pushed onto the ParserScope in order to indicate
+ * the field the parser is currently in. The name of the Node is stored in the
+ * "name" field of the parent Node class.
+ */
 class DocumentField : public Node {
 public:
 	using Node::Node;
 };
 
+/**
+ * The DocumentChildHandler class performs the actual parsing of the user
+ * defined elements in an Ousía document.
+ */
 class DocumentChildHandler : public StaticHandler {
 private:
+	/**
+	 * Code shared by both the start() and the end() method. Checks whether the
+	 * parser currently is in a field and returns the name of this field.
+	 *
+	 * @param parentNode is the next possible parent node (a document,
+	 * a structured entity, an annotation entity or a field).
+	 * @param fieldName is an output parameter to which the name of the current
+	 * field is written (or unchanged if we're not in a field).
+	 * @param parent is an output parameter to which the parent document entity
+	 * will be written.
+	 * @param inField is set to true if we actually are in a field.
+	 */
 	void preamble(Handle<Node> parentNode, std::string &fieldName,
 	              DocumentEntity *&parent, bool &inField);
 
+	/**
+	 * Constructs all structured entites along the given path and inserts them
+	 * into the document graph.
+	 *
+	 * @param path is a path containing an alternating series of structured
+	 * classes and fields.
+	 * @pram parent is the root entity from which the process should be started.
+	 */
 	void createPath(const NodeVector<Node> &path, DocumentEntity *&parent);
 
-	std::pair<bool, Variant> convertData(Handle<FieldDescriptor> field,
-	                                     Logger &logger,
-	                                     const std::string &data);
+	/**
+	 * Tries to convert the given data to the type that is specified in the
+	 * given primitive field.
+	 *
+	 * @param field is the primitive field for which the data is intended.
+	 * @param data is the is the data that should be converted, the result is
+	 * written into this argument as output variable.
+	 * @param logger is the Logger instance to which error messages should be
+	 * written. Needed to allow the convertData function to write to a forked
+	 * Logger instance.
+	 * @return true if the operation was successful, false otherwise.
+	 */
+	bool convertData(Handle<FieldDescriptor> field, Variant &data,
+	                 Logger &logger);
 
 public:
-	using Handler::Handler;
+	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
-
-	bool data(const Variant &data) override;
-
+	bool data(Variant &data) override;
+
+	/**
+	 * Creates a new instance of the DocumentChildHandler.
+	 *
+	 * @param handlerData is the data that is passed to the constructor of the
+	 * Handler base class and used there to e.g. access the ParserContext and
+	 * the Callbacks instance.
+	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new DocumentChildHandler{handlerData};
 	}
 };
+}
 
 namespace RttiTypes {
+/**
+ * RttiType for the internally used DocumentField class.
+ */
 extern const Rtti DocumentField;
 }
 }
-#endif
+
+#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */
+
diff --git a/src/core/parser/stack/DomainHandler.cpp b/src/core/parser/stack/DomainHandler.cpp
index 6571717..cb12543 100644
--- a/src/core/parser/stack/DomainHandler.cpp
+++ b/src/core/parser/stack/DomainHandler.cpp
@@ -20,25 +20,30 @@
 
 #include <core/common/RttiBuilder.hpp>
 #include <core/model/Domain.hpp>
+#include <core/model/Project.hpp>
 #include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
 
 namespace ousia {
+namespace parser_stack {
 
 /* DomainHandler */
 
-void DomainHandler::start(Variant::mapType &args)
+bool DomainHandler::start(Variant::mapType &args)
 {
-	Rooted<Domain> domain = project()->createDomain(args["name"].asString());
+	Rooted<Domain> domain =
+	    context().getProject()->createDomain(args["name"].asString());
 	domain->setLocation(location());
 
 	scope().push(domain);
+	return true;
 }
 
 void DomainHandler::end() { scope().pop(); }
 
 /* DomainStructHandler */
 
-void DomainStructHandler::start(Variant::mapType &args)
+bool DomainStructHandler::start(Variant::mapType &args)
 {
 	scope().setFlag(ParserFlag::POST_HEAD, true);
 
@@ -63,12 +68,13 @@ void DomainStructHandler::start(Variant::mapType &args)
 	}
 
 	scope().push(structuredClass);
+	return true;
 }
 
 void DomainStructHandler::end() { scope().pop(); }
 
 /* DomainAnnotationHandler */
-void DomainAnnotationHandler::start(Variant::mapType &args)
+bool DomainAnnotationHandler::start(Variant::mapType &args)
 {
 	scope().setFlag(ParserFlag::POST_HEAD, true);
 
@@ -79,13 +85,14 @@ void DomainAnnotationHandler::start(Variant::mapType &args)
 	annotationClass->setLocation(location());
 
 	scope().push(annotationClass);
+	return true;
 }
 
 void DomainAnnotationHandler::end() { scope().pop(); }
 
 /* DomainAttributesHandler */
 
-void DomainAttributesHandler::start(Variant::mapType &args)
+bool DomainAttributesHandler::start(Variant::mapType &args)
 {
 	// Fetch the current typesystem and create the struct node
 	Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
@@ -94,13 +101,14 @@ void DomainAttributesHandler::start(Variant::mapType &args)
 	attrDesc->setLocation(location());
 
 	scope().push(attrDesc);
+	return true;
 }
 
 void DomainAttributesHandler::end() { scope().pop(); }
 
 /* DomainFieldHandler */
 
-void DomainFieldHandler::start(Variant::mapType &args)
+bool DomainFieldHandler::start(Variant::mapType &args)
 {
 	FieldDescriptor::FieldType type;
 	if (args["isSubtree"].asBool()) {
@@ -116,13 +124,14 @@ void DomainFieldHandler::start(Variant::mapType &args)
 	field->setLocation(location());
 
 	scope().push(field);
+	return true;
 }
 
 void DomainFieldHandler::end() { scope().pop(); }
 
 /* DomainFieldRefHandler */
 
-void DomainFieldRefHandler::start(Variant::mapType &args)
+bool DomainFieldRefHandler::start(Variant::mapType &args)
 {
 	Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
 
@@ -135,13 +144,14 @@ void DomainFieldRefHandler::start(Variant::mapType &args)
 			        field.cast<FieldDescriptor>(), logger);
 		    }
 		});
+	return true;
 }
 
 void DomainFieldRefHandler::end() {}
 
 /* DomainPrimitiveHandler */
 
-void DomainPrimitiveHandler::start(Variant::mapType &args)
+bool DomainPrimitiveHandler::start(Variant::mapType &args)
 {
 	Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
 
@@ -167,13 +177,14 @@ void DomainPrimitiveHandler::start(Variant::mapType &args)
 	});
 
 	scope().push(field);
+	return true;
 }
 
 void DomainPrimitiveHandler::end() { scope().pop(); }
 
 /* DomainChildHandler */
 
-void DomainChildHandler::start(Variant::mapType &args)
+bool DomainChildHandler::start(Variant::mapType &args)
 {
 	Rooted<FieldDescriptor> field = scope().selectOrThrow<FieldDescriptor>();
 
@@ -186,13 +197,12 @@ void DomainChildHandler::start(Variant::mapType &args)
 			        child.cast<StructuredClass>());
 		    }
 		});
+	return true;
 }
 
-void DomainChildHandler::end() {}
-
 /* DomainParentHandler */
 
-void DomainParentHandler::start(Variant::mapType &args)
+bool DomainParentHandler::start(Variant::mapType &args)
 {
 	Rooted<StructuredClass> strct = scope().selectOrThrow<StructuredClass>();
 
@@ -200,12 +210,14 @@ void DomainParentHandler::start(Variant::mapType &args)
 	    new DomainParent(strct->getManager(), args["ref"].asString(), strct)};
 	parent->setLocation(location());
 	scope().push(parent);
+	return true;
 }
 
 void DomainParentHandler::end() { scope().pop(); }
 
 /* DomainParentFieldHandler */
-void DomainParentFieldHandler::start(Variant::mapType &args)
+
+bool DomainParentFieldHandler::start(Variant::mapType &args)
 {
 	Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>();
 	FieldDescriptor::FieldType type;
@@ -233,13 +245,12 @@ void DomainParentFieldHandler::start(Variant::mapType &args)
 			    field->addChild(strct.cast<StructuredClass>());
 		    }
 		});
+	return true;
 }
 
-void DomainParentFieldHandler::end() {}
-
 /* DomainParentFieldRefHandler */
 
-void DomainParentFieldRefHandler::start(Variant::mapType &args)
+bool DomainParentFieldRefHandler::start(Variant::mapType &args)
 {
 	Rooted<DomainParent> parentNameNode = scope().selectOrThrow<DomainParent>();
 
@@ -265,12 +276,12 @@ void DomainParentFieldRefHandler::start(Variant::mapType &args)
 			    field->addChild(strct.cast<StructuredClass>());
 		    }
 		});
+	return true;
+}
 }
-
-void DomainParentFieldRefHandler::end() {}
 
 namespace RttiTypes {
-const Rtti DomainParent =
-    RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node);
+const Rtti DomainParent = RttiBuilder<ousia::parser_stack::DomainParent>(
+                              "DomainParent").parent(&Node);
 }
 }
diff --git a/src/core/parser/stack/DomainHandler.hpp b/src/core/parser/stack/DomainHandler.hpp
index 5e8ea60..917d65d 100644
--- a/src/core/parser/stack/DomainHandler.hpp
+++ b/src/core/parser/stack/DomainHandler.hpp
@@ -19,17 +19,24 @@
 /**
  * @file DomainHandler.hpp
  *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ * Contains the Handler classes used for parsing Domain descriptors. This
+ * includes the "domain" tag and all describing tags below the "domain" tag.
+ *
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
  */
 
 #ifndef _OUSIA_DOMAIN_HANDLER_HPP_
 #define _OUSIA_DOMAIN_HANDLER_HPP_
 
 #include <core/common/Variant.hpp>
+#include <core/model/Node.hpp>
 
 #include "Handler.hpp"
 
 namespace ousia {
+namespace parser_stack {
+
+// TODO: Documentation
 
 // Forward declarations
 class Rtti;
@@ -39,7 +46,6 @@ public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
 	static Handler *create(const HandlerData &handlerData)
@@ -53,7 +59,6 @@ public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
 	static Handler *create(const HandlerData &handlerData)
@@ -67,7 +72,6 @@ public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
 	static Handler *create(const HandlerData &handlerData)
@@ -81,7 +85,6 @@ public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
 	static Handler *create(const HandlerData &handlerData)
@@ -95,7 +98,6 @@ public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
 	static Handler *create(const HandlerData &handlerData)
@@ -109,7 +111,6 @@ public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
 	static Handler *create(const HandlerData &handlerData)
@@ -123,7 +124,6 @@ public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
 	static Handler *create(const HandlerData &handlerData)
@@ -138,8 +138,6 @@ public:
 
 	bool start(Variant::mapType &args) override;
 
-	void end() override;
-
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new DomainChildHandler{handlerData};
@@ -160,7 +158,6 @@ public:
 	using StaticHandler::StaticHandler;
 
 	bool start(Variant::mapType &args) override;
-
 	void end() override;
 
 	static Handler *create(const HandlerData &handlerData)
@@ -175,8 +172,6 @@ public:
 
 	bool start(Variant::mapType &args) override;
 
-	void end() override;
-
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new DomainParentFieldHandler{handlerData};
@@ -189,12 +184,15 @@ public:
 
 	bool start(Variant::mapType &args) override;
 
-	void end() override;
-
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new DomainParentFieldRefHandler{handlerData};
 	}
 };
 }
+
+namespace RttiTypes {
+extern const Rtti DomainParent;
+}
+}
 #endif
diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp
index a608f7f..86000c4 100644
--- a/src/core/parser/stack/Handler.cpp
+++ b/src/core/parser/stack/Handler.cpp
@@ -65,6 +65,8 @@ Logger &Handler::logger()
 
 const SourceLocation &Handler::location() const { return handlerData.location; }
 
+const std::string &Handler::name() const { return handlerData.name; }
+
 void Handler::setWhitespaceMode(WhitespaceMode whitespaceMode)
 {
 	/*handlerData.callbacks.setWhitespaceMode(whitespaceMode);*/
@@ -80,7 +82,7 @@ void Handler::unregisterToken(const std::string &token)
 	/*handlerData.callbacks.unregisterToken(token);*/
 }
 
-const std::string &Handler::getName() const { return handlerData.name; }
+const std::string &Handler::getName() const { return name(); }
 
 const State &Handler::getState() const { return handlerData.state; }
 
@@ -92,7 +94,7 @@ const SourceLocation &Handler::getLocation() const { return location(); }
 
 /* Class EmptyHandler */
 
-bool EmptyHandler::start(const Variant::mapType &args)
+bool EmptyHandler::start(Variant::mapType &args)
 {
 	// Just accept anything
 	return true;
@@ -115,7 +117,7 @@ void EmptyHandler::fieldEnd()
 }
 
 bool EmptyHandler::annotationStart(const Variant &className,
-                                   const Variant::mapType &args)
+                                   Variant::mapType &args)
 {
 	// Accept any data
 	return true;
@@ -128,7 +130,7 @@ bool EmptyHandler::annotationEnd(const Variant &className,
 	return true;
 }
 
-bool EmptyHandler::data(const Variant &data)
+bool EmptyHandler::data(Variant &data)
 {
 	// Support any data
 	return true;
@@ -141,7 +143,7 @@ Handler *EmptyHandler::create(const HandlerData &handlerData)
 
 /* Class StaticHandler */
 
-bool StaticHandler::start(const Variant::mapType &args)
+bool StaticHandler::start(Variant::mapType &args)
 {
 	// Do nothing in the default implementation, accept anything
 	return true;
@@ -169,7 +171,7 @@ void StaticHandler::fieldEnd()
 }
 
 bool StaticHandler::annotationStart(const Variant &className,
-                                    const Variant::mapType &args)
+                                    Variant::mapType &args)
 {
 	// No annotations supported
 	return false;
@@ -182,7 +184,7 @@ bool StaticHandler::annotationEnd(const Variant &className,
 	return false;
 }
 
-bool StaticHandler::data(const Variant &data)
+bool StaticHandler::data(Variant &data)
 {
 	logger().error("Did not expect any data here", data);
 	return false;
@@ -196,7 +198,7 @@ StaticFieldHandler::StaticFieldHandler(const HandlerData &handlerData,
 {
 }
 
-bool StaticFieldHandler::start(const Variant::mapType &args)
+bool StaticFieldHandler::start(Variant::mapType &args)
 {
 	if (!argName.empty()) {
 		auto it = args.find(argName);
@@ -225,7 +227,7 @@ void StaticFieldHandler::end()
 	}
 }
 
-bool StaticFieldHandler::data(const Variant &data)
+bool StaticFieldHandler::data(Variant &data)
 {
 	// Call the doHandle function if this has not been done before
 	if (!handled) {
diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp
index eeaf555..7cda7a4 100644
--- a/src/core/parser/stack/Handler.hpp
+++ b/src/core/parser/stack/Handler.hpp
@@ -151,6 +151,13 @@ protected:
 	 */
 	const SourceLocation &location() const;
 
+	/**
+	 * Returns the command name for which the handler was created.
+	 *
+	 * @return a const reference at the command name.
+	 */
+	const std::string &name() const;
+
 public:
 	/**
 	 * Virtual destructor.
@@ -229,7 +236,7 @@ public:
 	 * @return true if the handler was successful in starting the element it
 	 * represents, false otherwise.
 	 */
-	virtual bool start(const Variant::mapType &args) = 0;
+	virtual bool start(Variant::mapType &args) = 0;
 
 	/**
 	 * Called before the command for which this handler is defined ends (is
@@ -270,7 +277,7 @@ public:
 	 * if an error occurred.
 	 */
 	virtual bool annotationStart(const Variant &className,
-	                             const Variant::mapType &args) = 0;
+	                             Variant::mapType &args) = 0;
 
 	/**
 	 * Called whenever an annotation ends while this handler is active. The
@@ -296,7 +303,7 @@ public:
 	 * location.
 	 * @return true if the data could be handled, false otherwise.
 	 */
-	virtual bool data(const Variant &data) = 0;
+	virtual bool data(Variant &data) = 0;
 };
 
 /**
@@ -318,15 +325,15 @@ protected:
 	using Handler::Handler;
 
 public:
-	bool start(const Variant::mapType &args) override;
+	bool start(Variant::mapType &args) override;
 	void end() override;
 	bool fieldStart(bool &isDefault, size_t fieldIdx) override;
 	void fieldEnd() override;
 	bool annotationStart(const Variant &className,
-	                     const Variant::mapType &args) override;
+	                     Variant::mapType &args) override;
 	bool annotationEnd(const Variant &className,
 	                   const Variant &elementName) override;
-	bool data(const Variant &data) override;
+	bool data(Variant &data) override;
 
 	/**
 	 * Creates an instance of the EmptyHandler class.
@@ -344,15 +351,15 @@ protected:
 	using Handler::Handler;
 
 public:
-	bool start(const Variant::mapType &args) override;
+	bool start(Variant::mapType &args) override;
 	void end() override;
 	bool fieldStart(bool &isDefault, size_t fieldIdx) override;
 	void fieldEnd() override;
 	bool annotationStart(const Variant &className,
-	                     const Variant::mapType &args) override;
+	                     Variant::mapType &args) override;
 	bool annotationEnd(const Variant &className,
 	                   const Variant &elementName) override;
-	bool data(const Variant &data) override;
+	bool data(Variant &data) override;
 };
 
 /**
@@ -400,12 +407,12 @@ protected:
 	 * @param args are the arguments that were given in the "start" function.
 	 */
 	virtual void doHandle(const Variant &fieldData,
-	                      const Variant::mapType &args) = 0;
+	                      Variant::mapType &args) = 0;
 
 public:
-	bool start(const Variant::mapType &args) override;
+	bool start(Variant::mapType &args) override;
 	void end() override;
-	bool data(const Variant &data) override;
+	bool data(Variant &data) override;
 };
 }
 }
diff --git a/src/core/parser/stack/ImportIncludeHandler.cpp b/src/core/parser/stack/ImportIncludeHandler.cpp
index 94ee82d..797dd8d 100644
--- a/src/core/parser/stack/ImportIncludeHandler.cpp
+++ b/src/core/parser/stack/ImportIncludeHandler.cpp
@@ -18,48 +18,16 @@
 
 #include "ImportIncludeHandler.hpp"
 
+#include <core/model/RootNode.hpp>
 #include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
 
 namespace ousia {
-
-/* ImportIncludeHandler */
-
-void ImportIncludeHandler::start(Variant::mapType &args)
-{
-	rel = args["rel"].asString();
-	type = args["type"].asString();
-	src = args["src"].asString();
-	srcInArgs = !src.empty();
-}
-
-void ImportIncludeHandler::data(const std::string &data, int field)
-{
-	if (srcInArgs) {
-		logger().error("\"src\" attribute has already been set");
-		return;
-	}
-	if (field != 0) {
-		logger().error("Command has only one field.");
-		return;
-	}
-	src.append(data);
-}
+namespace parser_stack {
 
 /* ImportHandler */
 
-void ImportHandler::start(Variant::mapType &args)
-{
-	ImportIncludeHandler::start(args);
-
-	// Make sure imports are still possible
-	if (scope().getFlag(ParserFlag::POST_HEAD)) {
-		logger().error("Imports must be listed before other commands.",
-		               location());
-		return;
-	}
-}
-
-void ImportHandler::end()
+void ImportHandler::doHandle(const Variant &fieldData, Variant::mapType &args)
 {
 	// Fetch the last node and check whether an import is valid at this
 	// position
@@ -75,8 +43,9 @@ void ImportHandler::end()
 
 	// Perform the actual import, register the imported node within the leaf
 	// node
-	Rooted<Node> imported =
-	    context().import(src, type, rel, leafRootNode->getReferenceTypes());
+	Rooted<Node> imported = context().import(
+	    fieldData.asString(), args["type"].asString(), args["rel"].asString(),
+	    leafRootNode->getReferenceTypes());
 	if (imported != nullptr) {
 		leafRootNode->reference(imported);
 	}
@@ -84,13 +53,10 @@ void ImportHandler::end()
 
 /* IncludeHandler */
 
-void IncludeHandler::start(Variant::mapType &args)
+void IncludeHandler::doHandle(const Variant &fieldData, Variant::mapType &args)
 {
-	ImportIncludeHandler::start(args);
+	context().include(fieldData.asString(), args["type"].asString(),
+	                  args["rel"].asString(), {&RttiTypes::Node});
 }
-
-void IncludeHandler::end()
-{
-	context().include(src, type, rel, {&RttiTypes::Node});
 }
 }
diff --git a/src/core/parser/stack/ImportIncludeHandler.hpp b/src/core/parser/stack/ImportIncludeHandler.hpp
index f9abe55..8f3d3d0 100644
--- a/src/core/parser/stack/ImportIncludeHandler.hpp
+++ b/src/core/parser/stack/ImportIncludeHandler.hpp
@@ -29,9 +29,11 @@
 #define _OUSIA_IMPORT_INCLUDE_HANDLER_HPP_
 
 #include <core/common/Variant.hpp>
-#include <core/parser/ParserStack.hpp>
+
+#include "Handler.hpp"
 
 namespace ousia {
+namespace parser_stack {
 
 /**
  * The ImportHandler is responsible for handling the "import" command. An import
@@ -46,7 +48,7 @@ public:
 	using StaticFieldHandler::StaticFieldHandler;
 
 	void doHandle(const Variant &fieldData,
-	                      const Variant::mapType &args) override;
+	              Variant::mapType &args) override;
 
 	/**
 	 * Creates a new instance of the ImportHandler.
@@ -57,7 +59,7 @@ public:
 	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
-		return new ImportHandler{handlerData};
+		return new ImportHandler{handlerData, "src"};
 	}
 };
 
@@ -72,7 +74,7 @@ public:
 	using StaticFieldHandler::StaticFieldHandler;
 
 	void doHandle(const Variant &fieldData,
-	                      const Variant::mapType &args) override;
+	              Variant::mapType &args) override;
 
 	/**
 	 * Creates a new instance of the IncludeHandler.
@@ -83,8 +85,9 @@ public:
 	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
-		return new IncludeHandler{handlerData};
+		return new IncludeHandler{handlerData, "src"};
 	}
 };
 }
+}
 #endif
diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp
index d84a19c..47f7d2c 100644
--- a/src/core/parser/stack/Stack.cpp
+++ b/src/core/parser/stack/Stack.cpp
@@ -316,8 +316,6 @@ void Stack::command(const Variant &name, const Variant::mapType &args)
 		                        name);
 	}
 
-	State const *lastTargetState = nullptr;
-	Variant::mapType canonicalArgs;
 	while (true) {
 		// Try to find a target state for the given command, if none can be
 		// found and the current command does not have an open field, then try
@@ -342,14 +340,6 @@ void Stack::command(const Variant &name, const Variant::mapType &args)
 		// Fork the logger. We do not want any validation errors to skip
 		LoggerFork loggerFork = logger().fork();
 
-		// Canonicalize the arguments (if this has not already been done), allow
-		// additional arguments
-		if (lastTargetState != targetState) {
-			canonicalArgs = args;
-			targetState->arguments.validateMap(canonicalArgs, loggerFork, true);
-			lastTargetState = targetState;
-		}
-
 		// Instantiate the handler and push it onto the stack
 		HandlerConstructor ctor = targetState->elementHandler
 		                              ? targetState->elementHandler
@@ -369,6 +359,11 @@ void Stack::command(const Variant &name, const Variant::mapType &args)
 		bool validStack = handlersValid();
 		info.valid = false;
 		if (validStack) {
+			// Canonicalize the arguments (if this has not already been done),
+			// allow additional arguments
+			Variant::mapType canonicalArgs = args;
+			targetState->arguments.validateMap(canonicalArgs, loggerFork, true);
+
 			handler->setLogger(loggerFork);
 			try {
 				info.valid = handler->start(canonicalArgs);
@@ -430,7 +425,8 @@ void Stack::data(const Variant &data)
 			// Pass the data to the current Handler instance
 			bool valid = false;
 			try {
-				valid = info.handler->data(data);
+				Variant dataCopy = data;
+				valid = info.handler->data(dataCopy);
 			}
 			catch (LoggableException ex) {
 				loggerFork.log(ex);
diff --git a/src/core/parser/stack/TypesystemHandler.cpp b/src/core/parser/stack/TypesystemHandler.cpp
index 2cc7dfb..34f64f9 100644
--- a/src/core/parser/stack/TypesystemHandler.cpp
+++ b/src/core/parser/stack/TypesystemHandler.cpp
@@ -20,28 +20,33 @@
 
 #include <core/model/Typesystem.hpp>
 #include <core/parser/ParserScope.hpp>
+#include <core/parser/ParserContext.hpp>
+
 
 namespace ousia {
+namespace parser_stack {
 
 /* TypesystemHandler */
 
-void TypesystemHandler::start(Variant::mapType &args)
+bool TypesystemHandler::start(Variant::mapType &args)
 {
 	// Create the typesystem instance
 	Rooted<Typesystem> typesystem =
-	    project()->createTypesystem(args["name"].asString());
+	    context().getProject()->createTypesystem(args["name"].asString());
 	typesystem->setLocation(location());
 
 	// Push the typesystem onto the scope, set the POST_HEAD flag to true
 	scope().push(typesystem);
 	scope().setFlag(ParserFlag::POST_HEAD, false);
+
+	return true;
 }
 
 void TypesystemHandler::end() { scope().pop(); }
 
 /* TypesystemEnumHandler */
 
-void TypesystemEnumHandler::start(Variant::mapType &args)
+bool TypesystemEnumHandler::start(Variant::mapType &args)
 {
 	scope().setFlag(ParserFlag::POST_HEAD, true);
 
@@ -52,33 +57,24 @@ void TypesystemEnumHandler::start(Variant::mapType &args)
 	enumType->setLocation(location());
 
 	scope().push(enumType);
+
+	return true;
 }
 
 void TypesystemEnumHandler::end() { scope().pop(); }
 
 /* TypesystemEnumEntryHandler */
 
-void TypesystemEnumEntryHandler::start(Variant::mapType &args) {}
-
-void TypesystemEnumEntryHandler::end()
+void TypesystemEnumEntryHandler::doHandle(const Variant &fieldData,
+                                          Variant::mapType &args)
 {
 	Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>();
-	enumType->addEntry(entry, logger());
-}
-
-void TypesystemEnumEntryHandler::data(const std::string &data, int field)
-{
-	if (field != 0) {
-		// TODO: This should be stored in the HandlerData
-		logger().error("Enum entry only has one field.");
-		return;
-	}
-	entry.append(data);
+	enumType->addEntry(fieldData.asString(), logger());
 }
 
 /* TypesystemStructHandler */
 
-void TypesystemStructHandler::start(Variant::mapType &args)
+bool TypesystemStructHandler::start(Variant::mapType &args)
 {
 	scope().setFlag(ParserFlag::POST_HEAD, true);
 
@@ -103,13 +99,15 @@ void TypesystemStructHandler::start(Variant::mapType &args)
 			});
 	}
 	scope().push(structType);
+
+	return true;
 }
 
 void TypesystemStructHandler::end() { scope().pop(); }
 
 /* TypesystemStructFieldHandler */
 
-void TypesystemStructFieldHandler::start(Variant::mapType &args)
+bool TypesystemStructFieldHandler::start(Variant::mapType &args)
 {
 	// Read the argument values
 	const std::string &name = args["name"].asString();
@@ -142,13 +140,13 @@ void TypesystemStructFieldHandler::start(Variant::mapType &args)
 			}
 		});
 	}
-}
 
-void TypesystemStructFieldHandler::end() {}
+	return true;
+}
 
 /* TypesystemConstantHandler */
 
-void TypesystemConstantHandler::start(Variant::mapType &args)
+bool TypesystemConstantHandler::start(Variant::mapType &args)
 {
 	scope().setFlag(ParserFlag::POST_HEAD, true);
 
@@ -169,7 +167,9 @@ void TypesystemConstantHandler::start(Variant::mapType &args)
 			    constant.cast<Constant>()->setType(type.cast<Type>(), logger);
 		    }
 		});
-}
 
-void TypesystemConstantHandler::end() {}
+	return true;
 }
+}
+}
+
diff --git a/src/core/parser/stack/TypesystemHandler.hpp b/src/core/parser/stack/TypesystemHandler.hpp
index 76a7bc9..55277a1 100644
--- a/src/core/parser/stack/TypesystemHandler.hpp
+++ b/src/core/parser/stack/TypesystemHandler.hpp
@@ -19,6 +19,9 @@
 /**
  * @file TypesystemHandler.hpp
  *
+ * Contains the Handler classes used to parse Typesystem descriptions. The
+ * Handlers parse all the tags found below and including the "typesystem" tag.
+ *
  * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
  */
 
@@ -26,96 +29,154 @@
 #define _OUSIA_TYPESYSTEM_HANDLER_HPP_
 
 #include <core/common/Variant.hpp>
-#include <core/parser/ParserStack.hpp>
+
+#include "Handler.hpp"
 
 namespace ousia {
+namespace parser_stack {
 
-class TypesystemHandler : public Handler {
+/**
+ * Handles the occurance of the "typesystem" tag. Creates a new Typesystem
+ * instance and places it on the ParserScope.
+ */
+class TypesystemHandler : public StaticHandler {
 public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override;
+	using StaticHandler::StaticHandler;
 
+	bool start(Variant::mapType &args) override;
 	void end() override;
 
+	/**
+	 * Creates a new instance of the TypesystemHandler.
+	 *
+	 * @param handlerData is the data that is passed to the constructor of the
+	 * Handler base class and used there to e.g. access the ParserContext and
+	 * the Callbacks instance.
+	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new TypesystemHandler{handlerData};
 	}
 };
 
-class TypesystemEnumHandler : public Handler {
+/**
+ * Handles the occurance of the "enum" tag. Creates a new EnumType instance and
+ * places it on the ParserScope.
+ */
+class TypesystemEnumHandler : public StaticHandler {
 public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override;
+	using StaticHandler::StaticHandler;
 
+	bool start(Variant::mapType &args) override;
 	void end() override;
 
+	/**
+	 * Creates a new instance of the TypesystemEnumHandler.
+	 *
+	 * @param handlerData is the data that is passed to the constructor of the
+	 * Handler base class and used there to e.g. access the ParserContext and
+	 * the Callbacks instance.
+	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new TypesystemEnumHandler{handlerData};
 	}
 };
 
-class TypesystemEnumEntryHandler : public Handler {
+/**
+ * Handles the occurance of the "entry" tag within an "enum" tag. Creates a new
+ * EnumType instance and places it on the ParserScope.
+ */
+class TypesystemEnumEntryHandler : public StaticFieldHandler {
 public:
-	using Handler::Handler;
-
-	std::string entry;
-
-	void start(Variant::mapType &args) override;
-
-	void end() override;
-
-	void data(const std::string &data, int field) override;
-
+	using StaticFieldHandler::StaticFieldHandler;
+
+	void doHandle(const Variant &fieldData,
+	              Variant::mapType &args) override;
+
+	/**
+	 * Creates a new instance of the TypesystemEnumEntryHandler.
+	 *
+	 * @param handlerData is the data that is passed to the constructor of the
+	 * Handler base class and used there to e.g. access the ParserContext and
+	 * the Callbacks instance.
+	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
-		return new TypesystemEnumEntryHandler{handlerData};
+		return new TypesystemEnumEntryHandler{handlerData, "name"};
 	}
 };
 
-class TypesystemStructHandler : public Handler {
+/**
+ * Handles the occurance of the "struct" tag within a typesystem description.
+ * Creates a new StructType instance and places it on the ParserScope.
+ */
+class TypesystemStructHandler : public StaticHandler {
 public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override;
+	using StaticHandler::StaticHandler;
 
+	bool start(Variant::mapType &args) override;
 	void end() override;
 
+	/**
+	 * Creates a new instance of the TypesystemStructHandler.
+	 *
+	 * @param handlerData is the data that is passed to the constructor of the
+	 * Handler base class and used there to e.g. access the ParserContext and
+	 * the Callbacks instance.
+	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new TypesystemStructHandler{handlerData};
 	}
 };
 
-class TypesystemStructFieldHandler : public Handler {
+/**
+ * Handles the occurance of the "field" tag within a typesystem structure
+ * description. Places a new Attribute instance in the StructType instance
+ * that is currently at the top of the scope.
+ */
+class TypesystemStructFieldHandler : public StaticHandler {
 public:
-	using Handler::Handler;
+	using StaticHandler::StaticHandler;
 
-	void start(Variant::mapType &args) override;
-
-	void end() override;
+	bool start(Variant::mapType &args) override;
 
+	/**
+	 * Creates a new instance of the TypesystemStructFieldHandler.
+	 *
+	 * @param handlerData is the data that is passed to the constructor of the
+	 * Handler base class and used there to e.g. access the ParserContext and
+	 * the Callbacks instance.
+	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new TypesystemStructFieldHandler{handlerData};
 	}
 };
 
-class TypesystemConstantHandler : public Handler {
+/**
+ * Handles the occurance of the "constant" tag within a typesystem structure
+ * description. Places a new Constant instance in the current typesystem.
+ */
+class TypesystemConstantHandler : public StaticHandler {
 public:
-	using Handler::Handler;
+	using StaticHandler::StaticHandler;
 
-	void start(Variant::mapType &args) override;
-
-	void end() override;
+	bool start(Variant::mapType &args) override;
 
+	/**
+	 * Creates a new instance of the TypesystemConstantHandler.
+	 *
+	 * @param handlerData is the data that is passed to the constructor of the
+	 * Handler base class and used there to e.g. access the ParserContext and
+	 * the Callbacks instance.
+	 */
 	static Handler *create(const HandlerData &handlerData)
 	{
 		return new TypesystemConstantHandler{handlerData};
 	}
 };
 }
+}
 #endif
diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp
index 7cc8bc5..321d471 100644
--- a/test/core/parser/stack/StackTest.cpp
+++ b/test/core/parser/stack/StackTest.cpp
@@ -112,16 +112,21 @@ private:
 	TestHandler(const HandlerData &handlerData) : Handler(handlerData) {}
 
 public:
-	bool start(const Variant::mapType &args)
+	bool start(Variant::mapType &args) override
 	{
 		tracker.startCount++;
 		tracker.startArgs = args;
+		if (!tracker.startResult) {
+			logger().error(
+			    "The TestHandler was told not to allow a field start. So it "
+			    "doesn't. The TestHandler always obeys its master.");
+		}
 		return tracker.startResult;
 	}
 
-	void end() { tracker.endCount++; }
+	void end() override { tracker.endCount++; }
 
-	bool fieldStart(bool &isDefault, size_t fieldIdx)
+	bool fieldStart(bool &isDefault, size_t fieldIdx) override
 	{
 		tracker.fieldStartCount++;
 		tracker.fieldStartIsDefault = isDefault;
@@ -132,9 +137,10 @@ public:
 		return tracker.fieldStartResult;
 	}
 
-	void fieldEnd() { tracker.fieldEndCount++; }
+	void fieldEnd() override { tracker.fieldEndCount++; }
 
-	bool annotationStart(const Variant &className, const Variant::mapType &args)
+	bool annotationStart(const Variant &className,
+	                     Variant::mapType &args) override
 	{
 		tracker.annotationStartCount++;
 		tracker.annotationStartClassName = className;
@@ -142,7 +148,8 @@ public:
 		return tracker.annotationStartResult;
 	}
 
-	bool annotationEnd(const Variant &className, const Variant &elementName)
+	bool annotationEnd(const Variant &className,
+	                   const Variant &elementName) override
 	{
 		tracker.annotationEndCount++;
 		tracker.annotationEndClassName = className;
@@ -150,7 +157,7 @@ public:
 		return tracker.annotationEndResult;
 	}
 
-	bool data(const Variant &data)
+	bool data(Variant &data) override
 	{
 		tracker.dataCount++;
 		tracker.dataData = data;
@@ -458,6 +465,26 @@ TEST(Stack, noImplicitDefaultFieldIfDefaultFieldGiven)
 	ASSERT_FALSE(logger.hasError());
 }
 
+TEST(Stack, noEndIfStartFails)
+{
+	tracker.reset();
+	logger.reset();
+	{
+		Stack s{env.context, States::AnyHandlers};
+
+		s.command("a", {});
+		tracker.expect(1, 0, 0, 0, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("a", s.currentCommandName());
+
+		tracker.startResult = false;
+		s.command("b", {});
+		tracker.expect(3, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+		ASSERT_EQ("b", s.currentCommandName());
+	}
+	tracker.expect(3, 1, 1, 1, 0, 0, 0);  // sc, ec, fsc, fse, asc, aec, dc
+	ASSERT_TRUE(logger.hasError());
+}
+
 TEST(Stack, implicitDefaultFieldOnData)
 {
 	tracker.reset();
-- 
cgit v1.2.3


From b7ffeb3dca889aee1c878e2ef0f07644f910dba2 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 20:58:05 +0100
Subject: Made OsxmlEvents interface consistent with Stack commands

---
 src/formats/osxml/OsxmlEventParser.cpp      |  2 +-
 src/formats/osxml/OsxmlEventParser.hpp      | 24 +++++++++--------
 test/formats/osxml/OsxmlEventParserTest.cpp | 41 +++++++++++++----------------
 3 files changed, 32 insertions(+), 35 deletions(-)

(limited to 'test')

diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp
index b4aff77..7404960 100644
--- a/src/formats/osxml/OsxmlEventParser.cpp
+++ b/src/formats/osxml/OsxmlEventParser.cpp
@@ -329,7 +329,7 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 		// Just issue a "commandStart" event in any other case
 		Variant nameVar = Variant::fromString(nameStr);
 		nameVar.setLocation(nameLoc);
-		parser->getEvents().commandStart(nameVar, args);
+		parser->getEvents().command(nameVar, args);
 	}
 }
 
diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp
index aa20ea9..e39245f 100644
--- a/src/formats/osxml/OsxmlEventParser.hpp
+++ b/src/formats/osxml/OsxmlEventParser.hpp
@@ -58,34 +58,36 @@ public:
 	 *
 	 * @param name is a string variant containing name and location of the
 	 * command.
-	 * @param args is a map variant containing the arguments that were given
-	 * to the command.
+	 * @param args is a map containing the arguments that were given to the
+	 * command.
 	 */
-	virtual void commandStart(Variant name, Variant args) = 0;
+	virtual void command(const Variant &name, const Variant::mapType &args) = 0;
 
 	/**
 	 * Called whenever an annotation starts. Note that this implicitly always
 	 * starts the default field of the annotation.
 	 *
-	 * @param name is a string variant containing the name of the annotation
-	 * class and the location of the annotation definition.
+	 * @param className is a string variant containing the name of the
+	 * annotation class and the location of the annotation definition.
 	 * @param args is a map variant containing the arguments that were given
 	 * to the annotation definition.
 	 */
-	virtual void annotationStart(Variant name, Variant args) = 0;
+	virtual void annotationStart(const Variant &className,
+	                             const Variant::mapType &args) = 0;
 
 	/**
 	 * Called whenever the range of an annotation ends. The callee must
 	 * disambiguate the actual annotation that is finished here.
 	 *
-	 * @param name is a string variant containing the name of the annotation
-	 * class that should end here. May be empty (or nullptr), if no elementName
-	 * has been specified at the end of the annotation.
+	 * @param className is a string variant containing the name of the
+	 * annotation class that should end here. May be empty (or nullptr), if no
+	 * elementName has been specified at the end of the annotation.
 	 * @param elementName is the name of the annotation element that should be
 	 * ended here. May be empty (or nullptr), if no elementName has been
 	 * specified at the end of the annotation.
 	 */
-	virtual void annotationEnd(Variant name, Variant elementName) = 0;
+	virtual void annotationEnd(const Variant &className,
+	                           const Variant &elementName) = 0;
 
 	/**
 	 * Called whenever the default field which was implicitly started by
@@ -105,7 +107,7 @@ public:
 	 * @param data is the already parsed data that should be passed to the
 	 * handler.
 	 */
-	virtual void data(Variant data) = 0;
+	virtual void data(const Variant &data) = 0;
 };
 
 /**
diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp
index 06c800f..3293370 100644
--- a/test/formats/osxml/OsxmlEventParserTest.cpp
+++ b/test/formats/osxml/OsxmlEventParserTest.cpp
@@ -31,7 +31,7 @@ static TerminalLogger logger(std::cerr, true);
 
 namespace {
 enum class OsxmlEvent {
-	COMMAND_START,
+	COMMAND,
 	ANNOTATION_START,
 	ANNOTATION_END,
 	FIELD_END,
@@ -42,22 +42,24 @@ class TestOsxmlEventListener : public OsxmlEvents {
 public:
 	std::vector<std::pair<OsxmlEvent, Variant>> events;
 
-	void commandStart(Variant name, Variant args) override
+	void command(const Variant &name, const Variant::mapType &args) override
 	{
-		events.emplace_back(OsxmlEvent::COMMAND_START,
+		events.emplace_back(OsxmlEvent::COMMAND,
 		                    Variant::arrayType{name, args});
 	}
 
-	void annotationStart(Variant name, Variant args) override
+	void annotationStart(const Variant &className,
+	                     const Variant::mapType &args) override
 	{
 		events.emplace_back(OsxmlEvent::ANNOTATION_START,
-		                    Variant::arrayType{name, args});
+		                    Variant::arrayType{className, args});
 	}
 
-	void annotationEnd(Variant name, Variant elementName) override
+	void annotationEnd(const Variant &className,
+	                   const Variant &elementName) override
 	{
 		events.emplace_back(OsxmlEvent::ANNOTATION_END,
-		                    Variant::arrayType{name, elementName});
+		                    Variant::arrayType{className, elementName});
 	}
 
 	void fieldEnd() override
@@ -65,7 +67,7 @@ public:
 		events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{});
 	}
 
-	void data(Variant data) override
+	void data(const Variant &data) override
 	{
 		events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data});
 	}
@@ -91,7 +93,7 @@ TEST(OsxmlEventParser, simpleCommandWithArgs)
 	//                        0          1            2            3
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
+	    {OsxmlEvent::COMMAND,
 	     Variant::arrayType{
 	         "a", Variant::mapType{
 	                  {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}},
@@ -131,11 +133,9 @@ TEST(OsxmlEventParser, magicTopLevelTag)
 	const char *testString = "<ousia><a/><b/></ousia>";
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{{"a", Variant::mapType{}}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}},
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{{"b", Variant::mapType{}}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
 
 	auto events = parseXml(testString);
@@ -147,9 +147,8 @@ TEST(OsxmlEventParser, magicTopLevelTagInside)
 	const char *testString = "<a><ousia/></a>";
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{{"a", Variant::mapType{}}}},
-	    {OsxmlEvent::COMMAND_START,
+	    {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}},
+	    {OsxmlEvent::COMMAND,
 	     Variant::arrayType{{"ousia", Variant::mapType{}}}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
@@ -165,8 +164,7 @@ TEST(OsxmlEventParser, commandWithDataPreserveWhitespace)
 	//                        0         1          2
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}},
 	    {OsxmlEvent::DATA, Variant::arrayType{"  hello  \n world "}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
 
@@ -185,8 +183,7 @@ TEST(OsxmlEventParser, commandWithDataTrimWhitespace)
 	//                        0         1          2
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}},
 	    {OsxmlEvent::DATA, Variant::arrayType{"hello  \n world"}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
 
@@ -205,8 +202,7 @@ TEST(OsxmlEventParser, commandWithDataCollapseWhitespace)
 	//                        0         1          2
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}},
 	    {OsxmlEvent::DATA, Variant::arrayType{"hello world"}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
 
@@ -217,6 +213,5 @@ TEST(OsxmlEventParser, commandWithDataCollapseWhitespace)
 	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart());
 	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd());
 }
-
 }
 
-- 
cgit v1.2.3


From c298f00ef1633a663775fe9a715a249b9f4d255d Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 20:58:26 +0100
Subject: Implemented OsxmlParser

---
 CMakeLists.txt                         |   2 +
 src/formats/osxml/OsxmlParser.cpp      | 288 +++++++++------------------------
 src/formats/osxml/OsxmlParser.hpp      |   2 +-
 test/formats/osxml/OsxmlParserTest.cpp |  28 ++--
 4 files changed, 91 insertions(+), 229 deletions(-)

(limited to 'test')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2106cf0..ec1bb4d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,6 +203,7 @@ TARGET_LINK_LIBRARIES(ousia_osml
 ADD_LIBRARY(ousia_osxml
 	src/formats/osxml/OsxmlAttributeLocator
 	src/formats/osxml/OsxmlEventParser
+	src/formats/osxml/OsxmlParser
 )
 
 TARGET_LINK_LIBRARIES(ousia_osxml
@@ -351,6 +352,7 @@ IF(TEST)
 
 	ADD_EXECUTABLE(ousia_test_osxml
 		test/formats/osxml/OsxmlEventParserTest
+		test/formats/osxml/OsxmlParserTest
 	)
 
 	TARGET_LINK_LIBRARIES(ousia_test_osxml
diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp
index 869c76a..c216855 100644
--- a/src/formats/osxml/OsxmlParser.cpp
+++ b/src/formats/osxml/OsxmlParser.cpp
@@ -16,223 +16,83 @@
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
-#include <map>
-#include <sstream>
-#include <vector>
-
-#include <expat.h>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/Utils.hpp>
-#include <core/common/VariantReader.hpp>
-#include <core/parser/ParserScope.hpp>
-#include <core/parser/ParserStack.hpp>
-#include <core/parser/stack/DocumentHandler.hpp>
-#include <core/parser/stack/DomainHandler.hpp>
-#include <core/parser/stack/ImportIncludeHandler.hpp>
-#include <core/parser/stack/TypesystemHandler.hpp>
-#include <core/model/Document.hpp>
-#include <core/model/Domain.hpp>
-#include <core/model/Typesystem.hpp>
-
-#include "XmlParser.hpp"
+#include <core/parser/stack/GenericParserStates.hpp>
+#include <core/parser/stack/Stack.hpp>
+#include <core/parser/ParserContext.hpp>
+
+#include "OsxmlEventParser.hpp"
+#include "OsxmlParser.hpp"
 
 namespace ousia {
 
-namespace ParserStates {
-/* Document states */
-static const ParserState Document =
-    ParserStateBuilder()
-        .parent(&None)
-        .createdNodeType(&RttiTypes::Document)
-        .elementHandler(DocumentHandler::create)
-        .arguments({Argument::String("name", "")});
-
-static const ParserState DocumentChild =
-    ParserStateBuilder()
-        .parents({&Document, &DocumentChild})
-        .createdNodeTypes({&RttiTypes::StructureNode,
-                           &RttiTypes::AnnotationEntity,
-                           &RttiTypes::DocumentField})
-        .elementHandler(DocumentChildHandler::create);
-
-/* Domain states */
-static const ParserState Domain = ParserStateBuilder()
-                                      .parents({&None, &Document})
-                                      .createdNodeType(&RttiTypes::Domain)
-                                      .elementHandler(DomainHandler::create)
-                                      .arguments({Argument::String("name")});
-
-static const ParserState DomainStruct =
-    ParserStateBuilder()
-        .parent(&Domain)
-        .createdNodeType(&RttiTypes::StructuredClass)
-        .elementHandler(DomainStructHandler::create)
-        .arguments({Argument::String("name"),
-                    Argument::Cardinality("cardinality", Cardinality::any()),
-                    Argument::Bool("isRoot", false),
-                    Argument::Bool("transparent", false),
-                    Argument::String("isa", "")});
-
-static const ParserState DomainAnnotation =
-    ParserStateBuilder()
-        .parent(&Domain)
-        .createdNodeType(&RttiTypes::AnnotationClass)
-        .elementHandler(DomainAnnotationHandler::create)
-        .arguments({Argument::String("name")});
-
-static const ParserState DomainAttributes =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::StructType)
-        .elementHandler(DomainAttributesHandler::create)
-        .arguments({});
-
-static const ParserState DomainAttribute =
-    ParserStateBuilder()
-        .parent(&DomainAttributes)
-        .elementHandler(TypesystemStructFieldHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("default", Variant::fromObject(nullptr))});
-
-static const ParserState DomainField =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainFieldHandler::create)
-        .arguments({Argument::String("name", ""),
-                    Argument::Bool("isSubtree", false),
-                    Argument::Bool("optional", false)});
-
-static const ParserState DomainFieldRef =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainFieldRefHandler::create)
-        .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)});
-
-static const ParserState DomainStructPrimitive =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainPrimitiveHandler::create)
-        .arguments(
-            {Argument::String("name", ""), Argument::Bool("isSubtree", false),
-             Argument::Bool("optional", false), Argument::String("type")});
-
-static const ParserState DomainStructChild =
-    ParserStateBuilder()
-        .parent(&DomainField)
-        .elementHandler(DomainChildHandler::create)
-        .arguments({Argument::String("ref")});
-
-static const ParserState DomainStructParent =
-    ParserStateBuilder()
-        .parent(&DomainStruct)
-        .createdNodeType(&RttiTypes::DomainParent)
-        .elementHandler(DomainParentHandler::create)
-        .arguments({Argument::String("ref")});
-
-static const ParserState DomainStructParentField =
-    ParserStateBuilder()
-        .parent(&DomainStructParent)
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainParentFieldHandler::create)
-        .arguments({Argument::String("name", ""),
-                    Argument::Bool("isSubtree", false),
-                    Argument::Bool("optional", false)});
-
-static const ParserState DomainStructParentFieldRef =
-    ParserStateBuilder()
-        .parent(&DomainStructParent)
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainParentFieldRefHandler::create)
-        .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)});
-
-/* Typesystem states */
-static const ParserState Typesystem =
-    ParserStateBuilder()
-        .parents({&None, &Domain})
-        .createdNodeType(&RttiTypes::Typesystem)
-        .elementHandler(TypesystemHandler::create)
-        .arguments({Argument::String("name", "")});
-
-static const ParserState TypesystemEnum =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::EnumType)
-        .elementHandler(TypesystemEnumHandler::create)
-        .arguments({Argument::String("name")});
-
-static const ParserState TypesystemEnumEntry =
-    ParserStateBuilder()
-        .parent(&TypesystemEnum)
-        .elementHandler(TypesystemEnumEntryHandler::create)
-        .arguments({});
-
-static const ParserState TypesystemStruct =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::StructType)
-        .elementHandler(TypesystemStructHandler::create)
-        .arguments({Argument::String("name"), Argument::String("parent", "")});
-
-static const ParserState TypesystemStructField =
-    ParserStateBuilder()
-        .parent(&TypesystemStruct)
-        .elementHandler(TypesystemStructFieldHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("default", Variant::fromObject(nullptr))});
-
-static const ParserState TypesystemConstant =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::Constant)
-        .elementHandler(TypesystemConstantHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("value")});
-
-/* Special states for import and include */
-static const ParserState Import =
-    ParserStateBuilder()
-        .parents({&Document, &Typesystem, &Domain})
-        .elementHandler(ImportHandler::create)
-        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
-                    Argument::String("src", "")});
-
-static const ParserState Include =
-    ParserStateBuilder()
-        .parent(&All)
-        .elementHandler(IncludeHandler::create)
-        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
-                    Argument::String("src", "")});
-
-static const std::multimap<std::string, const ParserState *> XmlStates{
-    {"document", &Document},
-    {"*", &DocumentChild},
-    {"domain", &Domain},
-    {"struct", &DomainStruct},
-    {"annotation", &DomainAnnotation},
-    {"attributes", &DomainAttributes},
-    {"attribute", &DomainAttribute},
-    {"field", &DomainField},
-    {"fieldRef", &DomainFieldRef},
-    {"primitive", &DomainStructPrimitive},
-    {"childRef", &DomainStructChild},
-    {"parentRef", &DomainStructParent},
-    {"field", &DomainStructParentField},
-    {"fieldRef", &DomainStructParentFieldRef},
-    {"typesystem", &Typesystem},
-    {"enum", &TypesystemEnum},
-    {"entry", &TypesystemEnumEntry},
-    {"struct", &TypesystemStruct},
-    {"field", &TypesystemStructField},
-    {"constant", &TypesystemConstant},
-    {"import", &Import},
-    {"include", &Include}};
+using namespace parser_stack;
+
+/**
+ * Class containing the actual OsxmlParser implementation.
+ */
+class OsxmlParserImplementation : public OsxmlEvents {
+private:
+	/**
+	 * Actual xml parser -- converts the xml stream into a set of events.
+	 */
+	OsxmlEventParser parser;
+
+	/**
+	 * Pushdown automaton responsible for converting the xml events into an
+	 * actual Node tree.
+	 */
+	Stack stack;
+
+public:
+	/**
+	 * Constructor of the OsxmlParserImplementation class.
+	 *
+	 * @param reader is a reference to the CharReader instance from which the
+	 * XML should be read.
+	 * @param ctx is a reference to the ParserContext instance that should be
+	 * used.
+	 */
+	OsxmlParserImplementation(CharReader &reader, ParserContext &ctx)
+	    : parser(reader, *this, ctx.getLogger()),
+	      stack(ctx, GenericParserStates)
+	{
+	}
+
+	/**
+	 * Starts the actual parsing process.
+	 */
+	void parse() { parser.parse(); }
+
+	void command(const Variant &name, const Variant::mapType &args) override
+	{
+		stack.command(name, args);
+		stack.fieldStart(true);
+	}
+
+	void annotationStart(const Variant &name,
+	                     const Variant::mapType &args) override
+	{
+		stack.annotationStart(name, args);
+		stack.fieldStart(true);
+	}
+
+	void annotationEnd(const Variant &className,
+	                   const Variant &elementName) override
+	{
+		stack.annotationEnd(className, elementName);
+	}
+
+	void fieldEnd() override { stack.fieldEnd(); }
+
+	void data(const Variant &data) override { stack.data(data); }
+};
+
+/* Class OsxmlParser */
+
+void OsxmlParser::doParse(CharReader &reader, ParserContext &ctx)
+{
+	OsxmlParserImplementation impl(reader, ctx);
+	impl.parse();
 }
-
-
 }
 
diff --git a/src/formats/osxml/OsxmlParser.hpp b/src/formats/osxml/OsxmlParser.hpp
index 281a49c..0fbf83c 100644
--- a/src/formats/osxml/OsxmlParser.hpp
+++ b/src/formats/osxml/OsxmlParser.hpp
@@ -17,7 +17,7 @@
 */
 
 /**
- * @file XmlParser.hpp
+ * @file OsxmlParser.hpp
  *
  * Contains the parser responsible for reading Ousía XML Documents (extension
  * oxd) and Ousía XML Modules (extension oxm).
diff --git a/test/formats/osxml/OsxmlParserTest.cpp b/test/formats/osxml/OsxmlParserTest.cpp
index 269a3f6..a2bd8b1 100644
--- a/test/formats/osxml/OsxmlParserTest.cpp
+++ b/test/formats/osxml/OsxmlParserTest.cpp
@@ -30,7 +30,7 @@
 #include <core/StandaloneEnvironment.hpp>
 
 #include <plugins/filesystem/FileLocator.hpp>
-#include <formats/osdmx/OsdmxParser.hpp>
+#include <formats/osxml/OsxmlParser.hpp>
 
 namespace ousia {
 
@@ -41,7 +41,7 @@ extern const Rtti Typesystem;
 }
 
 struct XmlStandaloneEnvironment : public StandaloneEnvironment {
-	XmlParser xmlParser;
+	OsxmlParser parser;
 	FileLocator fileLocator;
 
 	XmlStandaloneEnvironment(ConcreteLogger &logger)
@@ -52,21 +52,21 @@ struct XmlStandaloneEnvironment : public StandaloneEnvironment {
 
 		registry.registerDefaultExtensions();
 		registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"},
-		                        {&RttiTypes::Node}, &xmlParser);
+		                        {&RttiTypes::Node}, &parser);
 		registry.registerResourceLocator(&fileLocator);
 	}
 };
 
 static TerminalLogger logger(std::cerr, true);
 
-TEST(XmlParser, mismatchedTag)
+TEST(OsxmlParser, mismatchedTag)
 {
 	XmlStandaloneEnvironment env(logger);
 	env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document});
 	ASSERT_TRUE(logger.hasError());
 }
 
-TEST(XmlParser, generic)
+TEST(OsxmlParser, generic)
 {
 	XmlStandaloneEnvironment env(logger);
 	env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node});
@@ -186,7 +186,7 @@ static void checkFieldDescriptor(
     Handle<Type> primitiveType = nullptr, bool optional = false)
 {
 	auto res = desc->resolve(&RttiTypes::FieldDescriptor, name);
-	ASSERT_EQ(1, res.size());
+	ASSERT_EQ(1U, res.size());
 	checkFieldDescriptor(res[0].node, name, parent, children, type,
 	                     primitiveType, optional);
 }
@@ -201,7 +201,7 @@ static void checkFieldDescriptor(
 	                     optional);
 }
 
-TEST(XmlParser, domainParsing)
+TEST(OsxmlParser, domainParsing)
 {
 	XmlStandaloneEnvironment env(logger);
 	Rooted<Node> book_domain_node =
@@ -339,10 +339,10 @@ static void checkText(Handle<Node> p, Handle<Node> expectedParent,
 {
 	checkStructuredEntity(p, expectedParent, doc, "paragraph");
 	Rooted<StructuredEntity> par = p.cast<StructuredEntity>();
-	ASSERT_EQ(1, par->getField().size());
+	ASSERT_EQ(1U, par->getField().size());
 	checkStructuredEntity(par->getField()[0], par, doc, "text");
 	Rooted<StructuredEntity> text = par->getField()[0].cast<StructuredEntity>();
-	ASSERT_EQ(1, text->getField().size());
+	ASSERT_EQ(1U, text->getField().size());
 
 	Handle<StructureNode> d = text->getField()[0];
 	ASSERT_FALSE(d == nullptr);
@@ -352,7 +352,7 @@ static void checkText(Handle<Node> p, Handle<Node> expectedParent,
 	ASSERT_EQ(expected, prim->getContent());
 }
 
-TEST(XmlParser, documentParsing)
+TEST(OsxmlParser, documentParsing)
 {
 	XmlStandaloneEnvironment env(logger);
 	Rooted<Node> book_document_node =
@@ -364,7 +364,7 @@ TEST(XmlParser, documentParsing)
 	checkStructuredEntity(doc->getRoot(), doc, doc, "book");
 	{
 		Rooted<StructuredEntity> book = doc->getRoot();
-		ASSERT_EQ(2, book->getField().size());
+		ASSERT_EQ(2U, book->getField().size());
 		checkText(book->getField()[0], book, doc,
 		          "This might be some introductory text or a dedication.");
 		checkStructuredEntity(book->getField()[1], book, doc, "chapter",
@@ -372,7 +372,7 @@ TEST(XmlParser, documentParsing)
 		{
 			Rooted<StructuredEntity> chapter =
 			    book->getField()[1].cast<StructuredEntity>();
-			ASSERT_EQ(3, chapter->getField().size());
+			ASSERT_EQ(3U, chapter->getField().size());
 			checkText(chapter->getField()[0], chapter, doc,
 			          "Here we might have an introduction to the chapter.");
 			checkStructuredEntity(chapter->getField()[1], chapter, doc,
@@ -381,7 +381,7 @@ TEST(XmlParser, documentParsing)
 			{
 				Rooted<StructuredEntity> section =
 				    chapter->getField()[1].cast<StructuredEntity>();
-				ASSERT_EQ(1, section->getField().size());
+				ASSERT_EQ(1U, section->getField().size());
 				checkText(section->getField()[0], section, doc,
 				          "Here we might find the actual section content.");
 			}
@@ -391,7 +391,7 @@ TEST(XmlParser, documentParsing)
 			{
 				Rooted<StructuredEntity> section =
 				    chapter->getField()[2].cast<StructuredEntity>();
-				ASSERT_EQ(1, section->getField().size());
+				ASSERT_EQ(1U, section->getField().size());
 				checkText(section->getField()[0], section, doc,
 				          "Here we might find the actual section content.");
 			}
-- 
cgit v1.2.3