From efe60ac3c3a8725ac71329c0bb19fa9d9c58f399 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:42:05 +0100
Subject: Moved specific file format parsers to formats/ folder, moved old
 tokenizer to css code (this is the only place where it is actually used)

---
 src/formats/osdmx/OsdmxParser.cpp | 1435 +++++++++++++++++++++++++++++++++++++
 src/formats/osdmx/OsdmxParser.hpp |   55 ++
 2 files changed, 1490 insertions(+)
 create mode 100644 src/formats/osdmx/OsdmxParser.cpp
 create mode 100644 src/formats/osdmx/OsdmxParser.hpp

(limited to 'src/formats')
diff --git a/src/formats/osdmx/OsdmxParser.cpp b/src/formats/osdmx/OsdmxParser.cpp
new file mode 100644
index 0000000..c46d9de
--- /dev/null
+++ b/src/formats/osdmx/OsdmxParser.cpp
@@ -0,0 +1,1435 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <vector>
+
+#include <expat.h>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/RttiBuilder.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/VariantReader.hpp>
+#include <core/parser/ParserStack.hpp>
+#include <core/parser/ParserScope.hpp>
+#include <core/model/Document.hpp>
+#include <core/model/Domain.hpp>
+#include <core/model/Project.hpp>
+#include <core/model/RootNode.hpp>
+#include <core/model/Typesystem.hpp>
+
+#include "XmlParser.hpp"
+
+namespace ousia {
+
+/* HeadNode Helper class */
+
+namespace {
+class HeadNode : public Node {
+public:
+	using Node::Node;
+};
+}
+
+namespace RttiTypes {
+static Rtti HeadNode = RttiBuilder<ousia::HeadNode>("HeadNode");
+}
+
+/* Element Handler Classes */
+
+class DocumentHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		Rooted<Document> document =
+		    project()->createDocument(args["name"].asString());
+		document->setLocation(location());
+		scope().push(document);
+		scope().setFlag(ParserFlag::POST_HEAD, false);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DocumentHandler{handlerData};
+	}
+};
+
+class DocumentField : public Node {
+public:
+	DocumentField(Manager &mgr, std::string name, Handle<Node> parent)
+	    : Node(mgr, name, parent)
+	{
+	}
+};
+
+namespace RttiTypes {
+const Rtti DocumentField =
+    RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node);
+}
+
+class DocumentChildHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void preamble(Handle<Node> parentNode, std::string &fieldName,
+	              DocumentEntity *&parent, bool &inField)
+	{
+		// check if the parent in the structure tree was an explicit field
+		// reference.
+		inField = parentNode->isa(&RttiTypes::DocumentField);
+		if (inField) {
+			fieldName = parentNode->getName();
+			parentNode = scope().selectOrThrow(
+			    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity});
+		} else {
+			// if it wasn't an explicit reference, we use the default field.
+			fieldName = DEFAULT_FIELD_NAME;
+		}
+		// reference the parent entity explicitly.
+		parent = nullptr;
+		if (parentNode->isa(&RttiTypes::StructuredEntity)) {
+			parent = static_cast<DocumentEntity *>(
+			    parentNode.cast<StructuredEntity>().get());
+		} else if (parentNode->isa(&RttiTypes::AnnotationEntity)) {
+			parent = static_cast<DocumentEntity *>(
+			    parentNode.cast<AnnotationEntity>().get());
+		}
+	}
+
+	// Opens a document child. If the tag names a field of the parent entity a
+	// DocumentField is pushed, otherwise a StructuredEntity of the class named
+	// by the tag is created (taking its "name" from args) and pushed.
+	// Throws a LoggableException if the class is unknown or not allowed here.
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+		Rooted<Node> parentNode = scope().selectOrThrow(
+		    {&RttiTypes::Document, &RttiTypes::StructuredEntity,
+		     &RttiTypes::AnnotationEntity, &RttiTypes::DocumentField});
+		std::string fieldName;
+		DocumentEntity *parent = nullptr;
+		bool inField = false;
+		preamble(parentNode, fieldName, parent, inField);
+		// Open an explicit field if the tag names one and we are not inside a
+		// field yet; it is named after the tag so preamble() can read it back.
+		// TODO: Consider fields of transparent classes
+		if (!inField && parent != nullptr &&
+		    parent->getDescriptor()->hasField(name())) {
+			Rooted<DocumentField> field{new DocumentField(
+			    parentNode->getManager(), name(), parentNode)};
+			field->setLocation(location());
+			scope().push(field);
+			return;
+		}
+		// Otherwise create a new StructuredEntity
+		// TODO: Consider Anchors and AnnotationEntities
+		Rooted<StructuredClass> strct = scope().resolve<StructuredClass>(
+		    Utils::split(name(), ':'), logger());
+		if (strct == nullptr) {
+			// if we could not resolve the name, throw an exception.
+			throw LoggableException(
+			    std::string("\"") + name() + "\" could not be resolved.",
+			    location());
+		}
+		// The optional "name" argument names the entity; strip it from the args.
+		std::string entityName;
+		auto it = args.find("name");
+		if (it != args.end()) {
+			entityName = it->second.asString();
+			args.erase(it);
+		}
+
+		Rooted<StructuredEntity> entity;
+		if (parentNode->isa(&RttiTypes::Document)) {
+			entity = parentNode.cast<Document>()->createRootStructuredEntity(
+			    strct, args, entityName);
+		} else {
+			// calculate a path if transparent entities are needed in between.
+			auto path = parent->getDescriptor()->pathTo(strct);
+			if (path.empty()) {
+				throw LoggableException(
+				    std::string("An instance of \"") + strct->getName() +
+				        "\" is not allowed as child of an instance of \"" +
+				        parent->getDescriptor()->getName() + "\"",
+				    location());
+			}
+
+			// create all transparent entities until the last field.
+			for (size_t p = 1; p < path.size() - 1; p = p + 2) {
+				parent = static_cast<DocumentEntity *>(
+				    parent->createChildStructuredEntity(
+				                path[p].cast<StructuredClass>(),
+				                Variant::mapType{}, path[p - 1]->getName(),
+				                "").get());
+			}
+			entity = parent->createChildStructuredEntity(strct, args, fieldName,
+			                                             entityName);
+		}
+		entity->setLocation(location());
+		scope().push(entity);
+	}
+
+	void end() override { scope().pop(); }
+
+	// Adds character data as primitive content to the field addressed by the
+	// current scope. Errors (unknown/non-primitive field, failed parse or type
+	// conversion) drop the data; the parameter fieldIdx is currently unused.
+	void data(const std::string &data, int fieldIdx) override
+	{
+		Rooted<Node> parentNode = scope().selectOrThrow(
+		    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity,
+		     &RttiTypes::DocumentField});
+		std::string fieldName;
+		DocumentEntity *parent = nullptr;
+		bool inField = false;
+		preamble(parentNode, fieldName, parent, inField);
+		// retrieve the correct FieldDescriptor.
+		// TODO: Consider fields of transparent classes
+		Rooted<Descriptor> desc = parent->getDescriptor();
+		Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName);
+		if (field == nullptr) {
+			logger().error(
+			    std::string("Can't handle data because no field with name \"") +
+			        fieldName + "\" exists in descriptor \"" + desc->getName() +
+			        "\".",
+			    location());
+			return;
+		}
+		if (!field->isPrimitive()) {
+			logger().error(std::string("Can't handle data because field \"") +
+			                   fieldName + "\" of descriptor \"" +
+			                   desc->getName() + "\" is not primitive!",
+			               location());
+			return;
+		}
+
+		// try to parse the content.
+		auto res = VariantReader::parseGenericString(
+		    data, logger(), location().getSourceId(), location().getStart());
+		if (!res.first) {
+			return;
+		}
+		// try to convert it to the correct type.
+		if (!field->getPrimitiveType()->build(res.second, logger())) {
+			return;
+		}
+		// add it as primitive content.
+		parent->createChildDocumentPrimitive(res.second, fieldName);
+	}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DocumentChildHandler{handlerData};
+	}
+};
+
+class TypesystemHandler : public Handler {
+public:
+	using Handler::Handler;
+	// Creates a new Typesystem named by the "name" argument in the project.
+	void start(Variant::mapType &args) override
+	{
+		// Create the typesystem instance
+		Rooted<Typesystem> typesystem =
+		    project()->createTypesystem(args["name"].asString());
+		typesystem->setLocation(location());
+
+		// Push the typesystem onto the scope, reset the POST_HEAD flag to false
+		scope().push(typesystem);
+		scope().setFlag(ParserFlag::POST_HEAD, false);
+	}
+	// Leaves the typesystem by popping it from the scope.
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemHandler{handlerData};
+	}
+};
+
+class TypesystemEnumHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		// Fetch the current typesystem and create the enum node
+		Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>();
+		Rooted<EnumType> enumType =
+		    typesystem->createEnumType(args["name"].asString());
+		enumType->setLocation(location());
+
+		scope().push(enumType);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemEnumHandler{handlerData};
+	}
+};
+
+class TypesystemEnumEntryHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	std::string entry;
+
+	void start(Variant::mapType &args) override {}
+
+	void end() override
+	{
+		Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>();
+		enumType->addEntry(entry, logger());
+	}
+
+	void data(const std::string &data, int field) override
+	{
+		if (field != 0) {
+			// TODO: This should be stored in the HandlerData
+			logger().error("Enum entry only has one field.");
+			return;
+		}
+		entry.append(data);
+	}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemEnumEntryHandler{handlerData};
+	}
+};
+
+class TypesystemStructHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		// Fetch the arguments used for creating this type
+		const std::string &name = args["name"].asString();
+		const std::string &parent = args["parent"].asString();
+
+		// Fetch the current typesystem and create the struct node
+		Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>();
+		Rooted<StructType> structType = typesystem->createStructType(name);
+		structType->setLocation(location());
+
+		// Try to resolve the parent type and set it as parent structure
+		if (!parent.empty()) {
+			scope().resolve<StructType>(
+			    parent, structType, logger(),
+			    [](Handle<Node> parent, Handle<Node> structType,
+			       Logger &logger) {
+				    if (parent != nullptr) {
+					    structType.cast<StructType>()->setParentStructure(
+					        parent.cast<StructType>(), logger);
+				    }
+				});
+		}
+		scope().push(structType);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemStructHandler{handlerData};
+	}
+};
+
+class TypesystemStructFieldHandler : public Handler {
+public:
+	using Handler::Handler;
+	// Adds an attribute to the StructType that is currently in scope.
+	void start(Variant::mapType &args) override
+	{
+		// Read the arguments; a null-object default makes the field mandatory
+		const std::string &name = args["name"].asString();
+		const std::string &type = args["type"].asString();
+		const Variant &defaultValue = args["default"];
+		const bool optional =
+		    !(defaultValue.isObject() && defaultValue.asObject() == nullptr);
+		// Create the attribute now; its type is set in the callbacks below
+		Rooted<StructType> structType = scope().selectOrThrow<StructType>();
+		Rooted<Attribute> attribute =
+		    structType->createAttribute(name, defaultValue, optional, logger());
+		attribute->setLocation(location());
+
+		// With a default: resolve the type together with the default value
+		if (optional) {
+			scope().resolveTypeWithValue(
+			    type, attribute, attribute->getDefaultValue(), logger(),
+			    [](Handle<Node> type, Handle<Node> attribute, Logger &logger) {
+				    if (type != nullptr) {
+					    attribute.cast<Attribute>()->setType(type.cast<Type>(),
+					                                         logger);
+				    }
+				});
+		} else {
+			scope().resolveType(
+			    type, attribute, logger(),
+			    [](Handle<Node> type, Handle<Node> attribute, Logger &logger) {
+				    if (type != nullptr) {
+					    attribute.cast<Attribute>()->setType(type.cast<Type>(),
+					                                         logger);
+				    }
+				});
+		}
+	}
+	// Nothing was pushed onto the scope in start(), so nothing is popped.
+	void end() override {}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemStructFieldHandler{handlerData};
+	}
+};
+
+class TypesystemConstantHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		// Read the argument values
+		const std::string &name = args["name"].asString();
+		const std::string &type = args["type"].asString();
+		const Variant &value = args["value"];
+
+		Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>();
+		Rooted<Constant> constant = typesystem->createConstant(name, value);
+		constant->setLocation(location());
+
+		// Try to resolve the type
+		scope().resolveTypeWithValue(
+		    type, constant, constant->getValue(), logger(),
+		    [](Handle<Node> type, Handle<Node> constant, Logger &logger) {
+			    if (type != nullptr) {
+				    constant.cast<Constant>()->setType(type.cast<Type>(),
+				                                       logger);
+			    }
+			});
+	}
+
+	void end() override {}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemConstantHandler{handlerData};
+	}
+};
+
+/*
+ * Domain Handlers
+ */
+
+class DomainHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		Rooted<Domain> domain =
+		    project()->createDomain(args["name"].asString());
+		domain->setLocation(location());
+
+		scope().push(domain);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainHandler{handlerData};
+	}
+};
+
+class DomainStructHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		Rooted<Domain> domain = scope().selectOrThrow<Domain>();
+
+		Rooted<StructuredClass> structuredClass = domain->createStructuredClass(
+		    args["name"].asString(), args["cardinality"].asCardinality(),
+		    nullptr, args["transparent"].asBool(), args["isRoot"].asBool());
+		structuredClass->setLocation(location());
+
+		const std::string &isa = args["isa"].asString();
+		if (!isa.empty()) {
+			scope().resolve<StructuredClass>(
+			    isa, structuredClass, logger(),
+			    [](Handle<Node> superclass, Handle<Node> structuredClass,
+			       Logger &logger) {
+				    if (superclass != nullptr) {
+					    structuredClass.cast<StructuredClass>()->setSuperclass(
+					        superclass.cast<StructuredClass>(), logger);
+				    }
+				});
+		}
+
+		scope().push(structuredClass);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainStructHandler{handlerData};
+	}
+};
+
+class DomainAnnotationHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		Rooted<Domain> domain = scope().selectOrThrow<Domain>();
+
+		Rooted<AnnotationClass> annotationClass =
+		    domain->createAnnotationClass(args["name"].asString());
+		annotationClass->setLocation(location());
+
+		scope().push(annotationClass);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainAnnotationHandler{handlerData};
+	}
+};
+
+class DomainAttributesHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		// Fetch the enclosing descriptor and its attributes descriptor
+		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
+
+		Rooted<StructType> attrDesc = parent->getAttributesDescriptor();
+		attrDesc->setLocation(location());
+		// Push it so that child "attribute" handlers find it as StructType
+		scope().push(attrDesc);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainAttributesHandler{handlerData};
+	}
+};
+
+class DomainFieldHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		FieldDescriptor::FieldType type;
+		if (args["isSubtree"].asBool()) {
+			type = FieldDescriptor::FieldType::SUBTREE;
+		} else {
+			type = FieldDescriptor::FieldType::TREE;
+		}
+
+		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
+
+		Rooted<FieldDescriptor> field = parent->createFieldDescriptor(
+		    type, args["name"].asString(), args["optional"].asBool());
+		field->setLocation(location());
+
+		scope().push(field);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainFieldHandler{handlerData};
+	}
+};
+
+class DomainFieldRefHandler : public Handler {
+public:
+	using Handler::Handler;
+	// Adds an existing FieldDescriptor, referenced by "name", to the parent.
+	void start(Variant::mapType &args) override
+	{
+		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
+		// The parent may be any Descriptor, so do not cast to StructuredClass.
+		const std::string &name = args["name"].asString();
+		scope().resolve<FieldDescriptor>(
+		    name, parent, logger(),
+		    [](Handle<Node> field, Handle<Node> parent, Logger &logger) {
+			    if (field != nullptr) {
+				    parent.cast<Descriptor>()->addFieldDescriptor(
+				        field.cast<FieldDescriptor>());
+			    }
+			});
+	}
+	// Nothing was pushed onto the scope in start(), so nothing is popped.
+	void end() override {}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainFieldRefHandler{handlerData};
+	}
+};
+
+class DomainPrimitiveHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
+
+		Rooted<FieldDescriptor> field = parent->createPrimitiveFieldDescriptor(
+		    nullptr, args["name"].asString(), args["optional"].asBool());
+		field->setLocation(location());
+
+		const std::string &type = args["type"].asString();
+		scope().resolve<Type>(
+		    type, field, logger(),
+		    [](Handle<Node> type, Handle<Node> field, Logger &logger) {
+			    if (type != nullptr) {
+				    field.cast<FieldDescriptor>()->setPrimitiveType(
+				        type.cast<Type>());
+			    }
+			});
+
+		scope().push(field);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainPrimitiveHandler{handlerData};
+	}
+};
+
+class DomainChildHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		Rooted<FieldDescriptor> field =
+		    scope().selectOrThrow<FieldDescriptor>();
+
+		const std::string &ref = args["ref"].asString();
+		scope().resolve<StructuredClass>(
+		    ref, field, logger(),
+		    [](Handle<Node> child, Handle<Node> field, Logger &logger) {
+			    if (child != nullptr) {
+				    field.cast<FieldDescriptor>()->addChild(
+				        child.cast<StructuredClass>());
+			    }
+			});
+	}
+
+	void end() override {}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainChildHandler{handlerData};
+	}
+};
+
+class DomainParent : public Node {
+public:
+	DomainParent(Manager &mgr, std::string name, Handle<Node> parent)
+	    : Node(mgr, name, parent)
+	{
+	}
+};
+
+namespace RttiTypes {
+const Rtti DomainParent =
+    RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node);
+}
+
+class DomainParentHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		Rooted<StructuredClass> strct =
+		    scope().selectOrThrow<StructuredClass>();
+
+		Rooted<DomainParent> parent{new DomainParent(
+		    strct->getManager(), args["name"].asString(), strct)};
+		parent->setLocation(location());
+		scope().push(parent);
+	}
+
+	void end() override { scope().pop(); }
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainParentHandler{handlerData};
+	}
+};
+
+class DomainParentFieldHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		Rooted<DomainParent> parentNameNode =
+		    scope().selectOrThrow<DomainParent>();
+		FieldDescriptor::FieldType type;
+		if (args["isSubtree"].asBool()) {
+			type = FieldDescriptor::FieldType::SUBTREE;
+		} else {
+			type = FieldDescriptor::FieldType::TREE;
+		}
+
+		const std::string &name = args["name"].asString();
+		const bool optional = args["optional"].asBool();
+		Rooted<StructuredClass> strct =
+		    parentNameNode->getParent().cast<StructuredClass>();
+
+		// resolve the parent, create the declared field and add the declared
+		// StructuredClass as child to it.
+		scope().resolve<Descriptor>(
+		    parentNameNode->getName(), strct, logger(),
+		    [type, name, optional](Handle<Node> parent, Handle<Node> strct,
+		                           Logger &logger) {
+			    if (parent != nullptr) {
+				    Rooted<FieldDescriptor> field =
+				        parent.cast<Descriptor>()->createFieldDescriptor(
+				            type, name, optional);
+				    field->addChild(strct.cast<StructuredClass>());
+			    }
+			});
+	}
+
+	void end() override {}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainParentFieldHandler{handlerData};
+	}
+};
+
+class DomainParentFieldRefHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	void start(Variant::mapType &args) override
+	{
+		Rooted<DomainParent> parentNameNode =
+		    scope().selectOrThrow<DomainParent>();
+
+		const std::string &name = args["name"].asString();
+		Rooted<StructuredClass> strct =
+		    parentNameNode->getParent().cast<StructuredClass>();
+		auto loc = location();
+
+		// resolve the parent, get the referenced field and add the declared
+		// StructuredClass as child to it.
+		scope().resolve<Descriptor>(parentNameNode->getName(), strct, logger(),
+		                            [name, loc](Handle<Node> parent,
+		                                        Handle<Node> strct,
+		                                        Logger &logger) {
+			if (parent != nullptr) {
+				auto res = parent.cast<Descriptor>()->resolve(
+				    &RttiTypes::FieldDescriptor, name);
+				if (res.size() != 1) {
+					logger.error(
+					    std::string("Could not find referenced field ") + name,
+					    loc);
+					return;
+				}
+				Rooted<FieldDescriptor> field =
+				    res[0].node.cast<FieldDescriptor>();
+				field->addChild(strct.cast<StructuredClass>());
+			}
+		});
+	}
+
+	void end() override {}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainParentFieldRefHandler{handlerData};
+	}
+};
+
+/*
+ * Import and Include Handler
+ */
+
+class ImportIncludeHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	bool srcInArgs = false;
+	std::string rel;
+	std::string type;
+	std::string src;
+
+	void start(Variant::mapType &args) override
+	{
+		rel = args["rel"].asString();
+		type = args["type"].asString();
+		src = args["src"].asString();
+		srcInArgs = !src.empty();
+	}
+
+	void data(const std::string &data, int field) override
+	{
+		if (srcInArgs) {
+			logger().error("\"src\" attribute has already been set");
+			return;
+		}
+		if (field != 0) {
+			logger().error("Command has only one field.");
+			return;
+		}
+		src.append(data);
+	}
+};
+
+class ImportHandler : public ImportIncludeHandler {
+public:
+	using ImportIncludeHandler::ImportIncludeHandler;
+
+	void start(Variant::mapType &args) override
+	{
+		ImportIncludeHandler::start(args);
+		// Imports are only valid in the head, i.e. while POST_HEAD is not set.
+		// NOTE(review): end() below does not re-check this -- confirm intended.
+		if (scope().getFlag(ParserFlag::POST_HEAD)) {
+			logger().error("Imports must be listed before other commands.",
+			               location());
+			return;
+		}
+	}
+
+	void end() override
+	{
+		// Fetch the innermost scope node; imports are only accepted if it is a
+		// RootNode
+		Rooted<Node> leaf = scope().getLeaf();
+		if (leaf == nullptr || !leaf->isa(&RttiTypes::RootNode)) {
+			logger().error(
+			    "Import not supported here, must be inside a document, domain "
+			    "or typesystem command.",
+			    location());
+			return;
+		}
+		Rooted<RootNode> leafRootNode = leaf.cast<RootNode>();
+
+		// Perform the actual import, register the imported node within the leaf
+		// node
+		Rooted<Node> imported =
+		    context().import(src, type, rel, leafRootNode->getReferenceTypes());
+		if (imported != nullptr) {
+			leafRootNode->reference(imported);
+		}
+	}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new ImportHandler{handlerData};
+	}
+};
+
+class IncludeHandler : public ImportIncludeHandler {
+public:
+	using ImportIncludeHandler::ImportIncludeHandler;
+
+	void start(Variant::mapType &args) override
+	{
+		ImportIncludeHandler::start(args);
+	}
+
+	void end() override
+	{
+		context().include(src, type, rel, {&RttiTypes::Node});
+	}
+
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new IncludeHandler{handlerData};
+	}
+};
+
+namespace ParserStates {
+/* Document states */
+static const ParserState Document =
+    ParserStateBuilder()
+        .parent(&None)
+        .createdNodeType(&RttiTypes::Document)
+        .elementHandler(DocumentHandler::create)
+        .arguments({Argument::String("name", "")});
+
+static const ParserState DocumentChild =
+    ParserStateBuilder()
+        .parents({&Document, &DocumentChild})
+        .createdNodeTypes({&RttiTypes::StructureNode,
+                           &RttiTypes::AnnotationEntity,
+                           &RttiTypes::DocumentField})
+        .elementHandler(DocumentChildHandler::create);
+
+/* Domain states */
+static const ParserState Domain = ParserStateBuilder()
+                                      .parents({&None, &Document})
+                                      .createdNodeType(&RttiTypes::Domain)
+                                      .elementHandler(DomainHandler::create)
+                                      .arguments({Argument::String("name")});
+
+static const ParserState DomainStruct =
+    ParserStateBuilder()
+        .parent(&Domain)
+        .createdNodeType(&RttiTypes::StructuredClass)
+        .elementHandler(DomainStructHandler::create)
+        .arguments({Argument::String("name"),
+                    Argument::Cardinality("cardinality", Cardinality::any()),
+                    Argument::Bool("isRoot", false),
+                    Argument::Bool("transparent", false),
+                    Argument::String("isa", "")});
+
+static const ParserState DomainAnnotation =
+    ParserStateBuilder()
+        .parent(&Domain)
+        .createdNodeType(&RttiTypes::AnnotationClass)
+        .elementHandler(DomainAnnotationHandler::create)
+        .arguments({Argument::String("name")});
+
+static const ParserState DomainAttributes =
+    ParserStateBuilder()
+        .parents({&DomainStruct, &DomainAnnotation})
+        .createdNodeType(&RttiTypes::StructType)
+        .elementHandler(DomainAttributesHandler::create)
+        .arguments({});
+
+static const ParserState DomainAttribute =
+    ParserStateBuilder()
+        .parent(&DomainAttributes)
+        .elementHandler(TypesystemStructFieldHandler::create)
+        .arguments({Argument::String("name"), Argument::String("type"),
+                    Argument::Any("default", Variant::fromObject(nullptr))});
+
+static const ParserState DomainField =
+    ParserStateBuilder()
+        .parents({&DomainStruct, &DomainAnnotation})
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainFieldHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
+                    Argument::Bool("isSubtree", false),
+                    Argument::Bool("optional", false)});
+
+static const ParserState DomainFieldRef =
+    ParserStateBuilder()
+        .parents({&DomainStruct, &DomainAnnotation})
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainFieldRefHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME)});
+
+static const ParserState DomainStructPrimitive =
+    ParserStateBuilder()
+        .parents({&DomainStruct, &DomainAnnotation})
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainPrimitiveHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
+                    Argument::Bool("optional", false),
+                    Argument::String("type")});
+
+static const ParserState DomainStructChild =
+    ParserStateBuilder()
+        .parent(&DomainField)
+        .elementHandler(DomainChildHandler::create)
+        .arguments({Argument::String("ref")});
+
+static const ParserState DomainStructParent =
+    ParserStateBuilder()
+        .parent(&DomainStruct)
+        .createdNodeType(&RttiTypes::DomainParent)
+        .elementHandler(DomainParentHandler::create)
+        .arguments({Argument::String("name")});
+
+static const ParserState DomainStructParentField =
+    ParserStateBuilder()
+        .parent(&DomainStructParent)
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainParentFieldHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
+                    Argument::Bool("isSubtree", false),
+                    Argument::Bool("optional", false)});
+
+static const ParserState DomainStructParentFieldRef =
+    ParserStateBuilder()
+        .parent(&DomainStructParent)
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainParentFieldRefHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME)});
+
+/* Typesystem states */
+static const ParserState Typesystem =
+    ParserStateBuilder()
+        .parents({&None, &Domain})
+        .createdNodeType(&RttiTypes::Typesystem)
+        .elementHandler(TypesystemHandler::create)
+        .arguments({Argument::String("name", "")});
+
+static const ParserState TypesystemEnum =
+    ParserStateBuilder()
+        .parent(&Typesystem)
+        .createdNodeType(&RttiTypes::EnumType)
+        .elementHandler(TypesystemEnumHandler::create)
+        .arguments({Argument::String("name")});
+
+static const ParserState TypesystemEnumEntry =
+    ParserStateBuilder()
+        .parent(&TypesystemEnum)
+        .elementHandler(TypesystemEnumEntryHandler::create)
+        .arguments({});
+
+static const ParserState TypesystemStruct =
+    ParserStateBuilder()
+        .parent(&Typesystem)
+        .createdNodeType(&RttiTypes::StructType)
+        .elementHandler(TypesystemStructHandler::create)
+        .arguments({Argument::String("name"), Argument::String("parent", "")});
+
+static const ParserState TypesystemStructField =
+    ParserStateBuilder()
+        .parent(&TypesystemStruct)
+        .elementHandler(TypesystemStructFieldHandler::create)
+        .arguments({Argument::String("name"), Argument::String("type"),
+                    Argument::Any("default", Variant::fromObject(nullptr))});
+
+static const ParserState TypesystemConstant =
+    ParserStateBuilder()
+        .parent(&Typesystem)
+        .createdNodeType(&RttiTypes::Constant)
+        .elementHandler(TypesystemConstantHandler::create)
+        .arguments({Argument::String("name"), Argument::String("type"),
+                    Argument::Any("value")});
+
+/* Special states for import and include */
+static const ParserState Import =
+    ParserStateBuilder()
+        .parents({&Document, &Typesystem, &Domain})
+        .elementHandler(ImportHandler::create)
+        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
+                    Argument::String("src", "")});
+
+static const ParserState Include =
+    ParserStateBuilder()
+        .parent(&All)
+        .elementHandler(IncludeHandler::create)
+        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
+                    Argument::String("src", "")});
+
+static const std::multimap<std::string, const ParserState *> XmlStates{
+    {"document", &Document},  // keys: XML element names (see ParserStack)
+    {"*", &DocumentChild},  // NOTE(review): "*" looks like a wildcard; confirm
+    {"domain", &Domain},
+    {"struct", &DomainStruct},  // key reused below for TypesystemStruct
+    {"annotation", &DomainAnnotation},
+    {"attributes", &DomainAttributes},
+    {"attribute", &DomainAttribute},
+    {"field", &DomainField},  // key reused twice below (multimap)
+    {"fieldRef", &DomainFieldRef},  // key reused below
+    {"primitive", &DomainStructPrimitive},
+    {"child", &DomainStructChild},
+    {"parent", &DomainStructParent},
+    {"field", &DomainStructParentField},
+    {"fieldRef", &DomainStructParentFieldRef},
+    {"typesystem", &Typesystem},
+    {"enum", &TypesystemEnum},
+    {"entry", &TypesystemEnumEntry},
+    {"struct", &TypesystemStruct},
+    {"field", &TypesystemStructField},
+    {"constant", &TypesystemConstant},
+    {"import", &Import},
+    {"include", &Include}};
+}
+
+/**
+ * Private data handed to the expat callbacks via XML_SetUserData: the current
+ * element nesting depth plus the stack and reader used while parsing.
+ */
+struct XMLUserData {
+	/**
+	 * Nesting depth of the XML element that is currently being processed.
+	 */
+	size_t depth = 0;
+
+	/**
+	 * Pointer to the ParserStack instance.
+	 */
+	ParserStack *stack;
+
+	/**
+	 * Pointer to the CharReader instance.
+	 */
+	CharReader *reader;
+
+	/**
+	 * Creates the user data, the depth starts at zero.
+	 *
+	 * @param stack is a pointer to the ParserStack instance.
+	 * @param reader is a pointer to the CharReader instance.
+	 */
+	XMLUserData(ParserStack *stack, CharReader *reader)
+	    : stack(stack), reader(reader)
+	{
+	}
+};
+
+/**
+ * Wrapper class around the XML_Parser pointer which safely frees it whenever
+ * the scope is left (e.g. because an exception was thrown). As sole owner of
+ * the parser the wrapper cannot be copied.
+ */
+class ScopedExpatXmlParser {
+private:
+	/**
+	 * Internal pointer to the XML_Parser instance, owned by this object.
+	 */
+	XML_Parser parser;
+
+public:
+	/**
+	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreate
+	 * from the expat library. Throws a LoggableException if the XML parser
+	 * cannot be initialized.
+	 *
+	 * @param encoding is the protocol-defined encoding passed to expat (or
+	 * nullptr if expat should determine the encoding by itself).
+	 */
+	explicit ScopedExpatXmlParser(const XML_Char *encoding)
+	    : parser(XML_ParserCreate(encoding))
+	{
+		if (!parser) {
+			throw LoggableException{
+			    "Internal error: Could not create expat XML parser!"};
+		}
+	}
+
+	// Not copyable: a copy would hand the same XML_Parser to XML_ParserFree
+	// a second time when it is destroyed.
+	ScopedExpatXmlParser(const ScopedExpatXmlParser &) = delete;
+	ScopedExpatXmlParser &operator=(const ScopedExpatXmlParser &) = delete;
+
+	/**
+	 * Destructor, frees the XML parser instance (which is never null here).
+	 */
+	~ScopedExpatXmlParser() { XML_ParserFree(parser); }
+
+	/**
+	 * Returns the XML_Parser pointer.
+	 */
+	XML_Parser operator&() { return parser; }
+};
+
+/* Adapter Expat -> ParserStack */
+
+static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0)
+{
+	// Reach the parser stack through the user data registered with expat
+	ParserStack *stack =
+	    static_cast<XMLUserData *>(XML_GetUserData(p))->stack;
+
+	// Byte range expat is currently working on
+	const size_t start = XML_GetCurrentByteIndex(p);
+	const size_t end = start + len;
+
+	// Build the location for the source of the current context, make it the
+	// default location of the logger and hand it back to the caller
+	SourceLocation loc{stack->getContext().getSourceId(), start, end};
+	stack->getContext().getLogger().setDefaultLocation(loc);
+	return loc;
+}
+
+enum class XMLAttributeState {  // States of the attribute offset scanner
+	IN_TAG_NAME,      // Initial state, inside the element name
+	SEARCH_ATTR,      // Skipping whitespace in front of an attribute name
+	IN_ATTR_NAME,     // Collecting the characters of an attribute name
+	HAS_ATTR_NAME,    // Name is complete, waiting for the '='
+	HAS_ATTR_EQUALS,  // '=' was read, waiting for the opening quote
+	IN_ATTR_DATA      // Inside the attribute value, up to the closing quote
+};
+
+/**
+ * Re-scans the start tag at the given location and records, for each
+ * attribute, the offset of the first character of its value. Expat does not
+ * report these offsets; they are only used for precise error messages.
+ * @param reader is the reader expat is fed from, only a fork of it is read.
+ * @param location is the location at which the start tag begins.
+ * @return the value locations by attribute name, empty if seeking failed.
+ */
+static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets(
+    CharReader &reader, SourceLocation location)
+{
+	std::map<std::string, SourceLocation> res;
+
+	// Fork the reader, we don't want to mess up the XML parsing process
+	CharReaderFork readerFork = reader.fork();
+
+	// Move the read cursor to the start location, abort if this does not work
+	size_t offs = location.getStart();
+	if (!location.isValid() || offs != readerFork.seek(offs)) {
+		return res;
+	}
+
+	// This is only a rough approximation of an XML tag scanner, which is fine:
+	// expat has already accepted the tag, we merely recover the byte offsets.
+	// The name lives in a std::string, as std::stringstream::str(s) keeps the
+	// write position at the start and later insertions would overwrite s.
+	XMLAttributeState state = XMLAttributeState::IN_TAG_NAME;
+	char c;
+	char quote = '"';  // Quote character that opened the current value
+	std::string attrName;
+	while (readerFork.read(c)) {
+		// Abort at the end of the tag, a '>' inside of a value is just data
+		if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) {
+			return res;
+		}
+		switch (state) {
+			case XMLAttributeState::IN_TAG_NAME:
+				if (Utils::isWhitespace(c)) {
+					state = XMLAttributeState::SEARCH_ATTR;
+				}
+				break;
+			case XMLAttributeState::SEARCH_ATTR:
+				if (!Utils::isWhitespace(c)) {
+					state = XMLAttributeState::IN_ATTR_NAME;
+					attrName.push_back(c);
+				}
+				break;
+			case XMLAttributeState::IN_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					state = XMLAttributeState::HAS_ATTR_NAME;
+				} else if (c == '=') {
+					state = XMLAttributeState::HAS_ATTR_EQUALS;
+				} else {
+					attrName.push_back(c);
+				}
+				break;
+			case XMLAttributeState::HAS_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					// Skip whitespace between the name and the '='
+					break;
+				}
+				if (c == '=') {
+					state = XMLAttributeState::HAS_ATTR_EQUALS;
+					break;
+				}
+			// The '=' is missing, let HAS_ATTR_EQUALS try to recover
+			// Fallthrough
+			case XMLAttributeState::HAS_ATTR_EQUALS:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '"' || c == '\'') {
+						// The value starts right behind the opening quote.
+						// Only the same quote character ends it again.
+						quote = c;
+						res.emplace(attrName,
+						            SourceLocation{reader.getSourceId(),
+						                           readerFork.getOffset()});
+						attrName.clear();
+						state = XMLAttributeState::IN_ATTR_DATA;
+					} else {
+						// Not well formed, assume a new attribute name starts
+						attrName.assign(1, c);
+						state = XMLAttributeState::IN_ATTR_NAME;
+					}
+				}
+				break;
+			case XMLAttributeState::IN_ATTR_DATA:
+				if (c == quote) {
+					// We're at the end of the attribute data, start anew
+					state = XMLAttributeState::SEARCH_ATTR;
+				}
+				break;
+		}
+	}
+	return res;
+}
+
+/**
+ * Expat start tag callback: converts the attributes into a Variant map and
+ * forwards the element to the ParserStack (p is the XML_Parser itself).
+ */
+static void xmlStartElementHandler(void *p, const XML_Char *name,
+                                   const XML_Char **attrs)
+{
+	XML_Parser parser = static_cast<XML_Parser>(p);
+	auto *userData = static_cast<XMLUserData *>(XML_GetUserData(parser));
+	ParserStack *stack = userData->stack;
+
+	// Update the logger position, remember the location of the tag
+	SourceLocation loc = syncLoggerPosition(parser);
+
+	// Expat does not give us the byte offsets of the attributes, so the tag
+	// is scanned a second time (a slow hack, needed for error locations)
+	std::map<std::string, SourceLocation> offs =
+	    reconstructXMLAttributeOffsets(*userData->reader, loc);
+
+	// Assemble the arguments. The attributes arrive as a null-terminated
+	// array of alternating key and value strings
+	Variant::mapType args;
+	for (const XML_Char **attr = attrs; *attr; attr += 2) {
+		const std::string key{attr[0]};
+		// Location of the value, stays default constructed if unknown
+		SourceLocation keyLoc;
+		auto it = offs.find(key);
+		if (it != offs.end()) {
+			keyLoc = it->second;
+		}
+
+		// Parse the value string and store the resulting variant
+		std::pair<bool, Variant> value = VariantReader::parseGenericString(
+		    attr[1], stack->getContext().getLogger(), keyLoc.getSourceId(),
+		    keyLoc.getStart());
+		args.emplace(key, value.second);
+	}
+
+	// Forward the element to the stack, except for a top-level "ousia" tag
+	if (userData->depth > 0 || std::string(name) != "ousia") {
+		stack->start(std::string(name), args, loc);
+	}
+
+	// Everything that follows is nested one level deeper
+	userData->depth++;
+}
+
+static void xmlEndElementHandler(void *p, const XML_Char *name)
+{
+	// Expat passes the parser itself as first argument
+	XML_Parser parser = static_cast<XML_Parser>(p);
+	auto *userData = static_cast<XMLUserData *>(XML_GetUserData(parser));
+
+	// Let the default logger location point at the closing tag
+	syncLoggerPosition(parser);
+
+	// Leave the element, "depth" is now the level of the closed element
+	const size_t depth = --userData->depth;
+
+	// Everything but the top-level "ousia" element is reported to the stack
+	if (depth > 0 || std::string(name) != "ousia") {
+		userData->stack->end();
+	}
+}
+
+static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len)
+{
+	XML_Parser parser = static_cast<XML_Parser>(p);
+	auto *userData = static_cast<XMLUserData *>(XML_GetUserData(parser));
+	// Treat a non-positive length as empty text, sync the logger position
+	const size_t ulen = (len <= 0) ? 0 : static_cast<size_t>(len);
+	syncLoggerPosition(parser, ulen);
+	// Hand the text to the stack unless trimming leaves nothing behind
+	const std::string text = Utils::trim(std::string{s, ulen});
+	if (!text.empty()) {
+		userData->stack->data(text);
+	}
+}
+
+/* Class XmlParser */
+
+/**
+ * Parses the data provided by the reader with expat and forwards the XML
+ * events to a ParserStack operating on the given context. Throws a
+ * LoggableException if expat runs out of memory or reports a parse error.
+ */
+void XmlParser::doParse(CharReader &reader, ParserContext &ctx)
+{
+	// Size of the chunks in which the input is handed to expat
+	constexpr size_t BUFFER_SIZE = 64 * 1024;
+
+	// Create the expat parser, it is freed as soon as this scope is left
+	ScopedExpatXmlParser p{"UTF-8"};
+
+	// Create the parser stack. When parsing into a non-empty scope the
+	// initial state has to be deduced first, give up if that is impossible
+	ParserStack stack(ctx, ParserStates::XmlStates);
+	if (!ctx.getScope().isEmpty() && !stack.deduceState()) {
+		return;
+	}
+
+	// Make the stack and the reader available to the handlers, which get
+	// the parser itself passed as their first argument
+	XMLUserData data(&stack, &reader);
+	XML_SetUserData(&p, &data);
+	XML_UseParserAsHandlerArg(&p);
+
+	// Register the callback functions
+	XML_SetStartElementHandler(&p, xmlStartElementHandler);
+	XML_SetEndElementHandler(&p, xmlEndElementHandler);
+	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler);
+
+	// Feed the input to expat chunk by chunk. The last chunk is the empty
+	// one, it is marked as final in the call to XML_ParseBuffer
+	size_t bytesRead;
+	do {
+		// Let expat provide the buffer the data is read into
+		void *buf = XML_GetBuffer(&p, BUFFER_SIZE);
+		if (buf == nullptr) {
+			throw LoggableException{
+			    "Internal error: XML parser out of memory!"};
+		}
+		bytesRead = reader.readRaw(static_cast<char *>(buf), BUFFER_SIZE);
+
+		// Parse the chunk, turn an expat error into an exception pointing
+		// at the byte offset reported by the parser
+		const bool isFinal = (bytesRead == 0);
+		if (!XML_ParseBuffer(&p, bytesRead, isFinal)) {
+			const size_t offs = XML_GetCurrentByteIndex(&p);
+			const XML_Error code = XML_GetErrorCode(&p);
+			const std::string msg{XML_ErrorString(code)};
+			throw LoggableException{"XML: " + msg,
+			                        SourceLocation{ctx.getSourceId(), offs}};
+		}
+	} while (bytesRead != 0);
+}
+}
+
diff --git a/src/formats/osdmx/OsdmxParser.hpp b/src/formats/osdmx/OsdmxParser.hpp
new file mode 100644
index 0000000..c8b6302
--- /dev/null
+++ b/src/formats/osdmx/OsdmxParser.hpp
@@ -0,0 +1,55 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsdmxParser.hpp
+ *
+ * Contains the parser responsible for reading Ousía XML Documents (extension
+ * oxd) and Ousía XML Modules (extension oxm).
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_XML_PARSER_HPP_
+#define _OUSIA_XML_PARSER_HPP_
+
+#include <core/parser/Parser.hpp>
+
+namespace ousia {
+
+/**
+ * The XmlParser class implements parsing the various types of Ousía XML
+ * documents using the expat stream XML parser.
+ */
+class XmlParser : public Parser {
+protected:
+	/**
+	 * Parses the given input stream as XML file. Nothing is returned: the
+	 * XML events are forwarded to a ParserStack working on the given context.
+	 *
+	 * @param reader is the CharReader from which the input should be read.
+	 * @param ctx is a reference to the ParserContext instance that should be
+	 * used.
+	 */
+	void doParse(CharReader &reader, ParserContext &ctx) override;
+};
+
+}
+
+#endif /* _OUSIA_XML_PARSER_HPP_ */
+
-- 
cgit v1.2.3


From 2b0632764c26728675090c4cd0920f1b7c093ed1 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:46:30 +0100
Subject: Moved textHandlers to whitespace handlers

---
 src/formats/osdm/DynamicTokenizer.cpp | 251 ++++++----------------------------
 src/formats/osdm/DynamicTokenizer.hpp |  23 +---
 2 files changed, 45 insertions(+), 229 deletions(-)

(limited to 'src/formats')

diff --git a/src/formats/osdm/DynamicTokenizer.cpp b/src/formats/osdm/DynamicTokenizer.cpp
index f2cfcd1..1fac25a 100644
--- a/src/formats/osdm/DynamicTokenizer.cpp
+++ b/src/formats/osdm/DynamicTokenizer.cpp
@@ -22,6 +22,7 @@
 #include <core/common/CharReader.hpp>
 #include <core/common/Exceptions.hpp>
 #include <core/common/Utils.hpp>
+#include <core/common/WhitespaceHandler.hpp>
 
 #include "DynamicTokenizer.hpp"
 
@@ -102,8 +103,8 @@ public:
 	 * @param textLength is the text buffer length of the previous text token.
 	 * @param textEnd is the current end location of the previous text token.
 	 */
-	TokenLookup(const TokenTrie::Node *node, size_t start,
-	            size_t textLength, size_t textEnd)
+	TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength,
+	            size_t textEnd)
 	    : node(node), start(start), textLength(textLength), textEnd(textEnd)
 	{
 	}
@@ -155,192 +156,29 @@ public:
 	}
 };
 
-/* Internal class TextHandlerBase */
-
 /**
- * Base class used for those classes that may be used as TextHandler in the
- * DynamicTokenizer::next function.
+ * Transforms the given token into a text token containing the extracted
+ * text.
+ *
+ * @param handler is the WhitespaceHandler containing the collected data.
+ * @param token is the output token to which the text should be written.
+ * @param sourceId is the source id of the underlying file.
  */
-class TextHandlerBase {
-public:
-	/**
-	 * Start position of the extracted text.
-	 */
-	size_t textStart;
-
-	/**
-	 * End position of the extracted text.
-	 */
-	size_t textEnd;
-
-	/**
-	 * Buffer containing the extracted text.
-	 */
-	std::vector<char> textBuf;
-
-	/**
-	 * Constructor of the TextHandlerBase base class. Initializes the start and
-	 * end position with zeros.
-	 */
-	TextHandlerBase() : textStart(0), textEnd(0) {}
-
-	/**
-	 * Transforms the given token into a text token containing the extracted
-	 * text.
-	 *
-	 * @param token is the output token to which the text should be written.
-	 * @param sourceId is the source id of the underlying file.
-	 */
-	void buildTextToken(TokenMatch &match, SourceId sourceId)
-	{
-		if (match.hasMatch()) {
-			match.token.content =
-			    std::string{textBuf.data(), match.textLength};
-			match.token.location =
-			    SourceLocation{sourceId, textStart, match.textEnd};
-		} else {
-			match.token.content = std::string{textBuf.data(), textBuf.size()};
-			match.token.location = SourceLocation{sourceId, textStart, textEnd};
-		}
-		match.token.type = TextToken;
-	}
-
-	/**
-	 * Returns true if this whitespace handler has found any text and a text
-	 * token could be emitted.
-	 *
-	 * @return true if the internal data buffer is non-empty.
-	 */
-	bool hasText() { return !textBuf.empty(); }
-};
-
-/* Internal class PreservingTextHandler */
-
-/**
- * The PreservingTextHandler class preserves all characters unmodified,
- * including whitepace characters.
- */
-class PreservingTextHandler : public TextHandlerBase {
-public:
-	using TextHandlerBase::TextHandlerBase;
-
-	/**
-	 * Appends the given character to the internal text buffer, does not
-	 * eliminate whitespace.
-	 *
-	 * @param c is the character that should be appended to the internal buffer.
-	 * @param start is the start byte offset of the given character.
-	 * @param end is the end byte offset of the given character.
-	 */
-	void append(char c, size_t start, size_t end)
-	{
-		if (textBuf.empty()) {
-			textStart = start;
-		}
-		textEnd = end;
-		textBuf.push_back(c);
-	}
-};
-
-/* Internal class TrimmingTextHandler */
-
-/**
- * The TrimmingTextHandler class trims all whitespace characters at the begin
- * and the end of a text section but leaves all other characters unmodified,
- * including whitepace characters.
- */
-class TrimmingTextHandler : public TextHandlerBase {
-public:
-	using TextHandlerBase::TextHandlerBase;
-
-	/**
-	 * Buffer used internally to temporarily store all whitespace characters.
-	 * They are only added to the output buffer if another non-whitespace
-	 * character is reached.
-	 */
-	std::vector<char> whitespaceBuf;
-
-	/**
-	 * Appends the given character to the internal text buffer, eliminates
-	 * whitespace characters at the begin and end of the text.
-	 *
-	 * @param c is the character that should be appended to the internal buffer.
-	 * @param start is the start byte offset of the given character.
-	 * @param end is the end byte offset of the given character.
-	 */
-	void append(char c, size_t start, size_t end)
-	{
-		// Handle whitespace characters
-		if (Utils::isWhitespace(c)) {
-			if (!textBuf.empty()) {
-				whitespaceBuf.push_back(c);
-			}
-			return;
-		}
-
-		// Set the start and end offset correctly
-		if (textBuf.empty()) {
-			textStart = start;
-		}
-		textEnd = end;
-
-		// Store the character
-		if (!whitespaceBuf.empty()) {
-			textBuf.insert(textBuf.end(), whitespaceBuf.begin(),
-			               whitespaceBuf.end());
-			whitespaceBuf.clear();
-		}
-		textBuf.push_back(c);
-	}
-};
-
-/* Internal class CollapsingTextHandler */
-
-/**
- * The CollapsingTextHandler trims characters at the beginning and end of the
- * text and reduced multiple whitespace characters to a single blank.
- */
-class CollapsingTextHandler : public TextHandlerBase {
-public:
-	using TextHandlerBase::TextHandlerBase;
-
-	/**
-	 * Flag set to true if a whitespace character was reached.
-	 */
-	bool hasWhitespace = false;
-
-	/**
-	 * Appends the given character to the internal text buffer, eliminates
-	 * redundant whitespace characters.
-	 *
-	 * @param c is the character that should be appended to the internal buffer.
-	 * @param start is the start byte offset of the given character.
-	 * @param end is the end byte offset of the given character.
-	 */
-	void append(char c, size_t start, size_t end)
-	{
-		// Handle whitespace characters
-		if (Utils::isWhitespace(c)) {
-			if (!textBuf.empty()) {
-				hasWhitespace = true;
-			}
-			return;
-		}
-
-		// Set the start and end offset correctly
-		if (textBuf.empty()) {
-			textStart = start;
-		}
-		textEnd = end;
-
-		// Store the character
-		if (hasWhitespace) {
-			textBuf.push_back(' ');
-			hasWhitespace = false;
-		}
-		textBuf.push_back(c);
+static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
+                           SourceId sourceId)
+{
+	if (match.hasMatch()) {
+		match.token.content =
+		    std::string{handler.textBuf.data(), match.textLength};
+		match.token.location =
+		    SourceLocation{sourceId, handler.textStart, match.textEnd};
+	} else {
+		match.token.content = handler.toString();
+		match.token.location =
+		    SourceLocation{sourceId, handler.textStart, handler.textEnd};
 	}
-};
+	match.token.type = TextToken;
+}
 }
 
 /* Class DynamicTokenizer */
@@ -409,9 +247,8 @@ bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
 	}
 
 	// If we found text, emit that text
-	if (textHandler.hasText() &&
-	    (!match.hasMatch() || match.textLength > 0)) {
-		textHandler.buildTextToken(match, sourceId);
+	if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) {
+		buildTextToken(textHandler, match, sourceId);
 	}
 
 	// Move the read/peek cursor to the end of the token, abort if an error
@@ -436,28 +273,28 @@ bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
 	return match.hasMatch();
 }
 
-bool DynamicTokenizer::read(CharReader &reader,DynamicToken &token)
+bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token)
 {
 	switch (whitespaceMode) {
 		case WhitespaceMode::PRESERVE:
-			return next<PreservingTextHandler, true>(reader, token);
+			return next<PreservingWhitespaceHandler, true>(reader, token);
 		case WhitespaceMode::TRIM:
-			return next<TrimmingTextHandler, true>(reader, token);
+			return next<TrimmingWhitespaceHandler, true>(reader, token);
 		case WhitespaceMode::COLLAPSE:
-			return next<CollapsingTextHandler, true>(reader, token);
+			return next<CollapsingWhitespaceHandler, true>(reader, token);
 	}
 	return false;
 }
 
-bool DynamicTokenizer::peek(CharReader &reader,DynamicToken &token)
+bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token)
 {
 	switch (whitespaceMode) {
 		case WhitespaceMode::PRESERVE:
-			return next<PreservingTextHandler, false>(reader, token);
+			return next<PreservingWhitespaceHandler, false>(reader, token);
 		case WhitespaceMode::TRIM:
-			return next<TrimmingTextHandler, false>(reader, token);
+			return next<TrimmingWhitespaceHandler, false>(reader, token);
 		case WhitespaceMode::COLLAPSE:
-			return next<CollapsingTextHandler, false>(reader, token);
+			return next<CollapsingWhitespaceHandler, false>(reader, token);
 	}
 	return false;
 }
@@ -493,7 +330,7 @@ TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
 	// Try to register the token in the trie -- if this fails, remove it
 	// from the tokens list
 	if (!trie.registerToken(token, type)) {
-		tokens[type] = std::string();
+		tokens[type] = std::string{};
 		nextTokenTypeId = type;
 		return EmptyToken;
 	}
@@ -528,17 +365,17 @@ WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
 
 /* Explicitly instantiate all possible instantiations of the "next" member
    function */
-template bool DynamicTokenizer::next<PreservingTextHandler, false>(
+template bool DynamicTokenizer::next<PreservingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<PreservingWhitespaceHandler, true>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, true>(
     CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingTextHandler, false>(
+template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, true>(
     CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingTextHandler, false>(
-    CharReader &reader,DynamicToken &token);
-template bool DynamicTokenizer::next<PreservingTextHandler, true>(
-    CharReader &reader,DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingTextHandler, true>(
-    CharReader &reader,DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingTextHandler, true>(
-    CharReader &reader,DynamicToken &token);
 }
 
diff --git a/src/formats/osdm/DynamicTokenizer.hpp b/src/formats/osdm/DynamicTokenizer.hpp
index 0cac2e8..3e5aeb3 100644
--- a/src/formats/osdm/DynamicTokenizer.hpp
+++ b/src/formats/osdm/DynamicTokenizer.hpp
@@ -33,6 +33,7 @@
 #include <vector>
 
 #include <core/common/Location.hpp>
+#include <core/common/Whitespace.hpp>
 
 #include "TokenTrie.hpp"
 
@@ -95,28 +96,6 @@ struct DynamicToken {
 	const SourceLocation &getLocation() const { return location; }
 };
 
-/**
- * Enum specifying the whitespace handling of the DynamicTokenizer class when
- * reading non-token text.
- */
-enum class WhitespaceMode {
-	/**
-     * Preserves all whitespaces as they are found in the source file.
-     */
-	PRESERVE,
-
-	/**
-     * Trims whitespace at the beginning and the end of the found text.
-     */
-	TRIM,
-
-	/**
-     * Whitespaces are trimmed and collapsed, multiple whitespace characters
-     * are replaced by a single space character.
-     */
-	COLLAPSE
-};
-
 /**
  * The DynamicTokenizer is used to extract tokens and chunks of text from a
  * CharReader. It allows to register and unregister tokens while parsing and
-- 
cgit v1.2.3


From 65bbbd778f6e0a3668c859b0e22cced7075a726d Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:47:11 +0100
Subject: Moved DynamicTokenizer and TokenTrie to parser/utils

---
 src/core/parser/utils/TokenTrie.cpp        | 119 +++++++++
 src/core/parser/utils/TokenTrie.hpp        | 150 +++++++++++
 src/core/parser/utils/Tokenizer.cpp        | 381 ++++++++++++++++++++++++++
 src/core/parser/utils/Tokenizer.hpp        | 231 ++++++++++++++++
 src/formats/osdm/DynamicTokenizer.cpp      | 381 --------------------------
 src/formats/osdm/DynamicTokenizer.hpp      | 231 ----------------
 src/formats/osdm/TokenTrie.cpp             | 119 ---------
 src/formats/osdm/TokenTrie.hpp             | 150 -----------
 test/core/parser/utils/TokenTrieTest.cpp   |  92 +++++++
 test/core/parser/utils/TokenizerTest.cpp   | 415 +++++++++++++++++++++++++++++
 test/formats/osdm/DynamicTokenizerTest.cpp | 415 -----------------------------
 test/formats/osdm/TokenTrieTest.cpp        |  92 -------
 12 files changed, 1388 insertions(+), 1388 deletions(-)
 create mode 100644 src/core/parser/utils/TokenTrie.cpp
 create mode 100644 src/core/parser/utils/TokenTrie.hpp
 create mode 100644 src/core/parser/utils/Tokenizer.cpp
 create mode 100644 src/core/parser/utils/Tokenizer.hpp
 delete mode 100644 src/formats/osdm/DynamicTokenizer.cpp
 delete mode 100644 src/formats/osdm/DynamicTokenizer.hpp
 delete mode 100644 src/formats/osdm/TokenTrie.cpp
 delete mode 100644 src/formats/osdm/TokenTrie.hpp
 create mode 100644 test/core/parser/utils/TokenTrieTest.cpp
 create mode 100644 test/core/parser/utils/TokenizerTest.cpp
 delete mode 100644 test/formats/osdm/DynamicTokenizerTest.cpp
 delete mode 100644 test/formats/osdm/TokenTrieTest.cpp

(limited to 'src/formats')

diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp
new file mode 100644
index 0000000..4a0430b
--- /dev/null
+++ b/src/core/parser/utils/TokenTrie.cpp
@@ -0,0 +1,119 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "TokenTrie.hpp"
+
+namespace ousia {
+
+/* Class TokenTrie::Node */
+
+TokenTrie::Node::Node() : type(EmptyToken) {}  // New nodes carry no token
+
+/* Class TokenTrie */
+
+bool TokenTrie::registerToken(const std::string &token,
+                              TokenTypeId type) noexcept
+{
+	// An empty token would end at the root node, which must stay untyped
+	if (token.empty()) {
+		return false;
+	}
+
+	// Descend along the characters of the token, creating the nodes that
+	// are still missing on the way
+	Node *current = &root;
+	for (const char c : token) {
+		auto &children = current->children;
+		auto pos = children.find(c);
+		if (pos == children.end()) {
+			pos = children.emplace(c, std::make_shared<Node>()).first;
+		}
+		current = pos->second.get();
+	}
+
+	// The final node can only be claimed if no other token ends there,
+	// otherwise the trie is left as it was
+	const bool isFree = (current->type == EmptyToken);
+	if (isFree) {
+		current->type = type;
+	}
+
+	// Tell the caller whether the token was registered
+	return isFree;
+}
+
+bool TokenTrie::unregisterToken(const std::string &token) noexcept
+{
+	// We cannot remove empty tokens as we need to access the first character
+	// upfront
+	if (token.empty()) {
+		return false;
+	}
+
+	// First pass -- find the edge (subtreeRoot, subtreeKey) that can be cut
+	Node *subtreeRoot = &root;
+	char subtreeKey = token[0];
+	Node *node = &root;
+	for (size_t i = 0; i < token.size(); i++) {
+		// Go to the next node, abort if the tree ends unexpectedly
+		auto it = node->children.find(token[i]);
+		if (it == node->children.end()) {
+			return false;
+		}
+
+		// Inner nodes with an own type or several children must be kept
+		node = it->second.get();
+		if ((node->type != EmptyToken || node->children.size() > 1) &&
+		    (i + 1 != token.size())) {
+			subtreeRoot = node;
+			subtreeKey = token[i + 1];
+		}
+	}
+
+	// If the node type is already EmptyToken, the token was not registered
+	if (node->type == EmptyToken) {
+		return false;
+	}
+
+	// If the target node has children, longer tokens still pass through it.
+	// Only clear its type instead of deleting the subtree
+	if (!node->children.empty()) {
+		node->type = EmptyToken;
+		return true;
+	}
+
+	// If we end up here, we can safely delete the complete subtree
+	subtreeRoot->children.erase(subtreeKey);
+	return true;
+}
+
+TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept
+{
+	// Walk down from the root, a missing child means the token is unknown
+	const Node *current = &root;
+	for (const char c : token) {
+		const auto pos = current->children.find(c);
+		if (pos == current->children.end()) {
+			return EmptyToken;
+		}
+		current = pos->second.get();
+	}
+	return current->type;
+}
+}
+
diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp
new file mode 100644
index 0000000..36c2ffa
--- /dev/null
+++ b/src/core/parser/utils/TokenTrie.hpp
@@ -0,0 +1,150 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file TokenTrie.hpp
+ *
+ * Class representing a token trie that can be updated dynamically.
+ *
+ * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de)
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_TOKEN_TRIE_HPP_
+#define _OUSIA_TOKEN_TRIE_HPP_
+
+#include <cstdint>
+#include <memory>
+#include <limits>
+#include <unordered_map>
+
+namespace ousia {
+
+/**
+ * Integer id identifying a token type, stored in the nodes of the TokenTrie.
+ */
+using TokenTypeId = uint32_t;
+
+/**
+ * Reserved id meaning "no token": the largest value a TokenTypeId can hold.
+ */
+constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max();
+
+/**
+ * Reserved id for text tokens: the second largest TokenTypeId value.
+ */
+constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1;
+
+/**
+ * The Tokenizer internally uses a TokenTrie to be efficiently able to identify
+ * the longest consecutive token in the text. This is equivalent to a prefix
+ * trie.
+ *
+ * A token trie is a construct that structures all special tokens a Tokenizer
+ * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and
+ * three. Then the token tree would look like this:
+ *
+ * \code{*.txt}
+ *        ~ (0)
+ *       /     \
+ *      a (2)  b (0)
+ *      |      |
+ *      a (0)  a (0)
+ *      |      |
+ *      b (1)  c (3)
+ * \endcode
+ *
+ * The numbers are the token type ids, "0" marks nodes without a token.
+ */
+class TokenTrie {
+public:
+	/**
+	 * Structure used to build the node tree.
+	 */
+	struct Node {
+		/**
+		 * Type used for the child map.
+		 */
+		using ChildMap = std::unordered_map<char, std::shared_ptr<Node>>;
+
+		/**
+		 * Map from single characters to the corresponding child nodes.
+		 */
+		ChildMap children;
+
+		/**
+		 * Id of the token type that ends at this node. The value EmptyToken
+		 * means that no token is attached to this node.
+		 */
+		TokenTypeId type;
+
+		/**
+		 * Default constructor, presumably sets type to EmptyToken (confirm).
+		 */
+		Node();
+	};
+
+private:
+	/**
+	 * Root node of the internal token tree.
+	 */
+	Node root;
+
+public:
+	/**
+	 * Registers a token containing the given string. Returns false if the
+	 * token already exists, true otherwise.
+	 *
+	 * @param token is the character sequence that should be registered as
+	 * token.
+	 * @param type is the token type id that should be set for this token.
+	 * @return true if the operation is successful, false otherwise.
+	 */
+	bool registerToken(const std::string &token, TokenTypeId type) noexcept;
+
+	/**
+	 * Unregisters the token from the token tree. Returns true if the token was
+	 * unregistered successfully, false otherwise.
+	 *
+	 * @param token is the character sequence that should be unregistered.
+	 * @return true if the operation was successful, false otherwise.
+	 */
+	bool unregisterToken(const std::string &token) noexcept;
+
+	/**
+	 * Looks up the given token and returns the type id attached to it. This
+	 * function is mostly thought for debugging and unit testing.
+	 *
+	 * @param token is the character sequence that should be searched.
+	 * @return the attached token type id or EmptyToken if the given token is
+	 * not found.
+	 */
+	TokenTypeId hasToken(const std::string &token) const noexcept;
+
+	/**
+	 * Returns a pointer to the root node to be used for traversing the token
+	 * tree.
+	 *
+	 * @return a pointer to the root node.
+	 */
+	const Node *getRoot() const noexcept { return &root; }
+};
+}
+
+#endif /* _OUSIA_TOKEN_TRIE_HPP_ */
+
diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp
new file mode 100644
index 0000000..1fac25a
--- /dev/null
+++ b/src/core/parser/utils/Tokenizer.cpp
@@ -0,0 +1,381 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <memory>
+#include <vector>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Exceptions.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/WhitespaceHandler.hpp>
+
+#include "DynamicTokenizer.hpp"
+
+namespace ousia {
+
+namespace {
+
+/* Internal class TokenMatch */
+
+/**
+ * Best token match found so far, plus the text state to restore for it.
+ */
+struct TokenMatch {
+	/**
+	 * Matched token. Its type is EmptyToken as long as nothing has matched.
+	 */
+	DynamicToken token;
+
+	/**
+	 * Length of the data within the text handler at the time the lookup of
+	 * this token started. The text buffer is cut to this length on a match.
+	 */
+	size_t textLength;
+
+	/**
+	 * End location of the text handler at the time the lookup started. Used
+	 * as end of the text token that is emitted before the actual token.
+	 */
+	size_t textEnd;
+
+	/**
+	 * Creates an instance without a match and with zeroed text state.
+	 */
+	TokenMatch() : textLength(0), textEnd(0) {}
+
+	/**
+	 * Returns true if a token was stored. Const, as it only reads the type.
+	 */
+	bool hasMatch() const { return token.type != EmptyToken; }
+};
+
+/* Internal class TokenLookup */
+
+/**
+ * The TokenLookup class is used to represent a thread in a running token
+ * lookup, i.e. one partially matched path through the token trie.
+ */
+class TokenLookup {
+private:
+	/**
+	 * Current node within the token trie.
+	 */
+	TokenTrie::Node const *node;
+
+	/**
+	 * Start offset within the source file.
+	 */
+	size_t start;
+
+	/**
+	 * Current length of the data within the text handler. The text buffer needs
+	 * to be trimmed to this length if this token matches.
+	 */
+	size_t textLength;
+
+	/**
+	 * End location of the current text handler. This location needs to be used
+	 * for the text token that is emitted before the actual token.
+	 */
+	size_t textEnd;
+
+public:
+	/**
+	 * Constructor of the TokenLookup class.
+	 *
+	 * @param node is the current node.
+	 * @param start is the start position.
+	 * @param textLength is the text buffer length of the previous text token.
+	 * @param textEnd is the current end location of the previous text token.
+	 */
+	TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength,
+	            size_t textEnd)
+	    : node(node), start(start), textLength(textLength), textEnd(textEnd)
+	{
+	}
+
+	/**
+	 * Tries to extend the current path in the token trie with the given
+	 * character. If a complete token is matched, stores this match in the
+	 * given match structure (in case it is longer than any previous token).
+	 *
+	 * @param c is the character that should be appended to the current prefix.
+	 * @param lookups is a list to which new TokenLookup instances are added --
+	 * which could potentially be expanded in the next iteration.
+	 * @param match is the TokenMatch instance to which the matching token
+	 * should be written.
+	 * @param tokens is a reference at the internal token list of the
+	 * DynamicTokenizer.
+	 * @param end is the end byte offset of the current character.
+	 * @param sourceId is the source id of this file.
+	 */
+	void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match,
+	             const std::vector<std::string> &tokens, SourceOffset end,
+	             SourceId sourceId)
+	{
+		// Check whether the trie has an edge for the given character below the
+		// current node -- if not, this lookup ends here and is dropped
+		auto it = node->children.find(c);
+		if (it == node->children.end()) {
+			return;
+		}
+
+		// Check whether the new node represents a complete token and whether it
+		// is longer than the current token. If yes, replace the current token.
+		node = it->second.get();
+		if (node->type != EmptyToken) {
+			const std::string &str = tokens[node->type];
+			size_t len = str.size();
+			if (len > match.token.content.size()) {
+				match.token =
+				    DynamicToken{node->type, str, {sourceId, start, end}};
+				match.textLength = textLength;
+				match.textEnd = textEnd;
+			}
+		}
+
+		// If this state can be advanced further, store a copy in the list
+		if (!node->children.empty()) {
+			lookups.emplace_back(*this);
+		}
+	}
+};
+
+/**
+ * Turns the given match into a text token. If a token was matched behind the
+ * text, the text is cut off at the state recorded for that token.
+ *
+ * @param handler is the WhitespaceHandler containing the collected data.
+ * @param match holds the output token; content, location and type are set.
+ * @param sourceId is the source id of the underlying file.
+ */
+static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
+                           SourceId sourceId)
+{
+	auto &text = match.token;
+	if (!match.hasMatch()) {
+		text.content = handler.toString();
+		text.location =
+		    SourceLocation{sourceId, handler.textStart, handler.textEnd};
+	} else {
+		text.content = std::string{handler.textBuf.data(), match.textLength};
+		text.location =
+		    SourceLocation{sourceId, handler.textStart, match.textEnd};
+	}
+	text.type = TextToken;
+}
+}
+
+/* Class DynamicTokenizer */
+// Stores the whitespace mode; the search for a free token id starts at zero
+DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
+    : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
+{
+}
+
+template <typename TextHandler, bool read>
+bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
+{
+	// If we're in the read mode, reset the char reader peek position to the
+	// current read position
+	if (read) {
+		reader.resetPeek();
+	}
+
+	// Prepare the lookups in the token trie
+	const TokenTrie::Node *root = trie.getRoot();
+	TokenMatch match;
+	std::vector<TokenLookup> lookups;
+	std::vector<TokenLookup> nextLookups;
+
+	// Instantiate the text handler
+	TextHandler textHandler;
+
+	// Peek characters from the reader and try to advance the current token tree
+	// cursor
+	char c;
+	size_t charStart = reader.getPeekOffset();
+	const SourceId sourceId = reader.getSourceId();
+	while (reader.peek(c)) {
+		const size_t charEnd = reader.getPeekOffset();
+		const size_t textLength = textHandler.textBuf.size();
+		const size_t textEnd = textHandler.textEnd;
+
+		// If we do not have a match yet, start a new lookup from the root
+		if (!match.hasMatch()) {
+			TokenLookup{root, charStart, textLength, textEnd}.advance(
+			    c, nextLookups, match, tokens, charEnd, sourceId);
+		}
+
+		// Try to advance all other lookups with the new character
+		for (TokenLookup &lookup : lookups) {
+			lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId);
+		}
+
+		// We have found a token and there are no more states to advance or the
+		// text handler has found something -- abort to return the new token
+		if (match.hasMatch()) {
+			if ((nextLookups.empty() || textHandler.hasText())) {
+				break;
+			}
+		} else {
+			// Record all incoming characters
+			textHandler.append(c, charStart, charEnd);
+		}
+
+		// Swap the lookups and the nextLookups list
+		lookups = std::move(nextLookups);
+		nextLookups.clear();
+
+		// Advance the offset
+		charStart = charEnd;
+	}
+
+	// Emit collected text first, unless a token started before any text
+	if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) {
+		buildTextToken(textHandler, match, sourceId);
+	}
+
+	// Move the read/peek cursor to the end of the emitted token (this may be
+	// the text token built above), throw if its end offset is invalid
+	if (match.hasMatch()) {
+		// Make sure we have a valid location
+		if (match.token.location.getEnd() == InvalidSourceOffset) {
+			throw OusiaException{"Token end position offset out of range"};
+		}
+
+		// Seek to the end of the current token
+		const size_t end = match.token.location.getEnd();
+		if (read) {
+			reader.seek(end);
+		} else {
+			reader.seekPeekCursor(end);
+		}
+		token = match.token;
+	} else {
+		token = DynamicToken{};
+	}
+	return match.hasMatch();
+}
+
+bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token)
+{
+	// Pick the text handler for the active mode; "true" selects read mode
+	if (whitespaceMode == WhitespaceMode::PRESERVE) {
+		return next<PreservingWhitespaceHandler, true>(reader, token);
+	} else if (whitespaceMode == WhitespaceMode::TRIM) {
+		return next<TrimmingWhitespaceHandler, true>(reader, token);
+	} else if (whitespaceMode == WhitespaceMode::COLLAPSE) {
+		return next<CollapsingWhitespaceHandler, true>(reader, token);
+	}
+	return false;
+}
+
+bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token)
+{
+	// Pick the text handler for the active mode; "false" selects peek mode
+	if (whitespaceMode == WhitespaceMode::PRESERVE) {
+		return next<PreservingWhitespaceHandler, false>(reader, token);
+	} else if (whitespaceMode == WhitespaceMode::TRIM) {
+		return next<TrimmingWhitespaceHandler, false>(reader, token);
+	} else if (whitespaceMode == WhitespaceMode::COLLAPSE) {
+		return next<CollapsingWhitespaceHandler, false>(reader, token);
+	}
+	return false;
+}
+
+TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
+{
+	// The empty string marks free slots in the list, it cannot be a token
+	if (token.empty()) {
+		return EmptyToken;
+	}
+
+	// Skip the occupied slots behind nextTokenTypeId, reuse the first free one
+	TokenTypeId type = EmptyToken;
+	size_t slot = nextTokenTypeId;
+	while (slot < tokens.size() && !tokens[slot].empty()) {
+		slot++;
+	}
+	if (slot < tokens.size()) {
+		tokens[slot] = token;
+		type = slot;
+	}
+
+	// Nothing to reuse: append a slot, but never hand out the reserved ids
+	if (type == EmptyToken) {
+		type = tokens.size();
+		if (type == EmptyToken || type == TextToken) {
+			throw OusiaException{"Token type ids depleted!"};
+		}
+		tokens.emplace_back(token);
+	}
+	nextTokenTypeId = type + 1;
+
+	// Mirror the token in the trie -- if the trie rejects it, the slot is
+	// released again
+	if (trie.registerToken(token, type)) {
+		return type;
+	}
+	tokens[type] = std::string{};
+	nextTokenTypeId = type;
+	return EmptyToken;
+}
+
+bool DynamicTokenizer::unregisterToken(TokenTypeId type)
+{
+	// Free the slot; only lower nextTokenTypeId so no free slot is skipped
+	if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
+		tokens[type] = std::string{};
+		nextTokenTypeId = type < nextTokenTypeId ? type : nextTokenTypeId;
+		return true;
+	}
+	return false;
+}
+
+std::string DynamicTokenizer::getTokenString(TokenTypeId type)
+{
+	// Ids outside of the tokens list are answered with an empty string; a
+	// slot that has been unregistered holds an empty string as well
+	const bool known = type < tokens.size();
+	return known ? tokens[type] : std::string{};
+}
+
+void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode)
+{
+	whitespaceMode = mode;  // evaluated by read() and peek() on each call
+}
+// Returns the mode set by the constructor or by setWhitespaceMode
+WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
+
+/* Explicit instantiations of "next" for all six combinations of whitespace
+   handler and read flag that read() and peek() dispatch to */
+template bool DynamicTokenizer::next<PreservingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<PreservingWhitespaceHandler, true>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, true>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, true>(
+    CharReader &reader, DynamicToken &token);
+}
+
diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp
new file mode 100644
index 0000000..3e5aeb3
--- /dev/null
+++ b/src/core/parser/utils/Tokenizer.hpp
@@ -0,0 +1,231 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Tokenizer.hpp
+ *
+ * Tokenizer that can be reconfigured at runtime used for parsing the plain
+ * text format.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_
+#define _OUSIA_DYNAMIC_TOKENIZER_HPP_
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include <core/common/Location.hpp>
+#include <core/common/Whitespace.hpp>
+
+#include "TokenTrie.hpp"
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+
+/**
+ * The DynamicToken structure describes a token discovered by the Tokenizer.
+ */
+struct DynamicToken {
+	/**
+	 * Id of the type of this token. EmptyToken marks an unset token.
+	 */
+	TokenTypeId type;
+
+	/**
+	 * String that was matched.
+	 */
+	std::string content;
+
+	/**
+	 * Location from which the string was extracted.
+	 */
+	SourceLocation location;
+
+	/**
+	 * Default constructor, sets the type to EmptyToken.
+	 */
+	DynamicToken() : type(EmptyToken) {}
+
+	/**
+	 * Constructor of the DynamicToken struct.
+	 *
+	 * @param type is the id corresponding to the type of the token.
+	 * @param content is the string content that has been extracted.
+	 * @param location is the location of the extracted string content in the
+	 * source file.
+	 */
+	DynamicToken(TokenTypeId type, const std::string &content,
+	             SourceLocation location)
+	    : type(type), content(content), location(location)
+	{
+	}
+
+	/**
+	 * Constructor of the DynamicToken struct, only initializes the token type.
+	 *
+	 * @param type is the id corresponding to the type of the token.
+	 */
+	DynamicToken(TokenTypeId type) : type(type) {}
+
+	/**
+	 * The getLocation function allows the tokens to be directly passed as
+	 * parameter to Logger or LoggableException instances.
+	 *
+	 * @return a reference at the location field
+	 */
+	const SourceLocation &getLocation() const { return location; }
+};
+
+/**
+ * The DynamicTokenizer is used to extract tokens and chunks of text from a
+ * CharReader. It allows to register and unregister tokens while parsing and
+ * to modify the handling of whitespace characters. Note that the
+ * DynamicTokenizer always tries to extract the longest possible token from the
+ * tokenizer.
+ */
+class DynamicTokenizer {
+private:
+	/**
+	 * Internally used token trie. This object holds all registered tokens.
+	 */
+	TokenTrie trie;
+
+	/**
+	 * Mode defining how whitespace characters in text tokens are handled.
+	 */
+	WhitespaceMode whitespaceMode;
+
+	/**
+	 * Vector containing all registered tokens, indexed by their TokenTypeId.
+	 */
+	std::vector<std::string> tokens;
+
+	/**
+	 * Next index in the tokens list where to search for a new token id.
+	 */
+	size_t nextTokenTypeId;
+
+	/**
+	 * Templated function used internally to read the current token. The
+	 * function is templated in order to force code generation for all six
+	 * combinations of whitespace modes and reading/peeking.
+	 *
+	 * @tparam TextHandler is the type to be used for the textHandler instance.
+	 * @tparam read specifies whether the function should start from and advance
+	 * the read pointer of the char reader.
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is the token structure into which the token information
+	 * should be written.
+	 * @return false if the end of the stream has been reached, true otherwise.
+	 */
+	template <typename TextHandler, bool read>
+	bool next(CharReader &reader, DynamicToken &token);
+
+public:
+	/**
+	 * Constructor of the DynamicTokenizer class.
+	 *
+	 * @param whitespaceMode specifies how whitespace should be handled.
+	 */
+	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
+
+	/**
+	 * Registers the given string as a token. Returns the TokenTypeId that
+	 * will be used to reference the newly created token.
+	 *
+	 * @param token is the token string that should be registered.
+	 * @return a unique identifier for the registered token or EmptyToken if
+	 * an error occurred.
+	 */
+	TokenTypeId registerToken(const std::string &token);
+
+	/**
+	 * Unregisters the token belonging to the given TokenTypeId.
+	 *
+	 * @param type is the token type that should be unregistered. The
+	 * TokenTypeId
+	 * must have been returned by registerToken.
+	 * @return true if the operation was successful, false otherwise (e.g.
+	 * because the given token type was already unregistered).
+	 */
+	bool unregisterToken(TokenTypeId type);
+
+	/**
+	 * Returns the token string that was registered under the given
+	 * TokenTypeId. An empty string is returned if the id lies outside of the
+	 * token list or belongs to a slot that has been unregistered.
+	 *
+	 * @param type is the TokenTypeId for which the corresponding token
+	 * string should be returned, i.e. a value previously handed out by
+	 * registerToken.
+	 * @return the registered token string or an empty string if the given type
+	 * was invalid.
+	 */
+	std::string getTokenString(TokenTypeId type);
+
+	/**
+	 * Sets the whitespace mode.
+	 *
+	 * @param mode defines how whitespace should be treated in text
+	 * tokens.
+	 */
+	void setWhitespaceMode(WhitespaceMode mode);
+
+	/**
+	 * Returns the current value of the whitespace mode.
+	 *
+	 * @return the whitespace mode.
+	 */
+	WhitespaceMode getWhitespaceMode();
+
+	/**
+	 * Reads a new token from the CharReader and stores it in the given
+	 * DynamicToken instance.
+	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is a reference at the token instance into which the Token
+	 * information should be written.
+	 * @return true if a token could be read, false if the end of the stream
+	 * has been reached.
+	 */
+	bool read(CharReader &reader, DynamicToken &token);
+
+	/**
+	 * The peek method does not advance the read position of the char reader,
+	 * but reads the next token from the current char reader peek position.
+	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is a reference at the token instance into which the Token
+	 * information should be written.
+	 * @return true if a token could be read, false if the end of the stream
+	 * has been reached.
+	 */
+	bool peek(CharReader &reader, DynamicToken &token);
+};
+}
+
+#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */
+
diff --git a/src/formats/osdm/DynamicTokenizer.cpp b/src/formats/osdm/DynamicTokenizer.cpp
deleted file mode 100644
index 1fac25a..0000000
--- a/src/formats/osdm/DynamicTokenizer.cpp
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <memory>
-#include <vector>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/Exceptions.hpp>
-#include <core/common/Utils.hpp>
-#include <core/common/WhitespaceHandler.hpp>
-
-#include "DynamicTokenizer.hpp"
-
-namespace ousia {
-
-namespace {
-
-/* Internal class TokenMatch */
-
-/**
- * Contains information about a matching token.
- */
-struct TokenMatch {
-	/**
-	 * Token that was matched.
-	 */
-	DynamicToken token;
-
-	/**
-	 * Current length of the data within the text handler. The text buffer needs
-	 * to be trimmed to this length if this token matches.
-	 */
-	size_t textLength;
-
-	/**
-	 * End location of the current text handler. This location needs to be used
-	 * for the text token that is emitted before the actual token.
-	 */
-	size_t textEnd;
-
-	/**
-	 * Constructor of the TokenMatch class.
-	 */
-	TokenMatch() : textLength(0), textEnd(0) {}
-
-	/**
-	 * Returns true if this TokenMatch instance actually represents a match.
-	 */
-	bool hasMatch() { return token.type != EmptyToken; }
-};
-
-/* Internal class TokenLookup */
-
-/**
- * The TokenLookup class is used to represent a thread in a running token
- * lookup.
- */
-class TokenLookup {
-private:
-	/**
-	 * Current node within the token trie.
-	 */
-	TokenTrie::Node const *node;
-
-	/**
-	 * Start offset within the source file.
-	 */
-	size_t start;
-
-	/**
-	 * Current length of the data within the text handler. The text buffer needs
-	 * to be trimmed to this length if this token matches.
-	 */
-	size_t textLength;
-
-	/**
-	 * End location of the current text handler. This location needs to be used
-	 * for the text token that is emitted before the actual token.
-	 */
-	size_t textEnd;
-
-public:
-	/**
-	 * Constructor of the TokenLookup class.
-	 *
-	 * @param node is the current node.
-	 * @param start is the start position.
-	 * @param textLength is the text buffer length of the previous text token.
-	 * @param textEnd is the current end location of the previous text token.
-	 */
-	TokenLookup(const TokenTrie::Node *node, size_t start, size_t textLength,
-	            size_t textEnd)
-	    : node(node), start(start), textLength(textLength), textEnd(textEnd)
-	{
-	}
-
-	/**
-	 * Tries to extend the current path in the token trie with the given
-	 * character. If a complete token is matched, stores this match in the
-	 * tokens list (in case it is longer than any previous token).
-	 *
-	 * @param c is the character that should be appended to the current prefix.
-	 * @param lookups is a list to which new TokeLookup instances are added --
-	 * which could potentially be expanded in the next iteration.
-	 * @param match is the DynamicToken instance to which the matching token
-	 * should be written.
-	 * @param tokens is a reference at the internal token list of the
-	 * DynamicTokenizer.
-	 * @param end is the end byte offset of the current character.
-	 * @param sourceId is the source if of this file.
-	 */
-	void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match,
-	             const std::vector<std::string> &tokens, SourceOffset end,
-	             SourceId sourceId)
-	{
-		// Check whether we can continue the current token path with the given
-		// character without visiting an already visited node
-		auto it = node->children.find(c);
-		if (it == node->children.end()) {
-			return;
-		}
-
-		// Check whether the new node represents a complete token a whether it
-		// is longer than the current token. If yes, replace the current token.
-		node = it->second.get();
-		if (node->type != EmptyToken) {
-			const std::string &str = tokens[node->type];
-			size_t len = str.size();
-			if (len > match.token.content.size()) {
-				match.token =
-				    DynamicToken{node->type, str, {sourceId, start, end}};
-				match.textLength = textLength;
-				match.textEnd = textEnd;
-			}
-		}
-
-		// If this state can possibly be advanced, store it in the states list.
-		if (!node->children.empty()) {
-			lookups.emplace_back(*this);
-		}
-	}
-};
-
-/**
- * Transforms the given token into a text token containing the extracted
- * text.
- *
- * @param handler is the WhitespaceHandler containing the collected data.
- * @param token is the output token to which the text should be written.
- * @param sourceId is the source id of the underlying file.
- */
-static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
-                           SourceId sourceId)
-{
-	if (match.hasMatch()) {
-		match.token.content =
-		    std::string{handler.textBuf.data(), match.textLength};
-		match.token.location =
-		    SourceLocation{sourceId, handler.textStart, match.textEnd};
-	} else {
-		match.token.content = handler.toString();
-		match.token.location =
-		    SourceLocation{sourceId, handler.textStart, handler.textEnd};
-	}
-	match.token.type = TextToken;
-}
-}
-
-/* Class DynamicTokenizer */
-
-DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
-    : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
-{
-}
-
-template <typename TextHandler, bool read>
-bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
-{
-	// If we're in the read mode, reset the char reader peek position to the
-	// current read position
-	if (read) {
-		reader.resetPeek();
-	}
-
-	// Prepare the lookups in the token trie
-	const TokenTrie::Node *root = trie.getRoot();
-	TokenMatch match;
-	std::vector<TokenLookup> lookups;
-	std::vector<TokenLookup> nextLookups;
-
-	// Instantiate the text handler
-	TextHandler textHandler;
-
-	// Peek characters from the reader and try to advance the current token tree
-	// cursor
-	char c;
-	size_t charStart = reader.getPeekOffset();
-	const SourceId sourceId = reader.getSourceId();
-	while (reader.peek(c)) {
-		const size_t charEnd = reader.getPeekOffset();
-		const size_t textLength = textHandler.textBuf.size();
-		const size_t textEnd = textHandler.textEnd;
-
-		// If we do not have a match yet, start a new lookup from the root
-		if (!match.hasMatch()) {
-			TokenLookup{root, charStart, textLength, textEnd}.advance(
-			    c, nextLookups, match, tokens, charEnd, sourceId);
-		}
-
-		// Try to advance all other lookups with the new character
-		for (TokenLookup &lookup : lookups) {
-			lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId);
-		}
-
-		// We have found a token and there are no more states to advance or the
-		// text handler has found something -- abort to return the new token
-		if (match.hasMatch()) {
-			if ((nextLookups.empty() || textHandler.hasText())) {
-				break;
-			}
-		} else {
-			// Record all incomming characters
-			textHandler.append(c, charStart, charEnd);
-		}
-
-		// Swap the lookups and the nextLookups list
-		lookups = std::move(nextLookups);
-		nextLookups.clear();
-
-		// Advance the offset
-		charStart = charEnd;
-	}
-
-	// If we found text, emit that text
-	if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) {
-		buildTextToken(textHandler, match, sourceId);
-	}
-
-	// Move the read/peek cursor to the end of the token, abort if an error
-	// happens while doing so
-	if (match.hasMatch()) {
-		// Make sure we have a valid location
-		if (match.token.location.getEnd() == InvalidSourceOffset) {
-			throw OusiaException{"Token end position offset out of range"};
-		}
-
-		// Seek to the end of the current token
-		const size_t end = match.token.location.getEnd();
-		if (read) {
-			reader.seek(end);
-		} else {
-			reader.seekPeekCursor(end);
-		}
-		token = match.token;
-	} else {
-		token = DynamicToken{};
-	}
-	return match.hasMatch();
-}
-
-bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token)
-{
-	switch (whitespaceMode) {
-		case WhitespaceMode::PRESERVE:
-			return next<PreservingWhitespaceHandler, true>(reader, token);
-		case WhitespaceMode::TRIM:
-			return next<TrimmingWhitespaceHandler, true>(reader, token);
-		case WhitespaceMode::COLLAPSE:
-			return next<CollapsingWhitespaceHandler, true>(reader, token);
-	}
-	return false;
-}
-
-bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token)
-{
-	switch (whitespaceMode) {
-		case WhitespaceMode::PRESERVE:
-			return next<PreservingWhitespaceHandler, false>(reader, token);
-		case WhitespaceMode::TRIM:
-			return next<TrimmingWhitespaceHandler, false>(reader, token);
-		case WhitespaceMode::COLLAPSE:
-			return next<CollapsingWhitespaceHandler, false>(reader, token);
-	}
-	return false;
-}
-
-TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
-{
-	// Abort if an empty token should be registered
-	if (token.empty()) {
-		return EmptyToken;
-	}
-
-	// Search for a new slot in the tokens list
-	TokenTypeId type = EmptyToken;
-	for (size_t i = nextTokenTypeId; i < tokens.size(); i++) {
-		if (tokens[i].empty()) {
-			tokens[i] = token;
-			type = i;
-			break;
-		}
-	}
-
-	// No existing slot was found, add a new one -- make sure we do not
-	// override the special token type handles
-	if (type == EmptyToken) {
-		type = tokens.size();
-		if (type == TextToken || type == EmptyToken) {
-			throw OusiaException{"Token type ids depleted!"};
-		}
-		tokens.emplace_back(token);
-	}
-	nextTokenTypeId = type + 1;
-
-	// Try to register the token in the trie -- if this fails, remove it
-	// from the tokens list
-	if (!trie.registerToken(token, type)) {
-		tokens[type] = std::string{};
-		nextTokenTypeId = type;
-		return EmptyToken;
-	}
-	return type;
-}
-
-bool DynamicTokenizer::unregisterToken(TokenTypeId type)
-{
-	// Unregister the token from the trie, abort if an invalid type is given
-	if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
-		tokens[type] = std::string{};
-		nextTokenTypeId = type;
-		return true;
-	}
-	return false;
-}
-
-std::string DynamicTokenizer::getTokenString(TokenTypeId type)
-{
-	if (type < tokens.size()) {
-		return tokens[type];
-	}
-	return std::string{};
-}
-
-void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode)
-{
-	whitespaceMode = mode;
-}
-
-WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
-
-/* Explicitly instantiate all possible instantiations of the "next" member
-   function */
-template bool DynamicTokenizer::next<PreservingWhitespaceHandler, false>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, false>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, false>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<PreservingWhitespaceHandler, true>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, true>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, true>(
-    CharReader &reader, DynamicToken &token);
-}
-
diff --git a/src/formats/osdm/DynamicTokenizer.hpp b/src/formats/osdm/DynamicTokenizer.hpp
deleted file mode 100644
index 3e5aeb3..0000000
--- a/src/formats/osdm/DynamicTokenizer.hpp
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file DynamicTokenizer.hpp
- *
- * Tokenizer that can be reconfigured at runtime used for parsing the plain
- * text format.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_
-#define _OUSIA_DYNAMIC_TOKENIZER_HPP_
-
-#include <set>
-#include <string>
-#include <vector>
-
-#include <core/common/Location.hpp>
-#include <core/common/Whitespace.hpp>
-
-#include "TokenTrie.hpp"
-
-namespace ousia {
-
-// Forward declarations
-class CharReader;
-
-/**
- * The DynamicToken structure describes a token discovered by the Tokenizer.
- * It bundles the token type id, the matched string and the source location
- * the string was extracted from.
- */
-struct DynamicToken {
-	/**
-	 * Id of the type of this token.
-	 */
-	TokenTypeId type;
-
-	/**
-	 * String that was matched.
-	 */
-	std::string content;
-
-	/**
-	 * Location from which the string was extracted.
-	 */
-	SourceLocation location;
-
-	/**
-	 * Default constructor. Sets the type to EmptyToken, content and location
-	 * are default-constructed.
-	 */
-	DynamicToken() : type(EmptyToken) {}
-
-	/**
-	 * Constructor of the DynamicToken struct.
-	 *
-	 * @param type is the id corresponding to the type of the token.
-	 * @param content is the string content that has been extracted.
-	 * @param location is the location of the extracted string content in the
-	 * source file.
-	 */
-	DynamicToken(TokenTypeId type, const std::string &content,
-	             SourceLocation location)
-	    : type(type), content(content), location(location)
-	{
-	}
-
-	/**
-	 * Constructor of the DynamicToken struct, only initializes the token type.
-	 * Content and location are default-constructed.
-	 *
-	 * NOTE(review): this constructor is not explicit, so a TokenTypeId value
-	 * converts implicitly to a DynamicToken -- confirm that this is intended.
-	 *
-	 * @param type is the id corresponding to the type of the token.
-	 */
-	DynamicToken(TokenTypeId type) : type(type) {}
-
-	/**
-	 * The getLocation function allows the tokens to be directly passed as
-	 * parameter to Logger or LoggableException instances.
-	 *
-	 * @return a const reference to the location field.
-	 */
-	const SourceLocation &getLocation() const { return location; }
-};
-
-/**
- * The DynamicTokenizer is used to extract tokens and chunks of text from a
- * CharReader. It allows to register and unregister tokens while parsing and
- * to modify the handling of whitespace characters. Note that the
- * DynamicTokenizer always tries to extract the longest possible token from the
- * tokenizer.
- */
-class DynamicTokenizer {
-private:
-	/**
-	 * Internally used token trie. This object holds all registered tokens.
-	 */
-	TokenTrie trie;
-
-	/**
-	 * Flag defining whether whitespaces should be preserved or not.
-	 */
-	WhitespaceMode whitespaceMode;
-
-	/**
-	 * Vector containing all registered token types. The index into this
-	 * vector is the TokenTypeId of the token; unregistered slots hold an empty
-	 * string.
-	 */
-	std::vector<std::string> tokens;
-
-	/**
-	 * Next index in the tokens list where to search for a new token id.
-	 */
-	size_t nextTokenTypeId;
-
-	/**
-	 * Templated function used internally to read the current token. The
-	 * function is templated in order to force code generation for all six
-	 * combinations of whitespace modes and reading/peeking.
-	 *
-	 * @tparam TextHandler is the type to be used for the textHandler instance.
-	 * @tparam read specifies whether the function should start from and advance
-	 * the read pointer of the char reader.
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is the token structure into which the token information
-	 * should be written.
-	 * @return false if the end of the stream has been reached, true otherwise.
-	 */
-	template <typename TextHandler, bool read>
-	bool next(CharReader &reader, DynamicToken &token);
-
-public:
-	/**
-	 * Constructor of the DynamicTokenizer class.
-	 *
-	 * @param whitespaceMode specifies how whitespace should be handled.
-	 * Defaults to WhitespaceMode::COLLAPSE.
-	 */
-	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
-
-	/**
-	 * Registers the given string as a token. Returns the TokenTypeId that will
-	 * be used to reference the newly created token.
-	 *
-	 * @param token is the token string that should be registered.
-	 * @return a unique identifier for the registered token or EmptyToken if
-	 * an error occurred.
-	 */
-	TokenTypeId registerToken(const std::string &token);
-
-	/**
-	 * Unregisters the token belonging to the given TokenTypeId.
-	 *
-	 * @param type is the token type that should be unregistered. The
-	 * TokenTypeId must have been returned by registerToken.
-	 * @return true if the operation was successful, false otherwise (e.g.
-	 * because the given token type was already unregistered).
-	 */
-	bool unregisterToken(TokenTypeId type);
-
-	/**
-	 * Returns the token that was registered under the given TokenTypeId or an
-	 * empty string if an invalid TokenTypeId is given.
-	 *
-	 * @param type is the TokenTypeId for which the corresponding token string
-	 * should be returned.
-	 * @return the registered token string or an empty string if the given type
-	 * was invalid.
-	 */
-	std::string getTokenString(TokenTypeId type);
-
-	/**
-	 * Sets the whitespace mode.
-	 *
-	 * @param mode defines how whitespace should be treated in text tokens.
-	 */
-	void setWhitespaceMode(WhitespaceMode mode);
-
-	/**
-	 * Returns the current value of the whitespace mode.
-	 *
-	 * @return the whitespace mode.
-	 */
-	WhitespaceMode getWhitespaceMode();
-
-	/**
-	 * Reads a new token from the CharReader and stores it in the given
-	 * DynamicToken instance.
-	 *
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is a reference at the token instance into which the Token
-	 * information should be written.
-	 * @return true if a token could be read, false if the end of the stream
-	 * has been reached.
-	 */
-	bool read(CharReader &reader, DynamicToken &token);
-
-	/**
-	 * The peek method does not advance the read position of the char reader,
-	 * but reads the next token from the current char reader peek position.
-	 *
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is a reference at the token instance into which the Token
-	 * information should be written.
-	 * @return true if a token could be read, false if the end of the stream
-	 * has been reached.
-	 */
-	bool peek(CharReader &reader, DynamicToken &token);
-};
-}
-
-#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */
-
diff --git a/src/formats/osdm/TokenTrie.cpp b/src/formats/osdm/TokenTrie.cpp
deleted file mode 100644
index 4a0430b..0000000
--- a/src/formats/osdm/TokenTrie.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "TokenTrie.hpp"
-
-namespace ousia {
-
-/* Class TokenTrie::Node */
-
-// A freshly created node has no token attached to it
-TokenTrie::Node::Node() : type(EmptyToken)
-{
-}
-
-/* Class TokenTrie */
-
-bool TokenTrie::registerToken(const std::string &token,
-                              TokenTypeId type) noexcept
-{
-	// Abort if the token is empty -- this would taint the root node. Also
-	// reject EmptyToken as type: it is the marker for "no token attached", so
-	// storing it would report success without registering anything while
-	// leaving freshly created, unused nodes behind in the trie.
-	if (token.empty() || type == EmptyToken) {
-		return false;
-	}
-
-	// Walk down the trie along the characters of the token and create the
-	// nodes that do not exist yet
-	Node *node = &root;
-	for (const char c : token) {
-		auto it = node->children.find(c);
-		if (it == node->children.end()) {
-			it = node->children.emplace(c, std::make_shared<Node>()).first;
-		}
-		node = it->second.get();
-	}
-
-	// If the final node already has a type set, the token is already
-	// registered -- do not overwrite it
-	if (node->type != EmptyToken) {
-		return false;
-	}
-
-	// Otherwise attach the given type to the final node
-	node->type = type;
-	return true;
-}
-
-bool TokenTrie::unregisterToken(const std::string &token) noexcept
-{
-	// We cannot remove empty tokens as we need to access the first character
-	// upfront
-	if (token.empty()) {
-		return false;
-	}
-
-	// First pass -- search the node in the path that can be deleted.
-	// subtreeRoot/subtreeKey describe the edge that is erased at the very end
-	// (the child "subtreeKey" of "subtreeRoot"); initially this is the first
-	// edge below the root.
-	Node *subtreeRoot = &root;
-	char subtreeKey = token[0];
-	Node *node = &root;
-	for (size_t i = 0; i < token.size(); i++) {
-		// Go to the next node, abort if the tree ends unexpectedly
-		auto it = node->children.find(token[i]);
-		if (it == node->children.end()) {
-			return false;
-		}
-
-		// Reset the subtree handler if this node has a type of its own or
-		// more than one child -- such a node is still needed, so only the
-		// edge below it may be erased. The last node of the path is excluded
-		// here (i + 1 == token.size()), it is handled after the loop.
-		node = it->second.get();
-		if ((node->type != EmptyToken || node->children.size() > 1) &&
-		    (i + 1 != token.size())) {
-			subtreeRoot = node;
-			subtreeKey = token[i + 1];
-		}
-	}
-
-	// If the node type is already EmptyToken, we cannot do anything here
-	if (node->type == EmptyToken) {
-		return false;
-	}
-
-	// If the target node has children, we cannot delete the subtree. Set the
-	// type to EmptyToken instead
-	if (!node->children.empty()) {
-		node->type = EmptyToken;
-		return true;
-	}
-
-	// If we end up here, we can safely delete the complete subtree
-	subtreeRoot->children.erase(subtreeKey);
-	return true;
-}
-
-TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept
-{
-	// Follow the characters of the token downwards, starting at the root
-	const Node *current = &root;
-	for (const char c : token) {
-		const auto child = current->children.find(c);
-		if (child == current->children.end()) {
-			// The path ends before the token is consumed
-			return EmptyToken;
-		}
-		current = child->second.get();
-	}
-
-	// The type of the reached node is EmptyToken if no token ends here
-	return current->type;
-}
-}
-
diff --git a/src/formats/osdm/TokenTrie.hpp b/src/formats/osdm/TokenTrie.hpp
deleted file mode 100644
index 36c2ffa..0000000
--- a/src/formats/osdm/TokenTrie.hpp
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file TokenTrie.hpp
- *
- * Class representing a token trie that can be updated dynamically.
- *
- * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de)
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_TOKEN_TRIE_HPP_
-#define _OUSIA_TOKEN_TRIE_HPP_
-
-#include <cstdint>
-#include <memory>
-#include <limits>
-#include <unordered_map>
-
-namespace ousia {
-
-/**
- * The TokenTypeId is used to give each token type a unique id.
- */
-using TokenTypeId = uint32_t;
-
-/**
- * Reserved id marking "no token". Uses the largest value representable by
- * TokenTypeId.
- */
-constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max();
-
-/**
- * Reserved id representing a text token. Uses the second largest value
- * representable by TokenTypeId.
- */
-constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1;
-
-/**
- * The Tokenizer internally uses a TokenTrie to be efficiently able to identify
- * the longest consecutive token in the text. This is equivalent to a prefix
- * trie.
- *
- * A token trie is a construct that structures all special tokens a Tokenizer
- * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and
- * three. Then the token tree would look like this:
- *
- * \code{*.txt}
- *        ~ (0)
- *       /     \
- *      a (2)  b (0)
- *      |      |
- *      a (0)  a (0)
- *      |      |
- *      b (1)  c (3)
- * \endcode
- *
- * Where the number indicates the corresponding token type identifier and (0)
- * marks nodes to which no token is attached.
- */
-class TokenTrie {
-public:
-	/**
-	 * Structure used to build the node tree.
-	 */
-	struct Node {
-		/**
-		 * Type used for the child map.
-		 */
-		using ChildMap = std::unordered_map<char, std::shared_ptr<Node>>;
-
-		/**
-		 * Map from single characters at the corresponding child nodes.
-		 */
-		ChildMap children;
-
-		/**
-		 * Id of the token type attached to this node. Set to EmptyToken if
-		 * no token is attached to this node.
-		 */
-		TokenTypeId type;
-
-		/**
-		 * Default constructor, initializes the type with EmptyToken.
-		 */
-		Node();
-	};
-
-private:
-	/**
-	 * Root node of the internal token tree.
-	 */
-	Node root;
-
-public:
-	/**
-	 * Registers a token containing the given string. Returns false if the
-	 * token already exists, true otherwise.
-	 *
-	 * @param token is the character sequence that should be registered as
-	 * token.
-	 * @param type is the token type id that should be set for this token.
-	 * @return true if the operation is successful, false otherwise.
-	 */
-	bool registerToken(const std::string &token, TokenTypeId type) noexcept;
-
-	/**
-	 * Unregisters the token from the token tree. Returns true if the token was
-	 * unregistered successfully, false otherwise.
-	 *
-	 * @param token is the character sequence that should be unregistered.
-	 * @return true if the operation was successful, false otherwise.
-	 */
-	bool unregisterToken(const std::string &token) noexcept;
-
-	/**
-	 * Returns the token type id stored for the given token within the
-	 * TokenTrie. This function is mostly thought for debugging and unit
-	 * testing.
-	 *
-	 * @param token is the character sequence that should be searched.
-	 * @return the attached token type id or EmptyToken if the given token is
-	 * not found.
-	 */
-	TokenTypeId hasToken(const std::string &token) const noexcept;
-
-	/**
-	 * Returns a pointer at the root node to be used for traversing the token
-	 * tree.
-	 *
-	 * @return a const pointer at the root node.
-	 */
-	const Node *getRoot() const noexcept { return &root; }
-};
-}
-
-#endif /* _OUSIA_TOKEN_TRIE_HPP_ */
-
diff --git a/test/core/parser/utils/TokenTrieTest.cpp b/test/core/parser/utils/TokenTrieTest.cpp
new file mode 100644
index 0000000..aacd6c0
--- /dev/null
+++ b/test/core/parser/utils/TokenTrieTest.cpp
@@ -0,0 +1,92 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <formats/osdm/TokenTrie.hpp>
+
+namespace ousia {
+
+// Arbitrary, pairwise distinct token type ids used by the tests below
+static const TokenTypeId t1 = 0;
+static const TokenTypeId t2 = 1;
+static const TokenTypeId t3 = 2;
+static const TokenTypeId t4 = 3;
+
+// Checks that tokens can be registered exactly once, that the empty token is
+// rejected and that hasToken reports the type ids that were registered.
+TEST(TokenTrie, registerToken)
+{
+	TokenTrie tree;
+
+	// First registration of each token succeeds
+	ASSERT_TRUE(tree.registerToken("a", t1));
+	ASSERT_TRUE(tree.registerToken("ab", t2));
+	ASSERT_TRUE(tree.registerToken("b", t3));
+	ASSERT_TRUE(tree.registerToken("hello", t4));
+
+	// The empty token and already registered tokens are rejected
+	ASSERT_FALSE(tree.registerToken("", t1));
+	ASSERT_FALSE(tree.registerToken("a", t4));
+	ASSERT_FALSE(tree.registerToken("ab", t4));
+	ASSERT_FALSE(tree.registerToken("b", t4));
+	ASSERT_FALSE(tree.registerToken("hello", t4));
+
+	// The rejected registrations must not have overwritten the stored ids
+	ASSERT_EQ(t1, tree.hasToken("a"));
+	ASSERT_EQ(t2, tree.hasToken("ab"));
+	ASSERT_EQ(t3, tree.hasToken("b"));
+	ASSERT_EQ(t4, tree.hasToken("hello"));
+	ASSERT_EQ(EmptyToken, tree.hasToken(""));
+	ASSERT_EQ(EmptyToken, tree.hasToken("abc"));
+}
+
+// Checks that tokens can be unregistered exactly once and that removing one
+// token does not affect the other tokens, including "ab" which has the
+// registered token "a" as prefix.
+TEST(TokenTrie, unregisterToken)
+{
+	TokenTrie tree;
+
+	ASSERT_TRUE(tree.registerToken("a", t1));
+	ASSERT_FALSE(tree.registerToken("a", t4));
+
+	ASSERT_TRUE(tree.registerToken("ab", t2));
+	ASSERT_FALSE(tree.registerToken("ab", t4));
+
+	ASSERT_TRUE(tree.registerToken("b", t3));
+	ASSERT_FALSE(tree.registerToken("b", t4));
+
+	ASSERT_EQ(t1, tree.hasToken("a"));
+	ASSERT_EQ(t2, tree.hasToken("ab"));
+	ASSERT_EQ(t3, tree.hasToken("b"));
+
+	// Remove "a" -- "ab" must stay intact
+	ASSERT_TRUE(tree.unregisterToken("a"));
+	ASSERT_FALSE(tree.unregisterToken("a"));
+
+	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
+	ASSERT_EQ(t2, tree.hasToken("ab"));
+	ASSERT_EQ(t3, tree.hasToken("b"));
+
+	// Remove "b"
+	ASSERT_TRUE(tree.unregisterToken("b"));
+	ASSERT_FALSE(tree.unregisterToken("b"));
+
+	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
+	ASSERT_EQ(t2, tree.hasToken("ab"));
+	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
+
+	// Remove "ab" -- no token is left afterwards
+	ASSERT_TRUE(tree.unregisterToken("ab"));
+	ASSERT_FALSE(tree.unregisterToken("ab"));
+
+	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
+	ASSERT_EQ(EmptyToken, tree.hasToken("ab"));
+	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
+}
+}
+
diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp
new file mode 100644
index 0000000..c1f8785
--- /dev/null
+++ b/test/core/parser/utils/TokenizerTest.cpp
@@ -0,0 +1,415 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/common/CharReader.hpp>
+#include <formats/osdm/DynamicTokenizer.hpp>
+
+namespace ousia {
+
+// Checks the id assignment of registerToken: ids are handed out in ascending
+// order, duplicates and the empty string yield EmptyToken and the id of an
+// unregistered token is reused by the next registration.
+TEST(DynamicTokenizer, tokenRegistration)
+{
+	DynamicTokenizer tokenizer;
+
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
+
+	ASSERT_EQ(0U, tokenizer.registerToken("a"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("a"));
+	ASSERT_EQ("a", tokenizer.getTokenString(0U));
+
+	ASSERT_EQ(1U, tokenizer.registerToken("b"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("b"));
+	ASSERT_EQ("b", tokenizer.getTokenString(1U));
+
+	ASSERT_EQ(2U, tokenizer.registerToken("c"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("c"));
+	ASSERT_EQ("c", tokenizer.getTokenString(2U));
+
+	// Free id 1, a second attempt to unregister it must fail
+	ASSERT_TRUE(tokenizer.unregisterToken(1U));
+	ASSERT_FALSE(tokenizer.unregisterToken(1U));
+	ASSERT_EQ("", tokenizer.getTokenString(1U));
+
+	// The freed id is assigned to the next registered token
+	ASSERT_EQ(1U, tokenizer.registerToken("d"));
+	ASSERT_EQ(EmptyToken, tokenizer.registerToken("d"));
+	ASSERT_EQ("d", tokenizer.getTokenString(1U));
+}
+
+// In PRESERVE mode the complete input -- including leading, trailing and
+// inner whitespace -- is expected as one single text token.
+TEST(DynamicTokenizer, textTokenPreserveWhitespace)
+{
+	// Input with leading and trailing whitespace
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(36U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+
+	// Input without leading and trailing whitespace
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+}
+
+// In TRIM mode leading and trailing whitespace is expected to be removed from
+// the text token (and excluded from its location), while inner whitespace is
+// kept as it is.
+TEST(DynamicTokenizer, textTokenTrimWhitespace)
+{
+	// Input with leading and trailing whitespace
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(1U, loc.getStart());
+		ASSERT_EQ(33U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+
+	// Input without leading and trailing whitespace
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+}
+
+// In COLLAPSE mode leading and trailing whitespace is expected to be removed
+// and each run of inner whitespace to be replaced by a single space. The
+// location still refers to the trimmed range in the original input.
+TEST(DynamicTokenizer, textTokenCollapseWhitespace)
+{
+	// Input with leading and trailing whitespace
+	{
+		CharReader reader{" this \t is only a  \n\n test   text   "};
+		//                 012345 6789012345678 9 0123456789012345
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this is only a test text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(1U, loc.getStart());
+		ASSERT_EQ(33U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+
+	// Input without leading and trailing whitespace
+	{
+		CharReader reader{"this \t is only a  \n\n test   text"};
+		//                 01234 5678901234567 8 9012345678901
+		//                 0          1           2         3
+		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
+
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("this is only a test text", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(32U, loc.getEnd());
+
+		ASSERT_FALSE(tokenizer.read(reader, token));
+	}
+}
+
+// Reads "test1:test2" with ":" registered as token and expects the sequence
+// text, ":" token, text. After each read the next character of the reader is
+// checked.
+TEST(DynamicTokenizer, simpleReadToken)
+{
+	CharReader reader{"test1:test2"};
+	DynamicTokenizer tokenizer;
+
+	const TokenTypeId tid = tokenizer.registerToken(":");
+	ASSERT_EQ(0U, tid);
+
+	// Text in front of the token
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+
+		char c;
+		ASSERT_TRUE(reader.peek(c));
+		ASSERT_EQ(':', c);
+	}
+
+	// The registered token itself
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+
+		char c;
+		ASSERT_TRUE(reader.peek(c));
+		ASSERT_EQ('t', c);
+	}
+
+	// Text behind the token, the reader is exhausted afterwards
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+
+		char c;
+		ASSERT_FALSE(reader.peek(c));
+	}
+}
+
+// Peeks all three tokens of "test1:test2" and then reads them. While peeking
+// only the peek offset of the reader is expected to advance; each read is
+// expected to advance the read offset and to leave the peek offset equal to
+// it.
+TEST(DynamicTokenizer, simplePeekToken)
+{
+	CharReader reader{"test1:test2"};
+	DynamicTokenizer tokenizer;
+
+	const TokenTypeId tid = tokenizer.registerToken(":");
+	ASSERT_EQ(0U, tid);
+
+	// Peek phase -- the read offset stays at zero
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(5U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(reader, token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(6U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.peek(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+		ASSERT_EQ(0U, reader.getOffset());
+		ASSERT_EQ(11U, reader.getPeekOffset());
+	}
+
+	// Read phase -- starts again at the first token
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test1", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(0U, loc.getStart());
+		ASSERT_EQ(5U, loc.getEnd());
+		ASSERT_EQ(5U, reader.getOffset());
+		ASSERT_EQ(5U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(tid, token.type);
+		ASSERT_EQ(":", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(5U, loc.getStart());
+		ASSERT_EQ(6U, loc.getEnd());
+		ASSERT_EQ(6U, reader.getOffset());
+		ASSERT_EQ(6U, reader.getPeekOffset());
+	}
+
+	{
+		DynamicToken token;
+		ASSERT_TRUE(tokenizer.read(reader, token));
+
+		ASSERT_EQ(TextToken, token.type);
+		ASSERT_EQ("test2", token.content);
+
+		SourceLocation loc = token.location;
+		ASSERT_EQ(6U, loc.getStart());
+		ASSERT_EQ(11U, loc.getEnd());
+		ASSERT_EQ(11U, reader.getOffset());
+		ASSERT_EQ(11U, reader.getPeekOffset());
+	}
+}
+
+// The input "abc" starts like the token "abd" but does not match it. The
+// tokenizer is expected to emit "a" as text and to recognise "bc" afterwards.
+TEST(DynamicTokenizer, ambiguousTokens)
+{
+	CharReader reader{"abc"};
+	DynamicTokenizer tokenizer;
+
+	TokenTypeId idAbd = tokenizer.registerToken("abd");
+	TokenTypeId idBc = tokenizer.registerToken("bc");
+
+	ASSERT_EQ(0U, idAbd);
+	ASSERT_EQ(1U, idBc);
+
+	DynamicToken current;
+
+	// First token: the single character that could not be matched
+	ASSERT_TRUE(tokenizer.read(reader, current));
+	ASSERT_EQ(TextToken, current.type);
+	ASSERT_EQ("a", current.content);
+
+	SourceLocation where = current.location;
+	ASSERT_EQ(0U, where.getStart());
+	ASSERT_EQ(1U, where.getEnd());
+
+	// Second token: the registered token "bc"
+	ASSERT_TRUE(tokenizer.read(reader, current));
+	ASSERT_EQ(idBc, current.type);
+	ASSERT_EQ("bc", current.content);
+
+	where = current.location;
+	ASSERT_EQ(1U, where.getStart());
+	ASSERT_EQ(3U, where.getEnd());
+
+	// Nothing is left to read
+	ASSERT_FALSE(tokenizer.read(reader, current));
+}
+
+// Tokenizes a string containing "/", "/*" and "*/" in PRESERVE mode and
+// compares type, content and location of every token with the expectation.
+TEST(DynamicTokenizer, commentTestWhitespacePreserve)
+{
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 01234567890123456789012345678
+	//                 0         1         2
+	DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
+
+	const TokenTypeId slash = tokenizer.registerToken("/");
+	const TokenTypeId commentOpen = tokenizer.registerToken("/*");
+	const TokenTypeId commentClose = tokenizer.registerToken("*/");
+
+	std::vector<DynamicToken> expected = {
+	    {TextToken, "Test", SourceLocation{0, 0, 4}},
+	    {slash, "/", SourceLocation{0, 4, 5}},
+	    {TextToken, "Test ", SourceLocation{0, 5, 10}},
+	    {commentOpen, "/*", SourceLocation{0, 10, 12}},
+	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
+	    {commentClose, "*/", SourceLocation{0, 27, 29}}};
+
+	// Non-fatal expectations: a mismatch does not stop the comparison of the
+	// remaining tokens
+	DynamicToken actual;
+	for (DynamicToken &wanted : expected) {
+		EXPECT_TRUE(tokenizer.read(reader, actual));
+		EXPECT_EQ(wanted.type, actual.type);
+		EXPECT_EQ(wanted.content, actual.content);
+		EXPECT_EQ(wanted.location.getSourceId(),
+		          actual.location.getSourceId());
+		EXPECT_EQ(wanted.location.getStart(), actual.location.getStart());
+		EXPECT_EQ(wanted.location.getEnd(), actual.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.read(reader, actual));
+}
+
+// Tokenizes a string containing "/", "/*" and "*/" in COLLAPSE mode. The
+// text tokens are expected without surrounding whitespace and with
+// locations that exclude the removed whitespace.
+TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
+{
+	CharReader reader{"Test/Test /* Block Comment */", 0};
+	//                 01234567890123456789012345678
+	//                 0         1         2
+	DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
+
+	const TokenTypeId slash = tokenizer.registerToken("/");
+	const TokenTypeId commentOpen = tokenizer.registerToken("/*");
+	const TokenTypeId commentClose = tokenizer.registerToken("*/");
+
+	std::vector<DynamicToken> expected = {
+	    {TextToken, "Test", SourceLocation{0, 0, 4}},
+	    {slash, "/", SourceLocation{0, 4, 5}},
+	    {TextToken, "Test", SourceLocation{0, 5, 9}},
+	    {commentOpen, "/*", SourceLocation{0, 10, 12}},
+	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
+	    {commentClose, "*/", SourceLocation{0, 27, 29}}};
+
+	// Non-fatal expectations: a mismatch does not stop the comparison of the
+	// remaining tokens
+	DynamicToken actual;
+	for (DynamicToken &wanted : expected) {
+		EXPECT_TRUE(tokenizer.read(reader, actual));
+		EXPECT_EQ(wanted.type, actual.type);
+		EXPECT_EQ(wanted.content, actual.content);
+		EXPECT_EQ(wanted.location.getSourceId(),
+		          actual.location.getSourceId());
+		EXPECT_EQ(wanted.location.getStart(), actual.location.getStart());
+		EXPECT_EQ(wanted.location.getEnd(), actual.location.getEnd());
+	}
+	ASSERT_FALSE(tokenizer.read(reader, actual));
+}
+
+}
+
diff --git a/test/formats/osdm/DynamicTokenizerTest.cpp b/test/formats/osdm/DynamicTokenizerTest.cpp
deleted file mode 100644
index c1f8785..0000000
--- a/test/formats/osdm/DynamicTokenizerTest.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <core/common/CharReader.hpp>
-#include <formats/osdm/DynamicTokenizer.hpp>
-
-namespace ousia {
-
-TEST(DynamicTokenizer, tokenRegistration)
-{
-	DynamicTokenizer tokenizer;
-
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
-
-	ASSERT_EQ(0U, tokenizer.registerToken("a"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("a"));
-	ASSERT_EQ("a", tokenizer.getTokenString(0U));
-
-	ASSERT_EQ(1U, tokenizer.registerToken("b"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("b"));
-	ASSERT_EQ("b", tokenizer.getTokenString(1U));
-
-	ASSERT_EQ(2U, tokenizer.registerToken("c"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("c"));
-	ASSERT_EQ("c", tokenizer.getTokenString(2U));
-
-	ASSERT_TRUE(tokenizer.unregisterToken(1U));
-	ASSERT_FALSE(tokenizer.unregisterToken(1U));
-	ASSERT_EQ("", tokenizer.getTokenString(1U));
-
-	ASSERT_EQ(1U, tokenizer.registerToken("d"));
-	ASSERT_EQ(EmptyToken, tokenizer.registerToken("d"));
-	ASSERT_EQ("d", tokenizer.getTokenString(1U));
-}
-
-TEST(DynamicTokenizer, textTokenPreserveWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ(" this \t is only a  \n\n test   text   ", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(36U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::PRESERVE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, textTokenTrimWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(1U, loc.getStart());
-		ASSERT_EQ(33U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::TRIM};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this \t is only a  \n\n test   text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, textTokenCollapseWhitespace)
-{
-	{
-		CharReader reader{" this \t is only a  \n\n test   text   "};
-		//                 012345 6789012345678 9 0123456789012345
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this is only a test text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(1U, loc.getStart());
-		ASSERT_EQ(33U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-
-	{
-		CharReader reader{"this \t is only a  \n\n test   text"};
-		//                 01234 5678901234567 8 9012345678901
-		//                 0          1           2         3
-		DynamicTokenizer tokenizer{WhitespaceMode::COLLAPSE};
-
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("this is only a test text", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(32U, loc.getEnd());
-
-		ASSERT_FALSE(tokenizer.read(reader, token));
-	}
-}
-
-TEST(DynamicTokenizer, simpleReadToken)
-{
-	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer;
-
-	const TokenTypeId tid = tokenizer.registerToken(":");
-	ASSERT_EQ(0U, tid);
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-
-		char c;
-		ASSERT_TRUE(reader.peek(c));
-		ASSERT_EQ(':', c);
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-
-		char c;
-		ASSERT_TRUE(reader.peek(c));
-		ASSERT_EQ('t', c);
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-
-		char c;
-		ASSERT_FALSE(reader.peek(c));
-	}
-}
-
-TEST(DynamicTokenizer, simplePeekToken)
-{
-	CharReader reader{"test1:test2"};
-	DynamicTokenizer tokenizer;
-
-	const TokenTypeId tid = tokenizer.registerToken(":");
-	ASSERT_EQ(0U, tid);
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(5U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(6U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.peek(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-		ASSERT_EQ(0U, reader.getOffset());
-		ASSERT_EQ(11U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test1", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(5U, loc.getEnd());
-		ASSERT_EQ(5U, reader.getOffset());
-		ASSERT_EQ(5U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(tid, token.type);
-		ASSERT_EQ(":", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(5U, loc.getStart());
-		ASSERT_EQ(6U, loc.getEnd());
-		ASSERT_EQ(6U, reader.getOffset());
-		ASSERT_EQ(6U, reader.getPeekOffset());
-	}
-
-	{
-		DynamicToken token;
-		ASSERT_TRUE(tokenizer.read(reader, token));
-
-		ASSERT_EQ(TextToken, token.type);
-		ASSERT_EQ("test2", token.content);
-
-		SourceLocation loc = token.location;
-		ASSERT_EQ(6U, loc.getStart());
-		ASSERT_EQ(11U, loc.getEnd());
-		ASSERT_EQ(11U, reader.getOffset());
-		ASSERT_EQ(11U, reader.getPeekOffset());
-	}
-}
-
-TEST(DynamicTokenizer, ambiguousTokens)
-{
-	CharReader reader{"abc"};
-	DynamicTokenizer tokenizer;
-
-	TokenTypeId t1 = tokenizer.registerToken("abd");
-	TokenTypeId t2 = tokenizer.registerToken("bc");
-
-	ASSERT_EQ(0U, t1);
-	ASSERT_EQ(1U, t2);
-
-	DynamicToken token;
-	ASSERT_TRUE(tokenizer.read(reader, token));
-
-	ASSERT_EQ(TextToken, token.type);
-	ASSERT_EQ("a", token.content);
-
-	SourceLocation loc = token.location;
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(1U, loc.getEnd());
-
-	ASSERT_TRUE(tokenizer.read(reader, token));
-
-	ASSERT_EQ(t2, token.type);
-	ASSERT_EQ("bc", token.content);
-
-	loc = token.location;
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(3U, loc.getEnd());
-
-	ASSERT_FALSE(tokenizer.read(reader, token));
-}
-
-TEST(DynamicTokenizer, commentTestWhitespacePreserve)
-{
-	CharReader reader{"Test/Test /* Block Comment */", 0};
-	//                 012345678901234567890123456789
-	//                 0        1         2
-	DynamicTokenizer tokenizer(WhitespaceMode::PRESERVE);
-
-	const TokenTypeId t1 = tokenizer.registerToken("/");
-	const TokenTypeId t2 = tokenizer.registerToken("/*");
-	const TokenTypeId t3 = tokenizer.registerToken("*/");
-
-	std::vector<DynamicToken> expected = {
-	    {TextToken, "Test", SourceLocation{0, 0, 4}},
-	    {t1, "/", SourceLocation{0, 4, 5}},
-	    {TextToken, "Test ", SourceLocation{0, 5, 10}},
-	    {t2, "/*", SourceLocation{0, 10, 12}},
-	    {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
-	    {t3, "*/", SourceLocation{0, 27, 29}}};
-
-	DynamicToken t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(reader, t));
-		EXPECT_EQ(te.type, t.type);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.read(reader, t));
-}
-
-TEST(DynamicTokenizer, commentTestWhitespaceCollapse)
-{
-	CharReader reader{"Test/Test /* Block Comment */", 0};
-	//                 012345678901234567890123456789
-	//                 0        1         2
-	DynamicTokenizer tokenizer(WhitespaceMode::COLLAPSE);
-
-	const TokenTypeId t1 = tokenizer.registerToken("/");
-	const TokenTypeId t2 = tokenizer.registerToken("/*");
-	const TokenTypeId t3 = tokenizer.registerToken("*/");
-
-	std::vector<DynamicToken> expected = {
-	    {TextToken, "Test", SourceLocation{0, 0, 4}},
-	    {t1, "/", SourceLocation{0, 4, 5}},
-	    {TextToken, "Test", SourceLocation{0, 5, 9}},
-	    {t2, "/*", SourceLocation{0, 10, 12}},
-	    {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
-	    {t3, "*/", SourceLocation{0, 27, 29}}};
-
-	DynamicToken t;
-	for (auto &te : expected) {
-		EXPECT_TRUE(tokenizer.read(reader, t));
-		EXPECT_EQ(te.type, t.type);
-		EXPECT_EQ(te.content, t.content);
-		EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
-		EXPECT_EQ(te.location.getStart(), t.location.getStart());
-		EXPECT_EQ(te.location.getEnd(), t.location.getEnd());
-	}
-	ASSERT_FALSE(tokenizer.read(reader, t));
-}
-
-}
-
diff --git a/test/formats/osdm/TokenTrieTest.cpp b/test/formats/osdm/TokenTrieTest.cpp
deleted file mode 100644
index aacd6c0..0000000
--- a/test/formats/osdm/TokenTrieTest.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <formats/osdm/TokenTrie.hpp>
-
-namespace ousia {
-
-static const TokenTypeId t1 = 0;
-static const TokenTypeId t2 = 1;
-static const TokenTypeId t3 = 2;
-static const TokenTypeId t4 = 3;
-
-TEST(TokenTrie, registerToken)
-{
-	TokenTrie tree;
-
-	ASSERT_TRUE(tree.registerToken("a", t1));
-	ASSERT_TRUE(tree.registerToken("ab", t2));
-	ASSERT_TRUE(tree.registerToken("b", t3));
-	ASSERT_TRUE(tree.registerToken("hello", t4));
-
-	ASSERT_FALSE(tree.registerToken("", t1));
-	ASSERT_FALSE(tree.registerToken("a", t4));
-	ASSERT_FALSE(tree.registerToken("ab", t4));
-	ASSERT_FALSE(tree.registerToken("b", t4));
-	ASSERT_FALSE(tree.registerToken("hello", t4));
-
-	ASSERT_EQ(t1, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-	ASSERT_EQ(t4, tree.hasToken("hello"));
-	ASSERT_EQ(EmptyToken, tree.hasToken(""));
-	ASSERT_EQ(EmptyToken, tree.hasToken("abc"));
-}
-
-TEST(TokenTrie, unregisterToken)
-{
-	TokenTrie tree;
-
-	ASSERT_TRUE(tree.registerToken("a", t1));
-	ASSERT_FALSE(tree.registerToken("a", t4));
-
-	ASSERT_TRUE(tree.registerToken("ab", t2));
-	ASSERT_FALSE(tree.registerToken("ab", t4));
-
-	ASSERT_TRUE(tree.registerToken("b", t3));
-	ASSERT_FALSE(tree.registerToken("b", t4));
-
-	ASSERT_EQ(t1, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("a"));
-	ASSERT_FALSE(tree.unregisterToken("a"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(t3, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("b"));
-	ASSERT_FALSE(tree.unregisterToken("b"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(t2, tree.hasToken("ab"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
-
-	ASSERT_TRUE(tree.unregisterToken("ab"));
-	ASSERT_FALSE(tree.unregisterToken("ab"));
-
-	ASSERT_EQ(EmptyToken, tree.hasToken("a"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("ab"));
-	ASSERT_EQ(EmptyToken, tree.hasToken("b"));
-}
-}
-
-- 
cgit v1.2.3


From ce5ab62b564476dfacba33507f1541166fda2bfb Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:47:40 +0100
Subject: renamed osdm to osml and osdmx to osxml

---
 src/formats/osdm/OsdmStreamParser.cpp |  640 ---------------
 src/formats/osdm/OsdmStreamParser.hpp |  351 --------
 src/formats/osdmx/OsdmxParser.cpp     | 1435 ---------------------------------
 src/formats/osdmx/OsdmxParser.hpp     |   55 --
 src/formats/osml/OsmlParser.cpp       |   57 ++
 src/formats/osml/OsmlParser.hpp       |   48 ++
 src/formats/osml/OsmlStreamParser.cpp |  640 +++++++++++++++
 src/formats/osml/OsmlStreamParser.hpp |  350 ++++++++
 src/formats/osxml/OsxmlParser.cpp     | 1435 +++++++++++++++++++++++++++++++++
 src/formats/osxml/OsxmlParser.hpp     |   55 ++
 10 files changed, 2585 insertions(+), 2481 deletions(-)
 delete mode 100644 src/formats/osdm/OsdmStreamParser.cpp
 delete mode 100644 src/formats/osdm/OsdmStreamParser.hpp
 delete mode 100644 src/formats/osdmx/OsdmxParser.cpp
 delete mode 100644 src/formats/osdmx/OsdmxParser.hpp
 create mode 100644 src/formats/osml/OsmlParser.cpp
 create mode 100644 src/formats/osml/OsmlParser.hpp
 create mode 100644 src/formats/osml/OsmlStreamParser.cpp
 create mode 100644 src/formats/osml/OsmlStreamParser.hpp
 create mode 100644 src/formats/osxml/OsxmlParser.cpp
 create mode 100644 src/formats/osxml/OsxmlParser.hpp

(limited to 'src/formats')

diff --git a/src/formats/osdm/OsdmStreamParser.cpp b/src/formats/osdm/OsdmStreamParser.cpp
deleted file mode 100644
index 8cb8caf..0000000
--- a/src/formats/osdm/OsdmStreamParser.cpp
+++ /dev/null
@@ -1,640 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <core/common/CharReader.hpp>
-#include <core/common/Logger.hpp>
-#include <core/common/Utils.hpp>
-#include <core/common/VariantReader.hpp>
-
-#include "OsdmStreamParser.hpp"
-
-namespace ousia {
-
-/**
- * Plain format default tokenizer.
- */
-class PlainFormatTokens : public DynamicTokenizer {
-public:
-	/**
-	 * Id of the backslash token.
-	 */
-	TokenTypeId Backslash;
-
-	/**
-	 * Id of the line comment token.
-	 */
-	TokenTypeId LineComment;
-
-	/**
-	 * Id of the block comment start token.
-	 */
-	TokenTypeId BlockCommentStart;
-
-	/**
-	 * Id of the block comment end token.
-	 */
-	TokenTypeId BlockCommentEnd;
-
-	/**
-	 * Id of the field start token.
-	 */
-	TokenTypeId FieldStart;
-
-	/**
-	 * Id of the field end token.
-	 */
-	TokenTypeId FieldEnd;
-
-	/**
-	 * Registers the plain format tokens in the internal tokenizer.
-	 */
-	PlainFormatTokens()
-	{
-		Backslash = registerToken("\\");
-		LineComment = registerToken("%");
-		BlockCommentStart = registerToken("%{");
-		BlockCommentEnd = registerToken("}%");
-		FieldStart = registerToken("{");
-		FieldEnd = registerToken("}");
-	}
-};
-
-static const PlainFormatTokens Tokens;
-
-/**
- * Class used internally to collect data issued via "DATA" event.
- */
-class DataHandler {
-private:
-	/**
-	 * Internal character buffer.
-	 */
-	std::vector<char> buf;
-
-	/**
-	 * Start location of the character data.
-	 */
-	SourceOffset start;
-
-	/**
-	 * End location of the character data.
-	 */
-	SourceOffset end;
-
-public:
-	/**
-	 * Default constructor, initializes start and end with zeros.
-	 */
-	DataHandler() : start(0), end(0) {}
-
-	/**
-	 * Returns true if the internal buffer is empty.
-	 *
-	 * @return true if no characters were added to the internal buffer, false
-	 * otherwise.
-	 */
-	bool isEmpty() { return buf.empty(); }
-
-	/**
-	 * Appends a single character to the internal buffer.
-	 *
-	 * @param c is the character that should be added to the internal buffer.
-	 * @param charStart is the start position of the character.
-	 * @param charEnd is the end position of the character.
-	 */
-	void append(char c, SourceOffset charStart, SourceOffset charEnd)
-	{
-		if (isEmpty()) {
-			start = charStart;
-		}
-		buf.push_back(c);
-		end = charEnd;
-	}
-
-	/**
-	 * Appends a string to the internal buffer.
-	 *
-	 * @param s is the string that should be added to the internal buffer.
-	 * @param stringStart is the start position of the string.
-	 * @param stringEnd is the end position of the string.
-	 */
-	void append(const std::string &s, SourceOffset stringStart,
-	            SourceOffset stringEnd)
-	{
-		if (isEmpty()) {
-			start = stringStart;
-		}
-		std::copy(s.c_str(), s.c_str() + s.size(), back_inserter(buf));
-		end = stringEnd;
-	}
-
-	/**
-	 * Converts the internal buffer to a variant with attached location
-	 * information.
-	 *
-	 * @param sourceId is the source id which is needed for building the
-	 * location information.
-	 * @return a Variant with the internal buffer content as string and
-	 * the correct start and end location.
-	 */
-	Variant toVariant(SourceId sourceId)
-	{
-		Variant res = Variant::fromString(std::string(buf.data(), buf.size()));
-		res.setLocation({sourceId, start, end});
-		return res;
-	}
-};
-
-OsdmStreamParser::OsdmStreamParser(CharReader &reader, Logger &logger)
-    : reader(reader), logger(logger), tokenizer(Tokens)
-{
-	// Place an intial command representing the complete file on the stack
-	commands.push(Command{"", Variant::mapType{}, true, true, true});
-}
-
-Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep)
-{
-	bool first = true;
-	bool hasCharSiceNSSep = false;
-	std::vector<char> identifier;
-	size_t end = reader.getPeekOffset();
-	char c, c2;
-	while (reader.peek(c)) {
-		// Abort if this character is not a valid identifer character
-		if ((first && Utils::isIdentifierStartCharacter(c)) ||
-		    (!first && Utils::isIdentifierCharacter(c))) {
-			identifier.push_back(c);
-		} else if (c == ':' && hasCharSiceNSSep && reader.fetchPeek(c2) &&
-		           Utils::isIdentifierStartCharacter(c2)) {
-			identifier.push_back(c);
-		} else {
-			if (c == ':' && allowNSSep) {
-				logger.error(
-				    "Expected character before and after namespace separator "
-				    "\":\"",
-				    reader);
-			}
-			reader.resetPeek();
-			break;
-		}
-
-		// This is no longer the first character
-		first = false;
-
-		// Advance the hasCharSiceNSSep flag
-		hasCharSiceNSSep = allowNSSep && (c != ':');
-
-		end = reader.getPeekOffset();
-		reader.consumePeek();
-	}
-
-	// Return the identifier at its location
-	Variant res =
-	    Variant::fromString(std::string(identifier.data(), identifier.size()));
-	res.setLocation({reader.getSourceId(), start, end});
-	return res;
-}
-
-OsdmStreamParser::State OsdmStreamParser::parseBeginCommand()
-{
-	// Expect a '{' after the command
-	reader.consumeWhitespace();
-	if (!reader.expect('{')) {
-		logger.error("Expected \"{\" after \\begin", reader);
-		return State::NONE;
-	}
-
-	// Parse the name of the command that should be opened
-	Variant commandName = parseIdentifier(reader.getOffset(), true);
-	if (commandName.asString().empty()) {
-		logger.error("Expected identifier", commandName);
-		return State::ERROR;
-	}
-
-	// Check whether the next character is a '#', indicating the start of the
-	// command name
-	Variant commandArgName;
-	SourceOffset start = reader.getOffset();
-	if (reader.expect('#')) {
-		commandArgName = parseIdentifier(start);
-		if (commandArgName.asString().empty()) {
-			logger.error("Expected identifier after \"#\"", commandArgName);
-		}
-	}
-
-	if (!reader.expect('}')) {
-		logger.error("Expected \"}\"", reader);
-		return State::ERROR;
-	}
-
-	// Parse the arguments
-	Variant commandArguments = parseCommandArguments(std::move(commandArgName));
-
-	// Push the command onto the command stack
-	pushCommand(std::move(commandName), std::move(commandArguments), true);
-
-	return State::COMMAND;
-}
-
-static bool checkStillInField(const OsdmStreamParser::Command &cmd,
-                              const Variant &endName, Logger &logger)
-{
-	if (cmd.inField && !cmd.inRangeField) {
-		logger.error(std::string("\\end in open field of command \"") +
-		                 cmd.name.asString() + std::string("\""),
-		             endName);
-		logger.note(std::string("Open command started here:"), cmd.name);
-		return true;
-	}
-	return false;
-}
-
-OsdmStreamParser::State OsdmStreamParser::parseEndCommand()
-{
-	// Expect a '{' after the command
-	if (!reader.expect('{')) {
-		logger.error("Expected \"{\" after \\end", reader);
-		return State::NONE;
-	}
-
-	// Fetch the name of the command that should be ended here
-	Variant name = parseIdentifier(reader.getOffset(), true);
-
-	// Make sure the given command name is not empty
-	if (name.asString().empty()) {
-		logger.error("Expected identifier", name);
-		return State::ERROR;
-	}
-
-	// Make sure the command name is terminated with a '}'
-	if (!reader.expect('}')) {
-		logger.error("Expected \"}\"", reader);
-		return State::ERROR;
-	}
-
-	// Unroll the command stack up to the last range command
-	while (!commands.top().hasRange) {
-		if (checkStillInField(commands.top(), name, logger)) {
-			return State::ERROR;
-		}
-		commands.pop();
-	}
-
-	// Make sure we're not in an open field of this command
-	if (checkStillInField(commands.top(), name, logger)) {
-		return State::ERROR;
-	}
-
-	// Special error message if the top-level command is reached
-	if (commands.size() == 1) {
-		logger.error(std::string("Cannot end command \"") + name.asString() +
-		                 std::string("\" here, no command open"),
-		             name);
-		return State::ERROR;
-	}
-
-	// Inform the about command mismatches
-	const Command &cmd = commands.top();
-	if (commands.top().name.asString() != name.asString()) {
-		logger.error(std::string("Trying to end command \"") +
-		                 cmd.name.asString() +
-		                 std::string("\", but open command is \"") +
-		                 name.asString() + std::string("\""),
-		             name);
-		logger.note("Last command was opened here:", cmd.name);
-		return State::ERROR;
-	}
-
-	// Set the location to the location of the command that was ended, then end
-	// the current command
-	location = name.getLocation();
-	commands.pop();
-	return cmd.inRangeField ? State::FIELD_END : State::NONE;
-}
-
-Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName)
-{
-	// Parse the arguments using the universal VariantReader
-	Variant commandArguments;
-	if (reader.expect('[')) {
-		auto res = VariantReader::parseObject(reader, logger, ']');
-		commandArguments = res.second;
-	} else {
-		commandArguments = Variant::mapType{};
-	}
-
-	// Insert the parsed name, make sure "name" was not specified in the
-	// arguments
-	if (commandArgName.isString()) {
-		auto res =
-		    commandArguments.asMap().emplace("name", std::move(commandArgName));
-		if (!res.second) {
-			logger.error("Name argument specified multiple times",
-			             SourceLocation{}, MessageMode::NO_CONTEXT);
-			logger.note("First occurance is here: ", commandArgName);
-			logger.note("Second occurance is here: ", res.first->second);
-		}
-	}
-	return commandArguments;
-}
-
-void OsdmStreamParser::pushCommand(Variant commandName,
-                                   Variant commandArguments, bool hasRange)
-{
-	// Store the location on the stack
-	location = commandName.getLocation();
-
-	// Place the command on the command stack, remove the last commands if we're
-	// not currently inside a field of these commands
-	while (!commands.top().inField) {
-		commands.pop();
-	}
-	commands.push(Command{std::move(commandName), std::move(commandArguments),
-	                      hasRange, false, false});
-}
-
-OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)
-{
-	// Parse the commandName as a first identifier
-	Variant commandName = parseIdentifier(start, true);
-	if (commandName.asString().empty()) {
-		logger.error("Empty command name", reader);
-		return State::NONE;
-	}
-
-	// Handle the special "begin" and "end" commands
-	const auto commandNameComponents =
-	    Utils::split(commandName.asString(), ':');
-	const bool isBegin = commandNameComponents[0] == "begin";
-	const bool isEnd = commandNameComponents[0] == "end";
-	if (isBegin || isEnd) {
-		if (commandNameComponents.size() > 1) {
-			logger.error(
-			    "Special commands \"\\begin\" and \"\\end\" may not contain a "
-			    "namespace separator \":\"",
-			    commandName);
-		}
-		if (isBegin) {
-			return parseBeginCommand();
-		} else if (isEnd) {
-			return parseEndCommand();
-		}
-	}
-
-	// Check whether the next character is a '#', indicating the start of the
-	// command name
-	Variant commandArgName;
-	start = reader.getOffset();
-	if (reader.expect('#')) {
-		commandArgName = parseIdentifier(start);
-		if (commandArgName.asString().empty()) {
-			logger.error("Expected identifier after \"#\"", commandArgName);
-		}
-	}
-
-	// Parse the arugments
-	Variant commandArguments = parseCommandArguments(std::move(commandArgName));
-
-	// Push the command onto the command stack
-	pushCommand(std::move(commandName), std::move(commandArguments), false);
-
-	return State::COMMAND;
-}
-
-void OsdmStreamParser::parseBlockComment()
-{
-	DynamicToken token;
-	size_t depth = 1;
-	while (tokenizer.read(reader, token)) {
-		if (token.type == Tokens.BlockCommentEnd) {
-			depth--;
-			if (depth == 0) {
-				return;
-			}
-		}
-		if (token.type == Tokens.BlockCommentStart) {
-			depth++;
-		}
-	}
-
-	// Issue an error if the file ends while we are in a block comment
-	logger.error("File ended while being in a block comment", reader);
-}
-
-void OsdmStreamParser::parseLineComment()
-{
-	char c;
-	while (reader.read(c)) {
-		if (c == '\n') {
-			return;
-		}
-	}
-}
-
-bool OsdmStreamParser::checkIssueData(DataHandler &handler)
-{
-	if (!handler.isEmpty()) {
-		data = handler.toVariant(reader.getSourceId());
-		location = data.getLocation();
-		reader.resetPeek();
-		return true;
-	}
-	return false;
-}
-
-bool OsdmStreamParser::checkIssueFieldStart()
-{
-	// Fetch the current command, and check whether we're currently inside a
-	// field of this command
-	Command &cmd = commands.top();
-	if (!cmd.inField) {
-		// If this is a range command, we're now implicitly inside the field of
-		// this command -- we'll have to issue a field start command!
-		if (cmd.hasRange) {
-			cmd.inField = true;
-			cmd.inRangeField = true;
-			reader.resetPeek();
-			return true;
-		}
-
-		// This was not a range command, so obviously we're now inside within
-		// a field of some command -- so unroll the commands stack until a
-		// command with open field is reached
-		while (!commands.top().inField) {
-			commands.pop();
-		}
-	}
-	return false;
-}
-
-OsdmStreamParser::State OsdmStreamParser::parse()
-{
-	// Handler for incomming data
-	DataHandler handler;
-
-	// Read tokens until the outer loop should be left
-	DynamicToken token;
-	while (tokenizer.peek(reader, token)) {
-		const TokenTypeId type = token.type;
-
-		// Special handling for Backslash and Text
-		if (type == Tokens.Backslash) {
-			// Before appending anything to the output data or starting a new
-			// command, check whether FIELD_START has to be issued, as the
-			// current command is a command with range
-			if (checkIssueFieldStart()) {
-				location = token.location;
-				return State::FIELD_START;
-			}
-
-			// Check whether a command starts now, without advancing the peek
-			// cursor
-			char c;
-			if (!reader.fetchPeek(c)) {
-				logger.error("Trailing backslash at the end of the file.",
-				             token);
-				return State::END;
-			}
-
-			// Try to parse a command
-			if (Utils::isIdentifierStartCharacter(c)) {
-				// Make sure to issue any data before it is to late
-				if (checkIssueData(handler)) {
-					return State::DATA;
-				}
-
-				// Parse the actual command
-				State res = parseCommand(token.location.getStart());
-				switch (res) {
-					case State::ERROR:
-						throw LoggableException(
-						    "Last error was irrecoverable, ending parsing "
-						    "process");
-					case State::NONE:
-						continue;
-					default:
-						return res;
-				}
-			}
-
-			// This was not a special character, just append the given character
-			// to the data buffer, use the escape character start as start
-			// location and the peek offset as end location
-			reader.peek(c);  // Peek the previously fetched character
-			handler.append(c, token.location.getStart(),
-			               reader.getPeekOffset());
-			reader.consumePeek();
-			continue;
-		} else if (type == TextToken) {
-			// Check whether FIELD_START has to be issued before appending text
-			if (checkIssueFieldStart()) {
-				location = token.location;
-				return State::FIELD_START;
-			}
-
-			// Append the text to the data handler
-			handler.append(token.content, token.location.getStart(),
-			               token.location.getEnd());
-
-			reader.consumePeek();
-			continue;
-		}
-
-		// A non-text token was reached, make sure all pending data commands
-		// have been issued
-		if (checkIssueData(handler)) {
-			return State::DATA;
-		}
-
-		// We will handle the token now, consume the peeked characters
-		reader.consumePeek();
-
-		// Update the location to the current token location
-		location = token.location;
-
-		if (token.type == Tokens.LineComment) {
-			parseLineComment();
-		} else if (token.type == Tokens.BlockCommentStart) {
-			parseBlockComment();
-		} else if (token.type == Tokens.FieldStart) {
-			Command &cmd = commands.top();
-			if (!cmd.inField) {
-				cmd.inField = true;
-				return State::FIELD_START;
-			}
-			logger.error(
-			    "Got field start token \"{\", but no command for which to "
-			    "start the field. Did you mean \"\\{\"?",
-			    token);
-		} else if (token.type == Tokens.FieldEnd) {
-			// Try to end an open field of the current command -- if the current
-			// command is not inside an open field, end this command and try to
-			// close the next one
-			for (int i = 0; i < 2 && commands.size() > 1; i++) {
-				Command &cmd = commands.top();
-				if (!cmd.inRangeField) {
-					if (cmd.inField) {
-						cmd.inField = false;
-						return State::FIELD_END;
-					}
-					commands.pop();
-				} else {
-					break;
-				}
-			}
-			logger.error(
-			    "Got field end token \"}\", but there is no field to end. Did "
-			    "you mean \"\\}\"?",
-			    token);
-		} else {
-			logger.error("Unexpected token \"" + token.content + "\"", token);
-		}
-	}
-
-	// Issue available data
-	if (checkIssueData(handler)) {
-		return State::DATA;
-	}
-
-	// Make sure all open commands and fields have been ended at the end of the
-	// stream
-	while (commands.size() > 1) {
-		Command &cmd = commands.top();
-		if (cmd.inField || cmd.hasRange) {
-			logger.error("Reached end of stream, but command \"" +
-			                 cmd.name.asString() + "\" has not been ended",
-			             cmd.name);
-		}
-		commands.pop();
-	}
-
-	location = SourceLocation{reader.getSourceId(), reader.getOffset()};
-	return State::END;
-}
-
-const Variant &OsdmStreamParser::getCommandName()
-{
-	return commands.top().name;
-}
-
-const Variant &OsdmStreamParser::getCommandArguments()
-{
-	return commands.top().arguments;
-}
-}
-
diff --git a/src/formats/osdm/OsdmStreamParser.hpp b/src/formats/osdm/OsdmStreamParser.hpp
deleted file mode 100644
index 48d8fb7..0000000
--- a/src/formats/osdm/OsdmStreamParser.hpp
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file OsdmStreamParser.hpp
- *
- * Provides classes for low-level classes for reading the TeX-esque osdm
- * format. The class provided here does not build any model objects and does not
- * implement the Parser interface.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_OSDM_STREAM_PARSER_HPP_
-#define _OUSIA_OSDM_STREAM_PARSER_HPP_
-
-#include <stack>
-
-#include <core/common/Variant.hpp>
-
-#include "DynamicTokenizer.hpp"
-
-namespace ousia {
-
-// Forward declarations
-class CharReader;
-class Logger;
-class DataHandler;
-
-/**
- * The OsdmStreamParser class provides a low-level reader for the TeX-esque osdm
- * format. The parser is constructed around a "parse" function, which reads data
- * from the underlying CharReader until a new state is reached and indicates
- * this state in a return value. The calling code then has to pull corresponding
- * data from the stream reader. The reader makes sure the incommind file is
- * syntactically valid and tries to recorver from most errors. If an error is
- * irrecoverable (this is the case for errors with wrong nesting of commands or
- * fields, as this would lead to too many consecutive errors) a
- * LoggableException is thrown.
- */
-class OsdmStreamParser {
-public:
-	/**
-	 * Enum used to indicate which state the OsdmStreamParser class is in
-	 * after calling the "parse" function.
-	 */
-	enum class State {
-		/**
-	     * State returned if a fully featured command has been read. A command
-	     * consists of the command name and its arguments (which optionally
-	     * includes the name).
-	     */
-		COMMAND,
-
-		/**
-	     * State returned if data is given. The reader must decide which field
-	     * or command this should be routed to. Trailing or leading whitespace
-	     * has been removed. Only called if the data is non-empty.
-	     */
-		DATA,
-
-		/**
-	     * A user-defined entity has been found. The entity sequence is stored
-	     * in the command name.
-	     */
-		ENTITY,
-
-		/**
-	     * State returned if an annotation was started. An annotation consists
-	     * of the command name and its arguments (which optionally include the
-	     * name).
-	     */
-		ANNOTATION_START,
-
-		/**
-	     * State returned if an annotation ends. The reader indicates which
-	     * annotation ends.
-	     */
-		ANNOTATION_END,
-
-		/**
-	     * State returned if a new field started. The reader assures that the
-	     * current field ends before a new field is started and that the field
-	     * is not started if data has been given outside of a field. The
-	     * field number is set to the current field index.
-	     */
-		FIELD_START,
-
-		/**
-	     * State returned if the current field ends. The reader assures that a
-	     * field was actually open.
-	     */
-		FIELD_END,
-
-		/**
-	     * The end of the stream has been reached.
-	     */
-		END,
-
-		/**
-	     * Returned from internal functions if nothing should be done.
-	     */
-		NONE,
-
-		/**
-	     * Returned from internal function to indicate irrecoverable errors.
-	     */
-		ERROR
-	};
-
-	/**
-	 * Entry used for the command stack.
-	 */
-	struct Command {
-		/**
-		 * Name and location of the current command.
-		 */
-		Variant name;
-
-		/**
-		 * Arguments that were passed to the command.
-		 */
-		Variant arguments;
-
-		/**
-		 * Set to true if this is a command with clear begin and end.
-		 */
-		bool hasRange;
-
-		/**
-		 * Set to true if we are currently inside a field of this command.
-		 */
-		bool inField;
-
-		/**
-		 * Set to true if we are currently in the range field of the command
-		 * (implies inField being set to true).
-		 */
-		bool inRangeField;
-
-		/**
-		 * Default constructor.
-		 */
-		Command() : hasRange(false), inField(false), inRangeField(false) {}
-
-		/**
-		 * Constructor of the Command class.
-		 *
-		 * @param name is a string variant with name and location of the
-		 * command.
-		 * @param arguments is a map variant with the arguments given to the
-		 * command.
-		 * @param hasRange should be set to true if this is a command with
-		 * explicit range.
-		 * @param inField is set to true if we currently are inside a field
-		 * of this command.
-		 * @param inRangeField is set to true if we currently inside the outer
-		 * field of the command.
-		 */
-		Command(Variant name, Variant arguments, bool hasRange, bool inField,
-		        bool inRangeField)
-		    : name(std::move(name)),
-		      arguments(std::move(arguments)),
-		      hasRange(hasRange),
-		      inField(inField),
-		      inRangeField(inRangeField)
-		{
-		}
-	};
-
-private:
-	/**
-	 * Reference to the CharReader instance from which the incomming bytes are
-	 * read.
-	 */
-	CharReader &reader;
-
-	/**
-	 * Reference at the logger instance to which all error messages are sent.
-	 */
-	Logger &logger;
-
-	/**
-	 * Tokenizer instance used to read individual tokens from the text.
-	 */
-	DynamicTokenizer tokenizer;
-
-	/**
-	 * Stack containing the current commands.
-	 */
-	std::stack<Command> commands;
-
-	/**
-	 * Variant containing the data that has been read (always is a string,
-	 * contains the exact location of the data in the source file).
-	 */
-	Variant data;
-
-	/**
-	 * Contains the location of the last token.
-	 */
-	SourceLocation location;
-
-	/**
-	 * Contains the field index of the current command.
-	 */
-	size_t fieldIdx;
-
-	/**
-	 * Function used internall to parse an identifier.
-	 *
-	 * @param start is the start byte offset of the identifier (including the
-	 * backslash).
-	 * @param allowNSSep should be set to true if the namespace separator is
-	 * allowed in the identifier name. Issues error if the namespace separator
-	 * is placed incorrectly.
-	 */
-	Variant parseIdentifier(size_t start, bool allowNSSep = false);
-
-	/**
-	 * Function used internally to handle the special "\begin" command.
-	 */
-	State parseBeginCommand();
-
-	/**
-	 * Function used internally to handle the special "\end" command.
-	 */
-	State parseEndCommand();
-
-	/**
-	 * Pushes the parsed command onto the command stack.
-	 */
-	void pushCommand(Variant commandName, Variant commandArguments,
-	                 bool hasRange);
-
-	/**
-	 * Parses the command arguments.
-	 */
-	Variant parseCommandArguments(Variant commandArgName);
-
-	/**
-	 * Function used internally to parse a command.
-	 *
-	 * @param start is the start byte offset of the command (including the
-	 * backslash)
-	 * @return true if a command was actuall parsed, false otherwise.
-	 */
-	State parseCommand(size_t start);
-
-	/**
-	 * Function used internally to parse a block comment.
-	 */
-	void parseBlockComment();
-
-	/**
-	 * Function used internally to parse a generic comment.
-	 */
-	void parseLineComment();
-
-	/**
-	 * Checks whether there is any data pending to be issued, if yes, issues it.
-	 *
-	 * @param handler is the data handler that contains the data that may be
-	 * returned to the user.
-	 * @return true if there was any data and DATA should be returned by the
-	 * parse function, false otherwise.
-	 */
-	bool checkIssueData(DataHandler &handler);
-
-	/**
-	 * Called before any data is appended to the internal data handler. Checks
-	 * whether a new field should be started or implicitly ended.
-	 *
-	 * @return true if FIELD_START should be returned by the parse function.
-	 */
-	bool checkIssueFieldStart();
-
-public:
-	/**
-	 * Constructor of the OsdmStreamParser class. Attaches the new
-	 * OsdmStreamParser to the given CharReader and Logger instances.
-	 *
-	 * @param reader is the reader instance from which incomming characters
-	 * should be read.
-	 * @param logger is the logger instance to which errors should be written.
-	 */
-	OsdmStreamParser(CharReader &reader, Logger &logger);
-
-	/**
-	 * Continues parsing. Returns one of the states defined in the State enum.
-	 * Callers should stop once the State::END state is reached. Use the getter
-	 * functions to get more information about the current state, such as the
-	 * command name or the data or the current field index.
-	 *
-	 * @return the new state the parser has reached.
-	 */
-	State parse();
-
-	/**
-	 * Returns a reference at the internally stored data. Only valid if
-	 * State::DATA was returned by the "parse" function.
-	 *
-	 * @return a reference at a variant containing the data parsed by the
-	 * "parse" function.
-	 */
-	const Variant &getData() { return data; }
-
-	/**
-	 * Returns a reference at the internally stored command name. Only valid if
-	 * State::COMMAND was returned by the "parse" function.
-	 *
-	 * @return a reference at a variant containing name and location of the
-	 * parsed command.
-	 */
-	const Variant &getCommandName();
-
-	/**
-	 * Returns a reference at the internally stored command name. Only valid if
-	 * State::COMMAND was returned by the "parse" function.
-	 *
-	 * @return a reference at a variant containing arguments given to the
-	 * command.
-	 */
-	const Variant &getCommandArguments();
-
-	/**
-	 * Returns a reference at the char reader.
-	 *
-	 * @return the last internal token location.
-	 */
-	SourceLocation &getLocation() { return location; }
-};
-}
-
-#endif /* _OUSIA_OSDM_STREAM_PARSER_HPP_ */
-
diff --git a/src/formats/osdmx/OsdmxParser.cpp b/src/formats/osdmx/OsdmxParser.cpp
deleted file mode 100644
index c46d9de..0000000
--- a/src/formats/osdmx/OsdmxParser.cpp
+++ /dev/null
@@ -1,1435 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <iostream>
-#include <map>
-#include <sstream>
-#include <vector>
-
-#include <expat.h>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/RttiBuilder.hpp>
-#include <core/common/Utils.hpp>
-#include <core/common/VariantReader.hpp>
-#include <core/parser/ParserStack.hpp>
-#include <core/parser/ParserScope.hpp>
-#include <core/model/Document.hpp>
-#include <core/model/Domain.hpp>
-#include <core/model/Project.hpp>
-#include <core/model/RootNode.hpp>
-#include <core/model/Typesystem.hpp>
-
-#include "XmlParser.hpp"
-
-namespace ousia {
-
-/* HeadNode Helper class */
-
-namespace {
-class HeadNode : public Node {
-public:
-	using Node::Node;
-};
-}
-
-namespace RttiTypes {
-static Rtti HeadNode = RttiBuilder<ousia::HeadNode>("HeadNode");
-}
-
-/* Element Handler Classes */
-
-class DocumentHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		Rooted<Document> document =
-		    project()->createDocument(args["name"].asString());
-		document->setLocation(location());
-		scope().push(document);
-		scope().setFlag(ParserFlag::POST_HEAD, false);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DocumentHandler{handlerData};
-	}
-};
-
-class DocumentField : public Node {
-public:
-	DocumentField(Manager &mgr, std::string name, Handle<Node> parent)
-	    : Node(mgr, name, parent)
-	{
-	}
-};
-
-namespace RttiTypes {
-const Rtti DocumentField =
-    RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node);
-}
-
-class DocumentChildHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void preamble(Handle<Node> parentNode, std::string &fieldName,
-	              DocumentEntity *&parent, bool &inField)
-	{
-		// check if the parent in the structure tree was an explicit field
-		// reference.
-		inField = parentNode->isa(&RttiTypes::DocumentField);
-		if (inField) {
-			fieldName = parentNode->getName();
-			parentNode = scope().selectOrThrow(
-			    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity});
-		} else {
-			// if it wasn't an explicit reference, we use the default field.
-			fieldName = DEFAULT_FIELD_NAME;
-		}
-		// reference the parent entity explicitly.
-		parent = nullptr;
-		if (parentNode->isa(&RttiTypes::StructuredEntity)) {
-			parent = static_cast<DocumentEntity *>(
-			    parentNode.cast<StructuredEntity>().get());
-		} else if (parentNode->isa(&RttiTypes::AnnotationEntity)) {
-			parent = static_cast<DocumentEntity *>(
-			    parentNode.cast<AnnotationEntity>().get());
-		}
-	}
-
-	void start(Variant::mapType &args) override
-	{
-		scope().setFlag(ParserFlag::POST_HEAD, true);
-		Rooted<Node> parentNode = scope().selectOrThrow(
-		    {&RttiTypes::Document, &RttiTypes::StructuredEntity,
-		     &RttiTypes::AnnotationEntity, &RttiTypes::DocumentField});
-
-		std::string fieldName;
-		DocumentEntity *parent;
-		bool inField;
-
-		preamble(parentNode, fieldName, parent, inField);
-
-		// try to find a FieldDescriptor for the given tag if we are not in a
-		// field already.
-		// TODO: Consider fields of transparent classes
-		if (!inField && parent != nullptr &&
-		    parent->getDescriptor()->hasField(name())) {
-			Rooted<DocumentField> field{new DocumentField(
-			    parentNode->getManager(), fieldName, parentNode)};
-			field->setLocation(location());
-			scope().push(field);
-			return;
-		}
-
-		// Otherwise create a new StructuredEntity
-		// TODO: Consider Anchors and AnnotationEntities
-		Rooted<StructuredClass> strct = scope().resolve<StructuredClass>(
-		    Utils::split(name(), ':'), logger());
-		if (strct == nullptr) {
-			// if we could not resolve the name, throw an exception.
-			throw LoggableException(
-			    std::string("\"") + name() + "\" could not be resolved.",
-			    location());
-		}
-
-		std::string name;
-		auto it = args.find("name");
-		if (it != args.end()) {
-			name = it->second.asString();
-			args.erase(it);
-		}
-
-		Rooted<StructuredEntity> entity;
-		if (parentNode->isa(&RttiTypes::Document)) {
-			entity = parentNode.cast<Document>()->createRootStructuredEntity(
-			    strct, args, name);
-		} else {
-			// calculate a path if transparent entities are needed in between.
-			auto path = parent->getDescriptor()->pathTo(strct);
-			if (path.empty()) {
-				throw LoggableException(
-				    std::string("An instance of \"") + strct->getName() +
-				        "\" is not allowed as child of an instance of \"" +
-				        parent->getDescriptor()->getName() + "\"",
-				    location());
-			}
-
-			// create all transparent entities until the last field.
-			for (size_t p = 1; p < path.size() - 1; p = p + 2) {
-				parent = static_cast<DocumentEntity *>(
-				    parent->createChildStructuredEntity(
-				                path[p].cast<StructuredClass>(),
-				                Variant::mapType{}, path[p - 1]->getName(),
-				                "").get());
-			}
-			entity = parent->createChildStructuredEntity(strct, args, fieldName,
-			                                             name);
-		}
-		entity->setLocation(location());
-		scope().push(entity);
-	}
-
-	void end() override { scope().pop(); }
-
-	void data(const std::string &data, int fieldIdx) override
-	{
-		Rooted<Node> parentNode = scope().selectOrThrow(
-		    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity,
-		     &RttiTypes::DocumentField});
-
-		std::string fieldName;
-		DocumentEntity *parent;
-		bool inField;
-
-		preamble(parentNode, fieldName, parent, inField);
-
-		// retrieve the correct FieldDescriptor.
-		// TODO: Consider fields of transparent classes
-		Rooted<Descriptor> desc = parent->getDescriptor();
-		Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName);
-		if (field == nullptr) {
-			logger().error(
-			    std::string("Can't handle data because no field with name \"") +
-			        fieldName + "\" exists in descriptor\"" + desc->getName() +
-			        "\".",
-			    location());
-			return;
-		}
-		if (!field->isPrimitive()) {
-			logger().error(std::string("Can't handle data because field \"") +
-			                   fieldName + "\" of descriptor \"" +
-			                   desc->getName() + "\" is not primitive!",
-			               location());
-			return;
-		}
-
-		// try to parse the content.
-		auto res = VariantReader::parseGenericString(
-		    data, logger(), location().getSourceId(), location().getStart());
-		if (!res.first) {
-			return;
-		}
-		// try to convert it to the correct type.
-		if (!field->getPrimitiveType()->build(res.second, logger())) {
-			return;
-		}
-		// add it as primitive content.
-		parent->createChildDocumentPrimitive(res.second, fieldName);
-	}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DocumentChildHandler{handlerData};
-	}
-};
-
-class TypesystemHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		// Create the typesystem instance
-		Rooted<Typesystem> typesystem =
-		    project()->createTypesystem(args["name"].asString());
-		typesystem->setLocation(location());
-
-		// Push the typesystem onto the scope, set the POST_HEAD flag to true
-		scope().push(typesystem);
-		scope().setFlag(ParserFlag::POST_HEAD, false);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new TypesystemHandler{handlerData};
-	}
-};
-
-class TypesystemEnumHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		scope().setFlag(ParserFlag::POST_HEAD, true);
-
-		// Fetch the current typesystem and create the enum node
-		Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>();
-		Rooted<EnumType> enumType =
-		    typesystem->createEnumType(args["name"].asString());
-		enumType->setLocation(location());
-
-		scope().push(enumType);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new TypesystemEnumHandler{handlerData};
-	}
-};
-
-class TypesystemEnumEntryHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	std::string entry;
-
-	void start(Variant::mapType &args) override {}
-
-	void end() override
-	{
-		Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>();
-		enumType->addEntry(entry, logger());
-	}
-
-	void data(const std::string &data, int field) override
-	{
-		if (field != 0) {
-			// TODO: This should be stored in the HandlerData
-			logger().error("Enum entry only has one field.");
-			return;
-		}
-		entry.append(data);
-	}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new TypesystemEnumEntryHandler{handlerData};
-	}
-};
-
-class TypesystemStructHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		scope().setFlag(ParserFlag::POST_HEAD, true);
-
-		// Fetch the arguments used for creating this type
-		const std::string &name = args["name"].asString();
-		const std::string &parent = args["parent"].asString();
-
-		// Fetch the current typesystem and create the struct node
-		Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>();
-		Rooted<StructType> structType = typesystem->createStructType(name);
-		structType->setLocation(location());
-
-		// Try to resolve the parent type and set it as parent structure
-		if (!parent.empty()) {
-			scope().resolve<StructType>(
-			    parent, structType, logger(),
-			    [](Handle<Node> parent, Handle<Node> structType,
-			       Logger &logger) {
-				    if (parent != nullptr) {
-					    structType.cast<StructType>()->setParentStructure(
-					        parent.cast<StructType>(), logger);
-				    }
-				});
-		}
-		scope().push(structType);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new TypesystemStructHandler{handlerData};
-	}
-};
-
-class TypesystemStructFieldHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		// Read the argument values
-		const std::string &name = args["name"].asString();
-		const std::string &type = args["type"].asString();
-		const Variant &defaultValue = args["default"];
-		const bool optional =
-		    !(defaultValue.isObject() && defaultValue.asObject() == nullptr);
-
-		Rooted<StructType> structType = scope().selectOrThrow<StructType>();
-		Rooted<Attribute> attribute =
-		    structType->createAttribute(name, defaultValue, optional, logger());
-		attribute->setLocation(location());
-
-		// Try to resolve the type and default value
-		if (optional) {
-			scope().resolveTypeWithValue(
-			    type, attribute, attribute->getDefaultValue(), logger(),
-			    [](Handle<Node> type, Handle<Node> attribute, Logger &logger) {
-				    if (type != nullptr) {
-					    attribute.cast<Attribute>()->setType(type.cast<Type>(),
-					                                         logger);
-				    }
-				});
-		} else {
-			scope().resolveType(
-			    type, attribute, logger(),
-			    [](Handle<Node> type, Handle<Node> attribute, Logger &logger) {
-				    if (type != nullptr) {
-					    attribute.cast<Attribute>()->setType(type.cast<Type>(),
-					                                         logger);
-				    }
-				});
-		}
-	}
-
-	void end() override {}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new TypesystemStructFieldHandler{handlerData};
-	}
-};
-
-class TypesystemConstantHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		scope().setFlag(ParserFlag::POST_HEAD, true);
-
-		// Read the argument values
-		const std::string &name = args["name"].asString();
-		const std::string &type = args["type"].asString();
-		const Variant &value = args["value"];
-
-		Rooted<Typesystem> typesystem = scope().selectOrThrow<Typesystem>();
-		Rooted<Constant> constant = typesystem->createConstant(name, value);
-		constant->setLocation(location());
-
-		// Try to resolve the type
-		scope().resolveTypeWithValue(
-		    type, constant, constant->getValue(), logger(),
-		    [](Handle<Node> type, Handle<Node> constant, Logger &logger) {
-			    if (type != nullptr) {
-				    constant.cast<Constant>()->setType(type.cast<Type>(),
-				                                       logger);
-			    }
-			});
-	}
-
-	void end() override {}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new TypesystemConstantHandler{handlerData};
-	}
-};
-
-/*
- * Domain Handlers
- */
-
-class DomainHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		Rooted<Domain> domain =
-		    project()->createDomain(args["name"].asString());
-		domain->setLocation(location());
-
-		scope().push(domain);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainHandler{handlerData};
-	}
-};
-
-class DomainStructHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		scope().setFlag(ParserFlag::POST_HEAD, true);
-
-		Rooted<Domain> domain = scope().selectOrThrow<Domain>();
-
-		Rooted<StructuredClass> structuredClass = domain->createStructuredClass(
-		    args["name"].asString(), args["cardinality"].asCardinality(),
-		    nullptr, args["transparent"].asBool(), args["isRoot"].asBool());
-		structuredClass->setLocation(location());
-
-		const std::string &isa = args["isa"].asString();
-		if (!isa.empty()) {
-			scope().resolve<StructuredClass>(
-			    isa, structuredClass, logger(),
-			    [](Handle<Node> superclass, Handle<Node> structuredClass,
-			       Logger &logger) {
-				    if (superclass != nullptr) {
-					    structuredClass.cast<StructuredClass>()->setSuperclass(
-					        superclass.cast<StructuredClass>(), logger);
-				    }
-				});
-		}
-
-		scope().push(structuredClass);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainStructHandler{handlerData};
-	}
-};
-
-class DomainAnnotationHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		scope().setFlag(ParserFlag::POST_HEAD, true);
-
-		Rooted<Domain> domain = scope().selectOrThrow<Domain>();
-
-		Rooted<AnnotationClass> annotationClass =
-		    domain->createAnnotationClass(args["name"].asString());
-		annotationClass->setLocation(location());
-
-		scope().push(annotationClass);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainAnnotationHandler{handlerData};
-	}
-};
-
-class DomainAttributesHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		// Fetch the current typesystem and create the struct node
-		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
-
-		Rooted<StructType> attrDesc = parent->getAttributesDescriptor();
-		attrDesc->setLocation(location());
-
-		scope().push(attrDesc);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainAttributesHandler{handlerData};
-	}
-};
-
-class DomainFieldHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		FieldDescriptor::FieldType type;
-		if (args["isSubtree"].asBool()) {
-			type = FieldDescriptor::FieldType::SUBTREE;
-		} else {
-			type = FieldDescriptor::FieldType::TREE;
-		}
-
-		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
-
-		Rooted<FieldDescriptor> field = parent->createFieldDescriptor(
-		    type, args["name"].asString(), args["optional"].asBool());
-		field->setLocation(location());
-
-		scope().push(field);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainFieldHandler{handlerData};
-	}
-};
-
-class DomainFieldRefHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
-
-		const std::string &name = args["name"].asString();
-		scope().resolve<FieldDescriptor>(
-		    name, parent, logger(),
-		    [](Handle<Node> field, Handle<Node> parent, Logger &logger) {
-			    if (field != nullptr) {
-				    parent.cast<StructuredClass>()->addFieldDescriptor(
-				        field.cast<FieldDescriptor>());
-			    }
-			});
-	}
-
-	void end() override {}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainFieldRefHandler{handlerData};
-	}
-};
-
-class DomainPrimitiveHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
-
-		Rooted<FieldDescriptor> field = parent->createPrimitiveFieldDescriptor(
-		    nullptr, args["name"].asString(), args["optional"].asBool());
-		field->setLocation(location());
-
-		const std::string &type = args["type"].asString();
-		scope().resolve<Type>(
-		    type, field, logger(),
-		    [](Handle<Node> type, Handle<Node> field, Logger &logger) {
-			    if (type != nullptr) {
-				    field.cast<FieldDescriptor>()->setPrimitiveType(
-				        type.cast<Type>());
-			    }
-			});
-
-		scope().push(field);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainPrimitiveHandler{handlerData};
-	}
-};
-
-class DomainChildHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		Rooted<FieldDescriptor> field =
-		    scope().selectOrThrow<FieldDescriptor>();
-
-		const std::string &ref = args["ref"].asString();
-		scope().resolve<StructuredClass>(
-		    ref, field, logger(),
-		    [](Handle<Node> child, Handle<Node> field, Logger &logger) {
-			    if (child != nullptr) {
-				    field.cast<FieldDescriptor>()->addChild(
-				        child.cast<StructuredClass>());
-			    }
-			});
-	}
-
-	void end() override {}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainChildHandler{handlerData};
-	}
-};
-
-class DomainParent : public Node {
-public:
-	DomainParent(Manager &mgr, std::string name, Handle<Node> parent)
-	    : Node(mgr, name, parent)
-	{
-	}
-};
-
-namespace RttiTypes {
-const Rtti DomainParent =
-    RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node);
-}
-
-class DomainParentHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		Rooted<StructuredClass> strct =
-		    scope().selectOrThrow<StructuredClass>();
-
-		Rooted<DomainParent> parent{new DomainParent(
-		    strct->getManager(), args["name"].asString(), strct)};
-		parent->setLocation(location());
-		scope().push(parent);
-	}
-
-	void end() override { scope().pop(); }
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainParentHandler{handlerData};
-	}
-};
-
-class DomainParentFieldHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		Rooted<DomainParent> parentNameNode =
-		    scope().selectOrThrow<DomainParent>();
-		FieldDescriptor::FieldType type;
-		if (args["isSubtree"].asBool()) {
-			type = FieldDescriptor::FieldType::SUBTREE;
-		} else {
-			type = FieldDescriptor::FieldType::TREE;
-		}
-
-		const std::string &name = args["name"].asString();
-		const bool optional = args["optional"].asBool();
-		Rooted<StructuredClass> strct =
-		    parentNameNode->getParent().cast<StructuredClass>();
-
-		// resolve the parent, create the declared field and add the declared
-		// StructuredClass as child to it.
-		scope().resolve<Descriptor>(
-		    parentNameNode->getName(), strct, logger(),
-		    [type, name, optional](Handle<Node> parent, Handle<Node> strct,
-		                           Logger &logger) {
-			    if (parent != nullptr) {
-				    Rooted<FieldDescriptor> field =
-				        parent.cast<Descriptor>()->createFieldDescriptor(
-				            type, name, optional);
-				    field->addChild(strct.cast<StructuredClass>());
-			    }
-			});
-	}
-
-	void end() override {}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainParentFieldHandler{handlerData};
-	}
-};
-
-class DomainParentFieldRefHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	void start(Variant::mapType &args) override
-	{
-		Rooted<DomainParent> parentNameNode =
-		    scope().selectOrThrow<DomainParent>();
-
-		const std::string &name = args["name"].asString();
-		Rooted<StructuredClass> strct =
-		    parentNameNode->getParent().cast<StructuredClass>();
-		auto loc = location();
-
-		// resolve the parent, get the referenced field and add the declared
-		// StructuredClass as child to it.
-		scope().resolve<Descriptor>(parentNameNode->getName(), strct, logger(),
-		                            [name, loc](Handle<Node> parent,
-		                                        Handle<Node> strct,
-		                                        Logger &logger) {
-			if (parent != nullptr) {
-				auto res = parent.cast<Descriptor>()->resolve(
-				    &RttiTypes::FieldDescriptor, name);
-				if (res.size() != 1) {
-					logger.error(
-					    std::string("Could not find referenced field ") + name,
-					    loc);
-					return;
-				}
-				Rooted<FieldDescriptor> field =
-				    res[0].node.cast<FieldDescriptor>();
-				field->addChild(strct.cast<StructuredClass>());
-			}
-		});
-	}
-
-	void end() override {}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new DomainParentFieldRefHandler{handlerData};
-	}
-};
-
-/*
- * Import and Include Handler
- */
-
-class ImportIncludeHandler : public Handler {
-public:
-	using Handler::Handler;
-
-	bool srcInArgs = false;
-	std::string rel;
-	std::string type;
-	std::string src;
-
-	void start(Variant::mapType &args) override
-	{
-		rel = args["rel"].asString();
-		type = args["type"].asString();
-		src = args["src"].asString();
-		srcInArgs = !src.empty();
-	}
-
-	void data(const std::string &data, int field) override
-	{
-		if (srcInArgs) {
-			logger().error("\"src\" attribute has already been set");
-			return;
-		}
-		if (field != 0) {
-			logger().error("Command has only one field.");
-			return;
-		}
-		src.append(data);
-	}
-};
-
-class ImportHandler : public ImportIncludeHandler {
-public:
-	using ImportIncludeHandler::ImportIncludeHandler;
-
-	void start(Variant::mapType &args) override
-	{
-		ImportIncludeHandler::start(args);
-
-		// Make sure imports are still possible
-		if (scope().getFlag(ParserFlag::POST_HEAD)) {
-			logger().error("Imports must be listed before other commands.",
-			               location());
-			return;
-		}
-	}
-
-	void end() override
-	{
-		// Fetch the last node and check whether an import is valid at this
-		// position
-		Rooted<Node> leaf = scope().getLeaf();
-		if (leaf == nullptr || !leaf->isa(&RttiTypes::RootNode)) {
-			logger().error(
-			    "Import not supported here, must be inside a document, domain "
-			    "or typesystem command.",
-			    location());
-			return;
-		}
-		Rooted<RootNode> leafRootNode = leaf.cast<RootNode>();
-
-		// Perform the actual import, register the imported node within the leaf
-		// node
-		Rooted<Node> imported =
-		    context().import(src, type, rel, leafRootNode->getReferenceTypes());
-		if (imported != nullptr) {
-			leafRootNode->reference(imported);
-		}
-	}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new ImportHandler{handlerData};
-	}
-};
-
-class IncludeHandler : public ImportIncludeHandler {
-public:
-	using ImportIncludeHandler::ImportIncludeHandler;
-
-	void start(Variant::mapType &args) override
-	{
-		ImportIncludeHandler::start(args);
-	}
-
-	void end() override
-	{
-		context().include(src, type, rel, {&RttiTypes::Node});
-	}
-
-	static Handler *create(const HandlerData &handlerData)
-	{
-		return new IncludeHandler{handlerData};
-	}
-};
-
-namespace ParserStates {
-/* Document states */
-static const ParserState Document =
-    ParserStateBuilder()
-        .parent(&None)
-        .createdNodeType(&RttiTypes::Document)
-        .elementHandler(DocumentHandler::create)
-        .arguments({Argument::String("name", "")});
-
-static const ParserState DocumentChild =
-    ParserStateBuilder()
-        .parents({&Document, &DocumentChild})
-        .createdNodeTypes({&RttiTypes::StructureNode,
-                           &RttiTypes::AnnotationEntity,
-                           &RttiTypes::DocumentField})
-        .elementHandler(DocumentChildHandler::create);
-
-/* Domain states */
-static const ParserState Domain = ParserStateBuilder()
-                                      .parents({&None, &Document})
-                                      .createdNodeType(&RttiTypes::Domain)
-                                      .elementHandler(DomainHandler::create)
-                                      .arguments({Argument::String("name")});
-
-static const ParserState DomainStruct =
-    ParserStateBuilder()
-        .parent(&Domain)
-        .createdNodeType(&RttiTypes::StructuredClass)
-        .elementHandler(DomainStructHandler::create)
-        .arguments({Argument::String("name"),
-                    Argument::Cardinality("cardinality", Cardinality::any()),
-                    Argument::Bool("isRoot", false),
-                    Argument::Bool("transparent", false),
-                    Argument::String("isa", "")});
-
-static const ParserState DomainAnnotation =
-    ParserStateBuilder()
-        .parent(&Domain)
-        .createdNodeType(&RttiTypes::AnnotationClass)
-        .elementHandler(DomainAnnotationHandler::create)
-        .arguments({Argument::String("name")});
-
-static const ParserState DomainAttributes =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::StructType)
-        .elementHandler(DomainAttributesHandler::create)
-        .arguments({});
-
-static const ParserState DomainAttribute =
-    ParserStateBuilder()
-        .parent(&DomainAttributes)
-        .elementHandler(TypesystemStructFieldHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("default", Variant::fromObject(nullptr))});
-
-static const ParserState DomainField =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainFieldHandler::create)
-        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
-                    Argument::Bool("isSubtree", false),
-                    Argument::Bool("optional", false)});
-
-static const ParserState DomainFieldRef =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainFieldRefHandler::create)
-        .arguments({Argument::String("name", DEFAULT_FIELD_NAME)});
-
-static const ParserState DomainStructPrimitive =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainPrimitiveHandler::create)
-        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
-                    Argument::Bool("optional", false),
-                    Argument::String("type")});
-
-static const ParserState DomainStructChild =
-    ParserStateBuilder()
-        .parent(&DomainField)
-        .elementHandler(DomainChildHandler::create)
-        .arguments({Argument::String("ref")});
-
-static const ParserState DomainStructParent =
-    ParserStateBuilder()
-        .parent(&DomainStruct)
-        .createdNodeType(&RttiTypes::DomainParent)
-        .elementHandler(DomainParentHandler::create)
-        .arguments({Argument::String("name")});
-
-static const ParserState DomainStructParentField =
-    ParserStateBuilder()
-        .parent(&DomainStructParent)
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainParentFieldHandler::create)
-        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
-                    Argument::Bool("isSubtree", false),
-                    Argument::Bool("optional", false)});
-
-static const ParserState DomainStructParentFieldRef =
-    ParserStateBuilder()
-        .parent(&DomainStructParent)
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainParentFieldRefHandler::create)
-        .arguments({Argument::String("name", DEFAULT_FIELD_NAME)});
-
-/* Typesystem states */
-static const ParserState Typesystem =
-    ParserStateBuilder()
-        .parents({&None, &Domain})
-        .createdNodeType(&RttiTypes::Typesystem)
-        .elementHandler(TypesystemHandler::create)
-        .arguments({Argument::String("name", "")});
-
-static const ParserState TypesystemEnum =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::EnumType)
-        .elementHandler(TypesystemEnumHandler::create)
-        .arguments({Argument::String("name")});
-
-static const ParserState TypesystemEnumEntry =
-    ParserStateBuilder()
-        .parent(&TypesystemEnum)
-        .elementHandler(TypesystemEnumEntryHandler::create)
-        .arguments({});
-
-static const ParserState TypesystemStruct =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::StructType)
-        .elementHandler(TypesystemStructHandler::create)
-        .arguments({Argument::String("name"), Argument::String("parent", "")});
-
-static const ParserState TypesystemStructField =
-    ParserStateBuilder()
-        .parent(&TypesystemStruct)
-        .elementHandler(TypesystemStructFieldHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("default", Variant::fromObject(nullptr))});
-
-static const ParserState TypesystemConstant =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::Constant)
-        .elementHandler(TypesystemConstantHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("value")});
-
-/* Special states for import and include */
-static const ParserState Import =
-    ParserStateBuilder()
-        .parents({&Document, &Typesystem, &Domain})
-        .elementHandler(ImportHandler::create)
-        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
-                    Argument::String("src", "")});
-
-static const ParserState Include =
-    ParserStateBuilder()
-        .parent(&All)
-        .elementHandler(IncludeHandler::create)
-        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
-                    Argument::String("src", "")});
-
-static const std::multimap<std::string, const ParserState *> XmlStates{
-    {"document", &Document},
-    {"*", &DocumentChild},
-    {"domain", &Domain},
-    {"struct", &DomainStruct},
-    {"annotation", &DomainAnnotation},
-    {"attributes", &DomainAttributes},
-    {"attribute", &DomainAttribute},
-    {"field", &DomainField},
-    {"fieldRef", &DomainFieldRef},
-    {"primitive", &DomainStructPrimitive},
-    {"child", &DomainStructChild},
-    {"parent", &DomainStructParent},
-    {"field", &DomainStructParentField},
-    {"fieldRef", &DomainStructParentFieldRef},
-    {"typesystem", &Typesystem},
-    {"enum", &TypesystemEnum},
-    {"entry", &TypesystemEnumEntry},
-    {"struct", &TypesystemStruct},
-    {"field", &TypesystemStructField},
-    {"constant", &TypesystemConstant},
-    {"import", &Import},
-    {"include", &Include}};
-}
-
-/**
- * Structue containing the private data that is being passed to the
- * XML-Handlers.
- */
-struct XMLUserData {
-	/**
-	 * Containing the depth of the current XML file
-	 */
-	size_t depth;
-
-	/**
-	 * Reference at the ParserStack instance.
-	 */
-	ParserStack *stack;
-
-	/**
-	 * Reference at the CharReader instance.
-	 */
-	CharReader *reader;
-
-	/**
-	 * Constructor of the XMLUserData struct.
-	 *
-	 * @param stack is a pointer at the ParserStack instance.
-	 * @param reader is a pointer at the CharReader instance.
-	 */
-	XMLUserData(ParserStack *stack, CharReader *reader)
-	    : depth(0), stack(stack), reader(reader)
-	{
-	}
-};
-
-/**
- * Wrapper class around the XML_Parser pointer which safely frees it whenever
- * the scope is left (e.g. because an exception was thrown).
- */
-class ScopedExpatXmlParser {
-private:
-	/**
-	 * Internal pointer to the XML_Parser instance.
-	 */
-	XML_Parser parser;
-
-public:
-	/**
-	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS
-	 * from the expat library. Throws a parser exception if the XML parser
-	 * cannot be initialized.
-	 *
-	 * @param encoding is the protocol-defined encoding passed to expat (or
-	 * nullptr if expat should determine the encoding by itself).
-	 */
-	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
-	{
-		parser = XML_ParserCreate(encoding);
-		if (!parser) {
-			throw LoggableException{
-			    "Internal error: Could not create expat XML parser!"};
-		}
-	}
-
-	/**
-	 * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance.
-	 */
-	~ScopedExpatXmlParser()
-	{
-		if (parser) {
-			XML_ParserFree(parser);
-			parser = nullptr;
-		}
-	}
-
-	/**
-	 * Returns the XML_Parser pointer.
-	 */
-	XML_Parser operator&() { return parser; }
-};
-
-/* Adapter Expat -> ParserStack */
-
-static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0)
-{
-	// Fetch the parser stack and the associated user data
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	// Fetch the current location in the XML file
-	size_t offs = XML_GetCurrentByteIndex(p);
-
-	// Build the source location and update the default location of the
-	// current
-	// logger instance
-	SourceLocation loc{stack->getContext().getSourceId(), offs, offs + len};
-	stack->getContext().getLogger().setDefaultLocation(loc);
-	return loc;
-}
-
-enum class XMLAttributeState {
-	IN_TAG_NAME,
-	SEARCH_ATTR,
-	IN_ATTR_NAME,
-	HAS_ATTR_NAME,
-	HAS_ATTR_EQUALS,
-	IN_ATTR_DATA
-};
-
-static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets(
-    CharReader &reader, SourceLocation location)
-{
-	std::map<std::string, SourceLocation> res;
-
-	// Fork the reader, we don't want to mess up the XML parsing process, do we?
-	CharReaderFork readerFork = reader.fork();
-
-	// Move the read cursor to the start location, abort if this does not work
-	size_t offs = location.getStart();
-	if (!location.isValid() || offs != readerFork.seek(offs)) {
-		return res;
-	}
-
-	// Now all we need to do is to implement one half of an XML parser. As this
-	// is inherently complicated we'll totaly fail at it. Don't care. All we
-	// want to get is those darn offsets for pretty error messages... (and we
-	// can assume the XML is valid as it was already read by expat)
-	XMLAttributeState state = XMLAttributeState::IN_TAG_NAME;
-	char c;
-	std::stringstream attrName;
-	while (readerFork.read(c)) {
-		// Abort at the end of the tag
-		if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) {
-			return res;
-		}
-
-		// One state machine to rule them all, one state machine to find them,
-		// One state machine to bring them all and in the darkness bind them
-		// (the byte offsets)
-		switch (state) {
-			case XMLAttributeState::IN_TAG_NAME:
-				if (Utils::isWhitespace(c)) {
-					state = XMLAttributeState::SEARCH_ATTR;
-				}
-				break;
-			case XMLAttributeState::SEARCH_ATTR:
-				if (!Utils::isWhitespace(c)) {
-					state = XMLAttributeState::IN_ATTR_NAME;
-					attrName << c;
-				}
-				break;
-			case XMLAttributeState::IN_ATTR_NAME:
-				if (Utils::isWhitespace(c)) {
-					state = XMLAttributeState::HAS_ATTR_NAME;
-				} else if (c == '=') {
-					state = XMLAttributeState::HAS_ATTR_EQUALS;
-				} else {
-					attrName << c;
-				}
-				break;
-			case XMLAttributeState::HAS_ATTR_NAME:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '=') {
-						state = XMLAttributeState::HAS_ATTR_EQUALS;
-						break;
-					}
-					// Well, this is a strange XML file... We expected to
-					// see a '=' here! Try to continue with the
-					// "HAS_ATTR_EQUALS" state as this state will hopefully
-					// inlcude some error recovery
-				} else {
-					// Skip whitespace here
-					break;
-				}
-			// Fallthrough
-			case XMLAttributeState::HAS_ATTR_EQUALS:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '"') {
-						// Here we are! We have found the beginning of an
-						// attribute. Let's quickly lock the current offset away
-						// in the result map
-						res.emplace(attrName.str(),
-						            SourceLocation{reader.getSourceId(),
-						                           readerFork.getOffset()});
-						attrName.str(std::string{});
-						state = XMLAttributeState::IN_ATTR_DATA;
-					} else {
-						// No, this XML file is not well formed. Assume we're in
-						// an attribute name once again
-						attrName.str(std::string{&c, 1});
-						state = XMLAttributeState::IN_ATTR_NAME;
-					}
-				}
-				break;
-			case XMLAttributeState::IN_ATTR_DATA:
-				if (c == '"') {
-					// We're at the end of the attribute data, start anew
-					state = XMLAttributeState::SEARCH_ATTR;
-				}
-				break;
-		}
-	}
-	return res;
-}
-
-static void xmlStartElementHandler(void *p, const XML_Char *name,
-                                   const XML_Char **attrs)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	SourceLocation loc = syncLoggerPosition(parser);
-
-	// Read the argument locations -- this is only a stupid and slow hack,
-	// but it is necessary, as expat doesn't give use the byte offset of the
-	// arguments.
-	std::map<std::string, SourceLocation> offs =
-	    reconstructXMLAttributeOffsets(*userData->reader, loc);
-
-	// Assemble the arguments
-	Variant::mapType args;
-
-	const XML_Char **attr = attrs;
-	while (*attr) {
-		// Convert the C string to a std::string
-		const std::string key{*(attr++)};
-
-		// Search the location of the key
-		SourceLocation keyLoc;
-		auto it = offs.find(key);
-		if (it != offs.end()) {
-			keyLoc = it->second;
-		}
-
-		// Parse the string, pass the location of the key
-		std::pair<bool, Variant> value = VariantReader::parseGenericString(
-		    *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(),
-		    keyLoc.getStart());
-		args.emplace(key, value.second);
-	}
-
-	// Call the start function
-	std::string nameStr(name);
-	if (nameStr != "ousia" || userData->depth > 0) {
-		stack->start(std::string(name), args, loc);
-	}
-
-	// Increment the current depth
-	userData->depth++;
-}
-
-static void xmlEndElementHandler(void *p, const XML_Char *name)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	syncLoggerPosition(parser);
-
-	// Decrement the current depth
-	userData->depth--;
-
-	// Call the end function
-	std::string nameStr(name);
-	if (nameStr != "ousia" || userData->depth > 0) {
-		stack->end();
-	}
-}
-
-static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
-	syncLoggerPosition(parser, ulen);
-	const std::string data = Utils::trim(std::string{s, ulen});
-	if (!data.empty()) {
-		stack->data(data);
-	}
-}
-
-/* Class XmlParser */
-
-void XmlParser::doParse(CharReader &reader, ParserContext &ctx)
-{
-	// Create the parser object
-	ScopedExpatXmlParser p{"UTF-8"};
-
-	// Create the parser stack instance, if we're starting on a non-empty scope,
-	// try to deduce the parser state
-	ParserStack stack(ctx, ParserStates::XmlStates);
-	if (!ctx.getScope().isEmpty()) {
-		if (!stack.deduceState()) {
-			return;
-		}
-	}
-
-	// Pass the reference to the ParserStack to the XML handler
-	XMLUserData data(&stack, &reader);
-	XML_SetUserData(&p, &data);
-	XML_UseParserAsHandlerArg(&p);
-
-	// Set the callback functions
-	XML_SetStartElementHandler(&p, xmlStartElementHandler);
-	XML_SetEndElementHandler(&p, xmlEndElementHandler);
-	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler);
-
-	// Feed data into expat while there is data to process
-	constexpr size_t BUFFER_SIZE = 64 * 1024;
-	while (true) {
-		// Fetch a buffer from expat for the input data
-		char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE));
-		if (!buf) {
-			throw LoggableException{
-			    "Internal error: XML parser out of memory!"};
-		}
-
-		// Read into the buffer
-		size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE);
-
-		// Parse the data and handle any XML error
-		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) {
-			// Fetch the xml parser byte offset
-			size_t offs = XML_GetCurrentByteIndex(&p);
-
-			// Throw a corresponding exception
-			XML_Error code = XML_GetErrorCode(&p);
-			std::string msg = std::string{XML_ErrorString(code)};
-			throw LoggableException{"XML: " + msg,
-			                        SourceLocation{ctx.getSourceId(), offs}};
-		}
-
-		// Abort once there are no more bytes in the stream
-		if (bytesRead == 0) {
-			break;
-		}
-	}
-}
-}
-
diff --git a/src/formats/osdmx/OsdmxParser.hpp b/src/formats/osdmx/OsdmxParser.hpp
deleted file mode 100644
index c8b6302..0000000
--- a/src/formats/osdmx/OsdmxParser.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file XmlParser.hpp
- *
- * Contains the parser responsible for reading Ousía XML Documents (extension
- * oxd) and Ousía XML Modules (extension oxm).
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_XML_PARSER_HPP_
-#define _OUSIA_XML_PARSER_HPP_
-
-#include <core/parser/Parser.hpp>
-
-namespace ousia {
-
-/**
- * The XmlParser class implements parsing the various types of Ousía XML
- * documents using the expat stream XML parser.
- */
-class XmlParser : public Parser {
-protected:
-	/**
-	 * Parses the given input stream as XML file and returns the parsed
-	 * top-level node.
-	 *
-	 * @param reader is the CharReader from which the input should be read.
-	 * @param ctx is a reference to the ParserContext instance that should be
-	 * used.
-	 */
-	void doParse(CharReader &reader, ParserContext &ctx) override;
-};
-
-}
-
-#endif /* _OUSIA_XML_PARSER_HPP_ */
-
diff --git a/src/formats/osml/OsmlParser.cpp b/src/formats/osml/OsmlParser.cpp
new file mode 100644
index 0000000..4973639
--- /dev/null
+++ b/src/formats/osml/OsmlParser.cpp
@@ -0,0 +1,57 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <core/parser/generic/ParserStateCallbacks.hpp>
+#include <core/parser/generic/ParserStateStack.hpp>
+
+#include "OsdmParser.hpp"
+#include "OsdmStreamParser.hpp"
+
+namespace ousia {
+
+namespace {
+
+/**
+ * The OsdmParserImplementation class contains the actual implementation of the
+ * parsing process and is created in the "doParse" function of the OsdmParser.
+ * It derives from ParserStateCallbacks and owns the two members listed below.
+ */
+class OsdmParserImplementation : public ParserStateCallbacks {
+private:
+	/**
+	 * OsdmStreamParser instance (initialised from the reader).
+	 */
+	OsdmStreamParser parser;
+
+	/**
+	 * Instance of the ParserStateStack (initialised from the context).
+	 */
+	ParserStateStack stack;
+
+public:
+	OsdmParserImplementation parser(reader, ctx) : parser(reader), stack(ctx, std::multimap)  // NOTE(review): unfinished stub, not valid C++ as written
+};
+}
+
+void OsdmParser::doParse(CharReader &reader, ParserContext &ctx)
+{
+	OsdmParserImplementation parser(reader, ctx);  // all parsing work is delegated to this object
+	parser.parse();  // NOTE(review): parse() is not declared in the class body above -- confirm where it comes from
+}
+
+}
diff --git a/src/formats/osml/OsmlParser.hpp b/src/formats/osml/OsmlParser.hpp
new file mode 100644
index 0000000..37505b4
--- /dev/null
+++ b/src/formats/osml/OsmlParser.hpp
@@ -0,0 +1,48 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsmlParser.hpp
+ *
+ * Contains the parser of the osdm format, the standard plain-text format used
+ * by Ousía for documents.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_OSDM_PARSER_HPP_
+#define _OUSIA_OSDM_PARSER_HPP_
+
+#include <core/parser/Parser.hpp>
+
+namespace ousia {
+
+/**
+ * OsdmParser is a small wrapper implementing the Parser interface. The actual
+ * parsing is performed with the OsdmStreamParser in conjunction with the
+ * ParserStateStack.
+ */
+class OsdmParser : public Parser {
+protected:
+	void doParse(CharReader &reader, ParserContext &ctx) override;
+};
+
+}
+
+#endif /* _OUSIA_OSDM_PARSER_HPP_ */
+
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
new file mode 100644
index 0000000..6a55f12
--- /dev/null
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -0,0 +1,640 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Logger.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/VariantReader.hpp>
+
+#include "OsdmStreamParser.hpp"
+
+namespace ousia {
+
+/**
+ * Tokenizer preloaded with the special character sequences of the plain
+ * format. The id returned by registerToken() for each sequence is stored in a
+ * public member so that token types can be compared against it.
+ */
+class PlainFormatTokens : public Tokenizer {
+public:
+	/** Id of the token registered for "\" (backslash). */
+	TokenTypeId Backslash;
+
+	/** Id of the token registered for "%" (line comment). */
+	TokenTypeId LineComment;
+
+	/** Id of the token registered for "%{" (block comment start). */
+	TokenTypeId BlockCommentStart;
+
+	/** Id of the token registered for "}%" (block comment end). */
+	TokenTypeId BlockCommentEnd;
+
+	/** Id of the token registered for "{" (field start). */
+	TokenTypeId FieldStart;
+
+	/** Id of the token registered for "}" (field end). */
+	TokenTypeId FieldEnd;
+
+	/**
+	 * Registers all plain format tokens. The members are initialised in
+	 * declaration order, so the tokens are registered in the order Backslash,
+	 * LineComment, BlockCommentStart, BlockCommentEnd, FieldStart, FieldEnd.
+	 */
+	PlainFormatTokens()
+	    : Backslash(registerToken("\\")),
+	      LineComment(registerToken("%")),
+	      BlockCommentStart(registerToken("%{")),
+	      BlockCommentEnd(registerToken("}%")),
+	      FieldStart(registerToken("{")),
+	      FieldEnd(registerToken("}"))
+	{
+	}
+};
+
+/**
+ * File-local token set. It is used to initialise the tokenizer of the
+ * OsdmStreamParser and to look up the ids of the registered tokens.
+ */
+static const PlainFormatTokens Tokens;
+
+/**
+ * Class used internally to collect data issued via "DATA" event. Characters
+ * are accumulated in an internal buffer together with the source range
+ * (start and end offset) they were read from.
+ */
+class DataHandler {
+private:
+	/**
+	 * Internal character buffer.
+	 */
+	std::vector<char> buf;
+
+	/**
+	 * Start location of the character data.
+	 */
+	SourceOffset start;
+
+	/**
+	 * End location of the character data.
+	 */
+	SourceOffset end;
+
+public:
+	/**
+	 * Default constructor, initializes start and end with zeros.
+	 */
+	DataHandler() : start(0), end(0) {}
+
+	/**
+	 * Returns true if the internal buffer is empty.
+	 *
+	 * @return true if no characters were added to the internal buffer, false
+	 * otherwise.
+	 */
+	bool isEmpty() const { return buf.empty(); }
+
+	/**
+	 * Appends a single character to the internal buffer.
+	 *
+	 * @param c is the character that should be added to the internal buffer.
+	 * @param charStart is the start position of the character.
+	 * @param charEnd is the end position of the character.
+	 */
+	void append(char c, SourceOffset charStart, SourceOffset charEnd)
+	{
+		// The first chunk appended to an empty buffer defines the start
+		if (isEmpty()) {
+			start = charStart;
+		}
+		buf.push_back(c);
+		end = charEnd;
+	}
+
+	/**
+	 * Appends a string to the internal buffer.
+	 *
+	 * @param s is the string that should be added to the internal buffer.
+	 * @param stringStart is the start position of the string.
+	 * @param stringEnd is the end position of the string.
+	 */
+	void append(const std::string &s, SourceOffset stringStart,
+	            SourceOffset stringEnd)
+	{
+		// The first chunk appended to an empty buffer defines the start
+		if (isEmpty()) {
+			start = stringStart;
+		}
+		// vector::insert needs no <algorithm>/<iterator> include, unlike the
+		// std::copy/back_inserter combination
+		buf.insert(buf.end(), s.begin(), s.end());
+		end = stringEnd;
+	}
+
+	/**
+	 * Converts the internal buffer to a variant with attached location
+	 * information.
+	 *
+	 * @param sourceId is the source id which is needed for building the
+	 * location information.
+	 * @return a Variant with the internal buffer content as string and
+	 * the correct start and end location.
+	 */
+	Variant toVariant(SourceId sourceId) const
+	{
+		// Building the string from the iterator range is well-defined even
+		// when the buffer is empty (buf.data() may be a null pointer then)
+		Variant res = Variant::fromString(std::string(buf.begin(), buf.end()));
+		res.setLocation({sourceId, start, end});
+		return res;
+	}
+};
+
+/**
+ * Attaches the stream parser to the given reader and logger, initialises the
+ * tokenizer from the file-local plain format token set and resets the field
+ * index to zero.
+ *
+ * @param reader is the reader instance from which characters are read.
+ * @param logger is the logger instance to which errors are written.
+ */
+OsdmStreamParser::OsdmStreamParser(CharReader &reader, Logger &logger)
+    : reader(reader), logger(logger), tokenizer(Tokens), fieldIdx(0)
+{
+	// Place an initial command representing the complete file on the stack.
+	// It is marked as range command with open (range) field, so the stack
+	// unrolling loops always terminate at this entry.
+	commands.push(Command{"", Variant::mapType{}, true, true, true});
+}
+
+/**
+ * Reads an identifier from the reader, optionally containing the namespace
+ * separator ":". Reading stops at the first character that cannot be part of
+ * the identifier; this character is not consumed.
+ *
+ * @param start is the start offset that is stored in the location of the
+ * returned variant.
+ * @param allowNSSep must be true if ":" may occur inside the identifier. A
+ * ":" is only accepted if at least one other character was read since the
+ * last separator and if it is followed by an identifier start character.
+ * @return a string variant holding the (possibly empty) identifier and its
+ * source location.
+ */
+Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep)
+{
+	std::string name;
+	bool sawCharSinceNSSep = false;
+	size_t end = reader.getPeekOffset();
+
+	char cur;
+	while (reader.peek(cur)) {
+		// The first character must be an identifier start character, all
+		// following characters must be regular identifier characters
+		bool accepted = name.empty() ? Utils::isIdentifierStartCharacter(cur)
+		                             : Utils::isIdentifierCharacter(cur);
+
+		// Otherwise this may still be a correctly placed namespace separator
+		if (!accepted && cur == ':' && sawCharSinceNSSep) {
+			char next;
+			accepted = reader.fetchPeek(next) &&
+			           Utils::isIdentifierStartCharacter(next);
+		}
+
+		// Abort if this character is not part of the identifier
+		if (!accepted) {
+			if (cur == ':' && allowNSSep) {
+				logger.error(
+				    "Expected character before and after namespace separator "
+				    "\":\"",
+				    reader);
+			}
+			reader.resetPeek();
+			break;
+		}
+		name.push_back(cur);
+
+		// A separator may only follow a non-separator character
+		sawCharSinceNSSep = allowNSSep && (cur != ':');
+
+		// Remember the end offset and consume the accepted character
+		end = reader.getPeekOffset();
+		reader.consumePeek();
+	}
+
+	// Return the identifier at its location
+	Variant res = Variant::fromString(name);
+	res.setLocation({reader.getSourceId(), start, end});
+	return res;
+}
+
+/**
+ * Handles the special "\begin" command. Expects "{name}" or "{name#argName}"
+ * followed by optional arguments and pushes the named command as range
+ * command onto the command stack.
+ *
+ * @return State::COMMAND on success, State::NONE if no "{" follows the
+ * command and State::ERROR if the name or the closing "}" is missing.
+ */
+OsdmStreamParser::State OsdmStreamParser::parseBeginCommand()
+{
+	// Whitespace is skipped, then the opening brace is mandatory
+	reader.consumeWhitespace();
+	if (!reader.expect('{')) {
+		logger.error("Expected \"{\" after \\begin", reader);
+		return State::NONE;
+	}
+
+	// Read the name of the command that is opened, it may contain ":"
+	Variant name = parseIdentifier(reader.getOffset(), true);
+	if (name.asString().empty()) {
+		logger.error("Expected identifier", name);
+		return State::ERROR;
+	}
+
+	// An optional "#identifier" supplies the "name" argument of the command
+	Variant argName;
+	const SourceOffset argStart = reader.getOffset();
+	if (reader.expect('#')) {
+		argName = parseIdentifier(argStart);
+		if (argName.asString().empty()) {
+			logger.error("Expected identifier after \"#\"", argName);
+		}
+	}
+
+	// The closing brace is mandatory
+	if (!reader.expect('}')) {
+		logger.error("Expected \"}\"", reader);
+		return State::ERROR;
+	}
+
+	// Read the arguments and register the command as a range command
+	Variant arguments = parseCommandArguments(std::move(argName));
+	pushCommand(std::move(name), std::move(arguments), true);
+
+	return State::COMMAND;
+}
+
+/**
+ * Reports an error if the given command still has an open field that is not
+ * its range field, i.e. if an "\end" was encountered inside such a field.
+ *
+ * @param cmd is the command that should be checked.
+ * @param endName is the name variant of the "\end" command, used as location
+ * of the error message.
+ * @param logger is the logger to which the messages are written.
+ * @return true if the error was reported, false if the command is fine.
+ */
+static bool checkStillInField(const OsdmStreamParser::Command &cmd,
+                              const Variant &endName, Logger &logger)
+{
+	// Nothing to complain about unless a non-range field is open
+	if (!cmd.inField || cmd.inRangeField) {
+		return false;
+	}
+
+	std::string message("\\end in open field of command \"");
+	message += cmd.name.asString();
+	message += std::string("\"");
+	logger.error(message, endName);
+	logger.note(std::string("Open command started here:"), cmd.name);
+	return true;
+}
+
+/**
+ * Handles the special "\end" command. Expects "{name}", unrolls the command
+ * stack up to the innermost range command and removes this command from the
+ * stack if its name matches.
+ *
+ * @return State::FIELD_END if the ended command was inside its range field,
+ * State::NONE if it was not (or if no "{" follows the command) and
+ * State::ERROR if the name or "}" is missing, if the "\end" occurs inside an
+ * open non-range field, if no command is open or if the names do not match.
+ */
+OsdmStreamParser::State OsdmStreamParser::parseEndCommand()
+{
+	// Expect a '{' after the command
+	if (!reader.expect('{')) {
+		logger.error("Expected \"{\" after \\end", reader);
+		return State::NONE;
+	}
+
+	// Fetch the name of the command that should be ended here
+	Variant name = parseIdentifier(reader.getOffset(), true);
+
+	// Make sure the given command name is not empty
+	if (name.asString().empty()) {
+		logger.error("Expected identifier", name);
+		return State::ERROR;
+	}
+
+	// Make sure the command name is terminated with a '}'
+	if (!reader.expect('}')) {
+		logger.error("Expected \"}\"", reader);
+		return State::ERROR;
+	}
+
+	// Unroll the command stack up to the last range command. The initial
+	// command placed on the stack by the constructor is a range command, so
+	// this loop stops there at the latest.
+	while (!commands.top().hasRange) {
+		if (checkStillInField(commands.top(), name, logger)) {
+			return State::ERROR;
+		}
+		commands.pop();
+	}
+
+	// Make sure we're not in an open field of this command
+	if (checkStillInField(commands.top(), name, logger)) {
+		return State::ERROR;
+	}
+
+	// Special error message if the top-level command is reached
+	if (commands.size() == 1) {
+		logger.error(std::string("Cannot end command \"") + name.asString() +
+		                 std::string("\" here, no command open"),
+		             name);
+		return State::ERROR;
+	}
+
+	// Inform the user about command mismatches: "name" is the command the
+	// "\end" tries to close, "cmd" is the command that is actually open
+	const Command &cmd = commands.top();
+	if (cmd.name.asString() != name.asString()) {
+		logger.error(std::string("Trying to end command \"") +
+		                 name.asString() +
+		                 std::string("\", but open command is \"") +
+		                 cmd.name.asString() + std::string("\""),
+		             name);
+		logger.note("Last command was opened here:", cmd.name);
+		return State::ERROR;
+	}
+
+	// Copy the flag before popping -- "cmd" refers to the stack top and must
+	// not be accessed once the element has been removed
+	const bool wasInRangeField = cmd.inRangeField;
+
+	// Set the location to the location of the command that was ended, then end
+	// the current command
+	location = name.getLocation();
+	commands.pop();
+	return wasInRangeField ? State::FIELD_END : State::NONE;
+}
+
+/**
+ * Parses the optional "[...]" argument list of a command and merges the name
+ * given via "#name" into it.
+ *
+ * @param commandArgName is the name given via "#name". It is only inserted
+ * into the arguments (under the key "name") if it is a string variant.
+ * @return a variant holding the arguments; an empty map if no "[" follows.
+ */
+Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName)
+{
+	// Parse the arguments using the universal VariantReader
+	Variant commandArguments;
+	if (reader.expect('[')) {
+		auto res = VariantReader::parseObject(reader, logger, ']');
+		commandArguments = res.second;
+	} else {
+		commandArguments = Variant::mapType{};
+	}
+
+	// Insert the parsed name, make sure "name" was not specified in the
+	// arguments
+	if (commandArgName.isString()) {
+		auto &arguments = commandArguments.asMap();
+
+		// Look the key up first and only move the name into the map if it is
+		// actually inserted. Emplacing unconditionally may leave
+		// commandArgName in a moved-from state even if the insertion fails,
+		// which would break the note below.
+		auto it = arguments.find("name");
+		if (it == arguments.end()) {
+			arguments.emplace("name", std::move(commandArgName));
+		} else {
+			logger.error("Name argument specified multiple times",
+			             SourceLocation{}, MessageMode::NO_CONTEXT);
+			logger.note("First occurance is here: ", commandArgName);
+			logger.note("Second occurance is here: ", it->second);
+		}
+	}
+	return commandArguments;
+}
+
+/**
+ * Places a freshly parsed command on the command stack and stores its
+ * location as the current location.
+ *
+ * @param commandName is the name (and location) of the command.
+ * @param commandArguments are the arguments given to the command.
+ * @param hasRange is true if the command was opened via "\begin".
+ */
+void OsdmStreamParser::pushCommand(Variant commandName,
+                                   Variant commandArguments, bool hasRange)
+{
+	// Remember where the command was found before the name is moved away
+	location = commandName.getLocation();
+
+	// Commands we are not inside a field of cannot contain the new command,
+	// so drop them from the stack
+	for (; !commands.top().inField; commands.pop()) {
+	}
+
+	// The new command starts outside of any of its fields
+	commands.emplace(std::move(commandName), std::move(commandArguments),
+	                 hasRange, false, false);
+}
+
+/**
+ * Parses a command that follows a backslash. The special commands "begin" and
+ * "end" are forwarded to parseBeginCommand() and parseEndCommand(); any other
+ * command is pushed onto the command stack together with its arguments.
+ *
+ * @param start is the start offset of the command, used as start of the
+ * location of the command name.
+ * @return State::NONE if the command name is empty, the result of
+ * parseBeginCommand() or parseEndCommand() for the special commands and
+ * State::COMMAND otherwise.
+ */
+OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)
+{
+	// The command name is the first identifier, it may contain ":"
+	Variant commandName = parseIdentifier(start, true);
+	if (commandName.asString().empty()) {
+		logger.error("Empty command name", reader);
+		return State::NONE;
+	}
+
+	// "begin" and "end" are handled separately; only the part in front of the
+	// first ":" decides whether this is one of them
+	const auto parts = Utils::split(commandName.asString(), ':');
+	const auto &head = parts[0];
+	const bool isBegin = head == "begin";
+	const bool isEnd = head == "end";
+	if (isBegin || isEnd) {
+		if (parts.size() > 1) {
+			logger.error(
+			    "Special commands \"\\begin\" and \"\\end\" may not contain a "
+			    "namespace separator \":\"",
+			    commandName);
+		}
+		return isBegin ? parseBeginCommand() : parseEndCommand();
+	}
+
+	// An optional "#identifier" supplies the "name" argument of the command
+	Variant argName;
+	start = reader.getOffset();
+	if (reader.expect('#')) {
+		argName = parseIdentifier(start);
+		if (argName.asString().empty()) {
+			logger.error("Expected identifier after \"#\"", argName);
+		}
+	}
+
+	// Read the arguments and register the command as a non-range command
+	Variant arguments = parseCommandArguments(std::move(argName));
+	pushCommand(std::move(commandName), std::move(arguments), false);
+
+	return State::COMMAND;
+}
+
+/**
+ * Skips a block comment. Block comments may be nested, so tokens are read
+ * until the block comment end token matching the already consumed start token
+ * is found. Logs an error if the input ends before that.
+ */
+void OsdmStreamParser::parseBlockComment()
+{
+	// The opening token of the outermost comment has already been read
+	size_t nesting = 1;
+
+	Token current;
+	while (tokenizer.read(reader, current)) {
+		const bool closes = current.type == Tokens.BlockCommentEnd;
+		const bool opens = current.type == Tokens.BlockCommentStart;
+
+		// Leave once the outermost comment has been closed
+		if (closes && --nesting == 0) {
+			return;
+		}
+		if (opens) {
+			++nesting;
+		}
+	}
+
+	// Issue an error if the file ends while we are in a block comment
+	logger.error("File ended while being in a block comment", reader);
+}
+
+/**
+ * Skips a line comment: reads and discards characters up to and including
+ * the next line break, or until the end of the input is reached.
+ */
+void OsdmStreamParser::parseLineComment()
+{
+	char ch;
+	while (reader.read(ch) && ch != '\n') {
+		// Discard the character
+	}
+}
+
+/**
+ * Publishes the data collected in the given handler, if there is any. The
+ * data and its location are stored in the parser and the peek cursor of the
+ * reader is reset.
+ *
+ * @param handler is the data handler holding the collected characters.
+ * @return true if data was stored and State::DATA should be returned by the
+ * parse function, false if the handler was empty.
+ */
+bool OsdmStreamParser::checkIssueData(DataHandler &handler)
+{
+	// Nothing collected, nothing to issue
+	if (handler.isEmpty()) {
+		return false;
+	}
+
+	data = handler.toVariant(reader.getSourceId());
+	location = data.getLocation();
+	reader.resetPeek();
+	return true;
+}
+
+/**
+ * Called before data is appended or a new command is parsed. Opens the
+ * implicit range field of a range command, or drops all commands from the
+ * stack that have no open field.
+ *
+ * @return true if the range field of the current command was just opened and
+ * State::FIELD_START should be returned by the parse function.
+ */
+bool OsdmStreamParser::checkIssueFieldStart()
+{
+	Command &current = commands.top();
+
+	// We are already inside a field of the current command, nothing to do
+	if (current.inField) {
+		return false;
+	}
+
+	// A range command implicitly opens its range field here, so a field
+	// start has to be issued. The peek cursor is reset in this case.
+	if (current.hasRange) {
+		current.inField = true;
+		current.inRangeField = true;
+		reader.resetPeek();
+		return true;
+	}
+
+	// Not a range command: unroll the stack until a command with an open
+	// field is reached
+	while (!commands.top().inField) {
+		commands.pop();
+	}
+	return false;
+}
+
+/**
+ * Reads tokens from the reader until an event has to be reported to the
+ * caller and returns the corresponding state.
+ *
+ * Text and escaped characters are collected in a local DataHandler and
+ * issued as State::DATA as soon as a command or another special token
+ * follows, or when the end of the stream is reached. Comments are skipped.
+ *
+ * @return State::FIELD_START, State::FIELD_END or State::DATA for the
+ * respective events, the state returned by parseCommand() if a command was
+ * read (except State::NONE and State::ERROR), or State::END once no more
+ * tokens are available.
+ * @throws LoggableException if parseCommand() returns State::ERROR.
+ */
+OsdmStreamParser::State OsdmStreamParser::parse()
+{
+	// Handler for incoming data
+	DataHandler handler;
+
+	// Read tokens until the outer loop should be left
+	Token token;
+	while (tokenizer.peek(reader, token)) {
+		const TokenTypeId type = token.type;
+
+		// Special handling for Backslash and Text
+		if (type == Tokens.Backslash) {
+			// Before appending anything to the output data or starting a new
+			// command, check whether FIELD_START has to be issued, as the
+			// current command is a command with range
+			if (checkIssueFieldStart()) {
+				location = token.location;
+				return State::FIELD_START;
+			}
+
+			// Check whether a command starts now, without advancing the peek
+			// cursor
+			// NOTE(review): data already collected in "handler" is not issued
+			// on the error path below -- confirm that this is intended
+			char c;
+			if (!reader.fetchPeek(c)) {
+				logger.error("Trailing backslash at the end of the file.",
+				             token);
+				return State::END;
+			}
+
+			// Try to parse a command
+			if (Utils::isIdentifierStartCharacter(c)) {
+				// Make sure to issue any data before it is too late
+				if (checkIssueData(handler)) {
+					return State::DATA;
+				}
+
+				// Parse the actual command
+				State res = parseCommand(token.location.getStart());
+				switch (res) {
+					case State::ERROR:
+						throw LoggableException(
+						    "Last error was irrecoverable, ending parsing "
+						    "process");
+					case State::NONE:
+						// Nothing to report, go on with the next token
+						continue;
+					default:
+						return res;
+				}
+			}
+
+			// This was not a special character, just append the given character
+			// to the data buffer, use the escape character start as start
+			// location and the peek offset as end location
+			reader.peek(c);  // Peek the previously fetched character
+			handler.append(c, token.location.getStart(),
+			               reader.getPeekOffset());
+			reader.consumePeek();
+			continue;
+		} else if (type == TextToken) {
+			// Check whether FIELD_START has to be issued before appending text
+			if (checkIssueFieldStart()) {
+				location = token.location;
+				return State::FIELD_START;
+			}
+
+			// Append the text to the data handler
+			handler.append(token.content, token.location.getStart(),
+			               token.location.getEnd());
+
+			reader.consumePeek();
+			continue;
+		}
+
+		// A non-text token was reached, make sure all pending data commands
+		// have been issued
+		if (checkIssueData(handler)) {
+			return State::DATA;
+		}
+
+		// We will handle the token now, consume the peeked characters
+		reader.consumePeek();
+
+		// Update the location to the current token location
+		location = token.location;
+
+		if (token.type == Tokens.LineComment) {
+			parseLineComment();
+		} else if (token.type == Tokens.BlockCommentStart) {
+			parseBlockComment();
+		} else if (token.type == Tokens.FieldStart) {
+			// Open a field of the current command, unless one is open already
+			Command &cmd = commands.top();
+			if (!cmd.inField) {
+				cmd.inField = true;
+				return State::FIELD_START;
+			}
+			logger.error(
+			    "Got field start token \"{\", but no command for which to "
+			    "start the field. Did you mean \"\\{\"?",
+			    token);
+		} else if (token.type == Tokens.FieldEnd) {
+			// Try to end an open field of the current command -- if the current
+			// command is not inside an open field, end this command and try to
+			// close the next one
+			for (int i = 0; i < 2 && commands.size() > 1; i++) {
+				Command &cmd = commands.top();
+				if (!cmd.inRangeField) {
+					if (cmd.inField) {
+						cmd.inField = false;
+						return State::FIELD_END;
+					}
+					commands.pop();
+				} else {
+					// Range fields are not ended by "}"
+					break;
+				}
+			}
+			logger.error(
+			    "Got field end token \"}\", but there is no field to end. Did "
+			    "you mean \"\\}\"?",
+			    token);
+		} else {
+			logger.error("Unexpected token \"" + token.content + "\"", token);
+		}
+	}
+
+	// Issue available data
+	if (checkIssueData(handler)) {
+		return State::DATA;
+	}
+
+	// Make sure all open commands and fields have been ended at the end of the
+	// stream
+	while (commands.size() > 1) {
+		Command &cmd = commands.top();
+		if (cmd.inField || cmd.hasRange) {
+			logger.error("Reached end of stream, but command \"" +
+			                 cmd.name.asString() + "\" has not been ended",
+			             cmd.name);
+		}
+		commands.pop();
+	}
+
+	// The location of the END event is the current reader offset
+	location = SourceLocation{reader.getSourceId(), reader.getOffset()};
+	return State::END;
+}
+
+/**
+ * Gives access to the name of the command on top of the command stack.
+ *
+ * @return a reference at the name variant of the topmost command.
+ */
+const Variant &OsdmStreamParser::getCommandName()
+{
+	const Command &current = commands.top();
+	return current.name;
+}
+
+/**
+ * Gives access to the arguments of the command on top of the command stack.
+ *
+ * @return a reference at the arguments variant of the topmost command.
+ */
+const Variant &OsdmStreamParser::getCommandArguments()
+{
+	const Command &current = commands.top();
+	return current.arguments;
+}
+}
+
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
new file mode 100644
index 0000000..84674c0
--- /dev/null
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -0,0 +1,350 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsmlStreamParser.hpp
+ *
+ * Provides low-level classes for reading the TeX-esque osdm format. The class
+ * provided here does not build any model objects and does not implement the
+ * Parser interface.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_OSDM_STREAM_PARSER_HPP_
+#define _OUSIA_OSDM_STREAM_PARSER_HPP_
+
+#include <stack>
+
+#include <core/common/Variant.hpp>
+#include <core/parser/utils/Tokenizer.hpp>
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+class Logger;
+class DataHandler;
+
+/**
+ * The OsdmStreamParser class provides a low-level reader for the TeX-esque osdm
+ * format. The parser is constructed around a "parse" function, which reads data
+ * from the underlying CharReader until a new state is reached and indicates
+ * this state in a return value. The calling code then has to pull corresponding
+ * data from the stream reader. The reader makes sure the incoming file is
+ * syntactically valid and tries to recover from most errors. If an error is
+ * irrecoverable (this is the case for errors with wrong nesting of commands or
+ * fields, as this would lead to too many consecutive errors) a
+ * LoggableException is thrown.
+ */
+class OsdmStreamParser {
+public:
+	/**
+	 * Enum used to indicate which state the OsdmStreamParser class is in
+	 * after calling the "parse" function.
+	 */
+	enum class State {
+		/**
+	     * State returned if a fully featured command has been read. A command
+	     * consists of the command name and its arguments (which optionally
+	     * includes the name).
+	     */
+		COMMAND,
+
+		/**
+	     * State returned if data is given. The reader must decide which field
+	     * or command this should be routed to. Trailing or leading whitespace
+	     * has been removed. Only called if the data is non-empty.
+	     */
+		DATA,
+
+		/**
+	     * A user-defined entity has been found. The entity sequence is stored
+	     * in the command name.
+	     */
+		ENTITY,
+
+		/**
+	     * State returned if an annotation was started. An annotation consists
+	     * of the command name and its arguments (which optionally include the
+	     * name).
+	     */
+		ANNOTATION_START,
+
+		/**
+	     * State returned if an annotation ends. The reader indicates which
+	     * annotation ends.
+	     */
+		ANNOTATION_END,
+
+		/**
+	     * State returned if a new field started. The reader assures that the
+	     * current field ends before a new field is started and that the field
+	     * is not started if data has been given outside of a field. The
+	     * field number is set to the current field index.
+	     */
+		FIELD_START,
+
+		/**
+	     * State returned if the current field ends. The reader assures that a
+	     * field was actually open.
+	     */
+		FIELD_END,
+
+		/**
+	     * The end of the stream has been reached.
+	     */
+		END,
+
+		/**
+	     * Returned from internal functions if nothing should be done.
+	     */
+		NONE,
+
+		/**
+	     * Returned from internal function to indicate irrecoverable errors.
+	     */
+		ERROR
+	};
+
+	/**
+	 * Entry used for the command stack.
+	 */
+	struct Command {
+		/**
+		 * Name and location of the current command.
+		 */
+		Variant name;
+
+		/**
+		 * Arguments that were passed to the command.
+		 */
+		Variant arguments;
+
+		/**
+		 * Set to true if this is a command with clear begin and end.
+		 */
+		bool hasRange;
+
+		/**
+		 * Set to true if we are currently inside a field of this command.
+		 */
+		bool inField;
+
+		/**
+		 * Set to true if we are currently in the range field of the command
+		 * (implies inField being set to true).
+		 */
+		bool inRangeField;
+
+		/**
+		 * Default constructor. Sets all flags to false and leaves name and
+		 * arguments default-constructed.
+		 */
+		Command() : hasRange(false), inField(false), inRangeField(false) {}
+
+		/**
+		 * Constructor of the Command class.
+		 *
+		 * @param name is a string variant with name and location of the
+		 * command.
+		 * @param arguments is a map variant with the arguments given to the
+		 * command.
+		 * @param hasRange should be set to true if this is a command with
+		 * explicit range.
+		 * @param inField is set to true if we currently are inside a field
+		 * of this command.
+		 * @param inRangeField is set to true if we are currently inside the
+		 * outer field of the command.
+		 */
+		Command(Variant name, Variant arguments, bool hasRange, bool inField,
+		        bool inRangeField)
+		    : name(std::move(name)),
+		      arguments(std::move(arguments)),
+		      hasRange(hasRange),
+		      inField(inField),
+		      inRangeField(inRangeField)
+		{
+		}
+	};
+
+private:
+	/**
+	 * Reference to the CharReader instance from which the incoming bytes are
+	 * read.
+	 */
+	CharReader &reader;
+
+	/**
+	 * Reference at the logger instance to which all error messages are sent.
+	 */
+	Logger &logger;
+
+	/**
+	 * Tokenizer instance used to read individual tokens from the text.
+	 */
+	Tokenizer tokenizer;
+
+	/**
+	 * Stack containing the current commands.
+	 */
+	std::stack<Command> commands;
+
+	/**
+	 * Variant containing the data that has been read (always is a string,
+	 * contains the exact location of the data in the source file).
+	 */
+	Variant data;
+
+	/**
+	 * Contains the location of the last token.
+	 */
+	SourceLocation location;
+
+	/**
+	 * Contains the field index of the current command.
+	 *
+	 * NOTE(review): no getter for this member is declared in this class --
+	 * confirm whether it is still needed.
+	 */
+	size_t fieldIdx;
+
+	/**
+	 * Function used internally to parse an identifier.
+	 *
+	 * @param start is the start byte offset of the identifier (including the
+	 * backslash).
+	 * @param allowNSSep should be set to true if the namespace separator is
+	 * allowed in the identifier name. Issues error if the namespace separator
+	 * is placed incorrectly.
+	 * @return a string variant containing the identifier and its location.
+	 */
+	Variant parseIdentifier(size_t start, bool allowNSSep = false);
+
+	/**
+	 * Function used internally to handle the special "\begin" command.
+	 *
+	 * @return the state that results from handling the command.
+	 */
+	State parseBeginCommand();
+
+	/**
+	 * Function used internally to handle the special "\end" command.
+	 *
+	 * @return the state that results from handling the command.
+	 */
+	State parseEndCommand();
+
+	/**
+	 * Pushes the parsed command onto the command stack.
+	 *
+	 * @param commandName is the name (and location) of the command.
+	 * @param commandArguments are the arguments given to the command.
+	 * @param hasRange should be set to true if this is a command with
+	 * explicit range.
+	 */
+	void pushCommand(Variant commandName, Variant commandArguments,
+	                 bool hasRange);
+
+	/**
+	 * Parses the command arguments.
+	 *
+	 * @param commandArgName is the name that was given to the command
+	 * separately from the argument list.
+	 * @return a variant containing the command arguments.
+	 */
+	Variant parseCommandArguments(Variant commandArgName);
+
+	/**
+	 * Function used internally to parse a command.
+	 *
+	 * @param start is the start byte offset of the command (including the
+	 * backslash)
+	 * @return the resulting state, e.g. State::COMMAND if a command was
+	 * actually parsed, State::NONE if nothing should be done or State::ERROR
+	 * for irrecoverable errors.
+	 */
+	State parseCommand(size_t start);
+
+	/**
+	 * Function used internally to parse a block comment.
+	 */
+	void parseBlockComment();
+
+	/**
+	 * Function used internally to parse a line comment.
+	 */
+	void parseLineComment();
+
+	/**
+	 * Checks whether there is any data pending to be issued, if yes, issues it.
+	 *
+	 * @param handler is the data handler that contains the data that may be
+	 * returned to the user.
+	 * @return true if there was any data and DATA should be returned by the
+	 * parse function, false otherwise.
+	 */
+	bool checkIssueData(DataHandler &handler);
+
+	/**
+	 * Called before any data is appended to the internal data handler. Checks
+	 * whether a new field should be started or implicitly ended.
+	 *
+	 * @return true if FIELD_START should be returned by the parse function.
+	 */
+	bool checkIssueFieldStart();
+
+public:
+	/**
+	 * Constructor of the OsdmStreamParser class. Attaches the new
+	 * OsdmStreamParser to the given CharReader and Logger instances.
+	 *
+	 * @param reader is the reader instance from which incoming characters
+	 * should be read.
+	 * @param logger is the logger instance to which errors should be written.
+	 */
+	OsdmStreamParser(CharReader &reader, Logger &logger);
+
+	/**
+	 * Continues parsing. Returns one of the states defined in the State enum.
+	 * Callers should stop once the State::END state is reached. Use the getter
+	 * functions to get more information about the current state, such as the
+	 * command name or the data or the current field index.
+	 *
+	 * @return the new state the parser has reached.
+	 */
+	State parse();
+
+	/**
+	 * Returns a reference at the internally stored data. Only valid if
+	 * State::DATA was returned by the "parse" function.
+	 *
+	 * @return a reference at a variant containing the data parsed by the
+	 * "parse" function.
+	 */
+	const Variant &getData() { return data; }
+
+	/**
+	 * Returns a reference at the internally stored command name. Only valid if
+	 * State::COMMAND was returned by the "parse" function.
+	 *
+	 * @return a reference at a variant containing name and location of the
+	 * parsed command.
+	 */
+	const Variant &getCommandName();
+
+	/**
+	 * Returns a reference at the internally stored command arguments. Only
+	 * valid if State::COMMAND was returned by the "parse" function.
+	 *
+	 * @return a reference at a variant containing arguments given to the
+	 * command.
+	 */
+	const Variant &getCommandArguments();
+
+	/**
+	 * Returns a reference at the internally stored source location.
+	 *
+	 * @return the last internal token location.
+	 */
+	SourceLocation &getLocation() { return location; }
+};
+}
+
+#endif /* _OUSIA_OSDM_STREAM_PARSER_HPP_ */
+
diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp
new file mode 100644
index 0000000..c46d9de
--- /dev/null
+++ b/src/formats/osxml/OsxmlParser.cpp
@@ -0,0 +1,1435 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <vector>
+
+#include <expat.h>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/RttiBuilder.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/VariantReader.hpp>
+#include <core/parser/ParserStack.hpp>
+#include <core/parser/ParserScope.hpp>
+#include <core/model/Document.hpp>
+#include <core/model/Domain.hpp>
+#include <core/model/Project.hpp>
+#include <core/model/RootNode.hpp>
+#include <core/model/Typesystem.hpp>
+
+#include "XmlParser.hpp"
+
+namespace ousia {
+
+/* HeadNode Helper class */
+
+namespace {
+/**
+ * Node subclass local to this translation unit. It adds no members of its own
+ * and only inherits the constructors of Node.
+ */
+class HeadNode : public Node {
+public:
+	using Node::Node;
+};
+}
+
+namespace RttiTypes {
+// Type descriptor of the HeadNode helper class, built under the name
+// "HeadNode".
+static Rtti HeadNode = RttiBuilder<ousia::HeadNode>("HeadNode");
+}
+
+/* Element Handler Classes */
+
+/**
+ * Handler for the "document" element. Creates a new Document in the project
+ * and makes it the current node of the parser scope.
+ */
+class DocumentHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Creates the document named by the "name" argument, pushes it onto the
+	 * scope and resets the POST_HEAD flag to false.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		// Create the document node and remember where it was declared
+		const std::string &docName = args["name"].asString();
+		Rooted<Document> doc = project()->createDocument(docName);
+		doc->setLocation(location());
+
+		// Descend into the document, the head section starts anew
+		scope().push(doc);
+		scope().setFlag(ParserFlag::POST_HEAD, false);
+	}
+
+	/**
+	 * Removes the document from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DocumentHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DocumentHandler{handlerData};
+	}
+};
+
+/**
+ * Helper node that DocumentChildHandler pushes onto the scope to mark an
+ * explicit field reference. DocumentChildHandler::preamble() uses the name of
+ * this node as the field name.
+ */
+class DocumentField : public Node {
+public:
+	/**
+	 * Constructor of the DocumentField class.
+	 *
+	 * @param mgr is the Manager instance passed on to the Node constructor.
+	 * @param name is the name of the node.
+	 * @param parent is the parent node passed on to the Node constructor.
+	 */
+	DocumentField(Manager &mgr, std::string name, Handle<Node> parent)
+	    : Node(mgr, name, parent)
+	{
+	}
+};
+
+namespace RttiTypes {
+// Type descriptor of the DocumentField helper class, declared with Node as
+// its parent type.
+const Rtti DocumentField =
+    RttiBuilder<ousia::DocumentField>("DocumentField").parent(&Node);
+}
+
+/**
+ * Handler for the elements inside a document (registered under the name "*"
+ * in the state table). Depending on the element name it either pushes an
+ * explicit field reference for the parent entity or creates a new
+ * StructuredEntity. Character data is stored as primitive content of the
+ * current field.
+ */
+class DocumentChildHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Derives the field name and the parent entity from the node that was
+	 * selected from the scope.
+	 *
+	 * @param parentNode is the node selected from the scope. If it is a
+	 * DocumentField, the enclosing StructuredEntity or AnnotationEntity is
+	 * fetched via scope().selectOrThrow() and used instead.
+	 * @param fieldName is set to the name of the DocumentField node, or to
+	 * DEFAULT_FIELD_NAME if parentNode is not a DocumentField.
+	 * @param parent is set to the parent entity, or to nullptr if the parent
+	 * node is neither a StructuredEntity nor an AnnotationEntity.
+	 * @param inField is set to true if parentNode is a DocumentField.
+	 */
+	void preamble(Handle<Node> parentNode, std::string &fieldName,
+	              DocumentEntity *&parent, bool &inField)
+	{
+		// check if the parent in the structure tree was an explicit field
+		// reference.
+		inField = parentNode->isa(&RttiTypes::DocumentField);
+		if (inField) {
+			fieldName = parentNode->getName();
+			parentNode = scope().selectOrThrow(
+			    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity});
+		} else {
+			// if it wasn't an explicit reference, we use the default field.
+			fieldName = DEFAULT_FIELD_NAME;
+		}
+		// reference the parent entity explicitly.
+		parent = nullptr;
+		if (parentNode->isa(&RttiTypes::StructuredEntity)) {
+			parent = static_cast<DocumentEntity *>(
+			    parentNode.cast<StructuredEntity>().get());
+		} else if (parentNode->isa(&RttiTypes::AnnotationEntity)) {
+			parent = static_cast<DocumentEntity *>(
+			    parentNode.cast<AnnotationEntity>().get());
+		}
+	}
+
+	/**
+	 * Called when a child element of the document is opened. Pushes either a
+	 * DocumentField (if the element name is a field of the parent
+	 * descriptor) or a newly created StructuredEntity onto the scope.
+	 *
+	 * @param args are the arguments given to the element. An optional "name"
+	 * entry is removed from the map and used as name of the created entity,
+	 * the remaining arguments are passed on to the entity.
+	 * @throws LoggableException if the element name cannot be resolved to a
+	 * StructuredClass or if no path from the parent descriptor to the
+	 * resolved class exists.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+		Rooted<Node> parentNode = scope().selectOrThrow(
+		    {&RttiTypes::Document, &RttiTypes::StructuredEntity,
+		     &RttiTypes::AnnotationEntity, &RttiTypes::DocumentField});
+
+		std::string fieldName;
+		DocumentEntity *parent = nullptr;
+		bool inField = false;
+
+		preamble(parentNode, fieldName, parent, inField);
+
+		// try to find a FieldDescriptor for the given tag if we are not in a
+		// field already.
+		// TODO: Consider fields of transparent classes
+		if (!inField && parent != nullptr &&
+		    parent->getDescriptor()->hasField(name())) {
+			// The field node has to carry the name of the referenced field
+			// (the tag name): preamble() reads the field name back from the
+			// DocumentField node when the content of this element is
+			// handled. At this point "fieldName" is always the default field
+			// name and must not be used here.
+			Rooted<DocumentField> field{new DocumentField(
+			    parentNode->getManager(), name(), parentNode)};
+			field->setLocation(location());
+			scope().push(field);
+			return;
+		}
+
+		// Otherwise create a new StructuredEntity
+		// TODO: Consider Anchors and AnnotationEntities
+		Rooted<StructuredClass> strct = scope().resolve<StructuredClass>(
+		    Utils::split(name(), ':'), logger());
+		if (strct == nullptr) {
+			// if we could not resolve the name, throw an exception.
+			throw LoggableException(
+			    std::string("\"") + name() + "\" could not be resolved.",
+			    location());
+		}
+
+		// The optional "name" argument names the entity and is not passed on
+		// as a regular attribute.
+		std::string entityName;
+		auto it = args.find("name");
+		if (it != args.end()) {
+			entityName = it->second.asString();
+			args.erase(it);
+		}
+
+		Rooted<StructuredEntity> entity;
+		if (parentNode->isa(&RttiTypes::Document)) {
+			entity = parentNode.cast<Document>()->createRootStructuredEntity(
+			    strct, args, entityName);
+		} else {
+			// calculate a path if transparent entities are needed in between.
+			auto path = parent->getDescriptor()->pathTo(strct);
+			if (path.empty()) {
+				throw LoggableException(
+				    std::string("An instance of \"") + strct->getName() +
+				        "\" is not allowed as child of an instance of \"" +
+				        parent->getDescriptor()->getName() + "\"",
+				    location());
+			}
+
+			// create all transparent entities until the last field. The path
+			// is walked in steps of two: the odd entries are cast to
+			// StructuredClass, the entry before each supplies the field name.
+			for (size_t p = 1; p < path.size() - 1; p = p + 2) {
+				parent = static_cast<DocumentEntity *>(
+				    parent->createChildStructuredEntity(
+				                path[p].cast<StructuredClass>(),
+				                Variant::mapType{}, path[p - 1]->getName(),
+				                "").get());
+			}
+			entity = parent->createChildStructuredEntity(strct, args, fieldName,
+			                                             entityName);
+		}
+		entity->setLocation(location());
+		scope().push(entity);
+	}
+
+	/**
+	 * Removes the node pushed by start() from the scope again.
+	 */
+	void end() override { scope().pop(); }
+
+	/**
+	 * Stores the given character data as primitive content of the current
+	 * field. Logs an error and returns if the field does not exist or is not
+	 * primitive; returns without storing anything if the data cannot be
+	 * parsed or converted to the primitive type of the field.
+	 *
+	 * @param data is the character data that should be stored.
+	 * @param fieldIdx is not used by this handler.
+	 */
+	void data(const std::string &data, int fieldIdx) override
+	{
+		Rooted<Node> parentNode = scope().selectOrThrow(
+		    {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity,
+		     &RttiTypes::DocumentField});
+
+		std::string fieldName;
+		DocumentEntity *parent = nullptr;
+		bool inField = false;
+
+		preamble(parentNode, fieldName, parent, inField);
+
+		// retrieve the correct FieldDescriptor.
+		// TODO: Consider fields of transparent classes
+		Rooted<Descriptor> desc = parent->getDescriptor();
+		Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName);
+		if (field == nullptr) {
+			logger().error(
+			    std::string("Can't handle data because no field with name \"") +
+			        fieldName + "\" exists in descriptor \"" + desc->getName() +
+			        "\".",
+			    location());
+			return;
+		}
+		if (!field->isPrimitive()) {
+			logger().error(std::string("Can't handle data because field \"") +
+			                   fieldName + "\" of descriptor \"" +
+			                   desc->getName() + "\" is not primitive!",
+			               location());
+			return;
+		}
+
+		// try to parse the content.
+		auto res = VariantReader::parseGenericString(
+		    data, logger(), location().getSourceId(), location().getStart());
+		if (!res.first) {
+			return;
+		}
+		// try to convert it to the correct type.
+		if (!field->getPrimitiveType()->build(res.second, logger())) {
+			return;
+		}
+		// add it as primitive content.
+		parent->createChildDocumentPrimitive(res.second, fieldName);
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DocumentChildHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DocumentChildHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "typesystem" element. Creates a new Typesystem in the
+ * project and makes it the current node of the parser scope.
+ */
+class TypesystemHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Creates the typesystem named by the "name" argument, pushes it onto the
+	 * scope and resets the POST_HEAD flag to false.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		// Instantiate the typesystem and remember where it was declared
+		const std::string &typesystemName = args["name"].asString();
+		Rooted<Typesystem> ts = project()->createTypesystem(typesystemName);
+		ts->setLocation(location());
+
+		// Descend into the typesystem, the POST_HEAD flag is set to false
+		scope().push(ts);
+		scope().setFlag(ParserFlag::POST_HEAD, false);
+	}
+
+	/**
+	 * Removes the typesystem from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated TypesystemHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "enum" element inside a typesystem. Creates a new EnumType
+ * and makes it the current node of the parser scope.
+ */
+class TypesystemEnumHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Sets the POST_HEAD flag, creates the enum type named by the "name"
+	 * argument in the current typesystem and pushes it onto the scope.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		// The enum is created inside the typesystem that is currently open
+		Rooted<Typesystem> owner = scope().selectOrThrow<Typesystem>();
+		const std::string &enumName = args["name"].asString();
+		Rooted<EnumType> enumNode = owner->createEnumType(enumName);
+		enumNode->setLocation(location());
+
+		scope().push(enumNode);
+	}
+
+	/**
+	 * Removes the enum type from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated TypesystemEnumHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemEnumHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "entry" element inside an enum. Collects the character
+ * data of the element and adds it as entry to the enclosing EnumType once the
+ * element is closed.
+ */
+class TypesystemEnumEntryHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Accumulated character data of the element, used as the entry name.
+	 */
+	std::string entry;
+
+	/**
+	 * Does nothing, the element takes no arguments.
+	 *
+	 * @param args are the arguments given to the element (unused).
+	 */
+	void start(Variant::mapType &args) override {}
+
+	/**
+	 * Adds the collected entry to the EnumType that is currently on the
+	 * scope.
+	 */
+	void end() override
+	{
+		Rooted<EnumType> enumType = scope().selectOrThrow<EnumType>();
+		enumType->addEntry(entry, logger());
+	}
+
+	/**
+	 * Appends the given character data to the entry name.
+	 *
+	 * @param data is the character data that should be appended.
+	 * @param field is the index of the field the data belongs to. Only field
+	 * zero is accepted, data for any other field is reported as an error and
+	 * discarded.
+	 */
+	void data(const std::string &data, int field) override
+	{
+		if (field != 0) {
+			// TODO: This should be stored in the HandlerData
+			// Report the position of the element like the other handlers do,
+			// otherwise the message cannot be attributed to a source line.
+			logger().error("Enum entry only has one field.", location());
+			return;
+		}
+		entry.append(data);
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated TypesystemEnumEntryHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemEnumEntryHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "struct" element inside a typesystem. Creates a new
+ * StructType and makes it the current node of the parser scope.
+ */
+class TypesystemStructHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Sets the POST_HEAD flag, creates the struct type named by the "name"
+	 * argument and pushes it onto the scope. If the "parent" argument is not
+	 * empty, a resolution of the parent type is requested from the scope.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		// Names of the new type and of its (optional) parent type
+		const std::string &structName = args["name"].asString();
+		const std::string &parentName = args["parent"].asString();
+
+		// The struct is created inside the typesystem that is currently open
+		Rooted<Typesystem> owner = scope().selectOrThrow<Typesystem>();
+		Rooted<StructType> structNode = owner->createStructType(structName);
+		structNode->setLocation(location());
+
+		// Callback that links the struct to the parent type the scope found
+		auto linkParent = [](Handle<Node> resolved, Handle<Node> target,
+		                     Logger &log) {
+			if (resolved != nullptr) {
+				target.cast<StructType>()->setParentStructure(
+				    resolved.cast<StructType>(), log);
+			}
+		};
+		if (!parentName.empty()) {
+			scope().resolve<StructType>(parentName, structNode, logger(),
+			                            linkParent);
+		}
+
+		scope().push(structNode);
+	}
+
+	/**
+	 * Removes the struct type from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated TypesystemStructHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemStructHandler{handlerData};
+	}
+};
+
+/**
+ * Handler that adds an attribute to the StructType which is currently on the
+ * scope. It is used by the struct field state of the typesystem as well as by
+ * the attribute state of the domain.
+ */
+class TypesystemStructFieldHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Creates the attribute and requests the resolution of its type from the
+	 * scope. The attribute counts as optional unless the "default" argument
+	 * is an object variant holding nullptr.
+	 *
+	 * @param args are the arguments given to the element ("name", "type" and
+	 * "default" are read).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		// Fetch the arguments
+		const std::string &attrName = args["name"].asString();
+		const std::string &typeName = args["type"].asString();
+		const Variant &defaultValue = args["default"];
+
+		// A null object as default value marks a mandatory attribute
+		const bool hasNullDefault =
+		    defaultValue.isObject() && defaultValue.asObject() == nullptr;
+		const bool optional = !hasNullDefault;
+
+		Rooted<StructType> owner = scope().selectOrThrow<StructType>();
+		Rooted<Attribute> attr =
+		    owner->createAttribute(attrName, defaultValue, optional, logger());
+		attr->setLocation(location());
+
+		// Callback that stores the resolved type in the attribute
+		auto assignType = [](Handle<Node> resolved, Handle<Node> target,
+		                     Logger &log) {
+			if (resolved != nullptr) {
+				target.cast<Attribute>()->setType(resolved.cast<Type>(), log);
+			}
+		};
+
+		// Only optional attributes carry a default value that has to be
+		// resolved along with the type
+		if (optional) {
+			scope().resolveTypeWithValue(typeName, attr,
+			                             attr->getDefaultValue(), logger(),
+			                             assignType);
+		} else {
+			scope().resolveType(typeName, attr, logger(), assignType);
+		}
+	}
+
+	/**
+	 * Does nothing, start() does not push anything onto the scope.
+	 */
+	void end() override {}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated TypesystemStructFieldHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemStructFieldHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "constant" element inside a typesystem. Creates a new
+ * Constant in the typesystem that is currently on the scope.
+ */
+class TypesystemConstantHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Sets the POST_HEAD flag, creates the constant and requests the
+	 * resolution of its type (together with its value) from the scope.
+	 *
+	 * @param args are the arguments given to the element ("name", "type" and
+	 * "value" are read).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		// Fetch the arguments
+		const std::string &constantName = args["name"].asString();
+		const std::string &typeName = args["type"].asString();
+		const Variant &value = args["value"];
+
+		Rooted<Typesystem> owner = scope().selectOrThrow<Typesystem>();
+		Rooted<Constant> constantNode =
+		    owner->createConstant(constantName, value);
+		constantNode->setLocation(location());
+
+		// Callback that stores the resolved type in the constant
+		auto assignType = [](Handle<Node> resolved, Handle<Node> target,
+		                     Logger &log) {
+			if (resolved != nullptr) {
+				target.cast<Constant>()->setType(resolved.cast<Type>(), log);
+			}
+		};
+		scope().resolveTypeWithValue(typeName, constantNode,
+		                             constantNode->getValue(), logger(),
+		                             assignType);
+	}
+
+	/**
+	 * Does nothing, start() does not push anything onto the scope.
+	 */
+	void end() override {}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated TypesystemConstantHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new TypesystemConstantHandler{handlerData};
+	}
+};
+
+/*
+ * Domain Handlers
+ */
+
+/**
+ * Handler for the "domain" element. Creates a new Domain in the project and
+ * makes it the current node of the parser scope.
+ */
+class DomainHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Creates the domain named by the "name" argument, pushes it onto the
+	 * scope and resets the POST_HEAD flag to false.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		Rooted<Domain> domain =
+		    project()->createDomain(args["name"].asString());
+		domain->setLocation(location());
+
+		// Push the domain onto the scope and reset the POST_HEAD flag, just
+		// like the document and typesystem handlers do. A domain is a valid
+		// parent of "import", which is rejected while POST_HEAD is set, so a
+		// flag set by the surrounding document must not leak into the
+		// domain.
+		scope().push(domain);
+		scope().setFlag(ParserFlag::POST_HEAD, false);
+	}
+
+	/**
+	 * Removes the domain from the scope again.
+	 */
+	void end() override { scope().pop(); }
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "struct" element inside a domain. Creates a new
+ * StructuredClass and makes it the current node of the parser scope.
+ */
+class DomainStructHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Sets the POST_HEAD flag, creates the structured class in the current
+	 * domain and pushes it onto the scope. If the "isa" argument is not
+	 * empty, a resolution of the superclass is requested from the scope.
+	 *
+	 * @param args are the arguments given to the element ("name",
+	 * "cardinality", "transparent", "isRoot" and "isa" are read).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		Rooted<Domain> owner = scope().selectOrThrow<Domain>();
+
+		// The superclass is passed as nullptr here and is set later by the
+		// resolution callback below
+		Rooted<StructuredClass> cls = owner->createStructuredClass(
+		    args["name"].asString(), args["cardinality"].asCardinality(),
+		    nullptr, args["transparent"].asBool(), args["isRoot"].asBool());
+		cls->setLocation(location());
+
+		// Callback that stores the resolved superclass in the new class
+		auto linkSuperclass = [](Handle<Node> resolved, Handle<Node> target,
+		                         Logger &log) {
+			if (resolved != nullptr) {
+				target.cast<StructuredClass>()->setSuperclass(
+				    resolved.cast<StructuredClass>(), log);
+			}
+		};
+		const std::string &superclassName = args["isa"].asString();
+		if (!superclassName.empty()) {
+			scope().resolve<StructuredClass>(superclassName, cls, logger(),
+			                                 linkSuperclass);
+		}
+
+		scope().push(cls);
+	}
+
+	/**
+	 * Removes the structured class from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainStructHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainStructHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "annotation" element inside a domain. Creates a new
+ * AnnotationClass and makes it the current node of the parser scope.
+ */
+class DomainAnnotationHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Sets the POST_HEAD flag, creates the annotation class named by the
+	 * "name" argument in the current domain and pushes it onto the scope.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		scope().setFlag(ParserFlag::POST_HEAD, true);
+
+		Rooted<Domain> owner = scope().selectOrThrow<Domain>();
+
+		const std::string &className = args["name"].asString();
+		Rooted<AnnotationClass> cls = owner->createAnnotationClass(className);
+		cls->setLocation(location());
+
+		scope().push(cls);
+	}
+
+	/**
+	 * Removes the annotation class from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainAnnotationHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainAnnotationHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "attributes" element inside a struct or annotation. Makes
+ * the attributes descriptor of the enclosing Descriptor the current node of
+ * the parser scope.
+ */
+class DomainAttributesHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Fetches the attributes descriptor of the Descriptor that is currently
+	 * on the scope, updates its location and pushes it onto the scope.
+	 *
+	 * @param args are the arguments given to the element (unused).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		// Fetch the enclosing descriptor and its attributes StructType
+		Rooted<Descriptor> owner = scope().selectOrThrow<Descriptor>();
+		Rooted<StructType> attributes = owner->getAttributesDescriptor();
+		attributes->setLocation(location());
+
+		scope().push(attributes);
+	}
+
+	/**
+	 * Removes the attributes descriptor from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainAttributesHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainAttributesHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "field" element inside a struct or annotation. Creates a
+ * new FieldDescriptor and makes it the current node of the parser scope.
+ */
+class DomainFieldHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Creates the field descriptor in the Descriptor that is currently on
+	 * the scope and pushes it onto the scope.
+	 *
+	 * @param args are the arguments given to the element ("isSubtree",
+	 * "name" and "optional" are read).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		// "isSubtree" selects between the two field types
+		const FieldDescriptor::FieldType fieldType =
+		    args["isSubtree"].asBool() ? FieldDescriptor::FieldType::SUBTREE
+		                               : FieldDescriptor::FieldType::TREE;
+
+		Rooted<Descriptor> owner = scope().selectOrThrow<Descriptor>();
+
+		Rooted<FieldDescriptor> fieldNode = owner->createFieldDescriptor(
+		    fieldType, args["name"].asString(), args["optional"].asBool());
+		fieldNode->setLocation(location());
+
+		scope().push(fieldNode);
+	}
+
+	/**
+	 * Removes the field descriptor from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainFieldHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainFieldHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "fieldRef" element inside a struct or annotation. Adds an
+ * existing FieldDescriptor, referenced by name, to the Descriptor that is
+ * currently on the scope.
+ */
+class DomainFieldRefHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Requests the resolution of the field named by the "name" argument from
+	 * the scope. Once it is resolved, the field is added to the descriptor.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		Rooted<Descriptor> parent = scope().selectOrThrow<Descriptor>();
+
+		const std::string &name = args["name"].asString();
+		scope().resolve<FieldDescriptor>(
+		    name, parent, logger(),
+		    [](Handle<Node> field, Handle<Node> parent, Logger &logger) {
+			    if (field != nullptr) {
+				    // The parent was selected as Descriptor and may be an
+				    // AnnotationClass (the state allows annotations as
+				    // parent), so it must not be cast to StructuredClass.
+				    parent.cast<Descriptor>()->addFieldDescriptor(
+				        field.cast<FieldDescriptor>());
+			    }
+			});
+	}
+
+	/**
+	 * Does nothing, start() does not push anything onto the scope.
+	 */
+	void end() override {}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainFieldRefHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainFieldRefHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "primitive" element inside a struct or annotation. Creates
+ * a primitive FieldDescriptor and makes it the current node of the parser
+ * scope.
+ */
+class DomainPrimitiveHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Creates the primitive field in the Descriptor that is currently on the
+	 * scope, requests the resolution of its type and pushes the field onto
+	 * the scope.
+	 *
+	 * @param args are the arguments given to the element ("name",
+	 * "optional" and "type" are read).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		Rooted<Descriptor> owner = scope().selectOrThrow<Descriptor>();
+
+		// The type is passed as nullptr here and is set later by the
+		// resolution callback below
+		Rooted<FieldDescriptor> fieldNode =
+		    owner->createPrimitiveFieldDescriptor(
+		        nullptr, args["name"].asString(), args["optional"].asBool());
+		fieldNode->setLocation(location());
+
+		// Callback that stores the resolved type in the field
+		auto assignType = [](Handle<Node> resolved, Handle<Node> target,
+		                     Logger &log) {
+			if (resolved != nullptr) {
+				target.cast<FieldDescriptor>()->setPrimitiveType(
+				    resolved.cast<Type>());
+			}
+		};
+		const std::string &typeName = args["type"].asString();
+		scope().resolve<Type>(typeName, fieldNode, logger(), assignType);
+
+		scope().push(fieldNode);
+	}
+
+	/**
+	 * Removes the field descriptor from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainPrimitiveHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainPrimitiveHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "child" element inside a field. Adds the StructuredClass
+ * referenced by the "ref" argument as child to the FieldDescriptor that is
+ * currently on the scope.
+ */
+class DomainChildHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Requests the resolution of the referenced class from the scope. Once
+	 * it is resolved, the class is added as child to the field.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		Rooted<FieldDescriptor> fieldNode =
+		    scope().selectOrThrow<FieldDescriptor>();
+
+		// Callback that registers the resolved class as child of the field
+		auto addChild = [](Handle<Node> resolved, Handle<Node> target,
+		                   Logger &log) {
+			if (resolved != nullptr) {
+				target.cast<FieldDescriptor>()->addChild(
+				    resolved.cast<StructuredClass>());
+			}
+		};
+		const std::string &childRef = args["ref"].asString();
+		scope().resolve<StructuredClass>(childRef, fieldNode, logger(),
+		                                 addChild);
+	}
+
+	/**
+	 * Does nothing, start() does not push anything onto the scope.
+	 */
+	void end() override {}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainChildHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainChildHandler{handlerData};
+	}
+};
+
+/**
+ * Helper node that DomainParentHandler pushes onto the scope. The handlers
+ * for the nested field elements use the name of this node as the name of the
+ * parent descriptor to resolve, and the parent of this node as the
+ * StructuredClass that is added as child.
+ */
+class DomainParent : public Node {
+public:
+	/**
+	 * Constructor of the DomainParent class.
+	 *
+	 * @param mgr is the Manager instance passed on to the Node constructor.
+	 * @param name is the name of the node.
+	 * @param parent is the parent node passed on to the Node constructor.
+	 */
+	DomainParent(Manager &mgr, std::string name, Handle<Node> parent)
+	    : Node(mgr, name, parent)
+	{
+	}
+};
+
+namespace RttiTypes {
+// Type descriptor of the DomainParent helper class, declared with Node as
+// its parent type.
+const Rtti DomainParent =
+    RttiBuilder<ousia::DomainParent>("DomainParent").parent(&Node);
+}
+
+/**
+ * Handler for the "parent" element inside a struct. Pushes a DomainParent
+ * helper node onto the scope which carries the name given in the arguments.
+ */
+class DomainParentHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Creates a DomainParent node below the StructuredClass that is
+	 * currently on the scope and pushes it onto the scope.
+	 *
+	 * @param args are the arguments given to the element ("name" is read).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		Rooted<StructuredClass> cls = scope().selectOrThrow<StructuredClass>();
+
+		// The helper node is named after the "name" argument and uses the
+		// current class as its parent node
+		Rooted<DomainParent> parentRef{
+		    new DomainParent(cls->getManager(), args["name"].asString(), cls)};
+		parentRef->setLocation(location());
+
+		scope().push(parentRef);
+	}
+
+	/**
+	 * Removes the helper node from the scope again.
+	 */
+	void end() override
+	{
+		scope().pop();
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainParentHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainParentHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "field" element inside a "parent" element. Declares a new
+ * field in the referenced parent descriptor and adds the enclosing
+ * StructuredClass as child to it.
+ */
+class DomainParentFieldHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Requests the resolution of the parent descriptor from the scope. Once
+	 * it is resolved, the field is created in it and the StructuredClass is
+	 * added as child of the new field.
+	 *
+	 * @param args are the arguments given to the element ("isSubtree",
+	 * "name" and "optional" are read).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		Rooted<DomainParent> parentRef = scope().selectOrThrow<DomainParent>();
+
+		// "isSubtree" selects between the two field types
+		const FieldDescriptor::FieldType fieldType =
+		    args["isSubtree"].asBool() ? FieldDescriptor::FieldType::SUBTREE
+		                               : FieldDescriptor::FieldType::TREE;
+
+		const std::string &fieldName = args["name"].asString();
+		const bool isOptional = args["optional"].asBool();
+
+		// The class that declared the "parent" element is the parent node of
+		// the DomainParent helper
+		Rooted<StructuredClass> child =
+		    parentRef->getParent().cast<StructuredClass>();
+
+		// Callback that creates the declared field in the resolved parent
+		// descriptor and registers the class as child of that field
+		auto declareField = [fieldType, fieldName, isOptional](
+		    Handle<Node> parent, Handle<Node> strct, Logger &logger) {
+			if (parent != nullptr) {
+				Rooted<FieldDescriptor> created =
+				    parent.cast<Descriptor>()->createFieldDescriptor(
+				        fieldType, fieldName, isOptional);
+				created->addChild(strct.cast<StructuredClass>());
+			}
+		};
+		scope().resolve<Descriptor>(parentRef->getName(), child, logger(),
+		                            declareField);
+	}
+
+	/**
+	 * Does nothing, start() does not push anything onto the scope.
+	 */
+	void end() override {}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainParentFieldHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainParentFieldHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "fieldRef" element inside a "parent" element. Looks up an
+ * existing field of the referenced parent descriptor and adds the enclosing
+ * StructuredClass as child to it.
+ */
+class DomainParentFieldRefHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Requests the resolution of the parent descriptor from the scope. Once
+	 * it is resolved, the referenced field is looked up in it and the
+	 * StructuredClass is added as child. An error is logged if the lookup
+	 * does not return exactly one field.
+	 *
+	 * @param args are the arguments given to the element ("name" is read).
+	 */
+	void start(Variant::mapType &args) override
+	{
+		Rooted<DomainParent> parentRef = scope().selectOrThrow<DomainParent>();
+
+		const std::string &fieldName = args["name"].asString();
+
+		// The class that declared the "parent" element is the parent node of
+		// the DomainParent helper
+		Rooted<StructuredClass> child =
+		    parentRef->getParent().cast<StructuredClass>();
+
+		// Copy of the location for use inside the callback
+		auto errorLoc = location();
+
+		// Callback that fetches the referenced field from the resolved parent
+		// descriptor and registers the class as child of that field
+		auto attachToField = [fieldName, errorLoc](
+		    Handle<Node> parent, Handle<Node> strct, Logger &logger) {
+			if (parent != nullptr) {
+				auto matches = parent.cast<Descriptor>()->resolve(
+				    &RttiTypes::FieldDescriptor, fieldName);
+				if (matches.size() != 1) {
+					logger.error(
+					    std::string("Could not find referenced field ") +
+					        fieldName,
+					    errorLoc);
+					return;
+				}
+				Rooted<FieldDescriptor> referenced =
+				    matches[0].node.cast<FieldDescriptor>();
+				referenced->addChild(strct.cast<StructuredClass>());
+			}
+		};
+		scope().resolve<Descriptor>(parentRef->getName(), child, logger(),
+		                            attachToField);
+	}
+
+	/**
+	 * Does nothing, start() does not push anything onto the scope.
+	 */
+	void end() override {}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated DomainParentFieldRefHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new DomainParentFieldRefHandler{handlerData};
+	}
+};
+
+/*
+ * Import and Include Handler
+ */
+
+/**
+ * Common base class of ImportHandler and IncludeHandler. Collects the "rel",
+ * "type" and "src" values of the element; "src" is taken either from the
+ * arguments or from the character data of the element.
+ */
+class ImportIncludeHandler : public Handler {
+public:
+	using Handler::Handler;
+
+	/**
+	 * Set to true if a non-empty "src" argument was given.
+	 */
+	bool srcInArgs = false;
+
+	/**
+	 * Value of the "rel" argument.
+	 */
+	std::string rel;
+
+	/**
+	 * Value of the "type" argument.
+	 */
+	std::string type;
+
+	/**
+	 * Value of the "src" argument or the accumulated character data.
+	 */
+	std::string src;
+
+	/**
+	 * Copies the "rel", "type" and "src" arguments into the members.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		rel = args["rel"].asString();
+		type = args["type"].asString();
+		src = args["src"].asString();
+		srcInArgs = !src.empty();
+	}
+
+	/**
+	 * Appends the given character data to "src". The data is rejected with
+	 * an error if "src" was already given as argument or if the data does
+	 * not belong to field zero.
+	 *
+	 * @param data is the character data that should be appended.
+	 * @param field is the index of the field the data belongs to.
+	 */
+	void data(const std::string &data, int field) override
+	{
+		// Errors carry the location of the element, like in the other
+		// handlers, so that they can be attributed to a source line.
+		if (srcInArgs) {
+			logger().error("\"src\" attribute has already been set",
+			               location());
+			return;
+		}
+		if (field != 0) {
+			logger().error("Command has only one field.", location());
+			return;
+		}
+		src.append(data);
+	}
+};
+
+/**
+ * Handler for the "import" element. Imports the referenced resource and
+ * registers the imported node as reference in the RootNode that is the
+ * current leaf of the scope.
+ */
+class ImportHandler : public ImportIncludeHandler {
+public:
+	using ImportIncludeHandler::ImportIncludeHandler;
+
+	/**
+	 * Reads the arguments and logs an error if the POST_HEAD flag is already
+	 * set.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		ImportIncludeHandler::start(args);
+
+		// Imports are only accepted as long as POST_HEAD is not set.
+		// NOTE(review): only an error is logged here, end() still performs
+		// the import afterwards -- confirm that this is intended.
+		if (scope().getFlag(ParserFlag::POST_HEAD)) {
+			logger().error("Imports must be listed before other commands.",
+			               location());
+		}
+	}
+
+	/**
+	 * Performs the import. Logs an error and returns if the current leaf of
+	 * the scope is not a RootNode.
+	 */
+	void end() override
+	{
+		// An import is only valid directly below a RootNode
+		Rooted<Node> leaf = scope().getLeaf();
+		const bool leafIsRootNode =
+		    leaf != nullptr && leaf->isa(&RttiTypes::RootNode);
+		if (!leafIsRootNode) {
+			logger().error(
+			    "Import not supported here, must be inside a document, domain "
+			    "or typesystem command.",
+			    location());
+			return;
+		}
+		Rooted<RootNode> root = leaf.cast<RootNode>();
+
+		// Import the resource and, on success, reference it from the root
+		Rooted<Node> imported =
+		    context().import(src, type, rel, root->getReferenceTypes());
+		if (imported != nullptr) {
+			root->reference(imported);
+		}
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated ImportHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new ImportHandler{handlerData};
+	}
+};
+
+/**
+ * Handler for the "include" element. Passes the collected "src", "type" and
+ * "rel" values to the include function of the parser context.
+ */
+class IncludeHandler : public ImportIncludeHandler {
+public:
+	using ImportIncludeHandler::ImportIncludeHandler;
+
+	/**
+	 * Reads the arguments, nothing else is done when the element is opened.
+	 *
+	 * @param args are the arguments given to the element.
+	 */
+	void start(Variant::mapType &args) override
+	{
+		ImportIncludeHandler::start(args);
+	}
+
+	/**
+	 * Performs the include, any Node type is listed as supported type.
+	 */
+	void end() override
+	{
+		context().include(src, type, rel, {&RttiTypes::Node});
+	}
+
+	/**
+	 * Factory function used by the parser states.
+	 *
+	 * @param handlerData is forwarded to the Handler constructor.
+	 * @return a newly allocated IncludeHandler.
+	 */
+	static Handler *create(const HandlerData &handlerData)
+	{
+		return new IncludeHandler{handlerData};
+	}
+};
+
+/**
+ * Parser states of the XML format. Each state lists its permitted parent
+ * states, the node type(s) associated with it, the factory function of the
+ * handler and the arguments accepted by the element.
+ * NOTE(review): the second parameter of the Argument factories looks like the
+ * default value of an optional argument -- confirm against Argument.
+ */
+namespace ParserStates {
+/* Document states */
+static const ParserState Document =
+    ParserStateBuilder()
+        .parent(&None)
+        .createdNodeType(&RttiTypes::Document)
+        .elementHandler(DocumentHandler::create)
+        .arguments({Argument::String("name", "")});
+
+// The state lists itself as parent, so document children can be nested. Only
+// the address of the state is taken in its own initializer.
+static const ParserState DocumentChild =
+    ParserStateBuilder()
+        .parents({&Document, &DocumentChild})
+        .createdNodeTypes({&RttiTypes::StructureNode,
+                           &RttiTypes::AnnotationEntity,
+                           &RttiTypes::DocumentField})
+        .elementHandler(DocumentChildHandler::create);
+
+/* Domain states */
+static const ParserState Domain = ParserStateBuilder()
+                                      .parents({&None, &Document})
+                                      .createdNodeType(&RttiTypes::Domain)
+                                      .elementHandler(DomainHandler::create)
+                                      .arguments({Argument::String("name")});
+
+static const ParserState DomainStruct =
+    ParserStateBuilder()
+        .parent(&Domain)
+        .createdNodeType(&RttiTypes::StructuredClass)
+        .elementHandler(DomainStructHandler::create)
+        .arguments({Argument::String("name"),
+                    Argument::Cardinality("cardinality", Cardinality::any()),
+                    Argument::Bool("isRoot", false),
+                    Argument::Bool("transparent", false),
+                    Argument::String("isa", "")});
+
+static const ParserState DomainAnnotation =
+    ParserStateBuilder()
+        .parent(&Domain)
+        .createdNodeType(&RttiTypes::AnnotationClass)
+        .elementHandler(DomainAnnotationHandler::create)
+        .arguments({Argument::String("name")});
+
+static const ParserState DomainAttributes =
+    ParserStateBuilder()
+        .parents({&DomainStruct, &DomainAnnotation})
+        .createdNodeType(&RttiTypes::StructType)
+        .elementHandler(DomainAttributesHandler::create)
+        .arguments({});
+
+// Attributes of a domain descriptor reuse the handler of the typesystem
+// struct fields.
+static const ParserState DomainAttribute =
+    ParserStateBuilder()
+        .parent(&DomainAttributes)
+        .elementHandler(TypesystemStructFieldHandler::create)
+        .arguments({Argument::String("name"), Argument::String("type"),
+                    Argument::Any("default", Variant::fromObject(nullptr))});
+
+static const ParserState DomainField =
+    ParserStateBuilder()
+        .parents({&DomainStruct, &DomainAnnotation})
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainFieldHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
+                    Argument::Bool("isSubtree", false),
+                    Argument::Bool("optional", false)});
+
+static const ParserState DomainFieldRef =
+    ParserStateBuilder()
+        .parents({&DomainStruct, &DomainAnnotation})
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainFieldRefHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME)});
+
+static const ParserState DomainStructPrimitive =
+    ParserStateBuilder()
+        .parents({&DomainStruct, &DomainAnnotation})
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainPrimitiveHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
+                    Argument::Bool("optional", false),
+                    Argument::String("type")});
+
+static const ParserState DomainStructChild =
+    ParserStateBuilder()
+        .parent(&DomainField)
+        .elementHandler(DomainChildHandler::create)
+        .arguments({Argument::String("ref")});
+
+static const ParserState DomainStructParent =
+    ParserStateBuilder()
+        .parent(&DomainStruct)
+        .createdNodeType(&RttiTypes::DomainParent)
+        .elementHandler(DomainParentHandler::create)
+        .arguments({Argument::String("name")});
+
+static const ParserState DomainStructParentField =
+    ParserStateBuilder()
+        .parent(&DomainStructParent)
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainParentFieldHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME),
+                    Argument::Bool("isSubtree", false),
+                    Argument::Bool("optional", false)});
+
+static const ParserState DomainStructParentFieldRef =
+    ParserStateBuilder()
+        .parent(&DomainStructParent)
+        .createdNodeType(&RttiTypes::FieldDescriptor)
+        .elementHandler(DomainParentFieldRefHandler::create)
+        .arguments({Argument::String("name", DEFAULT_FIELD_NAME)});
+
+/* Typesystem states */
+static const ParserState Typesystem =
+    ParserStateBuilder()
+        .parents({&None, &Domain})
+        .createdNodeType(&RttiTypes::Typesystem)
+        .elementHandler(TypesystemHandler::create)
+        .arguments({Argument::String("name", "")});
+
+static const ParserState TypesystemEnum =
+    ParserStateBuilder()
+        .parent(&Typesystem)
+        .createdNodeType(&RttiTypes::EnumType)
+        .elementHandler(TypesystemEnumHandler::create)
+        .arguments({Argument::String("name")});
+
+static const ParserState TypesystemEnumEntry =
+    ParserStateBuilder()
+        .parent(&TypesystemEnum)
+        .elementHandler(TypesystemEnumEntryHandler::create)
+        .arguments({});
+
+static const ParserState TypesystemStruct =
+    ParserStateBuilder()
+        .parent(&Typesystem)
+        .createdNodeType(&RttiTypes::StructType)
+        .elementHandler(TypesystemStructHandler::create)
+        .arguments({Argument::String("name"), Argument::String("parent", "")});
+
+static const ParserState TypesystemStructField =
+    ParserStateBuilder()
+        .parent(&TypesystemStruct)
+        .elementHandler(TypesystemStructFieldHandler::create)
+        .arguments({Argument::String("name"), Argument::String("type"),
+                    Argument::Any("default", Variant::fromObject(nullptr))});
+
+static const ParserState TypesystemConstant =
+    ParserStateBuilder()
+        .parent(&Typesystem)
+        .createdNodeType(&RttiTypes::Constant)
+        .elementHandler(TypesystemConstantHandler::create)
+        .arguments({Argument::String("name"), Argument::String("type"),
+                    Argument::Any("value")});
+
+/* Special states for import and include */
+static const ParserState Import =
+    ParserStateBuilder()
+        .parents({&Document, &Typesystem, &Domain})
+        .elementHandler(ImportHandler::create)
+        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
+                    Argument::String("src", "")});
+
+static const ParserState Include =
+    ParserStateBuilder()
+        .parent(&All)
+        .elementHandler(IncludeHandler::create)
+        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
+                    Argument::String("src", "")});
+
+// Maps element names to the states defined above. Some names ("struct",
+// "field", "fieldRef") are mapped to more than one state; presumably the
+// state whose parent matches the current state is chosen, and "*" presumably
+// acts as a wildcard for all other names -- TODO confirm against ParserStack.
+static const std::multimap<std::string, const ParserState *> XmlStates{
+    {"document", &Document},
+    {"*", &DocumentChild},
+    {"domain", &Domain},
+    {"struct", &DomainStruct},
+    {"annotation", &DomainAnnotation},
+    {"attributes", &DomainAttributes},
+    {"attribute", &DomainAttribute},
+    {"field", &DomainField},
+    {"fieldRef", &DomainFieldRef},
+    {"primitive", &DomainStructPrimitive},
+    {"child", &DomainStructChild},
+    {"parent", &DomainStructParent},
+    {"field", &DomainStructParentField},
+    {"fieldRef", &DomainStructParentFieldRef},
+    {"typesystem", &Typesystem},
+    {"enum", &TypesystemEnum},
+    {"entry", &TypesystemEnumEntry},
+    {"struct", &TypesystemStruct},
+    {"field", &TypesystemStructField},
+    {"constant", &TypesystemConstant},
+    {"import", &Import},
+    {"include", &Include}};
+}
+
+/**
+ * Structure containing the private data that is being passed to the
+ * XML-Handlers.
+ */
+struct XMLUserData {
+	/**
+	 * Current nesting depth of the XML elements seen so far.
+	 */
+	size_t depth;
+
+	/**
+	 * Pointer to the ParserStack instance.
+	 */
+	ParserStack *stack;
+
+	/**
+	 * Pointer to the CharReader instance.
+	 */
+	CharReader *reader;
+
+	/**
+	 * Constructor of the XMLUserData struct, the depth starts at zero.
+	 *
+	 * @param stack is a pointer at the ParserStack instance.
+	 * @param reader is a pointer at the CharReader instance.
+	 */
+	XMLUserData(ParserStack *stack, CharReader *reader)
+	    : depth(0), stack(stack), reader(reader)
+	{
+	}
+};
+
+/**
+ * Wrapper class around the XML_Parser pointer which safely frees it whenever
+ * the scope is left (e.g. because an exception was thrown).
+ */
+class ScopedExpatXmlParser {
+private:
+	/**
+	 * Internal pointer to the XML_Parser instance.
+	 */
+	XML_Parser parser;
+
+public:
+	/**
+	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreate
+	 * from the expat library. Throws a LoggableException if the XML parser
+	 * cannot be initialized.
+	 *
+	 * @param encoding is the protocol-defined encoding passed to expat (or
+	 * nullptr if expat should determine the encoding by itself).
+	 */
+	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
+	{
+		parser = XML_ParserCreate(encoding);
+		if (!parser) {
+			throw LoggableException{
+			    "Internal error: Could not create expat XML parser!"};
+		}
+	}
+
+	// The instance owns the parser handle exclusively; a copy would free the
+	// same handle a second time, so copying is disabled.
+	ScopedExpatXmlParser(const ScopedExpatXmlParser &) = delete;
+	ScopedExpatXmlParser &operator=(const ScopedExpatXmlParser &) = delete;
+
+	/** Destructor, frees the XML parser instance (if one was created). */
+	~ScopedExpatXmlParser()
+	{
+		if (parser) {
+			XML_ParserFree(parser);
+		}
+	}
+
+	/** Returns the XML_Parser pointer. */
+	XML_Parser operator&() { return parser; }
+};
+
+/* Adapter Expat -> ParserStack */
+
+static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0)
+{
+	// The user data registered with expat holds the ParserStack pointer
+	XMLUserData *data = static_cast<XMLUserData *>(XML_GetUserData(p));
+	ParserStack *parserStack = data->stack;
+
+	// Byte offset expat is currently at; the location spans "len" bytes
+	const size_t start = XML_GetCurrentByteIndex(p);
+	const size_t end = start + len;
+
+	// Make this range the default location of the current logger instance
+	// and hand the same location back to the caller
+	SourceLocation loc{parserStack->getContext().getSourceId(), start, end};
+	parserStack->getContext().getLogger().setDefaultLocation(loc);
+	return loc;
+}
+
+enum class XMLAttributeState {
+	IN_TAG_NAME,      // initial state, inside the name of the tag
+	SEARCH_ATTR,      // skipping whitespace in front of an attribute name
+	IN_ATTR_NAME,     // collecting the characters of an attribute name
+	HAS_ATTR_NAME,    // name ended with whitespace, waiting for the '='
+	HAS_ATTR_EQUALS,  // '=' was read, waiting for the opening quote
+	IN_ATTR_DATA      // inside the quoted value, until the closing quote
+};
+
+// Scans the tag starting at "location" on a fork of "reader" and returns the
+// source location of each attribute value, keyed by the attribute name. An
+// empty map is returned if the reader cannot be positioned at the tag.
+static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets(
+    CharReader &reader, SourceLocation location)
+{
+	std::map<std::string, SourceLocation> res;
+
+	// Fork the reader, we don't want to mess up the XML parsing process, do we?
+	CharReaderFork readerFork = reader.fork();
+
+	// Move the read cursor to the start location, abort if this does not work
+	size_t offs = location.getStart();
+	if (!location.isValid() || offs != readerFork.seek(offs)) {
+		return res;
+	}
+
+	// Now all we need to do is to implement one half of an XML parser. As this
+	// is inherently complicated we'll totally fail at it. Don't care. All we
+	// want to get is those darn offsets for pretty error messages... (and we
+	// can assume the XML is valid as it was already read by expat)
+	XMLAttributeState state = XMLAttributeState::IN_TAG_NAME;
+	char c;
+	char quote = '"';  // delimiter that opened the current attribute value
+	std::string attrName;
+	while (readerFork.read(c)) {
+		// Abort at the end of the tag
+		if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) {
+			return res;
+		}
+
+		// One state machine to rule them all, one state machine to find them,
+		// One state machine to bring them all and in the darkness bind them
+		// (the byte offsets)
+		switch (state) {
+			case XMLAttributeState::IN_TAG_NAME:
+				if (Utils::isWhitespace(c)) {
+					state = XMLAttributeState::SEARCH_ATTR;
+				}
+				break;
+			case XMLAttributeState::SEARCH_ATTR:
+				if (!Utils::isWhitespace(c)) {
+					state = XMLAttributeState::IN_ATTR_NAME;
+					attrName.push_back(c);
+				}
+				break;
+			case XMLAttributeState::IN_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					state = XMLAttributeState::HAS_ATTR_NAME;
+				} else if (c == '=') {
+					state = XMLAttributeState::HAS_ATTR_EQUALS;
+				} else {
+					attrName.push_back(c);
+				}
+				break;
+			case XMLAttributeState::HAS_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					// Skip whitespace here
+					break;
+				}
+				if (c == '=') {
+					state = XMLAttributeState::HAS_ATTR_EQUALS;
+					break;
+				}
+			// Well, this is a strange XML file... We expected to see a '='
+			// here! Fall through to "HAS_ATTR_EQUALS" for error recovery
+			case XMLAttributeState::HAS_ATTR_EQUALS:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '"' || c == '\'') {
+						// Start of an attribute value (XML allows both quote
+						// characters): lock the current offset away
+						quote = c;
+						res.emplace(attrName,
+						            SourceLocation{reader.getSourceId(),
+						                           readerFork.getOffset()});
+						attrName.clear();
+						state = XMLAttributeState::IN_ATTR_DATA;
+					} else {
+						// No, this XML file is not well formed. Assume we're in
+						// an attribute name once again
+						attrName.assign(1, c);
+						state = XMLAttributeState::IN_ATTR_NAME;
+					}
+				}
+				break;
+			case XMLAttributeState::IN_ATTR_DATA:
+				if (c == quote) {
+					// We're at the end of the attribute data, start anew
+					state = XMLAttributeState::SEARCH_ATTR;
+				}
+				break;
+		}
+	}
+	return res;
+}
+
+static void xmlStartElementHandler(void *p, const XML_Char *name,
+                                   const XML_Char **attrs)
+{
+	// Fetch the user data registered with the expat parser
+	XMLUserData *data = static_cast<XMLUserData *>(XML_GetUserData(p));
+	ParserStack *parserStack = data->stack;
+
+	// Point the logger at the tag before anything can be reported
+	SourceLocation tagLoc = syncLoggerPosition(static_cast<XML_Parser>(p));
+
+	// expat does not report the byte offsets of the individual attributes, so
+	// they are recovered by scanning the tag a second time -- a slow hack,
+	// but without it the arguments would have no location.
+	const std::map<std::string, SourceLocation> attrLocs =
+	    reconstructXMLAttributeOffsets(*data->reader, tagLoc);
+
+	// "attrs" is a null-terminated list of alternating keys and values
+	Variant::mapType args;
+	for (const XML_Char **attr = attrs; *attr; attr += 2) {
+		const std::string key{attr[0]};
+
+		// Use the reconstructed location of the key, if there is one
+		SourceLocation keyLoc;
+		const auto locIt = attrLocs.find(key);
+		if (locIt != attrLocs.end()) {
+			keyLoc = locIt->second;
+		}
+
+		// Parse the value string, pass the location of the key along
+		std::pair<bool, Variant> parsed = VariantReader::parseGenericString(
+		    attr[1], parserStack->getContext().getLogger(),
+		    keyLoc.getSourceId(), keyLoc.getStart());
+		args.emplace(key, parsed.second);
+	}
+
+	// Forward the tag to the stack, except for an "ousia" tag at depth zero
+	// (this top-level wrapper tag is skipped)
+	const std::string tagName(name);
+	const bool isRootWrapper = (tagName == "ousia") && (data->depth == 0);
+	if (!isRootWrapper) {
+		parserStack->start(std::string(name), args, tagLoc);
+	}
+
+	// Increment the current depth
+	data->depth++;
+}
+
+static void xmlEndElementHandler(void *p, const XML_Char *name)
+{
+	XMLUserData *data = static_cast<XMLUserData *>(XML_GetUserData(p));
+
+	// Keep the default logger location in sync with the parser position
+	syncLoggerPosition(static_cast<XML_Parser>(p));
+
+	// Leave the element: decrement the current depth first
+	data->depth--;
+
+	// The end of an "ousia" tag that closes at depth zero is not forwarded
+	// to the stack, every other end tag is
+	const std::string tagName(name);
+	if (!(tagName == "ousia" && data->depth == 0)) {
+		data->stack->end();
+	}
+}
+
+static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len)
+{
+	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
+	// A non-positive length is treated as "no data"
+	const size_t count = (len <= 0) ? 0 : static_cast<size_t>(len);
+	syncLoggerPosition(static_cast<XML_Parser>(p), count);
+
+	// Forward the trimmed text to the stack unless it is empty
+	const std::string text = Utils::trim(std::string{s, count});
+	if (!text.empty()) {
+		userData->stack->data(text);
+	}
+}
+
+/* Class XmlParser */
+
+void XmlParser::doParse(CharReader &reader, ParserContext &ctx)
+{
+	// The wrapper frees the expat parser when this function is left, also if
+	// an exception is thrown
+	ScopedExpatXmlParser p{"UTF-8"};
+
+	// Create the parser stack instance. When starting on a non-empty scope the
+	// parser state has to be deduced first -- give up if that fails
+	ParserStack stack(ctx, ParserStates::XmlStates);
+	if (!ctx.getScope().isEmpty() && !stack.deduceState()) {
+		return;
+	}
+
+	// Make the ParserStack and the reader available to the XML handlers and
+	// let expat pass the parser itself as first handler argument
+	XMLUserData data(&stack, &reader);
+	XML_SetUserData(&p, &data);
+	XML_UseParserAsHandlerArg(&p);
+
+	// Set the callback functions
+	XML_SetStartElementHandler(&p, xmlStartElementHandler);
+	XML_SetEndElementHandler(&p, xmlEndElementHandler);
+	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler);
+
+	// Feed data into expat chunk by chunk. The first empty read is still
+	// passed on (as the final chunk) and ends the loop afterwards
+	constexpr size_t BUFFER_SIZE = 64 * 1024;
+	size_t bytesRead = 0;
+	do {
+		// Fetch a buffer from expat for the input data
+		void *buf = XML_GetBuffer(&p, BUFFER_SIZE);
+		if (buf == nullptr) {
+			throw LoggableException{
+			    "Internal error: XML parser out of memory!"};
+		}
+
+		// Read into the buffer
+		bytesRead = reader.readRaw(static_cast<char *>(buf), BUFFER_SIZE);
+
+		// Parse the data and handle any XML error
+		const bool isFinal = (bytesRead == 0);
+		if (!XML_ParseBuffer(&p, bytesRead, isFinal)) {
+			// Fetch the xml parser byte offset
+			const size_t offs = XML_GetCurrentByteIndex(&p);
+
+			// Throw a corresponding exception
+			const XML_Error code = XML_GetErrorCode(&p);
+			const std::string msg{XML_ErrorString(code)};
+			throw LoggableException{"XML: " + msg,
+			                        SourceLocation{ctx.getSourceId(), offs}};
+		}
+
+		// Stop once there are no more bytes in the stream
+	} while (bytesRead != 0);
+}
+}
+
diff --git a/src/formats/osxml/OsxmlParser.hpp b/src/formats/osxml/OsxmlParser.hpp
new file mode 100644
index 0000000..c8b6302
--- /dev/null
+++ b/src/formats/osxml/OsxmlParser.hpp
@@ -0,0 +1,55 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsxmlParser.hpp
+ *
+ * Contains the parser responsible for reading Ousía XML Documents (extension
+ * oxd) and Ousía XML Modules (extension oxm).
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_XML_PARSER_HPP_
+#define _OUSIA_XML_PARSER_HPP_
+
+#include <core/parser/Parser.hpp>
+
+namespace ousia {
+
+/**
+ * The XmlParser class implements parsing the various types of Ousía XML
+ * documents using the expat stream XML parser.
+ */
+class XmlParser : public Parser {
+protected:
+	/**
+	 * Parses the given input stream as XML file. The method returns nothing;
+	 * the parsed content is handed to a ParserStack operating on ctx.
+	 *
+	 * @param reader is the CharReader from which the input should be read.
+	 * @param ctx is a reference to the ParserContext instance that should be
+	 * used.
+	 */
+	void doParse(CharReader &reader, ParserContext &ctx) override;
+};
+
+}
+
+#endif /* _OUSIA_XML_PARSER_HPP_ */
+
-- 
cgit v1.2.3


From 98f43328e566b3a77b75808892246a295adb0eb0 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sat, 14 Feb 2015 23:59:43 +0100
Subject: Renamed osdm to osml and osdmx to osxml

---
 src/formats/osml/OsmlStreamParser.cpp      |  32 +-
 src/formats/osml/OsmlStreamParser.hpp      |  22 +-
 src/formats/osxml/OsxmlEventParser.cpp     | 524 ++++++++++++++++
 src/formats/osxml/OsxmlEventParser.hpp     | 205 ++++++
 src/formats/osxml/OsxmlParser.cpp          | 337 ----------
 test/formats/osdm/OsdmStreamParserTest.cpp | 973 -----------------------------
 test/formats/osdmx/OsdmxParserTest.cpp     | 314 ----------
 7 files changed, 756 insertions(+), 1651 deletions(-)
 create mode 100644 src/formats/osxml/OsxmlEventParser.cpp
 create mode 100644 src/formats/osxml/OsxmlEventParser.hpp
 delete mode 100644 test/formats/osdm/OsdmStreamParserTest.cpp
 delete mode 100644 test/formats/osdmx/OsdmxParserTest.cpp

(limited to 'src/formats')

diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 6a55f12..6b00eef 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -21,7 +21,7 @@
 #include <core/common/Utils.hpp>
 #include <core/common/VariantReader.hpp>
 
-#include "OsdmStreamParser.hpp"
+#include "OsmlStreamParser.hpp"
 
 namespace ousia {
 
@@ -160,14 +160,14 @@ public:
 	}
 };
 
-OsdmStreamParser::OsdmStreamParser(CharReader &reader, Logger &logger)
+OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger)
     : reader(reader), logger(logger), tokenizer(Tokens)
 {
 	// Place an intial command representing the complete file on the stack
 	commands.push(Command{"", Variant::mapType{}, true, true, true});
 }
 
-Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep)
+Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep)
 {
 	bool first = true;
 	bool hasCharSiceNSSep = false;
@@ -210,7 +210,7 @@ Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep)
 	return res;
 }
 
-OsdmStreamParser::State OsdmStreamParser::parseBeginCommand()
+OsmlStreamParser::State OsmlStreamParser::parseBeginCommand()
 {
 	// Expect a '{' after the command
 	reader.consumeWhitespace();
@@ -251,7 +251,7 @@ OsdmStreamParser::State OsdmStreamParser::parseBeginCommand()
 	return State::COMMAND;
 }
 
-static bool checkStillInField(const OsdmStreamParser::Command &cmd,
+static bool checkStillInField(const OsmlStreamParser::Command &cmd,
                               const Variant &endName, Logger &logger)
 {
 	if (cmd.inField && !cmd.inRangeField) {
@@ -264,7 +264,7 @@ static bool checkStillInField(const OsdmStreamParser::Command &cmd,
 	return false;
 }
 
-OsdmStreamParser::State OsdmStreamParser::parseEndCommand()
+OsmlStreamParser::State OsmlStreamParser::parseEndCommand()
 {
 	// Expect a '{' after the command
 	if (!reader.expect('{')) {
@@ -327,7 +327,7 @@ OsdmStreamParser::State OsdmStreamParser::parseEndCommand()
 	return cmd.inRangeField ? State::FIELD_END : State::NONE;
 }
 
-Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName)
+Variant OsmlStreamParser::parseCommandArguments(Variant commandArgName)
 {
 	// Parse the arguments using the universal VariantReader
 	Variant commandArguments;
@@ -353,7 +353,7 @@ Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName)
 	return commandArguments;
 }
 
-void OsdmStreamParser::pushCommand(Variant commandName,
+void OsmlStreamParser::pushCommand(Variant commandName,
                                    Variant commandArguments, bool hasRange)
 {
 	// Store the location on the stack
@@ -368,7 +368,7 @@ void OsdmStreamParser::pushCommand(Variant commandName,
 	                      hasRange, false, false});
 }
 
-OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)
+OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
 {
 	// Parse the commandName as a first identifier
 	Variant commandName = parseIdentifier(start, true);
@@ -416,7 +416,7 @@ OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)
 	return State::COMMAND;
 }
 
-void OsdmStreamParser::parseBlockComment()
+void OsmlStreamParser::parseBlockComment()
 {
 	Token token;
 	size_t depth = 1;
@@ -436,7 +436,7 @@ void OsdmStreamParser::parseBlockComment()
 	logger.error("File ended while being in a block comment", reader);
 }
 
-void OsdmStreamParser::parseLineComment()
+void OsmlStreamParser::parseLineComment()
 {
 	char c;
 	while (reader.read(c)) {
@@ -446,7 +446,7 @@ void OsdmStreamParser::parseLineComment()
 	}
 }
 
-bool OsdmStreamParser::checkIssueData(DataHandler &handler)
+bool OsmlStreamParser::checkIssueData(DataHandler &handler)
 {
 	if (!handler.isEmpty()) {
 		data = handler.toVariant(reader.getSourceId());
@@ -457,7 +457,7 @@ bool OsdmStreamParser::checkIssueData(DataHandler &handler)
 	return false;
 }
 
-bool OsdmStreamParser::checkIssueFieldStart()
+bool OsmlStreamParser::checkIssueFieldStart()
 {
 	// Fetch the current command, and check whether we're currently inside a
 	// field of this command
@@ -482,7 +482,7 @@ bool OsdmStreamParser::checkIssueFieldStart()
 	return false;
 }
 
-OsdmStreamParser::State OsdmStreamParser::parse()
+OsmlStreamParser::State OsmlStreamParser::parse()
 {
 	// Handler for incomming data
 	DataHandler handler;
@@ -627,12 +627,12 @@ OsdmStreamParser::State OsdmStreamParser::parse()
 	return State::END;
 }
 
-const Variant &OsdmStreamParser::getCommandName()
+const Variant &OsmlStreamParser::getCommandName()
 {
 	return commands.top().name;
 }
 
-const Variant &OsdmStreamParser::getCommandArguments()
+const Variant &OsmlStreamParser::getCommandArguments()
 {
 	return commands.top().arguments;
 }
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index 84674c0..1508012 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -17,17 +17,17 @@
 */
 
 /**
- * @file OsdmStreamParser.hpp
+ * @file OsmlStreamParser.hpp
  *
- * Provides classes for low-level classes for reading the TeX-esque osdm
+ * Provides classes for low-level classes for reading the TeX-esque osml
  * format. The class provided here does not build any model objects and does not
  * implement the Parser interface.
  *
  * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
  */
 
-#ifndef _OUSIA_OSDM_STREAM_PARSER_HPP_
-#define _OUSIA_OSDM_STREAM_PARSER_HPP_
+#ifndef _OUSIA_OSML_STREAM_PARSER_HPP_
+#define _OUSIA_OSML_STREAM_PARSER_HPP_
 
 #include <stack>
 
@@ -42,7 +42,7 @@ class Logger;
 class DataHandler;
 
 /**
- * The OsdmStreamParser class provides a low-level reader for the TeX-esque osdm
+ * The OsmlStreamParser class provides a low-level reader for the TeX-esque osml
  * format. The parser is constructed around a "parse" function, which reads data
  * from the underlying CharReader until a new state is reached and indicates
  * this state in a return value. The calling code then has to pull corresponding
@@ -52,10 +52,10 @@ class DataHandler;
  * fields, as this would lead to too many consecutive errors) a
  * LoggableException is thrown.
  */
-class OsdmStreamParser {
+class OsmlStreamParser {
 public:
 	/**
-	 * Enum used to indicate which state the OsdmStreamParser class is in
+	 * Enum used to indicate which state the OsmlStreamParser class is in
 	 * after calling the "parse" function.
 	 */
 	enum class State {
@@ -291,14 +291,14 @@ private:
 
 public:
 	/**
-	 * Constructor of the OsdmStreamParser class. Attaches the new
-	 * OsdmStreamParser to the given CharReader and Logger instances.
+	 * Constructor of the OsmlStreamParser class. Attaches the new
+	 * OsmlStreamParser to the given CharReader and Logger instances.
 	 *
 	 * @param reader is the reader instance from which incomming characters
 	 * should be read.
 	 * @param logger is the logger instance to which errors should be written.
 	 */
-	OsdmStreamParser(CharReader &reader, Logger &logger);
+	OsmlStreamParser(CharReader &reader, Logger &logger);
 
 	/**
 	 * Continues parsing. Returns one of the states defined in the State enum.
@@ -346,5 +346,5 @@ public:
 };
 }
 
-#endif /* _OUSIA_OSDM_STREAM_PARSER_HPP_ */
+#endif /* _OUSIA_OSML_STREAM_PARSER_HPP_ */
 
diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp
new file mode 100644
index 0000000..2ef170e
--- /dev/null
+++ b/src/formats/osxml/OsxmlEventParser.cpp
@@ -0,0 +1,524 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <expat.h>
+
+#include <core/common/Logger.hpp>
+#include <core/common/Variant.hpp>
+#include <core/common/Utils.hpp>
+
+#include "OsxmlEventParser.hpp"
+
+namespace ousia {
+
+/**
+ * Class containing data used by the internal functions.
+ */
+class OsxmlEventParserData {
+public:
+	/**
+	 * Contains the current depth of the parsing process.
+	 */
+	ssize_t depth;
+
+	/**
+	 * Set to a value larger or equal to zero if the parser is currently inside
+	 * an annotation end tag -- the value represents the depth in which the
+	 * tag was opened. Negative while no annotation end tag is open.
+	 */
+	ssize_t annotationEndTagDepth;
+
+	/** Default constructor: depth zero, not inside an annotation end tag. */
+	OsxmlEventParserData() : depth(0), annotationEndTagDepth(-1) {}
+
+	/** Increments the depth. */
+	void incrDepth() { depth++; }
+
+	/**
+	 * Decrement the depth and reset the annotationEndTagDepth flag.
+	 */
+	void decrDepth()
+	{
+		if (depth > 0) {
+			depth--;
+		}
+		if (depth < annotationEndTagDepth) {
+			annotationEndTagDepth = -1;
+		}
+	}
+
+	/**
+	 * Returns true if we're currently inside an annotation end tag, i.e. the
+	 * marker is set and the current depth has not dropped below it.
+	 */
+	bool inAnnotationEndTag()
+	{
+		return annotationEndTagDepth >= 0 && depth >= annotationEndTagDepth;
+	}
+};
+
+namespace {
+/**
+ * Wrapper class around the XML_Parser pointer which safely frees it whenever
+ * the scope is left (e.g. because an exception was thrown).
+ */
+class ScopedExpatXmlParser {
+private:
+	/**
+	 * Internal pointer to the XML_Parser instance.
+	 */
+	XML_Parser parser;
+
+public:
+	/**
+	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreate
+	 * from the expat library. Throws a LoggableException if the XML parser
+	 * cannot be initialized.
+	 *
+	 * @param encoding is the protocol-defined encoding passed to expat (or
+	 * nullptr if expat should determine the encoding by itself).
+	 */
+	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
+	{
+		parser = XML_ParserCreate(encoding);
+		if (!parser) {
+			throw LoggableException{
+			    "Internal error: Could not create expat XML parser!"};
+		}
+	}
+
+	// The instance owns the parser handle exclusively; a copy would free the
+	// same handle a second time, so copying is disabled.
+	ScopedExpatXmlParser(const ScopedExpatXmlParser &) = delete;
+	ScopedExpatXmlParser &operator=(const ScopedExpatXmlParser &) = delete;
+
+	/** Destructor, frees the XML parser instance (if one was created). */
+	~ScopedExpatXmlParser()
+	{
+		if (parser) {
+			XML_ParserFree(parser);
+		}
+	}
+
+	/** Returns the XML_Parser pointer. */
+	XML_Parser operator&() { return parser; }
+};
+
+/**
+ * Enum used internally in the state machine of the micro-xml argument parser.
+ */
+enum class XmlAttributeState {
+	IN_TAG_NAME,      // initial state, inside the name of the tag
+	SEARCH_ATTR,      // skipping whitespace in front of an attribute name
+	IN_ATTR_NAME,     // collecting the characters of an attribute name
+	HAS_ATTR_NAME,    // name ended with whitespace, waiting for the '='
+	HAS_ATTR_EQUALS,  // '=' was read, waiting for the opening quote
+	IN_ATTR_DATA      // inside the quoted value, until the closing quote
+};
+
+/**
+ * Function used to reconstruct the location of the attributes of a XML tag in
+ * the source code. This is necessary, as the xml parser only returns an offset
+ * to the beginning of a tag, not the position of the individual arguments.
+ * Attribute values may be delimited by double or single quotes.
+ *
+ * @param reader is the char reader from which the character data should be
+ * read.
+ * @param offs is a byte offset in the xml file pointing at the "<" character of
+ * the tag.
+ * @return a map from attribute keys to the corresponding location (including
+ * range) of the attribute value. Also contains the location of the tag name in
+ * the form of the virtual attribute "$tag" if the tag name is followed by
+ * whitespace. The map is empty if the reader cannot seek to offs.
+ */
+static std::map<std::string, SourceLocation> xmlReconstructAttributeOffsets(
+    CharReader &reader, size_t offs)
+{
+	std::map<std::string, SourceLocation> res;
+
+	// Fork the reader, we don't want to mess up the XML parsing process, do we?
+	CharReaderFork readerFork = reader.fork();
+
+	// Move the read cursor to the start location, abort if this does not work
+	if (offs != readerFork.seek(offs)) {
+		return res;
+	}
+
+	// Now all we need to do is to implement one half of an XML parser. As this
+	// is inherently complicated we'll totally fail at it. Don't care. All we
+	// want to get is those darn offsets for pretty error messages... (and we
+	// can assume the XML is valid as it was already read by expat)
+	XmlAttributeState state = XmlAttributeState::IN_TAG_NAME;
+	char c;
+	char quote = '"';  // delimiter that opened the current attribute value
+	std::string attrName;
+	while (readerFork.read(c)) {
+		// Abort at the end of the tag
+		if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) {
+			return res;
+		}
+
+		// One state machine to rule them all, one state machine to find them,
+		// One state machine to bring them all and in the darkness bind them
+		// (the byte offsets)
+		switch (state) {
+			case XmlAttributeState::IN_TAG_NAME:
+				if (Utils::isWhitespace(c)) {
+					res.emplace("$tag",
+					            SourceLocation{reader.getSourceId(), offs + 1,
+					                           readerFork.getOffset() - 1});
+					state = XmlAttributeState::SEARCH_ATTR;
+				}
+				break;
+			case XmlAttributeState::SEARCH_ATTR:
+				if (!Utils::isWhitespace(c)) {
+					state = XmlAttributeState::IN_ATTR_NAME;
+					attrName.push_back(c);
+				}
+				break;
+			case XmlAttributeState::IN_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					state = XmlAttributeState::HAS_ATTR_NAME;
+				} else if (c == '=') {
+					state = XmlAttributeState::HAS_ATTR_EQUALS;
+				} else {
+					attrName.push_back(c);
+				}
+				break;
+			case XmlAttributeState::HAS_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					// Skip whitespace here
+					break;
+				}
+				if (c == '=') {
+					state = XmlAttributeState::HAS_ATTR_EQUALS;
+					break;
+				}
+			// Well, this is a strange XML file... We expected to see a '='
+			// here! Fall through to "HAS_ATTR_EQUALS" for error recovery
+			case XmlAttributeState::HAS_ATTR_EQUALS:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '"' || c == '\'') {
+						// Here we are! We have found the beginning of an
+						// attribute value (XML allows both quote characters).
+						// Lock the current offset away in the result map
+						quote = c;
+						res.emplace(attrName,
+						            SourceLocation{reader.getSourceId(),
+						                           readerFork.getOffset()});
+						state = XmlAttributeState::IN_ATTR_DATA;
+					} else {
+						// No, this XML file is not well formed. Assume we're in
+						// an attribute name once again
+						attrName.assign(1, c);
+						state = XmlAttributeState::IN_ATTR_NAME;
+					}
+				}
+				break;
+			case XmlAttributeState::IN_ATTR_DATA:
+				if (c == quote) {
+					// We're at the end of the attribute data, set the end
+					// location
+					auto it = res.find(attrName);
+					if (it != res.end()) {
+						it->second.setEnd(readerFork.getOffset() - 1);
+					}
+
+					// Reset the attribute name and restart the search
+					attrName.clear();
+					state = XmlAttributeState::SEARCH_ATTR;
+				}
+				break;
+		}
+	}
+	return res;
+}
+
+/**
+ * Synchronizes the position of the xml parser with the default location of the
+ * logger instance.
+ *
+ * @param p is a pointer at the xml parser instance.
+ * @param len is the length of the string that should be referred to.
+ * @return the SourceLocation that has been set in the logger.
+ */
+static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0)
+{
+	// Fetch the OsxmlEventParser instance
+	OsxmlEventParser *parser =
+	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
+
+	// Fetch the current location in the XML file and set the default location
+	// in the logger
+	size_t offs = XML_GetCurrentByteIndex(p);
+	SourceLocation loc =
+	    SourceLocation{parser->getReader().getSourceId(), offs, offs + len};
+	parser->getLogger().setDefaultLocation(loc);
+
+	// Return the fetched location
+	return loc;
+}
+
+/**
+ * Prefix used to indicate the start of an annotation.
+ */
+static const std::string ANNOTATION_START_PREFIX{"a:start:"};
+
+/**
+ * Prefix used to indicate the end of an annotation (no trailing colon).
+ */
+static const std::string ANNOTATION_END_PREFIX{"a:end"};
+
+/**
+ * Callback called by eXpat whenever a start handler is reached.
+ */
+static void xmlStartElementHandler(void *ref, const XML_Char *name,
+                                   const XML_Char **attrs)
+{
+	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
+	XML_Parser p = static_cast<XML_Parser>(ref);
+	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
+
+	// Read the argument locations -- this is only a stupid and slow hack,
+	// but it is necessary, as expat doesn't give use the byte offset of the
+	// arguments.
+	std::map<std::string, SourceLocation> attributeOffsets =
+	    xmlReconstructXMLAttributeOffsets(*userData->reader,
+	                                      XML_GetCurrentByteIndex(p));
+
+	// Update the logger position
+	SourceLocation loc = xmlSyncLoggerPosition(p);
+
+	// Fetch the location of the name
+	SourceLocation nameLoc = loc;
+	auto it = attributeOffsets.find("$tag");
+	if (it != attributeOffsets.end()) {
+		nameLoc = it->second;
+	}
+	// Increment the current depth
+	parser->getData().incrDepth();
+
+	// Make sure we're currently not inside an annotation end tag -- this would
+	// be highly illegal!
+	if (parser->getData().inAnnotationEndTag()) {
+		logger.error("No tags allowed inside an annotation end tag", nameLoc);
+		return;
+	}
+
+	// Assemble the arguments
+	Variant::mapType args;
+	const XML_Char **attr = attrs;
+	while (*attr) {
+		// Convert the C string to a std::string
+		const std::string key{*(attr++)};
+
+		// Search the location of the key
+		SourceLocation keyLoc;
+		auto it = attributeOffsets.find(key);
+		if (it != attributeOffsets.end()) {
+			keyLoc = it->second;
+		}
+
+		// Parse the string, pass the location of the key
+		std::pair<bool, Variant> value = VariantReader::parseGenericString(
+		    *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(),
+		    keyLoc.getStart());
+
+		// Set the overall location of the parsed element to the attribute
+		// location
+		value.second->setLocation(keyLoc);
+
+		// Store the
+		if (!args.emplace(key, value.second).second) {
+			parser->getLogger().warning(
+			    std::string("Attribute \"") + key +
+			        "\" defined multiple times, only using first definition",
+			    keyLoc);
+		}
+	}
+
+	// Fetch the name of the tag, check for special tags
+	std::string nameStr(name);
+	if (nameStr == "ousia" && parser->getData().depth == 1) {
+		// We're in the top-level and the magic "ousia" tag is reached -- just
+		// ignore it and issue a warning for each argument that has been given
+		for (const auto &arg : args) {
+			parser->getLogger().warning(
+			    std::string("Ignoring attribute \"") + arg.first +
+			        std::string("\" for magic tag \"ousia\""),
+			    arg.second);
+		}
+	} else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) {
+		// Assemble a name variant containing the name minus the prefix
+		Variant nameVar = nameStr.substr(ANNOTATION_START_PREFIX.size());
+		nameVar.setLocation(nameLoc);
+
+		// Issue the "annotationStart" event
+		parser->getEvents().annotationStart(nameVar, args);
+	} else if (Utils::startsWith(nameStr, ANNOTATION_END_PREFIX)) {
+		// Assemble a name variant containing the name minus the prefix
+		nameStr = nameStr.substr(ANNOTATION_END_PREFIX.size());
+
+		// Discard a potentially leading colon
+		if (!nameStr.empty() && nameStr[0] == ':') {
+			nameStr = nameStr.substr(1);
+		}
+
+		// Assemble the variant containing the name and its location
+		Variant nameVar = Variant::fromString(nameStr);
+		nameVar.setLocation(nameLoc);
+
+		// Check whether a "name" attribute was given
+		Variant elementName;
+		for (const auto &arg : args) {
+			if (arg.first == "name") {
+				elementName = arg.second;
+			} else {
+				parser->getLogger().warning(
+				    std::string("Ignoring attribute \"") + arg.first +
+				        "\" in annotation end tag",
+				    arg.second);
+			}
+		}
+
+		// Set the annotationEndTagDepth to disallow any further tags to be
+		// opened inside the annotation end tag.
+		parser->getData().annotationEndTagDepth = parser->getData().depth;
+
+		// Issue the "annotationEnd" event
+		parser->getEvents().annotationEnd(nameVar, args);
+	} else {
+		// Just issue a "commandStart" event in any other case
+		Variant nameVar = Variant::fromString(nameStr);
+		nameVar.setLocation(nameLoc);
+		parser->getEvents().commandStart(nameVar, args);
+	}
+}
+
+static void xmlEndElementHandler(void *ref, const XML_Char *name)
+{
+	// expat hands the XML_Parser itself in as "ref", the user data is the
+	XML_Parser p = static_cast<XML_Parser>(ref);
+	auto *parser = static_cast<OsxmlEventParser *>(XML_GetUserData(p));
+
+	// Synchronize the position of the logger with the current XML position
+	xmlSyncLoggerPosition(p);
+
+	// Decrement the current depth
+	parser->getData().decrDepth();
+
+	// Abort as long as we're in an annotation end tag
+	if (parser->getData().inAnnotationEndTag()) {
+		return;
+	}
+
+	// Abort if the special ousia tag ends here
+	if (std::string(name) == "ousia" && parser->getData().depth == 0) {
+		return;
+	}
+
+	// Issue the "fieldEnd" event
+	parser->getEvents().fieldEnd();
+}
+
+static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len)
+{
+	// expat hands the XML_Parser itself in as "ref", the user data is the
+	XML_Parser p = static_cast<XML_Parser>(ref);
+	auto *parser = static_cast<OsxmlEventParser *>(XML_GetUserData(p));
+
+	// TODO: character data is not forwarded yet, old implementation below
+/*	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
+	syncLoggerPosition(parser, ulen);
+	const std::string data = Utils::trim(std::string{s, ulen});
+	if (!data.empty()) {
+		stack->data(data);
+	}*/
+}
+}
+
+/* Class OsxmlEventParser */
+
+OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events,
+                                   Logger &logger)
+    : reader(reader),
+      events(events),
+      logger(logger),
+      whitespaceMode(WhitespaceMode::COLLAPSE),
+      data(new OsxmlEventParserData())
+{
+}
+
+void OsxmlEventParser::parse()
+{
+	// Create the parser object
+	ScopedExpatXmlParser p{"UTF-8"};
+
+	// Reset the depth
+	data->depth = 0;
+
+	// Register this instance as user data; expat passes the XML_Parser itself
+	// to the handlers, which fetch this instance via XML_GetUserData
+	XML_SetUserData(&p, this);
+	XML_UseParserAsHandlerArg(&p);
+
+	// Set the callback functions
+	XML_SetStartElementHandler(&p, xmlStartElementHandler);
+	XML_SetEndElementHandler(&p, xmlEndElementHandler);
+	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler);
+
+	// Feed data into expat while there is data to process
+	constexpr size_t BUFFER_SIZE = 64 * 1024;
+	while (true) {
+		// Fetch a buffer from expat for the input data
+		char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE));
+		if (!buf) {
+			throw OusiaException{"Internal error: XML parser out of memory!"};
+		}
+
+		// Read from the CharReader member into the expat buffer
+		size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE);
+
+		// Parse the data and handle any XML error as exception
+		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) {
+			throw LoggableException{
+			    "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))},
+			    xmlSyncLoggerPosition(&p)};
+		}
+
+		// Abort once there are no more bytes in the stream
+		if (bytesRead == 0) {
+			break;
+		}
+	}
+}
+
+void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode)
+{
+	this->whitespaceMode = whitespaceMode;
+}
+
+CharReader &OsxmlEventParser::getCharReader() { return reader; }
+
+Logger &OsxmlEventParser::getLogger() { return logger; }
+
+OsxmlEvents &OsxmlEventParser::getEvents() { return events; }
+
+OsxmlEventParserData &OsxmlEventParser::getData() { return *data; }
+}
+
diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp
new file mode 100644
index 0000000..5319ca6
--- /dev/null
+++ b/src/formats/osxml/OsxmlEventParser.hpp
@@ -0,0 +1,205 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsxmlEventParser.hpp
+ *
+ * The OsxmlEventParser class is responsible for parsing an XML file and calling
+ * the corresponding event handler functions if an XML item is found. Event
+ * handling is performed using a listener interface.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OSXML_EVENT_PARSER_HPP_
+#define _OSXML_EVENT_PARSER_HPP_
+
+#include <memory>
+#include <string>
+
+#include <core/common/Whitespace.hpp>
+
+namespace ousia {
+
+// Forward declarations
+class Logger;
+class Variant;
+class OsxmlEventParserData;
+
+/**
+ * Interface which defines the callback functions which are called by the
+ * OsxmlEventParser whenever an event occurs.
+ */
+class OsxmlEvents {
+public:
+	/**
+	 * Virtual destructor.
+	 */
+	virtual ~OsxmlEvents() {}
+
+	/**
+	 * Called whenever a command starts. Note that this implicitly always starts
+	 * the default field of the command.
+	 *
+	 * @param name is a string variant containing name and location of the
+	 * command.
+	 * @param args is a map variant containing the arguments that were given
+	 * to the command.
+	 */
+	virtual void commandStart(Variant name, Variant args) = 0;
+
+	/**
+	 * Called whenever an annotation starts. Note that this implicitly always
+	 * starts the default field of the annotation.
+	 *
+	 * @param name is a string variant containing the name of the annotation
+	 * class and the location of the annotation definition.
+	 * @param args is a map variant containing the arguments that were given
+	 * to the annotation definition.
+	 */
+	virtual void annotationStart(Variant name, Variant args);
+
+	/**
+	 * Called whenever the range of an annotation ends. The callee must
+	 * disambiguate the actual annotation that is finished here.
+	 *
+	 * @param name is a string variant containing the name of the annotation
+	 * class that should end here. May be empty (or nullptr), if no class name
+	 * has been specified at the end of the annotation.
+	 * @param elementName is the name of the annotation element that should be
+	 * ended here. May be empty (or nullptr), if no elementName has been
+	 * specified at the end of the annotation.
+	 */
+	virtual void annotationEnd(Variant name, Variant elementName);
+
+	/**
+	 * Called whenever the default field which was implicitly started by
+	 * commandStart or annotationStart ends. Note that this does not end the
+	 * range of an annotation, but the default field of the annotation. To
+	 * signal the end of the annotation range, the annotationEnd method will be
+	 * invoked.
+	 */
+	virtual void fieldEnd() = 0;
+
+	/**
+	 * Called whenever data is found. Whitespace data is handled as specified
+	 * and the data has been parsed to the specified variant type. This function
+	 * is not called if the parsing failed, the parser prints an error message
+	 * instead.
+	 *
+	 * @param data is the already parsed data that should be passed to the
+	 * handler.
+	 */
+	virtual void data(Variant data) = 0;
+
+};
+
+/**
+ * The OsxmlEventParser class is a wrapper around eXpat which implements the
+ * specialities of the osxml formats class (like annotation ranges). It notifies
+ * a specified event handler whenever a command, annotation or data has been
+ * reached.
+ */
+class OsxmlEventParser {
+private:
+	/**
+	 * Reference at the internal CharReader instance.
+	 */
+	CharReader &reader;
+
+	/**
+	 * Set of callback functions to be called whenever an event is triggered.
+	 */
+	OsxmlEvents &events;
+
+	/**
+	 * Reference at the Logger object to which error messages or warnings should
+	 * be logged.
+	 */
+	Logger &logger;
+
+	/**
+	 * Current whitespace mode.
+	 */
+	WhitespaceMode whitespaceMode;
+
+	/**
+	 * Data to be used by the internal functions.
+	 */
+	std::unique_ptr<OsxmlEventParserData> data;
+
+public:
+	/**
+	 * Constructor of the OsxmlEventParser. Takes a reference at the OsxmlEvents
+	 * of which the callback functions are called.
+	 *
+	 * @param reader is a reference to the CharReader instance from which the
+	 * XML should be read.
+	 * @param events is a reference at an instance of the OsxmlEvents class. All
+	 * events are forwarded to this class.
+	 * @param logger is the Logger instance to which log messages should be
+	 * written.
+	 */
+	OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger);
+
+	/**
+	 * Performs the actual parsing. Reads the XML using eXpat and calls the
+	 * callbacks in the event listener instance whenever something interesting
+	 * happens.
+	 */
+	void parse();
+
+	/**
+	 * Sets the whitespace handling mode.
+	 *
+	 * @param whitespaceMode defines how whitespace in the data should be
+	 * handled.
+	 */
+	void setWhitespaceMode(WhitespaceMode whitespaceMode);
+
+	/**
+	 * Returns the internal CharReader reference.
+	 *
+	 * @return the CharReader reference.
+	 */
+	CharReader &getCharReader();
+
+	/**
+	 * Returns the internal Logger reference.
+	 *
+	 * @return the internal Logger reference.
+	 */
+	Logger &getLogger();
+
+	/**
+	 * Returns the internal OsxmlEvents reference.
+	 *
+	 * @return the internal OsxmlEvents reference.
+	 */
+	OsxmlEvents &getEvents();
+
+	/**
+	 * Returns a reference at the internal data.
+	 */
+	OsxmlEventParserData &getData();
+};
+
+}
+
+#endif /* _OSXML_EVENT_PARSER_HPP_ */
+
diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp
index c46d9de..4f6503c 100644
--- a/src/formats/osxml/OsxmlParser.cpp
+++ b/src/formats/osxml/OsxmlParser.cpp
@@ -1093,343 +1093,6 @@ static const std::multimap<std::string, const ParserState *> XmlStates{
     {"include", &Include}};
 }
 
-/**
- * Structue containing the private data that is being passed to the
- * XML-Handlers.
- */
-struct XMLUserData {
-	/**
-	 * Containing the depth of the current XML file
-	 */
-	size_t depth;
-
-	/**
-	 * Reference at the ParserStack instance.
-	 */
-	ParserStack *stack;
-
-	/**
-	 * Reference at the CharReader instance.
-	 */
-	CharReader *reader;
-
-	/**
-	 * Constructor of the XMLUserData struct.
-	 *
-	 * @param stack is a pointer at the ParserStack instance.
-	 * @param reader is a pointer at the CharReader instance.
-	 */
-	XMLUserData(ParserStack *stack, CharReader *reader)
-	    : depth(0), stack(stack), reader(reader)
-	{
-	}
-};
-
-/**
- * Wrapper class around the XML_Parser pointer which safely frees it whenever
- * the scope is left (e.g. because an exception was thrown).
- */
-class ScopedExpatXmlParser {
-private:
-	/**
-	 * Internal pointer to the XML_Parser instance.
-	 */
-	XML_Parser parser;
-
-public:
-	/**
-	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS
-	 * from the expat library. Throws a parser exception if the XML parser
-	 * cannot be initialized.
-	 *
-	 * @param encoding is the protocol-defined encoding passed to expat (or
-	 * nullptr if expat should determine the encoding by itself).
-	 */
-	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
-	{
-		parser = XML_ParserCreate(encoding);
-		if (!parser) {
-			throw LoggableException{
-			    "Internal error: Could not create expat XML parser!"};
-		}
-	}
-
-	/**
-	 * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance.
-	 */
-	~ScopedExpatXmlParser()
-	{
-		if (parser) {
-			XML_ParserFree(parser);
-			parser = nullptr;
-		}
-	}
-
-	/**
-	 * Returns the XML_Parser pointer.
-	 */
-	XML_Parser operator&() { return parser; }
-};
-
-/* Adapter Expat -> ParserStack */
-
-static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0)
-{
-	// Fetch the parser stack and the associated user data
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	// Fetch the current location in the XML file
-	size_t offs = XML_GetCurrentByteIndex(p);
-
-	// Build the source location and update the default location of the
-	// current
-	// logger instance
-	SourceLocation loc{stack->getContext().getSourceId(), offs, offs + len};
-	stack->getContext().getLogger().setDefaultLocation(loc);
-	return loc;
-}
-
-enum class XMLAttributeState {
-	IN_TAG_NAME,
-	SEARCH_ATTR,
-	IN_ATTR_NAME,
-	HAS_ATTR_NAME,
-	HAS_ATTR_EQUALS,
-	IN_ATTR_DATA
-};
-
-static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets(
-    CharReader &reader, SourceLocation location)
-{
-	std::map<std::string, SourceLocation> res;
-
-	// Fork the reader, we don't want to mess up the XML parsing process, do we?
-	CharReaderFork readerFork = reader.fork();
-
-	// Move the read cursor to the start location, abort if this does not work
-	size_t offs = location.getStart();
-	if (!location.isValid() || offs != readerFork.seek(offs)) {
-		return res;
-	}
-
-	// Now all we need to do is to implement one half of an XML parser. As this
-	// is inherently complicated we'll totaly fail at it. Don't care. All we
-	// want to get is those darn offsets for pretty error messages... (and we
-	// can assume the XML is valid as it was already read by expat)
-	XMLAttributeState state = XMLAttributeState::IN_TAG_NAME;
-	char c;
-	std::stringstream attrName;
-	while (readerFork.read(c)) {
-		// Abort at the end of the tag
-		if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) {
-			return res;
-		}
-
-		// One state machine to rule them all, one state machine to find them,
-		// One state machine to bring them all and in the darkness bind them
-		// (the byte offsets)
-		switch (state) {
-			case XMLAttributeState::IN_TAG_NAME:
-				if (Utils::isWhitespace(c)) {
-					state = XMLAttributeState::SEARCH_ATTR;
-				}
-				break;
-			case XMLAttributeState::SEARCH_ATTR:
-				if (!Utils::isWhitespace(c)) {
-					state = XMLAttributeState::IN_ATTR_NAME;
-					attrName << c;
-				}
-				break;
-			case XMLAttributeState::IN_ATTR_NAME:
-				if (Utils::isWhitespace(c)) {
-					state = XMLAttributeState::HAS_ATTR_NAME;
-				} else if (c == '=') {
-					state = XMLAttributeState::HAS_ATTR_EQUALS;
-				} else {
-					attrName << c;
-				}
-				break;
-			case XMLAttributeState::HAS_ATTR_NAME:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '=') {
-						state = XMLAttributeState::HAS_ATTR_EQUALS;
-						break;
-					}
-					// Well, this is a strange XML file... We expected to
-					// see a '=' here! Try to continue with the
-					// "HAS_ATTR_EQUALS" state as this state will hopefully
-					// inlcude some error recovery
-				} else {
-					// Skip whitespace here
-					break;
-				}
-			// Fallthrough
-			case XMLAttributeState::HAS_ATTR_EQUALS:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '"') {
-						// Here we are! We have found the beginning of an
-						// attribute. Let's quickly lock the current offset away
-						// in the result map
-						res.emplace(attrName.str(),
-						            SourceLocation{reader.getSourceId(),
-						                           readerFork.getOffset()});
-						attrName.str(std::string{});
-						state = XMLAttributeState::IN_ATTR_DATA;
-					} else {
-						// No, this XML file is not well formed. Assume we're in
-						// an attribute name once again
-						attrName.str(std::string{&c, 1});
-						state = XMLAttributeState::IN_ATTR_NAME;
-					}
-				}
-				break;
-			case XMLAttributeState::IN_ATTR_DATA:
-				if (c == '"') {
-					// We're at the end of the attribute data, start anew
-					state = XMLAttributeState::SEARCH_ATTR;
-				}
-				break;
-		}
-	}
-	return res;
-}
-
-static void xmlStartElementHandler(void *p, const XML_Char *name,
-                                   const XML_Char **attrs)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	SourceLocation loc = syncLoggerPosition(parser);
-
-	// Read the argument locations -- this is only a stupid and slow hack,
-	// but it is necessary, as expat doesn't give use the byte offset of the
-	// arguments.
-	std::map<std::string, SourceLocation> offs =
-	    reconstructXMLAttributeOffsets(*userData->reader, loc);
-
-	// Assemble the arguments
-	Variant::mapType args;
-
-	const XML_Char **attr = attrs;
-	while (*attr) {
-		// Convert the C string to a std::string
-		const std::string key{*(attr++)};
-
-		// Search the location of the key
-		SourceLocation keyLoc;
-		auto it = offs.find(key);
-		if (it != offs.end()) {
-			keyLoc = it->second;
-		}
-
-		// Parse the string, pass the location of the key
-		std::pair<bool, Variant> value = VariantReader::parseGenericString(
-		    *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(),
-		    keyLoc.getStart());
-		args.emplace(key, value.second);
-	}
-
-	// Call the start function
-	std::string nameStr(name);
-	if (nameStr != "ousia" || userData->depth > 0) {
-		stack->start(std::string(name), args, loc);
-	}
-
-	// Increment the current depth
-	userData->depth++;
-}
-
-static void xmlEndElementHandler(void *p, const XML_Char *name)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	syncLoggerPosition(parser);
-
-	// Decrement the current depth
-	userData->depth--;
-
-	// Call the end function
-	std::string nameStr(name);
-	if (nameStr != "ousia" || userData->depth > 0) {
-		stack->end();
-	}
-}
 
-static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len)
-{
-	XML_Parser parser = static_cast<XML_Parser>(p);
-	XMLUserData *userData = static_cast<XMLUserData *>(XML_GetUserData(p));
-	ParserStack *stack = userData->stack;
-
-	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
-	syncLoggerPosition(parser, ulen);
-	const std::string data = Utils::trim(std::string{s, ulen});
-	if (!data.empty()) {
-		stack->data(data);
-	}
-}
-
-/* Class XmlParser */
-
-void XmlParser::doParse(CharReader &reader, ParserContext &ctx)
-{
-	// Create the parser object
-	ScopedExpatXmlParser p{"UTF-8"};
-
-	// Create the parser stack instance, if we're starting on a non-empty scope,
-	// try to deduce the parser state
-	ParserStack stack(ctx, ParserStates::XmlStates);
-	if (!ctx.getScope().isEmpty()) {
-		if (!stack.deduceState()) {
-			return;
-		}
-	}
-
-	// Pass the reference to the ParserStack to the XML handler
-	XMLUserData data(&stack, &reader);
-	XML_SetUserData(&p, &data);
-	XML_UseParserAsHandlerArg(&p);
-
-	// Set the callback functions
-	XML_SetStartElementHandler(&p, xmlStartElementHandler);
-	XML_SetEndElementHandler(&p, xmlEndElementHandler);
-	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler);
-
-	// Feed data into expat while there is data to process
-	constexpr size_t BUFFER_SIZE = 64 * 1024;
-	while (true) {
-		// Fetch a buffer from expat for the input data
-		char *buf = static_cast<char *>(XML_GetBuffer(&p, BUFFER_SIZE));
-		if (!buf) {
-			throw LoggableException{
-			    "Internal error: XML parser out of memory!"};
-		}
-
-		// Read into the buffer
-		size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE);
-
-		// Parse the data and handle any XML error
-		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) {
-			// Fetch the xml parser byte offset
-			size_t offs = XML_GetCurrentByteIndex(&p);
-
-			// Throw a corresponding exception
-			XML_Error code = XML_GetErrorCode(&p);
-			std::string msg = std::string{XML_ErrorString(code)};
-			throw LoggableException{"XML: " + msg,
-			                        SourceLocation{ctx.getSourceId(), offs}};
-		}
-
-		// Abort once there are no more bytes in the stream
-		if (bytesRead == 0) {
-			break;
-		}
-	}
-}
 }
 
diff --git a/test/formats/osdm/OsdmStreamParserTest.cpp b/test/formats/osdm/OsdmStreamParserTest.cpp
deleted file mode 100644
index 46f4cf6..0000000
--- a/test/formats/osdm/OsdmStreamParserTest.cpp
+++ /dev/null
@@ -1,973 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <gtest/gtest.h>
-
-#include <iostream>
-
-#include <core/common/CharReader.hpp>
-#include <core/frontend/TerminalLogger.hpp>
-
-#include <formats/osdm/OsdmStreamParser.hpp>
-
-namespace ousia {
-
-static TerminalLogger logger(std::cerr, true);
-
-TEST(OsdmStreamParser, empty)
-{
-	const char *testString = "";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, oneCharacter)
-{
-	const char *testString = "a";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("a", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(1U, loc.getEnd());
-}
-
-TEST(OsdmStreamParser, whitespaceElimination)
-{
-	const char *testString = " hello \t world ";
-	//                        0123456 78901234
-	//                        0          1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(14U, loc.getEnd());
-}
-
-TEST(OsdmStreamParser, whitespaceEliminationWithLinebreak)
-{
-	const char *testString = " hello \n world ";
-	//                        0123456 78901234
-	//                        0          1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(14U, loc.getEnd());
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, escapeWhitespace)
-{
-	const char *testString = " hello\\ \\ world ";
-	//                        012345 67 89012345
-	//                        0           1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello  world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(15U, loc.getEnd());
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-static void testEscapeSpecialCharacter(const std::string &c)
-{
-	CharReader charReader(std::string("\\") + c);
-	OsdmStreamParser reader(charReader, logger);
-	EXPECT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	EXPECT_EQ(c, reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	EXPECT_EQ(0U, loc.getStart());
-	EXPECT_EQ(1U + c.size(), loc.getEnd());
-}
-
-TEST(OsdmStreamParser, escapeSpecialCharacters)
-{
-	testEscapeSpecialCharacter("\\");
-	testEscapeSpecialCharacter("{");
-	testEscapeSpecialCharacter("}");
-	testEscapeSpecialCharacter("<");
-	testEscapeSpecialCharacter(">");
-}
-
-TEST(OsdmStreamParser, simpleSingleLineComment)
-{
-	const char *testString = "% This is a single line comment";
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, singleLineComment)
-{
-	const char *testString = "a% This is a single line comment\nb";
-	//                        01234567890123456789012345678901 23
-	//                        0         1         2         3
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("a", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(1U, loc.getEnd());
-	}
-
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("b", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(33U, loc.getStart());
-		ASSERT_EQ(34U, loc.getEnd());
-	}
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, multilineComment)
-{
-	const char *testString = "a%{ This is a\n\n multiline line comment}%b";
-	//                        0123456789012 3 456789012345678901234567890
-	//                        0         1           2         3         4
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("a", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(1U, loc.getEnd());
-	}
-
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("b", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(40U, loc.getStart());
-		ASSERT_EQ(41U, loc.getEnd());
-	}
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, nestedMultilineComment)
-{
-	const char *testString = "a%{%{Another\n\n}%multiline line comment}%b";
-	//                        0123456789012 3 456789012345678901234567890
-	//                        0         1           2         3         4
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("a", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(0U, loc.getStart());
-		ASSERT_EQ(1U, loc.getEnd());
-	}
-
-	{
-		ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-		ASSERT_EQ("b", reader.getData().asString());
-		SourceLocation loc = reader.getData().getLocation();
-		ASSERT_EQ(40U, loc.getStart());
-		ASSERT_EQ(41U, loc.getEnd());
-	}
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, simpleCommand)
-{
-	const char *testString = "\\test";
-	//                        0 12345
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-
-	Variant commandName = reader.getCommandName();
-	ASSERT_EQ("test", commandName.asString());
-
-	SourceLocation loc = commandName.getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(5U, loc.getEnd());
-
-	ASSERT_EQ(0U, reader.getCommandArguments().asMap().size());
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, simpleCommandWithName)
-{
-	const char *testString = "\\test#bla";
-	//                        0 12345678
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-
-	Variant commandName = reader.getCommandName();
-	ASSERT_EQ("test", commandName.asString());
-	SourceLocation loc = commandName.getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(5U, loc.getEnd());
-
-	Variant commandArguments = reader.getCommandArguments();
-	ASSERT_TRUE(commandArguments.isMap());
-	ASSERT_EQ(1U, commandArguments.asMap().size());
-	ASSERT_EQ(1U, commandArguments.asMap().count("name"));
-	ASSERT_EQ("bla", commandArguments.asMap()["name"].asString());
-
-	loc = commandArguments.asMap()["name"].getLocation();
-	ASSERT_EQ(5U, loc.getStart());
-	ASSERT_EQ(9U, loc.getEnd());
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, simpleCommandWithArguments)
-{
-	const char *testString = "\\test[a=1,b=2,c=\"test\"]";
-	//                        0 123456789012345 678901 2
-	//                        0          1          2
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-
-	Variant commandName = reader.getCommandName();
-	ASSERT_EQ("test", commandName.asString());
-	SourceLocation loc = commandName.getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(5U, loc.getEnd());
-
-	Variant commandArguments = reader.getCommandArguments();
-	ASSERT_TRUE(commandArguments.isMap());
-	ASSERT_EQ(3U, commandArguments.asMap().size());
-	ASSERT_EQ(1U, commandArguments.asMap().count("a"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("b"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("c"));
-	ASSERT_EQ(1, commandArguments.asMap()["a"].asInt());
-	ASSERT_EQ(2, commandArguments.asMap()["b"].asInt());
-	ASSERT_EQ("test", commandArguments.asMap()["c"].asString());
-
-	loc = commandArguments.asMap()["a"].getLocation();
-	ASSERT_EQ(8U, loc.getStart());
-	ASSERT_EQ(9U, loc.getEnd());
-
-	loc = commandArguments.asMap()["b"].getLocation();
-	ASSERT_EQ(12U, loc.getStart());
-	ASSERT_EQ(13U, loc.getEnd());
-
-	loc = commandArguments.asMap()["c"].getLocation();
-	ASSERT_EQ(16U, loc.getStart());
-	ASSERT_EQ(22U, loc.getEnd());
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-TEST(OsdmStreamParser, simpleCommandWithArgumentsAndName)
-{
-	const char *testString = "\\test#bla[a=1,b=2,c=\"test\"]";
-	//                        0 1234567890123456789 01234 56
-	//                        0          1          2
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-
-	Variant commandName = reader.getCommandName();
-	ASSERT_EQ("test", commandName.asString());
-	SourceLocation loc = commandName.getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(5U, loc.getEnd());
-
-	Variant commandArguments = reader.getCommandArguments();
-	ASSERT_TRUE(commandArguments.isMap());
-	ASSERT_EQ(4U, commandArguments.asMap().size());
-	ASSERT_EQ(1U, commandArguments.asMap().count("a"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("b"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("c"));
-	ASSERT_EQ(1U, commandArguments.asMap().count("name"));
-	ASSERT_EQ(1, commandArguments.asMap()["a"].asInt());
-	ASSERT_EQ(2, commandArguments.asMap()["b"].asInt());
-	ASSERT_EQ("test", commandArguments.asMap()["c"].asString());
-	ASSERT_EQ("bla", commandArguments.asMap()["name"].asString());
-
-	loc = commandArguments.asMap()["a"].getLocation();
-	ASSERT_EQ(12U, loc.getStart());
-	ASSERT_EQ(13U, loc.getEnd());
-
-	loc = commandArguments.asMap()["b"].getLocation();
-	ASSERT_EQ(16U, loc.getStart());
-	ASSERT_EQ(17U, loc.getEnd());
-
-	loc = commandArguments.asMap()["c"].getLocation();
-	ASSERT_EQ(20U, loc.getStart());
-	ASSERT_EQ(26U, loc.getEnd());
-
-	loc = commandArguments.asMap()["name"].getLocation();
-	ASSERT_EQ(5U, loc.getStart());
-	ASSERT_EQ(9U, loc.getEnd());
-
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-}
-
-static void assertCommand(OsdmStreamParser &reader, const std::string &name,
-                          SourceOffset start = InvalidSourceOffset,
-                          SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::COMMAND, reader.parse());
-	EXPECT_EQ(name, reader.getCommandName().asString());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertCommand(OsdmStreamParser &reader, const std::string &name,
-                          const Variant::mapType &args,
-                          SourceOffset start = InvalidSourceOffset,
-                          SourceOffset end = InvalidSourceOffset)
-{
-	assertCommand(reader, name, start, end);
-	EXPECT_EQ(args, reader.getCommandArguments());
-}
-
-static void assertData(OsdmStreamParser &reader, const std::string &data,
-                       SourceOffset start = InvalidSourceOffset,
-                       SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::DATA, reader.parse());
-	EXPECT_EQ(data, reader.getData().asString());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getData().getLocation().getStart());
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getData().getLocation().getEnd());
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertFieldStart(OsdmStreamParser &reader,
-                             SourceOffset start = InvalidSourceOffset,
-                             SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::FIELD_START, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertFieldEnd(OsdmStreamParser &reader,
-                           SourceOffset start = InvalidSourceOffset,
-                           SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::FIELD_END, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertEnd(OsdmStreamParser &reader,
-                      SourceOffset start = InvalidSourceOffset,
-                      SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsdmStreamParser::State::END, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-TEST(OsdmStreamParser, fields)
-{
-	const char *testString = "\\test{a}{b}{c}";
-	//                         01234567890123
-	//                         0         1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertData(reader, "a", 6, 7);
-	assertFieldEnd(reader, 7, 8);
-
-	assertFieldStart(reader, 8, 9);
-	assertData(reader, "b", 9, 10);
-	assertFieldEnd(reader, 10, 11);
-
-	assertFieldStart(reader, 11, 12);
-	assertData(reader, "c", 12, 13);
-	assertFieldEnd(reader, 13, 14);
-	assertEnd(reader, 14, 14);
-}
-
-TEST(OsdmStreamParser, dataOutsideField)
-{
-	const char *testString = "\\test{a}{b} c";
-	//                         0123456789012
-	//                         0         1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertData(reader, "a", 6, 7);
-	assertFieldEnd(reader, 7, 8);
-
-	assertFieldStart(reader, 8, 9);
-	assertData(reader, "b", 9, 10);
-	assertFieldEnd(reader, 10, 11);
-
-	assertData(reader, "c", 12, 13);
-	assertEnd(reader, 13, 13);
-}
-
-TEST(OsdmStreamParser, nestedCommand)
-{
-	const char *testString = "\\test{a}{\\test2{b} c} d";
-	//                         012345678 90123456789012
-	//                         0          1         2
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-
-	assertFieldStart(reader, 5, 6);
-	assertData(reader, "a", 6, 7);
-	assertFieldEnd(reader, 7, 8);
-
-	assertFieldStart(reader, 8, 9);
-	{
-		assertCommand(reader, "test2", 9, 15);
-		assertFieldStart(reader, 15, 16);
-		assertData(reader, "b", 16, 17);
-		assertFieldEnd(reader, 17, 18);
-	}
-	assertData(reader, "c", 19, 20);
-	assertFieldEnd(reader, 20, 21);
-	assertData(reader, "d", 22, 23);
-	assertEnd(reader, 23, 23);
-}
-
-TEST(OsdmStreamParser, nestedCommandImmediateEnd)
-{
-	const char *testString = "\\test{\\test2{b}} d";
-	//                         012345 678901234567
-	//                         0          1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	{
-		assertCommand(reader, "test2", 6, 12);
-		assertFieldStart(reader, 12, 13);
-		assertData(reader, "b", 13, 14);
-		assertFieldEnd(reader, 14, 15);
-	}
-	assertFieldEnd(reader, 15, 16);
-	assertData(reader, "d", 17, 18);
-	assertEnd(reader, 18, 18);
-}
-
-TEST(OsdmStreamParser, nestedCommandNoData)
-{
-	const char *testString = "\\test{\\test2}";
-	//                         012345 6789012
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertCommand(reader, "test2", 6, 12);
-	assertFieldEnd(reader, 12, 13);
-	assertEnd(reader, 13, 13);
-}
-
-TEST(OsdmStreamParser, multipleCommands)
-{
-	const char *testString = "\\a \\b \\c \\d";
-	//                         012 345 678 90
-	//                         0            1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "a", 0, 2);
-	assertCommand(reader, "b", 3, 5);
-	assertCommand(reader, "c", 6, 8);
-	assertCommand(reader, "d", 9, 11);
-	assertEnd(reader, 11, 11);
-}
-
-TEST(OsdmStreamParser, fieldsWithSpaces)
-{
-	const char *testString = "\\a {\\b \\c}   \n\n {\\d}";
-	//                         0123 456 789012 3 456 789
-	//                         0           1
-	CharReader charReader(testString);
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "a", 0, 2);
-	assertFieldStart(reader, 3, 4);
-	assertCommand(reader, "b", 4, 6);
-	assertCommand(reader, "c", 7, 9);
-	assertFieldEnd(reader, 9, 10);
-	assertFieldStart(reader, 16, 17);
-	assertCommand(reader, "d", 17, 19);
-	assertFieldEnd(reader, 19, 20);
-	assertEnd(reader, 20, 20);
-}
-
-TEST(OsdmStreamParser, errorNoFieldToStart)
-{
-	const char *testString = "\\a b {";
-	//                         012345
-	//                         0
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "a", 0, 2);
-	assertData(reader, "b", 3, 4);
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader, 6, 6);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorNoFieldToEnd)
-{
-	const char *testString = "\\a b }";
-	//                         012345
-	//                         0
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "a", 0, 2);
-	assertData(reader, "b", 3, 4);
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader, 6, 6);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorNoFieldEndNested)
-{
-	const char *testString = "\\test{\\test2{}}}";
-	//                         012345 6789012345
-	//                         0          1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertCommand(reader, "test2", 6, 12);
-	assertFieldStart(reader, 12, 13);
-	assertFieldEnd(reader, 13, 14);
-	assertFieldEnd(reader, 14, 15);
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader, 16, 16);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorNoFieldEndNestedData)
-{
-	const char *testString = "\\test{\\test2{}}a}";
-	//                         012345 67890123456
-	//                         0          1
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
-	assertCommand(reader, "test2", 6, 12);
-	assertFieldStart(reader, 12, 13);
-	assertFieldEnd(reader, 13, 14);
-	assertFieldEnd(reader, 14, 15);
-	assertData(reader, "a", 15, 16);
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader, 17, 17);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, beginEnd)
-{
-	const char *testString = "\\begin{book}\\end{book}";
-	//                         012345678901 2345678901
-	//                         0         1          2
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
-	assertFieldEnd(reader, 17, 21);
-	assertEnd(reader, 22, 22);
-}
-
-TEST(OsdmStreamParser, beginEndWithName)
-{
-	const char *testString = "\\begin{book#a}\\end{book}";
-	//                         01234567890123 4567890123
-	//                         0         1          2
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book", {{"name", "a"}}, 7, 11);
-	assertFieldStart(reader, 14, 15);
-	assertFieldEnd(reader, 19, 23);
-	assertEnd(reader, 24, 24);
-}
-
-TEST(OsdmStreamParser, beginEndWithNameAndArgs)
-{
-	const char *testString = "\\begin{book#a}[a=1,b=2,c=\"test\"]\\end{book}";
-	//                         0123456789012345678901234 56789 01 2345678901
-	//                         0         1         2           3          4
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book",
-	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
-	assertFieldStart(reader, 32, 33);
-	assertFieldEnd(reader, 37, 41);
-	assertEnd(reader, 42, 42);
-}
-
-TEST(OsdmStreamParser, beginEndWithNameAndArgsMultipleFields)
-{
-	const char *testString =
-	    "\\begin{book#a}[a=1,b=2,c=\"test\"]{a \\test}{b \\test{}}\\end{book}";
-	//    0123456789012345678901234 56789 01234 567890123 45678901 2345678901
-	//    0         1         2           3          4          5          6
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book",
-	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
-	assertFieldStart(reader, 32, 33);
-	assertData(reader, "a", 33, 34);
-	assertCommand(reader, "test", Variant::mapType{}, 35, 40);
-	assertFieldEnd(reader, 40, 41);
-	assertFieldStart(reader, 41, 42);
-	assertData(reader, "b", 42, 43);
-	assertCommand(reader, "test", Variant::mapType{}, 44, 49);
-	assertFieldStart(reader, 49, 50);
-	assertFieldEnd(reader, 50, 51);
-	assertFieldEnd(reader, 51, 52);
-	assertFieldStart(reader, 52, 53);
-	assertFieldEnd(reader, 57, 61);
-	assertEnd(reader, 62, 62);
-}
-
-TEST(OsdmStreamParser, beginEndWithData)
-{
-	const char *testString = "\\begin{book}a\\end{book}";
-	//                         0123456789012 3456789012
-	//                         0         1          2
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
-	assertData(reader, "a", 12, 13);
-	assertFieldEnd(reader, 18, 22);
-	assertEnd(reader, 23, 23);
-}
-
-TEST(OsdmStreamParser, beginEndWithCommand)
-{
-	const char *testString = "\\begin{book}\\a{test}\\end{book}";
-	//                         012345678901 23456789 0123456789
-	//                         0         1           2
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
-	assertCommand(reader, "a", 12, 14);
-	assertFieldStart(reader, 14, 15);
-	assertData(reader, "test", 15, 19);
-	assertFieldEnd(reader, 19, 20);
-	assertFieldEnd(reader, 25, 29);
-	assertEnd(reader, 30, 30);
-}
-
-TEST(OsdmStreamParser, errorBeginNoBraceOpen)
-{
-	const char *testString = "\\begin a";
-	//                         01234567
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertData(reader, "a", 7, 8);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorBeginNoIdentifier)
-{
-	const char *testString = "\\begin{!";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorBeginNoBraceClose)
-{
-	const char *testString = "\\begin{a";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorBeginNoName)
-{
-	const char *testString = "\\begin{a#}";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertCommand(reader, "a");
-	ASSERT_TRUE(logger.hasError());
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertEnd(reader);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorEndNoBraceOpen)
-{
-	const char *testString = "\\end a";
-	//                         012345
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertData(reader, "a", 5, 6);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorEndNoIdentifier)
-{
-	const char *testString = "\\end{!";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorEndNoBraceClose)
-{
-	const char *testString = "\\end{a";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorEndNoBegin)
-{
-	const char *testString = "\\end{a}";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, errorBeginEndMismatch)
-{
-	const char *testString = "\\begin{a} \\begin{b} test \\end{a}";
-	//                         0123456789 012345678901234 5678901
-	//                         0          1         2          3
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "a", 7, 8);
-	assertFieldStart(reader, 10, 11);
-	assertCommand(reader, "b", 17, 18);
-	assertFieldStart(reader, 20, 24);
-	assertData(reader, "test", 20, 24);
-	ASSERT_FALSE(logger.hasError());
-	ASSERT_THROW(reader.parse(), LoggableException);
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(OsdmStreamParser, commandWithNSSep)
-{
-	const char *testString = "\\test1:test2";
-	//                         012345678901
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test1:test2", 0, 12);
-	assertEnd(reader, 12, 12);
-}
-
-TEST(OsdmStreamParser, beginEndWithNSSep)
-{
-	const char *testString = "\\begin{test1:test2}\\end{test1:test2}";
-	//                         0123456789012345678 90123456789012345
-	//                         0         1          2         3
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	assertCommand(reader, "test1:test2", 7, 18);
-	assertFieldStart(reader, 19, 20);
-	assertFieldEnd(reader, 24, 35);
-	assertEnd(reader, 36, 36);
-}
-
-TEST(OsdmStreamParser, errorBeginNSSep)
-{
-	const char *testString = "\\begin:test{blub}\\end{blub}";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertCommand(reader, "blub");
-	ASSERT_TRUE(logger.hasError());
-	assertFieldStart(reader);
-	assertFieldEnd(reader);
-	assertEnd(reader);
-}
-
-TEST(OsdmStreamParser, errorEndNSSep)
-{
-	const char *testString = "\\begin{blub}\\end:test{blub}";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	assertCommand(reader, "blub");
-	assertFieldStart(reader);
-	ASSERT_FALSE(logger.hasError());
-	assertFieldEnd(reader);
-	ASSERT_TRUE(logger.hasError());
-	assertEnd(reader);
-}
-
-TEST(OsdmStreamParser, errorEmptyNs)
-{
-	const char *testString = "\\test:";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertCommand(reader, "test");
-	ASSERT_TRUE(logger.hasError());
-	assertData(reader, ":");
-	assertEnd(reader);
-}
-
-TEST(OsdmStreamParser, errorRepeatedNs)
-{
-	const char *testString = "\\test::";
-	CharReader charReader(testString);
-
-	OsdmStreamParser reader(charReader, logger);
-
-	logger.reset();
-	ASSERT_FALSE(logger.hasError());
-	assertCommand(reader, "test");
-	ASSERT_TRUE(logger.hasError());
-	assertData(reader, "::");
-	assertEnd(reader);
-}
-}
-
diff --git a/test/formats/osdmx/OsdmxParserTest.cpp b/test/formats/osdmx/OsdmxParserTest.cpp
deleted file mode 100644
index c0fb50d..0000000
--- a/test/formats/osdmx/OsdmxParserTest.cpp
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <iostream>
-
-#include <gtest/gtest.h>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/SourceContextReader.hpp>
-#include <core/model/Domain.hpp>
-#include <core/model/Node.hpp>
-#include <core/model/Project.hpp>
-#include <core/frontend/TerminalLogger.hpp>
-#include <core/StandaloneEnvironment.hpp>
-
-#include <plugins/filesystem/FileLocator.hpp>
-#include <formats/osdmx/OsdmxParser.hpp>
-
-namespace ousia {
-
-namespace RttiTypes {
-extern const Rtti Document;
-extern const Rtti Domain;
-extern const Rtti Typesystem;
-}
-
-struct XmlStandaloneEnvironment : public StandaloneEnvironment {
-	XmlParser xmlParser;
-	FileLocator fileLocator;
-
-	XmlStandaloneEnvironment(ConcreteLogger &logger)
-	    : StandaloneEnvironment(logger)
-	{
-		fileLocator.addDefaultSearchPaths();
-		fileLocator.addUnittestSearchPath("xmlparser");
-
-		registry.registerDefaultExtensions();
-		registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"},
-		                        {&RttiTypes::Node}, &xmlParser);
-		registry.registerResourceLocator(&fileLocator);
-	}
-};
-
-static TerminalLogger logger(std::cerr, true);
-
-TEST(XmlParser, mismatchedTag)
-{
-	XmlStandaloneEnvironment env(logger);
-	env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document});
-	ASSERT_TRUE(logger.hasError());
-}
-
-TEST(XmlParser, generic)
-{
-	XmlStandaloneEnvironment env(logger);
-	env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node});
-#ifdef MANAGER_GRAPHVIZ_EXPORT
-	env.manager.exportGraphviz("xmlDocument.dot");
-#endif
-}
-
-static void checkAttributes(Handle<StructType> expected,
-                            Handle<Descriptor> desc)
-{
-	if (expected == nullptr) {
-		ASSERT_TRUE(desc->getAttributesDescriptor()->getAttributes().empty());
-	} else {
-		ASSERT_EQ(expected->getName(),
-		          desc->getAttributesDescriptor()->getName());
-		auto &attrs_exp = expected->getAttributes();
-		auto &attrs = desc->getAttributesDescriptor()->getAttributes();
-		ASSERT_EQ(attrs_exp.size(), attrs.size());
-		for (size_t i = 0; i < attrs_exp.size(); i++) {
-			ASSERT_EQ(attrs_exp[i]->getName(), attrs[i]->getName());
-			ASSERT_EQ(attrs_exp[i]->getType(), attrs[i]->getType());
-			ASSERT_EQ(attrs_exp[i]->isOptional(), attrs[i]->isOptional());
-			ASSERT_EQ(attrs_exp[i]->getDefaultValue(),
-			          attrs[i]->getDefaultValue());
-		}
-	}
-}
-
-static void checkStructuredClass(
-    Handle<Node> n, const std::string &name, Handle<Domain> domain,
-    Variant cardinality = Cardinality::any(),
-    Handle<StructType> attributesDescriptor = nullptr,
-    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
-    bool root = false)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<StructuredClass> sc = n.cast<StructuredClass>();
-	ASSERT_FALSE(sc == nullptr);
-	ASSERT_EQ(name, sc->getName());
-	ASSERT_EQ(domain, sc->getParent());
-	ASSERT_EQ(cardinality, sc->getCardinality());
-	ASSERT_EQ(transparent, sc->isTransparent());
-	ASSERT_EQ(root, sc->hasRootPermission());
-	checkAttributes(attributesDescriptor, sc);
-}
-
-static Rooted<StructuredClass> checkStructuredClass(
-    const std::string &resolve, const std::string &name, Handle<Domain> domain,
-    Variant cardinality = Cardinality::any(),
-    Handle<StructType> attributesDescriptor = nullptr,
-    Handle<StructuredClass> superclass = nullptr, bool transparent = false,
-    bool root = false)
-{
-	auto res = domain->resolve(&RttiTypes::StructuredClass, resolve);
-	if (res.size() != 1) {
-		throw OusiaException("resolution error!");
-	}
-	Handle<StructuredClass> sc = res[0].node.cast<StructuredClass>();
-	checkStructuredClass(sc, name, domain, cardinality, attributesDescriptor,
-	                     superclass, transparent, root);
-	return sc;
-}
-
-static void checkAnnotationClass(
-    Handle<Node> n, const std::string &name, Handle<Domain> domain,
-    Handle<StructType> attributesDescriptor = nullptr)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<AnnotationClass> ac = n.cast<AnnotationClass>();
-	ASSERT_FALSE(ac == nullptr);
-	ASSERT_EQ(name, ac->getName());
-	ASSERT_EQ(domain, ac->getParent());
-	checkAttributes(attributesDescriptor, ac);
-}
-
-static Rooted<AnnotationClass> checkAnnotationClass(
-    const std::string &resolve, const std::string &name, Handle<Domain> domain,
-    Handle<StructType> attributesDescriptor = nullptr)
-{
-	auto res = domain->resolve(&RttiTypes::AnnotationClass, resolve);
-	if (res.size() != 1) {
-		throw OusiaException("resolution error!");
-	}
-	Handle<AnnotationClass> ac = res[0].node.cast<AnnotationClass>();
-	checkAnnotationClass(ac, name, domain, attributesDescriptor);
-	return ac;
-}
-
-static void checkFieldDescriptor(
-    Handle<Node> n, const std::string &name, Handle<Descriptor> parent,
-    NodeVector<StructuredClass> children,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	ASSERT_FALSE(n == nullptr);
-	Handle<FieldDescriptor> field = n.cast<FieldDescriptor>();
-	ASSERT_FALSE(field.isNull());
-	ASSERT_EQ(name, field->getName());
-	ASSERT_EQ(parent, field->getParent());
-	ASSERT_EQ(type, field->getFieldType());
-	ASSERT_EQ(primitiveType, field->getPrimitiveType());
-	ASSERT_EQ(optional, field->isOptional());
-	// check the children.
-	ASSERT_EQ(children.size(), field->getChildren().size());
-	for (unsigned int c = 0; c < children.size(); c++) {
-		ASSERT_EQ(children[c], field->getChildren()[c]);
-	}
-}
-
-static void checkFieldDescriptor(
-    Handle<Descriptor> desc, Handle<Descriptor> parent,
-    NodeVector<StructuredClass> children,
-    const std::string &name = DEFAULT_FIELD_NAME,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	auto res = desc->resolve(&RttiTypes::FieldDescriptor, name);
-	ASSERT_EQ(1, res.size());
-	checkFieldDescriptor(res[0].node, name, parent, children, type,
-	                     primitiveType, optional);
-}
-
-static void checkFieldDescriptor(
-    Handle<Descriptor> desc, NodeVector<StructuredClass> children,
-    const std::string &name = DEFAULT_FIELD_NAME,
-    FieldDescriptor::FieldType type = FieldDescriptor::FieldType::TREE,
-    Handle<Type> primitiveType = nullptr, bool optional = false)
-{
-	checkFieldDescriptor(desc, desc, children, name, type, primitiveType,
-	                     optional);
-}
-
-TEST(XmlParser, domainParsing)
-{
-	XmlStandaloneEnvironment env(logger);
-	Rooted<Node> book_domain_node =
-	    env.parse("book_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(book_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	// check the domain node.
-	Rooted<Domain> book_domain = book_domain_node.cast<Domain>();
-	ASSERT_EQ("book", book_domain->getName());
-	// get the book struct node.
-	Cardinality single;
-	single.merge({1});
-	Rooted<StructType> bookAuthor{
-	    new StructType(book_domain->getManager(), "", nullptr)};
-	bookAuthor->addAttribute(
-	    {new Attribute(book_domain->getManager(), "author",
-	                   env.project->getSystemTypesystem()->getStringType(),
-	                   "")},
-	    logger);
-	Rooted<StructuredClass> book = checkStructuredClass(
-	    "book", "book", book_domain, single, bookAuthor, nullptr, false, true);
-	// get the chapter struct node.
-	Rooted<StructuredClass> chapter =
-	    checkStructuredClass("chapter", "chapter", book_domain);
-	Rooted<StructuredClass> section =
-	    checkStructuredClass("section", "section", book_domain);
-	Rooted<StructuredClass> subsection =
-	    checkStructuredClass("subsection", "subsection", book_domain);
-	Rooted<StructuredClass> paragraph =
-	    checkStructuredClass("paragraph", "paragraph", book_domain,
-	                         Cardinality::any(), nullptr, nullptr, true, false);
-	Rooted<StructuredClass> text =
-	    checkStructuredClass("text", "text", book_domain, Cardinality::any(),
-	                         nullptr, nullptr, true, false);
-
-	// check the FieldDescriptors.
-	checkFieldDescriptor(book, {chapter, paragraph});
-	checkFieldDescriptor(chapter, {section, paragraph});
-	checkFieldDescriptor(section, {subsection, paragraph});
-	checkFieldDescriptor(subsection, {paragraph});
-	checkFieldDescriptor(paragraph, {text});
-	checkFieldDescriptor(
-	    text, {}, DEFAULT_FIELD_NAME, FieldDescriptor::FieldType::PRIMITIVE,
-	    env.project->getSystemTypesystem()->getStringType(), false);
-
-	// check parent handling using the headings domain.
-	Rooted<Node> headings_domain_node =
-	    env.parse("headings_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(headings_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	Rooted<Domain> headings_domain = headings_domain_node.cast<Domain>();
-	// now there should be a heading struct.
-	Rooted<StructuredClass> heading =
-	    checkStructuredClass("heading", "heading", headings_domain, single,
-	                         nullptr, nullptr, true, false);
-	// which should be a reference to the paragraph descriptor.
-	checkFieldDescriptor(heading, paragraph, {text});
-	// and each struct in the book domain (except for text) should have a
-	// heading field now.
-	checkFieldDescriptor(book, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(chapter, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(section, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(subsection, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-	checkFieldDescriptor(paragraph, {heading}, "heading",
-	                     FieldDescriptor::FieldType::SUBTREE, nullptr, true);
-
-	// check annotation handling using the comments domain.
-	Rooted<Node> comments_domain_node =
-	    env.parse("comments_domain.oxm", "", "", RttiSet{&RttiTypes::Domain});
-	ASSERT_FALSE(comments_domain_node == nullptr);
-	ASSERT_FALSE(logger.hasError());
-	Rooted<Domain> comments_domain = comments_domain_node.cast<Domain>();
-	// now we should be able to find a comment annotation.
-	Rooted<AnnotationClass> comment_anno =
-	    checkAnnotationClass("comment", "comment", comments_domain);
-	// as well as a comment struct
-	Rooted<StructuredClass> comment =
-	    checkStructuredClass("comment", "comment", comments_domain);
-	// and a reply struct
-	Rooted<StructuredClass> reply =
-	    checkStructuredClass("reply", "reply", comments_domain);
-	// check the fields for each of them.
-	{
-		std::vector<Rooted<Descriptor>> descs{comment_anno, comment, reply};
-		for (auto &d : descs) {
-			checkFieldDescriptor(d, {paragraph}, "content",
-			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
-			                     false);
-			checkFieldDescriptor(d, {reply}, "replies",
-			                     FieldDescriptor::FieldType::SUBTREE, nullptr,
-			                     false);
-		}
-	}
-	// paragraph should have comment as child now as well.
-	checkFieldDescriptor(paragraph, {text, comment});
-	// as should heading, because it references the paragraph default field.
-	checkFieldDescriptor(heading, paragraph, {text, comment});
-}
-
-TEST(XmlParser, documentParsing)
-{
-	XmlStandaloneEnvironment env(logger);
-	Rooted<Node> book_domain_node =
-	    env.parse("simple_book.oxd", "", "", RttiSet{&RttiTypes::Document});
-	//TODO: Check result
-}
-}
-
-- 
cgit v1.2.3


From 2659b4595d809cbd69a77e5ff7e2fc08d225f065 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:02:54 +0100
Subject: Tidied OsxmlEventParser up, implemented correct whitespace handling,
 started to write unit tests for the osxml parser

---
 CMakeLists.txt                              |  93 +++---
 src/core/common/Utils.hpp                   |  21 +-
 src/core/common/WhitespaceHandler.hpp       |  60 ++++
 src/formats/osxml/OsxmlAttributeLocator.cpp | 144 ++++++++++
 src/formats/osxml/OsxmlAttributeLocator.hpp |  67 +++++
 src/formats/osxml/OsxmlEventParser.cpp      | 425 +++++++++++++++-------------
 src/formats/osxml/OsxmlEventParser.hpp      |  44 +--
 test/formats/osml/OsmlStreamParserTest.cpp  |   1 +
 test/formats/osxml/OsxmlEventParserTest.cpp | 222 +++++++++++++++
 9 files changed, 811 insertions(+), 266 deletions(-)
 create mode 100644 src/formats/osxml/OsxmlAttributeLocator.cpp
 create mode 100644 src/formats/osxml/OsxmlAttributeLocator.hpp
 create mode 100644 test/formats/osxml/OsxmlEventParserTest.cpp

(limited to 'src/formats')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6e3b90f..bdc9541 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -147,9 +147,9 @@ ADD_LIBRARY(ousia_core
 	src/core/model/RootNode
 	src/core/model/Style
 	src/core/model/Typesystem
-#	src/core/parser/Parser
-#	src/core/parser/ParserContext
-#	src/core/parser/ParserScope
+	src/core/parser/Parser
+	src/core/parser/ParserContext
+	src/core/parser/ParserScope
 #	src/core/parser/generic/ParserState
 #	src/core/parser/generic/ParserStateCallbacks
 #	src/core/parser/generic/ParserStateHandler
@@ -183,36 +183,37 @@ TARGET_LINK_LIBRARIES(ousia_osml
 	ousia_core
 )
 
-#ADD_LIBRARY(ousia_osxml
-#	src/formats/osxml/osxmlParser
-#)
+ADD_LIBRARY(ousia_osxml
+	src/formats/osxml/OsxmlAttributeLocator
+	src/formats/osxml/OsxmlEventParser
+)
 
-#TARGET_LINK_LIBRARIES(ousia_osxml
-#	ousia_core
-#	${EXPAT_LIBRARIES}
-#)
+TARGET_LINK_LIBRARIES(ousia_osxml
+	ousia_core
+	${EXPAT_LIBRARIES}
+)
 
 # Resource locators
 
-#ADD_LIBRARY(ousia_filesystem
-#	src/plugins/filesystem/FileLocator
-#	src/plugins/filesystem/SpecialPaths
-#)
+ADD_LIBRARY(ousia_filesystem
+	src/plugins/filesystem/FileLocator
+	src/plugins/filesystem/SpecialPaths
+)
 
-#TARGET_LINK_LIBRARIES(ousia_filesystem
-#	ousia_core
-#	${Boost_LIBRARIES}
-#)
+TARGET_LINK_LIBRARIES(ousia_filesystem
+	ousia_core
+	${Boost_LIBRARIES}
+)
 
 # Output libraries
 
-#ADD_LIBRARY(ousia_html
-#	src/plugins/html/DemoOutput
-#)
+ADD_LIBRARY(ousia_html
+	src/plugins/html/DemoOutput
+)
 
-#TARGET_LINK_LIBRARIES(ousia_html
-#	ousia_core
-#)
+TARGET_LINK_LIBRARIES(ousia_html
+	ousia_core
+)
 
 #ADD_LIBRARY(ousia_mozjs
 #	src/plugins/mozjs/MozJsScriptEngine
@@ -247,7 +248,7 @@ IF(TEST)
 
 	ADD_EXECUTABLE(ousia_test_core
 		test/core/RangeSetTest
-#		test/core/RegistryTest
+		test/core/RegistryTest
 		test/core/XMLTest
 		test/core/common/ArgumentTest
 		test/core/common/CharReaderTest
@@ -272,7 +273,7 @@ IF(TEST)
 		test/core/model/NodeTest
 		test/core/model/StyleTest
 		test/core/model/TypesystemTest
-#		test/core/parser/ParserScopeTest
+		test/core/parser/ParserScopeTest
 #		test/core/parser/ParserStackTest
 #		test/core/parser/ParserStateTest
 		test/core/parser/utils/TokenizerTest
@@ -311,15 +312,15 @@ IF(TEST)
 #		ousia_css
 #	)
 
-#	ADD_EXECUTABLE(ousia_test_html
-#		test/plugins/html/DemoOutputTest
-#	)
+	ADD_EXECUTABLE(ousia_test_html
+		test/plugins/html/DemoOutputTest
+	)
 
-#	TARGET_LINK_LIBRARIES(ousia_test_html
-#		${GTEST_LIBRARIES}
-#		ousia_core
-#		ousia_html
-#	)
+	TARGET_LINK_LIBRARIES(ousia_test_html
+		${GTEST_LIBRARIES}
+		ousia_core
+		ousia_html
+	)
 
 	ADD_EXECUTABLE(ousia_test_osml
 		test/formats/osml/OsmlStreamParserTest
@@ -331,16 +332,16 @@ IF(TEST)
 		ousia_osml
 	)
 
-#	ADD_EXECUTABLE(ousia_test_osxml
-#		test/plugins/xml/XmlParserTest
-#	)
+	ADD_EXECUTABLE(ousia_test_osxml
+		test/formats/osxml/OsxmlEventParserTest
+	)
 
-#	TARGET_LINK_LIBRARIES(ousia_test_osxml
-#		${GTEST_LIBRARIES}
-#		ousia_core
-#		ousia_osml
-#		ousia_filesystem
-#	)
+	TARGET_LINK_LIBRARIES(ousia_test_osxml
+		${GTEST_LIBRARIES}
+		ousia_core
+		ousia_osxml
+		ousia_filesystem
+	)
 
 #	ADD_EXECUTABLE(ousia_test_mozjs
 #		test/plugins/mozjs/MozJsScriptEngineTest
@@ -354,11 +355,11 @@ IF(TEST)
 
 	# Register the unit tests
 	ADD_TEST(ousia_test_core ousia_test_core)
-#	ADD_TEST(ousia_test_filesystem ousia_test_filesystem)
+	ADD_TEST(ousia_test_filesystem ousia_test_filesystem)
 #	ADD_TEST(ousia_test_css ousia_test_css)
-#	ADD_TEST(ousia_test_html ousia_test_html)
+	ADD_TEST(ousia_test_html ousia_test_html)
 	ADD_TEST(ousia_test_osml ousia_test_osml)
-#	ADD_TEST(ousia_test_osxml ousia_test_osxml)
+	ADD_TEST(ousia_test_osxml ousia_test_osxml)
 #	ADD_TEST(ousia_test_mozjs ousia_test_mozjs)
 ENDIF()
 
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 16a9136..8361973 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -119,9 +119,26 @@ public:
 	 */
 	template <class T, class Filter>
 	static std::pair<size_t, size_t> trim(const T &s, Filter f)
+	{
+		return trim(s, s.size(), f);
+	}
+
+	/**
+	 * Trims the given string or vector of chars by returning the start and end
+	 * index.
+	 *
+	 * @param s is the container that should be trimmed.
+	 * @param len is the number of elements in the container.
+	 * @param f is a function that returns true for values that should be
+	 * removed.
+	 * @return start and end index. Note that "end" points at the character
+	 * beyond the end, thus "end" minus "start" is the trimmed length.
+	 */
+	template <class T, class Filter>
+	static std::pair<size_t, size_t> trim(const T &s, size_t len, Filter f)
 	{
 		size_t start = 0;
-		for (size_t i = 0; i < s.size(); i++) {
+		for (size_t i = 0; i < len; i++) {
 			if (!f(s[i])) {
 				start = i;
 				break;
@@ -129,7 +146,7 @@ public:
 		}
 
 		size_t end = 0;
-		for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) {
+		for (ssize_t i = len - 1; i >= static_cast<ssize_t>(start); i--) {
 			if (!f(s[i])) {
 				end = i + 1;
 				break;
diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp
index 79e0518..ed52ea3 100644
--- a/src/core/common/WhitespaceHandler.hpp
+++ b/src/core/common/WhitespaceHandler.hpp
@@ -97,6 +97,25 @@ public:
 	 * @param end is the end byte offset of the given character.
 	 */
 	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd);
+	}
+
+	/**
+	 * Static version of PreservingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd)
 	{
 		if (textBuf.empty()) {
 			textStart = start;
@@ -129,6 +148,27 @@ public:
 	 * @param end is the end byte offset of the given character.
 	 */
 	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf);
+	}
+
+	/**
+	 * Static version of TrimmingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 * @param whitespaceBuf is a reference at the buffer for storing whitespace
+	 * characters.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd, std::vector<char> &whitespaceBuf)
 	{
 		// Handle whitespace characters
 		if (Utils::isWhitespace(c)) {
@@ -174,6 +214,26 @@ public:
 	 * @param end is the end byte offset of the given character.
 	 */
 	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd, hasWhitespace);
+	}
+
+	/**
+	 * Static version of CollapsingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 * @param hasWhitespace is a reference at the "hasWhitespace" flag.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd, bool &hasWhitespace)
 	{
 		// Handle whitespace characters
 		if (Utils::isWhitespace(c)) {
diff --git a/src/formats/osxml/OsxmlAttributeLocator.cpp b/src/formats/osxml/OsxmlAttributeLocator.cpp
new file mode 100644
index 0000000..e37446a
--- /dev/null
+++ b/src/formats/osxml/OsxmlAttributeLocator.cpp
@@ -0,0 +1,144 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <core/common/Location.hpp>
+#include <core/common/CharReader.hpp>
+#include <core/common/Utils.hpp>
+
+#include "OsxmlAttributeLocator.hpp"
+
+namespace ousia {
+
+/**
+ * Enum used internally in the statemachine of the xml argument parser.
+ */
+enum class XmlAttributeState {
+	IN_TAG_NAME,      // Initial state: reading the tag name up to whitespace
+	SEARCH_ATTR,      // Skipping whitespace in front of an attribute name
+	IN_ATTR_NAME,     // Collecting the characters of an attribute name
+	HAS_ATTR_NAME,    // Name ended with whitespace, waiting for the '='
+	HAS_ATTR_EQUALS,  // The '=' was read, waiting for the opening quote
+	IN_ATTR_DATA      // Inside the attribute value, up to the closing quote
+};
+
+std::map<std::string, SourceLocation> OsxmlAttributeLocator::locate(
+    CharReader &reader, size_t offs)
+{
+	std::map<std::string, SourceLocation> res;
+
+	// Fork the reader, we don't want to mess up the XML parsing process, do we?
+	CharReaderFork readerFork = reader.fork();
+
+	// Move the read cursor to the start location, abort if this does not work
+	if (offs != readerFork.seek(offs)) {
+		return res;
+	}
+
+	// Now all we need to do is to implement one half of an XML parser. As this
+	// is inherently complicated we'll totally fail at it. Don't care. All we
+	// want to get is those darn offsets for pretty error messages... (and we
+	// can assume the XML is valid as it was already read by expat)
+	XmlAttributeState state = XmlAttributeState::IN_TAG_NAME;
+	char c;
+	char quote = '"';  // Quote character that opened the current value
+	std::string attrName;
+	while (readerFork.read(c)) {
+		// Abort at the end of the tag
+		if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) {
+			return res;
+		}
+
+		// One state machine to rule them all, one state machine to find them,
+		// One state machine to bring them all and in the darkness bind them
+		// (the byte offsets)
+		switch (state) {
+			case XmlAttributeState::IN_TAG_NAME:
+				if (Utils::isWhitespace(c)) {
+					res.emplace("$tag",
+					            SourceLocation{reader.getSourceId(), offs + 1,
+					                           readerFork.getOffset() - 1});
+					state = XmlAttributeState::SEARCH_ATTR;
+				}
+				break;
+			case XmlAttributeState::SEARCH_ATTR:
+				if (!Utils::isWhitespace(c)) {
+					state = XmlAttributeState::IN_ATTR_NAME;
+					attrName += c;
+				}
+				break;
+			case XmlAttributeState::IN_ATTR_NAME:
+				if (Utils::isWhitespace(c)) {
+					state = XmlAttributeState::HAS_ATTR_NAME;
+				} else if (c == '=') {
+					state = XmlAttributeState::HAS_ATTR_EQUALS;
+				} else {
+					attrName += c;
+				}
+				break;
+			case XmlAttributeState::HAS_ATTR_NAME:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '=') {
+						state = XmlAttributeState::HAS_ATTR_EQUALS;
+						break;
+					}
+					// Well, this is a strange XML file... We expected to
+					// see a '=' here! Try to continue with the
+					// "HAS_ATTR_EQUALS" state as this state will hopefully
+					// include some error recovery
+				} else {
+					// Skip whitespace here
+					break;
+				}
+			// Fallthrough
+			case XmlAttributeState::HAS_ATTR_EQUALS:
+				if (!Utils::isWhitespace(c)) {
+					if (c == '"' || c == '\'') {
+						// Start of the attribute value (XML allows both quote
+						// kinds): remember the delimiter and the start offset
+						quote = c;
+						res.emplace(attrName,
+						            SourceLocation{reader.getSourceId(),
+						                           readerFork.getOffset()});
+						state = XmlAttributeState::IN_ATTR_DATA;
+					} else {
+						// No, this XML file is not well formed. Assume we're in
+						// an attribute name once again
+						attrName.assign(1, c);
+						state = XmlAttributeState::IN_ATTR_NAME;
+					}
+				}
+				break;
+			case XmlAttributeState::IN_ATTR_DATA:
+				if (c == quote) {
+					// End of the attribute data, set the end location
+					auto it = res.find(attrName);
+					if (it != res.end()) {
+						it->second.setEnd(readerFork.getOffset() - 1);
+					}
+
+					// Reset the attribute name and restart the search
+					attrName.clear();
+					state = XmlAttributeState::SEARCH_ATTR;
+				}
+				break;
+		}
+	}
+	return res;
+}
+}
+
diff --git a/src/formats/osxml/OsxmlAttributeLocator.hpp b/src/formats/osxml/OsxmlAttributeLocator.hpp
new file mode 100644
index 0000000..f9a3437
--- /dev/null
+++ b/src/formats/osxml/OsxmlAttributeLocator.hpp
@@ -0,0 +1,67 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsxmlAttributeLocator.hpp
+ *
+ * Contains a class used for locating the byte offsets of the attributes given
+ * in a XML tag.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_
+#define _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_
+
+#include <map>
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+class SourceLocation;
+
+/**
+ * Class containing one static function for locating the byte offsets of the
+ * attributes in a XML tag. These are not retrieved by our xml parser, so we
+ * have to do this manually.
+ */
+class OsxmlAttributeLocator {
+public:
+	/**
+	 * Function used to reconstruct the location of the attributes of a XML tag
+	 * in the source code. This is necessary, as the xml parser only returns an
+	 * offset to the beginning of a tag and not to the position of the
+	 * individual arguments.
+	 *
+	 * @param reader is the char reader from which the character data should be
+	 * read.
+	 * @param offs is a byte offset in the xml file pointing at the "<"
+	 * character of the tag.
+	 * @return a map from attribute keys to the corresponding location
+	 * (including range) of the attribute. Also contains the location of the
+	 * tagname as virtual attribute "$tag" if whitespace follows the tagname.
+	 */
+	static std::map<std::string, SourceLocation> locate(CharReader &reader,
+	                                                    size_t offs);
+};
+
+}
+
+#endif /* _OUSIA_OSXML_ATTRIBUTE_LOCATOR_HPP_ */
+
diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp
index 2ef170e..b4aff77 100644
--- a/src/formats/osxml/OsxmlEventParser.cpp
+++ b/src/formats/osxml/OsxmlEventParser.cpp
@@ -18,14 +18,22 @@
 
 #include <expat.h>
 
+#include <vector>
+
+#include <core/common/CharReader.hpp>
 #include <core/common/Logger.hpp>
 #include <core/common/Variant.hpp>
+#include <core/common/VariantReader.hpp>
 #include <core/common/Utils.hpp>
+#include <core/common/WhitespaceHandler.hpp>
 
+#include "OsxmlAttributeLocator.hpp"
 #include "OsxmlEventParser.hpp"
 
 namespace ousia {
 
+/* Class OsxmlEventParser */
+
 /**
  * Class containing data used by the internal functions.
  */
@@ -43,41 +51,75 @@ public:
 	 */
 	ssize_t annotationEndTagDepth;
 
+	/**
+	 * Current character data buffer.
+	 */
+	std::vector<char> textBuf;
+
+	/**
+	 * Current whitespace buffer (for the trimming whitespace mode)
+	 */
+	std::vector<char> whitespaceBuf;
+
+	/**
+	 * Flag indicating whether a whitespace character was present (for the
+	 * collapsing whitespace mode).
+	 */
+	bool hasWhitespace;
+
+	/**
+	 * Current character data start.
+	 */
+	size_t textStart;
+
+	/**
+	 * Current character data end.
+	 */
+	size_t textEnd;
+
 	/**
 	 * Default constructor.
 	 */
-	OsxmlEventParserData() : depth(0), annotationEndTagDepth(-1) {}
+	OsxmlEventParserData();
 
 	/**
 	 * Increments the depth.
 	 */
-	void incrDepth() { depth++; }
+	void incrDepth();
 
 	/**
 	 * Decrement the depth and reset the annotationEndTagDepth flag.
 	 */
-	void decrDepth()
-	{
-		if (depth > 0) {
-			depth--;
-		}
-		if (depth < annotationEndTagDepth) {
-			annotationEndTagDepth = -1;
-		}
-	}
+	void decrDepth();
 
 	/**
 	 * Returns true if we're currently inside an end tag.
 	 */
-	bool inAnnotationEndTag() { depth >= annotationEndTagDepth; }
+	bool inAnnotationEndTag();
+
+	/**
+	 * Returns true if character data is available.
+	 *
+	 * @return true if character data is available.
+	 */
+	bool hasText();
+
+	/**
+	 * Returns a Variant containing the character data and its location.
+	 *
+	 * @return a string variant containing the text data and the character
+	 * location.
+	 */
+	Variant getText(SourceId sourceId);
 };
 
-namespace {
+/* Class GuardedExpatXmlParser */
+
 /**
  * Wrapper class around the XML_Parser pointer which safely frees it whenever
  * the scope is left (e.g. because an exception was thrown).
  */
-class ScopedExpatXmlParser {
+class GuardedExpatXmlParser {
 private:
 	/**
 	 * Internal pointer to the XML_Parser instance.
@@ -86,14 +128,14 @@ private:
 
 public:
 	/**
-	 * Constructor of the ScopedExpatXmlParser class. Calls XML_ParserCreateNS
+	 * Constructor of the GuardedExpatXmlParser class. Calls XML_ParserCreateNS
 	 * from the expat library. Throws a parser exception if the XML parser
 	 * cannot be initialized.
 	 *
 	 * @param encoding is the protocol-defined encoding passed to expat (or
 	 * nullptr if expat should determine the encoding by itself).
 	 */
-	ScopedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
+	GuardedExpatXmlParser(const XML_Char *encoding) : parser(nullptr)
 	{
 		parser = XML_ParserCreate(encoding);
 		if (!parser) {
@@ -103,9 +145,9 @@ public:
 	}
 
 	/**
-	 * Destuctor of the ScopedExpatXmlParser, frees the XML parser instance.
+	 * Destructor of the GuardedExpatXmlParser, frees the XML parser instance.
 	 */
-	~ScopedExpatXmlParser()
+	~GuardedExpatXmlParser()
 	{
 		if (parser) {
 			XML_ParserFree(parser);
@@ -120,134 +162,20 @@ public:
 };
 
 /**
- * Enum used internally in the statemachine of the micro-xml argument parser.
+ * Name of the special outer tag used for allowing multiple top-level elements
+ * in an xml file.
  */
-enum class XmlAttributeState {
-	IN_TAG_NAME,
-	SEARCH_ATTR,
-	IN_ATTR_NAME,
-	HAS_ATTR_NAME,
-	HAS_ATTR_EQUALS,
-	IN_ATTR_DATA
-};
+static const std::string TOP_LEVEL_TAG{"ousia"};
 
 /**
- * Function used to reconstruct the location of the attributes of a XML tag in
- * the source code. This is necessary, as the xml parser only returns an offset
- * to the begining of a tag and not to the position of the individual arguments.
- *
- * @param reader is the char reader from which the character data should be
- * read.
- * @param offs is a byte offset in the xml file pointing at the "<" character of
- * the tag.
- * @return a map from attribute keys to the corresponding location (including
- * range) of the atribute. Also contains the location of the tagname in the
- * form of the virtual attribute "$tag".
+ * Prefix used to indicate the start of an annotation (note the trailing colon)
  */
-static std::map<std::string, SourceLocation> xmlReconstructAttributeOffsets(
-    CharReader &reader, size_t offs)
-{
-	std::map<std::string, SourceLocation> res;
-
-	// Fork the reader, we don't want to mess up the XML parsing process, do we?
-	CharReaderFork readerFork = reader.fork();
-
-	// Move the read cursor to the start location, abort if this does not work
-	if (!location.isValid() || offs != readerFork.seek(offs)) {
-		return res;
-	}
-
-	// Now all we need to do is to implement one half of an XML parser. As this
-	// is inherently complicated we'll totaly fail at it. Don't care. All we
-	// want to get is those darn offsets for pretty error messages... (and we
-	// can assume the XML is valid as it was already read by expat)
-	XmlAttributeState state = XmlAttributeState::IN_TAG_NAME;
-	char c;
-	std::stringstream attrName;
-	while (readerFork.read(c)) {
-		// Abort at the end of the tag
-		if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) {
-			return res;
-		}
+static const std::string ANNOTATION_START_PREFIX{"a:start:"};
 
-		// One state machine to rule them all, one state machine to find them,
-		// One state machine to bring them all and in the darkness bind them
-		// (the byte offsets)
-		switch (state) {
-			case XmlAttributeState::IN_TAG_NAME:
-				if (Utils::isWhitespace(c)) {
-					res.emplace("$tag",
-					            SourceLocation{reader.getSourceId(), offs + 1,
-					                           readerFork.getOffset() - 1});
-					state = XmlAttributeState::SEARCH_ATTR;
-				}
-				break;
-			case XmlAttributeState::SEARCH_ATTR:
-				if (!Utils::isWhitespace(c)) {
-					state = XmlAttributeState::IN_ATTR_NAME;
-					attrName << c;
-				}
-				break;
-			case XmlAttributeState::IN_ATTR_NAME:
-				if (Utils::isWhitespace(c)) {
-					state = XmlAttributeState::HAS_ATTR_NAME;
-				} else if (c == '=') {
-					state = XmlAttributeState::HAS_ATTR_EQUALS;
-				} else {
-					attrName << c;
-				}
-				break;
-			case XmlAttributeState::HAS_ATTR_NAME:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '=') {
-						state = XmlAttributeState::HAS_ATTR_EQUALS;
-						break;
-					}
-					// Well, this is a strange XML file... We expected to
-					// see a '=' here! Try to continue with the
-					// "HAS_ATTR_EQUALS" state as this state will hopefully
-					// inlcude some error recovery
-				} else {
-					// Skip whitespace here
-					break;
-				}
-			// Fallthrough
-			case XmlAttributeState::HAS_ATTR_EQUALS:
-				if (!Utils::isWhitespace(c)) {
-					if (c == '"') {
-						// Here we are! We have found the beginning of an
-						// attribute. Let's quickly lock the current offset away
-						// in the result map
-						res.emplace(attrName.str(),
-						            SourceLocation{reader.getSourceId(),
-						                           readerFork.getOffset()});
-						state = XmlAttributeState::IN_ATTR_DATA;
-					} else {
-						// No, this XML file is not well formed. Assume we're in
-						// an attribute name once again
-						attrName.str(std::string{&c, 1});
-						state = XmlAttributeState::IN_ATTR_NAME;
-					}
-				}
-				break;
-			case XmlAttributeState::IN_ATTR_DATA:
-				if (c == '"') {
-					// We're at the end of the attribute data, set the end
-					// location
-					auto it = res.find(attrName.str());
-					if (it != res.end()) {
-						it->second.setEnd(readerFork.getOffset() - 1);
-					}
-
-					// Reset the attribute name and restart the search
-					attrName.str(std::string{});
-					state = XmlAttributeState::SEARCH_ATTR;
-				}
-				break;
-		}
-	}
-	return res;
-}
+/**
+ * Prefix used to indicate the end of an annotation.
+ */
+static const std::string ANNOTATION_END_PREFIX{"a:end"};
 
 /**
  * Synchronizes the position of the xml parser with the default location of the
@@ -268,22 +196,12 @@ static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0)
 	size_t offs = XML_GetCurrentByteIndex(p);
 	SourceLocation loc =
 	    SourceLocation{parser->getReader().getSourceId(), offs, offs + len};
-	parser->getLogger().setDefaultLocation(location);
+	parser->getLogger().setDefaultLocation(loc);
 
 	// Return the fetched location
 	return loc;
 }
 
-/**
- * Prefix used to indicate the start of an annoation,
- */
-static const std::string ANNOTATION_START_PREFIX{"a:start:"};
-
-/**
- * Prefix used to indicate the end of an annotation.
- */
-static const std::string ANNOTATION_END_PREFIX{"a:end"};
-
 /**
  * Callback called by eXpat whenever a start handler is reached.
  */
@@ -292,14 +210,21 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 {
 	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
 	XML_Parser p = static_cast<XML_Parser>(ref);
-	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
+	OsxmlEventParser *parser =
+	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
+
+	// If there is any text data in the buffer, issue that first
+	if (parser->getData().hasText()) {
+		parser->getEvents().data(
+		    parser->getData().getText(parser->getReader().getSourceId()));
+	}
 
 	// Read the argument locations -- this is only a stupid and slow hack,
 	// but it is necessary, as expat doesn't give use the byte offset of the
 	// arguments.
 	std::map<std::string, SourceLocation> attributeOffsets =
-	    xmlReconstructXMLAttributeOffsets(*userData->reader,
-	                                      XML_GetCurrentByteIndex(p));
+	    OsxmlAttributeLocator::locate(parser->getReader(),
+	                                  XML_GetCurrentByteIndex(p));
 
 	// Update the logger position
 	SourceLocation loc = xmlSyncLoggerPosition(p);
@@ -316,7 +241,8 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 	// Make sure we're currently not inside an annotation end tag -- this would
 	// be highly illegal!
 	if (parser->getData().inAnnotationEndTag()) {
-		logger.error("No tags allowed inside an annotation end tag", nameLoc);
+		parser->getLogger().error(
+		    "No tags allowed inside an annotation end tag", nameLoc);
 		return;
 	}
 
@@ -336,36 +262,33 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 
 		// Parse the string, pass the location of the key
 		std::pair<bool, Variant> value = VariantReader::parseGenericString(
-		    *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(),
+		    *(attr++), parser->getLogger(), keyLoc.getSourceId(),
 		    keyLoc.getStart());
 
 		// Set the overall location of the parsed element to the attribute
 		// location
-		value.second->setLocation(keyLoc);
-
-		// Store the
-		if (!args.emplace(key, value.second).second) {
-			parser->getLogger().warning(
-			    std::string("Attribute \"") + key +
-			        "\" defined multiple times, only using first definition",
-			    keyLoc);
-		}
+		value.second.setLocation(keyLoc);
+
+		// Store the keys in the map
+		args.emplace(key, value.second).second;
 	}
 
 	// Fetch the name of the tag, check for special tags
 	std::string nameStr(name);
-	if (nameStr == "ousia" && parser->getData().depth == 1) {
-		// We're in the top-level and the magic "ousia" tag is reached -- just
+	if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 1) {
+		// We're in the top-level and the magic tag is reached -- just
 		// ignore it and issue a warning for each argument that has been given
 		for (const auto &arg : args) {
-			parser->getLogger().warning(
-			    std::string("Ignoring attribute \"") + arg.first +
-			        std::string("\" for magic tag \"ousia\""),
-			    arg.second);
+			parser->getLogger().warning(std::string("Ignoring attribute \"") +
+			                                arg.first +
+			                                std::string("\" for magic tag \"") +
+			                                TOP_LEVEL_TAG + std::string("\""),
+			                            arg.second);
 		}
 	} else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) {
 		// Assemble a name variant containing the name minus the prefix
-		Variant nameVar = nameStr.substr(ANNOTATION_START_PREFIX.size());
+		Variant nameVar =
+		    Variant::fromString(nameStr.substr(ANNOTATION_START_PREFIX.size()));
 		nameVar.setLocation(nameLoc);
 
 		// Issue the "annotationStart" event
@@ -410,25 +333,34 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 	}
 }
 
-static void xmlEndElementHandler(void *p, const XML_Char *name)
+static void xmlEndElementHandler(void *ref, const XML_Char *name)
 {
 	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
 	XML_Parser p = static_cast<XML_Parser>(ref);
-	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
+	OsxmlEventParser *parser =
+	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
 
 	// Synchronize the position of the logger with teh position
-	xmlSyncLoggerPosition(parser);
-
-	// Decrement the current depth
-	parser->getData().decrDepth();
+	xmlSyncLoggerPosition(p);
 
 	// Abort as long as we're in an annotation end tag
 	if (parser->getData().inAnnotationEndTag()) {
+		parser->getData().decrDepth();
 		return;
 	}
 
+	// Decrement the current depth
+	parser->getData().decrDepth();
+
+	// If there is any text data in the buffer, issue that first
+	if (parser->getData().hasText()) {
+		parser->getEvents().data(
+		    parser->getData().getText(parser->getReader().getSourceId()));
+	}
+
 	// Abort if the special ousia tag ends here
-	if (nameStr == "ousia" && parser->getData().depth == 0) {
+	std::string nameStr{name};
+	if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 0) {
 		return;
 	}
 
@@ -436,20 +368,105 @@ static void xmlEndElementHandler(void *p, const XML_Char *name)
 	parser->getEvents().fieldEnd();
 }
 
-static void xmlCharacterDataHandler(void *p, const XML_Char *s, int len)
+static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len)
 {
 	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
 	XML_Parser p = static_cast<XML_Parser>(ref);
-	OsxmlEventParser *parser = static_cast<XMLUserData *>(XML_GetUserData(p));
-
-	// TODO
-/*	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
-	syncLoggerPosition(parser, ulen);
-	const std::string data = Utils::trim(std::string{s, ulen});
-	if (!data.empty()) {
-		stack->data(data);
-	}*/
+	OsxmlEventParser *parser =
+	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
+
+	// Abort as long as we're in an annotation end tag
+	if (parser->getData().inAnnotationEndTag()) {
+		return;
+	}
+
+	// Convert the signed (smell the 90's C library here?) length to an
+	// unsigned value
+	size_t ulen = len > 0 ? static_cast<size_t>(len) : 0;
+
+	// Synchronize the logger position
+	SourceLocation loc = xmlSyncLoggerPosition(p, ulen);
+
+	// Fetch some variables for convenience
+	const WhitespaceMode mode = parser->getWhitespaceMode();
+	OsxmlEventParserData &data = parser->getData();
+	std::vector<char> &textBuf = data.textBuf;
+	std::vector<char> &whitespaceBuf = data.whitespaceBuf;
+	bool &hasWhitespace = data.hasWhitespace;
+	size_t &textStart = data.textStart;
+	size_t &textEnd = data.textEnd;
+
+	size_t pos = loc.getStart();
+	for (size_t i = 0; i < ulen; i++, pos++) {
+		switch (mode) {
+			case WhitespaceMode::PRESERVE:
+				PreservingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf,
+				                                    textStart, textEnd);
+				break;
+			case WhitespaceMode::TRIM:
+				TrimmingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf,
+				                                  textStart, textEnd,
+				                                  whitespaceBuf);
+				break;
+			case WhitespaceMode::COLLAPSE:
+				CollapsingWhitespaceHandler::append(s[i], pos, pos + 1, textBuf,
+				                                    textStart, textEnd,
+				                                    hasWhitespace);
+				break;
+		}
+	}
+}
+
+/* Class OsxmlEvents */
+
+OsxmlEvents::~OsxmlEvents() {}
+
+/* Class OsxmlEventParserData */
+
+OsxmlEventParserData::OsxmlEventParserData()
+    : depth(0),
+      annotationEndTagDepth(-1),
+      hasWhitespace(false),
+      textStart(0),
+      textEnd(0)
+{
+}
+
+void OsxmlEventParserData::incrDepth() { depth++; }
+
+void OsxmlEventParserData::decrDepth()
+{
+	if (depth > 0) {
+		depth--;
+	}
+	if (depth < annotationEndTagDepth) {
+		annotationEndTagDepth = -1;
+	}
+}
+
+bool OsxmlEventParserData::inAnnotationEndTag()
+{
+	return (annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth);
 }
+
+bool OsxmlEventParserData::hasText() { return !textBuf.empty(); }
+
+Variant OsxmlEventParserData::getText(SourceId sourceId)
+{
+	// Create a variant containing the string data and the location
+	Variant var =
+	    Variant::fromString(std::string{textBuf.data(), textBuf.size()});
+	var.setLocation({sourceId, textStart, textEnd});
+
+	// Reset the text buffers
+	textBuf.clear();
+	whitespaceBuf.clear();
+	hasWhitespace = false;
+	textStart = 0;
+	textEnd = 0;
+
+	// Return the variant
+	return var;
 }
 
 /* Class OsxmlEventParser */
@@ -459,21 +476,22 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events,
     : reader(reader),
       events(events),
       logger(logger),
-      whitespaceMode(WhitespaceMode::COLLAPSE),
+      whitespaceMode(WhitespaceMode::TRIM),
       data(new OsxmlEventParserData())
 {
 }
 
-void OsxmlEventParser::parse(CharReader &reader)
+OsxmlEventParser::~OsxmlEventParser() {}
+
+void OsxmlEventParser::parse()
 {
 	// Create the parser object
-	ScopedExpatXmlParser p{"UTF-8"};
+	GuardedExpatXmlParser p{"UTF-8"};
 
 	// Reset the depth
-	depth = 0;
+	data->depth = 0;
 
-	// Pass the reference to the ParserStack to the XML handler
-	XMLUserData data(&stack, &reader);
+	// Pass the reference to this parser instance to the XML handler
 	XML_SetUserData(&p, this);
 	XML_UseParserAsHandlerArg(&p);
 
@@ -498,7 +516,7 @@ void OsxmlEventParser::parse(CharReader &reader)
 		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) {
 			throw LoggableException{
 			    "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))},
-			    xmlSyncLoggerPosition(p)};
+			    xmlSyncLoggerPosition(&p)};
 		}
 
 		// Abort once there are no more bytes in the stream
@@ -513,12 +531,17 @@ void OsxmlEventParser::setWhitespaceMode(WhitespaceMode whitespaceMode)
 	this->whitespaceMode = whitespaceMode;
 }
 
-CharReader &OsxmlEventParser::getCharReader() { return charReader; }
+WhitespaceMode OsxmlEventParser::getWhitespaceMode() const
+{
+	return whitespaceMode;
+}
+
+CharReader &OsxmlEventParser::getReader() const { return reader; }
 
-Logger &OsxmlEventParser::getLogger() { return logger; }
+Logger &OsxmlEventParser::getLogger() const { return logger; }
 
-OsxmlEvents &OsxmlEventParser::getEvents() { return events; }
+OsxmlEvents &OsxmlEventParser::getEvents() const { return events; }
 
-OsxmlEventParserData &OsxmlEventParser::getData() { return *data; }
+OsxmlEventParserData &OsxmlEventParser::getData() const { return *data; }
 }
 
diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp
index 5319ca6..aa20ea9 100644
--- a/src/formats/osxml/OsxmlEventParser.hpp
+++ b/src/formats/osxml/OsxmlEventParser.hpp
@@ -42,7 +42,7 @@ class Variant;
 class OsxmlEventParserData;
 
 /**
- * Interface which defines the callback functions which are called by the 
+ * Interface which defines the callback functions which are called by the
  * OsxmlEventParser whenever an event occurs.
  */
 class OsxmlEvents {
@@ -50,13 +50,13 @@ public:
 	/**
 	 * Virtual destructor.
 	 */
-	virtual ~OsxmlEvents() {}
+	virtual ~OsxmlEvents();
 
 	/**
 	 * Called whenever a command starts. Note that this implicitly always starts
 	 * the default field of the command.
 	 *
-	 * @param name is a string variant containing name and location of the 
+	 * @param name is a string variant containing name and location of the
 	 * command.
 	 * @param args is a map variant containing the arguments that were given
 	 * to the command.
@@ -67,12 +67,12 @@ public:
 	 * Called whenever an annotation starts. Note that this implicitly always
 	 * starts the default field of the annotation.
 	 *
-	 * @param name is a string variant containing the name of the annotation 
+	 * @param name is a string variant containing the name of the annotation
 	 * class and the location of the annotation definition.
 	 * @param args is a map variant containing the arguments that were given
 	 * to the annotation definition.
 	 */
-	virtual void annotationStart(Variant name, Variant args);
+	virtual void annotationStart(Variant name, Variant args) = 0;
 
 	/**
 	 * Called whenever the range of an annotation ends. The callee must
@@ -85,12 +85,12 @@ public:
 	 * ended here. May be empty (or nullptr), if no elementName has been
 	 * specified at the end of the annotation.
 	 */
-	virtual void annotationEnd(Variant name, Variant elementName);
+	virtual void annotationEnd(Variant name, Variant elementName) = 0;
 
 	/**
-	 * Called whenever the default field which was implicitly started by 
+	 * Called whenever the default field which was implicitly started by
 	 * commandStart or annotationStart ends. Note that this does not end the
-	 * range of an annotation, but the default field of the annotation. To 
+	 * range of an annotation, but the default field of the annotation. To
 	 * signal the end of the annotation this, the annotationEnd method will be
 	 * invoked.
 	 */
@@ -102,11 +102,10 @@ public:
 	 * is not called if the parsing failed, the parser prints an error message
 	 * instead.
 	 *
-	 * @param data is the already parsed data that should be passed to the 
+	 * @param data is the already parsed data that should be passed to the
 	 * handler.
 	 */
 	virtual void data(Variant data) = 0;
-
 };
 
 /**
@@ -148,7 +147,7 @@ public:
 	 * Constructor fo the OsxmlEventParser. Takes a reference at the OsxmlEvents
 	 * of which the callback functions are called.
 	 *
-	 * @param reader is a reference to the CharReader instance from which the 
+	 * @param reader is a reference to the CharReader instance from which the
 	 * XML should be read.
 	 * @param events is a refence at an instance of the OsxmlEvents class. All
 	 * events are forwarded to this class.
@@ -157,6 +156,11 @@ public:
 	 */
 	OsxmlEventParser(CharReader &reader, OsxmlEvents &events, Logger &logger);
 
+	/**
+	 * Destructor of OsxmlEventParser (needed for unique_ptr to incomplete type)
+	 */
+	~OsxmlEventParser();
+
 	/**
 	 * Performs the actual parsing. Reads the XML using eXpat and calles the
 	 * callbacks in the event listener instance whenever something interesting
@@ -167,38 +171,44 @@ public:
 	/**
 	 * Sets the whitespace handling mode.
 	 *
-	 * @param whitespaceMode defines how whitespace in the data should be 
+	 * @param whitespaceMode defines how whitespace in the data should be
 	 * handled.
 	 */
 	void setWhitespaceMode(WhitespaceMode whitespaceMode);
 
+	/**
+	 * Returns the current whitespace handling mode.
+	 *
+	 * @return the currently set whitespace handling mode.
+	 */
+	WhitespaceMode getWhitespaceMode() const;
+
 	/**
 	 * Returns the internal CharReader reference.
 	 *
 	 * @return the CharReader reference.
 	 */
-	CharReader &getCharReader();
+	CharReader &getReader() const;
 
 	/**
 	 * Returns the internal Logger reference.
 	 *
 	 * @return the internal Logger reference.
 	 */
-	Logger &getLogger();
+	Logger &getLogger() const;
 
 	/**
 	 * Returns the internal OsxmlEvents reference.
 	 *
 	 * @return the internal OsxmlEvents reference.
 	 */
-	OsxmlEvents &getEvents();
+	OsxmlEvents &getEvents() const;
 
 	/**
 	 * Returns a reference at the internal data.
 	 */
-	OsxmlEventParserData &getData();
+	OsxmlEventParserData &getData() const;
 };
-
 }
 
 #endif /* _OSXML_EVENT_PARSER_HPP_ */
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index e5eff05..b944af8 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -28,6 +28,7 @@
 namespace ousia {
 
 static TerminalLogger logger(std::cerr, true);
+//static ConcreteLogger logger;
 
 TEST(OsmlStreamParser, empty)
 {
diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp
new file mode 100644
index 0000000..06c800f
--- /dev/null
+++ b/test/formats/osxml/OsxmlEventParserTest.cpp
@@ -0,0 +1,222 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/frontend/TerminalLogger.hpp>
+#include <core/common/CharReader.hpp>
+#include <core/common/Variant.hpp>
+
+#include <formats/osxml/OsxmlEventParser.hpp>
+
+namespace ousia {
+
+static TerminalLogger logger(std::cerr, true);
+// static ConcreteLogger logger;
+
+namespace {
+enum class OsxmlEvent {
+	COMMAND_START,
+	ANNOTATION_START,
+	ANNOTATION_END,
+	FIELD_END,
+	DATA
+};
+
+class TestOsxmlEventListener : public OsxmlEvents {
+public:
+	std::vector<std::pair<OsxmlEvent, Variant>> events;
+
+	void commandStart(Variant name, Variant args) override
+	{
+		events.emplace_back(OsxmlEvent::COMMAND_START,
+		                    Variant::arrayType{name, args});
+	}
+
+	void annotationStart(Variant name, Variant args) override
+	{
+		events.emplace_back(OsxmlEvent::ANNOTATION_START,
+		                    Variant::arrayType{name, args});
+	}
+
+	void annotationEnd(Variant name, Variant elementName) override
+	{
+		events.emplace_back(OsxmlEvent::ANNOTATION_END,
+		                    Variant::arrayType{name, elementName});
+	}
+
+	void fieldEnd() override
+	{
+		events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{});
+	}
+
+	void data(Variant data) override
+	{
+		events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data});
+	}
+};
+
+static std::vector<std::pair<OsxmlEvent, Variant>> parseXml(
+    const char *testString,
+    WhitespaceMode whitespaceMode = WhitespaceMode::TRIM)
+{
+	TestOsxmlEventListener listener;
+	CharReader reader(testString);
+	OsxmlEventParser parser(reader, listener, logger);
+	parser.setWhitespaceMode(whitespaceMode);
+	parser.parse();
+	return listener.events;
+}
+}
+
+TEST(OsxmlEventParser, simpleCommandWithArgs)
+{
+	const char *testString = "<a name=\"test\" a=\"1\" b=\"2\" c=\"blub\"/>";
+	//                        01234567 89012 3456 78 9012 34 5678 90123 456
+	//                        0          1            2            3
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{
+	         "a", Variant::mapType{
+	                  {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString);
+	ASSERT_EQ(expectedEvents, events);
+
+	// Check the locations (I'll do this one time and then just assume it works)
+	ASSERT_EQ(1U, events[0].second.asArray()[0].getLocation().getStart());
+	ASSERT_EQ(2U, events[0].second.asArray()[0].getLocation().getEnd());
+	ASSERT_EQ(
+	    9U,
+	    events[0].second.asArray()[1].asMap()["name"].getLocation().getStart());
+	ASSERT_EQ(
+	    13U,
+	    events[0].second.asArray()[1].asMap()["name"].getLocation().getEnd());
+	ASSERT_EQ(
+	    18U,
+	    events[0].second.asArray()[1].asMap()["a"].getLocation().getStart());
+	ASSERT_EQ(
+	    19U, events[0].second.asArray()[1].asMap()["a"].getLocation().getEnd());
+	ASSERT_EQ(
+	    24U,
+	    events[0].second.asArray()[1].asMap()["b"].getLocation().getStart());
+	ASSERT_EQ(
+	    25U, events[0].second.asArray()[1].asMap()["b"].getLocation().getEnd());
+	ASSERT_EQ(
+	    30U,
+	    events[0].second.asArray()[1].asMap()["c"].getLocation().getStart());
+	ASSERT_EQ(
+	    34U, events[0].second.asArray()[1].asMap()["c"].getLocation().getEnd());
+}
+
+TEST(OsxmlEventParser, magicTopLevelTag)
+{
+	const char *testString = "<ousia><a/><b/></ousia>";
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{{"a", Variant::mapType{}}}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}},
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{{"b", Variant::mapType{}}}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString);
+	ASSERT_EQ(expectedEvents, events);
+}
+
+TEST(OsxmlEventParser, magicTopLevelTagInside)
+{
+	const char *testString = "<a><ousia/></a>";
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{{"a", Variant::mapType{}}}},
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{{"ousia", Variant::mapType{}}}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString);
+	ASSERT_EQ(expectedEvents, events);
+}
+
+TEST(OsxmlEventParser, commandWithDataPreserveWhitespace)
+{
+	const char *testString = "<a>  hello  \n world </a>";
+	//                        012345678901 234567890123
+	//                        0         1          2
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::DATA, Variant::arrayType{"  hello  \n world "}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString, WhitespaceMode::PRESERVE);
+	ASSERT_EQ(expectedEvents, events);
+
+	// Check the location of the text
+	ASSERT_EQ(3U, events[1].second.asArray()[0].getLocation().getStart());
+	ASSERT_EQ(20U, events[1].second.asArray()[0].getLocation().getEnd());
+}
+
+TEST(OsxmlEventParser, commandWithDataTrimWhitespace)
+{
+	const char *testString = "<a>  hello  \n world </a>";
+	//                        012345678901 234567890123
+	//                        0         1          2
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::DATA, Variant::arrayType{"hello  \n world"}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString, WhitespaceMode::TRIM);
+	ASSERT_EQ(expectedEvents, events);
+
+	// Check the location of the text
+	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart());
+	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd());
+}
+
+TEST(OsxmlEventParser, commandWithDataCollapseWhitespace)
+{
+	const char *testString = "<a>  hello  \n world </a>";
+	//                        012345678901 234567890123
+	//                        0         1          2
+
+	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
+	    {OsxmlEvent::COMMAND_START,
+	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::DATA, Variant::arrayType{"hello world"}},
+	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
+
+	auto events = parseXml(testString, WhitespaceMode::COLLAPSE);
+	ASSERT_EQ(expectedEvents, events);
+
+	// Check the location of the text
+	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart());
+	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd());
+}
+
+}
+
-- 
cgit v1.2.3


From 9b4cdfabf6527440d6ffa499cc6b57a44daaeadb Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:05:42 +0100
Subject: Added code for the handling of explicit default fields and improved
 unit tests

---
 CMakeLists.txt                             |  16 +-
 src/formats/osml/OsmlStreamParser.cpp      |  78 +++++--
 src/formats/osml/OsmlStreamParser.hpp      |  45 +++-
 test/formats/osml/OsmlStreamParserTest.cpp | 340 +++++++++++++++++------------
 4 files changed, 302 insertions(+), 177 deletions(-)

(limited to 'src/formats')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bdc9541..d311f7a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -290,15 +290,15 @@ IF(TEST)
 		ousia_core
 	)
 
-#	ADD_EXECUTABLE(ousia_test_filesystem
-#		test/plugins/filesystem/FileLocatorTest
-#	)
+	ADD_EXECUTABLE(ousia_test_filesystem
+		test/plugins/filesystem/FileLocatorTest
+	)
 
-#	TARGET_LINK_LIBRARIES(ousia_test_filesystem
-#		${GTEST_LIBRARIES}
-#		ousia_core
-#		ousia_filesystem
-#	)
+	TARGET_LINK_LIBRARIES(ousia_test_filesystem
+		${GTEST_LIBRARIES}
+		ousia_core
+		ousia_filesystem
+	)
 
 #	ADD_EXECUTABLE(ousia_test_css
 #		test/plugins/css/Tokenizer
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 6b00eef..6606120 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -60,6 +60,11 @@ public:
 	 */
 	TokenTypeId FieldEnd;
 
+	/**
+	 * Id of the default field start token.
+	 */
+	TokenTypeId DefaultFieldStart;
+
 	/**
 	 * Registers the plain format tokens in the internal tokenizer.
 	 */
@@ -71,6 +76,7 @@ public:
 		BlockCommentEnd = registerToken("}%");
 		FieldStart = registerToken("{");
 		FieldEnd = registerToken("}");
+		DefaultFieldStart = registerToken("{!");
 	}
 };
 
@@ -164,7 +170,7 @@ OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger)
     : reader(reader), logger(logger), tokenizer(Tokens)
 {
 	// Place an intial command representing the complete file on the stack
-	commands.push(Command{"", Variant::mapType{}, true, true, true});
+	commands.push(Command{"", Variant::mapType{}, true, true, true, false});
 }
 
 Variant OsmlStreamParser::parseIdentifier(size_t start, bool allowNSSep)
@@ -365,7 +371,7 @@ void OsmlStreamParser::pushCommand(Variant commandName,
 		commands.pop();
 	}
 	commands.push(Command{std::move(commandName), std::move(commandArguments),
-	                      hasRange, false, false});
+	                      hasRange, false, false, false});
 }
 
 OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
@@ -482,6 +488,29 @@ bool OsmlStreamParser::checkIssueFieldStart()
 	return false;
 }
 
+bool OsmlStreamParser::closeField()
+{
+	// Try to end an open field of the current command -- if the current command
+	// is not inside an open field, end this command and try to close the next
+	// one
+	for (int i = 0; i < 2 && commands.size() > 1; i++) {
+		Command &cmd = commands.top();
+		if (!cmd.inRangeField) {
+			if (cmd.inField) {
+				cmd.inField = false;
+				if (cmd.inDefaultField) {
+					commands.pop();
+				}
+				return true;
+			}
+			commands.pop();
+		} else {
+			return false;
+		}
+	}
+	return false;
+}
+
 OsmlStreamParser::State OsmlStreamParser::parse()
 {
 	// Handler for incomming data
@@ -579,27 +608,29 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 			}
 			logger.error(
 			    "Got field start token \"{\", but no command for which to "
-			    "start the field. Did you mean \"\\{\"?",
+			    "start the field. Write \"\\{\" to insert this sequence as "
+			    "text.",
 			    token);
 		} else if (token.type == Tokens.FieldEnd) {
-			// Try to end an open field of the current command -- if the current
-			// command is not inside an open field, end this command and try to
-			// close the next one
-			for (int i = 0; i < 2 && commands.size() > 1; i++) {
-				Command &cmd = commands.top();
-				if (!cmd.inRangeField) {
-					if (cmd.inField) {
-						cmd.inField = false;
-						return State::FIELD_END;
-					}
-					commands.pop();
-				} else {
-					break;
-				}
+			if (closeField()) {
+				return State::FIELD_END;
+			}
+			logger.error(
+			    "Got field end token \"}\", but there is no field to end. "
+			    "Write \"\\}\" to insert this sequence as text.",
+			    token);
+		} else if (token.type == Tokens.DefaultFieldStart) {
+			// Try to start a default field the first time the token is reached
+			Command &topCmd = commands.top();
+			if (!topCmd.inField) {
+				topCmd.inField = true;
+				topCmd.inDefaultField = true;
+				return State::FIELD_START;
 			}
 			logger.error(
-			    "Got field end token \"}\", but there is no field to end. Did "
-			    "you mean \"\\}\"?",
+			    "Got default field start token \"{!\", but no command for "
+			    "which to start the field. Write \"\\{!\" to insert this "
+			    "sequence as text",
 			    token);
 		} else {
 			logger.error("Unexpected token \"" + token.content + "\"", token);
@@ -627,14 +658,19 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 	return State::END;
 }
 
-const Variant &OsmlStreamParser::getCommandName()
+const Variant &OsmlStreamParser::getCommandName() const
 {
 	return commands.top().name;
 }
 
-const Variant &OsmlStreamParser::getCommandArguments()
+const Variant &OsmlStreamParser::getCommandArguments() const
 {
 	return commands.top().arguments;
 }
+
+bool OsmlStreamParser::inDefaultField() const
+{
+	return commands.top().inRangeField || commands.top().inDefaultField;
+}
 }
 
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index 1508012..bb5db65 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -152,10 +152,16 @@ public:
 		 */
 		bool inRangeField;
 
+		/**
+		 * Set to true if we are currently in a field that has been especially
+		 * marked as default field (using the "{!" syntax).
+		 */
+		bool inDefaultField;
+
 		/**
 		 * Default constructor.
 		 */
-		Command() : hasRange(false), inField(false), inRangeField(false) {}
+		Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {}
 
 		/**
 		 * Constructor of the Command class.
@@ -168,16 +174,19 @@ public:
 		 * explicit range.
 		 * @param inField is set to true if we currently are inside a field
 		 * of this command.
-		 * @param inRangeField is set to true if we currently inside the outer
-		 * field of the command.
+		 * @param inRangeField is set to true if we currently are inside the
+		 * outer field of a ranged command.
+		 * @param inDefaultField is set to true if we currently are in a
+		 * specially marked default field.
 		 */
 		Command(Variant name, Variant arguments, bool hasRange, bool inField,
-		        bool inRangeField)
+		        bool inRangeField, bool inDefaultField)
 		    : name(std::move(name)),
 		      arguments(std::move(arguments)),
 		      hasRange(hasRange),
 		      inField(inField),
-		      inRangeField(inRangeField)
+		      inRangeField(inRangeField),
+		      inDefaultField(inDefaultField)
 		{
 		}
 	};
@@ -289,6 +298,16 @@ private:
 	 */
 	bool checkIssueFieldStart();
 
+	/**
+	 * Closes a currently open field. Note that the command will be removed from
+	 * the internal command stack if the field that is being closed is a
+	 * field marked as default field.
+	 *
+	 * @return true if the field could be closed, false if there was no field
+	 * to close.
+	 */
+	bool closeField();
+
 public:
 	/**
 	 * Constructor of the OsmlStreamParser class. Attaches the new
@@ -317,7 +336,7 @@ public:
 	 * @return a reference at a variant containing the data parsed by the
 	 * "parse" function.
 	 */
-	const Variant &getData() { return data; }
+	const Variant &getData() const { return data; }
 
 	/**
 	 * Returns a reference at the internally stored command name. Only valid if
@@ -326,7 +345,7 @@ public:
 	 * @return a reference at a variant containing name and location of the
 	 * parsed command.
 	 */
-	const Variant &getCommandName();
+	const Variant &getCommandName() const;
 
 	/**
 	 * Returns a reference at the internally stored command name. Only valid if
@@ -335,14 +354,22 @@ public:
 	 * @return a reference at a variant containing arguments given to the
 	 * command.
 	 */
-	const Variant &getCommandArguments();
+	const Variant &getCommandArguments() const;
+
+	/**
+	 * Returns true if the current field is the "default" field. This is true if
+	 * the parser either is in the outer range of a range command or inside a
+	 * field that has been especially marked as "default" field (using the "{!"
+	 * syntax).
+	 */
+	bool inDefaultField() const;
 
 	/**
 	 * Returns a reference at the char reader.
 	 *
 	 * @return the last internal token location.
 	 */
-	SourceLocation &getLocation() { return location; }
+	const SourceLocation &getLocation() const { return location; }
 };
 }
 
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index b944af8..da9fe8a 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -28,7 +28,88 @@
 namespace ousia {
 
 static TerminalLogger logger(std::cerr, true);
-//static ConcreteLogger logger;
+// static ConcreteLogger logger;
+
+static void assertCommand(OsmlStreamParser &reader, const std::string &name,
+                          SourceOffset start = InvalidSourceOffset,
+                          SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
+	EXPECT_EQ(name, reader.getCommandName().asString());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertCommand(OsmlStreamParser &reader, const std::string &name,
+                          const Variant::mapType &args,
+                          SourceOffset start = InvalidSourceOffset,
+                          SourceOffset end = InvalidSourceOffset)
+{
+	assertCommand(reader, name, start, end);
+	EXPECT_EQ(args, reader.getCommandArguments());
+}
+
+static void assertData(OsmlStreamParser &reader, const std::string &data,
+                       SourceOffset start = InvalidSourceOffset,
+                       SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
+	EXPECT_EQ(data, reader.getData().asString());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getData().getLocation().getStart());
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getData().getLocation().getEnd());
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertFieldStart(OsmlStreamParser &reader, bool defaultField,
+                             SourceOffset start = InvalidSourceOffset,
+                             SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse());
+	EXPECT_EQ(defaultField, reader.inDefaultField());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertFieldEnd(OsmlStreamParser &reader,
+                           SourceOffset start = InvalidSourceOffset,
+                           SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertEnd(OsmlStreamParser &reader,
+                      SourceOffset start = InvalidSourceOffset,
+                      SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
 
 TEST(OsmlStreamParser, empty)
 {
@@ -47,12 +128,7 @@ TEST(OsmlStreamParser, oneCharacter)
 
 	OsmlStreamParser reader(charReader, logger);
 
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("a", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(0U, loc.getStart());
-	ASSERT_EQ(1U, loc.getEnd());
+	assertData(reader, "a", 0, 1);
 }
 
 TEST(OsmlStreamParser, whitespaceElimination)
@@ -64,12 +140,7 @@ TEST(OsmlStreamParser, whitespaceElimination)
 
 	OsmlStreamParser reader(charReader, logger);
 
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(14U, loc.getEnd());
+	assertData(reader, "hello world", 1, 14);
 }
 
 TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak)
@@ -81,13 +152,7 @@ TEST(OsmlStreamParser, whitespaceEliminationWithLinebreak)
 
 	OsmlStreamParser reader(charReader, logger);
 
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(14U, loc.getEnd());
-	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+	assertData(reader, "hello world", 1, 14);
 }
 
 TEST(OsmlStreamParser, escapeWhitespace)
@@ -99,13 +164,7 @@ TEST(OsmlStreamParser, escapeWhitespace)
 
 	OsmlStreamParser reader(charReader, logger);
 
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	ASSERT_EQ("hello  world", reader.getData().asString());
-
-	SourceLocation loc = reader.getData().getLocation();
-	ASSERT_EQ(1U, loc.getStart());
-	ASSERT_EQ(15U, loc.getEnd());
-	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
+	assertData(reader, "hello  world", 1, 15);
 }
 
 static void testEscapeSpecialCharacter(const std::string &c)
@@ -127,6 +186,7 @@ TEST(OsmlStreamParser, escapeSpecialCharacters)
 	testEscapeSpecialCharacter("}");
 	testEscapeSpecialCharacter("<");
 	testEscapeSpecialCharacter(">");
+	testEscapeSpecialCharacter("|");
 }
 
 TEST(OsmlStreamParser, simpleSingleLineComment)
@@ -347,86 +407,6 @@ TEST(OsmlStreamParser, simpleCommandWithArgumentsAndName)
 	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
 }
 
-static void assertCommand(OsmlStreamParser &reader, const std::string &name,
-                          SourceOffset start = InvalidSourceOffset,
-                          SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::COMMAND, reader.parse());
-	EXPECT_EQ(name, reader.getCommandName().asString());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertCommand(OsmlStreamParser &reader, const std::string &name,
-                          const Variant::mapType &args,
-                          SourceOffset start = InvalidSourceOffset,
-                          SourceOffset end = InvalidSourceOffset)
-{
-	assertCommand(reader, name, start, end);
-	EXPECT_EQ(args, reader.getCommandArguments());
-}
-
-static void assertData(OsmlStreamParser &reader, const std::string &data,
-                       SourceOffset start = InvalidSourceOffset,
-                       SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::DATA, reader.parse());
-	EXPECT_EQ(data, reader.getData().asString());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getData().getLocation().getStart());
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getData().getLocation().getEnd());
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertFieldStart(OsmlStreamParser &reader,
-                             SourceOffset start = InvalidSourceOffset,
-                             SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::FIELD_START, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertFieldEnd(OsmlStreamParser &reader,
-                           SourceOffset start = InvalidSourceOffset,
-                           SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::FIELD_END, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
-static void assertEnd(OsmlStreamParser &reader,
-                      SourceOffset start = InvalidSourceOffset,
-                      SourceOffset end = InvalidSourceOffset)
-{
-	ASSERT_EQ(OsmlStreamParser::State::END, reader.parse());
-	if (start != InvalidSourceOffset) {
-		EXPECT_EQ(start, reader.getLocation().getStart());
-	}
-	if (end != InvalidSourceOffset) {
-		EXPECT_EQ(end, reader.getLocation().getEnd());
-	}
-}
-
 TEST(OsmlStreamParser, fields)
 {
 	const char *testString = "\\test{a}{b}{c}";
@@ -436,15 +416,15 @@ TEST(OsmlStreamParser, fields)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertData(reader, "a", 6, 7);
 	assertFieldEnd(reader, 7, 8);
 
-	assertFieldStart(reader, 8, 9);
+	assertFieldStart(reader, false, 8, 9);
 	assertData(reader, "b", 9, 10);
 	assertFieldEnd(reader, 10, 11);
 
-	assertFieldStart(reader, 11, 12);
+	assertFieldStart(reader, false, 11, 12);
 	assertData(reader, "c", 12, 13);
 	assertFieldEnd(reader, 13, 14);
 	assertEnd(reader, 14, 14);
@@ -459,11 +439,11 @@ TEST(OsmlStreamParser, dataOutsideField)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertData(reader, "a", 6, 7);
 	assertFieldEnd(reader, 7, 8);
 
-	assertFieldStart(reader, 8, 9);
+	assertFieldStart(reader, false, 8, 9);
 	assertData(reader, "b", 9, 10);
 	assertFieldEnd(reader, 10, 11);
 
@@ -481,14 +461,14 @@ TEST(OsmlStreamParser, nestedCommand)
 
 	assertCommand(reader, "test", 0, 5);
 
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertData(reader, "a", 6, 7);
 	assertFieldEnd(reader, 7, 8);
 
-	assertFieldStart(reader, 8, 9);
+	assertFieldStart(reader, false, 8, 9);
 	{
 		assertCommand(reader, "test2", 9, 15);
-		assertFieldStart(reader, 15, 16);
+		assertFieldStart(reader, false, 15, 16);
 		assertData(reader, "b", 16, 17);
 		assertFieldEnd(reader, 17, 18);
 	}
@@ -507,10 +487,10 @@ TEST(OsmlStreamParser, nestedCommandImmediateEnd)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	{
 		assertCommand(reader, "test2", 6, 12);
-		assertFieldStart(reader, 12, 13);
+		assertFieldStart(reader, false, 12, 13);
 		assertData(reader, "b", 13, 14);
 		assertFieldEnd(reader, 14, 15);
 	}
@@ -527,7 +507,7 @@ TEST(OsmlStreamParser, nestedCommandNoData)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertCommand(reader, "test2", 6, 12);
 	assertFieldEnd(reader, 12, 13);
 	assertEnd(reader, 13, 13);
@@ -557,11 +537,11 @@ TEST(OsmlStreamParser, fieldsWithSpaces)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "a", 0, 2);
-	assertFieldStart(reader, 3, 4);
+	assertFieldStart(reader, false, 3, 4);
 	assertCommand(reader, "b", 4, 6);
 	assertCommand(reader, "c", 7, 9);
 	assertFieldEnd(reader, 9, 10);
-	assertFieldStart(reader, 16, 17);
+	assertFieldStart(reader, false, 16, 17);
 	assertCommand(reader, "d", 17, 19);
 	assertFieldEnd(reader, 19, 20);
 	assertEnd(reader, 20, 20);
@@ -612,9 +592,9 @@ TEST(OsmlStreamParser, errorNoFieldEndNested)
 
 	logger.reset();
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertCommand(reader, "test2", 6, 12);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, false, 12, 13);
 	assertFieldEnd(reader, 13, 14);
 	assertFieldEnd(reader, 14, 15);
 	ASSERT_FALSE(logger.hasError());
@@ -633,9 +613,9 @@ TEST(OsmlStreamParser, errorNoFieldEndNestedData)
 
 	logger.reset();
 	assertCommand(reader, "test", 0, 5);
-	assertFieldStart(reader, 5, 6);
+	assertFieldStart(reader, false, 5, 6);
 	assertCommand(reader, "test2", 6, 12);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, false, 12, 13);
 	assertFieldEnd(reader, 13, 14);
 	assertFieldEnd(reader, 14, 15);
 	assertData(reader, "a", 15, 16);
@@ -654,7 +634,7 @@ TEST(OsmlStreamParser, beginEnd)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, true, 12, 13);
 	assertFieldEnd(reader, 17, 21);
 	assertEnd(reader, 22, 22);
 }
@@ -669,7 +649,7 @@ TEST(OsmlStreamParser, beginEndWithName)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "book", {{"name", "a"}}, 7, 11);
-	assertFieldStart(reader, 14, 15);
+	assertFieldStart(reader, true, 14, 15);
 	assertFieldEnd(reader, 19, 23);
 	assertEnd(reader, 24, 24);
 }
@@ -685,7 +665,7 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgs)
 
 	assertCommand(reader, "book",
 	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
-	assertFieldStart(reader, 32, 33);
+	assertFieldStart(reader, true, 32, 33);
 	assertFieldEnd(reader, 37, 41);
 	assertEnd(reader, 42, 42);
 }
@@ -702,17 +682,17 @@ TEST(OsmlStreamParser, beginEndWithNameAndArgsMultipleFields)
 
 	assertCommand(reader, "book",
 	              {{"name", "a"}, {"a", 1}, {"b", 2}, {"c", "test"}}, 7, 11);
-	assertFieldStart(reader, 32, 33);
+	assertFieldStart(reader, false, 32, 33);
 	assertData(reader, "a", 33, 34);
 	assertCommand(reader, "test", Variant::mapType{}, 35, 40);
 	assertFieldEnd(reader, 40, 41);
-	assertFieldStart(reader, 41, 42);
+	assertFieldStart(reader, false, 41, 42);
 	assertData(reader, "b", 42, 43);
 	assertCommand(reader, "test", Variant::mapType{}, 44, 49);
-	assertFieldStart(reader, 49, 50);
+	assertFieldStart(reader, false, 49, 50);
 	assertFieldEnd(reader, 50, 51);
 	assertFieldEnd(reader, 51, 52);
-	assertFieldStart(reader, 52, 53);
+	assertFieldStart(reader, true, 52, 53);
 	assertFieldEnd(reader, 57, 61);
 	assertEnd(reader, 62, 62);
 }
@@ -727,12 +707,45 @@ TEST(OsmlStreamParser, beginEndWithData)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, true, 12, 13);
 	assertData(reader, "a", 12, 13);
 	assertFieldEnd(reader, 18, 22);
 	assertEnd(reader, 23, 23);
 }
 
+TEST(OsmlStreamParser, beginEndNested)
+{
+	const char *testString =
+	    "\\begin{a}{b} c \\begin{d}{e}{f} \\g{h} \\end{d}\\end{a}";
+	//    012345678901234 5678901234567890 123456 7890123 4567890
+	//    0         1          2         3           4          5
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 7, 8);
+	assertFieldStart(reader, false, 9, 10);
+	assertData(reader, "b", 10, 11);
+	assertFieldEnd(reader, 11, 12);
+	assertFieldStart(reader, true, 13, 14);
+	assertData(reader, "c", 13, 14);
+	assertCommand(reader, "d", 22, 23);
+	assertFieldStart(reader, false, 24, 25);
+	assertData(reader, "e", 25, 26);
+	assertFieldEnd(reader, 26, 27);
+	assertFieldStart(reader, false, 27, 28);
+	assertData(reader, "f", 28, 29);
+	assertFieldEnd(reader, 29, 30);
+	assertFieldStart(reader, true, 31, 32);
+	assertCommand(reader, "g", 31, 33);
+	assertFieldStart(reader, false, 33, 34);
+	assertData(reader, "h", 34, 35);
+	assertFieldEnd(reader, 35, 36);
+	assertFieldEnd(reader, 42, 43);
+	assertFieldEnd(reader, 49, 50);
+	assertEnd(reader, 51, 51);
+}
+
 TEST(OsmlStreamParser, beginEndWithCommand)
 {
 	const char *testString = "\\begin{book}\\a{test}\\end{book}";
@@ -743,9 +756,9 @@ TEST(OsmlStreamParser, beginEndWithCommand)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "book", 7, 11);
-	assertFieldStart(reader, 12, 13);
+	assertFieldStart(reader, true, 12, 13);
 	assertCommand(reader, "a", 12, 14);
-	assertFieldStart(reader, 14, 15);
+	assertFieldStart(reader, false, 14, 15);
 	assertData(reader, "test", 15, 19);
 	assertFieldEnd(reader, 19, 20);
 	assertFieldEnd(reader, 25, 29);
@@ -873,9 +886,9 @@ TEST(OsmlStreamParser, errorBeginEndMismatch)
 
 	logger.reset();
 	assertCommand(reader, "a", 7, 8);
-	assertFieldStart(reader, 10, 11);
+	assertFieldStart(reader, true, 10, 11);
 	assertCommand(reader, "b", 17, 18);
-	assertFieldStart(reader, 20, 24);
+	assertFieldStart(reader, true, 20, 24);
 	assertData(reader, "test", 20, 24);
 	ASSERT_FALSE(logger.hasError());
 	ASSERT_THROW(reader.parse(), LoggableException);
@@ -904,7 +917,7 @@ TEST(OsmlStreamParser, beginEndWithNSSep)
 	OsmlStreamParser reader(charReader, logger);
 
 	assertCommand(reader, "test1:test2", 7, 18);
-	assertFieldStart(reader, 19, 20);
+	assertFieldStart(reader, true, 19, 20);
 	assertFieldEnd(reader, 24, 35);
 	assertEnd(reader, 36, 36);
 }
@@ -920,7 +933,7 @@ TEST(OsmlStreamParser, errorBeginNSSep)
 	ASSERT_FALSE(logger.hasError());
 	assertCommand(reader, "blub");
 	ASSERT_TRUE(logger.hasError());
-	assertFieldStart(reader);
+	assertFieldStart(reader, true);
 	assertFieldEnd(reader);
 	assertEnd(reader);
 }
@@ -934,7 +947,7 @@ TEST(OsmlStreamParser, errorEndNSSep)
 
 	logger.reset();
 	assertCommand(reader, "blub");
-	assertFieldStart(reader);
+	assertFieldStart(reader, true);
 	ASSERT_FALSE(logger.hasError());
 	assertFieldEnd(reader);
 	ASSERT_TRUE(logger.hasError());
@@ -970,5 +983,54 @@ TEST(OsmlStreamParser, errorRepeatedNs)
 	assertData(reader, "::");
 	assertEnd(reader);
 }
+
+TEST(OsmlStreamParser, explicitDefaultField)
+{
+	const char *testString = "\\a{!b}c";
+	//                         01234567
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 0, 2);
+	assertFieldStart(reader, true, 2, 4);
+	assertData(reader, "b", 4, 5);
+	assertFieldEnd(reader, 5, 6);
+	assertData(reader, "c", 6, 7);
+	assertEnd(reader, 7, 7);
+}
+
+TEST(OsmlStreamParser, explicitDefaultFieldWithCommand)
+{
+	const char *testString = "\\a{!\\b}c";
+	//                         0123 4567
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertCommand(reader, "a", 0, 2);
+	assertFieldStart(reader, true, 2, 4);
+	assertCommand(reader, "b", 4, 6);
+	assertFieldEnd(reader, 6, 7);
+	assertData(reader, "c", 7, 8);
+	assertEnd(reader, 8, 8);
+}
+
+TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField)
+{
+	const char *testString = "\\a{!\\b}{c}";
+	//                         0123 456789
+	CharReader charReader(testString);
+	OsmlStreamParser reader(charReader, logger);
+	logger.reset();  // Start from a clean state so hasError() is meaningful
+	assertCommand(reader, "a", 0, 2);
+	assertFieldStart(reader, true, 2, 4);
+	assertCommand(reader, "b", 4, 6);
+	assertFieldEnd(reader, 6, 7);
+	assertData(reader, "c", 8, 9);  // "{c}" after the default field is no field
+	assertEnd(reader, 10, 10);
+	ASSERT_TRUE(logger.hasError());  // The stray "{" / "}" must be reported
+}
+
 }
 
-- 
cgit v1.2.3


From 205810b44c980998958dcd857c2cb34a914dc760 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Thu, 12 Feb 2015 16:21:36 +0100
Subject: Implemented annotation start and end field

---
 contrib/test.osdm                          |  29 ----
 contrib/test.osml                          |  29 ++++
 src/formats/osml/OsmlStreamParser.cpp      | 116 ++++++++++++---
 src/formats/osml/OsmlStreamParser.hpp      |  16 +-
 test/formats/osml/OsmlStreamParserTest.cpp | 228 ++++++++++++++++++++++++++++-
 5 files changed, 363 insertions(+), 55 deletions(-)
 delete mode 100644 contrib/test.osdm
 create mode 100644 contrib/test.osml

(limited to 'src/formats')

diff --git a/contrib/test.osdm b/contrib/test.osdm
deleted file mode 100644
index 100bc77..0000000
--- a/contrib/test.osdm
+++ /dev/null
@@ -1,29 +0,0 @@
-%{
-	We're currently inside a block comment.
-	%{
-		Note that block comments can be nested, easily allowing you to comment
-		out blocks which already contain comments.
-	}%
-}%
-
-% Well, line comments, as we know them from TeX also work
-
-\import{meta}
-\import{book}
-
-\domain#special_words{
-	\struct#latex
-	\struct#ousia
-}
-
-\book{
-	\include{chapters/chapter1}
-	\include{chapters/chapter2}
-
-	\begin{note}{Behaviour of "Include"}
-		Analogous to the `include` command in \latex, \ousia forces the included
-		file to be *complete* in a sense, that it must not have dangling open
-		commands.
-	\end{note}
-}
-
diff --git a/contrib/test.osml b/contrib/test.osml
new file mode 100644
index 0000000..100bc77
--- /dev/null
+++ b/contrib/test.osml
@@ -0,0 +1,29 @@
+%{
+	We're currently inside a block comment.
+	%{
+		Note that block comments can be nested, easily allowing you to comment
+		out blocks which already contain comments.
+	}%
+}%
+
+% Well, line comments, as we know them from TeX also work
+
+\import{meta}
+\import{book}
+
+\domain#special_words{
+	\struct#latex
+	\struct#ousia
+}
+
+\book{
+	\include{chapters/chapter1}
+	\include{chapters/chapter2}
+
+	\begin{note}{Behaviour of "Include"}
+		Analogous to the `include` command in \latex, \ousia forces the included
+		file to be *complete* in a sense, that it must not have dangling open
+		commands.
+	\end{note}
+}
+
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 6606120..0174fa4 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -65,6 +65,16 @@ public:
 	 */
 	TokenTypeId DefaultFieldStart;
 
+	/**
+	 * Id of the annotation start token.
+	 */
+	TokenTypeId AnnotationStart;
+
+	/**
+	 * Id of the annotation end token.
+	 */
+	TokenTypeId AnnotationEnd;
+
 	/**
 	 * Registers the plain format tokens in the internal tokenizer.
 	 */
@@ -77,6 +87,8 @@ public:
 		FieldStart = registerToken("{");
 		FieldEnd = registerToken("}");
 		DefaultFieldStart = registerToken("{!");
+		AnnotationStart = registerToken("<\\");
+		AnnotationEnd = registerToken("\\>");
 	}
 };
 
@@ -374,7 +386,8 @@ void OsmlStreamParser::pushCommand(Variant commandName,
 	                      hasRange, false, false, false});
 }
 
-OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
+OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start,
+                                                       bool isAnnotation)
 {
 	// Parse the commandName as a first identifier
 	Variant commandName = parseIdentifier(start, true);
@@ -388,6 +401,9 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
 	    Utils::split(commandName.asString(), ':');
 	const bool isBegin = commandNameComponents[0] == "begin";
 	const bool isEnd = commandNameComponents[0] == "end";
+
+	// Parse the begin or end command
+	State res = State::COMMAND;
 	if (isBegin || isEnd) {
 		if (commandNameComponents.size() > 1) {
 			logger.error(
@@ -396,30 +412,76 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
 			    commandName);
 		}
 		if (isBegin) {
-			return parseBeginCommand();
+			res = parseBeginCommand();
 		} else if (isEnd) {
-			return parseEndCommand();
+			res = parseEndCommand();
+		}
+	} else {
+		// Check whether the next character is a '#', indicating the start of
+		// the command name
+		Variant commandArgName;
+		start = reader.getOffset();
+		if (reader.expect('#')) {
+			commandArgName = parseIdentifier(start);
+			if (commandArgName.asString().empty()) {
+				logger.error("Expected identifier after \"#\"", commandArgName);
+			}
 		}
+
+		// Parse the arguments
+		Variant commandArguments =
+		    parseCommandArguments(std::move(commandArgName));
+
+		// Push the command onto the command stack
+		pushCommand(std::move(commandName), std::move(commandArguments), false);
 	}
 
-	// Check whether the next character is a '#', indicating the start of the
-	// command name
-	Variant commandArgName;
-	start = reader.getOffset();
-	if (reader.expect('#')) {
-		commandArgName = parseIdentifier(start);
-		if (commandArgName.asString().empty()) {
-			logger.error("Expected identifier after \"#\"", commandArgName);
+	// Check whether a ">" character is the next character that is to be read.
+	// In that case the current command could be an annotation end command!
+	char c;
+	if (reader.fetch(c) && c == '>') {
+		// Ignore the character after a begin or end command
+		if (isBegin || isEnd) {
+			logger.warning(
+			    "Ignoring annotation end character \">\" after special "
+			    "commands \"begin\" or \"end\". Write \"\\>\" to end a "
+			    "\"begin\"/\"end\" enclosed annotation.",
+			    reader);
+			return res;
 		}
-	}
 
-	// Parse the arugments
-	Variant commandArguments = parseCommandArguments(std::move(commandArgName));
+		// If this should be an annotation, ignore the character
+		if (isAnnotation) {
+			logger.warning(
+			    "Ignoring annotation end character \">\" after annotation "
+			    "start command. Write \"\\>\" to end the annotation.",
+			    reader);
+		} else {
+			// Make sure no arguments apart from the "name" argument are given
+			// to an annotation end
+			Variant::mapType &map = commands.top().arguments.asMap();
+			if (!map.empty()) {
+				if (map.count("name") == 0 || map.size() > 1U) {
+					logger.error(
+					    "An annotation end command may not have any arguments "
+					    "other than \"name\"");
+					return res;
+				}
+			}
 
-	// Push the command onto the command stack
-	pushCommand(std::move(commandName), std::move(commandArguments), false);
+			// If we got here, this is a valid ANNOTATION_END command, issue it
+			reader.peek(c);
+			reader.consumePeek();
+			return State::ANNOTATION_END;
+		}
+	}
 
-	return State::COMMAND;
+	// If we're starting an annotation, return the command as annotation start
+	// instead of command
+	if (isAnnotation && res == State::COMMAND) {
+		return State::ANNOTATION_START;
+	}
+	return res;
 }
 
 void OsmlStreamParser::parseBlockComment()
@@ -522,7 +584,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 		const TokenTypeId type = token.type;
 
 		// Special handling for Backslash and Text
-		if (type == Tokens.Backslash) {
+		if (type == Tokens.Backslash || type == Tokens.AnnotationStart) {
 			// Before appending anything to the output data or starting a new
 			// command, check whether FIELD_START has to be issued, as the
 			// current command is a command with range
@@ -548,7 +610,8 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 				}
 
 				// Parse the actual command
-				State res = parseCommand(token.location.getStart());
+				State res = parseCommand(token.location.getStart(),
+				                         type == Tokens.AnnotationStart);
 				switch (res) {
 					case State::ERROR:
 						throw LoggableException(
@@ -565,6 +628,14 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 			// to the data buffer, use the escape character start as start
 			// location and the peek offset as end location
 			reader.peek(c);  // Peek the previously fetched character
+
+			// If this was an annotation start token, add the parsed < to the
+			// output
+			if (type == Tokens.AnnotationStart) {
+				handler.append('<', token.location.getStart(),
+				               token.location.getStart() + 1);
+			}
+
 			handler.append(c, token.location.getStart(),
 			               reader.getPeekOffset());
 			reader.consumePeek();
@@ -632,6 +703,13 @@ OsmlStreamParser::State OsmlStreamParser::parse()
 			    "which to start the field. Write \"\\{!\" to insert this "
 			    "sequence as text",
 			    token);
+		} else if (token.type == Tokens.AnnotationEnd) {
+			// We got a single annotation end token "\>" -- simply issue the
+			// ANNOTATION_END event
+			Variant annotationName = Variant::fromString("");
+			annotationName.setLocation(token.location);
+			pushCommand(annotationName, Variant::mapType{}, false);
+			return State::ANNOTATION_END;
 		} else {
 			logger.error("Unexpected token \"" + token.content + "\"", token);
 		}
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index bb5db65..3827118 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -161,7 +161,13 @@ public:
 		/**
 		 * Default constructor.
 		 */
-		Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {}
+		Command()
+		    : hasRange(false),
+		      inField(false),
+		      inRangeField(false),
+		      inDefaultField()
+		{
+		}
 
 		/**
 		 * Constructor of the Command class.
@@ -179,8 +185,8 @@ public:
 		 * @param inDefaultField is set to true if we currently are in a
 		 * specially marked default field.
 		 */
-		Command(Variant name, Variant arguments, bool hasRange, bool inField,
-		        bool inRangeField, bool inDefaultField)
+		Command(Variant name, Variant arguments, bool hasRange,
+		        bool inField, bool inRangeField, bool inDefaultField)
 		    : name(std::move(name)),
 		      arguments(std::move(arguments)),
 		      hasRange(hasRange),
@@ -266,9 +272,11 @@ private:
 	 *
 	 * @param start is the start byte offset of the command (including the
 	 * backslash)
+	 * @param isAnnotation if true, the command is not returned as command, but
+	 * as annotation start.
 	 * @return true if a command was actuall parsed, false otherwise.
 	 */
-	State parseCommand(size_t start);
+	State parseCommand(size_t start, bool isAnnotation);
 
 	/**
 	 * Function used internally to parse a block comment.
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index 5f23822..d52fa5b 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -98,6 +98,56 @@ static void assertFieldEnd(OsmlStreamParser &reader,
 	}
 }
 
+static void assertAnnotationStart(OsmlStreamParser &reader,
+                                  const std::string &name,
+                                  SourceOffset start = InvalidSourceOffset,
+                                  SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse());
+	EXPECT_EQ(name, reader.getCommandName().asString());
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
+static void assertAnnotationStart(OsmlStreamParser &reader,
+                                  const std::string &name,
+                                  const Variant::mapType &args,
+                                  SourceOffset start = InvalidSourceOffset,
+                                  SourceOffset end = InvalidSourceOffset)
+{
+	assertAnnotationStart(reader, name, start, end);
+	EXPECT_EQ(args, reader.getCommandArguments());
+}
+
+static void assertAnnotationEnd(OsmlStreamParser &reader,
+                                const std::string &name,
+                                const std::string &elementName,
+                                SourceOffset start = InvalidSourceOffset,
+                                SourceOffset end = InvalidSourceOffset)
+{
+	ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse());
+	ASSERT_EQ(name, reader.getCommandName().asString());
+	if (!elementName.empty()) {
+		ASSERT_EQ(1U, reader.getCommandArguments().asMap().size());
+		ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name"));
+
+		auto it = reader.getCommandArguments().asMap().find("name");
+		ASSERT_EQ(elementName, it->second.asString());
+	}
+	if (start != InvalidSourceOffset) {
+		EXPECT_EQ(start, reader.getLocation().getStart());
+	}
+	if (end != InvalidSourceOffset) {
+		EXPECT_EQ(end, reader.getLocation().getEnd());
+	}
+}
+
 static void assertEnd(OsmlStreamParser &reader,
                       SourceOffset start = InvalidSourceOffset,
                       SourceOffset end = InvalidSourceOffset)
@@ -184,9 +234,6 @@ TEST(OsmlStreamParser, escapeSpecialCharacters)
 	testEscapeSpecialCharacter("\\");
 	testEscapeSpecialCharacter("{");
 	testEscapeSpecialCharacter("}");
-	testEscapeSpecialCharacter("<");
-	testEscapeSpecialCharacter(">");
-	testEscapeSpecialCharacter("|");
 }
 
 TEST(OsmlStreamParser, simpleSingleLineComment)
@@ -1035,5 +1082,180 @@ TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField)
 	assertEnd(reader, 10, 10);
 }
 
+TEST(OsmlStreamParser, annotationStart)
+{
+	const char *testString = "<\\a";
+	//                        0 12
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
+	assertEnd(reader, 3, 3);
+}
+
+TEST(OsmlStreamParser, annotationStartWithName)
+{
+	const char *testString = "<\\annotationWithName#aName";
+	//                        0 1234567890123456789012345
+	//                        0          1         2
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(reader, "annotationWithName",
+	                      Variant::mapType{{"name", "aName"}}, 0, 20);
+	assertEnd(reader, 26, 26);
+}
+
+TEST(OsmlStreamParser, annotationStartWithArguments)
+{
+	const char *testString = "<\\annotationWithName#aName[a=1,b=2]";
+	//                        0 1234567890123456789012345678901234
+	//                        0          1         2         3
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(
+	    reader, "annotationWithName",
+	    Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20);
+	assertEnd(reader, 35, 35);
+}
+
+TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd)
+{
+	const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>";
+	//                        0 123456789012345678901234567 89012345 67
+	//                        0          1         2          3
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(
+	    reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8,
+	    10);
+	assertFieldStart(reader, true, 26, 27);
+	assertData(reader, "a", 26, 27);
+	assertFieldEnd(reader, 33, 35);
+	assertAnnotationEnd(reader, "", "", 36, 38);
+	assertEnd(reader, 38, 38);
+}
+
+TEST(OsmlStreamParser, annotationEnd)
+{
+	const char *testString = "\\a>";
+	//                         012
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationEnd(reader, "a", "", 0, 2);
+	assertEnd(reader, 3, 3);
+}
+
+TEST(OsmlStreamParser, annotationEndWithName)
+{
+	const char *testString = "\\a#name>";
+	//                         01234567
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationEnd(reader, "a", "name", 0, 2);
+	assertEnd(reader, 8, 8);
+}
+
+TEST(OsmlStreamParser, annotationEndWithNameAsArgs)
+{
+	const char *testString = "\\a[name=name]>";
+	//                         01234567890123
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationEnd(reader, "a", "name", 0, 2);
+	assertEnd(reader, 14, 14);
+}
+
+TEST(OsmlStreamParser, errorAnnotationEndWithArguments)
+{
+	const char *testString = "\\a[foo=bar]>";
+	//                         012345678901
+	//                         0         1
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	logger.reset();
+	ASSERT_FALSE(logger.hasError());
+	assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2);
+	ASSERT_TRUE(logger.hasError());
+	assertData(reader, ">", 11, 12);
+	assertEnd(reader, 12, 12);
+}
+
+TEST(OsmlStreamParser, closingAnnotation)
+{
+	const char *testString = "<\\a>";
+	//                        0 123
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
+	assertData(reader, ">", 3, 4);
+	assertEnd(reader, 4, 4);
+}
+
+TEST(OsmlStreamParser, annotationWithFields)
+{
+	const char *testString = "a <\\b{c}{d}{!e} f \\> g";
+	//                        012 345678901234567 8901
+	//                        0          1          2
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertData(reader, "a", 0, 1);
+	assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5);
+	assertFieldStart(reader, false, 5, 6);
+	assertData(reader, "c", 6, 7);
+	assertFieldEnd(reader, 7, 8);
+	assertFieldStart(reader, false, 8, 9);
+	assertData(reader, "d", 9, 10);
+	assertFieldEnd(reader, 10, 11);
+	assertFieldStart(reader, true, 11, 13);
+	assertData(reader, "e", 13, 14);
+	assertFieldEnd(reader, 14, 15);
+	assertData(reader, "f", 16, 17);
+	assertAnnotationEnd(reader, "", "", 18, 20);
+	assertData(reader, "g", 21, 22);
+	assertEnd(reader, 22, 22);
+}
+
+TEST(OsmlStreamParser, annotationStartEscape)
+{
+	const char *testString = "<\\%test";
+	//                        0 123456
+	//                        0
+
+	CharReader charReader(testString);
+
+	OsmlStreamParser reader(charReader, logger);
+
+	assertData(reader, "<%test", 0, 7);
+	assertEnd(reader, 7, 7);
+}
 }
 
-- 
cgit v1.2.3


From ddbcefd960052f3d27fef5f57fc933d269b17857 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:12:13 +0100
Subject: Made flags only one bit wide

---
 src/formats/osml/OsmlStreamParser.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/formats')

diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index 3827118..dc3034c 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -139,24 +139,24 @@ public:
 		/**
 		 * Set to true if this is a command with clear begin and end.
 		 */
-		bool hasRange;
+		bool hasRange : 1;
 
 		/**
 		 * Set to true if we are currently inside a field of this command.
 		 */
-		bool inField;
+		bool inField : 1;
 
 		/**
 		 * Set to true if we are currently in the range field of the command
 		 * (implies inField being set to true).
 		 */
-		bool inRangeField;
+		bool inRangeField : 1;
 
 		/**
 		 * Set to true if we are currently in a field that has been especially
 		 * marked as default field (using the "|") syntax.
 		 */
-		bool inDefaultField;
+		bool inDefaultField : 1;
 
 		/**
 		 * Default constructor.
-- 
cgit v1.2.3


From 19d7c2e400850d06b21acc28733a7cd8ba343d1a Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 00:15:22 +0100
Subject: Renamed OsxmlParser

---
 src/formats/osxml/OsxmlParser.hpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/formats')

diff --git a/src/formats/osxml/OsxmlParser.hpp b/src/formats/osxml/OsxmlParser.hpp
index c8b6302..281a49c 100644
--- a/src/formats/osxml/OsxmlParser.hpp
+++ b/src/formats/osxml/OsxmlParser.hpp
@@ -25,18 +25,18 @@
  * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
  */
 
-#ifndef _OUSIA_XML_PARSER_HPP_
-#define _OUSIA_XML_PARSER_HPP_
+#ifndef _OUSIA_OSXML_PARSER_HPP_
+#define _OUSIA_OSXML_PARSER_HPP_
 
 #include <core/parser/Parser.hpp>
 
 namespace ousia {
 
 /**
- * The XmlParser class implements parsing the various types of Ousía XML
- * documents using the expat stream XML parser.
+ * The OsxmlParser class implements parsing the various types of Ousía XML
+ * documents using the OsxmlEventParser and Stack classes.
  */
-class XmlParser : public Parser {
+class OsxmlParser : public Parser {
 protected:
 	/**
 	 * Parses the given input stream as XML file and returns the parsed
@@ -51,5 +51,5 @@ protected:
 
 }
 
-#endif /* _OUSIA_XML_PARSER_HPP_ */
+#endif /* _OUSIA_OSXML_PARSER_HPP_ */
 
-- 
cgit v1.2.3


From b7ffeb3dca889aee1c878e2ef0f07644f910dba2 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 20:58:05 +0100
Subject: Made OsxmlEvents interface consistent with Stack commands

---
 src/formats/osxml/OsxmlEventParser.cpp      |  2 +-
 src/formats/osxml/OsxmlEventParser.hpp      | 24 +++++++++--------
 test/formats/osxml/OsxmlEventParserTest.cpp | 41 +++++++++++++----------------
 3 files changed, 32 insertions(+), 35 deletions(-)

(limited to 'src/formats')

diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp
index b4aff77..7404960 100644
--- a/src/formats/osxml/OsxmlEventParser.cpp
+++ b/src/formats/osxml/OsxmlEventParser.cpp
@@ -329,7 +329,7 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
 		// Just issue a "commandStart" event in any other case
 		Variant nameVar = Variant::fromString(nameStr);
 		nameVar.setLocation(nameLoc);
-		parser->getEvents().commandStart(nameVar, args);
+		parser->getEvents().command(nameVar, args);
 	}
 }
 
diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp
index aa20ea9..e39245f 100644
--- a/src/formats/osxml/OsxmlEventParser.hpp
+++ b/src/formats/osxml/OsxmlEventParser.hpp
@@ -58,34 +58,36 @@ public:
 	 *
 	 * @param name is a string variant containing name and location of the
 	 * command.
-	 * @param args is a map variant containing the arguments that were given
-	 * to the command.
+	 * @param args is a map containing the arguments that were given to the
+	 * command.
 	 */
-	virtual void commandStart(Variant name, Variant args) = 0;
+	virtual void command(const Variant &name, const Variant::mapType &args) = 0;
 
 	/**
 	 * Called whenever an annotation starts. Note that this implicitly always
 	 * starts the default field of the annotation.
 	 *
-	 * @param name is a string variant containing the name of the annotation
-	 * class and the location of the annotation definition.
+	 * @param className is a string variant containing the name of the
+	 * annotation class and the location of the annotation definition.
 	 * @param args is a map variant containing the arguments that were given
 	 * to the annotation definition.
 	 */
-	virtual void annotationStart(Variant name, Variant args) = 0;
+	virtual void annotationStart(const Variant &className,
+	                             const Variant::mapType &args) = 0;
 
 	/**
 	 * Called whenever the range of an annotation ends. The callee must
 	 * disambiguate the actual annotation that is finished here.
 	 *
-	 * @param name is a string variant containing the name of the annotation
-	 * class that should end here. May be empty (or nullptr), if no elementName
-	 * has been specified at the end of the annotation.
+	 * @param className is a string variant containing the name of the
+	 * annotation class that should end here. May be empty (or nullptr), if no
+	 * className has been specified at the end of the annotation.
 	 * @param elementName is the name of the annotation element that should be
 	 * ended here. May be empty (or nullptr), if no elementName has been
 	 * specified at the end of the annotation.
 	 */
-	virtual void annotationEnd(Variant name, Variant elementName) = 0;
+	virtual void annotationEnd(const Variant &className,
+	                           const Variant &elementName) = 0;
 
 	/**
 	 * Called whenever the default field which was implicitly started by
@@ -105,7 +107,7 @@ public:
 	 * @param data is the already parsed data that should be passed to the
 	 * handler.
 	 */
-	virtual void data(Variant data) = 0;
+	virtual void data(const Variant &data) = 0;
 };
 
 /**
diff --git a/test/formats/osxml/OsxmlEventParserTest.cpp b/test/formats/osxml/OsxmlEventParserTest.cpp
index 06c800f..3293370 100644
--- a/test/formats/osxml/OsxmlEventParserTest.cpp
+++ b/test/formats/osxml/OsxmlEventParserTest.cpp
@@ -31,7 +31,7 @@ static TerminalLogger logger(std::cerr, true);
 
 namespace {
 enum class OsxmlEvent {
-	COMMAND_START,
+	COMMAND,
 	ANNOTATION_START,
 	ANNOTATION_END,
 	FIELD_END,
@@ -42,22 +42,24 @@ class TestOsxmlEventListener : public OsxmlEvents {
 public:
 	std::vector<std::pair<OsxmlEvent, Variant>> events;
 
-	void commandStart(Variant name, Variant args) override
+	void command(const Variant &name, const Variant::mapType &args) override
 	{
-		events.emplace_back(OsxmlEvent::COMMAND_START,
+		events.emplace_back(OsxmlEvent::COMMAND,
 		                    Variant::arrayType{name, args});
 	}
 
-	void annotationStart(Variant name, Variant args) override
+	void annotationStart(const Variant &className,
+	                     const Variant::mapType &args) override
 	{
 		events.emplace_back(OsxmlEvent::ANNOTATION_START,
-		                    Variant::arrayType{name, args});
+		                    Variant::arrayType{className, args});
 	}
 
-	void annotationEnd(Variant name, Variant elementName) override
+	void annotationEnd(const Variant &className,
+	                   const Variant &elementName) override
 	{
 		events.emplace_back(OsxmlEvent::ANNOTATION_END,
-		                    Variant::arrayType{name, elementName});
+		                    Variant::arrayType{className, elementName});
 	}
 
 	void fieldEnd() override
@@ -65,7 +67,7 @@ public:
 		events.emplace_back(OsxmlEvent::FIELD_END, Variant::arrayType{});
 	}
 
-	void data(Variant data) override
+	void data(const Variant &data) override
 	{
 		events.emplace_back(OsxmlEvent::DATA, Variant::arrayType{data});
 	}
@@ -91,7 +93,7 @@ TEST(OsxmlEventParser, simpleCommandWithArgs)
 	//                        0          1            2            3
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
+	    {OsxmlEvent::COMMAND,
 	     Variant::arrayType{
 	         "a", Variant::mapType{
 	                  {"name", "test"}, {"a", 1}, {"b", 2}, {"c", "blub"}}}},
@@ -131,11 +133,9 @@ TEST(OsxmlEventParser, magicTopLevelTag)
 	const char *testString = "<ousia><a/><b/></ousia>";
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{{"a", Variant::mapType{}}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}},
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{{"b", Variant::mapType{}}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{{"b", Variant::mapType{}}}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
 
 	auto events = parseXml(testString);
@@ -147,9 +147,8 @@ TEST(OsxmlEventParser, magicTopLevelTagInside)
 	const char *testString = "<a><ousia/></a>";
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{{"a", Variant::mapType{}}}},
-	    {OsxmlEvent::COMMAND_START,
+	    {OsxmlEvent::COMMAND, Variant::arrayType{{"a", Variant::mapType{}}}},
+	    {OsxmlEvent::COMMAND,
 	     Variant::arrayType{{"ousia", Variant::mapType{}}}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
@@ -165,8 +164,7 @@ TEST(OsxmlEventParser, commandWithDataPreserveWhitespace)
 	//                        0         1          2
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}},
 	    {OsxmlEvent::DATA, Variant::arrayType{"  hello  \n world "}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
 
@@ -185,8 +183,7 @@ TEST(OsxmlEventParser, commandWithDataTrimWhitespace)
 	//                        0         1          2
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}},
 	    {OsxmlEvent::DATA, Variant::arrayType{"hello  \n world"}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
 
@@ -205,8 +202,7 @@ TEST(OsxmlEventParser, commandWithDataCollapseWhitespace)
 	//                        0         1          2
 
 	std::vector<std::pair<OsxmlEvent, Variant>> expectedEvents{
-	    {OsxmlEvent::COMMAND_START,
-	     Variant::arrayType{"a", Variant::mapType{}}},
+	    {OsxmlEvent::COMMAND, Variant::arrayType{"a", Variant::mapType{}}},
 	    {OsxmlEvent::DATA, Variant::arrayType{"hello world"}},
 	    {OsxmlEvent::FIELD_END, Variant::arrayType{}}};
 
@@ -217,6 +213,5 @@ TEST(OsxmlEventParser, commandWithDataCollapseWhitespace)
 	ASSERT_EQ(5U, events[1].second.asArray()[0].getLocation().getStart());
 	ASSERT_EQ(19U, events[1].second.asArray()[0].getLocation().getEnd());
 }
-
 }
 
-- 
cgit v1.2.3


From c298f00ef1633a663775fe9a715a249b9f4d255d Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Sun, 15 Feb 2015 20:58:26 +0100
Subject: Implemented OsxmlParser

---
 CMakeLists.txt                         |   2 +
 src/formats/osxml/OsxmlParser.cpp      | 288 +++++++++------------------------
 src/formats/osxml/OsxmlParser.hpp      |   2 +-
 test/formats/osxml/OsxmlParserTest.cpp |  28 ++--
 4 files changed, 91 insertions(+), 229 deletions(-)

(limited to 'src/formats')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2106cf0..ec1bb4d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,6 +203,7 @@ TARGET_LINK_LIBRARIES(ousia_osml
 ADD_LIBRARY(ousia_osxml
 	src/formats/osxml/OsxmlAttributeLocator
 	src/formats/osxml/OsxmlEventParser
+	src/formats/osxml/OsxmlParser
 )
 
 TARGET_LINK_LIBRARIES(ousia_osxml
@@ -351,6 +352,7 @@ IF(TEST)
 
 	ADD_EXECUTABLE(ousia_test_osxml
 		test/formats/osxml/OsxmlEventParserTest
+		test/formats/osxml/OsxmlParserTest
 	)
 
 	TARGET_LINK_LIBRARIES(ousia_test_osxml
diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp
index 869c76a..c216855 100644
--- a/src/formats/osxml/OsxmlParser.cpp
+++ b/src/formats/osxml/OsxmlParser.cpp
@@ -16,223 +16,83 @@
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
-#include <map>
-#include <sstream>
-#include <vector>
-
-#include <expat.h>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/Utils.hpp>
-#include <core/common/VariantReader.hpp>
-#include <core/parser/ParserScope.hpp>
-#include <core/parser/ParserStack.hpp>
-#include <core/parser/stack/DocumentHandler.hpp>
-#include <core/parser/stack/DomainHandler.hpp>
-#include <core/parser/stack/ImportIncludeHandler.hpp>
-#include <core/parser/stack/TypesystemHandler.hpp>
-#include <core/model/Document.hpp>
-#include <core/model/Domain.hpp>
-#include <core/model/Typesystem.hpp>
-
-#include "XmlParser.hpp"
+#include <core/parser/stack/GenericParserStates.hpp>
+#include <core/parser/stack/Stack.hpp>
+#include <core/parser/ParserContext.hpp>
+
+#include "OsxmlEventParser.hpp"
+#include "OsxmlParser.hpp"
 
 namespace ousia {
 
-namespace ParserStates {
-/* Document states */
-static const ParserState Document =
-    ParserStateBuilder()
-        .parent(&None)
-        .createdNodeType(&RttiTypes::Document)
-        .elementHandler(DocumentHandler::create)
-        .arguments({Argument::String("name", "")});
-
-static const ParserState DocumentChild =
-    ParserStateBuilder()
-        .parents({&Document, &DocumentChild})
-        .createdNodeTypes({&RttiTypes::StructureNode,
-                           &RttiTypes::AnnotationEntity,
-                           &RttiTypes::DocumentField})
-        .elementHandler(DocumentChildHandler::create);
-
-/* Domain states */
-static const ParserState Domain = ParserStateBuilder()
-                                      .parents({&None, &Document})
-                                      .createdNodeType(&RttiTypes::Domain)
-                                      .elementHandler(DomainHandler::create)
-                                      .arguments({Argument::String("name")});
-
-static const ParserState DomainStruct =
-    ParserStateBuilder()
-        .parent(&Domain)
-        .createdNodeType(&RttiTypes::StructuredClass)
-        .elementHandler(DomainStructHandler::create)
-        .arguments({Argument::String("name"),
-                    Argument::Cardinality("cardinality", Cardinality::any()),
-                    Argument::Bool("isRoot", false),
-                    Argument::Bool("transparent", false),
-                    Argument::String("isa", "")});
-
-static const ParserState DomainAnnotation =
-    ParserStateBuilder()
-        .parent(&Domain)
-        .createdNodeType(&RttiTypes::AnnotationClass)
-        .elementHandler(DomainAnnotationHandler::create)
-        .arguments({Argument::String("name")});
-
-static const ParserState DomainAttributes =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::StructType)
-        .elementHandler(DomainAttributesHandler::create)
-        .arguments({});
-
-static const ParserState DomainAttribute =
-    ParserStateBuilder()
-        .parent(&DomainAttributes)
-        .elementHandler(TypesystemStructFieldHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("default", Variant::fromObject(nullptr))});
-
-static const ParserState DomainField =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainFieldHandler::create)
-        .arguments({Argument::String("name", ""),
-                    Argument::Bool("isSubtree", false),
-                    Argument::Bool("optional", false)});
-
-static const ParserState DomainFieldRef =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainFieldRefHandler::create)
-        .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)});
-
-static const ParserState DomainStructPrimitive =
-    ParserStateBuilder()
-        .parents({&DomainStruct, &DomainAnnotation})
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainPrimitiveHandler::create)
-        .arguments(
-            {Argument::String("name", ""), Argument::Bool("isSubtree", false),
-             Argument::Bool("optional", false), Argument::String("type")});
-
-static const ParserState DomainStructChild =
-    ParserStateBuilder()
-        .parent(&DomainField)
-        .elementHandler(DomainChildHandler::create)
-        .arguments({Argument::String("ref")});
-
-static const ParserState DomainStructParent =
-    ParserStateBuilder()
-        .parent(&DomainStruct)
-        .createdNodeType(&RttiTypes::DomainParent)
-        .elementHandler(DomainParentHandler::create)
-        .arguments({Argument::String("ref")});
-
-static const ParserState DomainStructParentField =
-    ParserStateBuilder()
-        .parent(&DomainStructParent)
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainParentFieldHandler::create)
-        .arguments({Argument::String("name", ""),
-                    Argument::Bool("isSubtree", false),
-                    Argument::Bool("optional", false)});
-
-static const ParserState DomainStructParentFieldRef =
-    ParserStateBuilder()
-        .parent(&DomainStructParent)
-        .createdNodeType(&RttiTypes::FieldDescriptor)
-        .elementHandler(DomainParentFieldRefHandler::create)
-        .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)});
-
-/* Typesystem states */
-static const ParserState Typesystem =
-    ParserStateBuilder()
-        .parents({&None, &Domain})
-        .createdNodeType(&RttiTypes::Typesystem)
-        .elementHandler(TypesystemHandler::create)
-        .arguments({Argument::String("name", "")});
-
-static const ParserState TypesystemEnum =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::EnumType)
-        .elementHandler(TypesystemEnumHandler::create)
-        .arguments({Argument::String("name")});
-
-static const ParserState TypesystemEnumEntry =
-    ParserStateBuilder()
-        .parent(&TypesystemEnum)
-        .elementHandler(TypesystemEnumEntryHandler::create)
-        .arguments({});
-
-static const ParserState TypesystemStruct =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::StructType)
-        .elementHandler(TypesystemStructHandler::create)
-        .arguments({Argument::String("name"), Argument::String("parent", "")});
-
-static const ParserState TypesystemStructField =
-    ParserStateBuilder()
-        .parent(&TypesystemStruct)
-        .elementHandler(TypesystemStructFieldHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("default", Variant::fromObject(nullptr))});
-
-static const ParserState TypesystemConstant =
-    ParserStateBuilder()
-        .parent(&Typesystem)
-        .createdNodeType(&RttiTypes::Constant)
-        .elementHandler(TypesystemConstantHandler::create)
-        .arguments({Argument::String("name"), Argument::String("type"),
-                    Argument::Any("value")});
-
-/* Special states for import and include */
-static const ParserState Import =
-    ParserStateBuilder()
-        .parents({&Document, &Typesystem, &Domain})
-        .elementHandler(ImportHandler::create)
-        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
-                    Argument::String("src", "")});
-
-static const ParserState Include =
-    ParserStateBuilder()
-        .parent(&All)
-        .elementHandler(IncludeHandler::create)
-        .arguments({Argument::String("rel", ""), Argument::String("type", ""),
-                    Argument::String("src", "")});
-
-static const std::multimap<std::string, const ParserState *> XmlStates{
-    {"document", &Document},
-    {"*", &DocumentChild},
-    {"domain", &Domain},
-    {"struct", &DomainStruct},
-    {"annotation", &DomainAnnotation},
-    {"attributes", &DomainAttributes},
-    {"attribute", &DomainAttribute},
-    {"field", &DomainField},
-    {"fieldRef", &DomainFieldRef},
-    {"primitive", &DomainStructPrimitive},
-    {"childRef", &DomainStructChild},
-    {"parentRef", &DomainStructParent},
-    {"field", &DomainStructParentField},
-    {"fieldRef", &DomainStructParentFieldRef},
-    {"typesystem", &Typesystem},
-    {"enum", &TypesystemEnum},
-    {"entry", &TypesystemEnumEntry},
-    {"struct", &TypesystemStruct},
-    {"field", &TypesystemStructField},
-    {"constant", &TypesystemConstant},
-    {"import", &Import},
-    {"include", &Include}};
+using namespace parser_stack;
+
+/**
+ * Glue class: receives OsxmlEvents callbacks and forwards them to a Stack.
+ */
+class OsxmlParserImplementation : public OsxmlEvents {
+private:
+	/**
+	 * Event parser -- turns the XML character stream into OsxmlEvents calls.
+	 */
+	OsxmlEventParser parser;
+
+	/**
+	 * Pushdown automaton responsible for converting the XML events into an
+	 * actual Node tree.
+	 */
+	Stack stack;
+
+public:
+	/**
+	 * Constructor of the OsxmlParserImplementation class.
+	 *
+	 * @param reader is a reference to the CharReader instance from which the
+	 * XML should be read.
+	 * @param ctx is a reference to the ParserContext instance that supplies
+	 * the logger for the event parser and is handed on to the Stack.
+	 */
+	OsxmlParserImplementation(CharReader &reader, ParserContext &ctx)
+	    : parser(reader, *this, ctx.getLogger()),  // *this is the event sink
+	      stack(ctx, GenericParserStates)
+	{
+	}
+
+	/**
+	 * Runs the event parser; the callbacks below are driven by this call.
+	 */
+	void parse() { parser.parse(); }
+
+	void command(const Variant &name, const Variant::mapType &args) override
+	{
+		stack.command(name, args);
+		stack.fieldStart(true);  // opens the implicit default field
+	}
+
+	void annotationStart(const Variant &name,
+	                     const Variant::mapType &args) override
+	{
+		stack.annotationStart(name, args);
+		stack.fieldStart(true);  // opens the implicit default field
+	}
+
+	void annotationEnd(const Variant &className,
+	                   const Variant &elementName) override
+	{
+		stack.annotationEnd(className, elementName);  // forwarded unchanged
+	}
+
+	void fieldEnd() override { stack.fieldEnd(); }
+
+	void data(const Variant &data) override { stack.data(data); }
+};
+
+/* Class OsxmlParser */
+
+void OsxmlParser::doParse(CharReader &reader, ParserContext &ctx)
+{
+	OsxmlParserImplementation impl(reader, ctx);  // lives for this call only
+	impl.parse();  // feeds the XML events of reader into the Stack
 }
-
-
 }
 
diff --git a/src/formats/osxml/OsxmlParser.hpp b/src/formats/osxml/OsxmlParser.hpp
index 281a49c..0fbf83c 100644
--- a/src/formats/osxml/OsxmlParser.hpp
+++ b/src/formats/osxml/OsxmlParser.hpp
@@ -17,7 +17,7 @@
 */
 
 /**
- * @file XmlParser.hpp
+ * @file OsxmlParser.hpp
  *
  * Contains the parser responsible for reading Ousía XML Documents (extension
  * oxd) and Ousía XML Modules (extension oxm).
diff --git a/test/formats/osxml/OsxmlParserTest.cpp b/test/formats/osxml/OsxmlParserTest.cpp
index 269a3f6..a2bd8b1 100644
--- a/test/formats/osxml/OsxmlParserTest.cpp
+++ b/test/formats/osxml/OsxmlParserTest.cpp
@@ -30,7 +30,7 @@
 #include <core/StandaloneEnvironment.hpp>
 
 #include <plugins/filesystem/FileLocator.hpp>
-#include <formats/osdmx/OsdmxParser.hpp>
+#include <formats/osxml/OsxmlParser.hpp>
 
 namespace ousia {
 
@@ -41,7 +41,7 @@ extern const Rtti Typesystem;
 }
 
 struct XmlStandaloneEnvironment : public StandaloneEnvironment {
-	XmlParser xmlParser;
+	OsxmlParser parser;
 	FileLocator fileLocator;
 
 	XmlStandaloneEnvironment(ConcreteLogger &logger)
@@ -52,21 +52,21 @@ struct XmlStandaloneEnvironment : public StandaloneEnvironment {
 
 		registry.registerDefaultExtensions();
 		registry.registerParser({"text/vnd.ousia.oxm", "text/vnd.ousia.oxd"},
-		                        {&RttiTypes::Node}, &xmlParser);
+		                        {&RttiTypes::Node}, &parser);
 		registry.registerResourceLocator(&fileLocator);
 	}
 };
 
 static TerminalLogger logger(std::cerr, true);
 
-TEST(XmlParser, mismatchedTag)
+TEST(OsxmlParser, mismatchedTag)
 {
 	XmlStandaloneEnvironment env(logger);
 	env.parse("mismatchedTag.oxm", "", "", RttiSet{&RttiTypes::Document});
 	ASSERT_TRUE(logger.hasError());
 }
 
-TEST(XmlParser, generic)
+TEST(OsxmlParser, generic)
 {
 	XmlStandaloneEnvironment env(logger);
 	env.parse("generic.oxm", "", "", RttiSet{&RttiTypes::Node});
@@ -186,7 +186,7 @@ static void checkFieldDescriptor(
     Handle<Type> primitiveType = nullptr, bool optional = false)
 {
 	auto res = desc->resolve(&RttiTypes::FieldDescriptor, name);
-	ASSERT_EQ(1, res.size());
+	ASSERT_EQ(1U, res.size());
 	checkFieldDescriptor(res[0].node, name, parent, children, type,
 	                     primitiveType, optional);
 }
@@ -201,7 +201,7 @@ static void checkFieldDescriptor(
 	                     optional);
 }
 
-TEST(XmlParser, domainParsing)
+TEST(OsxmlParser, domainParsing)
 {
 	XmlStandaloneEnvironment env(logger);
 	Rooted<Node> book_domain_node =
@@ -339,10 +339,10 @@ static void checkText(Handle<Node> p, Handle<Node> expectedParent,
 {
 	checkStructuredEntity(p, expectedParent, doc, "paragraph");
 	Rooted<StructuredEntity> par = p.cast<StructuredEntity>();
-	ASSERT_EQ(1, par->getField().size());
+	ASSERT_EQ(1U, par->getField().size());
 	checkStructuredEntity(par->getField()[0], par, doc, "text");
 	Rooted<StructuredEntity> text = par->getField()[0].cast<StructuredEntity>();
-	ASSERT_EQ(1, text->getField().size());
+	ASSERT_EQ(1U, text->getField().size());
 
 	Handle<StructureNode> d = text->getField()[0];
 	ASSERT_FALSE(d == nullptr);
@@ -352,7 +352,7 @@ static void checkText(Handle<Node> p, Handle<Node> expectedParent,
 	ASSERT_EQ(expected, prim->getContent());
 }
 
-TEST(XmlParser, documentParsing)
+TEST(OsxmlParser, documentParsing)
 {
 	XmlStandaloneEnvironment env(logger);
 	Rooted<Node> book_document_node =
@@ -364,7 +364,7 @@ TEST(XmlParser, documentParsing)
 	checkStructuredEntity(doc->getRoot(), doc, doc, "book");
 	{
 		Rooted<StructuredEntity> book = doc->getRoot();
-		ASSERT_EQ(2, book->getField().size());
+		ASSERT_EQ(2U, book->getField().size());
 		checkText(book->getField()[0], book, doc,
 		          "This might be some introductory text or a dedication.");
 		checkStructuredEntity(book->getField()[1], book, doc, "chapter",
@@ -372,7 +372,7 @@ TEST(XmlParser, documentParsing)
 		{
 			Rooted<StructuredEntity> chapter =
 			    book->getField()[1].cast<StructuredEntity>();
-			ASSERT_EQ(3, chapter->getField().size());
+			ASSERT_EQ(3U, chapter->getField().size());
 			checkText(chapter->getField()[0], chapter, doc,
 			          "Here we might have an introduction to the chapter.");
 			checkStructuredEntity(chapter->getField()[1], chapter, doc,
@@ -381,7 +381,7 @@ TEST(XmlParser, documentParsing)
 			{
 				Rooted<StructuredEntity> section =
 				    chapter->getField()[1].cast<StructuredEntity>();
-				ASSERT_EQ(1, section->getField().size());
+				ASSERT_EQ(1U, section->getField().size());
 				checkText(section->getField()[0], section, doc,
 				          "Here we might find the actual section content.");
 			}
@@ -391,7 +391,7 @@ TEST(XmlParser, documentParsing)
 			{
 				Rooted<StructuredEntity> section =
 				    chapter->getField()[2].cast<StructuredEntity>();
-				ASSERT_EQ(1, section->getField().size());
+				ASSERT_EQ(1U, section->getField().size());
 				checkText(section->getField()[0], section, doc,
 				          "Here we might find the actual section content.");
 			}
-- 
cgit v1.2.3