From 6352751f87a4acdd9a32baf4f1133819678d06b0 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Tue, 10 Feb 2015 16:42:20 +0100
Subject: Renamed some files and changed folder structure

---
 src/formats/osdm/DynamicTokenizer.cpp         | 544 ++++++++++++++++++++++
 src/formats/osdm/DynamicTokenizer.hpp         | 252 ++++++++++
 src/formats/osdm/OsdmStreamParser.cpp         | 640 +++++++++++++++++++++++++
 src/formats/osdm/OsdmStreamParser.hpp         | 351 ++++++++++++++
 src/formats/osdm/TokenTrie.cpp                | 119 +++++
 src/formats/osdm/TokenTrie.hpp                | 150 ++++++
 src/plugins/plain/DynamicTokenizer.cpp        | 544 ----------------------
 src/plugins/plain/DynamicTokenizer.hpp        | 252 ----------
 src/plugins/plain/PlainFormatStreamReader.cpp | 641 --------------------------
 src/plugins/plain/PlainFormatStreamReader.hpp | 347 --------------
 src/plugins/plain/TokenTrie.cpp               | 119 -----
 src/plugins/plain/TokenTrie.hpp               | 150 ------
 12 files changed, 2056 insertions(+), 2053 deletions(-)
 create mode 100644 src/formats/osdm/DynamicTokenizer.cpp
 create mode 100644 src/formats/osdm/DynamicTokenizer.hpp
 create mode 100644 src/formats/osdm/OsdmStreamParser.cpp
 create mode 100644 src/formats/osdm/OsdmStreamParser.hpp
 create mode 100644 src/formats/osdm/TokenTrie.cpp
 create mode 100644 src/formats/osdm/TokenTrie.hpp
 delete mode 100644 src/plugins/plain/DynamicTokenizer.cpp
 delete mode 100644 src/plugins/plain/DynamicTokenizer.hpp
 delete mode 100644 src/plugins/plain/PlainFormatStreamReader.cpp
 delete mode 100644 src/plugins/plain/PlainFormatStreamReader.hpp
 delete mode 100644 src/plugins/plain/TokenTrie.cpp
 delete mode 100644 src/plugins/plain/TokenTrie.hpp

(limited to 'src')
diff --git a/src/formats/osdm/DynamicTokenizer.cpp b/src/formats/osdm/DynamicTokenizer.cpp
new file mode 100644
index 0000000..f2cfcd1
--- /dev/null
+++ b/src/formats/osdm/DynamicTokenizer.cpp
@@ -0,0 +1,544 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <memory>
+#include <vector>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Exceptions.hpp>
+#include <core/common/Utils.hpp>
+
+#include "DynamicTokenizer.hpp"
+
+namespace ousia {
+
+namespace {
+
+/* Internal class TokenMatch */
+
+/**
+ * Contains information about a matching token.
+ */
+struct TokenMatch {
+	/**
+	 * Token that was matched.
+	 */
+	DynamicToken token;
+
+	/**
+	 * Current length of the data within the text handler. The text buffer needs
+	 * to be trimmed to this length if this token matches.
+	 */
+	size_t textLength;
+
+	/**
+	 * End location of the current text handler. This location needs to be used
+	 * for the text token that is emitted before the actual token.
+	 */
+	size_t textEnd;
+
+	/**
+	 * Constructor of the TokenMatch class.
+	 */
+	TokenMatch() : textLength(0), textEnd(0) {}
+
+	/**
+	 * Returns true if this TokenMatch instance actually represents a match.
+	 */
+	bool hasMatch() { return token.type != EmptyToken; }
+};
+
+/* Internal class TokenLookup */
+
+/**
+ * The TokenLookup class is used to represent a thread in a running token
+ * lookup.
+ */
+class TokenLookup {
+private:
+	/**
+	 * Current node within the token trie.
+	 */
+	TokenTrie::Node const *node;
+
+	/**
+	 * Start offset within the source file.
+	 */
+	size_t start;
+
+	/**
+	 * Current length of the data within the text handler. The text buffer needs
+	 * to be trimmed to this length if this token matches.
+	 */
+	size_t textLength;
+
+	/**
+	 * End location of the current text handler. This location needs to be used
+	 * for the text token that is emitted before the actual token.
+	 */
+	size_t textEnd;
+
+public:
+	/**
+	 * Constructor of the TokenLookup class.
+	 *
+	 * @param node is the current node.
+	 * @param start is the start position.
+	 * @param textLength is the text buffer length of the previous text token.
+	 * @param textEnd is the current end location of the previous text token.
+	 */
+	TokenLookup(const TokenTrie::Node *node, size_t start,
+	            size_t textLength, size_t textEnd)
+	    : node(node), start(start), textLength(textLength), textEnd(textEnd)
+	{
+	}
+
+	/**
+	 * Tries to extend the current path in the token trie with the given
+	 * character. If a complete token is matched, stores this match in the
+	 * tokens list (in case it is longer than any previous token).
+	 *
+	 * @param c is the character that should be appended to the current prefix.
+	 * @param lookups is a list to which new TokeLookup instances are added --
+	 * which could potentially be expanded in the next iteration.
+	 * @param match is the DynamicToken instance to which the matching token
+	 * should be written.
+	 * @param tokens is a reference at the internal token list of the
+	 * DynamicTokenizer.
+	 * @param end is the end byte offset of the current character.
+	 * @param sourceId is the source if of this file.
+	 */
+	void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match,
+	             const std::vector<std::string> &tokens, SourceOffset end,
+	             SourceId sourceId)
+	{
+		// Check whether we can continue the current token path with the given
+		// character without visiting an already visited node
+		auto it = node->children.find(c);
+		if (it == node->children.end()) {
+			return;
+		}
+
+		// Check whether the new node represents a complete token a whether it
+		// is longer than the current token. If yes, replace the current token.
+		node = it->second.get();
+		if (node->type != EmptyToken) {
+			const std::string &str = tokens[node->type];
+			size_t len = str.size();
+			if (len > match.token.content.size()) {
+				match.token =
+				    DynamicToken{node->type, str, {sourceId, start, end}};
+				match.textLength = textLength;
+				match.textEnd = textEnd;
+			}
+		}
+
+		// If this state can possibly be advanced, store it in the states list.
+		if (!node->children.empty()) {
+			lookups.emplace_back(*this);
+		}
+	}
+};
+
+/* Internal class TextHandlerBase */
+
+/**
+ * Base class used for those classes that may be used as TextHandler in the
+ * DynamicTokenizer::next function.
+ */
+class TextHandlerBase {
+public:
+	/**
+	 * Start position of the extracted text.
+	 */
+	size_t textStart;
+
+	/**
+	 * End position of the extracted text.
+	 */
+	size_t textEnd;
+
+	/**
+	 * Buffer containing the extracted text.
+	 */
+	std::vector<char> textBuf;
+
+	/**
+	 * Constructor of the TextHandlerBase base class. Initializes the start and
+	 * end position with zeros.
+	 */
+	TextHandlerBase() : textStart(0), textEnd(0) {}
+
+	/**
+	 * Transforms the given token into a text token containing the extracted
+	 * text.
+	 *
+	 * @param token is the output token to which the text should be written.
+	 * @param sourceId is the source id of the underlying file.
+	 */
+	void buildTextToken(TokenMatch &match, SourceId sourceId)
+	{
+		if (match.hasMatch()) {
+			match.token.content =
+			    std::string{textBuf.data(), match.textLength};
+			match.token.location =
+			    SourceLocation{sourceId, textStart, match.textEnd};
+		} else {
+			match.token.content = std::string{textBuf.data(), textBuf.size()};
+			match.token.location = SourceLocation{sourceId, textStart, textEnd};
+		}
+		match.token.type = TextToken;
+	}
+
+	/**
+	 * Returns true if this whitespace handler has found any text and a text
+	 * token could be emitted.
+	 *
+	 * @return true if the internal data buffer is non-empty.
+	 */
+	bool hasText() { return !textBuf.empty(); }
+};
+
+/* Internal class PreservingTextHandler */
+
+/**
+ * The PreservingTextHandler class preserves all characters unmodified,
+ * including whitepace characters.
+ */
+class PreservingTextHandler : public TextHandlerBase {
+public:
+	using TextHandlerBase::TextHandlerBase;
+
+	/**
+	 * Appends the given character to the internal text buffer, does not
+	 * eliminate whitespace.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+		textBuf.push_back(c);
+	}
+};
+
+/* Internal class TrimmingTextHandler */
+
+/**
+ * The TrimmingTextHandler class trims all whitespace characters at the begin
+ * and the end of a text section but leaves all other characters unmodified,
+ * including whitepace characters.
+ */
+class TrimmingTextHandler : public TextHandlerBase {
+public:
+	using TextHandlerBase::TextHandlerBase;
+
+	/**
+	 * Buffer used internally to temporarily store all whitespace characters.
+	 * They are only added to the output buffer if another non-whitespace
+	 * character is reached.
+	 */
+	std::vector<char> whitespaceBuf;
+
+	/**
+	 * Appends the given character to the internal text buffer, eliminates
+	 * whitespace characters at the begin and end of the text.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		// Handle whitespace characters
+		if (Utils::isWhitespace(c)) {
+			if (!textBuf.empty()) {
+				whitespaceBuf.push_back(c);
+			}
+			return;
+		}
+
+		// Set the start and end offset correctly
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+
+		// Store the character
+		if (!whitespaceBuf.empty()) {
+			textBuf.insert(textBuf.end(), whitespaceBuf.begin(),
+			               whitespaceBuf.end());
+			whitespaceBuf.clear();
+		}
+		textBuf.push_back(c);
+	}
+};
+
+/* Internal class CollapsingTextHandler */
+
+/**
+ * The CollapsingTextHandler trims characters at the beginning and end of the
+ * text and reduced multiple whitespace characters to a single blank.
+ */
+class CollapsingTextHandler : public TextHandlerBase {
+public:
+	using TextHandlerBase::TextHandlerBase;
+
+	/**
+	 * Flag set to true if a whitespace character was reached.
+	 */
+	bool hasWhitespace = false;
+
+	/**
+	 * Appends the given character to the internal text buffer, eliminates
+	 * redundant whitespace characters.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		// Handle whitespace characters
+		if (Utils::isWhitespace(c)) {
+			if (!textBuf.empty()) {
+				hasWhitespace = true;
+			}
+			return;
+		}
+
+		// Set the start and end offset correctly
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+
+		// Store the character
+		if (hasWhitespace) {
+			textBuf.push_back(' ');
+			hasWhitespace = false;
+		}
+		textBuf.push_back(c);
+	}
+};
+}
+
+/* Class DynamicTokenizer */
+
+DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
+    : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
+{
+}
+
+template <typename TextHandler, bool read>
+bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
+{
+	// If we're in the read mode, reset the char reader peek position to the
+	// current read position
+	if (read) {
+		reader.resetPeek();
+	}
+
+	// Prepare the lookups in the token trie
+	const TokenTrie::Node *root = trie.getRoot();
+	TokenMatch match;
+	std::vector<TokenLookup> lookups;
+	std::vector<TokenLookup> nextLookups;
+
+	// Instantiate the text handler
+	TextHandler textHandler;
+
+	// Peek characters from the reader and try to advance the current token tree
+	// cursor
+	char c;
+	size_t charStart = reader.getPeekOffset();
+	const SourceId sourceId = reader.getSourceId();
+	while (reader.peek(c)) {
+		const size_t charEnd = reader.getPeekOffset();
+		const size_t textLength = textHandler.textBuf.size();
+		const size_t textEnd = textHandler.textEnd;
+
+		// If we do not have a match yet, start a new lookup from the root
+		if (!match.hasMatch()) {
+			TokenLookup{root, charStart, textLength, textEnd}.advance(
+			    c, nextLookups, match, tokens, charEnd, sourceId);
+		}
+
+		// Try to advance all other lookups with the new character
+		for (TokenLookup &lookup : lookups) {
+			lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId);
+		}
+
+		// We have found a token and there are no more states to advance or the
+		// text handler has found something -- abort to return the new token
+		if (match.hasMatch()) {
+			if ((nextLookups.empty() || textHandler.hasText())) {
+				break;
+			}
+		} else {
+			// Record all incomming characters
+			textHandler.append(c, charStart, charEnd);
+		}
+
+		// Swap the lookups and the nextLookups list
+		lookups = std::move(nextLookups);
+		nextLookups.clear();
+
+		// Advance the offset
+		charStart = charEnd;
+	}
+
+	// If we found text, emit that text
+	if (textHandler.hasText() &&
+	    (!match.hasMatch() || match.textLength > 0)) {
+		textHandler.buildTextToken(match, sourceId);
+	}
+
+	// Move the read/peek cursor to the end of the token, abort if an error
+	// happens while doing so
+	if (match.hasMatch()) {
+		// Make sure we have a valid location
+		if (match.token.location.getEnd() == InvalidSourceOffset) {
+			throw OusiaException{"Token end position offset out of range"};
+		}
+
+		// Seek to the end of the current token
+		const size_t end = match.token.location.getEnd();
+		if (read) {
+			reader.seek(end);
+		} else {
+			reader.seekPeekCursor(end);
+		}
+		token = match.token;
+	} else {
+		token = DynamicToken{};
+	}
+	return match.hasMatch();
+}
+
+bool DynamicTokenizer::read(CharReader &reader,DynamicToken &token)
+{
+	switch (whitespaceMode) {
+		case WhitespaceMode::PRESERVE:
+			return next<PreservingTextHandler, true>(reader, token);
+		case WhitespaceMode::TRIM:
+			return next<TrimmingTextHandler, true>(reader, token);
+		case WhitespaceMode::COLLAPSE:
+			return next<CollapsingTextHandler, true>(reader, token);
+	}
+	return false;
+}
+
+bool DynamicTokenizer::peek(CharReader &reader,DynamicToken &token)
+{
+	switch (whitespaceMode) {
+		case WhitespaceMode::PRESERVE:
+			return next<PreservingTextHandler, false>(reader, token);
+		case WhitespaceMode::TRIM:
+			return next<TrimmingTextHandler, false>(reader, token);
+		case WhitespaceMode::COLLAPSE:
+			return next<CollapsingTextHandler, false>(reader, token);
+	}
+	return false;
+}
+
+TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
+{
+	// Abort if an empty token should be registered
+	if (token.empty()) {
+		return EmptyToken;
+	}
+
+	// Search for a new slot in the tokens list
+	TokenTypeId type = EmptyToken;
+	for (size_t i = nextTokenTypeId; i < tokens.size(); i++) {
+		if (tokens[i].empty()) {
+			tokens[i] = token;
+			type = i;
+			break;
+		}
+	}
+
+	// No existing slot was found, add a new one -- make sure we do not
+	// override the special token type handles
+	if (type == EmptyToken) {
+		type = tokens.size();
+		if (type == TextToken || type == EmptyToken) {
+			throw OusiaException{"Token type ids depleted!"};
+		}
+		tokens.emplace_back(token);
+	}
+	nextTokenTypeId = type + 1;
+
+	// Try to register the token in the trie -- if this fails, remove it
+	// from the tokens list
+	if (!trie.registerToken(token, type)) {
+		tokens[type] = std::string();
+		nextTokenTypeId = type;
+		return EmptyToken;
+	}
+	return type;
+}
+
+bool DynamicTokenizer::unregisterToken(TokenTypeId type)
+{
+	// Unregister the token from the trie, abort if an invalid type is given
+	if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
+		tokens[type] = std::string{};
+		nextTokenTypeId = type;
+		return true;
+	}
+	return false;
+}
+
+std::string DynamicTokenizer::getTokenString(TokenTypeId type)
+{
+	if (type < tokens.size()) {
+		return tokens[type];
+	}
+	return std::string{};
+}
+
+void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode)
+{
+	whitespaceMode = mode;
+}
+
+WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
+
+/* Explicitly instantiate all possible instantiations of the "next" member
+   function */
+template bool DynamicTokenizer::next<PreservingTextHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<TrimmingTextHandler, false>(
+    CharReader &reader, DynamicToken &token);
+template bool DynamicTokenizer::next<CollapsingTextHandler, false>(
+    CharReader &reader,DynamicToken &token);
+template bool DynamicTokenizer::next<PreservingTextHandler, true>(
+    CharReader &reader,DynamicToken &token);
+template bool DynamicTokenizer::next<TrimmingTextHandler, true>(
+    CharReader &reader,DynamicToken &token);
+template bool DynamicTokenizer::next<CollapsingTextHandler, true>(
+    CharReader &reader,DynamicToken &token);
+}
+
diff --git a/src/formats/osdm/DynamicTokenizer.hpp b/src/formats/osdm/DynamicTokenizer.hpp
new file mode 100644
index 0000000..0cac2e8
--- /dev/null
+++ b/src/formats/osdm/DynamicTokenizer.hpp
@@ -0,0 +1,252 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file DynamicTokenizer.hpp
+ *
+ * Tokenizer that can be reconfigured at runtime used for parsing the plain
+ * text format.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_
+#define _OUSIA_DYNAMIC_TOKENIZER_HPP_
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include <core/common/Location.hpp>
+
+#include "TokenTrie.hpp"
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+
+/**
+ * The DynamicToken structure describes a token discovered by the Tokenizer.
+ */
+struct DynamicToken {
+	/**
+	 * Id of the type of this token.
+	 */
+	TokenTypeId type;
+
+	/**
+	 * String that was matched.
+	 */
+	std::string content;
+
+	/**
+	 * Location from which the string was extracted.
+	 */
+	SourceLocation location;
+
+	/**
+	 * Default constructor.
+	 */
+	DynamicToken() : type(EmptyToken) {}
+
+	/**
+	 * Constructor of the DynamicToken struct.
+	 *
+	 * @param id represents the token type.
+	 * @param content is the string content that has been extracted.
+	 * @param location is the location of the extracted string content in the
+	 * source file.
+	 */
+	DynamicToken(TokenTypeId type, const std::string &content,
+	             SourceLocation location)
+	    : type(type), content(content), location(location)
+	{
+	}
+
+	/**
+	 * Constructor of the DynamicToken struct, only initializes the token type
+	 *
+	 * @param type is the id corresponding to the type of the token.
+	 */
+	DynamicToken(TokenTypeId type) : type(type) {}
+
+	/**
+	 * The getLocation function allows the tokens to be directly passed as
+	 * parameter to Logger or LoggableException instances.
+	 *
+	 * @return a reference at the location field
+	 */
+	const SourceLocation &getLocation() const { return location; }
+};
+
+/**
+ * Enum specifying the whitespace handling of the DynamicTokenizer class when
+ * reading non-token text.
+ */
+enum class WhitespaceMode {
+	/**
+     * Preserves all whitespaces as they are found in the source file.
+     */
+	PRESERVE,
+
+	/**
+     * Trims whitespace at the beginning and the end of the found text.
+     */
+	TRIM,
+
+	/**
+     * Whitespaces are trimmed and collapsed, multiple whitespace characters
+     * are replaced by a single space character.
+     */
+	COLLAPSE
+};
+
+/**
+ * The DynamicTokenizer is used to extract tokens and chunks of text from a
+ * CharReader. It allows to register and unregister tokens while parsing and
+ * to modify the handling of whitespace characters. Note that the
+ * DynamicTokenizer always tries to extract the longest possible token from the
+ * tokenizer.
+ */
+class DynamicTokenizer {
+private:
+	/**
+	 * Internally used token trie. This object holds all registered tokens.
+	 */
+	TokenTrie trie;
+
+	/**
+	 * Flag defining whether whitespaces should be preserved or not.
+	 */
+	WhitespaceMode whitespaceMode;
+
+	/**
+	 * Vector containing all registered token types.
+	 */
+	std::vector<std::string> tokens;
+
+	/**
+	 * Next index in the tokens list where to search for a new token id.
+	 */
+	size_t nextTokenTypeId;
+
+	/**
+	 * Templated function used internally to read the current token. The
+	 * function is templated in order to force code generation for all six
+	 * combiations of whitespace modes and reading/peeking.
+	 *
+	 * @tparam TextHandler is the type to be used for the textHandler instance.
+	 * @tparam read specifies whether the function should start from and advance
+	 * the read pointer of the char reader.
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is the token structure into which the token information
+	 * should be written.
+	 * @return false if the end of the stream has been reached, true otherwise.
+	 */
+	template <typename TextHandler, bool read>
+	bool next(CharReader &reader, DynamicToken &token);
+
+public:
+	/**
+	 * Constructor of the DynamicTokenizer class.
+	 *
+	 * @param whitespaceMode specifies how whitespace should be handled.
+	 */
+	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
+
+	/**
+	 * Registers the given string as a token. Returns a const pointer at a
+	 * TokenDescriptor that will be used to reference the newly created token.
+	 *
+	 * @param token is the token string that should be registered.
+	 * @return a unique identifier for the registered token or EmptyToken if
+	 * an error occured.
+	 */
+	TokenTypeId registerToken(const std::string &token);
+
+	/**
+	 * Unregisters the token belonging to the given TokenTypeId.
+	 *
+	 * @param type is the token type that should be unregistered. The
+	 *TokenTypeId
+	 * must have been returned by registerToken.
+	 * @return true if the operation was successful, false otherwise (e.g.
+	 * because the given TokenDescriptor was already unregistered).
+	 */
+	bool unregisterToken(TokenTypeId type);
+
+	/**
+	 * Returns the token that was registered under the given TokenTypeId id or
+	 *an
+	 * empty string if an invalid TokenTypeId id is given.
+	 *
+	 * @param type is the TokenTypeId id for which the corresponding token
+	 *string
+	 * should be returned.
+	 * @return the registered token string or an empty string if the given type
+	 * was invalid.
+	 */
+	std::string getTokenString(TokenTypeId type);
+
+	/**
+	 * Sets the whitespace mode.
+	 *
+	 * @param whitespaceMode defines how whitespace should be treated in text
+	 * tokens.
+	 */
+	void setWhitespaceMode(WhitespaceMode mode);
+
+	/**
+	 * Returns the current value of the whitespace mode.
+	 *
+	 * @return the whitespace mode.
+	 */
+	WhitespaceMode getWhitespaceMode();
+
+	/**
+	 * Reads a new token from the CharReader and stores it in the given
+	 * DynamicToken instance.
+	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is a reference at the token instance into which the Token
+	 * information should be written.
+	 * @return true if a token could be read, false if the end of the stream
+	 * has been reached.
+	 */
+	bool read(CharReader &reader, DynamicToken &token);
+
+	/**
+	 * The peek method does not advance the read position of the char reader,
+	 * but reads the next token from the current char reader peek position.
+	 *
+	 * @param reader is the CharReader instance from which the data should be
+	 * read.
+	 * @param token is a reference at the token instance into which the Token
+	 * information should be written.
+	 * @return true if a token could be read, false if the end of the stream
+	 * has been reached.
+	 */
+	bool peek(CharReader &reader, DynamicToken &token);
+};
+}
+
+#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */
+
diff --git a/src/formats/osdm/OsdmStreamParser.cpp b/src/formats/osdm/OsdmStreamParser.cpp
new file mode 100644
index 0000000..8cb8caf
--- /dev/null
+++ b/src/formats/osdm/OsdmStreamParser.cpp
@@ -0,0 +1,640 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Logger.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/VariantReader.hpp>
+
+#include "OsdmStreamParser.hpp"
+
+namespace ousia {
+
+/**
+ * Plain format default tokenizer.
+ */
+class PlainFormatTokens : public DynamicTokenizer {
+public:
+	/**
+	 * Id of the backslash token.
+	 */
+	TokenTypeId Backslash;
+
+	/**
+	 * Id of the line comment token.
+	 */
+	TokenTypeId LineComment;
+
+	/**
+	 * Id of the block comment start token.
+	 */
+	TokenTypeId BlockCommentStart;
+
+	/**
+	 * Id of the block comment end token.
+	 */
+	TokenTypeId BlockCommentEnd;
+
+	/**
+	 * Id of the field start token.
+	 */
+	TokenTypeId FieldStart;
+
+	/**
+	 * Id of the field end token.
+	 */
+	TokenTypeId FieldEnd;
+
+	/**
+	 * Registers the plain format tokens in the internal tokenizer.
+	 */
+	PlainFormatTokens()
+	{
+		Backslash = registerToken("\\");
+		LineComment = registerToken("%");
+		BlockCommentStart = registerToken("%{");
+		BlockCommentEnd = registerToken("}%");
+		FieldStart = registerToken("{");
+		FieldEnd = registerToken("}");
+	}
+};
+
+static const PlainFormatTokens Tokens;
+
+/**
+ * Class used internally to collect data issued via "DATA" event.
+ */
+class DataHandler {
+private:
+	/**
+	 * Internal character buffer.
+	 */
+	std::vector<char> buf;
+
+	/**
+	 * Start location of the character data.
+	 */
+	SourceOffset start;
+
+	/**
+	 * End location of the character data.
+	 */
+	SourceOffset end;
+
+public:
+	/**
+	 * Default constructor, initializes start and end with zeros.
+	 */
+	DataHandler() : start(0), end(0) {}
+
+	/**
+	 * Returns true if the internal buffer is empty.
+	 *
+	 * @return true if no characters were added to the internal buffer, false
+	 * otherwise.
+	 */
+	bool isEmpty() { return buf.empty(); }
+
+	/**
+	 * Appends a single character to the internal buffer.
+	 *
+	 * @param c is the character that should be added to the internal buffer.
+	 * @param charStart is the start position of the character.
+	 * @param charEnd is the end position of the character.
+	 */
+	void append(char c, SourceOffset charStart, SourceOffset charEnd)
+	{
+		if (isEmpty()) {
+			start = charStart;
+		}
+		buf.push_back(c);
+		end = charEnd;
+	}
+
+	/**
+	 * Appends a string to the internal buffer.
+	 *
+	 * @param s is the string that should be added to the internal buffer.
+	 * @param stringStart is the start position of the string.
+	 * @param stringEnd is the end position of the string.
+	 */
+	void append(const std::string &s, SourceOffset stringStart,
+	            SourceOffset stringEnd)
+	{
+		if (isEmpty()) {
+			start = stringStart;
+		}
+		std::copy(s.c_str(), s.c_str() + s.size(), back_inserter(buf));
+		end = stringEnd;
+	}
+
+	/**
+	 * Converts the internal buffer to a variant with attached location
+	 * information.
+	 *
+	 * @param sourceId is the source id which is needed for building the
+	 * location information.
+	 * @return a Variant with the internal buffer content as string and
+	 * the correct start and end location.
+	 */
+	Variant toVariant(SourceId sourceId)
+	{
+		Variant res = Variant::fromString(std::string(buf.data(), buf.size()));
+		res.setLocation({sourceId, start, end});
+		return res;
+	}
+};
+
+OsdmStreamParser::OsdmStreamParser(CharReader &reader, Logger &logger)
+    : reader(reader), logger(logger), tokenizer(Tokens)
+{
+	// Place an intial command representing the complete file on the stack
+	commands.push(Command{"", Variant::mapType{}, true, true, true});
+}
+
+Variant OsdmStreamParser::parseIdentifier(size_t start, bool allowNSSep)
+{
+	bool first = true;
+	bool hasCharSiceNSSep = false;
+	std::vector<char> identifier;
+	size_t end = reader.getPeekOffset();
+	char c, c2;
+	while (reader.peek(c)) {
+		// Abort if this character is not a valid identifer character
+		if ((first && Utils::isIdentifierStartCharacter(c)) ||
+		    (!first && Utils::isIdentifierCharacter(c))) {
+			identifier.push_back(c);
+		} else if (c == ':' && hasCharSiceNSSep && reader.fetchPeek(c2) &&
+		           Utils::isIdentifierStartCharacter(c2)) {
+			identifier.push_back(c);
+		} else {
+			if (c == ':' && allowNSSep) {
+				logger.error(
+				    "Expected character before and after namespace separator "
+				    "\":\"",
+				    reader);
+			}
+			reader.resetPeek();
+			break;
+		}
+
+		// This is no longer the first character
+		first = false;
+
+		// Advance the hasCharSiceNSSep flag
+		hasCharSiceNSSep = allowNSSep && (c != ':');
+
+		end = reader.getPeekOffset();
+		reader.consumePeek();
+	}
+
+	// Return the identifier at its location
+	Variant res =
+	    Variant::fromString(std::string(identifier.data(), identifier.size()));
+	res.setLocation({reader.getSourceId(), start, end});
+	return res;
+}
+
+OsdmStreamParser::State OsdmStreamParser::parseBeginCommand()
+{
+	// Expect a '{' after the command
+	reader.consumeWhitespace();
+	if (!reader.expect('{')) {
+		logger.error("Expected \"{\" after \\begin", reader);
+		return State::NONE;
+	}
+
+	// Parse the name of the command that should be opened
+	Variant commandName = parseIdentifier(reader.getOffset(), true);
+	if (commandName.asString().empty()) {
+		logger.error("Expected identifier", commandName);
+		return State::ERROR;
+	}
+
+	// Check whether the next character is a '#', indicating the start of the
+	// command name
+	Variant commandArgName;
+	SourceOffset start = reader.getOffset();
+	if (reader.expect('#')) {
+		commandArgName = parseIdentifier(start);
+		if (commandArgName.asString().empty()) {
+			logger.error("Expected identifier after \"#\"", commandArgName);
+		}
+	}
+
+	if (!reader.expect('}')) {
+		logger.error("Expected \"}\"", reader);
+		return State::ERROR;
+	}
+
+	// Parse the arguments
+	Variant commandArguments = parseCommandArguments(std::move(commandArgName));
+
+	// Push the command onto the command stack
+	pushCommand(std::move(commandName), std::move(commandArguments), true);
+
+	return State::COMMAND;
+}
+
+static bool checkStillInField(const OsdmStreamParser::Command &cmd,
+                              const Variant &endName, Logger &logger)
+{
+	if (cmd.inField && !cmd.inRangeField) {
+		logger.error(std::string("\\end in open field of command \"") +
+		                 cmd.name.asString() + std::string("\""),
+		             endName);
+		logger.note(std::string("Open command started here:"), cmd.name);
+		return true;
+	}
+	return false;
+}
+
+OsdmStreamParser::State OsdmStreamParser::parseEndCommand()
+{
+	// Expect a '{' after the command
+	if (!reader.expect('{')) {
+		logger.error("Expected \"{\" after \\end", reader);
+		return State::NONE;
+	}
+
+	// Fetch the name of the command that should be ended here
+	Variant name = parseIdentifier(reader.getOffset(), true);
+
+	// Make sure the given command name is not empty
+	if (name.asString().empty()) {
+		logger.error("Expected identifier", name);
+		return State::ERROR;
+	}
+
+	// Make sure the command name is terminated with a '}'
+	if (!reader.expect('}')) {
+		logger.error("Expected \"}\"", reader);
+		return State::ERROR;
+	}
+
+	// Unroll the command stack up to the last range command
+	while (!commands.top().hasRange) {
+		if (checkStillInField(commands.top(), name, logger)) {
+			return State::ERROR;
+		}
+		commands.pop();
+	}
+
+	// Make sure we're not in an open field of this command
+	if (checkStillInField(commands.top(), name, logger)) {
+		return State::ERROR;
+	}
+
+	// Special error message if the top-level command is reached
+	if (commands.size() == 1) {
+		logger.error(std::string("Cannot end command \"") + name.asString() +
+		                 std::string("\" here, no command open"),
+		             name);
+		return State::ERROR;
+	}
+
+	// Inform the about command mismatches
+	const Command &cmd = commands.top();
+	if (commands.top().name.asString() != name.asString()) {
+		logger.error(std::string("Trying to end command \"") +
+		                 cmd.name.asString() +
+		                 std::string("\", but open command is \"") +
+		                 name.asString() + std::string("\""),
+		             name);
+		logger.note("Last command was opened here:", cmd.name);
+		return State::ERROR;
+	}
+
+	// Set the location to the location of the command that was ended, then end
+	// the current command
+	location = name.getLocation();
+	commands.pop();
+	return cmd.inRangeField ? State::FIELD_END : State::NONE;
+}
+
+Variant OsdmStreamParser::parseCommandArguments(Variant commandArgName)
+{
+	// Parse the arguments using the universal VariantReader
+	Variant commandArguments;
+	if (reader.expect('[')) {
+		auto res = VariantReader::parseObject(reader, logger, ']');
+		commandArguments = res.second;
+	} else {
+		commandArguments = Variant::mapType{};
+	}
+
+	// Insert the parsed name, make sure "name" was not specified in the
+	// arguments
+	if (commandArgName.isString()) {
+		auto res =
+		    commandArguments.asMap().emplace("name", std::move(commandArgName));
+		if (!res.second) {
+			logger.error("Name argument specified multiple times",
+			             SourceLocation{}, MessageMode::NO_CONTEXT);
+			logger.note("First occurance is here: ", commandArgName);
+			logger.note("Second occurance is here: ", res.first->second);
+		}
+	}
+	return commandArguments;
+}
+
+void OsdmStreamParser::pushCommand(Variant commandName,
+                                   Variant commandArguments, bool hasRange)
+{
+	// Store the location on the stack
+	location = commandName.getLocation();
+
+	// Place the command on the command stack, remove the last commands if we're
+	// not currently inside a field of these commands
+	while (!commands.top().inField) {
+		commands.pop();
+	}
+	commands.push(Command{std::move(commandName), std::move(commandArguments),
+	                      hasRange, false, false});
+}
+
+OsdmStreamParser::State OsdmStreamParser::parseCommand(size_t start)
+{
+	// Parse the commandName as a first identifier
+	Variant commandName = parseIdentifier(start, true);
+	if (commandName.asString().empty()) {
+		logger.error("Empty command name", reader);
+		return State::NONE;
+	}
+
+	// Handle the special "begin" and "end" commands
+	const auto commandNameComponents =
+	    Utils::split(commandName.asString(), ':');
+	const bool isBegin = commandNameComponents[0] == "begin";
+	const bool isEnd = commandNameComponents[0] == "end";
+	if (isBegin || isEnd) {
+		if (commandNameComponents.size() > 1) {
+			logger.error(
+			    "Special commands \"\\begin\" and \"\\end\" may not contain a "
+			    "namespace separator \":\"",
+			    commandName);
+		}
+		if (isBegin) {
+			return parseBeginCommand();
+		} else if (isEnd) {
+			return parseEndCommand();
+		}
+	}
+
+	// Check whether the next character is a '#', indicating the start of the
+	// command name
+	Variant commandArgName;
+	start = reader.getOffset();
+	if (reader.expect('#')) {
+		commandArgName = parseIdentifier(start);
+		if (commandArgName.asString().empty()) {
+			logger.error("Expected identifier after \"#\"", commandArgName);
+		}
+	}
+
+	// Parse the arugments
+	Variant commandArguments = parseCommandArguments(std::move(commandArgName));
+
+	// Push the command onto the command stack
+	pushCommand(std::move(commandName), std::move(commandArguments), false);
+
+	return State::COMMAND;
+}
+
+void OsdmStreamParser::parseBlockComment()
+{
+	DynamicToken token;
+	size_t depth = 1;
+	while (tokenizer.read(reader, token)) {
+		if (token.type == Tokens.BlockCommentEnd) {
+			depth--;
+			if (depth == 0) {
+				return;
+			}
+		}
+		if (token.type == Tokens.BlockCommentStart) {
+			depth++;
+		}
+	}
+
+	// Issue an error if the file ends while we are in a block comment
+	logger.error("File ended while being in a block comment", reader);
+}
+
+void OsdmStreamParser::parseLineComment()
+{
+	char c;
+	while (reader.read(c)) {
+		if (c == '\n') {
+			return;
+		}
+	}
+}
+
+bool OsdmStreamParser::checkIssueData(DataHandler &handler)
+{
+	if (!handler.isEmpty()) {
+		data = handler.toVariant(reader.getSourceId());
+		location = data.getLocation();
+		reader.resetPeek();
+		return true;
+	}
+	return false;
+}
+
+bool OsdmStreamParser::checkIssueFieldStart()
+{
+	// Fetch the current command, and check whether we're currently inside a
+	// field of this command
+	Command &cmd = commands.top();
+	if (!cmd.inField) {
+		// If this is a range command, we're now implicitly inside the field of
+		// this command -- we'll have to issue a field start command!
+		if (cmd.hasRange) {
+			cmd.inField = true;
+			cmd.inRangeField = true;
+			reader.resetPeek();
+			return true;
+		}
+
+		// This was not a range command, so obviously we're now inside within
+		// a field of some command -- so unroll the commands stack until a
+		// command with open field is reached
+		while (!commands.top().inField) {
+			commands.pop();
+		}
+	}
+	return false;
+}
+
+OsdmStreamParser::State OsdmStreamParser::parse()
+{
+	// Handler for incomming data
+	DataHandler handler;
+
+	// Read tokens until the outer loop should be left
+	DynamicToken token;
+	while (tokenizer.peek(reader, token)) {
+		const TokenTypeId type = token.type;
+
+		// Special handling for Backslash and Text
+		if (type == Tokens.Backslash) {
+			// Before appending anything to the output data or starting a new
+			// command, check whether FIELD_START has to be issued, as the
+			// current command is a command with range
+			if (checkIssueFieldStart()) {
+				location = token.location;
+				return State::FIELD_START;
+			}
+
+			// Check whether a command starts now, without advancing the peek
+			// cursor
+			char c;
+			if (!reader.fetchPeek(c)) {
+				logger.error("Trailing backslash at the end of the file.",
+				             token);
+				return State::END;
+			}
+
+			// Try to parse a command
+			if (Utils::isIdentifierStartCharacter(c)) {
+				// Make sure to issue any data before it is to late
+				if (checkIssueData(handler)) {
+					return State::DATA;
+				}
+
+				// Parse the actual command
+				State res = parseCommand(token.location.getStart());
+				switch (res) {
+					case State::ERROR:
+						throw LoggableException(
+						    "Last error was irrecoverable, ending parsing "
+						    "process");
+					case State::NONE:
+						continue;
+					default:
+						return res;
+				}
+			}
+
+			// This was not a special character, just append the given character
+			// to the data buffer, use the escape character start as start
+			// location and the peek offset as end location
+			reader.peek(c);  // Peek the previously fetched character
+			handler.append(c, token.location.getStart(),
+			               reader.getPeekOffset());
+			reader.consumePeek();
+			continue;
+		} else if (type == TextToken) {
+			// Check whether FIELD_START has to be issued before appending text
+			if (checkIssueFieldStart()) {
+				location = token.location;
+				return State::FIELD_START;
+			}
+
+			// Append the text to the data handler
+			handler.append(token.content, token.location.getStart(),
+			               token.location.getEnd());
+
+			reader.consumePeek();
+			continue;
+		}
+
+		// A non-text token was reached, make sure all pending data commands
+		// have been issued
+		if (checkIssueData(handler)) {
+			return State::DATA;
+		}
+
+		// We will handle the token now, consume the peeked characters
+		reader.consumePeek();
+
+		// Update the location to the current token location
+		location = token.location;
+
+		if (token.type == Tokens.LineComment) {
+			parseLineComment();
+		} else if (token.type == Tokens.BlockCommentStart) {
+			parseBlockComment();
+		} else if (token.type == Tokens.FieldStart) {
+			Command &cmd = commands.top();
+			if (!cmd.inField) {
+				cmd.inField = true;
+				return State::FIELD_START;
+			}
+			logger.error(
+			    "Got field start token \"{\", but no command for which to "
+			    "start the field. Did you mean \"\\{\"?",
+			    token);
+		} else if (token.type == Tokens.FieldEnd) {
+			// Try to end an open field of the current command -- if the current
+			// command is not inside an open field, end this command and try to
+			// close the next one
+			for (int i = 0; i < 2 && commands.size() > 1; i++) {
+				Command &cmd = commands.top();
+				if (!cmd.inRangeField) {
+					if (cmd.inField) {
+						cmd.inField = false;
+						return State::FIELD_END;
+					}
+					commands.pop();
+				} else {
+					break;
+				}
+			}
+			logger.error(
+			    "Got field end token \"}\", but there is no field to end. Did "
+			    "you mean \"\\}\"?",
+			    token);
+		} else {
+			logger.error("Unexpected token \"" + token.content + "\"", token);
+		}
+	}
+
+	// Issue available data
+	if (checkIssueData(handler)) {
+		return State::DATA;
+	}
+
+	// Make sure all open commands and fields have been ended at the end of the
+	// stream
+	while (commands.size() > 1) {
+		Command &cmd = commands.top();
+		if (cmd.inField || cmd.hasRange) {
+			logger.error("Reached end of stream, but command \"" +
+			                 cmd.name.asString() + "\" has not been ended",
+			             cmd.name);
+		}
+		commands.pop();
+	}
+
+	location = SourceLocation{reader.getSourceId(), reader.getOffset()};
+	return State::END;
+}
+
+const Variant &OsdmStreamParser::getCommandName()
+{
+	return commands.top().name;
+}
+
+const Variant &OsdmStreamParser::getCommandArguments()
+{
+	return commands.top().arguments;
+}
+}
+
diff --git a/src/formats/osdm/OsdmStreamParser.hpp b/src/formats/osdm/OsdmStreamParser.hpp
new file mode 100644
index 0000000..48d8fb7
--- /dev/null
+++ b/src/formats/osdm/OsdmStreamParser.hpp
@@ -0,0 +1,351 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file OsdmStreamParser.hpp
+ *
+ * Provides classes for low-level classes for reading the TeX-esque osdm
+ * format. The class provided here does not build any model objects and does not
+ * implement the Parser interface.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_OSDM_STREAM_PARSER_HPP_
+#define _OUSIA_OSDM_STREAM_PARSER_HPP_
+
+#include <stack>
+
+#include <core/common/Variant.hpp>
+
+#include "DynamicTokenizer.hpp"
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+class Logger;
+class DataHandler;
+
+/**
+ * The OsdmStreamParser class provides a low-level reader for the TeX-esque osdm
+ * format. The parser is constructed around a "parse" function, which reads data
+ * from the underlying CharReader until a new state is reached and indicates
+ * this state in a return value. The calling code then has to pull corresponding
+ * data from the stream reader. The reader makes sure the incommind file is
+ * syntactically valid and tries to recorver from most errors. If an error is
+ * irrecoverable (this is the case for errors with wrong nesting of commands or
+ * fields, as this would lead to too many consecutive errors) a
+ * LoggableException is thrown.
+ */
+class OsdmStreamParser {
+public:
+	/**
+	 * Enum used to indicate which state the OsdmStreamParser class is in
+	 * after calling the "parse" function.
+	 */
+	enum class State {
+		/**
+	     * State returned if a fully featured command has been read. A command
+	     * consists of the command name and its arguments (which optionally
+	     * includes the name).
+	     */
+		COMMAND,
+
+		/**
+	     * State returned if data is given. The reader must decide which field
+	     * or command this should be routed to. Trailing or leading whitespace
+	     * has been removed. Only called if the data is non-empty.
+	     */
+		DATA,
+
+		/**
+	     * A user-defined entity has been found. The entity sequence is stored
+	     * in the command name.
+	     */
+		ENTITY,
+
+		/**
+	     * State returned if an annotation was started. An annotation consists
+	     * of the command name and its arguments (which optionally include the
+	     * name).
+	     */
+		ANNOTATION_START,
+
+		/**
+	     * State returned if an annotation ends. The reader indicates which
+	     * annotation ends.
+	     */
+		ANNOTATION_END,
+
+		/**
+	     * State returned if a new field started. The reader assures that the
+	     * current field ends before a new field is started and that the field
+	     * is not started if data has been given outside of a field. The
+	     * field number is set to the current field index.
+	     */
+		FIELD_START,
+
+		/**
+	     * State returned if the current field ends. The reader assures that a
+	     * field was actually open.
+	     */
+		FIELD_END,
+
+		/**
+	     * The end of the stream has been reached.
+	     */
+		END,
+
+		/**
+	     * Returned from internal functions if nothing should be done.
+	     */
+		NONE,
+
+		/**
+	     * Returned from internal function to indicate irrecoverable errors.
+	     */
+		ERROR
+	};
+
+	/**
+	 * Entry used for the command stack.
+	 */
+	struct Command {
+		/**
+		 * Name and location of the current command.
+		 */
+		Variant name;
+
+		/**
+		 * Arguments that were passed to the command.
+		 */
+		Variant arguments;
+
+		/**
+		 * Set to true if this is a command with clear begin and end.
+		 */
+		bool hasRange;
+
+		/**
+		 * Set to true if we are currently inside a field of this command.
+		 */
+		bool inField;
+
+		/**
+		 * Set to true if we are currently in the range field of the command
+		 * (implies inField being set to true).
+		 */
+		bool inRangeField;
+
+		/**
+		 * Default constructor.
+		 */
+		Command() : hasRange(false), inField(false), inRangeField(false) {}
+
+		/**
+		 * Constructor of the Command class.
+		 *
+		 * @param name is a string variant with name and location of the
+		 * command.
+		 * @param arguments is a map variant with the arguments given to the
+		 * command.
+		 * @param hasRange should be set to true if this is a command with
+		 * explicit range.
+		 * @param inField is set to true if we currently are inside a field
+		 * of this command.
+		 * @param inRangeField is set to true if we currently inside the outer
+		 * field of the command.
+		 */
+		Command(Variant name, Variant arguments, bool hasRange, bool inField,
+		        bool inRangeField)
+		    : name(std::move(name)),
+		      arguments(std::move(arguments)),
+		      hasRange(hasRange),
+		      inField(inField),
+		      inRangeField(inRangeField)
+		{
+		}
+	};
+
+private:
+	/**
+	 * Reference to the CharReader instance from which the incomming bytes are
+	 * read.
+	 */
+	CharReader &reader;
+
+	/**
+	 * Reference at the logger instance to which all error messages are sent.
+	 */
+	Logger &logger;
+
+	/**
+	 * Tokenizer instance used to read individual tokens from the text.
+	 */
+	DynamicTokenizer tokenizer;
+
+	/**
+	 * Stack containing the current commands.
+	 */
+	std::stack<Command> commands;
+
+	/**
+	 * Variant containing the data that has been read (always is a string,
+	 * contains the exact location of the data in the source file).
+	 */
+	Variant data;
+
+	/**
+	 * Contains the location of the last token.
+	 */
+	SourceLocation location;
+
+	/**
+	 * Contains the field index of the current command.
+	 */
+	size_t fieldIdx;
+
+	/**
+	 * Function used internall to parse an identifier.
+	 *
+	 * @param start is the start byte offset of the identifier (including the
+	 * backslash).
+	 * @param allowNSSep should be set to true if the namespace separator is
+	 * allowed in the identifier name. Issues error if the namespace separator
+	 * is placed incorrectly.
+	 */
+	Variant parseIdentifier(size_t start, bool allowNSSep = false);
+
+	/**
+	 * Function used internally to handle the special "\begin" command.
+	 */
+	State parseBeginCommand();
+
+	/**
+	 * Function used internally to handle the special "\end" command.
+	 */
+	State parseEndCommand();
+
+	/**
+	 * Pushes the parsed command onto the command stack.
+	 */
+	void pushCommand(Variant commandName, Variant commandArguments,
+	                 bool hasRange);
+
+	/**
+	 * Parses the command arguments.
+	 */
+	Variant parseCommandArguments(Variant commandArgName);
+
+	/**
+	 * Function used internally to parse a command.
+	 *
+	 * @param start is the start byte offset of the command (including the
+	 * backslash)
+	 * @return true if a command was actuall parsed, false otherwise.
+	 */
+	State parseCommand(size_t start);
+
+	/**
+	 * Function used internally to parse a block comment.
+	 */
+	void parseBlockComment();
+
+	/**
+	 * Function used internally to parse a generic comment.
+	 */
+	void parseLineComment();
+
+	/**
+	 * Checks whether there is any data pending to be issued, if yes, issues it.
+	 *
+	 * @param handler is the data handler that contains the data that may be
+	 * returned to the user.
+	 * @return true if there was any data and DATA should be returned by the
+	 * parse function, false otherwise.
+	 */
+	bool checkIssueData(DataHandler &handler);
+
+	/**
+	 * Called before any data is appended to the internal data handler. Checks
+	 * whether a new field should be started or implicitly ended.
+	 *
+	 * @return true if FIELD_START should be returned by the parse function.
+	 */
+	bool checkIssueFieldStart();
+
+public:
+	/**
+	 * Constructor of the OsdmStreamParser class. Attaches the new
+	 * OsdmStreamParser to the given CharReader and Logger instances.
+	 *
+	 * @param reader is the reader instance from which incomming characters
+	 * should be read.
+	 * @param logger is the logger instance to which errors should be written.
+	 */
+	OsdmStreamParser(CharReader &reader, Logger &logger);
+
+	/**
+	 * Continues parsing. Returns one of the states defined in the State enum.
+	 * Callers should stop once the State::END state is reached. Use the getter
+	 * functions to get more information about the current state, such as the
+	 * command name or the data or the current field index.
+	 *
+	 * @return the new state the parser has reached.
+	 */
+	State parse();
+
+	/**
+	 * Returns a reference at the internally stored data. Only valid if
+	 * State::DATA was returned by the "parse" function.
+	 *
+	 * @return a reference at a variant containing the data parsed by the
+	 * "parse" function.
+	 */
+	const Variant &getData() { return data; }
+
+	/**
+	 * Returns a reference at the internally stored command name. Only valid if
+	 * State::COMMAND was returned by the "parse" function.
+	 *
+	 * @return a reference at a variant containing name and location of the
+	 * parsed command.
+	 */
+	const Variant &getCommandName();
+
+	/**
+	 * Returns a reference at the internally stored command name. Only valid if
+	 * State::COMMAND was returned by the "parse" function.
+	 *
+	 * @return a reference at a variant containing arguments given to the
+	 * command.
+	 */
+	const Variant &getCommandArguments();
+
+	/**
+	 * Returns a reference at the char reader.
+	 *
+	 * @return the last internal token location.
+	 */
+	SourceLocation &getLocation() { return location; }
+};
+}
+
+#endif /* _OUSIA_OSDM_STREAM_PARSER_HPP_ */
+
diff --git a/src/formats/osdm/TokenTrie.cpp b/src/formats/osdm/TokenTrie.cpp
new file mode 100644
index 0000000..4a0430b
--- /dev/null
+++ b/src/formats/osdm/TokenTrie.cpp
@@ -0,0 +1,119 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "TokenTrie.hpp"
+
+namespace ousia {
+
+/* Class DynamicTokenTree::Node */
+
+TokenTrie::Node::Node() : type(EmptyToken) {}
+
+/* Class DynamicTokenTree */
+
+bool TokenTrie::registerToken(const std::string &token,
+                              TokenTypeId type) noexcept
+{
+	// Abort if the token is empty -- this would taint the root node
+	if (token.empty()) {
+		return false;
+	}
+
+	// Iterate over each character in the given string and insert them as
+	// (new) nodes
+	Node *node = &root;
+	for (size_t i = 0; i < token.size(); i++) {
+		// Insert a new node if this one does not exist
+		const char c = token[i];
+		auto it = node->children.find(c);
+		if (it == node->children.end()) {
+			it = node->children.emplace(c, std::make_shared<Node>()).first;
+		}
+		node = it->second.get();
+	}
+
+	// If the resulting node already has a type set, we're screwed.
+	if (node->type != EmptyToken) {
+		return false;
+	}
+
+	// Otherwise just set the type to the given type.
+	node->type = type;
+	return true;
+}
+
+bool TokenTrie::unregisterToken(const std::string &token) noexcept
+{
+	// We cannot remove empty tokens as we need to access the fist character
+	// upfront
+	if (token.empty()) {
+		return false;
+	}
+
+	// First pass -- search the node in the path that can be deleted
+	Node *subtreeRoot = &root;
+	char subtreeKey = token[0];
+	Node *node = &root;
+	for (size_t i = 0; i < token.size(); i++) {
+		// Go to the next node, abort if the tree ends unexpectedly
+		auto it = node->children.find(token[i]);
+		if (it == node->children.end()) {
+			return false;
+		}
+
+		// Reset the subtree handler if this node has another type
+		node = it->second.get();
+		if ((node->type != EmptyToken || node->children.size() > 1) &&
+		    (i + 1 != token.size())) {
+			subtreeRoot = node;
+			subtreeKey = token[i + 1];
+		}
+	}
+
+	// If the node type is already EmptyToken, we cannot do anything here
+	if (node->type == EmptyToken) {
+		return false;
+	}
+
+	// If the target node has children, we cannot delete the subtree. Set the
+	// type to EmptyToken instead
+	if (!node->children.empty()) {
+		node->type = EmptyToken;
+		return true;
+	}
+
+	// If we end up here, we can safely delete the complete subtree
+	subtreeRoot->children.erase(subtreeKey);
+	return true;
+}
+
+TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept
+{
+	Node const *node = &root;
+	for (size_t i = 0; i < token.size(); i++) {
+		const char c = token[i];
+		auto it = node->children.find(c);
+		if (it == node->children.end()) {
+			return EmptyToken;
+		}
+		node = it->second.get();
+	}
+	return node->type;
+}
+}
+
diff --git a/src/formats/osdm/TokenTrie.hpp b/src/formats/osdm/TokenTrie.hpp
new file mode 100644
index 0000000..36c2ffa
--- /dev/null
+++ b/src/formats/osdm/TokenTrie.hpp
@@ -0,0 +1,150 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file TokenTrie.hpp
+ *
+ * Class representing a token trie that can be updated dynamically.
+ *
+ * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de)
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_TOKEN_TRIE_HPP_
+#define _OUSIA_TOKEN_TRIE_HPP_
+
+#include <cstdint>
+#include <memory>
+#include <limits>
+#include <unordered_map>
+
+namespace ousia {
+
+/**
+ * The TokenTypeId is used to give each token type a unique id.
+ */
+using TokenTypeId = uint32_t;
+
+/**
+ * Token which is not a token.
+ */
+constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max();
+
+/**
+ * Token which represents a text token.
+ */
+constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1;
+
+/**
+ * The Tokenizer internally uses a TokenTrie to be efficiently able to identify
+ * the longest consecutive token in the text. This is equivalent to a prefix
+ * trie.
+ *
+ * A token trie is a construct that structures all special tokens a Tokenizer
+ * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and
+ * three. Then the token tree would look like this:
+ *
+ * \code{*.txt}
+ *        ~ (0)
+ *       /     \
+ *      a (2)  b (0)
+ *      |      |
+ *      a (0)  a (0)
+ *      |      |
+ *      b (1)  c (0)
+ * \endcode
+ *
+ * Where the number indicates the corresponding token descriptor identifier.
+ */
+class TokenTrie {
+public:
+	/**
+	 * Structure used to build the node tree.
+	 */
+	struct Node {
+		/**
+		 * Type used for the child map.
+		 */
+		using ChildMap = std::unordered_map<char, std::shared_ptr<Node>>;
+
+		/**
+		 * Map from single characters at the corresponding child nodes.
+		 */
+		ChildMap children;
+
+		/**
+		 * Reference at the corresponding token descriptor. Set to nullptr if
+		 * no token is attached to this node.
+		 */
+		TokenTypeId type;
+
+		/**
+		 * Default constructor, initializes the descriptor with nullptr.
+		 */
+		Node();
+	};
+
+private:
+	/**
+	 * Root node of the internal token tree.
+	 */
+	Node root;
+
+public:
+	/**
+	 * Registers a token containing the given string. Returns false if the
+	 * token already exists, true otherwise.
+	 *
+	 * @param token is the character sequence that should be registered as
+	 * token.
+	 * @param type is the descriptor that should be set for this token.
+	 * @return true if the operation is successful, false otherwise.
+	 */
+	bool registerToken(const std::string &token, TokenTypeId type) noexcept;
+
+	/**
+	 * Unregisters the token from the token tree. Returns true if the token was
+	 * unregistered successfully, false otherwise.
+	 *
+	 * @param token is the character sequence that should be unregistered.
+	 * @return true if the operation was successful, false otherwise.
+	 */
+	bool unregisterToken(const std::string &token) noexcept;
+
+	/**
+	 * Returns true, if the given token exists within the TokenTree. This
+	 * function is mostly thought for debugging and unit testing.
+	 *
+	 * @param token is the character sequence that should be searched.
+	 * @return the attached token descriptor or nullptr if the given token is
+	 * not found.
+	 */
+	TokenTypeId hasToken(const std::string &token) const noexcept;
+
+	/**
+	 * Returns a reference at the root node to be used for traversing the token
+	 * tree.
+	 *
+	 * @return a reference at the root node.
+	 */
+	const Node *getRoot() const noexcept { return &root; }
+};
+}
+
+#endif /* _OUSIA_TOKEN_TRIE_HPP_ */
+
diff --git a/src/plugins/plain/DynamicTokenizer.cpp b/src/plugins/plain/DynamicTokenizer.cpp
deleted file mode 100644
index f2cfcd1..0000000
--- a/src/plugins/plain/DynamicTokenizer.cpp
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <memory>
-#include <vector>
-
-#include <core/common/CharReader.hpp>
-#include <core/common/Exceptions.hpp>
-#include <core/common/Utils.hpp>
-
-#include "DynamicTokenizer.hpp"
-
-namespace ousia {
-
-namespace {
-
-/* Internal class TokenMatch */
-
-/**
- * Contains information about a matching token.
- */
-struct TokenMatch {
-	/**
-	 * Token that was matched.
-	 */
-	DynamicToken token;
-
-	/**
-	 * Current length of the data within the text handler. The text buffer needs
-	 * to be trimmed to this length if this token matches.
-	 */
-	size_t textLength;
-
-	/**
-	 * End location of the current text handler. This location needs to be used
-	 * for the text token that is emitted before the actual token.
-	 */
-	size_t textEnd;
-
-	/**
-	 * Constructor of the TokenMatch class.
-	 */
-	TokenMatch() : textLength(0), textEnd(0) {}
-
-	/**
-	 * Returns true if this TokenMatch instance actually represents a match.
-	 */
-	bool hasMatch() { return token.type != EmptyToken; }
-};
-
-/* Internal class TokenLookup */
-
-/**
- * The TokenLookup class is used to represent a thread in a running token
- * lookup.
- */
-class TokenLookup {
-private:
-	/**
-	 * Current node within the token trie.
-	 */
-	TokenTrie::Node const *node;
-
-	/**
-	 * Start offset within the source file.
-	 */
-	size_t start;
-
-	/**
-	 * Current length of the data within the text handler. The text buffer needs
-	 * to be trimmed to this length if this token matches.
-	 */
-	size_t textLength;
-
-	/**
-	 * End location of the current text handler. This location needs to be used
-	 * for the text token that is emitted before the actual token.
-	 */
-	size_t textEnd;
-
-public:
-	/**
-	 * Constructor of the TokenLookup class.
-	 *
-	 * @param node is the current node.
-	 * @param start is the start position.
-	 * @param textLength is the text buffer length of the previous text token.
-	 * @param textEnd is the current end location of the previous text token.
-	 */
-	TokenLookup(const TokenTrie::Node *node, size_t start,
-	            size_t textLength, size_t textEnd)
-	    : node(node), start(start), textLength(textLength), textEnd(textEnd)
-	{
-	}
-
-	/**
-	 * Tries to extend the current path in the token trie with the given
-	 * character. If a complete token is matched, stores this match in the
-	 * tokens list (in case it is longer than any previous token).
-	 *
-	 * @param c is the character that should be appended to the current prefix.
-	 * @param lookups is a list to which new TokeLookup instances are added --
-	 * which could potentially be expanded in the next iteration.
-	 * @param match is the DynamicToken instance to which the matching token
-	 * should be written.
-	 * @param tokens is a reference at the internal token list of the
-	 * DynamicTokenizer.
-	 * @param end is the end byte offset of the current character.
-	 * @param sourceId is the source if of this file.
-	 */
-	void advance(char c, std::vector<TokenLookup> &lookups, TokenMatch &match,
-	             const std::vector<std::string> &tokens, SourceOffset end,
-	             SourceId sourceId)
-	{
-		// Check whether we can continue the current token path with the given
-		// character without visiting an already visited node
-		auto it = node->children.find(c);
-		if (it == node->children.end()) {
-			return;
-		}
-
-		// Check whether the new node represents a complete token a whether it
-		// is longer than the current token. If yes, replace the current token.
-		node = it->second.get();
-		if (node->type != EmptyToken) {
-			const std::string &str = tokens[node->type];
-			size_t len = str.size();
-			if (len > match.token.content.size()) {
-				match.token =
-				    DynamicToken{node->type, str, {sourceId, start, end}};
-				match.textLength = textLength;
-				match.textEnd = textEnd;
-			}
-		}
-
-		// If this state can possibly be advanced, store it in the states list.
-		if (!node->children.empty()) {
-			lookups.emplace_back(*this);
-		}
-	}
-};
-
-/* Internal class TextHandlerBase */
-
-/**
- * Base class used for those classes that may be used as TextHandler in the
- * DynamicTokenizer::next function.
- */
-class TextHandlerBase {
-public:
-	/**
-	 * Start position of the extracted text.
-	 */
-	size_t textStart;
-
-	/**
-	 * End position of the extracted text.
-	 */
-	size_t textEnd;
-
-	/**
-	 * Buffer containing the extracted text.
-	 */
-	std::vector<char> textBuf;
-
-	/**
-	 * Constructor of the TextHandlerBase base class. Initializes the start and
-	 * end position with zeros.
-	 */
-	TextHandlerBase() : textStart(0), textEnd(0) {}
-
-	/**
-	 * Transforms the given token into a text token containing the extracted
-	 * text.
-	 *
-	 * @param token is the output token to which the text should be written.
-	 * @param sourceId is the source id of the underlying file.
-	 */
-	void buildTextToken(TokenMatch &match, SourceId sourceId)
-	{
-		if (match.hasMatch()) {
-			match.token.content =
-			    std::string{textBuf.data(), match.textLength};
-			match.token.location =
-			    SourceLocation{sourceId, textStart, match.textEnd};
-		} else {
-			match.token.content = std::string{textBuf.data(), textBuf.size()};
-			match.token.location = SourceLocation{sourceId, textStart, textEnd};
-		}
-		match.token.type = TextToken;
-	}
-
-	/**
-	 * Returns true if this whitespace handler has found any text and a text
-	 * token could be emitted.
-	 *
-	 * @return true if the internal data buffer is non-empty.
-	 */
-	bool hasText() { return !textBuf.empty(); }
-};
-
-/* Internal class PreservingTextHandler */
-
-/**
- * The PreservingTextHandler class preserves all characters unmodified,
- * including whitepace characters.
- */
-class PreservingTextHandler : public TextHandlerBase {
-public:
-	using TextHandlerBase::TextHandlerBase;
-
-	/**
-	 * Appends the given character to the internal text buffer, does not
-	 * eliminate whitespace.
-	 *
-	 * @param c is the character that should be appended to the internal buffer.
-	 * @param start is the start byte offset of the given character.
-	 * @param end is the end byte offset of the given character.
-	 */
-	void append(char c, size_t start, size_t end)
-	{
-		if (textBuf.empty()) {
-			textStart = start;
-		}
-		textEnd = end;
-		textBuf.push_back(c);
-	}
-};
-
-/* Internal class TrimmingTextHandler */
-
-/**
- * The TrimmingTextHandler class trims all whitespace characters at the begin
- * and the end of a text section but leaves all other characters unmodified,
- * including whitepace characters.
- */
-class TrimmingTextHandler : public TextHandlerBase {
-public:
-	using TextHandlerBase::TextHandlerBase;
-
-	/**
-	 * Buffer used internally to temporarily store all whitespace characters.
-	 * They are only added to the output buffer if another non-whitespace
-	 * character is reached.
-	 */
-	std::vector<char> whitespaceBuf;
-
-	/**
-	 * Appends the given character to the internal text buffer, eliminates
-	 * whitespace characters at the begin and end of the text.
-	 *
-	 * @param c is the character that should be appended to the internal buffer.
-	 * @param start is the start byte offset of the given character.
-	 * @param end is the end byte offset of the given character.
-	 */
-	void append(char c, size_t start, size_t end)
-	{
-		// Handle whitespace characters
-		if (Utils::isWhitespace(c)) {
-			if (!textBuf.empty()) {
-				whitespaceBuf.push_back(c);
-			}
-			return;
-		}
-
-		// Set the start and end offset correctly
-		if (textBuf.empty()) {
-			textStart = start;
-		}
-		textEnd = end;
-
-		// Store the character
-		if (!whitespaceBuf.empty()) {
-			textBuf.insert(textBuf.end(), whitespaceBuf.begin(),
-			               whitespaceBuf.end());
-			whitespaceBuf.clear();
-		}
-		textBuf.push_back(c);
-	}
-};
-
-/* Internal class CollapsingTextHandler */
-
-/**
- * The CollapsingTextHandler trims characters at the beginning and end of the
- * text and reduced multiple whitespace characters to a single blank.
- */
-class CollapsingTextHandler : public TextHandlerBase {
-public:
-	using TextHandlerBase::TextHandlerBase;
-
-	/**
-	 * Flag set to true if a whitespace character was reached.
-	 */
-	bool hasWhitespace = false;
-
-	/**
-	 * Appends the given character to the internal text buffer, eliminates
-	 * redundant whitespace characters.
-	 *
-	 * @param c is the character that should be appended to the internal buffer.
-	 * @param start is the start byte offset of the given character.
-	 * @param end is the end byte offset of the given character.
-	 */
-	void append(char c, size_t start, size_t end)
-	{
-		// Handle whitespace characters
-		if (Utils::isWhitespace(c)) {
-			if (!textBuf.empty()) {
-				hasWhitespace = true;
-			}
-			return;
-		}
-
-		// Set the start and end offset correctly
-		if (textBuf.empty()) {
-			textStart = start;
-		}
-		textEnd = end;
-
-		// Store the character
-		if (hasWhitespace) {
-			textBuf.push_back(' ');
-			hasWhitespace = false;
-		}
-		textBuf.push_back(c);
-	}
-};
-}
-
-/* Class DynamicTokenizer */
-
-DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
-    : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
-{
-}
-
-template <typename TextHandler, bool read>
-bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
-{
-	// If we're in the read mode, reset the char reader peek position to the
-	// current read position
-	if (read) {
-		reader.resetPeek();
-	}
-
-	// Prepare the lookups in the token trie
-	const TokenTrie::Node *root = trie.getRoot();
-	TokenMatch match;
-	std::vector<TokenLookup> lookups;
-	std::vector<TokenLookup> nextLookups;
-
-	// Instantiate the text handler
-	TextHandler textHandler;
-
-	// Peek characters from the reader and try to advance the current token tree
-	// cursor
-	char c;
-	size_t charStart = reader.getPeekOffset();
-	const SourceId sourceId = reader.getSourceId();
-	while (reader.peek(c)) {
-		const size_t charEnd = reader.getPeekOffset();
-		const size_t textLength = textHandler.textBuf.size();
-		const size_t textEnd = textHandler.textEnd;
-
-		// If we do not have a match yet, start a new lookup from the root
-		if (!match.hasMatch()) {
-			TokenLookup{root, charStart, textLength, textEnd}.advance(
-			    c, nextLookups, match, tokens, charEnd, sourceId);
-		}
-
-		// Try to advance all other lookups with the new character
-		for (TokenLookup &lookup : lookups) {
-			lookup.advance(c, nextLookups, match, tokens, charEnd, sourceId);
-		}
-
-		// We have found a token and there are no more states to advance or the
-		// text handler has found something -- abort to return the new token
-		if (match.hasMatch()) {
-			if ((nextLookups.empty() || textHandler.hasText())) {
-				break;
-			}
-		} else {
-			// Record all incomming characters
-			textHandler.append(c, charStart, charEnd);
-		}
-
-		// Swap the lookups and the nextLookups list
-		lookups = std::move(nextLookups);
-		nextLookups.clear();
-
-		// Advance the offset
-		charStart = charEnd;
-	}
-
-	// If we found text, emit that text
-	if (textHandler.hasText() &&
-	    (!match.hasMatch() || match.textLength > 0)) {
-		textHandler.buildTextToken(match, sourceId);
-	}
-
-	// Move the read/peek cursor to the end of the token, abort if an error
-	// happens while doing so
-	if (match.hasMatch()) {
-		// Make sure we have a valid location
-		if (match.token.location.getEnd() == InvalidSourceOffset) {
-			throw OusiaException{"Token end position offset out of range"};
-		}
-
-		// Seek to the end of the current token
-		const size_t end = match.token.location.getEnd();
-		if (read) {
-			reader.seek(end);
-		} else {
-			reader.seekPeekCursor(end);
-		}
-		token = match.token;
-	} else {
-		token = DynamicToken{};
-	}
-	return match.hasMatch();
-}
-
-bool DynamicTokenizer::read(CharReader &reader,DynamicToken &token)
-{
-	switch (whitespaceMode) {
-		case WhitespaceMode::PRESERVE:
-			return next<PreservingTextHandler, true>(reader, token);
-		case WhitespaceMode::TRIM:
-			return next<TrimmingTextHandler, true>(reader, token);
-		case WhitespaceMode::COLLAPSE:
-			return next<CollapsingTextHandler, true>(reader, token);
-	}
-	return false;
-}
-
-bool DynamicTokenizer::peek(CharReader &reader,DynamicToken &token)
-{
-	switch (whitespaceMode) {
-		case WhitespaceMode::PRESERVE:
-			return next<PreservingTextHandler, false>(reader, token);
-		case WhitespaceMode::TRIM:
-			return next<TrimmingTextHandler, false>(reader, token);
-		case WhitespaceMode::COLLAPSE:
-			return next<CollapsingTextHandler, false>(reader, token);
-	}
-	return false;
-}
-
-TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
-{
-	// Abort if an empty token should be registered
-	if (token.empty()) {
-		return EmptyToken;
-	}
-
-	// Search for a new slot in the tokens list
-	TokenTypeId type = EmptyToken;
-	for (size_t i = nextTokenTypeId; i < tokens.size(); i++) {
-		if (tokens[i].empty()) {
-			tokens[i] = token;
-			type = i;
-			break;
-		}
-	}
-
-	// No existing slot was found, add a new one -- make sure we do not
-	// override the special token type handles
-	if (type == EmptyToken) {
-		type = tokens.size();
-		if (type == TextToken || type == EmptyToken) {
-			throw OusiaException{"Token type ids depleted!"};
-		}
-		tokens.emplace_back(token);
-	}
-	nextTokenTypeId = type + 1;
-
-	// Try to register the token in the trie -- if this fails, remove it
-	// from the tokens list
-	if (!trie.registerToken(token, type)) {
-		tokens[type] = std::string();
-		nextTokenTypeId = type;
-		return EmptyToken;
-	}
-	return type;
-}
-
-bool DynamicTokenizer::unregisterToken(TokenTypeId type)
-{
-	// Unregister the token from the trie, abort if an invalid type is given
-	if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
-		tokens[type] = std::string{};
-		nextTokenTypeId = type;
-		return true;
-	}
-	return false;
-}
-
-std::string DynamicTokenizer::getTokenString(TokenTypeId type)
-{
-	if (type < tokens.size()) {
-		return tokens[type];
-	}
-	return std::string{};
-}
-
-void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode)
-{
-	whitespaceMode = mode;
-}
-
-WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
-
-/* Explicitly instantiate all possible instantiations of the "next" member
-   function */
-template bool DynamicTokenizer::next<PreservingTextHandler, false>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingTextHandler, false>(
-    CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingTextHandler, false>(
-    CharReader &reader,DynamicToken &token);
-template bool DynamicTokenizer::next<PreservingTextHandler, true>(
-    CharReader &reader,DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingTextHandler, true>(
-    CharReader &reader,DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingTextHandler, true>(
-    CharReader &reader,DynamicToken &token);
-}
-
diff --git a/src/plugins/plain/DynamicTokenizer.hpp b/src/plugins/plain/DynamicTokenizer.hpp
deleted file mode 100644
index 0cac2e8..0000000
--- a/src/plugins/plain/DynamicTokenizer.hpp
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file DynamicTokenizer.hpp
- *
- * Tokenizer that can be reconfigured at runtime used for parsing the plain
- * text format.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_DYNAMIC_TOKENIZER_HPP_
-#define _OUSIA_DYNAMIC_TOKENIZER_HPP_
-
-#include <set>
-#include <string>
-#include <vector>
-
-#include <core/common/Location.hpp>
-
-#include "TokenTrie.hpp"
-
-namespace ousia {
-
-// Forward declarations
-class CharReader;
-
-/**
- * The DynamicToken structure describes a token discovered by the Tokenizer.
- */
-struct DynamicToken {
-	/**
-	 * Id of the type of this token.
-	 */
-	TokenTypeId type;
-
-	/**
-	 * String that was matched.
-	 */
-	std::string content;
-
-	/**
-	 * Location from which the string was extracted.
-	 */
-	SourceLocation location;
-
-	/**
-	 * Default constructor.
-	 */
-	DynamicToken() : type(EmptyToken) {}
-
-	/**
-	 * Constructor of the DynamicToken struct.
-	 *
-	 * @param id represents the token type.
-	 * @param content is the string content that has been extracted.
-	 * @param location is the location of the extracted string content in the
-	 * source file.
-	 */
-	DynamicToken(TokenTypeId type, const std::string &content,
-	             SourceLocation location)
-	    : type(type), content(content), location(location)
-	{
-	}
-
-	/**
-	 * Constructor of the DynamicToken struct, only initializes the token type
-	 *
-	 * @param type is the id corresponding to the type of the token.
-	 */
-	DynamicToken(TokenTypeId type) : type(type) {}
-
-	/**
-	 * The getLocation function allows the tokens to be directly passed as
-	 * parameter to Logger or LoggableException instances.
-	 *
-	 * @return a reference at the location field
-	 */
-	const SourceLocation &getLocation() const { return location; }
-};
-
-/**
- * Enum specifying the whitespace handling of the DynamicTokenizer class when
- * reading non-token text.
- */
-enum class WhitespaceMode {
-	/**
-     * Preserves all whitespaces as they are found in the source file.
-     */
-	PRESERVE,
-
-	/**
-     * Trims whitespace at the beginning and the end of the found text.
-     */
-	TRIM,
-
-	/**
-     * Whitespaces are trimmed and collapsed, multiple whitespace characters
-     * are replaced by a single space character.
-     */
-	COLLAPSE
-};
-
-/**
- * The DynamicTokenizer is used to extract tokens and chunks of text from a
- * CharReader. It allows to register and unregister tokens while parsing and
- * to modify the handling of whitespace characters. Note that the
- * DynamicTokenizer always tries to extract the longest possible token from the
- * tokenizer.
- */
-class DynamicTokenizer {
-private:
-	/**
-	 * Internally used token trie. This object holds all registered tokens.
-	 */
-	TokenTrie trie;
-
-	/**
-	 * Flag defining whether whitespaces should be preserved or not.
-	 */
-	WhitespaceMode whitespaceMode;
-
-	/**
-	 * Vector containing all registered token types.
-	 */
-	std::vector<std::string> tokens;
-
-	/**
-	 * Next index in the tokens list where to search for a new token id.
-	 */
-	size_t nextTokenTypeId;
-
-	/**
-	 * Templated function used internally to read the current token. The
-	 * function is templated in order to force code generation for all six
-	 * combiations of whitespace modes and reading/peeking.
-	 *
-	 * @tparam TextHandler is the type to be used for the textHandler instance.
-	 * @tparam read specifies whether the function should start from and advance
-	 * the read pointer of the char reader.
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is the token structure into which the token information
-	 * should be written.
-	 * @return false if the end of the stream has been reached, true otherwise.
-	 */
-	template <typename TextHandler, bool read>
-	bool next(CharReader &reader, DynamicToken &token);
-
-public:
-	/**
-	 * Constructor of the DynamicTokenizer class.
-	 *
-	 * @param whitespaceMode specifies how whitespace should be handled.
-	 */
-	DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
-
-	/**
-	 * Registers the given string as a token. Returns a const pointer at a
-	 * TokenDescriptor that will be used to reference the newly created token.
-	 *
-	 * @param token is the token string that should be registered.
-	 * @return a unique identifier for the registered token or EmptyToken if
-	 * an error occured.
-	 */
-	TokenTypeId registerToken(const std::string &token);
-
-	/**
-	 * Unregisters the token belonging to the given TokenTypeId.
-	 *
-	 * @param type is the token type that should be unregistered. The
-	 *TokenTypeId
-	 * must have been returned by registerToken.
-	 * @return true if the operation was successful, false otherwise (e.g.
-	 * because the given TokenDescriptor was already unregistered).
-	 */
-	bool unregisterToken(TokenTypeId type);
-
-	/**
-	 * Returns the token that was registered under the given TokenTypeId id or
-	 *an
-	 * empty string if an invalid TokenTypeId id is given.
-	 *
-	 * @param type is the TokenTypeId id for which the corresponding token
-	 *string
-	 * should be returned.
-	 * @return the registered token string or an empty string if the given type
-	 * was invalid.
-	 */
-	std::string getTokenString(TokenTypeId type);
-
-	/**
-	 * Sets the whitespace mode.
-	 *
-	 * @param whitespaceMode defines how whitespace should be treated in text
-	 * tokens.
-	 */
-	void setWhitespaceMode(WhitespaceMode mode);
-
-	/**
-	 * Returns the current value of the whitespace mode.
-	 *
-	 * @return the whitespace mode.
-	 */
-	WhitespaceMode getWhitespaceMode();
-
-	/**
-	 * Reads a new token from the CharReader and stores it in the given
-	 * DynamicToken instance.
-	 *
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is a reference at the token instance into which the Token
-	 * information should be written.
-	 * @return true if a token could be read, false if the end of the stream
-	 * has been reached.
-	 */
-	bool read(CharReader &reader, DynamicToken &token);
-
-	/**
-	 * The peek method does not advance the read position of the char reader,
-	 * but reads the next token from the current char reader peek position.
-	 *
-	 * @param reader is the CharReader instance from which the data should be
-	 * read.
-	 * @param token is a reference at the token instance into which the Token
-	 * information should be written.
-	 * @return true if a token could be read, false if the end of the stream
-	 * has been reached.
-	 */
-	bool peek(CharReader &reader, DynamicToken &token);
-};
-}
-
-#endif /* _OUSIA_DYNAMIC_TOKENIZER_HPP_ */
-
diff --git a/src/plugins/plain/PlainFormatStreamReader.cpp b/src/plugins/plain/PlainFormatStreamReader.cpp
deleted file mode 100644
index 05769f0..0000000
--- a/src/plugins/plain/PlainFormatStreamReader.cpp
+++ /dev/null
@@ -1,641 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <core/common/CharReader.hpp>
-#include <core/common/Logger.hpp>
-#include <core/common/Utils.hpp>
-#include <core/common/VariantReader.hpp>
-
-#include "PlainFormatStreamReader.hpp"
-
-namespace ousia {
-
-/**
- * Plain format default tokenizer.
- */
-class PlainFormatTokens : public DynamicTokenizer {
-public:
-	/**
-	 * Id of the backslash token.
-	 */
-	TokenTypeId Backslash;
-
-	/**
-	 * Id of the line comment token.
-	 */
-	TokenTypeId LineComment;
-
-	/**
-	 * Id of the block comment start token.
-	 */
-	TokenTypeId BlockCommentStart;
-
-	/**
-	 * Id of the block comment end token.
-	 */
-	TokenTypeId BlockCommentEnd;
-
-	/**
-	 * Id of the field start token.
-	 */
-	TokenTypeId FieldStart;
-
-	/**
-	 * Id of the field end token.
-	 */
-	TokenTypeId FieldEnd;
-
-	/**
-	 * Registers the plain format tokens in the internal tokenizer.
-	 */
-	PlainFormatTokens()
-	{
-		Backslash = registerToken("\\");
-		LineComment = registerToken("%");
-		BlockCommentStart = registerToken("%{");
-		BlockCommentEnd = registerToken("}%");
-		FieldStart = registerToken("{");
-		FieldEnd = registerToken("}");
-	}
-};
-
-static const PlainFormatTokens Tokens;
-
-/**
- * Class used internally to collect data issued via "DATA" event.
- */
-class DataHandler {
-private:
-	/**
-	 * Internal character buffer.
-	 */
-	std::vector<char> buf;
-
-	/**
-	 * Start location of the character data.
-	 */
-	SourceOffset start;
-
-	/**
-	 * End location of the character data.
-	 */
-	SourceOffset end;
-
-public:
-	/**
-	 * Default constructor, initializes start and end with zeros.
-	 */
-	DataHandler() : start(0), end(0) {}
-
-	/**
-	 * Returns true if the internal buffer is empty.
-	 *
-	 * @return true if no characters were added to the internal buffer, false
-	 * otherwise.
-	 */
-	bool isEmpty() { return buf.empty(); }
-
-	/**
-	 * Appends a single character to the internal buffer.
-	 *
-	 * @param c is the character that should be added to the internal buffer.
-	 * @param charStart is the start position of the character.
-	 * @param charEnd is the end position of the character.
-	 */
-	void append(char c, SourceOffset charStart, SourceOffset charEnd)
-	{
-		if (isEmpty()) {
-			start = charStart;
-		}
-		buf.push_back(c);
-		end = charEnd;
-	}
-
-	/**
-	 * Appends a string to the internal buffer.
-	 *
-	 * @param s is the string that should be added to the internal buffer.
-	 * @param stringStart is the start position of the string.
-	 * @param stringEnd is the end position of the string.
-	 */
-	void append(const std::string &s, SourceOffset stringStart,
-	            SourceOffset stringEnd)
-	{
-		if (isEmpty()) {
-			start = stringStart;
-		}
-		std::copy(s.c_str(), s.c_str() + s.size(), back_inserter(buf));
-		end = stringEnd;
-	}
-
-	/**
-	 * Converts the internal buffer to a variant with attached location
-	 * information.
-	 *
-	 * @param sourceId is the source id which is needed for building the
-	 * location information.
-	 * @return a Variant with the internal buffer content as string and
-	 * the correct start and end location.
-	 */
-	Variant toVariant(SourceId sourceId)
-	{
-		Variant res = Variant::fromString(std::string(buf.data(), buf.size()));
-		res.setLocation({sourceId, start, end});
-		return res;
-	}
-};
-
-PlainFormatStreamReader::PlainFormatStreamReader(CharReader &reader,
-                                                 Logger &logger)
-    : reader(reader), logger(logger), tokenizer(Tokens)
-{
-	// Place an intial command representing the complete file on the stack
-	commands.push(Command{"", Variant::mapType{}, true, true, true});
-}
-
-Variant PlainFormatStreamReader::parseIdentifier(size_t start, bool allowNSSep)
-{
-	bool first = true;
-	bool hasCharSiceNSSep = false;
-	std::vector<char> identifier;
-	size_t end = reader.getPeekOffset();
-	char c, c2;
-	while (reader.peek(c)) {
-		// Abort if this character is not a valid identifer character
-		if ((first && Utils::isIdentifierStartCharacter(c)) ||
-		    (!first && Utils::isIdentifierCharacter(c))) {
-			identifier.push_back(c);
-		} else if (c == ':' && hasCharSiceNSSep && reader.fetchPeek(c2) && Utils::isIdentifierStartCharacter(c2)) {
-			identifier.push_back(c);
-		} else {
-			if (c == ':' && allowNSSep) {
-				logger.error(
-				    "Expected character before and after namespace separator \":\"",
-				    reader);
-			}
-			reader.resetPeek();
-			break;
-		}
-
-		// This is no longer the first character
-		first = false;
-
-		// Advance the hasCharSiceNSSep flag
-		hasCharSiceNSSep = allowNSSep && (c != ':');
-
-		end = reader.getPeekOffset();
-		reader.consumePeek();
-	}
-
-	// Return the identifier at its location
-	Variant res =
-	    Variant::fromString(std::string(identifier.data(), identifier.size()));
-	res.setLocation({reader.getSourceId(), start, end});
-	return res;
-}
-
-PlainFormatStreamReader::State PlainFormatStreamReader::parseBeginCommand()
-{
-	// Expect a '{' after the command
-	reader.consumeWhitespace();
-	if (!reader.expect('{')) {
-		logger.error("Expected \"{\" after \\begin", reader);
-		return State::NONE;
-	}
-
-	// Parse the name of the command that should be opened
-	Variant commandName = parseIdentifier(reader.getOffset(), true);
-	if (commandName.asString().empty()) {
-		logger.error("Expected identifier", commandName);
-		return State::ERROR;
-	}
-
-	// Check whether the next character is a '#', indicating the start of the
-	// command name
-	Variant commandArgName;
-	SourceOffset start = reader.getOffset();
-	if (reader.expect('#')) {
-		commandArgName = parseIdentifier(start);
-		if (commandArgName.asString().empty()) {
-			logger.error("Expected identifier after \"#\"", commandArgName);
-		}
-	}
-
-	if (!reader.expect('}')) {
-		logger.error("Expected \"}\"", reader);
-		return State::ERROR;
-	}
-
-	// Parse the arguments
-	Variant commandArguments = parseCommandArguments(std::move(commandArgName));
-
-	// Push the command onto the command stack
-	pushCommand(std::move(commandName), std::move(commandArguments), true);
-
-	return State::COMMAND;
-}
-
-static bool checkStillInField(const PlainFormatStreamReader::Command &cmd,
-                              const Variant &endName, Logger &logger)
-{
-	if (cmd.inField && !cmd.inRangeField) {
-		logger.error(std::string("\\end in open field of command \"") +
-		                 cmd.name.asString() + std::string("\""),
-		             endName);
-		logger.note(std::string("Open command started here:"), cmd.name);
-		return true;
-	}
-	return false;
-}
-
-PlainFormatStreamReader::State PlainFormatStreamReader::parseEndCommand()
-{
-	// Expect a '{' after the command
-	if (!reader.expect('{')) {
-		logger.error("Expected \"{\" after \\end", reader);
-		return State::NONE;
-	}
-
-	// Fetch the name of the command that should be ended here
-	Variant name = parseIdentifier(reader.getOffset(), true);
-
-	// Make sure the given command name is not empty
-	if (name.asString().empty()) {
-		logger.error("Expected identifier", name);
-		return State::ERROR;
-	}
-
-	// Make sure the command name is terminated with a '}'
-	if (!reader.expect('}')) {
-		logger.error("Expected \"}\"", reader);
-		return State::ERROR;
-	}
-
-	// Unroll the command stack up to the last range command
-	while (!commands.top().hasRange) {
-		if (checkStillInField(commands.top(), name, logger)) {
-			return State::ERROR;
-		}
-		commands.pop();
-	}
-
-	// Make sure we're not in an open field of this command
-	if (checkStillInField(commands.top(), name, logger)) {
-		return State::ERROR;
-	}
-
-	// Special error message if the top-level command is reached
-	if (commands.size() == 1) {
-		logger.error(std::string("Cannot end command \"") + name.asString() +
-		                 std::string("\" here, no command open"),
-		             name);
-		return State::ERROR;
-	}
-
-	// Inform the about command mismatches
-	const Command &cmd = commands.top();
-	if (commands.top().name.asString() != name.asString()) {
-		logger.error(std::string("Trying to end command \"") +
-		                 cmd.name.asString() +
-		                 std::string("\", but open command is \"") +
-		                 name.asString() + std::string("\""),
-		             name);
-		logger.note("Last command was opened here:", cmd.name);
-		return State::ERROR;
-	}
-
-	// Set the location to the location of the command that was ended, then end
-	// the current command
-	location = name.getLocation();
-	commands.pop();
-	return cmd.inRangeField ? State::FIELD_END : State::NONE;
-}
-
-Variant PlainFormatStreamReader::parseCommandArguments(Variant commandArgName)
-{
-	// Parse the arguments using the universal VariantReader
-	Variant commandArguments;
-	if (reader.expect('[')) {
-		auto res = VariantReader::parseObject(reader, logger, ']');
-		commandArguments = res.second;
-	} else {
-		commandArguments = Variant::mapType{};
-	}
-
-	// Insert the parsed name, make sure "name" was not specified in the
-	// arguments
-	if (commandArgName.isString()) {
-		auto res =
-		    commandArguments.asMap().emplace("name", std::move(commandArgName));
-		if (!res.second) {
-			logger.error("Name argument specified multiple times",
-			             SourceLocation{}, MessageMode::NO_CONTEXT);
-			logger.note("First occurance is here: ", commandArgName);
-			logger.note("Second occurance is here: ", res.first->second);
-		}
-	}
-	return commandArguments;
-}
-
-void PlainFormatStreamReader::pushCommand(Variant commandName,
-                                          Variant commandArguments,
-                                          bool hasRange)
-{
-	// Store the location on the stack
-	location = commandName.getLocation();
-
-	// Place the command on the command stack, remove the last commands if we're
-	// not currently inside a field of these commands
-	while (!commands.top().inField) {
-		commands.pop();
-	}
-	commands.push(Command{std::move(commandName), std::move(commandArguments),
-	                      hasRange, false, false});
-}
-
-PlainFormatStreamReader::State PlainFormatStreamReader::parseCommand(
-    size_t start)
-{
-	// Parse the commandName as a first identifier
-	Variant commandName = parseIdentifier(start, true);
-	if (commandName.asString().empty()) {
-		logger.error("Empty command name", reader);
-		return State::NONE;
-	}
-
-	// Handle the special "begin" and "end" commands
-	const auto commandNameComponents =
-	    Utils::split(commandName.asString(), ':');
-	const bool isBegin = commandNameComponents[0] == "begin";
-	const bool isEnd = commandNameComponents[0] == "end";
-	if (isBegin || isEnd) {
-		if (commandNameComponents.size() > 1) {
-			logger.error(
-			    "Special commands \"\\begin\" and \"\\end\" may not contain a "
-			    "namespace separator \":\"",
-			    commandName);
-		}
-		if (isBegin) {
-			return parseBeginCommand();
-		} else if (isEnd) {
-			return parseEndCommand();
-		}
-	}
-
-	// Check whether the next character is a '#', indicating the start of the
-	// command name
-	Variant commandArgName;
-	start = reader.getOffset();
-	if (reader.expect('#')) {
-		commandArgName = parseIdentifier(start);
-		if (commandArgName.asString().empty()) {
-			logger.error("Expected identifier after \"#\"", commandArgName);
-		}
-	}
-
-	// Parse the arugments
-	Variant commandArguments = parseCommandArguments(std::move(commandArgName));
-
-	// Push the command onto the command stack
-	pushCommand(std::move(commandName), std::move(commandArguments), false);
-
-	return State::COMMAND;
-}
-
-void PlainFormatStreamReader::parseBlockComment()
-{
-	DynamicToken token;
-	size_t depth = 1;
-	while (tokenizer.read(reader, token)) {
-		if (token.type == Tokens.BlockCommentEnd) {
-			depth--;
-			if (depth == 0) {
-				return;
-			}
-		}
-		if (token.type == Tokens.BlockCommentStart) {
-			depth++;
-		}
-	}
-
-	// Issue an error if the file ends while we are in a block comment
-	logger.error("File ended while being in a block comment", reader);
-}
-
-void PlainFormatStreamReader::parseLineComment()
-{
-	char c;
-	while (reader.read(c)) {
-		if (c == '\n') {
-			return;
-		}
-	}
-}
-
-bool PlainFormatStreamReader::checkIssueData(DataHandler &handler)
-{
-	if (!handler.isEmpty()) {
-		data = handler.toVariant(reader.getSourceId());
-		location = data.getLocation();
-		reader.resetPeek();
-		return true;
-	}
-	return false;
-}
-
-bool PlainFormatStreamReader::checkIssueFieldStart()
-{
-	// Fetch the current command, and check whether we're currently inside a
-	// field of this command
-	Command &cmd = commands.top();
-	if (!cmd.inField) {
-		// If this is a range command, we're now implicitly inside the field of
-		// this command -- we'll have to issue a field start command!
-		if (cmd.hasRange) {
-			cmd.inField = true;
-			cmd.inRangeField = true;
-			reader.resetPeek();
-			return true;
-		}
-
-		// This was not a range command, so obviously we're now inside within
-		// a field of some command -- so unroll the commands stack until a
-		// command with open field is reached
-		while (!commands.top().inField) {
-			commands.pop();
-		}
-	}
-	return false;
-}
-
-PlainFormatStreamReader::State PlainFormatStreamReader::parse()
-{
-	// Handler for incomming data
-	DataHandler handler;
-
-	// Read tokens until the outer loop should be left
-	DynamicToken token;
-	while (tokenizer.peek(reader, token)) {
-		const TokenTypeId type = token.type;
-
-		// Special handling for Backslash and Text
-		if (type == Tokens.Backslash) {
-			// Before appending anything to the output data or starting a new
-			// command, check whether FIELD_START has to be issued, as the
-			// current command is a command with range
-			if (checkIssueFieldStart()) {
-				location = token.location;
-				return State::FIELD_START;
-			}
-
-			// Check whether a command starts now, without advancing the peek
-			// cursor
-			char c;
-			if (!reader.fetchPeek(c)) {
-				logger.error("Trailing backslash at the end of the file.",
-				             token);
-				return State::END;
-			}
-
-			// Try to parse a command
-			if (Utils::isIdentifierStartCharacter(c)) {
-				// Make sure to issue any data before it is to late
-				if (checkIssueData(handler)) {
-					return State::DATA;
-				}
-
-				// Parse the actual command
-				State res = parseCommand(token.location.getStart());
-				switch (res) {
-					case State::ERROR:
-						throw LoggableException(
-						    "Last error was irrecoverable, ending parsing "
-						    "process");
-					case State::NONE:
-						continue;
-					default:
-						return res;
-				}
-			}
-
-			// This was not a special character, just append the given character
-			// to the data buffer, use the escape character start as start
-			// location and the peek offset as end location
-			reader.peek(c);  // Peek the previously fetched character
-			handler.append(c, token.location.getStart(),
-			               reader.getPeekOffset());
-			reader.consumePeek();
-			continue;
-		} else if (type == TextToken) {
-			// Check whether FIELD_START has to be issued before appending text
-			if (checkIssueFieldStart()) {
-				location = token.location;
-				return State::FIELD_START;
-			}
-
-			// Append the text to the data handler
-			handler.append(token.content, token.location.getStart(),
-			               token.location.getEnd());
-
-			reader.consumePeek();
-			continue;
-		}
-
-		// A non-text token was reached, make sure all pending data commands
-		// have been issued
-		if (checkIssueData(handler)) {
-			return State::DATA;
-		}
-
-		// We will handle the token now, consume the peeked characters
-		reader.consumePeek();
-
-		// Update the location to the current token location
-		location = token.location;
-
-		if (token.type == Tokens.LineComment) {
-			parseLineComment();
-		} else if (token.type == Tokens.BlockCommentStart) {
-			parseBlockComment();
-		} else if (token.type == Tokens.FieldStart) {
-			Command &cmd = commands.top();
-			if (!cmd.inField) {
-				cmd.inField = true;
-				return State::FIELD_START;
-			}
-			logger.error(
-			    "Got field start token \"{\", but no command for which to "
-			    "start the field. Did you mean \"\\{\"?",
-			    token);
-		} else if (token.type == Tokens.FieldEnd) {
-			// Try to end an open field of the current command -- if the current
-			// command is not inside an open field, end this command and try to
-			// close the next one
-			for (int i = 0; i < 2 && commands.size() > 1; i++) {
-				Command &cmd = commands.top();
-				if (!cmd.inRangeField) {
-					if (cmd.inField) {
-						cmd.inField = false;
-						return State::FIELD_END;
-					}
-					commands.pop();
-				} else {
-					break;
-				}
-			}
-			logger.error(
-			    "Got field end token \"}\", but there is no field to end. Did "
-			    "you mean \"\\}\"?",
-			    token);
-		} else {
-			logger.error("Unexpected token \"" + token.content + "\"", token);
-		}
-	}
-
-	// Issue available data
-	if (checkIssueData(handler)) {
-		return State::DATA;
-	}
-
-	// Make sure all open commands and fields have been ended at the end of the
-	// stream
-	while (commands.size() > 1) {
-		Command &cmd = commands.top();
-		if (cmd.inField || cmd.hasRange) {
-			logger.error("Reached end of stream, but command \"" +
-			                 cmd.name.asString() + "\" has not been ended",
-			             cmd.name);
-		}
-		commands.pop();
-	}
-
-	location = SourceLocation{reader.getSourceId(), reader.getOffset()};
-	return State::END;
-}
-
-const Variant &PlainFormatStreamReader::getCommandName()
-{
-	return commands.top().name;
-}
-
-const Variant &PlainFormatStreamReader::getCommandArguments()
-{
-	return commands.top().arguments;
-}
-}
-
diff --git a/src/plugins/plain/PlainFormatStreamReader.hpp b/src/plugins/plain/PlainFormatStreamReader.hpp
deleted file mode 100644
index 2ee261c..0000000
--- a/src/plugins/plain/PlainFormatStreamReader.hpp
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file PlainFormatStreamReader.hpp
- *
- * Provides classes for low-level classes for reading the plain TeX-esque
- * format. The class provided here do not build any model objects and does not
- * implement the Parser interfaces.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_
-#define _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_
-
-#include <stack>
-
-#include <core/common/Variant.hpp>
-
-#include "DynamicTokenizer.hpp"
-
-namespace ousia {
-
-// Forward declarations
-class CharReader;
-class Logger;
-class DataHandler;
-
-/**
- * The PlainFormatStreamReader class provides a low-level reader for the plain
- * TeX-esque format. The parser is constructed around a "parse" function, which
- * reads data from the underlying CharReader until a new state is reached and
- * indicates this state in a return value. The calling code then has to pull
- * corresponding data from the stream reader. The reader already handles some
- * invalid cases, but recovers from most errors and happily continues parsing.
- */
-class PlainFormatStreamReader {
-public:
-	/**
-	 * Enum used to indicate which state the PlainFormatStreamReader class is in
-	 * after calling the "parse" function.
-	 */
-	enum class State {
-		/**
-	     * State returned if a fully featured command has been read. A command
-	     * consists of the command name and its arguments (which optionally
-	     * includes the name).
-	     */
-		COMMAND,
-
-		/**
-	     * State returned if data is given. The reader must decide which field
-	     * or command this should be routed to. Trailing or leading whitespace
-	     * has been removed. Only called if the data is non-empty.
-	     */
-		DATA,
-
-		/**
-	     * A user-defined entity has been found. The entity sequence is stored
-	     * in the command name.
-	     */
-		ENTITY,
-
-		/**
-	     * State returned if an annotation was started. An annotation consists
-	     * of the command name and its arguments (which optionally include the
-	     * name).
-	     */
-		ANNOTATION_START,
-
-		/**
-	     * State returned if an annotation ends. The reader indicates which
-	     * annotation ends.
-	     */
-		ANNOTATION_END,
-
-		/**
-	     * State returned if a new field started. The reader assures that the
-	     * current field ends before a new field is started and that the field
-	     * is not started if data has been given outside of a field. The
-	     * field number is set to the current field index.
-	     */
-		FIELD_START,
-
-		/**
-	     * State returned if the current field ends. The reader assures that a
-	     * field was actually open.
-	     */
-		FIELD_END,
-
-		/**
-	     * The end of the stream has been reached.
-	     */
-		END,
-
-		/**
-		 * Returned from internal functions if nothing should be done.
-		 */
-		NONE,
-
-		/**
-		 * Returned from internal function to indicate irrecoverable errors.
-		 */
-		ERROR
-	};
-
-	/**
-	 * Entry used for the command stack.
-	 */
-	struct Command {
-		/**
-		 * Name and location of the current command.
-		 */
-		Variant name;
-
-		/**
-		 * Arguments that were passed to the command.
-		 */
-		Variant arguments;
-
-		/**
-		 * Set to true if this is a command with clear begin and end.
-		 */
-		bool hasRange;
-
-		/**
-		 * Set to true if we are currently inside a field of this command.
-		 */
-		bool inField;
-
-		/**
-		 * Set to true if we are currently in the range field of the command
-		 * (implies inField being set to true).
-		 */
-		bool inRangeField;
-
-		/**
-		 * Default constructor.
-		 */
-		Command() : hasRange(false), inField(false), inRangeField(false) {}
-
-		/**
-		 * Constructor of the Command class.
-		 *
-		 * @param name is a string variant with name and location of the
-		 * command.
-		 * @param arguments is a map variant with the arguments given to the
-		 * command.
-		 * @param hasRange should be set to true if this is a command with
-		 * explicit range.
-		 * @param inField is set to true if we currently are inside a field
-		 * of this command.
-		 * @param inRangeField is set to true if we currently inside the outer
-		 * field of the command.
-		 */
-		Command(Variant name, Variant arguments, bool hasRange,
-		        bool inField, bool inRangeField)
-		    : name(std::move(name)),
-		      arguments(std::move(arguments)),
-		      hasRange(hasRange),
-		      inField(inField),
-		      inRangeField(inRangeField)
-		{
-		}
-	};
-
-private:
-	/**
-	 * Reference to the CharReader instance from which the incomming bytes are
-	 * read.
-	 */
-	CharReader &reader;
-
-	/**
-	 * Reference at the logger instance to which all error messages are sent.
-	 */
-	Logger &logger;
-
-	/**
-	 * Tokenizer instance used to read individual tokens from the text.
-	 */
-	DynamicTokenizer tokenizer;
-
-	/**
-	 * Stack containing the current commands.
-	 */
-	std::stack<Command> commands;
-
-	/**
-	 * Variant containing the data that has been read (always is a string,
-	 * contains the exact location of the data in the source file).
-	 */
-	Variant data;
-
-	/**
-	 * Contains the location of the last token.
-	 */
-	SourceLocation location;
-
-	/**
-	 * Contains the field index of the current command.
-	 */
-	size_t fieldIdx;
-
-	/**
-	 * Function used internall to parse an identifier.
-	 *
-	 * @param start is the start byte offset of the identifier (including the
-	 * backslash).
-	 * @param allowNSSep should be set to true if the namespace separator is
-	 * allowed in the identifier name. Issues error if the namespace separator
-	 * is placed incorrectly.
-	 */
-	Variant parseIdentifier(size_t start, bool allowNSSep = false);
-
-	/**
-	 * Function used internally to handle the special "\begin" command.
-	 */
-	State parseBeginCommand();
-
-	/**
-	 * Function used internally to handle the special "\end" command.
-	 */
-	State parseEndCommand();
-
-	/**
-	 * Pushes the parsed command onto the command stack.
-	 */
-	void pushCommand(Variant commandName, Variant commandArguments, bool hasRange);
-
-	/**
-	 * Parses the command arguments.
-	 */
-	Variant parseCommandArguments(Variant commandArgName);
-
-	/**
-	 * Function used internally to parse a command.
-	 *
-	 * @param start is the start byte offset of the command (including the
-	 * backslash)
-	 * @return true if a command was actuall parsed, false otherwise.
-	 */
-	State parseCommand(size_t start);
-
-	/**
-	 * Function used internally to parse a block comment.
-	 */
-	void parseBlockComment();
-
-	/**
-	 * Function used internally to parse a generic comment.
-	 */
-	void parseLineComment();
-
-	/**
-	 * Checks whether there is any data pending to be issued, if yes, issues it.
-	 *
-	 * @param handler is the data handler that contains the data that may be
-	 * returned to the user.
-	 * @return true if there was any data and DATA should be returned by the
-	 * parse function, false otherwise.
-	 */
-	bool checkIssueData(DataHandler &handler);
-
-	/**
-	 * Called before any data is appended to the internal data handler. Checks
-	 * whether a new field should be started or implicitly ended.
-	 *
-	 * @return true if FIELD_START should be returned by the parse function.
-	 */
-	bool checkIssueFieldStart();
-
-public:
-	/**
-	 * Constructor of the PlainFormatStreamReader class. Attaches the new
-	 * PlainFormatStreamReader to the given CharReader and Logger instances.
-	 *
-	 * @param reader is the reader instance from which incomming characters
-	 * should be read.
-	 * @param logger is the logger instance to which errors should be written.
-	 */
-	PlainFormatStreamReader(CharReader &reader, Logger &logger);
-
-	/**
-	 * Continues parsing. Returns one of the states defined in the State enum.
-	 * Callers should stop once the State::END state is reached. Use the getter
-	 * functions to get more information about the current state, such as the
-	 * command name or the data or the current field index.
-	 *
-	 * @return the new state the parser has reached.
-	 */
-	State parse();
-
-	/**
-	 * Returns a reference at the internally stored data. Only valid if
-	 * State::DATA was returned by the "parse" function.
-	 *
-	 * @return a reference at a variant containing the data parsed by the
-	 * "parse" function.
-	 */
-	const Variant &getData() { return data; }
-
-	/**
-	 * Returns a reference at the internally stored command name. Only valid if
-	 * State::COMMAND was returned by the "parse" function.
-	 *
-	 * @return a reference at a variant containing name and location of the
-	 * parsed command.
-	 */
-	const Variant &getCommandName();
-
-	/**
-	 * Returns a reference at the internally stored command name. Only valid if
-	 * State::COMMAND was returned by the "parse" function.
-	 *
-	 * @return a reference at a variant containing arguments given to the
-	 * command.
-	 */
-	const Variant &getCommandArguments();
-
-	/**
-	 * Returns a reference at the char reader.
-	 *
-	 * @return the last internal token location.
-	 */
-	SourceLocation &getLocation() {return location;}
-};
-}
-
-#endif /* _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_ */
-
diff --git a/src/plugins/plain/TokenTrie.cpp b/src/plugins/plain/TokenTrie.cpp
deleted file mode 100644
index 4a0430b..0000000
--- a/src/plugins/plain/TokenTrie.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "TokenTrie.hpp"
-
-namespace ousia {
-
-/* Class DynamicTokenTree::Node */
-
-TokenTrie::Node::Node() : type(EmptyToken) {}
-
-/* Class DynamicTokenTree */
-
-bool TokenTrie::registerToken(const std::string &token,
-                              TokenTypeId type) noexcept
-{
-	// Abort if the token is empty -- this would taint the root node
-	if (token.empty()) {
-		return false;
-	}
-
-	// Iterate over each character in the given string and insert them as
-	// (new) nodes
-	Node *node = &root;
-	for (size_t i = 0; i < token.size(); i++) {
-		// Insert a new node if this one does not exist
-		const char c = token[i];
-		auto it = node->children.find(c);
-		if (it == node->children.end()) {
-			it = node->children.emplace(c, std::make_shared<Node>()).first;
-		}
-		node = it->second.get();
-	}
-
-	// If the resulting node already has a type set, we're screwed.
-	if (node->type != EmptyToken) {
-		return false;
-	}
-
-	// Otherwise just set the type to the given type.
-	node->type = type;
-	return true;
-}
-
-bool TokenTrie::unregisterToken(const std::string &token) noexcept
-{
-	// We cannot remove empty tokens as we need to access the fist character
-	// upfront
-	if (token.empty()) {
-		return false;
-	}
-
-	// First pass -- search the node in the path that can be deleted
-	Node *subtreeRoot = &root;
-	char subtreeKey = token[0];
-	Node *node = &root;
-	for (size_t i = 0; i < token.size(); i++) {
-		// Go to the next node, abort if the tree ends unexpectedly
-		auto it = node->children.find(token[i]);
-		if (it == node->children.end()) {
-			return false;
-		}
-
-		// Reset the subtree handler if this node has another type
-		node = it->second.get();
-		if ((node->type != EmptyToken || node->children.size() > 1) &&
-		    (i + 1 != token.size())) {
-			subtreeRoot = node;
-			subtreeKey = token[i + 1];
-		}
-	}
-
-	// If the node type is already EmptyToken, we cannot do anything here
-	if (node->type == EmptyToken) {
-		return false;
-	}
-
-	// If the target node has children, we cannot delete the subtree. Set the
-	// type to EmptyToken instead
-	if (!node->children.empty()) {
-		node->type = EmptyToken;
-		return true;
-	}
-
-	// If we end up here, we can safely delete the complete subtree
-	subtreeRoot->children.erase(subtreeKey);
-	return true;
-}
-
-TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept
-{
-	Node const *node = &root;
-	for (size_t i = 0; i < token.size(); i++) {
-		const char c = token[i];
-		auto it = node->children.find(c);
-		if (it == node->children.end()) {
-			return EmptyToken;
-		}
-		node = it->second.get();
-	}
-	return node->type;
-}
-}
-
diff --git a/src/plugins/plain/TokenTrie.hpp b/src/plugins/plain/TokenTrie.hpp
deleted file mode 100644
index 36c2ffa..0000000
--- a/src/plugins/plain/TokenTrie.hpp
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file TokenTrie.hpp
- *
- * Class representing a token trie that can be updated dynamically.
- *
- * @author Benjamin Paaßen (astoecke@techfak.uni-bielefeld.de)
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_TOKEN_TRIE_HPP_
-#define _OUSIA_TOKEN_TRIE_HPP_
-
-#include <cstdint>
-#include <memory>
-#include <limits>
-#include <unordered_map>
-
-namespace ousia {
-
-/**
- * The TokenTypeId is used to give each token type a unique id.
- */
-using TokenTypeId = uint32_t;
-
-/**
- * Token which is not a token.
- */
-constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max();
-
-/**
- * Token which represents a text token.
- */
-constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1;
-
-/**
- * The Tokenizer internally uses a TokenTrie to be efficiently able to identify
- * the longest consecutive token in the text. This is equivalent to a prefix
- * trie.
- *
- * A token trie is a construct that structures all special tokens a Tokenizer
- * recognizes. Consider the tokens "aab", "a" and "bac" numbered as one, two and
- * three. Then the token tree would look like this:
- *
- * \code{*.txt}
- *        ~ (0)
- *       /     \
- *      a (2)  b (0)
- *      |      |
- *      a (0)  a (0)
- *      |      |
- *      b (1)  c (0)
- * \endcode
- *
- * Where the number indicates the corresponding token descriptor identifier.
- */
-class TokenTrie {
-public:
-	/**
-	 * Structure used to build the node tree.
-	 */
-	struct Node {
-		/**
-		 * Type used for the child map.
-		 */
-		using ChildMap = std::unordered_map<char, std::shared_ptr<Node>>;
-
-		/**
-		 * Map from single characters at the corresponding child nodes.
-		 */
-		ChildMap children;
-
-		/**
-		 * Reference at the corresponding token descriptor. Set to nullptr if
-		 * no token is attached to this node.
-		 */
-		TokenTypeId type;
-
-		/**
-		 * Default constructor, initializes the descriptor with nullptr.
-		 */
-		Node();
-	};
-
-private:
-	/**
-	 * Root node of the internal token tree.
-	 */
-	Node root;
-
-public:
-	/**
-	 * Registers a token containing the given string. Returns false if the
-	 * token already exists, true otherwise.
-	 *
-	 * @param token is the character sequence that should be registered as
-	 * token.
-	 * @param type is the descriptor that should be set for this token.
-	 * @return true if the operation is successful, false otherwise.
-	 */
-	bool registerToken(const std::string &token, TokenTypeId type) noexcept;
-
-	/**
-	 * Unregisters the token from the token tree. Returns true if the token was
-	 * unregistered successfully, false otherwise.
-	 *
-	 * @param token is the character sequence that should be unregistered.
-	 * @return true if the operation was successful, false otherwise.
-	 */
-	bool unregisterToken(const std::string &token) noexcept;
-
-	/**
-	 * Returns true, if the given token exists within the TokenTree. This
-	 * function is mostly thought for debugging and unit testing.
-	 *
-	 * @param token is the character sequence that should be searched.
-	 * @return the attached token descriptor or nullptr if the given token is
-	 * not found.
-	 */
-	TokenTypeId hasToken(const std::string &token) const noexcept;
-
-	/**
-	 * Returns a reference at the root node to be used for traversing the token
-	 * tree.
-	 *
-	 * @return a reference at the root node.
-	 */
-	const Node *getRoot() const noexcept { return &root; }
-};
-}
-
-#endif /* _OUSIA_TOKEN_TRIE_HPP_ */
-
-- 
cgit v1.2.3