8 files changed, 488 insertions, 29 deletions
diff --git a/src/core/common/Argument.cpp b/src/core/common/Argument.cpp
index bfe74a4..b10fad3 100644
--- a/src/core/common/Argument.cpp
+++ b/src/core/common/Argument.cpp
@@ -302,10 +302,10 @@ bool Arguments::validateMap(Variant::mapType &map, Logger &logger,
 		} else {
 			if (ignoreUnknown) {
 				logger.note(std::string("Ignoring argument \"") + e.first +
-				            std::string("\""));
+				            std::string("\""), e.second);
 			} else {
 				logger.error(std::string("Unknown argument \"") + e.first +
-				             std::string("\""));
+				             std::string("\""), e.second);
 				ok = false;
 			}
 		}
diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
index 563fe2a..f8b53c6 100644
--- a/src/core/common/Utils.cpp
+++ b/src/core/common/Utils.cpp
@@ -18,19 +18,13 @@
 
 #include <algorithm>
 #include <cctype>
-#include <limits>
 #include <string>
 
 #include "Utils.hpp"
+#include "WhitespaceHandler.hpp"
 
 namespace ousia {
 
-std::string Utils::trim(const std::string &s)
-{
-	std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace);
-	return s.substr(bounds.first, bounds.second - bounds.first);
-}
-
 bool Utils::isIdentifier(const std::string &name)
 {
 	bool first = true;
@@ -43,7 +37,27 @@ bool Utils::isIdentifier(const std::string &name)
 		}
 		first = false;
 	}
-	return true;
+	return !first;
+}
+
+bool Utils::isIdentifierOrEmpty(const std::string &name)
+{
+	return name.empty() || isIdentifier(name);
+}
+
+bool Utils::isNamespacedIdentifier(const std::string &name)
+{
+	bool first = true;
+	for (char c : name) {
+		if (first && !isIdentifierStartCharacter(c)) {
+			return false;
+		}
+		if (!first && (!isIdentifierCharacter(c) && c != ':')) {
+			return false;
+		}
+		first = (c == ':');
+	}
+	return !first;
 }
 
 bool Utils::hasNonWhitepaceChar(const std::string &s)
@@ -94,5 +108,29 @@ std::string Utils::extractFileExtension(const std::string &filename)
 	}
 	return std::string{};
 }
+
+std::string Utils::trim(const std::string &s)
+{
+	std::pair<size_t, size_t> bounds = trim(s, Utils::isWhitespace);
+	return s.substr(bounds.first, bounds.second - bounds.first);
+}
+
+std::string Utils::collapse(const std::string &s)
+{
+	CollapsingWhitespaceHandler h;
+	appendToWhitespaceHandler(h, s, 0);
+	return h.toString();
+}
+
+bool Utils::startsWith(const std::string &s, const std::string &prefix)
+{
+	return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix;
+}
+
+bool Utils::endsWith(const std::string &s, const std::string &suffix)
+{
+	return suffix.size() <= s.size() &&
+	       s.substr(s.size() - suffix.size(), suffix.size()) == suffix;
+}
 }
 
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 2c8a5b3..b5a54fc 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -74,16 +74,45 @@ public:
 	}
 
 	/**
-	 * Returns true if the given character is in [A-Za-z][A-Za-z0-9_-]*
+	 * Returns true if the given string is in
+	 * \code{.txt}
+	 * [A-Za-z][A-Za-z0-9_-]*
+	 * \endcode
+	 *
+	 * @param name is the string that should be tested.
+	 * @return true if the string matches the regular expression given above, 
+	 * false otherwise.
 	 */
 	static bool isIdentifier(const std::string &name);
 
 	/**
+	 * Returns true if the given string is an identifier or an empty string.
+	 */
+	static bool isIdentifierOrEmpty(const std::string &name);
+
+	/**
+	 * Returns true if the given string is in
+	 * \code{.txt}
+	 * ([A-Za-z][A-Za-z0-9_-]*)(:[A-Za-z][A-Za-z0-9_-]*)*
+	 * \endcode
+	 *
+	 * @param name is the string that should be tested.
+	 * @return true if the string matches the regular expression given above, 
+	 * false otherwise.
+	 */
+	static bool isNamespacedIdentifier(const std::string &name);
+
+	/**
+	 * Returns true if the given character is a linebreak character.
+	 */
+	static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); }
+
+	/**
 	 * Returns true if the given character is a whitespace character.
 	 */
 	static bool isWhitespace(const char c)
 	{
-		return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
+		return (c == ' ') || (c == '\t') || isLinebreak(c);
 	}
 
 	/**
@@ -95,11 +124,6 @@ public:
 	static bool hasNonWhitepaceChar(const std::string &s);
 
 	/**
-	 * Returns true if the given character is a whitespace character.
-	 */
-	static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); }
-
-	/**
 	 * Removes whitespace at the beginning and the end of the given string.
 	 *
 	 * @param s is the string that should be trimmed.
@@ -120,8 +144,25 @@ public:
 	template <class T, class Filter>
 	static std::pair<size_t, size_t> trim(const T &s, Filter f)
 	{
+		return trim(s, s.size(), f);
+	}
+
+	/**
+	 * Trims the given string or vector of chars by returning the start and end
+	 * index.
+	 *
+	 * @param s is the container that should be trimmed.
+	 * @param len is the number of elements in the container.
+	 * @param f is a function that returns true for values that should be
+	 * removed.
+	 * @return start and end index. Note that "end" points at the character
+	 * beyond the end, thus "end" minus "start" is the length of the result.
+	 */
+	template <class T, class Filter>
+	static std::pair<size_t, size_t> trim(const T &s, size_t len, Filter f)
+	{
 		size_t start = 0;
-		for (size_t i = 0; i < s.size(); i++) {
+		for (size_t i = 0; i < len; i++) {
 			if (!f(s[i])) {
 				start = i;
 				break;
@@ -129,7 +170,7 @@ public:
 		}
 
 		size_t end = 0;
-		for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) {
+		for (ssize_t i = len - 1; i >= static_cast<ssize_t>(start); i--) {
 			if (!f(s[i])) {
 				end = i + 1;
 				break;
@@ -145,6 +186,15 @@ public:
 	}
 
 	/**
+	 * Collapses the whitespace in the given string (trims the string and
+	 * replaces each run of whitespace characters by a single space).
+	 *
+	 * @param s is the string in which the whitespace should be collapsed.
+	 * @return a copy of s with collapsed whitespace.
+	 */
+	static std::string collapse(const std::string &s);
+
+	/**
 	 * Turns the elements of a collection into a string separated by the
 	 * given delimiter.
 	 *
@@ -205,6 +255,24 @@ public:
 	static std::string extractFileExtension(const std::string &filename);
 
 	/**
+	 * Checks whether the given string starts with the given prefix.
+	 *
+	 * @param s is the string.
+	 * @param prefix is the string which should be checked for being a prefix of
+	 * s.
+	 */
+	static bool startsWith(const std::string &s, const std::string &prefix);
+
+	/**
+	 * Checks whether the given string ends with the given suffix.
+	 *
+	 * @param s is the string.
+	 * @param suffix is the string which should be checked for being a suffix of
+	 * s.
+	 */
+	static bool endsWith(const std::string &s, const std::string &suffix);
+
+	/**
 	 * Hash functional to be used for enum classes.
 	 * See http://stackoverflow.com/a/24847480/2188211
 	 */
diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp
index 6eae7e1..ddd17d7 100644
--- a/src/core/common/Variant.hpp
+++ b/src/core/common/Variant.hpp
@@ -884,6 +884,21 @@ public:
 	}
 
 	/**
+	 * If the value of the variant already is a string, the markAsMagic function
+	 * marks this string as a "magic" value (a variant which might also be an
+	 * identifier). Throws an exception if the variant is not a string or magic
+	 * value.
+	 */
+	void markAsMagic()
+	{
+		if (getType() == VariantType::STRING) {
+			meta.setType(VariantType::MAGIC);
+			return;
+		}
+		throw TypeException{getType(), VariantType::STRING};
+	}
+
+	/**
 	 * Returns the value of the Variant as boolean, performs type conversion.
 	 *
 	 * @return the Variant value converted to a boolean value.
@@ -1146,10 +1161,7 @@ public:
 	 *
 	 * @retun true if the
 	 */
-	bool hasLocation() const
-	{
-		return meta.hasLocation();
-	}
+	bool hasLocation() const { return meta.hasLocation(); }
 
 	/**
 	 * Unpacks ans returns the stored source location. Note that the returned
@@ -1158,10 +1170,7 @@ public:
 	 *
 	 * @return the stored SourceLocation.
 	 */
-	SourceLocation getLocation() const
-	{
-		return meta.getLocation();
-	}
+	SourceLocation getLocation() const { return meta.getLocation(); }
 
 	/**
 	 * Packs the given source location and stores it in the metadata. Not all
diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp
index 3f02226..fb93ad0 100644
--- a/src/core/common/VariantReader.cpp
+++ b/src/core/common/VariantReader.cpp
@@ -495,7 +495,7 @@ std::pair<bool, Variant::boolType> VariantReader::parseBool(CharReader &reader,
 	bool val = false;
 	CharReaderFork readerFork = reader.fork();
 	LoggerFork loggerFork = logger.fork();
-	auto res = parseToken(readerFork, loggerFork, {});
+	auto res = parseToken(readerFork, loggerFork, std::unordered_set<char>{});
 	if (res.first) {
 		bool valid = false;
 		if (res.second == "true") {
diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp
index 1232f6e..44132a0 100644
--- a/src/core/common/VariantReader.hpp
+++ b/src/core/common/VariantReader.hpp
@@ -322,7 +322,7 @@ public:
 	 */
 	static std::pair<bool, Variant> parseTyped(
 	    VariantType type, CharReader &reader, Logger &logger,
-	    const std::unordered_set<char> &delims = {});
+	    const std::unordered_set<char> &delims = std::unordered_set<char>{});
 	/**
 	 * Tries to parse an instance of the given type from the given string. The
 	 * called method is one of the parse methods defined here and adheres to the
diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp
new file mode 100644
index 0000000..72a2291
--- /dev/null
+++ b/src/core/common/Whitespace.hpp
@@ -0,0 +1,60 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Whitespace.hpp
+ *
+ * Contains the WhitespaceMode enum used in various places.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_WHITESPACE_HPP_
+#define _OUSIA_WHITESPACE_HPP_
+
+#include <string>
+#include <utility>
+
+namespace ousia {
+
+/**
+ * Enum specifying the whitespace handling mode of the tokenizer and the
+ * parsers.
+ */
+enum class WhitespaceMode {
+	/**
+     * Preserves all whitespaces as they are found in the source file.
+     */
+	PRESERVE,
+
+	/**
+     * Trims whitespace at the beginning and the end of the found text.
+     */
+	TRIM,
+
+	/**
+     * Whitespaces are trimmed and collapsed, multiple whitespace characters
+     * are replaced by a single space character.
+     */
+	COLLAPSE
+};
+
+}
+
+#endif /* _OUSIA_WHITESPACE_HPP_ */
+
diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp
new file mode 100644
index 0000000..ed52ea3
--- /dev/null
+++ b/src/core/common/WhitespaceHandler.hpp
@@ -0,0 +1,284 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file WhitespaceHandler.hpp
+ *
+ * Contains the WhitespaceHandler classes which are used in multiple places to
+ * trim, compact or preserve whitespaces while at the same time maintaining the
+ * position information associated with the input strings.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_
+#define _OUSIA_WHITESPACE_HANDLER_HPP_
+
+#include <string>
+#include <vector>
+
+#include "Utils.hpp"
+
+namespace ousia {
+
+/**
+ * WhitespaceHandler is a base class that can be used to collect text on a
+ * character-by-character basis. Note that this class and its descendants are
+ * hoped to be inlined by the compiler (and used in conjunction with templates),
+ * thus they are fully defined inside this header.
+ */
+class WhitespaceHandler {
+public:
+	/**
+	 * Start position of the extracted text.
+	 */
+	size_t textStart;
+
+	/**
+	 * End position of the extracted text.
+	 */
+	size_t textEnd;
+
+	/**
+	 * Buffer containing the extracted text.
+	 */
+	std::vector<char> textBuf;
+
+	/**
+	 * Constructor of the WhitespaceHandler base class. Initializes the start
+	 * and end position with zeros.
+	 */
+	WhitespaceHandler() : textStart(0), textEnd(0) {}
+
+	/**
+	 * Returns true if this whitespace handler has found any text and a text
+	 * token could be emitted.
+	 *
+	 * @return true if the internal data buffer is non-empty.
+	 */
+	bool hasText() { return !textBuf.empty(); }
+
+	/**
+	 * Returns the content of the WhitespaceHandler as string.
+	 */
+	std::string toString() const
+	{
+		return std::string(textBuf.data(), textBuf.size());
+	}
+};
+
+/**
+ * The PreservingWhitespaceHandler class preserves all characters unmodified,
+ * including whitespace characters.
+ */
+class PreservingWhitespaceHandler : public WhitespaceHandler {
+public:
+	/**
+	 * Appends the given character to the internal text buffer, does not
+	 * eliminate whitespace.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd);
+	}
+
+	/**
+	 * Static version of PreservingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd)
+	{
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+		textBuf.push_back(c);
+	}
+};
+
+/**
+ * The TrimmingWhitespaceHandler class trims all whitespace characters at the
+ * beginning and the end of a text section but leaves all other characters
+ * unmodified, including whitespace characters.
+ */
+class TrimmingWhitespaceHandler : public WhitespaceHandler {
+public:
+	/**
+	 * Buffer used internally to temporarily store all whitespace characters.
+	 * They are only added to the output buffer if another non-whitespace
+	 * character is reached.
+	 */
+	std::vector<char> whitespaceBuf;
+
+	/**
+	 * Appends the given character to the internal text buffer, eliminates
+	 * whitespace characters at the beginning and end of the text.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf);
+	}
+
+	/**
+	 * Static version of TrimmingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 * @param whitespaceBuf is a reference at the buffer for storing whitespace
+	 * characters.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd, std::vector<char> &whitespaceBuf)
+	{
+		// Handle whitespace characters
+		if (Utils::isWhitespace(c)) {
+			if (!textBuf.empty()) {
+				whitespaceBuf.push_back(c);
+			}
+			return;
+		}
+
+		// Set the start and end offset correctly
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+
+		// Store the character
+		if (!whitespaceBuf.empty()) {
+			textBuf.insert(textBuf.end(), whitespaceBuf.begin(),
+			               whitespaceBuf.end());
+			whitespaceBuf.clear();
+		}
+		textBuf.push_back(c);
+	}
+};
+
+/**
+ * The CollapsingWhitespaceHandler trims whitespace at the beginning and end of
+ * the text and reduces multiple whitespace characters to a single blank.
+ */
+class CollapsingWhitespaceHandler : public WhitespaceHandler {
+public:
+	/**
+	 * Flag set to true if a whitespace character was reached.
+	 */
+	bool hasWhitespace = false;
+
+	/**
+	 * Appends the given character to the internal text buffer, eliminates
+	 * redundant whitespace characters.
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 */
+	void append(char c, size_t start, size_t end)
+	{
+		append(c, start, end, textBuf, textStart, textEnd, hasWhitespace);
+	}
+
+	/**
+	 * Static version of CollapsingWhitespaceHandler append
+	 *
+	 * @param c is the character that should be appended to the internal buffer.
+	 * @param start is the start byte offset of the given character.
+	 * @param end is the end byte offset of the given character.
+	 * @param textBuf is a reference at the text buffer that is to be used.
+	 * @param textStart is a reference at the text start variable that is to be
+	 * used.
+	 * @param textEnd is a reference at the text end variable that is to be
+	 * used.
+	 * @param hasWhitespace is a reference at the "hasWhitespace" flag.
+	 */
+	static void append(char c, size_t start, size_t end,
+	                   std::vector<char> &textBuf, size_t &textStart,
+	                   size_t &textEnd, bool &hasWhitespace)
+	{
+		// Handle whitespace characters
+		if (Utils::isWhitespace(c)) {
+			if (!textBuf.empty()) {
+				hasWhitespace = true;
+			}
+			return;
+		}
+
+		// Set the start and end offset correctly
+		if (textBuf.empty()) {
+			textStart = start;
+		}
+		textEnd = end;
+
+		// Store the character
+		if (hasWhitespace) {
+			textBuf.push_back(' ');
+			hasWhitespace = false;
+		}
+		textBuf.push_back(c);
+	}
+};
+
+/**
+ * Function that can be used to append the given buffer (e.g. a string or a
+ * vector) to the whitespace handler.
+ *
+ * @tparam WhitespaceHandler is one of the WhitespaceHandler classes.
+ * @tparam Buffer is an iterable type.
+ * @param handler is the handler to which the characters of the Buffer should be
+ * appended.
+ * @param buf is the buffer from which the characters should be read.
+ * @param start is the start byte offset. Each character is counted as one byte.
+ */
+template <typename WhitespaceHandler, typename Buffer>
+inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf,
+                                      size_t start)
+{
+	for (auto elem : buf) {
+		handler.append(elem, start, start + 1);
+		start++;
+	}
+}
+}
+
+#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */
+