6 files changed, 299 insertions, 52 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 98b7acb..94b2cc7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -114,7 +114,7 @@ ADD_LIBRARY(ousia_core
 #	src/core/script/Object
 #	src/core/script/ScriptEngine
 #	src/core/script/Variant
-#	src/core/variant/Reader
+	src/core/variant/Reader
 	src/core/variant/Variant
 )
 
@@ -166,7 +166,7 @@ IF(TEST)
 #		test/core/script/FunctionTest
 #		test/core/script/ObjectTest
 #		test/core/script/VariantTest
-#		test/core/variant/ReaderTest
+		test/core/variant/ReaderTest
 		test/core/variant/VariantTest
 	)
 
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp
index 23c219a..0821a5d 100644
--- a/src/core/BufferedCharReader.cpp
+++ b/src/core/BufferedCharReader.cpp
@@ -18,6 +18,8 @@
 
 #include <array>
 
+#include "Utils.hpp"
+
 #include "BufferedCharReader.hpp"
 
 namespace ousia {
@@ -73,6 +75,15 @@ BufferedCharReader::BufferedCharReader(const std::string &str, int line,
 	buffer.push_back(str);
 }
 
+BufferedCharReader::BufferedCharReader(const std::string &str)
+    : inputStream(nullptr),
+      readCursor(1, 1, true),
+      peekCursor(1, 1, false),
+      depleted(true)
+{
+	buffer.push_back(str);
+}
+
 BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line,
                                        int column)
     : inputStream(&inputStream),
@@ -218,6 +229,19 @@ void BufferedCharReader::consumePeek()
 	readCursor.assign(peekCursor);
 }
 
+bool BufferedCharReader::consumeWhitespace()
+{
+	char c;
+	while (peek(&c)) {
+		if (!Utils::isWhitespace(c)) {
+			resetPeek();
+			return true;
+		}
+		consumePeek();
+	}
+	return false;
+}
+
 void BufferedCharReader::resetPeek()
 {
 	// Reset the peek cursor to the read cursor
diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp
index bd19d4a..e7f3186 100644
--- a/src/core/BufferedCharReader.hpp
+++ b/src/core/BufferedCharReader.hpp
@@ -172,7 +172,6 @@ public:
 	 */
 	BufferedCharReader(int line = 1, int column = 1);
 
-
 	/**
 	 * Constructor of the buffered char reader class with a string as input.
 	 *
@@ -180,7 +179,14 @@ public:
 	 * @param line is the start line.
 	 * @param column is the start column.
 	 */
-	BufferedCharReader(const std::string &str, int line = 1, int column = 1);
+	BufferedCharReader(const std::string &str, int line, int column);
+
+	/**
+	 * Constructor of the buffered char reader class with a string as input.
+	 *
+	 * @param str is a string containing the input data.
+	 */
+	BufferedCharReader(const std::string &str);
 
 	/**
 	 * Constructor of the buffered char reader class with a string as input.
@@ -222,6 +228,14 @@ public:
 	void consumePeek();
 
 	/**
+	 * Moves the read cursor to the next non-whitespace character. Returns
+	 * false, if the end of the stream was reached.
+	 *
+	 * @return false if the end of the stream was reached, true otherwise.
+	 */
+	bool consumeWhitespace();
+
+	/**
 	 * Resets the peek pointer to the "read" pointer.
 	 */
 	void resetPeek();
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
index e9a58a1..a0bba52 100644
--- a/src/core/variant/Reader.cpp
+++ b/src/core/variant/Reader.cpp
@@ -29,21 +29,33 @@ namespace variant {
 static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character";
 static const char *ERR_UNEXPECTED_END = "Unexpected end";
 static const char *ERR_UNTERMINATED = "Unterminated literal";
+static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence";
 
 static const int STATE_INIT = 0;
 static const int STATE_IN_STRING = 1;
 static const int STATE_ESCAPE = 2;
+static const int STATE_WHITESPACE = 3;
 
-static std::pair<Err, std::string> parseString(
-    BufferedCharReader &reader, const unordered_set<char> *delims = nullptr,
-    Logger *logger = nullptr)
+template <class T>
+static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger,
+                                const char *err, T res)
+{
+	logger.errorAt(err, reader);
+	return std::make_pair(false, std::move(res));
+}
+
+std::pair<bool, std::string> Reader::parseString(
+    BufferedCharReader &reader, Logger &logger,
+    const std::unordered_set<char> *delims)
 {
 	// Initialize the internal state
-	Err errCode = Err::OK;
 	int state = STATE_INIT;
 	char quote = 0;
 	std::stringstream res;
 
+	// Consume all whitespace
+	reader.consumeWhitespace();
+
 	// Statemachine whic iterates over each character in the stream
 	// TODO: Combination of peeking and consumePeek is stupid as consumePeek is
 	// the default (read and putBack would obviously be better, yet the latter
@@ -55,29 +67,28 @@ static std::pair<Err, std::string> parseString(
 				if (c == '"' || c == '\'') {
 					quote = c;
 					state = STATE_IN_STRING;
-				} else if (delims && delims.count(c)) {
-					Logger.log(ERR_UNTERMINATED, reader);
-					return std::make_pair(Err::UNEXPECTED_END, res.str());
-				} else if (Utils::isWhitespace(c)) {
-					reader.consumePeek();
-					continue;
+					break;
+				} else if (delims && delims->count(c)) {
+					return error(reader, logger, ERR_UNEXPECTED_END, res.str());
 				}
-				return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str());
-				break;
+				return error(reader, logger, ERR_UNEXPECTED_CHARACTER,
+				             res.str());
 			case STATE_IN_STRING:
-				if (c == q) {
-					state = STATE_END;
+				if (c == quote) {
 					reader.consumePeek();
-					return std::make_pair(Err::OK, res.str());
+					return std::make_pair(true, res.str());
 				} else if (c == '\\') {
 					state = STATE_ESCAPE;
+					reader.consumePeek();
+					break;
 				} else if (c == '\n') {
-					return std::make_pair(Err::UNTERMINATED, res.str());
+					return error(reader, logger, ERR_UNTERMINATED, res.str());
 				}
 				res << c;
 				reader.consumePeek();
 				break;
 			case STATE_ESCAPE:
+				// Handle all possible special escape characters
 				switch (c) {
 					case 'b':
 						res << '\b';
@@ -118,67 +129,90 @@ static std::pair<Err, std::string> parseString(
 						if (Utils::isNumeric(c)) {
 							// TODO: Parse octal 000 sequence
 						} else {
-							errCode = Err::ERR_INVALID_ESCAPE;
+							logger.errorAt(ERR_INVALID_ESCAPE, reader);
 						}
 						break;
 				}
+
+				// Switch back to the "normal" state
 				state = STATE_IN_STRING;
 				reader.consumePeek();
 				break;
 		}
 	}
-	return std::make_pair(Err::UNEXPECTED_END, res.str());
+	return error(reader, logger, ERR_UNEXPECTED_END, res.str());
 }
 
-static std::pair<Err, std::string> parseUnescapedString(
-    BufferedCharReader &reader, const unordered_set<char> *delims)
+std::pair<bool, std::string> Reader::parseUnescapedString(
+    BufferedCharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
 {
-	assert(delims);
-
 	std::stringstream res;
+	std::stringstream buf;
 	char c;
+
+	// Consume all whitespace
+	reader.consumeWhitespace();
+
+	// Copy all characters, skip whitespace at the end
+	int state = STATE_IN_STRING;
 	while (reader.peek(&c)) {
-		if (delims->count(c)) {
-			return std::make_pair(Err::OK, res.str());
+		if (delims.count(c)) {
+			return std::make_pair(true, res.str());
+		} else if (Utils::isWhitespace(c)) {
+			// Hold whitespace back; it is only emitted if more text follows
+			state = STATE_WHITESPACE;
+			buf << c;
+		} else {
+			// If we just had a sequence of whitespace, append it to the output
+			// buffer and continue
+			if (state == STATE_WHITESPACE) {
+				res << buf.str();
+				buf.str(std::string{});
+				buf.clear();
+				state = STATE_IN_STRING;
+			}
+			res << c;
 		}
-		res << c;
 		reader.consumePeek();
 	}
-	return std::make_pair(Err::UNEXPECTED_END, res.str());
+	return std::make_pair(true, res.str());
 }
 
-static std::pair<Err, Variant> parseGeneric(BufferedCharReader &reader,
-                                            const unordered_set<char> *delims)
+std::pair<bool, Variant> Reader::parseGeneric(
+    BufferedCharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
 {
-	assert(delims);
-
 	char c;
+
+	// Skip all whitespace characters
+	reader.consumeWhitespace();
+
 	while (reader.peek(&c)) {
-		// Stop if a delimiter is reached, skipp all whitespace characters
-		if (delims->count(c)) {
-			return std::make_pair(Err::OK, res.str());
-		} else if (Utils::isWhitespace(c)) {
-			reader.consumePeek();
-			continue;
+		// Stop if a delimiter is reached
+		if (delims.count(c)) {
+			return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
 		}
 
 		// Parse a string if a quote is reached
 		if (c == '"' || c == '\'') {
-			return parseString(reader, nullptr);
+			auto res = parseString(reader, logger);
+			return std::make_pair(res.first, res.second.c_str());
 		}
 
 		if (c == '[') {
 			// TODO: Parse struct descriptor
 		}
 
-		if (isNumeric(c)) {
+		if (Utils::isNumeric(c)) {
 			// TODO: Parse integer/double
 		}
 
 		// Parse an unescaped string in any other case
-		return parseUnescapedString(reader, delims);
+		auto res = parseUnescapedString(reader, logger, delims);
+		return std::make_pair(res.first, res.second.c_str());
 	}
-	return std::make_pair(Err::UNEXPECTED_END, res.str());
+	return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
 }
 }
 }
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
index 339127f..62592c1 100644
--- a/src/core/variant/Reader.hpp
+++ b/src/core/variant/Reader.hpp
@@ -40,7 +40,7 @@ namespace ousia {
 namespace variant {
 
 class Reader {
-public:
+private:
 	/**
 	 * Parses a string which may either be enclosed by " or ', unescapes
 	 * entities in the string as specified for JavaScript.
@@ -49,15 +49,55 @@ public:
 	 * the source for the character data. The reader will be positioned after
 	 * the terminating quote character or at the terminating delimiting
 	 * character.
+	 * @param logger is the logger instance that should be used to log error
+	 * messages and warnings.
 	 * @param delims is an optional set of delimiters after which parsing has to
 	 * be stopped (the delimiters may occur inside the actual string, but not
 	 * outside). If nullptr is given, no delimiter is used and a complete string
 	 * is read.
 	 */
 	static std::pair<bool, std::string> parseString(
-	    BufferedCharReader &reader,
-	    const unordered_set<char> *delims = nullptr,
-	    Logger *logger = nullptr);
+	    BufferedCharReader &reader, Logger &logger,
+	    const std::unordered_set<char> *delims);
+
+public:
+	/**
+	 * Parses a string which may either be enclosed by " or ', unescapes
+	 * entities in the string as specified for JavaScript.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned after
+	 * the terminating quote character or at the terminating delimiting
+	 * character.
+	 * @param logger is the logger instance that should be used to log error
+	 * messages and warnings.
+	 * @param delims is a set of delimiters after which parsing has to
+	 * be stopped (the delimiters may occur inside the actual string, but not
+	 * outside).
+	 */
+	static std::pair<bool, std::string> parseString(
+	    BufferedCharReader &reader, Logger &logger,
+	    const std::unordered_set<char> &delims)
+	{
+		return parseString(reader, logger, &delims);
+	}
+
+	/**
+	 * Parses a string which may either be enclosed by " or ', unescapes
+	 * entities in the string as specified for JavaScript.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned after
+	 * the terminating quote character (this overload uses no delimiting
+	 * characters).
+	 * @param logger is the logger instance that should be used to log error
+	 * messages and warnings.
+	 */
+	static std::pair<bool, std::string> parseString(BufferedCharReader &reader,
+	                                                Logger &logger)
+	{
+		return parseString(reader, logger, nullptr);
+	}
 
 	/**
 	 * Extracts an unescaped string from the given buffered char reader
@@ -71,8 +111,8 @@ public:
 	 * These characters are not included in the result. May not be nullptr.
 	 */
 	static std::pair<bool, std::string> parseUnescapedString(
-	    BufferedCharReader &reader, const unordered_set<char> *delims,
-	    Logger *logger = nullptr);
+	    BufferedCharReader &reader, Logger &logger,
+	    const std::unordered_set<char> &delims);
 
 	/**
 	 * Tries to parse the most specific item from the given stream until one of
@@ -86,8 +126,8 @@ public:
 	 * These characters are not included in the result. May not be nullptr.
 	 */
 	static std::pair<bool, Variant> parseGeneric(
-	    BufferedCharReader &reader, const unordered_set<char> *delims,
-	    Logger *logger = nullptr);
+	    BufferedCharReader &reader, Logger &logger,
+	    const std::unordered_set<char> &delims);
 };
 }
 }
diff --git a/test/core/variant/ReaderTest.cpp b/test/core/variant/ReaderTest.cpp
new file mode 100644
index 0000000..760760b
--- /dev/null
+++ b/test/core/variant/ReaderTest.cpp
@@ -0,0 +1,135 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+#include <gtest/gtest.h>
+
+#include <core/variant/Reader.hpp>
+
+namespace ousia {
+namespace variant {
+
+TEST(Reader, readString)
+{
+	TerminalLogger logger(std::cerr, true);
+
+	// Simple, double quoted string
+	{
+		BufferedCharReader reader("\"hello world\"");
+		auto res = Reader::parseString(reader, logger);
+		ASSERT_TRUE(res.first);
+		ASSERT_EQ("hello world", res.second);
+	}
+
+	// Simple, double quoted string with whitespace
+	{
+		BufferedCharReader reader("    \"hello world\"   ");
+		auto res = Reader::parseString(reader, logger);
+		ASSERT_TRUE(res.first);
+		ASSERT_EQ("hello world", res.second);
+	}
+
+	// Simple, single quoted string
+	{
+		BufferedCharReader reader("'hello world'");
+		auto res = Reader::parseString(reader, logger);
+		ASSERT_TRUE(res.first);
+		ASSERT_EQ("hello world", res.second);
+	}
+
+	// Escape characters
+	{
+		BufferedCharReader reader("'\\'\\\"\\b\\f\\n\\r\\t\\v'");
+		auto res = Reader::parseString(reader, logger);
+		ASSERT_TRUE(res.first);
+		ASSERT_EQ("'\"\b\f\n\r\t\v", res.second);
+	}
+}
+
+TEST(Reader, parseUnescapedString)
+{
+	TerminalLogger logger(std::cerr, true);
+
+	// Simple case
+	{
+		BufferedCharReader reader("hello world;");
+		auto res = Reader::parseUnescapedString(reader, logger, {';'});
+		ASSERT_TRUE(res.first);
+		ASSERT_EQ("hello world", res.second);
+	}
+
+	// Simple case with whitespace
+	{
+		BufferedCharReader reader("    hello world   ;    ");
+		auto res = Reader::parseUnescapedString(reader, logger, {';'});
+		ASSERT_TRUE(res.first);
+		ASSERT_EQ("hello world", res.second);
+	}
+
+	// Linebreaks
+	{
+		BufferedCharReader reader("    hello\nworld   ;    ");
+		auto res = Reader::parseUnescapedString(reader, logger, {';'});
+		ASSERT_TRUE(res.first);
+		ASSERT_EQ("hello\nworld", res.second);
+	}
+
+	// End of stream
+	{
+		BufferedCharReader reader("    hello world ");
+		auto res = Reader::parseUnescapedString(reader, logger, {';'});
+		ASSERT_TRUE(res.first);
+		ASSERT_EQ("hello world", res.second);
+	}
+}
+
+TEST(Reader, parseGeneric)
+{
+	TerminalLogger logger(std::cerr, true);
+
+	// Simple case, unescaped string
+	{
+		BufferedCharReader reader("hello world");
+		auto res = Reader::parseGeneric(reader, logger, {';'});
+		ASSERT_TRUE(res.first);
+		ASSERT_TRUE(res.second.isString());
+		ASSERT_EQ("hello world", res.second.asString());
+	}
+
+	// Simple case, double quoted string
+	{
+		BufferedCharReader reader(" \"hello world\"    ");
+		auto res = Reader::parseGeneric(reader, logger, {';'});
+		ASSERT_TRUE(res.first);
+		ASSERT_TRUE(res.second.isString());
+		ASSERT_EQ("hello world", res.second.asString());
+	}
+
+	// Simple case, single quoted string
+	{
+		BufferedCharReader reader(" 'hello world'    ");
+		auto res = Reader::parseGeneric(reader, logger, {';'});
+		ASSERT_TRUE(res.first);
+		ASSERT_TRUE(res.second.isString());
+		ASSERT_EQ("hello world", res.second.asString());
+	}
+}
+
+}
+}
+