3 files changed, 312 insertions, 1 deletions
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
new file mode 100644
index 0000000..6142ecf
--- /dev/null
+++ b/src/core/variant/Reader.cpp
@@ -0,0 +1,177 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cassert>
+#include <sstream>
+
+#include <core/Utils.hpp>
+
+#include "Reader.hpp"
+
+namespace ousia {
+namespace variant {
+
+static const int STATE_INIT = 0;       // in front of the opening quote
+static const int STATE_IN_STRING = 1;  // between the quotes
+static const int STATE_ESCAPE = 2;     // directly after a backslash
+
+// Implementation of Reader::parseString, see Reader.hpp for the contract.
+// Characters in front of the opening quote are skipped unless they are in
+// delims (-> ERR_UNEXPECTED_END, delimiter not consumed). A raw line break
+// inside the string yields ERR_UNTERMINATED, the end of the stream yields
+// ERR_UNEXPECTED_END. At the closing quote the result is Err::OK, or
+// ERR_INVALID_ESCAPE if an unknown escape sequence was encountered.
+std::pair<Reader::Err, std::string> Reader::parseString(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+	// Initialize the internal state
+	Err errCode = Err::OK;
+	int state = STATE_INIT;
+	char quote = 0;
+	std::stringstream res;
+
+	// State machine which iterates over each character in the stream
+	// TODO: Combination of peeking and consumePeek is stupid as consumePeek is
+	// the default (read and putBack would obviously be better, yet the latter
+	// is not trivial to implement in the current BufferedCharReader).
+	char c;
+	while (reader.peek(&c)) {
+		switch (state) {
+			case STATE_INIT:
+				if (c == '"' || c == '\'') {
+					quote = c;
+					state = STATE_IN_STRING;
+				} else if (delims && delims->count(c)) {
+					return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+				}
+				reader.consumePeek();
+				break;
+			case STATE_IN_STRING:
+				if (c == quote) {
+					reader.consumePeek();
+					return std::make_pair(errCode, res.str());
+				} else if (c == '\n') {
+					return std::make_pair(Err::ERR_UNTERMINATED, res.str());
+				} else if (c == '\\') {
+					// The backslash only starts the escape, do not emit it
+					state = STATE_ESCAPE;
+				} else {
+					res << c;
+				}
+				reader.consumePeek();
+				break;
+			case STATE_ESCAPE:
+				switch (c) {
+					case 'b':
+						res << '\b';
+						break;
+					case 'f':
+						res << '\f';
+						break;
+					case 'n':
+						res << '\n';
+						break;
+					case 'r':
+						res << '\r';
+						break;
+					case 't':
+						res << '\t';
+						break;
+					case 'v':
+						res << '\v';
+						break;
+					case '\'':
+					case '"':
+					case '\\':
+						res << c;
+						break;
+					case '\n':
+						break;
+					case 'x':
+					case 'u':
+						// TODO: Parse hex XX (Latin-1), XXXX (16-bit unicode)
+						break;
+					default:
+						// TODO: Parse octal 000 sequence
+						if (!Utils::isNumeric(c)) {
+							errCode = Err::ERR_INVALID_ESCAPE;
+						}
+						break;
+				}
+				state = STATE_IN_STRING;
+				reader.consumePeek();
+				break;
+		}
+	}
+	return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+}
+
+// Implementation of Reader::parseUnescapedString. Copies characters up to
+// the first delimiter, which is left in the reader. If the stream ends before
+// a delimiter, ERR_UNEXPECTED_END is returned with the text read so far.
+std::pair<Reader::Err, std::string> Reader::parseUnescapedString(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+	assert(delims);
+	std::stringstream res;
+	for (char c; reader.peek(&c); reader.consumePeek()) {
+		if (delims->count(c)) {
+			return std::make_pair(Err::OK, res.str());
+		}
+		res << c;
+	}
+	return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+}
+
+// Implementation of Reader::parseGeneric. Skips whitespace, then reads a
+// quoted string if a quote follows and an unescaped string otherwise. A
+// delimiter in front of any value yields Err::OK with an empty string, the
+// end of the stream yields ERR_UNEXPECTED_END.
+// NOTE(review): the returns rely on Variant being implicitly constructible
+// from std::string -- confirm against Variant.hpp.
+std::pair<Reader::Err, Variant> Reader::parseGeneric(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+	assert(delims);
+	char c;
+	while (reader.peek(&c)) {
+		// Stop if a delimiter is reached, skip all whitespace characters
+		if (delims->count(c)) {
+			return std::make_pair(Err::OK, std::string{});
+		} else if (Utils::isWhitespace(c)) {
+			reader.consumePeek();
+			continue;
+		}
+
+		// Parse a string if a quote is reached
+		if (c == '"' || c == '\'') {
+			return parseString(reader, nullptr);
+		}
+
+		// TODO: Parse struct descriptor if c == '['
+		// TODO: Parse integer/double if Utils::isNumeric(c)
+
+		// Parse an unescaped string in any other case
+		return parseUnescapedString(reader, delims);
+	}
+	return std::make_pair(Err::ERR_UNEXPECTED_END, std::string{});
+}
+
+}
+}
+
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
new file mode 100644
index 0000000..3f945f0
--- /dev/null
+++ b/src/core/variant/Reader.hpp
@@ -0,0 +1,130 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Reader.hpp
+ *
+ * Provides parsers for various micro formats. These formats include integers,
+ * doubles, strings, JSON and the Ousía struct notation.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_VARIANT_READER_HPP_
+#define _OUSIA_VARIANT_READER_HPP_
+
+#include <unordered_set>
+#include <utility>
+
+#include <core/BufferedCharReader.hpp>
+
+#include "Variant.hpp"
+
+namespace ousia {
+namespace variant {
+
+/**
+ * The Reader class bundles the static parser functions for the micro formats
+ * described in the file header. All functions read from a
+ * BufferedCharReader and report their outcome as a pair of error code and
+ * parsed value.
+ */
+class Reader {
+public:
+	// TODO: Pass logger instance instead of using error codes?
+
+	/**
+	 * The Err enum describes possible error codes that may be encountered when
+	 * parsing the microtypes.
+	 */
+	enum class Err : int {
+		/** Reached the end of the stream, but expected more data. */
+		ERR_UNEXPECTED_END = -1,
+
+		/** The stream is malformed. */
+		ERR_MALFORMED = -2,
+
+		/** Unexpected character. */
+		ERR_UNEXPECTED_CHARACTER = -3,
+
+		/** Unterminated literal. */
+		ERR_UNTERMINATED = -4,
+
+		/** Invalid escape character. */
+		ERR_INVALID_ESCAPE = -5,
+
+		/** A value of the requested type was extracted successfully. */
+		OK = 0
+	};
+
+	/**
+	 * Parses a string which may either be enclosed by " or ', unescapes
+	 * entities in the string as specified for JavaScript.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned after
+	 * the terminating quote character or at the terminating delimiting
+	 * character.
+	 * @param delims is an optional set of delimiters after which parsing has to
+	 * be stopped (the delimiters may occur inside the actual string, but not
+	 * outside). If nullptr is given, no delimiter is used and a complete string
+	 * is read.
+	 * @return a pair consisting of an error code (Err::OK on success) and the
+	 * string that has been read.
+	 */
+	static std::pair<Err, std::string> parseString(
+	    BufferedCharReader &reader,
+	    const std::unordered_set<char> *delims = nullptr);
+
+	/**
+	 * Extracts an unescaped string from the given buffered char reader
+	 * instance. This function just reads text until one of the given delimiter
+	 * characters is reached.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned at the
+	 * terminating delimiting character.
+	 * @param delims is a set of characters which will terminate the string.
+	 * These characters are not included in the result. May not be nullptr.
+	 * @return a pair consisting of an error code (Err::OK on success) and the
+	 * text in front of the delimiter.
+	 */
+	static std::pair<Err, std::string> parseUnescapedString(
+	    BufferedCharReader &reader, const std::unordered_set<char> *delims);
+
+	/**
+	 * Tries to parse the most specific item from the given stream until one of
+	 * the given delimiters is reached or a meaningful literal has been read.
+	 * The resulting variant represents the value that has been read.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned at the
+	 * terminating delimiting character.
+	 * @param delims is a set of characters which will terminate the string.
+	 * These characters are not included in the result. May not be nullptr.
+	 * @return a pair consisting of an error code (Err::OK on success) and the
+	 * variant that has been read.
+	 */
+	static std::pair<Err, Variant> parseGeneric(
+	    BufferedCharReader &reader, const std::unordered_set<char> *delims);
+};
+}
+}
+
+#endif /* _OUSIA_VARIANT_READER_HPP_ */
+
diff --git a/src/core/variant/Variant.hpp b/src/core/variant/Variant.hpp
index d65e14a..6476780 100644
--- a/src/core/variant/Variant.hpp
+++ b/src/core/variant/Variant.hpp
@@ -42,6 +42,7 @@
 #include <core/Exceptions.hpp>
 
 namespace ousia {
+namespace variant {
 
 /**
  * Instances of the Variant class represent any kind of data that is exchanged
@@ -680,13 +681,16 @@ public:
 	 * Prints a key value pair to the output stream.
 	 */
 	friend std::ostream &operator<<(std::ostream &os,
-		                     const mapType::value_type &v)
+	                                const mapType::value_type &v)
 	{
 		// TODO: Use proper serialization function
 		return os << "\"" << v.first << "\": " << v.second.toString(true);
 	}
 };
+}
 
+// Alias so the (very often used and unambiguous) class is ousia::Variant
+using Variant = variant::Variant;
 }
 
 #endif /* _OUSIA_VARIANT_HPP_ */