added string reading functions of the Reader class

author: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2014-12-05 16:08:34 +0100
committer: Andreas Stöckel <andreas@somweyr.de> 2014-12-05 16:08:34 +0100
commit: e06e7ae19851acf5e397f579d6c8459e87086d30 (patch)
tree: 2140f3d79239d6f0ebd5c08f1fb48b327586249e /src
parent: bf59bc2edbb1f3f4d12bfbd8ed2663fbbb1900c0 (diff)
4 files changed, 162 insertions, 50 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp
index 23c219a..0821a5d 100644
--- a/src/core/BufferedCharReader.cpp
+++ b/src/core/BufferedCharReader.cpp
@@ -18,6 +18,8 @@
 
 #include <array>
 
+#include "Utils.hpp"
+
 #include "BufferedCharReader.hpp"
 
 namespace ousia {
@@ -73,6 +75,15 @@ BufferedCharReader::BufferedCharReader(const std::string &str, int line,
 	buffer.push_back(str);
 }
 
+BufferedCharReader::BufferedCharReader(const std::string &str)
+    : inputStream(nullptr),
+      readCursor(1, 1, true),
+      peekCursor(1, 1, false),
+      depleted(true)
+{
+	buffer.push_back(str);
+}
+
 BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line,
                                        int column)
     : inputStream(&inputStream),
@@ -218,6 +229,19 @@ void BufferedCharReader::consumePeek()
 	readCursor.assign(peekCursor);
 }
 
+bool BufferedCharReader::consumeWhitespace()
+{
+	char c;
+	while (peek(&c)) {
+		if (!Utils::isWhitespace(c)) {
+			resetPeek();
+			return true;
+		}
+		consumePeek();
+	}
+	return false;
+}
+
 void BufferedCharReader::resetPeek()
 {
 	// Reset the peek cursor to the read cursor
diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp
index bd19d4a..e7f3186 100644
--- a/src/core/BufferedCharReader.hpp
+++ b/src/core/BufferedCharReader.hpp
@@ -172,7 +172,6 @@ public:
 	 */
 	BufferedCharReader(int line = 1, int column = 1);
 
-
 	/**
 	 * Constructor of the buffered char reader class with a string as input.
 	 *
@@ -180,7 +179,14 @@ public:
 	 * @param line is the start line.
 	 * @param column is the start column.
 	 */
-	BufferedCharReader(const std::string &str, int line = 1, int column = 1);
+	BufferedCharReader(const std::string &str, int line, int column);
+
+	/**
+	 * Constructor of the buffered char reader class with a string as input.
+	 *
+	 * @param str is a string containing the input data.
+	 */
+	BufferedCharReader(const std::string &str);
 
 	/**
 	 * Constructor of the buffered char reader class with a string as input.
@@ -222,6 +228,14 @@ public:
 	void consumePeek();
 
 	/**
+	 * Moves the read cursor to the next non-whitespace character. Returns
+	 * false, if the end of the stream was reached.
+	 *
+	 * @return false if the end of the stream was reached, true otherwise.
+	 */
+	bool consumeWhitespace();
+
+	/**
 	 * Resets the peek pointer to the "read" pointer.
 	 */
 	void resetPeek();
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
index e9a58a1..a0bba52 100644
--- a/src/core/variant/Reader.cpp
+++ b/src/core/variant/Reader.cpp
@@ -29,21 +29,33 @@ namespace variant {
 static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character";
 static const char *ERR_UNEXPECTED_END = "Unexpected end";
 static const char *ERR_UNTERMINATED = "Unterminated literal";
+static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence";
 
 static const int STATE_INIT = 0;
 static const int STATE_IN_STRING = 1;
 static const int STATE_ESCAPE = 2;
+static const int STATE_WHITESPACE = 3;
 
-static std::pair<Err, std::string> parseString(
-    BufferedCharReader &reader, const unordered_set<char> *delims = nullptr,
-    Logger *logger = nullptr)
+template <class T>
+static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger,
+                                const char *err, T res)
+{
+	logger.errorAt(err, reader);
+	return std::make_pair(false, std::move(res));
+}
+
+std::pair<bool, std::string> Reader::parseString(
+    BufferedCharReader &reader, Logger &logger,
+    const std::unordered_set<char> *delims)
 {
 	// Initialize the internal state
-	Err errCode = Err::OK;
 	int state = STATE_INIT;
 	char quote = 0;
 	std::stringstream res;
 
+	// Consume all whitespace
+	reader.consumeWhitespace();
+
 	// Statemachine whic iterates over each character in the stream
 	// TODO: Combination of peeking and consumePeek is stupid as consumePeek is
 	// the default (read and putBack would obviously be better, yet the latter
@@ -55,29 +67,28 @@ static std::pair<Err, std::string> parseString(
 				if (c == '"' || c == '\'') {
 					quote = c;
 					state = STATE_IN_STRING;
-				} else if (delims && delims.count(c)) {
-					Logger.log(ERR_UNTERMINATED, reader);
-					return std::make_pair(Err::UNEXPECTED_END, res.str());
-				} else if (Utils::isWhitespace(c)) {
-					reader.consumePeek();
-					continue;
+					break;
+				} else if (delims && delims->count(c)) {
+					return error(reader, logger, ERR_UNEXPECTED_END, res.str());
 				}
-				return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str());
-				break;
+				return error(reader, logger, ERR_UNEXPECTED_CHARACTER,
+				             res.str());
 			case STATE_IN_STRING:
-				if (c == q) {
-					state = STATE_END;
+				if (c == quote) {
 					reader.consumePeek();
-					return std::make_pair(Err::OK, res.str());
+					return std::make_pair(true, res.str());
 				} else if (c == '\\') {
 					state = STATE_ESCAPE;
+					reader.consumePeek();
+					break;
 				} else if (c == '\n') {
-					return std::make_pair(Err::UNTERMINATED, res.str());
+					return error(reader, logger, ERR_UNTERMINATED, res.str());
 				}
 				res << c;
 				reader.consumePeek();
 				break;
 			case STATE_ESCAPE:
+				// Handle all possible special escape characters
 				switch (c) {
 					case 'b':
 						res << '\b';
@@ -118,67 +129,90 @@ static std::pair<Err, std::string> parseString(
 						if (Utils::isNumeric(c)) {
 							// TODO: Parse octal 000 sequence
 						} else {
-							errCode = Err::ERR_INVALID_ESCAPE;
+							logger.errorAt(ERR_INVALID_ESCAPE, reader);
 						}
 						break;
 				}
+
+				// Switch back to the "normal" state
 				state = STATE_IN_STRING;
 				reader.consumePeek();
 				break;
 		}
 	}
-	return std::make_pair(Err::UNEXPECTED_END, res.str());
+	return error(reader, logger, ERR_UNEXPECTED_END, res.str());
 }
 
-static std::pair<Err, std::string> parseUnescapedString(
-    BufferedCharReader &reader, const unordered_set<char> *delims)
+std::pair<bool, std::string> Reader::parseUnescapedString(
+    BufferedCharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
 {
-	assert(delims);
-
 	std::stringstream res;
+	std::stringstream buf;
 	char c;
+
+	// Consume all whitespace
+	reader.consumeWhitespace();
+
+	// Copy all characters, skip whitespace at the end
+	int state = STATE_IN_STRING;
 	while (reader.peek(&c)) {
-		if (delims->count(c)) {
-			return std::make_pair(Err::OK, res.str());
+		if (delims.count(c)) {
+			return std::make_pair(true, res.str());
+		} else if (Utils::isWhitespace(c)) {
+			// Hold whitespace back; it is only emitted if more text follows
+			state = STATE_WHITESPACE;
+			buf << c;
+		} else {
+			// If we just had a sequence of whitespace, append it to the output
+			// buffer and continue
+			if (state == STATE_WHITESPACE) {
+				res << buf.str();
+				buf.str(std::string{});
+				buf.clear();
+				state = STATE_IN_STRING;
+			}
+			res << c;
 		}
-		res << c;
 		reader.consumePeek();
 	}
-	return std::make_pair(Err::UNEXPECTED_END, res.str());
+	return std::make_pair(true, res.str());
 }
 
-static std::pair<Err, Variant> parseGeneric(BufferedCharReader &reader,
-                                            const unordered_set<char> *delims)
+std::pair<bool, Variant> Reader::parseGeneric(
+    BufferedCharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
 {
-	assert(delims);
-
 	char c;
+
+	// Skip all whitespace characters
+	reader.consumeWhitespace();
+
 	while (reader.peek(&c)) {
-		// Stop if a delimiter is reached, skipp all whitespace characters
-		if (delims->count(c)) {
-			return std::make_pair(Err::OK, res.str());
-		} else if (Utils::isWhitespace(c)) {
-			reader.consumePeek();
-			continue;
+		// Stop if a delimiter is reached
+		if (delims.count(c)) {
+			return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
 		}
 
 		// Parse a string if a quote is reached
 		if (c == '"' || c == '\'') {
-			return parseString(reader, nullptr);
+			auto res = parseString(reader, logger);
+			return std::make_pair(res.first, res.second.c_str());
 		}
 
 		if (c == '[') {
 			// TODO: Parse struct descriptor
 		}
 
-		if (isNumeric(c)) {
+		if (Utils::isNumeric(c)) {
 			// TODO: Parse integer/double
 		}
 
 		// Parse an unescaped string in any other case
-		return parseUnescapedString(reader, delims);
+		auto res = parseUnescapedString(reader, logger, delims);
+		return std::make_pair(res.first, res.second.c_str());
 	}
-	return std::make_pair(Err::UNEXPECTED_END, res.str());
+	return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
 }
 }
 }
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
index 339127f..62592c1 100644
--- a/src/core/variant/Reader.hpp
+++ b/src/core/variant/Reader.hpp
@@ -40,7 +40,7 @@ namespace ousia {
 namespace variant {
 
 class Reader {
-public:
+private:
 	/**
 	 * Parses a string which may either be enclosed by " or ', unescapes
 	 * entities in the string as specified for JavaScript.
@@ -49,15 +49,55 @@ public:
 	 * the source for the character data. The reader will be positioned after
 	 * the terminating quote character or at the terminating delimiting
 	 * character.
+	 * @param logger is the logger instance that should be used to log error
+	 * messages and warnings.
 	 * @param delims is an optional set of delimiters after which parsing has to
 	 * be stopped (the delimiters may occur inside the actual string, but not
 	 * outside). If nullptr is given, no delimiter is used and a complete string
 	 * is read.
 	 */
 	static std::pair<bool, std::string> parseString(
-	    BufferedCharReader &reader,
-	    const unordered_set<char> *delims = nullptr,
-	    Logger *logger = nullptr);
+	    BufferedCharReader &reader, Logger &logger,
+	    const std::unordered_set<char> *delims);
+
+public:
+	/**
+	 * Parses a string which may either be enclosed by " or ', unescapes
+	 * entities in the string as specified for JavaScript.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned after
+	 * the terminating quote character or at the terminating delimiting
+	 * character.
+	 * @param logger is the logger instance that should be used to log error
+	 * messages and warnings.
+	 * @param delims is a set of delimiters after which parsing has to
+	 * be stopped (the delimiters may occur inside the actual string, but not
+	 * outside).
+	 */
+	static std::pair<bool, std::string> parseString(
+	    BufferedCharReader &reader, Logger &logger,
+	    const std::unordered_set<char> &delims)
+	{
+		return parseString(reader, logger, &delims);
+	}
+
+	/**
+	 * Parses a string which may either be enclosed by " or ', unescapes
+	 * entities in the string as specified for JavaScript.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned after
+	 * the terminating quote character or at the terminating delimiting
+	 * character.
+	 * @param logger is the logger instance that should be used to log error
+	 * messages and warnings.
+	 */
+	static std::pair<bool, std::string> parseString(BufferedCharReader &reader,
+	                                                Logger &logger)
+	{
+		return parseString(reader, logger, nullptr);
+	}
 
 	/**
 	 * Extracts an unescaped string from the given buffered char reader
@@ -71,8 +111,8 @@ public:
 	 * These characters are not included in the result. May not be nullptr.
 	 */
 	static std::pair<bool, std::string> parseUnescapedString(
-	    BufferedCharReader &reader, const unordered_set<char> *delims,
-	    Logger *logger = nullptr);
+	    BufferedCharReader &reader, Logger &logger,
+	    const std::unordered_set<char> &delims);
 
 	/**
 	 * Tries to parse the most specific item from the given stream until one of
@@ -86,8 +126,8 @@ public:
 	 * These characters are not included in the result. May not be nullptr.
 	 */
 	static std::pair<bool, Variant> parseGeneric(
-	    BufferedCharReader &reader, const unordered_set<char> *delims,
-	    Logger *logger = nullptr);
+	    BufferedCharReader &reader, Logger &logger,
+	    const std::unordered_set<char> &delims);
 };
 }
 }
author	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2014-12-05 16:08:34 +0100
committer	Andreas Stöckel <andreas@somweyr.de>	2014-12-05 16:08:34 +0100
commit	e06e7ae19851acf5e397f579d6c8459e87086d30 (patch)
tree	2140f3d79239d6f0ebd5c08f1fb48b327586249e /src
parent	bf59bc2edbb1f3f4d12bfbd8ed2663fbbb1900c0 (diff)