summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt4
-rw-r--r--src/core/BufferedCharReader.cpp24
-rw-r--r--src/core/BufferedCharReader.hpp18
-rw-r--r--src/core/variant/Reader.cpp114
-rw-r--r--src/core/variant/Reader.hpp56
-rw-r--r--test/core/variant/ReaderTest.cpp135
6 files changed, 299 insertions, 52 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 98b7acb..94b2cc7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -114,7 +114,7 @@ ADD_LIBRARY(ousia_core
# src/core/script/Object
# src/core/script/ScriptEngine
# src/core/script/Variant
-# src/core/variant/Reader
+ src/core/variant/Reader
src/core/variant/Variant
)
@@ -166,7 +166,7 @@ IF(TEST)
# test/core/script/FunctionTest
# test/core/script/ObjectTest
# test/core/script/VariantTest
-# test/core/variant/ReaderTest
+ test/core/variant/ReaderTest
test/core/variant/VariantTest
)
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp
index 23c219a..0821a5d 100644
--- a/src/core/BufferedCharReader.cpp
+++ b/src/core/BufferedCharReader.cpp
@@ -18,6 +18,8 @@
#include <array>
+#include "Utils.hpp"
+
#include "BufferedCharReader.hpp"
namespace ousia {
@@ -73,6 +75,15 @@ BufferedCharReader::BufferedCharReader(const std::string &str, int line,
buffer.push_back(str);
}
+BufferedCharReader::BufferedCharReader(const std::string &str)
+ : inputStream(nullptr),
+ readCursor(1, 1, true),
+ peekCursor(1, 1, false),
+ depleted(true)
+{
+ buffer.push_back(str);
+}
+
BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line,
int column)
: inputStream(&inputStream),
@@ -218,6 +229,19 @@ void BufferedCharReader::consumePeek()
readCursor.assign(peekCursor);
}
+bool BufferedCharReader::consumeWhitespace()
+{
+ char c;
+ while (peek(&c)) {
+ if (!Utils::isWhitespace(c)) {
+ resetPeek();
+ return true;
+ }
+ consumePeek();
+ }
+ return false;
+}
+
void BufferedCharReader::resetPeek()
{
// Reset the peek cursor to the read cursor
diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp
index bd19d4a..e7f3186 100644
--- a/src/core/BufferedCharReader.hpp
+++ b/src/core/BufferedCharReader.hpp
@@ -172,7 +172,6 @@ public:
*/
BufferedCharReader(int line = 1, int column = 1);
-
/**
* Constructor of the buffered char reader class with a string as input.
*
@@ -180,7 +179,14 @@ public:
* @param line is the start line.
* @param column is the start column.
*/
- BufferedCharReader(const std::string &str, int line = 1, int column = 1);
+ BufferedCharReader(const std::string &str, int line, int column);
+
+ /**
+ * Constructor of the buffered char reader class with a string as input.
+ * Convenience overload which starts reading at line 1, column 1.
+ *
+ * @param str is a string containing the input data.
+ */
+ BufferedCharReader(const std::string &str);
/**
* Constructor of the buffered char reader class with a string as input.
@@ -222,6 +228,14 @@ public:
void consumePeek();
/**
+ * Moves the read cursor to the next non-whitespace character. Returns
+ * false, if the end of the stream was reached.
+ *
+ * @return false if the end of the stream was reached, true otherwise.
+ */
+ bool consumeWhitespace();
+
+ /**
* Resets the peek pointer to the "read" pointer.
*/
void resetPeek();
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
index e9a58a1..a0bba52 100644
--- a/src/core/variant/Reader.cpp
+++ b/src/core/variant/Reader.cpp
@@ -29,21 +29,33 @@ namespace variant {
static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character";
static const char *ERR_UNEXPECTED_END = "Unexpected end";
static const char *ERR_UNTERMINATED = "Unterminated literal";
+static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence";
static const int STATE_INIT = 0;
static const int STATE_IN_STRING = 1;
static const int STATE_ESCAPE = 2;
+static const int STATE_WHITESPACE = 3;
-static std::pair<Err, std::string> parseString(
- BufferedCharReader &reader, const unordered_set<char> *delims = nullptr,
- Logger *logger = nullptr)
+template <class T>
+static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger,
+ const char *err, T res)
+{
+ logger.errorAt(err, reader);
+ return std::make_pair(false, std::move(res));
+}
+
+std::pair<bool, std::string> Reader::parseString(
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> *delims)
{
// Initialize the internal state
- Err errCode = Err::OK;
int state = STATE_INIT;
char quote = 0;
std::stringstream res;
+ // Consume all whitespace
+ reader.consumeWhitespace();
+
// Statemachine whic iterates over each character in the stream
// TODO: Combination of peeking and consumePeek is stupid as consumePeek is
// the default (read and putBack would obviously be better, yet the latter
@@ -55,29 +67,28 @@ static std::pair<Err, std::string> parseString(
if (c == '"' || c == '\'') {
quote = c;
state = STATE_IN_STRING;
- } else if (delims && delims.count(c)) {
- Logger.log(ERR_UNTERMINATED, reader);
- return std::make_pair(Err::UNEXPECTED_END, res.str());
- } else if (Utils::isWhitespace(c)) {
- reader.consumePeek();
- continue;
+ break;
+ } else if (delims && delims->count(c)) {
+ return error(reader, logger, ERR_UNEXPECTED_END, res.str());
}
- return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str());
- break;
+ return error(reader, logger, ERR_UNEXPECTED_CHARACTER,
+ res.str());
case STATE_IN_STRING:
- if (c == q) {
- state = STATE_END;
+ if (c == quote) {
reader.consumePeek();
- return std::make_pair(Err::OK, res.str());
+ return std::make_pair(true, res.str());
} else if (c == '\\') {
state = STATE_ESCAPE;
+ reader.consumePeek();
+ break;
} else if (c == '\n') {
- return std::make_pair(Err::UNTERMINATED, res.str());
+ return error(reader, logger, ERR_UNTERMINATED, res.str());
}
res << c;
reader.consumePeek();
break;
case STATE_ESCAPE:
+ // Handle all possible special escape characters
switch (c) {
case 'b':
res << '\b';
@@ -118,67 +129,90 @@ static std::pair<Err, std::string> parseString(
if (Utils::isNumeric(c)) {
// TODO: Parse octal 000 sequence
} else {
- errCode = Err::ERR_INVALID_ESCAPE;
+ logger.errorAt(ERR_INVALID_ESCAPE, reader);
}
break;
}
+
+ // Switch back to the "normal" state
state = STATE_IN_STRING;
reader.consumePeek();
break;
}
}
- return std::make_pair(Err::UNEXPECTED_END, res.str());
+ return error(reader, logger, ERR_UNEXPECTED_END, res.str());
}
-static std::pair<Err, std::string> parseUnescapedString(
- BufferedCharReader &reader, const unordered_set<char> *delims)
+std::pair<bool, std::string> Reader::parseUnescapedString(
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims)
{
- assert(delims);
-
std::stringstream res;
+ std::stringstream buf;
char c;
+
+ // Consume all whitespace
+ reader.consumeWhitespace();
+
+ // Copy all characters, skip whitespace at the end
+ int state = STATE_IN_STRING;
while (reader.peek(&c)) {
- if (delims->count(c)) {
- return std::make_pair(Err::OK, res.str());
+ if (delims.count(c)) {
+ return std::make_pair(true, res.str());
+ } else if (Utils::isWhitespace(c)) {
+ // Hold whitespace back in a separate buffer; it is only appended to
+ // the output if further non-whitespace characters follow
+ state = STATE_WHITESPACE;
+ buf << c;
+ } else {
+ // If we just had a sequence of whitespace, append it to the output
+ // buffer and continue
+ if (state == STATE_WHITESPACE) {
+ res << buf.str();
+ buf.str(std::string{});
+ buf.clear();
+ state = STATE_IN_STRING;
+ }
+ res << c;
}
- res << c;
reader.consumePeek();
}
- return std::make_pair(Err::UNEXPECTED_END, res.str());
+ return std::make_pair(true, res.str());
}
-static std::pair<Err, Variant> parseGeneric(BufferedCharReader &reader,
- const unordered_set<char> *delims)
+std::pair<bool, Variant> Reader::parseGeneric(
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims)
{
- assert(delims);
-
char c;
+
+ // Skip all whitespace characters
+ reader.consumeWhitespace();
+
while (reader.peek(&c)) {
- // Stop if a delimiter is reached, skipp all whitespace characters
- if (delims->count(c)) {
- return std::make_pair(Err::OK, res.str());
- } else if (Utils::isWhitespace(c)) {
- reader.consumePeek();
- continue;
+ // Stop if a delimiter is reached
+ if (delims.count(c)) {
+ return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
}
// Parse a string if a quote is reached
if (c == '"' || c == '\'') {
- return parseString(reader, nullptr);
+ auto res = parseString(reader, logger);
+ return std::make_pair(res.first, res.second.c_str());
}
if (c == '[') {
// TODO: Parse struct descriptor
}
- if (isNumeric(c)) {
+ if (Utils::isNumeric(c)) {
// TODO: Parse integer/double
}
// Parse an unescaped string in any other case
- return parseUnescapedString(reader, delims);
+ auto res = parseUnescapedString(reader, logger, delims);
+ return std::make_pair(res.first, res.second.c_str());
}
- return std::make_pair(Err::UNEXPECTED_END, res.str());
+ return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
}
}
}
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
index 339127f..62592c1 100644
--- a/src/core/variant/Reader.hpp
+++ b/src/core/variant/Reader.hpp
@@ -40,7 +40,7 @@ namespace ousia {
namespace variant {
class Reader {
-public:
+private:
/**
* Parses a string which may either be enclosed by " or ', unescapes
* entities in the string as specified for JavaScript.
@@ -49,15 +49,55 @@ public:
* the source for the character data. The reader will be positioned after
* the terminating quote character or at the terminating delimiting
* character.
+ * @param logger is the logger instance that should be used to log error
+ * messages and warnings.
* @param delims is an optional set of delimiters after which parsing has to
* be stopped (the delimiters may occur inside the actual string, but not
* outside). If nullptr is given, no delimiter is used and a complete string
* is read.
*/
static std::pair<bool, std::string> parseString(
- BufferedCharReader &reader,
- const unordered_set<char> *delims = nullptr,
- Logger *logger = nullptr);
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> *delims);
+
+public:
+ /**
+ * Parses a string which may either be enclosed by " or ', unescapes
+ * entities in the string as specified for JavaScript.
+ *
+ * @param reader is a reference to the BufferedCharReader instance which is
+ * the source for the character data. The reader will be positioned after
+ * the terminating quote character or at the terminating delimiting
+ * character.
+ * @param logger is the logger instance that should be used to log error
+ * messages and warnings.
+ * @param delims is a set of delimiters after which parsing has to
+ * be stopped (the delimiters may occur inside the actual string, but not
+ * outside).
+ */
+ static std::pair<bool, std::string> parseString(
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims)
+ {
+ return parseString(reader, logger, &delims);
+ }
+
+ /**
+ * Parses a string which may either be enclosed by " or ', unescapes
+ * entities in the string as specified for JavaScript. No delimiters are
+ * used, a complete string is read.
+ *
+ * @param reader is a reference to the BufferedCharReader instance which is
+ * the source for the character data. The reader will be positioned after
+ * the terminating quote character or at the terminating delimiting
+ * character.
+ * @param logger is the logger instance that should be used to log error
+ * messages and warnings.
+ */
+ static std::pair<bool, std::string> parseString(BufferedCharReader &reader,
+ Logger &logger)
+ {
+ return parseString(reader, logger, nullptr);
+ }
/**
* Extracts an unescaped string from the given buffered char reader
@@ -71,8 +111,8 @@ public:
* These characters are not included in the result. May not be nullptr.
*/
static std::pair<bool, std::string> parseUnescapedString(
- BufferedCharReader &reader, const unordered_set<char> *delims,
- Logger *logger = nullptr);
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims);
/**
* Tries to parse the most specific item from the given stream until one of
@@ -86,8 +126,8 @@ public:
* These characters are not included in the result. May not be nullptr.
*/
static std::pair<bool, Variant> parseGeneric(
- BufferedCharReader &reader, const unordered_set<char> *delims,
- Logger *logger = nullptr);
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims);
};
}
}
diff --git a/test/core/variant/ReaderTest.cpp b/test/core/variant/ReaderTest.cpp
new file mode 100644
index 0000000..760760b
--- /dev/null
+++ b/test/core/variant/ReaderTest.cpp
@@ -0,0 +1,135 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+#include <gtest/gtest.h>
+
+#include <core/variant/Reader.hpp>
+
+namespace ousia {
+namespace variant {
+
+TEST(Reader, readString)
+{
+ TerminalLogger logger(std::cerr, true);
+
+ // Simple, double quoted string
+ {
+ BufferedCharReader reader("\"hello world\"");
+ auto res = Reader::parseString(reader, logger);
+ ASSERT_TRUE(res.first);
+ ASSERT_EQ("hello world", res.second);
+ }
+
+ // Simple, double quoted string with whitespace
+ {
+ BufferedCharReader reader(" \"hello world\" ");
+ auto res = Reader::parseString(reader, logger);
+ ASSERT_TRUE(res.first);
+ ASSERT_EQ("hello world", res.second);
+ }
+
+ // Simple, single quoted string
+ {
+ BufferedCharReader reader("'hello world'");
+ auto res = Reader::parseString(reader, logger);
+ ASSERT_TRUE(res.first);
+ ASSERT_EQ("hello world", res.second);
+ }
+
+ // Escape characters
+ {
+ BufferedCharReader reader("'\\'\\\"\\b\\f\\n\\r\\t\\v'");
+ auto res = Reader::parseString(reader, logger);
+ ASSERT_TRUE(res.first);
+ ASSERT_EQ("'\"\b\f\n\r\t\v", res.second);
+ }
+}
+
+TEST(Reader, parseUnescapedString)
+{
+ TerminalLogger logger(std::cerr, true);
+
+ // Simple case
+ {
+ BufferedCharReader reader("hello world;");
+ auto res = Reader::parseUnescapedString(reader, logger, {';'});
+ ASSERT_TRUE(res.first);
+ ASSERT_EQ("hello world", res.second);
+ }
+
+ // Simple case with whitespace
+ {
+ BufferedCharReader reader(" hello world ; ");
+ auto res = Reader::parseUnescapedString(reader, logger, {';'});
+ ASSERT_TRUE(res.first);
+ ASSERT_EQ("hello world", res.second);
+ }
+
+ // Linebreaks
+ {
+ BufferedCharReader reader(" hello\nworld ; ");
+ auto res = Reader::parseUnescapedString(reader, logger, {';'});
+ ASSERT_TRUE(res.first);
+ ASSERT_EQ("hello\nworld", res.second);
+ }
+
+ // End of stream
+ {
+ BufferedCharReader reader(" hello world ");
+ auto res = Reader::parseUnescapedString(reader, logger, {';'});
+ ASSERT_TRUE(res.first);
+ ASSERT_EQ("hello world", res.second);
+ }
+}
+
+TEST(Reader, parseGeneric)
+{
+ TerminalLogger logger(std::cerr, true);
+
+ // Simple case, unescaped string
+ {
+ BufferedCharReader reader("hello world");
+ auto res = Reader::parseGeneric(reader, logger, {';'});
+ ASSERT_TRUE(res.first);
+ ASSERT_TRUE(res.second.isString());
+ ASSERT_EQ("hello world", res.second.asString());
+ }
+
+ // Simple case, double quoted string
+ {
+ BufferedCharReader reader(" \"hello world\" ");
+ auto res = Reader::parseGeneric(reader, logger, {';'});
+ ASSERT_TRUE(res.first);
+ ASSERT_TRUE(res.second.isString());
+ ASSERT_EQ("hello world", res.second.asString());
+ }
+
+ // Simple case, single quoted string
+ {
+ BufferedCharReader reader(" 'hello world' ");
+ auto res = Reader::parseGeneric(reader, logger, {';'});
+ ASSERT_TRUE(res.first);
+ ASSERT_TRUE(res.second.isString());
+ ASSERT_EQ("hello world", res.second.asString());
+ }
+}
+
+}
+}
+