summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2014-12-07 01:14:26 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2014-12-07 01:14:26 +0100
commita2fafc917f96b879ad023b117978da0de124d12b (patch)
treee51df9fcabbd685f4549cd03a37122d994a97d4e
parent52cb69b8611a4376b27d55078d16855a16f2c88c (diff)
implemented parseArray
-rw-r--r--src/core/variant/Reader.cpp98
-rw-r--r--src/core/variant/Reader.hpp7
-rw-r--r--test/core/variant/ReaderTest.cpp68
3 files changed, 168 insertions, 5 deletions
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
index 9215e31..ba857af 100644
--- a/src/core/variant/Reader.cpp
+++ b/src/core/variant/Reader.cpp
@@ -28,6 +28,14 @@
namespace ousia {
namespace variant {
+// TODO: Better error messages (like "Expected 'x' but got 'y'")
+// TODO: Replace delims with single char delim where possible
+// TODO: Use custom return value instead of std::pair
+// TODO: Allow buffered char reader to "fork"
+// TODO: Rename BufferedCharReader to shorter CharReader
+// TODO: Implement context in CharReader (to allow error messages to extract the
+// current line)
+
/* Error Messages */
static const char *ERR_UNEXPECTED_CHAR = "Unexpected character";
@@ -287,7 +295,7 @@ public:
reader.consumePeek();
}
- // States in which ending is valid, in other states, log an error
+ // States in which ending is valid. Log an error in other states
if (state == State::LEADING_ZERO || state == State::HEX ||
state == State::INT || state == State::POINT ||
state == State::EXP) {
@@ -302,8 +310,11 @@ public:
static const int STATE_INIT = 0;
static const int STATE_IN_STRING = 1;
-static const int STATE_ESCAPE = 2;
-static const int STATE_WHITESPACE = 3;
+static const int STATE_IN_ARRAY = 2;
+static const int STATE_EXPECT_COMMA = 3;
+static const int STATE_ESCAPE = 4;
+static const int STATE_WHITESPACE = 5;
+static const int STATE_RESYNC = 6;
template <class T>
static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger,
@@ -411,6 +422,74 @@ std::pair<bool, std::string> Reader::parseString(
return error(reader, logger, ERR_UNEXPECTED_END, res.str());
}
+std::pair<bool, Variant::arrayType> Reader::parseArray(
+ BufferedCharReader &reader, Logger &logger, char delim)
+{
+ Variant::arrayType res;
+ bool hadError = false;
+ int state = delim ? STATE_IN_ARRAY : STATE_INIT;
+ delim = delim ? delim : ']';
+ char c;
+
+ // Consume all whitespace
+ reader.consumeWhitespace();
+
+ // Iterate over the characters, use the parseGeneric function to read the
+ // array elements
+ while (reader.peek(&c)) {
+ // Generically handle the end of the array
+ if (state != STATE_INIT && c == delim) {
+ reader.consumePeek();
+ return std::make_pair(!hadError, res);
+ }
+
+ switch (state) {
+ case STATE_INIT:
+ if (c != '[') {
+ return error(reader, logger, ERR_UNEXPECTED_CHAR, res);
+ }
+ state = STATE_IN_ARRAY;
+ reader.consumePeek();
+ break;
+ case STATE_IN_ARRAY: {
+ // Try to read an element using the parseGeneric function
+ reader.resetPeek();
+ auto elem = parseGeneric(reader, logger, {',', delim});
+ res.push_back(elem.second);
+
+ // If the reader had no error, expect a comma, otherwise skip
+ // to the next comma in the stream
+ if (elem.first) {
+ state = STATE_EXPECT_COMMA;
+ } else {
+ state = STATE_RESYNC;
+ hadError = true;
+ }
+ break;
+ }
+ case STATE_EXPECT_COMMA:
+ // Accept a comma, skip whitespace, log an error for anything else
+ if (c == ',') {
+ state = STATE_IN_ARRAY;
+ } else if (!Utils::isWhitespace(c)) {
+ hadError = true;
+ state = STATE_RESYNC;
+ logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+ }
+ reader.consumePeek();
+ break;
+ case STATE_RESYNC:
+ // Just wait for another comma to arrive
+ if (c == ',') {
+ state = STATE_IN_ARRAY;
+ }
+ reader.consumePeek();
+ break;
+ }
+ }
+ return error(reader, logger, ERR_UNEXPECTED_END, res);
+}
+
std::pair<bool, std::string> Reader::parseUnescapedString(
BufferedCharReader &reader, Logger &logger,
const std::unordered_set<char> &delims)
@@ -517,6 +596,19 @@ std::pair<bool, Variant> Reader::parseGeneric(
// Parse an unescaped string in any other case
auto res = parseUnescapedString(reader, logger, delims);
+
+ // Handling for special primitive values
+ if (res.first) {
+ if (res.second == "true") {
+ return std::make_pair(true, Variant{true});
+ }
+ if (res.second == "false") {
+ return std::make_pair(true, Variant{false});
+ }
+ if (res.second == "null") {
+ return std::make_pair(true, Variant{nullptr});
+ }
+ }
return std::make_pair(res.first, res.second.c_str());
}
return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
index 9de06bc..710f7c4 100644
--- a/src/core/variant/Reader.hpp
+++ b/src/core/variant/Reader.hpp
@@ -115,7 +115,6 @@ public:
BufferedCharReader &reader, Logger &logger,
const std::unordered_set<char> &delims);
-
/**
* Parses an integer from the given buffered char reader instance until one
* of the given delimiter characters is reached.
@@ -143,6 +142,12 @@ public:
const std::unordered_set<char> &delims);
/**
+ * Parses an array of values: "[...]" if delim is 0, else up to delim.
+ */
+ static std::pair<bool, Variant::arrayType> parseArray(
+ BufferedCharReader &reader, Logger &logger, char delim = 0);
+
+ /**
* Tries to parse the most specific item from the given stream until one of
* the given delimiters is reached or a meaningful literal has been read.
* The resulting variant represents the value that has been read.
diff --git a/test/core/variant/ReaderTest.cpp b/test/core/variant/ReaderTest.cpp
index 73e6bf8..bfa523d 100644
--- a/test/core/variant/ReaderTest.cpp
+++ b/test/core/variant/ReaderTest.cpp
@@ -24,7 +24,7 @@
namespace ousia {
namespace variant {
-Logger logger;
+static TerminalLogger logger{std::cerr, true};
TEST(Reader, readString)
{
@@ -240,6 +240,72 @@ TEST(Reader, parseDouble)
}
}
+TEST(Reader, parseArray)
+{
+ // Simple case (only primitive data types)
+ {
+ BufferedCharReader reader("[\"Hello, World\", unescaped\n string ,\n"
+ "1234, 0.56, true, false, null]");
+ auto res = Reader::parseArray(reader, logger);
+ ASSERT_TRUE(res.first);
+
+ // Make sure the array has the correct size
+ ASSERT_EQ(7, res.second.size());
+
+ // Check the types
+ ASSERT_TRUE(res.second[0].isString());
+ ASSERT_TRUE(res.second[1].isString());
+ ASSERT_TRUE(res.second[2].isInt());
+ ASSERT_TRUE(res.second[3].isDouble());
+ ASSERT_TRUE(res.second[4].isBool());
+ ASSERT_TRUE(res.second[5].isBool());
+ ASSERT_TRUE(res.second[6].isNull());
+
+ // Check the values
+ ASSERT_EQ("Hello, World", res.second[0].asString());
+ ASSERT_EQ("unescaped\n string", res.second[1].asString());
+ ASSERT_EQ(1234, res.second[2].asInt());
+ ASSERT_EQ(0.56, res.second[3].asDouble());
+ ASSERT_TRUE(res.second[4].asBool());
+ ASSERT_FALSE(res.second[5].asBool());
+ }
+
+ // Ending with comma
+ {
+ BufferedCharReader reader("[ 'test' ,]");
+ auto res = Reader::parseArray(reader, logger);
+ ASSERT_TRUE(res.first);
+
+ // Make sure the array has the correct size
+ ASSERT_EQ(1, res.second.size());
+
+ // Check the types
+ ASSERT_TRUE(res.second[0].isString());
+
+ // Check the values
+ ASSERT_EQ("test", res.second[0].asString());
+ }
+
+ // Recovery from invalid values
+ // TODO: Actually parseGeneric should fall back to returning a simple string
+ // if parsing of a special (non-string) type failed
+ {
+ BufferedCharReader reader("[ 0invalidNumber, str, 1invalid]");
+ auto res = Reader::parseArray(reader, logger);
+ ASSERT_FALSE(res.first);
+
+ // Make sure the array has the correct size
+ ASSERT_EQ(3, res.second.size());
+
+ // Check the types (only for the valid entries, the other types are
+ // undefined)
+ ASSERT_TRUE(res.second[1].isString());
+
+ // Check the values
+ ASSERT_EQ("str", res.second[1].asString());
+ }
+}
+
TEST(Reader, parseGeneric)
{
// Simple case, unescaped string