diff options
-rw-r--r-- | src/core/variant/Reader.cpp | 98 | ||||
-rw-r--r-- | src/core/variant/Reader.hpp | 7 | ||||
-rw-r--r-- | test/core/variant/ReaderTest.cpp | 68 |
3 files changed, 168 insertions, 5 deletions
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp index 9215e31..ba857af 100644 --- a/src/core/variant/Reader.cpp +++ b/src/core/variant/Reader.cpp @@ -28,6 +28,14 @@ namespace ousia { namespace variant { +// TODO: Better error messages (like "Expected 'x' but got 'y'") +// TODO: Replace delims with single char delim where possible +// TODO: Use custom return value instead of std::pair +// TODO: Allow buffered char reader to "fork" +// TODO: Rename BufferedCharReader to shorter CharReader +// TODO: Implement context in CharReader (to allow error messages to extract the +// current line) + /* Error Messages */ static const char *ERR_UNEXPECTED_CHAR = "Unexpected character"; @@ -287,7 +295,7 @@ public: reader.consumePeek(); } - // States in which ending is valid, in other states, log an error + // States in which ending is valid. Log an error in other states if (state == State::LEADING_ZERO || state == State::HEX || state == State::INT || state == State::POINT || state == State::EXP) { @@ -302,8 +310,11 @@ public: static const int STATE_INIT = 0; static const int STATE_IN_STRING = 1; -static const int STATE_ESCAPE = 2; -static const int STATE_WHITESPACE = 3; +static const int STATE_IN_ARRAY = 2; +static const int STATE_EXPECT_COMMA = 3; +static const int STATE_ESCAPE = 4; +static const int STATE_WHITESPACE = 5; +static const int STATE_RESYNC = 6; template <class T> static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger, @@ -411,6 +422,74 @@ std::pair<bool, std::string> Reader::parseString( return error(reader, logger, ERR_UNEXPECTED_END, res.str()); } +std::pair<bool, Variant::arrayType> Reader::parseArray( + BufferedCharReader &reader, Logger &logger, char delim) +{ + Variant::arrayType res; + bool hadError = false; + int state = delim ? STATE_IN_ARRAY : STATE_INIT; + delim = delim ? 
delim : ']'; + char c; + + // Consume all whitespace + reader.consumeWhitespace(); + + // Iterate over the characters, use the parseGeneric function to read the + // elements + while (reader.peek(&c)) { + // Generically handle the end of the array + if (state != STATE_INIT && c == delim) { + reader.consumePeek(); + return std::make_pair(!hadError, res); + } + + switch (state) { + case STATE_INIT: + if (c != '[') { + return error(reader, logger, ERR_UNEXPECTED_CHAR, res); + } + state = STATE_IN_ARRAY; + reader.consumePeek(); + break; + case STATE_IN_ARRAY: { + // Try to read an element using the parseGeneric function + reader.resetPeek(); + auto elem = parseGeneric(reader, logger, {',', delim}); + res.push_back(elem.second); + + // If the reader had no error, expect a comma, otherwise skip + // to the next comma in the stream + if (elem.first) { + state = STATE_EXPECT_COMMA; + } else { + state = STATE_RESYNC; + hadError = true; + } + break; + } + case STATE_EXPECT_COMMA: + // Skip whitespace + if (c == ',') { + state = STATE_IN_ARRAY; + } else if (!Utils::isWhitespace(c)) { + hadError = true; + state = STATE_RESYNC; + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + } + reader.consumePeek(); + break; + case STATE_RESYNC: + // Just wait for another comma to arrive + if (c == ',') { + state = STATE_IN_ARRAY; + } + reader.consumePeek(); + break; + } + } + return error(reader, logger, ERR_UNEXPECTED_END, res); +} + std::pair<bool, std::string> Reader::parseUnescapedString( BufferedCharReader &reader, Logger &logger, const std::unordered_set<char> &delims) @@ -517,6 +596,19 @@ std::pair<bool, Variant> Reader::parseGeneric( // Parse an unescaped string in any other case auto res = parseUnescapedString(reader, logger, delims); + + // Handling for special primitive values + if (res.first) { + if (res.second == "true") { + return std::make_pair(true, Variant{true}); + } + if (res.second == "false") { + return std::make_pair(true, Variant{false}); + } + if (res.second == "null") 
{ + return std::make_pair(true, Variant{nullptr}); + } + } return std::make_pair(res.first, res.second.c_str()); } return error(reader, logger, ERR_UNEXPECTED_END, nullptr); diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp index 9de06bc..710f7c4 100644 --- a/src/core/variant/Reader.hpp +++ b/src/core/variant/Reader.hpp @@ -115,7 +115,6 @@ public: BufferedCharReader &reader, Logger &logger, const std::unordered_set<char> &delims); - /** * Parses an integer from the given buffered char reader instance until one * of the given delimiter characters is reached. @@ -143,6 +142,12 @@ public: const std::unordered_set<char> &delims); /** + * Parses an array of values. + */ + static std::pair<bool, Variant::arrayType> parseArray( + BufferedCharReader &reader, Logger &logger, char delim = 0); + + /** * Tries to parse the most specific item from the given stream until one of * the given delimiters is reached or a meaningful literal has been read. * The resulting variant represents the value that has been read. 
diff --git a/test/core/variant/ReaderTest.cpp b/test/core/variant/ReaderTest.cpp index 73e6bf8..bfa523d 100644 --- a/test/core/variant/ReaderTest.cpp +++ b/test/core/variant/ReaderTest.cpp @@ -24,7 +24,7 @@ namespace ousia { namespace variant { -Logger logger; +static TerminalLogger logger{std::cerr, true}; TEST(Reader, readString) { @@ -240,6 +240,72 @@ TEST(Reader, parseDouble) } } +TEST(Reader, parseArray) +{ + // Simple case (only primitive data types) + { + BufferedCharReader reader("[\"Hello, World\", unescaped\n string ,\n" + "1234, 0.56, true, false, null]"); + auto res = Reader::parseArray(reader, logger); + ASSERT_TRUE(res.first); + + // Make sure array has the correct size + ASSERT_EQ(7, res.second.size()); + + // Check the types + ASSERT_TRUE(res.second[0].isString()); + ASSERT_TRUE(res.second[1].isString()); + ASSERT_TRUE(res.second[2].isInt()); + ASSERT_TRUE(res.second[3].isDouble()); + ASSERT_TRUE(res.second[4].isBool()); + ASSERT_TRUE(res.second[5].isBool()); + ASSERT_TRUE(res.second[6].isNull()); + + // Check the values + ASSERT_EQ("Hello, World", res.second[0].asString()); + ASSERT_EQ("unescaped\n string", res.second[1].asString()); + ASSERT_EQ(1234, res.second[2].asInt()); + ASSERT_EQ(0.56, res.second[3].asDouble()); + ASSERT_TRUE(res.second[4].asBool()); + ASSERT_FALSE(res.second[5].asBool()); + } + + // Ending with comma + { + BufferedCharReader reader("[ 'test' ,]"); + auto res = Reader::parseArray(reader, logger); + ASSERT_TRUE(res.first); + + // Make sure the array has the correct size + ASSERT_EQ(1, res.second.size()); + + // Check the types + ASSERT_TRUE(res.second[0].isString()); + + // Check the values + ASSERT_EQ("test", res.second[0].asString()); + } + + // Recovery from invalid values + // TODO: Actually parseGeneric should fall back to returning a simple string + // if parsing of a special (non-string) type failed + { + BufferedCharReader reader("[ 0invalidNumber, str, 1invalid]"); + auto res = Reader::parseArray(reader, logger); + 
ASSERT_FALSE(res.first); + + // Make sure the array has the correct size + ASSERT_EQ(3, res.second.size()); + + // Check the types (only for the valid entries, the other types are + // undefined) + ASSERT_TRUE(res.second[1].isString()); + + // Check the values + ASSERT_EQ("str", res.second[1].asString()); + } +} + TEST(Reader, parseGeneric) { // Simple case, unescaped string |