diff options
-rw-r--r-- | src/core/common/VariantReader.cpp | 91 | ||||
-rw-r--r-- | src/core/common/VariantReader.hpp | 47 | ||||
-rw-r--r-- | test/core/common/VariantReaderTest.cpp | 303 |
3 files changed, 413 insertions, 28 deletions
diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp index ccc14f8..d48e5cc 100644 --- a/src/core/common/VariantReader.cpp +++ b/src/core/common/VariantReader.cpp @@ -456,8 +456,8 @@ static std::pair<bool, Variant> parseComplex(CharReader &reader, Logger &logger, case STATE_IN_COMPLEX: { // Try to read an element using the parseGeneric function reader.resetPeek(); - auto elem = VariantReader::parseGeneric(reader, logger, - {',', '=', delim}); + auto elem = VariantReader::parseGenericToken( + reader, logger, {',', '=', delim}, true); // If the reader had no error, expect an comma, otherwise skip // to the next comma in the stream @@ -496,8 +496,8 @@ static std::pair<bool, Variant> parseComplex(CharReader &reader, Logger &logger, // Consume the equals sign and parse the value reader.consumePeek(); - auto elem = VariantReader::parseGeneric(reader, logger, - {',', delim}); + auto elem = VariantReader::parseGenericToken( + reader, logger, {',', delim}, true); if (elem.first) { objectResult.insert( std::make_pair(keyString, elem.second)); @@ -537,7 +537,7 @@ static std::pair<bool, Variant> parseComplex(CharReader &reader, Logger &logger, if (c == ',') { state = STATE_IN_COMPLEX; } else { - logger.error(unexpectedMsg("\",\"", c)); + logger.error(unexpectedMsg("\",\"", c), reader); state = STATE_RESYNC; hadError = true; } @@ -564,7 +564,8 @@ static bool encodeUtf8(std::stringstream &res, CharReader &reader, // Encode the unicode codepoint as UTF-8 uint32_t cp = static_cast<uint32_t>(v); if (latin1 && cp > 0xFF) { - logger.error("Not a valid ISO-8859-1 (Latin-1) character, skipping", reader); + logger.error("Not a valid ISO-8859-1 (Latin-1) character, skipping", + reader); return false; } @@ -699,6 +700,31 @@ std::pair<bool, std::string> VariantReader::parseString( return error(reader, logger, ERR_UNEXPECTED_END, res.str()); } +std::pair<bool, std::string> VariantReader::parseToken( + CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) +{ + std::stringstream res; + char c; + + // Consume all whitespace + reader.consumeWhitespace(); + + // Copy all characters, skip whitespace at the end + int state = STATE_WHITESPACE; + while (reader.peek(c)) { + bool whitespace = Utils::isWhitespace(c); + if (delims.count(c) || (state == STATE_IN_STRING && whitespace)) { + reader.resetPeek(); + return std::make_pair(state == STATE_IN_STRING, res.str()); + } else if (!whitespace) { + state = STATE_IN_STRING; + res << c; + } + reader.consumePeek(); + } + return std::make_pair(state == STATE_IN_STRING, res.str()); +} + std::pair<bool, std::string> VariantReader::parseUnescapedString( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -777,11 +803,44 @@ std::pair<bool, Variant::mapType> VariantReader::parseObject(CharReader &reader, std::pair<bool, Variant> VariantReader::parseGeneric( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { + Variant::arrayType arr; + char c; + bool hadError = false; + + // Parse generic tokens until the end of the stream or the delimiter is + // reached + while (reader.peek(c) && !delims.count(c)) { + reader.resetPeek(); + auto res = parseGenericToken(reader, logger, delims); + hadError = hadError || !res.first; + arr.push_back(res.second); + } + reader.resetPeek(); + + // The resulting array should not be empty + if (arr.empty()) { + return error(reader, logger, ERR_UNEXPECTED_END, nullptr); + } + + // If there only one element was extracted, return this element instead of + // an array + if (arr.size() == 1) { + return std::make_pair(!hadError, arr[0]); + } else { + return std::make_pair(!hadError, Variant{arr}); + } +} + +std::pair<bool, Variant> VariantReader::parseGenericToken( + CharReader &reader, Logger &logger, const std::unordered_set<char> &delims, + bool extractUnescapedStrings) +{ char c; // Skip all whitespace characters, read a character and abort if at the end reader.consumeWhitespace(); if (!reader.peek(c) || delims.count(c)) { + reader.resetPeek(); return error(reader, logger, ERR_UNEXPECTED_END, nullptr); } @@ -814,8 +873,13 @@ std::pair<bool, Variant> VariantReader::parseGeneric( return parseComplex(reader, logger, 0, ComplexMode::BOTH); } - // Parse an unescaped string in any other case - auto res = parseUnescapedString(reader, logger, delims); + // Otherwise parse a single token + std::pair<bool, std::string> res; + if (extractUnescapedStrings) { + res = parseUnescapedString(reader, logger, delims); + } else { + res = parseToken(reader, logger, delims); + } // Handling for special primitive values if (res.first) { @@ -829,7 +893,16 @@ std::pair<bool, Variant> VariantReader::parseGeneric( return std::make_pair(true, Variant{nullptr}); } } - return std::make_pair(res.first, res.second.c_str()); + + // Check whether the parsed string is a valid identifier -- if yes, flag it + // as "magic" string + if (Utils::isIdentifier(res.second)) { + Variant v; + v.setMagic(res.second.c_str()); + return std::make_pair(res.first, v); + } else { + return std::make_pair(res.first, Variant{res.second.c_str()}); + } } } diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp index 2ccfed7..abf529c 100644 --- a/src/core/common/VariantReader.hpp +++ b/src/core/common/VariantReader.hpp @@ -98,8 +98,9 @@ public: } /** - * Extracts an unescaped string from the given CharReader instance. - * This function just reads text until one of the given delimiter + * Extracts a single token from the given CharReader instance. Skips any + * whitespace character until a non-whitespace character is reached. Stops + * if another whitespace character is read or one of the given delimiters * characters is reached. * * @param reader is a reference to the CharReader instance which is @@ -110,6 +111,23 @@ public: * @param delims is a set of characters which will terminate the string. * These characters are not included in the result. */ + static std::pair<bool, std::string> parseToken( + CharReader &reader, Logger &logger, + const std::unordered_set<char> &delims); + + /** + * Extracts an unescaped string from the given CharReader instance. Skips + * any whitespace character one of the given delimiters is reached. Strips + * whitespace at the end of the string. + * + * @param reader is a reference to the CharReader instance which is + * the source for the character data. The reader will be positioned at the + * terminating delimiting character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + * @param delims is a set of characters which will terminate the string. + * These characters are not included in the result. + */ static std::pair<bool, std::string> parseUnescapedString( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims); @@ -178,8 +196,9 @@ public: /** * Tries to parse the most specific item from the given stream until one of - * the given delimiters is reached or a meaningful literal has been read. - * The resulting variant represents the value that has been read. + * the given delimiters is reached or a meaningful literal (possibly an + * array of literals) has been read. The resulting variant represents the + * value that has been read. * * @param reader is a reference to the CharReader instance which is * the source for the character data. The reader will be positioned @@ -190,6 +209,26 @@ public: static std::pair<bool, Variant> parseGeneric( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims); + + /** + * Tries to parse the most specific item from the given stream until one of + * the given delimiters is reached or a meaningful literal has been read. + * The resulting variant represents the value that has been read. + * + * @param reader is a reference to the CharReader instance which is + * the source for the character data. The reader will be positioned + * at the terminating delimiting character. + * @param delims is a set of characters which will terminate the string. + * These characters are not included in the result. May not be nullptr. + * @param extractUnescapedStrings if set to true, interprets non-primitive + * literals as unescaped strings, which may also contain whitespace + * characters. Otherwise string literals are only generated until the next + * whitespace character. + */ + static std::pair<bool, Variant> parseGenericToken( + CharReader &reader, Logger &logger, + const std::unordered_set<char> &delims, + bool extractUnescapedStrings = false); }; } diff --git a/test/core/common/VariantReaderTest.cpp b/test/core/common/VariantReaderTest.cpp index 3d4e7bd..9f21c19 100644 --- a/test/core/common/VariantReaderTest.cpp +++ b/test/core/common/VariantReaderTest.cpp @@ -22,10 +22,9 @@ #include <core/common/VariantReader.hpp> namespace ousia { -namespace variant { -static TerminalLogger logger{std::cerr, true}; -//static Logger logger; +// static TerminalLogger logger{std::cerr, true}; +static Logger logger; TEST(VariantReader, readString) { @@ -61,7 +60,6 @@ TEST(VariantReader, readString) ASSERT_EQ("'\"\b\f\n\r\t\v", res.second); } - // Hex Unicode character { CharReader reader("'linebreak\\u000A in unicode'"); @@ -127,6 +125,73 @@ TEST(VariantReader, readStringUnicode) } } +TEST(VariantReader, parseToken) +{ + // Simple case + { + CharReader reader("hello world;"); + { + auto res = VariantReader::parseToken(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello", res.second); + } + + { + auto res = VariantReader::parseToken(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("world", res.second); + } + } + + // Simple case with whitespace + { + CharReader reader(" hello world ; "); + { + auto res = VariantReader::parseToken(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello", res.second); + } + + { + auto res = VariantReader::parseToken(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("world", res.second); + } + } + + // Linebreaks + { + CharReader reader(" hello\nworld ; "); + { + auto res = VariantReader::parseToken(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello", res.second); + } + + { + auto res = VariantReader::parseToken(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("world", res.second); + } + } + + // End of stream + { + CharReader reader(" hello world"); + { + auto res = VariantReader::parseToken(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello", res.second); + } + + { + auto res = VariantReader::parseToken(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("world", res.second); + } + } +} + TEST(VariantReader, parseUnescapedString) { // Simple case @@ -155,7 +220,7 @@ TEST(VariantReader, parseUnescapedString) // End of stream { - CharReader reader(" hello world "); + CharReader reader(" hello world"); auto res = VariantReader::parseUnescapedString(reader, logger, {';'}); ASSERT_TRUE(res.first); ASSERT_EQ("hello world", res.second); @@ -312,8 +377,9 @@ TEST(VariantReader, parseArray) { // Simple case (only primitive data types) { - CharReader reader("[\"Hello, World\", unescaped\n string ,\n" - "1234, 0.56, true, false, null]"); + CharReader reader( + "[\"Hello, World\", unescaped\n string ,\n" + "1234, 0.56, true, false, null]"); auto res = VariantReader::parseArray(reader, logger); ASSERT_TRUE(res.first); @@ -379,8 +445,9 @@ TEST(VariantReader, parseObject) { // Array as object { - CharReader reader("[\"Hello, World\", unescaped\n string ,\n" - "1234, 0.56, true, false, null]"); + CharReader reader( + "[\"Hello, World\", unescaped\n string ,\n" + "1234, 0.56, true, false, null]"); auto res = VariantReader::parseObject(reader, logger); ASSERT_TRUE(res.first); @@ -469,7 +536,8 @@ TEST(VariantReader, parseObject) // Even More complex array/object { - CharReader reader("[\"key1\" = [4, 5, true, e=[1, 2, 3]], \"key2\"=[]]"); + CharReader reader( + "[\"key1\" = [4, 5, true, e=[1, 2, 3]], \"key2\"=[]]"); auto res = VariantReader::parseObject(reader, logger); ASSERT_TRUE(res.first); @@ -481,7 +549,22 @@ TEST(VariantReader, parseObject) ASSERT_TRUE(res.second["key2"].isArray()); // Check the values - std::cout << res.second << std::endl; + auto m = res.second["key1"].asMap(); + ASSERT_EQ(4U, m.size()); + ASSERT_TRUE(m["#0"].isInt()); + ASSERT_TRUE(m["#1"].isInt()); + ASSERT_TRUE(m["#2"].isBool()); + ASSERT_TRUE(m["e"].isArray()); + ASSERT_EQ(4, m["#0"].asInt()); + ASSERT_EQ(5, m["#1"].asInt()); + ASSERT_TRUE(m["#2"].asBool()); + ASSERT_EQ(3U, m["e"].asArray().size()); + ASSERT_EQ(1, m["e"].asArray()[0].asInt()); + ASSERT_EQ(2, m["e"].asArray()[1].asInt()); + ASSERT_EQ(3, m["e"].asArray()[2].asInt()); + + auto a = res.second["key2"].asArray(); + ASSERT_EQ(0U, a.size()); } // Invalid array/object @@ -492,36 +575,226 @@ TEST(VariantReader, parseObject) } } -TEST(VariantReader, parseGeneric) +TEST(VariantReader, parseGenericToken) { // Simple case, unescaped string { CharReader reader("hello world"); - auto res = VariantReader::parseGeneric(reader, logger, {';'}); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, true); ASSERT_TRUE(res.first); ASSERT_TRUE(res.second.isString()); + ASSERT_FALSE(res.second.isMagic()); ASSERT_EQ("hello world", res.second.asString()); } // Simple case, double quoted string { CharReader reader(" \"hello world\" "); - auto res = VariantReader::parseGeneric(reader, logger, {';'}); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, true); ASSERT_TRUE(res.first); ASSERT_TRUE(res.second.isString()); + ASSERT_FALSE(res.second.isMagic()); ASSERT_EQ("hello world", res.second.asString()); } // Simple case, single quoted string { CharReader reader(" 'hello world' "); - auto res = VariantReader::parseGeneric(reader, logger, {';'}); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, true); ASSERT_TRUE(res.first); ASSERT_TRUE(res.second.isString()); + ASSERT_FALSE(res.second.isMagic()); ASSERT_EQ("hello world", res.second.asString()); } + + // Integer + { + CharReader reader("1234"); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, true); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isInt()); + ASSERT_EQ(1234, res.second.asInt()); + } + + // Double + { + CharReader reader("1234.5"); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, true); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isDouble()); + ASSERT_EQ(1234.5, res.second.asDouble()); + } + + // Boolean (true) + { + CharReader reader("true"); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, true); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isBool()); + ASSERT_TRUE(res.second.asBool()); + } + + // Boolean (false) + { + CharReader reader("false"); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, true); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isBool()); + ASSERT_FALSE(res.second.asBool()); + } + + // Nullptr + { + CharReader reader("null"); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, true); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isNull()); + } + + // Simple case, unescaped string + { + CharReader reader("hello world"); + + { + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, false); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_TRUE(res.second.isMagic()); + ASSERT_EQ("hello", res.second.asString()); + } + + { + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, false); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_TRUE(res.second.isMagic()); + ASSERT_EQ("world", res.second.asString()); + } + } + + // Simple case, double quoted string + { + CharReader reader(" \"hello world\" "); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, false); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_FALSE(res.second.isMagic()); + ASSERT_EQ("hello world", res.second.asString()); + } + + // Simple case, single quoted string + { + CharReader reader(" 'hello world' "); + auto res = + VariantReader::parseGenericToken(reader, logger, {';'}, false); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_FALSE(res.second.isMagic()); + ASSERT_EQ("hello world", res.second.asString()); + } +} + +TEST(VariantReader, parseGeneric) +{ + // Simple case, unescaped string + { + CharReader reader("hello"); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isMagic()); + ASSERT_EQ("hello", res.second.asMagic()); + } + + // Simple case, unescaped string with multiple array entries + { + CharReader reader("hello world"); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isArray()); + + auto arr = res.second.asArray(); + ASSERT_EQ(2U, arr.size()); + ASSERT_TRUE(arr[0].isMagic()); + ASSERT_TRUE(arr[1].isMagic()); + ASSERT_EQ("hello", arr[0].asMagic()); + ASSERT_EQ("world", arr[1].asMagic()); + } + + // Delimiter test + { + CharReader reader("hello; world"); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isMagic()); + ASSERT_EQ("hello", res.second.asMagic()); + + char c; + ASSERT_TRUE(reader.peek(c)); + ASSERT_EQ(';', c); + } + + // More complex CSS-like case + { + CharReader reader("1px solid blue"); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isArray()); + + auto arr = res.second.asArray(); + ASSERT_EQ(3U, arr.size()); + ASSERT_TRUE(arr[0].isString()); + ASSERT_TRUE(arr[1].isMagic()); + ASSERT_TRUE(arr[2].isMagic()); + ASSERT_EQ("1px", arr[0].asString()); + ASSERT_EQ("solid", arr[1].asMagic()); + ASSERT_EQ("blue", arr[2].asMagic()); + } } +TEST(VariantReader, parseGenericComplex) +{ + CharReader reader("10 true [1, 2] [] [foo=bar,h]; []"); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isArray()); + + auto arr = res.second.asArray(); + ASSERT_EQ(5U, arr.size()); + ASSERT_TRUE(arr[0].isInt()); + ASSERT_TRUE(arr[1].isBool()); + ASSERT_TRUE(arr[2].isArray()); + ASSERT_TRUE(arr[3].isArray()); + ASSERT_TRUE(arr[4].isMap()); + + ASSERT_EQ(10, arr[0].asInt()); + ASSERT_TRUE(arr[1].asBool()); + + ASSERT_EQ(2U, arr[2].asArray().size()); + ASSERT_EQ(1, arr[2].asArray()[0].asInt()); + ASSERT_EQ(2, arr[2].asArray()[1].asInt()); + + ASSERT_EQ(0U, arr[3].asArray().size()); + + ASSERT_EQ(2U, arr[4].asMap().size()); + ASSERT_TRUE(arr[4].asMap().count("foo")); + ASSERT_TRUE(arr[4].asMap().count("#1")); + ASSERT_TRUE(arr[4].asMap().find("foo")->second.isMagic()); + ASSERT_EQ("bar", arr[4].asMap().find("foo")->second.asMagic()); + + char c; + ASSERT_TRUE(reader.peek(c)); + ASSERT_EQ(';', c); } } |