summaryrefslogtreecommitdiff
path: root/src/core/common
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2014-12-23 23:37:55 +0100
committerAndreas Stöckel <andreas@somweyr.de>2014-12-23 23:37:55 +0100
commit5beaf5c18d8690b88981c30ff210dce86fd8e515 (patch)
treeedb6aca54ca983b2c1eb38214fb6373871e16400 /src/core/common
parentc32c2d53c34e80a2681442afa11540fdde404fdc (diff)
implemented routines for automatically extracting arrays when parsing generics
Diffstat (limited to 'src/core/common')
-rw-r--r--src/core/common/VariantReader.cpp91
-rw-r--r--src/core/common/VariantReader.hpp47
2 files changed, 125 insertions, 13 deletions
diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp
index ccc14f8..d48e5cc 100644
--- a/src/core/common/VariantReader.cpp
+++ b/src/core/common/VariantReader.cpp
@@ -456,8 +456,8 @@ static std::pair<bool, Variant> parseComplex(CharReader &reader, Logger &logger,
case STATE_IN_COMPLEX: {
// Try to read an element using the parseGeneric function
reader.resetPeek();
- auto elem = VariantReader::parseGeneric(reader, logger,
- {',', '=', delim});
+ auto elem = VariantReader::parseGenericToken(
+ reader, logger, {',', '=', delim}, true);
// If the reader had no error, expect an comma, otherwise skip
// to the next comma in the stream
@@ -496,8 +496,8 @@ static std::pair<bool, Variant> parseComplex(CharReader &reader, Logger &logger,
// Consume the equals sign and parse the value
reader.consumePeek();
- auto elem = VariantReader::parseGeneric(reader, logger,
- {',', delim});
+ auto elem = VariantReader::parseGenericToken(
+ reader, logger, {',', delim}, true);
if (elem.first) {
objectResult.insert(
std::make_pair(keyString, elem.second));
@@ -537,7 +537,7 @@ static std::pair<bool, Variant> parseComplex(CharReader &reader, Logger &logger,
if (c == ',') {
state = STATE_IN_COMPLEX;
} else {
- logger.error(unexpectedMsg("\",\"", c));
+ logger.error(unexpectedMsg("\",\"", c), reader);
state = STATE_RESYNC;
hadError = true;
}
@@ -564,7 +564,8 @@ static bool encodeUtf8(std::stringstream &res, CharReader &reader,
// Encode the unicode codepoint as UTF-8
uint32_t cp = static_cast<uint32_t>(v);
if (latin1 && cp > 0xFF) {
- logger.error("Not a valid ISO-8859-1 (Latin-1) character, skipping", reader);
+ logger.error("Not a valid ISO-8859-1 (Latin-1) character, skipping",
+ reader);
return false;
}
@@ -699,6 +700,31 @@ std::pair<bool, std::string> VariantReader::parseString(
return error(reader, logger, ERR_UNEXPECTED_END, res.str());
}
+std::pair<bool, std::string> VariantReader::parseToken(
+ CharReader &reader, Logger &logger, const std::unordered_set<char> &delims)
+{
+ std::stringstream res;
+ char c;
+
+ // Consume all whitespace
+ reader.consumeWhitespace();
+
+ // Copy all characters, skip whitespace at the end
+ int state = STATE_WHITESPACE;
+ while (reader.peek(c)) {
+ bool whitespace = Utils::isWhitespace(c);
+ if (delims.count(c) || (state == STATE_IN_STRING && whitespace)) {
+ reader.resetPeek();
+ return std::make_pair(state == STATE_IN_STRING, res.str());
+ } else if (!whitespace) {
+ state = STATE_IN_STRING;
+ res << c;
+ }
+ reader.consumePeek();
+ }
+ return std::make_pair(state == STATE_IN_STRING, res.str());
+}
+
std::pair<bool, std::string> VariantReader::parseUnescapedString(
CharReader &reader, Logger &logger, const std::unordered_set<char> &delims)
{
@@ -777,11 +803,44 @@ std::pair<bool, Variant::mapType> VariantReader::parseObject(CharReader &reader,
std::pair<bool, Variant> VariantReader::parseGeneric(
CharReader &reader, Logger &logger, const std::unordered_set<char> &delims)
{
+ Variant::arrayType arr;
+ char c;
+ bool hadError = false;
+
+ // Parse generic tokens until the end of the stream or the delimiter is
+ // reached
+ while (reader.peek(c) && !delims.count(c)) {
+ reader.resetPeek();
+ auto res = parseGenericToken(reader, logger, delims);
+ hadError = hadError || !res.first;
+ arr.push_back(res.second);
+ }
+ reader.resetPeek();
+
+ // The resulting array should not be empty
+ if (arr.empty()) {
+ return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
+ }
+
+ // If there only one element was extracted, return this element instead of
+ // an array
+ if (arr.size() == 1) {
+ return std::make_pair(!hadError, arr[0]);
+ } else {
+ return std::make_pair(!hadError, Variant{arr});
+ }
+}
+
+std::pair<bool, Variant> VariantReader::parseGenericToken(
+ CharReader &reader, Logger &logger, const std::unordered_set<char> &delims,
+ bool extractUnescapedStrings)
+{
char c;
// Skip all whitespace characters, read a character and abort if at the end
reader.consumeWhitespace();
if (!reader.peek(c) || delims.count(c)) {
+ reader.resetPeek();
return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
}
@@ -814,8 +873,13 @@ std::pair<bool, Variant> VariantReader::parseGeneric(
return parseComplex(reader, logger, 0, ComplexMode::BOTH);
}
- // Parse an unescaped string in any other case
- auto res = parseUnescapedString(reader, logger, delims);
+ // Otherwise parse a single token
+ std::pair<bool, std::string> res;
+ if (extractUnescapedStrings) {
+ res = parseUnescapedString(reader, logger, delims);
+ } else {
+ res = parseToken(reader, logger, delims);
+ }
// Handling for special primitive values
if (res.first) {
@@ -829,7 +893,16 @@ std::pair<bool, Variant> VariantReader::parseGeneric(
return std::make_pair(true, Variant{nullptr});
}
}
- return std::make_pair(res.first, res.second.c_str());
+
+ // Check whether the parsed string is a valid identifier -- if yes, flag it
+ // as "magic" string
+ if (Utils::isIdentifier(res.second)) {
+ Variant v;
+ v.setMagic(res.second.c_str());
+ return std::make_pair(res.first, v);
+ } else {
+ return std::make_pair(res.first, Variant{res.second.c_str()});
+ }
}
}
diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp
index 2ccfed7..abf529c 100644
--- a/src/core/common/VariantReader.hpp
+++ b/src/core/common/VariantReader.hpp
@@ -98,8 +98,9 @@ public:
}
/**
- * Extracts an unescaped string from the given CharReader instance.
- * This function just reads text until one of the given delimiter
+ * Extracts a single token from the given CharReader instance. Skips any
+ * whitespace character until a non-whitespace character is reached. Stops
+ * if another whitespace character is read or one of the given delimiters
* characters is reached.
*
* @param reader is a reference to the CharReader instance which is
@@ -110,6 +111,23 @@ public:
* @param delims is a set of characters which will terminate the string.
* These characters are not included in the result.
*/
+ static std::pair<bool, std::string> parseToken(
+ CharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims);
+
+ /**
+ * Extracts an unescaped string from the given CharReader instance. Skips
+ * any whitespace character one of the given delimiters is reached. Strips
+ * whitespace at the end of the string.
+ *
+ * @param reader is a reference to the CharReader instance which is
+ * the source for the character data. The reader will be positioned at the
+ * terminating delimiting character.
+ * @param logger is the logger instance that should be used to log error
+ * messages and warnings.
+ * @param delims is a set of characters which will terminate the string.
+ * These characters are not included in the result.
+ */
static std::pair<bool, std::string> parseUnescapedString(
CharReader &reader, Logger &logger,
const std::unordered_set<char> &delims);
@@ -178,8 +196,9 @@ public:
/**
* Tries to parse the most specific item from the given stream until one of
- * the given delimiters is reached or a meaningful literal has been read.
- * The resulting variant represents the value that has been read.
+ * the given delimiters is reached or a meaningful literal (possibly an
+ * array of literals) has been read. The resulting variant represents the
+ * value that has been read.
*
* @param reader is a reference to the CharReader instance which is
* the source for the character data. The reader will be positioned
@@ -190,6 +209,26 @@ public:
static std::pair<bool, Variant> parseGeneric(
CharReader &reader, Logger &logger,
const std::unordered_set<char> &delims);
+
+ /**
+ * Tries to parse the most specific item from the given stream until one of
+ * the given delimiters is reached or a meaningful literal has been read.
+ * The resulting variant represents the value that has been read.
+ *
+ * @param reader is a reference to the CharReader instance which is
+ * the source for the character data. The reader will be positioned
+ * at the terminating delimiting character.
+ * @param delims is a set of characters which will terminate the string.
+ * These characters are not included in the result. May not be nullptr.
+ * @param extractUnescapedStrings if set to true, interprets non-primitive
+ * literals as unescaped strings, which may also contain whitespace
+ * characters. Otherwise string literals are only generated until the next
+ * whitespace character.
+ */
+ static std::pair<bool, Variant> parseGenericToken(
+ CharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims,
+ bool extractUnescapedStrings = false);
};
}