summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/BufferedCharReader.cpp24
-rw-r--r--src/core/BufferedCharReader.hpp18
-rw-r--r--src/core/variant/Reader.cpp114
-rw-r--r--src/core/variant/Reader.hpp56
4 files changed, 162 insertions, 50 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp
index f3792ec..aeedf12 100644
--- a/src/core/BufferedCharReader.cpp
+++ b/src/core/BufferedCharReader.cpp
@@ -18,6 +18,8 @@
#include <array>
+#include "Utils.hpp"
+
#include "BufferedCharReader.hpp"
namespace ousia {
@@ -73,6 +75,15 @@ BufferedCharReader::BufferedCharReader(const std::string &str, int line,
buffer.push_back(str);
}
+BufferedCharReader::BufferedCharReader(const std::string &str)
+ : inputStream(nullptr),
+ readCursor(1, 1, true),
+ peekCursor(1, 1, false),
+ depleted(true)
+{
+ buffer.push_back(str);
+}
+
BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line,
int column)
: inputStream(&inputStream),
@@ -218,6 +229,19 @@ void BufferedCharReader::consumePeek()
readCursor.assign(peekCursor);
}
+bool BufferedCharReader::consumeWhitespace()
+{
+ char c;
+ while (peek(&c)) {
+ if (!Utils::isWhitespace(c)) {
+ resetPeek();
+ return true;
+ }
+ consumePeek();
+ }
+ return false;
+}
+
void BufferedCharReader::resetPeek()
{
// Reset the peek cursor to the read cursor
diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp
index bd19d4a..e7f3186 100644
--- a/src/core/BufferedCharReader.hpp
+++ b/src/core/BufferedCharReader.hpp
@@ -172,7 +172,6 @@ public:
*/
BufferedCharReader(int line = 1, int column = 1);
-
/**
* Constructor of the buffered char reader class with a string as input.
*
@@ -180,7 +179,14 @@ public:
* @param line is the start line.
* @param column is the start column.
*/
- BufferedCharReader(const std::string &str, int line = 1, int column = 1);
+ BufferedCharReader(const std::string &str, int line, int column);
+
+ /**
+ * Constructor of the buffered char reader class with a string as input.
+ *
+ * @param str is a string containing the input data.
+ */
+ BufferedCharReader(const std::string &str);
/**
* Constructor of the buffered char reader class with a string as input.
@@ -222,6 +228,14 @@ public:
void consumePeek();
/**
+ * Moves the read cursor to the next non-whitespace character. Returns
+ * false, if the end of the stream was reached.
+ *
+ * @return false if the end of the stream was reached, true otherwise.
+ */
+ bool consumeWhitespace();
+
+ /**
* Resets the peek pointer to the "read" pointer.
*/
void resetPeek();
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
index e9a58a1..a0bba52 100644
--- a/src/core/variant/Reader.cpp
+++ b/src/core/variant/Reader.cpp
@@ -29,21 +29,33 @@ namespace variant {
static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character";
static const char *ERR_UNEXPECTED_END = "Unexpected end";
static const char *ERR_UNTERMINATED = "Unterminated literal";
+static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence";
static const int STATE_INIT = 0;
static const int STATE_IN_STRING = 1;
static const int STATE_ESCAPE = 2;
+static const int STATE_WHITESPACE = 3;
-static std::pair<Err, std::string> parseString(
- BufferedCharReader &reader, const unordered_set<char> *delims = nullptr,
- Logger *logger = nullptr)
+template <class T>
+static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger,
+ const char *err, T res)
+{
+ logger.errorAt(err, reader);
+ return std::make_pair(false, std::move(res));
+}
+
+std::pair<bool, std::string> Reader::parseString(
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> *delims)
{
// Initialize the internal state
- Err errCode = Err::OK;
int state = STATE_INIT;
char quote = 0;
std::stringstream res;
+ // Consume all whitespace
+ reader.consumeWhitespace();
+
// State machine which iterates over each character in the stream
// TODO: Combination of peeking and consumePeek is stupid as consumePeek is
// the default (read and putBack would obviously be better, yet the latter
@@ -55,29 +67,28 @@ static std::pair<Err, std::string> parseString(
if (c == '"' || c == '\'') {
quote = c;
state = STATE_IN_STRING;
- } else if (delims && delims.count(c)) {
- Logger.log(ERR_UNTERMINATED, reader);
- return std::make_pair(Err::UNEXPECTED_END, res.str());
- } else if (Utils::isWhitespace(c)) {
- reader.consumePeek();
- continue;
+ break;
+ } else if (delims && delims->count(c)) {
+ return error(reader, logger, ERR_UNEXPECTED_END, res.str());
}
- return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str());
- break;
+ return error(reader, logger, ERR_UNEXPECTED_CHARACTER,
+ res.str());
case STATE_IN_STRING:
- if (c == q) {
- state = STATE_END;
+ if (c == quote) {
reader.consumePeek();
- return std::make_pair(Err::OK, res.str());
+ return std::make_pair(true, res.str());
} else if (c == '\\') {
state = STATE_ESCAPE;
+ reader.consumePeek();
+ break;
} else if (c == '\n') {
- return std::make_pair(Err::UNTERMINATED, res.str());
+ return error(reader, logger, ERR_UNTERMINATED, res.str());
}
res << c;
reader.consumePeek();
break;
case STATE_ESCAPE:
+ // Handle all possible special escape characters
switch (c) {
case 'b':
res << '\b';
@@ -118,67 +129,90 @@ static std::pair<Err, std::string> parseString(
if (Utils::isNumeric(c)) {
// TODO: Parse octal 000 sequence
} else {
- errCode = Err::ERR_INVALID_ESCAPE;
+ logger.errorAt(ERR_INVALID_ESCAPE, reader);
}
break;
}
+
+ // Switch back to the "normal" state
state = STATE_IN_STRING;
reader.consumePeek();
break;
}
}
- return std::make_pair(Err::UNEXPECTED_END, res.str());
+ return error(reader, logger, ERR_UNEXPECTED_END, res.str());
}
-static std::pair<Err, std::string> parseUnescapedString(
- BufferedCharReader &reader, const unordered_set<char> *delims)
+std::pair<bool, std::string> Reader::parseUnescapedString(
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims)
{
- assert(delims);
-
std::stringstream res;
+ std::stringstream buf;
char c;
+
+ // Consume all whitespace
+ reader.consumeWhitespace();
+
+ // Copy all characters, skip whitespace at the end
+ int state = STATE_IN_STRING;
while (reader.peek(&c)) {
- if (delims->count(c)) {
- return std::make_pair(Err::OK, res.str());
+ if (delims.count(c)) {
+ return std::make_pair(true, res.str());
+ } else if (Utils::isWhitespace(c)) {
+ // Do not add whitespace to the output buffer
+ state = STATE_WHITESPACE;
+ buf << c;
+ } else {
+ // If we just had a sequence of whitespace, append it to the output
+ // buffer and continue
+ if (state == STATE_WHITESPACE) {
+ res << buf.str();
+ buf.str(std::string{});
+ buf.clear();
+ state = STATE_IN_STRING;
+ }
+ res << c;
}
- res << c;
reader.consumePeek();
}
- return std::make_pair(Err::UNEXPECTED_END, res.str());
+ return std::make_pair(true, res.str());
}
-static std::pair<Err, Variant> parseGeneric(BufferedCharReader &reader,
- const unordered_set<char> *delims)
+std::pair<bool, Variant> Reader::parseGeneric(
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims)
{
- assert(delims);
-
char c;
+
+ // Skip all whitespace characters
+ reader.consumeWhitespace();
+
while (reader.peek(&c)) {
- // Stop if a delimiter is reached, skipp all whitespace characters
- if (delims->count(c)) {
- return std::make_pair(Err::OK, res.str());
- } else if (Utils::isWhitespace(c)) {
- reader.consumePeek();
- continue;
+ // Stop if a delimiter is reached
+ if (delims.count(c)) {
+ return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
}
// Parse a string if a quote is reached
if (c == '"' || c == '\'') {
- return parseString(reader, nullptr);
+ auto res = parseString(reader, logger);
+ return std::make_pair(res.first, res.second.c_str());
}
if (c == '[') {
// TODO: Parse struct descriptor
}
- if (isNumeric(c)) {
+ if (Utils::isNumeric(c)) {
// TODO: Parse integer/double
}
// Parse an unescaped string in any other case
- return parseUnescapedString(reader, delims);
+ auto res = parseUnescapedString(reader, logger, delims);
+ return std::make_pair(res.first, res.second.c_str());
}
- return std::make_pair(Err::UNEXPECTED_END, res.str());
+ return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
}
}
}
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
index 339127f..62592c1 100644
--- a/src/core/variant/Reader.hpp
+++ b/src/core/variant/Reader.hpp
@@ -40,7 +40,7 @@ namespace ousia {
namespace variant {
class Reader {
-public:
+private:
/**
* Parses a string which may either be enclosed by " or ', unescapes
* entities in the string as specified for JavaScript.
@@ -49,15 +49,55 @@ public:
* the source for the character data. The reader will be positioned after
* the terminating quote character or at the terminating delimiting
* character.
+ * @param logger is the logger instance that should be used to log error
+ * messages and warnings.
* @param delims is an optional set of delimiters after which parsing has to
* be stopped (the delimiters may occur inside the actual string, but not
* outside). If nullptr is given, no delimiter is used and a complete string
* is read.
*/
static std::pair<bool, std::string> parseString(
- BufferedCharReader &reader,
- const unordered_set<char> *delims = nullptr,
- Logger *logger = nullptr);
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> *delims);
+
+public:
+ /**
+ * Parses a string which may either be enclosed by " or ', unescapes
+ * entities in the string as specified for JavaScript.
+ *
+ * @param reader is a reference to the BufferedCharReader instance which is
+ * the source for the character data. The reader will be positioned after
+ * the terminating quote character or at the terminating delimiting
+ * character.
+ * @param logger is the logger instance that should be used to log error
+ * messages and warnings.
+ * @param delims is a set of delimiters after which parsing has to
+ * be stopped (the delimiters may occur inside the actual string, but not
+ * outside).
+ */
+ static std::pair<bool, std::string> parseString(
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims)
+ {
+ return parseString(reader, logger, &delims);
+ }
+
+ /**
+ * Parses a string which may either be enclosed by " or ', unescapes
+ * entities in the string as specified for JavaScript.
+ *
+ * @param reader is a reference to the BufferedCharReader instance which is
+ * the source for the character data. The reader will be positioned after
+ * the terminating quote character or at the terminating delimiting
+ * character.
+ * @param logger is the logger instance that should be used to log error
+ * messages and warnings.
+ */
+ static std::pair<bool, std::string> parseString(BufferedCharReader &reader,
+ Logger &logger)
+ {
+ return parseString(reader, logger, nullptr);
+ }
/**
* Extracts an unescaped string from the given buffered char reader
@@ -71,8 +111,8 @@ public:
* These characters are not included in the result. May not be nullptr.
*/
static std::pair<bool, std::string> parseUnescapedString(
- BufferedCharReader &reader, const unordered_set<char> *delims,
- Logger *logger = nullptr);
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims);
/**
* Tries to parse the most specific item from the given stream until one of
@@ -86,8 +126,8 @@ public:
* These characters are not included in the result. May not be nullptr.
*/
static std::pair<bool, Variant> parseGeneric(
- BufferedCharReader &reader, const unordered_set<char> *delims,
- Logger *logger = nullptr);
+ BufferedCharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims);
};
}
}