diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-05 16:08:34 +0100 |
---|---|---|
committer | Andreas Stöckel <andreas@somweyr.de> | 2014-12-05 16:08:34 +0100 |
commit | e06e7ae19851acf5e397f579d6c8459e87086d30 (patch) | |
tree | 2140f3d79239d6f0ebd5c08f1fb48b327586249e /src | |
parent | bf59bc2edbb1f3f4d12bfbd8ed2663fbbb1900c0 (diff) |
added string reading functions of the Reader class
Diffstat (limited to 'src')
-rw-r--r-- | src/core/BufferedCharReader.cpp | 24 | ||||
-rw-r--r-- | src/core/BufferedCharReader.hpp | 18 | ||||
-rw-r--r-- | src/core/variant/Reader.cpp | 114 | ||||
-rw-r--r-- | src/core/variant/Reader.hpp | 56 |
4 files changed, 162 insertions, 50 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp index 23c219a..0821a5d 100644 --- a/src/core/BufferedCharReader.cpp +++ b/src/core/BufferedCharReader.cpp @@ -18,6 +18,8 @@ #include <array> +#include "Utils.hpp" + #include "BufferedCharReader.hpp" namespace ousia { @@ -73,6 +75,15 @@ BufferedCharReader::BufferedCharReader(const std::string &str, int line, buffer.push_back(str); } +BufferedCharReader::BufferedCharReader(const std::string &str) + : inputStream(nullptr), + readCursor(1, 1, true), + peekCursor(1, 1, false), + depleted(true) +{ + buffer.push_back(str); +} + BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line, int column) : inputStream(&inputStream), @@ -218,6 +229,19 @@ void BufferedCharReader::consumePeek() readCursor.assign(peekCursor); } +bool BufferedCharReader::consumeWhitespace() +{ + char c; + while (peek(&c)) { + if (!Utils::isWhitespace(c)) { + resetPeek(); + return true; + } + consumePeek(); + } + return false; +} + void BufferedCharReader::resetPeek() { // Reset the peek cursor to the read cursor diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp index bd19d4a..e7f3186 100644 --- a/src/core/BufferedCharReader.hpp +++ b/src/core/BufferedCharReader.hpp @@ -172,7 +172,6 @@ public: */ BufferedCharReader(int line = 1, int column = 1); - /** * Constructor of the buffered char reader class with a string as input. * @@ -180,7 +179,14 @@ public: * @param line is the start line. * @param column is the start column. */ - BufferedCharReader(const std::string &str, int line = 1, int column = 1); + BufferedCharReader(const std::string &str, int line, int column); + + /** + * Constructor of the buffered char reader class with a string as input. + * + * @param str is a string containing the input data. + */ + BufferedCharReader(const std::string &str); /** * Constructor of the buffered char reader class with a string as input. 
@@ -222,6 +228,14 @@ public: void consumePeek(); /** + * Moves the read cursor to the next non-whitespace character. Returns + * false, if the end of the stream was reached. + * + * @return false if the end of the stream was reached, true otherwise. + */ + bool consumeWhitespace(); + + /** * Resets the peek pointer to the "read" pointer. */ void resetPeek(); diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp index e9a58a1..a0bba52 100644 --- a/src/core/variant/Reader.cpp +++ b/src/core/variant/Reader.cpp @@ -29,21 +29,33 @@ namespace variant { static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character"; static const char *ERR_UNEXPECTED_END = "Unexpected end"; static const char *ERR_UNTERMINATED = "Unterminated literal"; +static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; static const int STATE_INIT = 0; static const int STATE_IN_STRING = 1; static const int STATE_ESCAPE = 2; +static const int STATE_WHITESPACE = 3; -static std::pair<Err, std::string> parseString( - BufferedCharReader &reader, const unordered_set<char> *delims = nullptr, - Logger *logger = nullptr) +template <class T> +static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger, + const char *err, T res) +{ + logger.errorAt(err, reader); + return std::make_pair(false, std::move(res)); +} + +std::pair<bool, std::string> Reader::parseString( + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> *delims) { // Initialize the internal state - Err errCode = Err::OK; int state = STATE_INIT; char quote = 0; std::stringstream res; + // Consume all whitespace + reader.consumeWhitespace(); + // State machine which iterates over each character in the stream // TODO: Combination of peeking and consumePeek is stupid as consumePeek is // the default (read and putBack would obviously be better, yet the latter @@ -55,29 +67,28 @@ static std::pair<Err, std::string> parseString( if (c == '"' || c == '\'') { quote = c; state = 
STATE_IN_STRING; - } else if (delims && delims.count(c)) { - Logger.log(ERR_UNTERMINATED, reader); - return std::make_pair(Err::UNEXPECTED_END, res.str()); - } else if (Utils::isWhitespace(c)) { - reader.consumePeek(); - continue; + break; + } else if (delims && delims->count(c)) { + return error(reader, logger, ERR_UNEXPECTED_END, res.str()); } - return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str()); - break; + return error(reader, logger, ERR_UNEXPECTED_CHARACTER, + res.str()); case STATE_IN_STRING: - if (c == q) { - state = STATE_END; + if (c == quote) { reader.consumePeek(); - return std::make_pair(Err::OK, res.str()); + return std::make_pair(true, res.str()); } else if (c == '\\') { state = STATE_ESCAPE; + reader.consumePeek(); + break; } else if (c == '\n') { - return std::make_pair(Err::UNTERMINATED, res.str()); + return error(reader, logger, ERR_UNTERMINATED, res.str()); } res << c; reader.consumePeek(); break; case STATE_ESCAPE: + // Handle all possible special escape characters switch (c) { case 'b': res << '\b'; @@ -118,67 +129,90 @@ static std::pair<Err, std::string> parseString( if (Utils::isNumeric(c)) { // TODO: Parse octal 000 sequence } else { - errCode = Err::ERR_INVALID_ESCAPE; + logger.errorAt(ERR_INVALID_ESCAPE, reader); } break; } + + // Switch back to the "normal" state state = STATE_IN_STRING; reader.consumePeek(); break; } } - return std::make_pair(Err::UNEXPECTED_END, res.str()); + return error(reader, logger, ERR_UNEXPECTED_END, res.str()); } -static std::pair<Err, std::string> parseUnescapedString( - BufferedCharReader &reader, const unordered_set<char> *delims) +std::pair<bool, std::string> Reader::parseUnescapedString( + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) { - assert(delims); - std::stringstream res; + std::stringstream buf; char c; + + // Consume all whitespace + reader.consumeWhitespace(); + + // Copy all characters, skip whitespace at the end + int state = STATE_IN_STRING; 
while (reader.peek(&c)) { - if (delims->count(c)) { - return std::make_pair(Err::OK, res.str()); + if (delims.count(c)) { + return std::make_pair(true, res.str()); + } else if (Utils::isWhitespace(c)) { + // Do not add whitespace to the output buffer + state = STATE_WHITESPACE; + buf << c; + } else { + // If we just had a sequence of whitespace, append it to the output + // buffer and continue + if (state == STATE_WHITESPACE) { + res << buf.str(); + buf.str(std::string{}); + buf.clear(); + state = STATE_IN_STRING; + } + res << c; } - res << c; reader.consumePeek(); } - return std::make_pair(Err::UNEXPECTED_END, res.str()); + return std::make_pair(true, res.str()); } -static std::pair<Err, Variant> parseGeneric(BufferedCharReader &reader, - const unordered_set<char> *delims) +std::pair<bool, Variant> Reader::parseGeneric( + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) { - assert(delims); - char c; + + // Skip all whitespace characters + reader.consumeWhitespace(); + while (reader.peek(&c)) { - // Stop if a delimiter is reached, skipp all whitespace characters - if (delims->count(c)) { - return std::make_pair(Err::OK, res.str()); - } else if (Utils::isWhitespace(c)) { - reader.consumePeek(); - continue; + // Stop if a delimiter is reached + if (delims.count(c)) { + return error(reader, logger, ERR_UNEXPECTED_END, nullptr); } // Parse a string if a quote is reached if (c == '"' || c == '\'') { - return parseString(reader, nullptr); + auto res = parseString(reader, logger); + return std::make_pair(res.first, res.second.c_str()); } if (c == '[') { // TODO: Parse struct descriptor } - if (isNumeric(c)) { + if (Utils::isNumeric(c)) { // TODO: Parse integer/double } // Parse an unescaped string in any other case - return parseUnescapedString(reader, delims); + auto res = parseUnescapedString(reader, logger, delims); + return std::make_pair(res.first, res.second.c_str()); } - return std::make_pair(Err::UNEXPECTED_END, res.str()); + 
return error(reader, logger, ERR_UNEXPECTED_END, nullptr); } } } diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp index 339127f..62592c1 100644 --- a/src/core/variant/Reader.hpp +++ b/src/core/variant/Reader.hpp @@ -40,7 +40,7 @@ namespace ousia { namespace variant { class Reader { -public: +private: /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. @@ -49,15 +49,55 @@ public: * the source for the character data. The reader will be positioned after * the terminating quote character or at the terminating delimiting * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. * @param delims is an optional set of delimiters after which parsing has to * be stopped (the delimiters may occur inside the actual string, but not * outside). If nullptr is given, no delimiter is used and a complete string * is read. */ static std::pair<bool, std::string> parseString( - BufferedCharReader &reader, - const unordered_set<char> *delims = nullptr, - Logger *logger = nullptr); + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> *delims); + +public: + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param reader is a reference to the BufferedCharReader instance which is + * the source for the character data. The reader will be positioned after + * the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + * @param delims is a set of delimiters after which parsing has to + * be stopped (the delimiters may occur inside the actual string, but not + * outside). 
+ */ + static std::pair<bool, std::string> parseString( + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) + { + return parseString(reader, logger, &delims); + } + + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param reader is a reference to the BufferedCharReader instance which is + * the source for the character data. The reader will be positioned after + * the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + */ + static std::pair<bool, std::string> parseString(BufferedCharReader &reader, + Logger &logger) + { + return parseString(reader, logger, nullptr); + } /** * Extracts an unescaped string from the given buffered char reader @@ -71,8 +111,8 @@ public: * These characters are not included in the result. May not be nullptr. */ static std::pair<bool, std::string> parseUnescapedString( - BufferedCharReader &reader, const unordered_set<char> *delims, - Logger *logger = nullptr); + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims); /** * Tries to parse the most specific item from the given stream until one of @@ -86,8 +126,8 @@ public: * These characters are not included in the result. May not be nullptr. */ static std::pair<bool, Variant> parseGeneric( - BufferedCharReader &reader, const unordered_set<char> *delims, - Logger *logger = nullptr); + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims); }; } } |