diff options
-rw-r--r-- | CMakeLists.txt | 4 | ||||
-rw-r--r-- | src/core/BufferedCharReader.cpp | 24 | ||||
-rw-r--r-- | src/core/BufferedCharReader.hpp | 18 | ||||
-rw-r--r-- | src/core/variant/Reader.cpp | 114 | ||||
-rw-r--r-- | src/core/variant/Reader.hpp | 56 | ||||
-rw-r--r-- | test/core/variant/ReaderTest.cpp | 135 |
6 files changed, 299 insertions, 52 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 98b7acb..94b2cc7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,7 +114,7 @@ ADD_LIBRARY(ousia_core # src/core/script/Object # src/core/script/ScriptEngine # src/core/script/Variant -# src/core/variant/Reader + src/core/variant/Reader src/core/variant/Variant ) @@ -166,7 +166,7 @@ IF(TEST) # test/core/script/FunctionTest # test/core/script/ObjectTest # test/core/script/VariantTest -# test/core/variant/ReaderTest + test/core/variant/ReaderTest test/core/variant/VariantTest ) diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp index 23c219a..0821a5d 100644 --- a/src/core/BufferedCharReader.cpp +++ b/src/core/BufferedCharReader.cpp @@ -18,6 +18,8 @@ #include <array> +#include "Utils.hpp" + #include "BufferedCharReader.hpp" namespace ousia { @@ -73,6 +75,15 @@ BufferedCharReader::BufferedCharReader(const std::string &str, int line, buffer.push_back(str); } +BufferedCharReader::BufferedCharReader(const std::string &str) + : inputStream(nullptr), + readCursor(1, 1, true), + peekCursor(1, 1, false), + depleted(true) +{ + buffer.push_back(str); +} + BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line, int column) : inputStream(&inputStream), @@ -218,6 +229,19 @@ void BufferedCharReader::consumePeek() readCursor.assign(peekCursor); } +bool BufferedCharReader::consumeWhitespace() +{ + char c; + while (peek(&c)) { + if (!Utils::isWhitespace(c)) { + resetPeek(); + return true; + } + consumePeek(); + } + return false; +} + void BufferedCharReader::resetPeek() { // Reset the peek cursor to the read cursor diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp index bd19d4a..e7f3186 100644 --- a/src/core/BufferedCharReader.hpp +++ b/src/core/BufferedCharReader.hpp @@ -172,7 +172,6 @@ public: */ BufferedCharReader(int line = 1, int column = 1); - /** * Constructor of the buffered char reader class with a string as input. 
* @@ -180,7 +179,14 @@ public: * @param line is the start line. * @param column is the start column. */ - BufferedCharReader(const std::string &str, int line = 1, int column = 1); + BufferedCharReader(const std::string &str, int line, int column); + + /** + * Constructor of the buffered char reader class with a string as input. + * + * @param str is a string containing the input data. + */ + BufferedCharReader(const std::string &str); /** * Constructor of the buffered char reader class with a string as input. @@ -222,6 +228,14 @@ public: void consumePeek(); /** + * Moves the read cursor to the next non-whitespace character. Returns + * false, if the end of the stream was reached. + * + * @return false if the end of the stream was reached, true otherwise. + */ + bool consumeWhitespace(); + + /** * Resets the peek pointer to the "read" pointer. */ void resetPeek(); diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp index e9a58a1..a0bba52 100644 --- a/src/core/variant/Reader.cpp +++ b/src/core/variant/Reader.cpp @@ -29,21 +29,33 @@ namespace variant { static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character"; static const char *ERR_UNEXPECTED_END = "Unexpected end"; static const char *ERR_UNTERMINATED = "Unterminated literal"; +static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; static const int STATE_INIT = 0; static const int STATE_IN_STRING = 1; static const int STATE_ESCAPE = 2; +static const int STATE_WHITESPACE = 3; -static std::pair<Err, std::string> parseString( - BufferedCharReader &reader, const unordered_set<char> *delims = nullptr, - Logger *logger = nullptr) +template <class T> +static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger, + const char *err, T res) +{ + logger.errorAt(err, reader); + return std::make_pair(false, std::move(res)); +} + +std::pair<bool, std::string> Reader::parseString( + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> *delims) { //
Initialize the internal state - Err errCode = Err::OK; int state = STATE_INIT; char quote = 0; std::stringstream res; + // Consume all whitespace + reader.consumeWhitespace(); + // Statemachine whic iterates over each character in the stream // TODO: Combination of peeking and consumePeek is stupid as consumePeek is // the default (read and putBack would obviously be better, yet the latter @@ -55,29 +67,28 @@ static std::pair<Err, std::string> parseString( if (c == '"' || c == '\'') { quote = c; state = STATE_IN_STRING; - } else if (delims && delims.count(c)) { - Logger.log(ERR_UNTERMINATED, reader); - return std::make_pair(Err::UNEXPECTED_END, res.str()); - } else if (Utils::isWhitespace(c)) { - reader.consumePeek(); - continue; + break; + } else if (delims && delims->count(c)) { + return error(reader, logger, ERR_UNEXPECTED_END, res.str()); } - return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str()); - break; + return error(reader, logger, ERR_UNEXPECTED_CHARACTER, + res.str()); case STATE_IN_STRING: - if (c == q) { - state = STATE_END; + if (c == quote) { reader.consumePeek(); - return std::make_pair(Err::OK, res.str()); + return std::make_pair(true, res.str()); } else if (c == '\\') { state = STATE_ESCAPE; + reader.consumePeek(); + break; } else if (c == '\n') { - return std::make_pair(Err::UNTERMINATED, res.str()); + return error(reader, logger, ERR_UNTERMINATED, res.str()); } res << c; reader.consumePeek(); break; case STATE_ESCAPE: + // Handle all possible special escape characters switch (c) { case 'b': res << '\b'; @@ -118,67 +129,90 @@ static std::pair<Err, std::string> parseString( if (Utils::isNumeric(c)) { // TODO: Parse octal 000 sequence } else { - errCode = Err::ERR_INVALID_ESCAPE; + logger.errorAt(ERR_INVALID_ESCAPE, reader); } break; } + + // Switch back to the "normal" state state = STATE_IN_STRING; reader.consumePeek(); break; } } - return std::make_pair(Err::UNEXPECTED_END, res.str()); + return error(reader, logger, ERR_UNEXPECTED_END, 
res.str()); } -static std::pair<Err, std::string> parseUnescapedString( - BufferedCharReader &reader, const unordered_set<char> *delims) +std::pair<bool, std::string> Reader::parseUnescapedString( + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) { - assert(delims); - std::stringstream res; + std::stringstream buf; char c; + + // Consume all whitespace + reader.consumeWhitespace(); + + // Copy all characters, skip whitespace at the end + int state = STATE_IN_STRING; while (reader.peek(&c)) { - if (delims->count(c)) { - return std::make_pair(Err::OK, res.str()); + if (delims.count(c)) { + return std::make_pair(true, res.str()); + } else if (Utils::isWhitespace(c)) { + // Do not add whitespace to the output buffer + state = STATE_WHITESPACE; + buf << c; + } else { + // If we just hat a sequence of whitespace, append it to the output + // buffer and continue + if (state == STATE_WHITESPACE) { + res << buf.str(); + buf.str(std::string{}); + buf.clear(); + state = STATE_IN_STRING; + } + res << c; } - res << c; reader.consumePeek(); } - return std::make_pair(Err::UNEXPECTED_END, res.str()); + return std::make_pair(true, res.str()); } -static std::pair<Err, Variant> parseGeneric(BufferedCharReader &reader, - const unordered_set<char> *delims) +std::pair<bool, Variant> Reader::parseGeneric( + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) { - assert(delims); - char c; + + // Skip all whitespace characters + reader.consumeWhitespace(); + while (reader.peek(&c)) { - // Stop if a delimiter is reached, skipp all whitespace characters - if (delims->count(c)) { - return std::make_pair(Err::OK, res.str()); - } else if (Utils::isWhitespace(c)) { - reader.consumePeek(); - continue; + // Stop if a delimiter is reached + if (delims.count(c)) { + return error(reader, logger, ERR_UNEXPECTED_END, nullptr); } // Parse a string if a quote is reached if (c == '"' || c == '\'') { - return parseString(reader, 
nullptr); + auto res = parseString(reader, logger); + return std::make_pair(res.first, res.second.c_str()); } if (c == '[') { // TODO: Parse struct descriptor } - if (isNumeric(c)) { + if (Utils::isNumeric(c)) { // TODO: Parse integer/double } // Parse an unescaped string in any other case - return parseUnescapedString(reader, delims); + auto res = parseUnescapedString(reader, logger, delims); + return std::make_pair(res.first, res.second.c_str()); } - return std::make_pair(Err::UNEXPECTED_END, res.str()); + return error(reader, logger, ERR_UNEXPECTED_END, nullptr); } } } diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp index 339127f..62592c1 100644 --- a/src/core/variant/Reader.hpp +++ b/src/core/variant/Reader.hpp @@ -40,7 +40,7 @@ namespace ousia { namespace variant { class Reader { -public: +private: /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. @@ -49,15 +49,55 @@ public: * the source for the character data. The reader will be positioned after * the terminating quote character or at the terminating delimiting * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. * @param delims is an optional set of delimiters after which parsing has to * be stopped (the delimiters may occur inside the actual string, but not * outside). If nullptr is given, no delimiter is used and a complete string * is read. */ static std::pair<bool, std::string> parseString( - BufferedCharReader &reader, - const unordered_set<char> *delims = nullptr, - Logger *logger = nullptr); + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> *delims); + +public: + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param reader is a reference to the BufferedCharReader instance which is + * the source for the character data. 
The reader will be positioned after + * the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + * @param delims is a set of delimiters after which parsing has to + * be stopped (the delimiters may occur inside the actual string, but not + * outside). + */ + static std::pair<bool, std::string> parseString( + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) + { + return parseString(reader, logger, &delims); + } + + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param reader is a reference to the BufferedCharReader instance which is + * the source for the character data. The reader will be positioned after + * the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + */ + static std::pair<bool, std::string> parseString(BufferedCharReader &reader, + Logger &logger) + { + return parseString(reader, logger, nullptr); + } /** * Extracts an unescaped string from the given buffered char reader @@ -71,8 +111,8 @@ public: * These characters are not included in the result. May not be nullptr. */ static std::pair<bool, std::string> parseUnescapedString( - BufferedCharReader &reader, const unordered_set<char> *delims, - Logger *logger = nullptr); + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims); /** * Tries to parse the most specific item from the given stream until one of @@ -86,8 +126,8 @@ public: * These characters are not included in the result. May not be nullptr. 
*/ static std::pair<bool, Variant> parseGeneric( - BufferedCharReader &reader, const unordered_set<char> *delims, - Logger *logger = nullptr); + BufferedCharReader &reader, Logger &logger, + const std::unordered_set<char> &delims); }; } } diff --git a/test/core/variant/ReaderTest.cpp b/test/core/variant/ReaderTest.cpp new file mode 100644 index 0000000..760760b --- /dev/null +++ b/test/core/variant/ReaderTest.cpp @@ -0,0 +1,135 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <iostream> +#include <gtest/gtest.h> + +#include <core/variant/Reader.hpp> + +namespace ousia { +namespace variant { + +TEST(Reader, readString) +{ + TerminalLogger logger(std::cerr, true); + + // Simple, double quoted string + { + BufferedCharReader reader("\"hello world\""); + auto res = Reader::parseString(reader, logger); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Simple, double quoted string with whitespace + { + BufferedCharReader reader(" \"hello world\" "); + auto res = Reader::parseString(reader, logger); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Simple, single quoted string + { + BufferedCharReader reader("'hello world'"); + auto res = Reader::parseString(reader, logger); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Escape characters + { + BufferedCharReader reader("'\\'\\\"\\b\\f\\n\\r\\t\\v'"); + auto res = Reader::parseString(reader, logger); + ASSERT_TRUE(res.first); + ASSERT_EQ("'\"\b\f\n\r\t\v", res.second); + } +} + +TEST(Reader, parseUnescapedString) +{ + TerminalLogger logger(std::cerr, true); + + // Simple case + { + BufferedCharReader reader("hello world;"); + auto res = Reader::parseUnescapedString(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Simple case with whitespace + { + BufferedCharReader reader(" hello world ; "); + auto res = Reader::parseUnescapedString(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Linebreaks + { + BufferedCharReader reader(" hello\nworld ; "); + auto res = Reader::parseUnescapedString(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello\nworld", res.second); + } + + // End of stream + { + BufferedCharReader reader(" hello world "); + auto res = Reader::parseUnescapedString(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } +} + 
+TEST(Reader, parseGeneric) +{ + TerminalLogger logger(std::cerr, true); + + // Simple case, unescaped string + { + BufferedCharReader reader("hello world"); + auto res = Reader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_EQ("hello world", res.second.asString()); + } + + // Simple case, double quoted string + { + BufferedCharReader reader(" \"hello world\" "); + auto res = Reader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_EQ("hello world", res.second.asString()); + } + + // Simple case, single quoted string + { + BufferedCharReader reader(" 'hello world' "); + auto res = Reader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_EQ("hello world", res.second.asString()); + } +} + +} +} + |