diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/variant/Reader.cpp | 177 | ||||
-rw-r--r-- | src/core/variant/Reader.hpp | 130 | ||||
-rw-r--r-- | src/core/variant/Variant.hpp | 6 |
3 files changed, 312 insertions, 1 deletions
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
new file mode 100644
index 0000000..6142ecf
--- /dev/null
+++ b/src/core/variant/Reader.cpp
@@ -0,0 +1,177 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cassert>
+#include <sstream>
+#include <string>
+
+#include <core/Utils.hpp>
+
+#include "Reader.hpp"
+
+namespace ousia {
+namespace variant {
+
+static const int STATE_INIT = 0;
+static const int STATE_IN_STRING = 1;
+static const int STATE_ESCAPE = 2;
+
+std::pair<Reader::Err, std::string> Reader::parseString(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+    // Initialize the internal state
+    Err errCode = Err::OK;
+    int state = STATE_INIT;
+    char quote = 0;
+    std::stringstream res;
+
+    // State machine which iterates over each character in the stream
+    // TODO: Combination of peeking and consumePeek is stupid as consumePeek is
+    // the default (read and putBack would obviously be better, yet the latter
+    // is not trivial to implement in the current BufferedCharReader).
+    char c;
+    while (reader.peek(&c)) {
+        switch (state) {
+            case STATE_INIT:
+                // Skip ahead to the opening quote, but stop at a delimiter
+                if (c == '"' || c == '\'') {
+                    quote = c;
+                    state = STATE_IN_STRING;
+                } else if (delims && delims->count(c)) {
+                    return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+                }
+                reader.consumePeek();
+                break;
+            case STATE_IN_STRING:
+                if (c == quote) {
+                    // Closing quote: report any escape error seen on the way
+                    reader.consumePeek();
+                    return std::make_pair(errCode, res.str());
+                } else if (c == '\\') {
+                    // The backslash itself is not part of the result
+                    state = STATE_ESCAPE;
+                } else if (c == '\n') {
+                    return std::make_pair(Err::ERR_UNTERMINATED, res.str());
+                } else {
+                    res << c;
+                }
+                reader.consumePeek();
+                break;
+            case STATE_ESCAPE:
+                switch (c) {
+                    case 'b':
+                        res << '\b';
+                        break;
+                    case 'f':
+                        res << '\f';
+                        break;
+                    case 'n':
+                        res << '\n';
+                        break;
+                    case 'r':
+                        res << '\r';
+                        break;
+                    case 't':
+                        res << '\t';
+                        break;
+                    case 'v':
+                        res << '\v';
+                        break;
+                    case '\'':
+                    case '"':
+                    case '\\':
+                        res << c;
+                        break;
+                    case '\n':
+                        // Escaped line break: nothing is appended
+                        break;
+                    case 'x':
+                        // TODO: Parse Latin-1 sequence hex XX
+                        break;
+                    case 'u':
+                        // TODO: Parse 16-Bit unicode character hex XXXX
+                        break;
+                    default:
+                        // TODO: Parse octal 000 sequence if c is numeric
+                        if (!Utils::isNumeric(c)) {
+                            errCode = Err::ERR_INVALID_ESCAPE;
+                        }
+                        break;
+                }
+                state = STATE_IN_STRING;
+                reader.consumePeek();
+                break;
+        }
+    }
+    return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+}
+
+std::pair<Reader::Err, std::string> Reader::parseUnescapedString(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+    assert(delims);
+
+    std::stringstream res;
+    char c;
+    while (reader.peek(&c)) {
+        if (delims->count(c)) {
+            return std::make_pair(Err::OK, res.str());
+        }
+        res << c;
+        reader.consumePeek();
+    }
+    return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+}
+
+std::pair<Reader::Err, Variant> Reader::parseGeneric(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+    assert(delims);
+
+    // NOTE(review): the std::string results below rely on std::pair's
+    // converting constructor -- confirm that Variant is implicitly
+    // constructible from std::string.
+    char c;
+    while (reader.peek(&c)) {
+        // Stop if a delimiter is reached, skip all whitespace characters
+        if (delims->count(c)) {
+            return std::make_pair(Err::OK, std::string{});
+        } else if (Utils::isWhitespace(c)) {
+            reader.consumePeek();
+            continue;
+        }
+
+        // Parse a string if a quote is reached
+        // NOTE(review): c was peeked but not consumed -- confirm that the
+        // sub-parsers see it again on their first peek.
+        if (c == '"' || c == '\'') {
+            return parseString(reader, nullptr);
+        }
+
+        // TODO: Parse struct descriptor if c is '['
+        // TODO: Parse integer/double if c is numeric
+
+        // Parse an unescaped string in any other case
+        return parseUnescapedString(reader, delims);
+    }
+    return std::make_pair(Err::ERR_UNEXPECTED_END, std::string{});
+}
+
+}
+}
+
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
new file mode 100644
index 0000000..3f945f0
--- /dev/null
+++ b/src/core/variant/Reader.hpp
@@ -0,0 +1,130 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Reader.hpp
+ *
+ * Provides parsers for various micro formats. These formats include integers,
+ * doubles, strings, JSON and the Ousía struct notation.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_VARIANT_READER_HPP_
+#define _OUSIA_VARIANT_READER_HPP_
+
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+#include <core/BufferedCharReader.hpp>
+
+#include "Variant.hpp"
+
+namespace ousia {
+namespace variant {
+
+/**
+ * The Reader class groups static functions which parse micro formats from a
+ * BufferedCharReader. Each function returns a pair consisting of an error
+ * code and the value that has been read.
+ */
+class Reader {
+public:
+    // TODO: Pass logger instance instead of using error codes?
+
+    /**
+     * The Err enum describes possible error codes that may be encountered when
+     * parsing the microtypes.
+     */
+    enum class Err : int {
+        /** Reached the end of the stream, but expected more data. */
+        ERR_UNEXPECTED_END = -1,
+
+        /** The stream is malformed. */
+        ERR_MALFORMED = -2,
+
+        /** Unexpected character. */
+        ERR_UNEXPECTED_CHARACTER = -3,
+
+        /** Unterminated literal. */
+        ERR_UNTERMINATED = -4,
+
+        /** Invalid escape character. */
+        ERR_INVALID_ESCAPE = -5,
+
+        /** A value of the requested type was extracted successfully. */
+        OK = 0
+    };
+
+    /**
+     * Parses a string which may either be enclosed by " or ', unescapes
+     * entities in the string as specified for JavaScript.
+     *
+     * @param reader is a reference to the BufferedCharReader instance which is
+     * the source for the character data. The reader will be positioned after
+     * the terminating quote character or at the terminating delimiting
+     * character.
+     * @param delims is an optional set of delimiters after which parsing has to
+     * be stopped (the delimiters may occur inside the actual string, but not
+     * outside). If nullptr is given, no delimiter is used and a complete string
+     * is read.
+     * @return a pair containing the error code and the unescaped content of
+     * the string that has been read so far.
+     */
+    static std::pair<Err, std::string> parseString(
+        BufferedCharReader &reader,
+        const std::unordered_set<char> *delims = nullptr);
+
+    /**
+     * Extracts an unescaped string from the given buffered char reader
+     * instance. This function just reads text until one of the given delimiter
+     * characters is reached.
+     *
+     * @param reader is a reference to the BufferedCharReader instance which is
+     * the source for the character data. The reader will be positioned at the
+     * terminating delimiting character.
+     * @param delims is a set of characters which will terminate the string.
+     * These characters are not included in the result. May not be nullptr.
+     * @return a pair containing the error code and the text that has been
+     * read up to the delimiter or the end of the stream.
+     */
+    static std::pair<Err, std::string> parseUnescapedString(
+        BufferedCharReader &reader, const std::unordered_set<char> *delims);
+
+    /**
+     * Tries to parse the most specific item from the given stream until one of
+     * the given delimiters is reached or a meaningful literal has been read.
+     * The resulting variant represents the value that has been read.
+     *
+     * @param reader is a reference to the BufferedCharReader instance which is
+     * the source for the character data. The reader will be positioned at the
+     * terminating delimiting character.
+     * @param delims is a set of characters which will terminate the string.
+     * These characters are not included in the result. May not be nullptr.
+     * @return a pair containing the error code and the variant that has been
+     * read.
+     */
+    static std::pair<Err, Variant> parseGeneric(
+        BufferedCharReader &reader, const std::unordered_set<char> *delims);
+};
+}
+}
+
+#endif /* _OUSIA_VARIANT_READER_HPP_ */
+
diff --git a/src/core/variant/Variant.hpp b/src/core/variant/Variant.hpp
index d65e14a..6476780 100644
--- a/src/core/variant/Variant.hpp
+++ b/src/core/variant/Variant.hpp
@@ -42,6 +42,7 @@
 #include <core/Exceptions.hpp>
 
 namespace ousia {
+namespace variant {
 
 /**
  * Instances of the Variant class represent any kind of data that is exchanged
@@ -680,13 +681,16 @@ public:
      * Prints a key value pair to the output stream.
*/ friend std::ostream &operator<<(std::ostream &os, - const mapType::value_type &v) + const mapType::value_type &v) { // TODO: Use proper serialization function return os << "\"" << v.first << "\": " << v.second.toString(true); } }; +} +// Alias for the (very often used and unambiguous) variant class +using Variant = variant::Variant; } #endif /* _OUSIA_VARIANT_HPP_ */ |