summaryrefslogtreecommitdiff
path: root/src/core/variant/Reader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/variant/Reader.cpp')
-rw-r--r--src/core/variant/Reader.cpp624
1 files changed, 0 insertions, 624 deletions
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
deleted file mode 100644
index 5c167cd..0000000
--- a/src/core/variant/Reader.cpp
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <iostream>
-
-#include <cmath>
-#include <sstream>
-
-#include <core/Utils.hpp>
-
-#include "Reader.hpp"
-
-namespace ousia {
-namespace variant {
-
-// TODO: Better error messages (like "Expected 'x' but got 'y'")
-// TODO: Replace delims with single char delim where possible
-// TODO: Use custom return value instead of std::pair
-// TODO: Allow buffered char reader to "fork"
-// TODO: Rename CharReader to shorter CharReader
-// TODO: Implement context in CharReader (to allow error messages to extract the
-// current line)
-
-/* Error Messages
- *
- * Message texts passed to logger.errorAt() by the parsers in this file:
- *
- *  ERR_UNEXPECTED_CHAR  a character is not valid at the current position
- *                       (e.g. a non-digit inside a number, a missing opening
- *                       quote or a missing comma between array elements).
- *  ERR_UNEXPECTED_END   a delimiter or the end of the input was reached before
- *                       the literal was complete.
- *  ERR_UNTERMINATED     a quoted string contains an unescaped line break.
- *  ERR_INVALID_ESCAPE   unknown character following a backslash in a string.
- *  ERR_INVALID_INTEGER  parseInteger read a number with a fractional part or
- *                       an exponent.
- *  ERR_TOO_LARGE        a component of a number does not fit its integer type.
- */
-
-static const char *ERR_UNEXPECTED_CHAR = "Unexpected character";
-static const char *ERR_UNEXPECTED_END = "Unexpected literal end";
-static const char *ERR_UNTERMINATED = "Unterminated literal";
-static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence";
-static const char *ERR_INVALID_INTEGER = "Invalid integer value";
-static const char *ERR_TOO_LARGE = "Value too large to represent";
-
-/* Class Number */
-
-/**
- * Class used internally to represent a number (integer or double). The number
- * is stored as its components: sign s, base (integral) value a, fraction n / d
- * (nominator n, denominator d) and exponent e with exponent sign sE. The
- * represented value is s * (a + n / d) * 10^(sE * e).
- */
-class Number {
-private:
-    /**
-     * Represents the part of the number a digit is appended to: base value a,
-     * nominator n (which also scales the denominator d) or exponent e.
-     */
-    enum class Part { A, N, E };
-
-    /**
-     * State used in the parser state machine.
-     */
-    enum class State {
-        INIT,
-        HAS_MINUS,
-        LEADING_ZERO,
-        LEADING_POINT,
-        INT,
-        HEX_INIT,
-        HEX,
-        POINT,
-        EXP_INIT,
-        EXP_HAS_MINUS,
-        EXP
-    };
-
-    /**
-     * Returns the numeric value of the given ASCII character (returns 0 for
-     * '0', 1 for '1', 10 for 'A' or 'a' and so on up to 24 for 'O' or 'o').
-     *
-     * @param c is the character for which the numeric value should be returned.
-     * @return the numeric value the character represents or -1 if the
-     * character is neither a digit nor one of the supported letters.
-     */
-    static int charValue(char c)
-    {
-        if (c >= '0' && c <= '9') {
-            return c & 0x0F;
-        }
-        if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) {
-            return (c & 0x0F) + 9;
-        }
-        return -1;
-    }
-
-    /**
-     * Appends the value of the character c to the given part of the internal
-     * number representation and reports any errors that might occur.
-     *
-     * @param c is the digit character that should be appended.
-     * @param base is the base in which c is interpreted (10 or 16 here).
-     * @param p selects the component the digit is appended to.
-     * @param reader is passed to the logger together with error messages.
-     * @param logger receives ERR_UNEXPECTED_CHAR if c is no digit in the given
-     * base and ERR_TOO_LARGE if the component cannot hold another digit.
-     * @return true if the digit was appended, false if an error was logged. In
-     * the error case the number is left unchanged.
-     */
-    bool appendChar(char c, int base, Part p, CharReader &reader,
-                    Logger &logger)
-    {
-        // Check whether the given character is a valid digit in this base
-        const int v = charValue(c);
-        if (v < 0 || v >= base) {
-            logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
-            return false;
-        }
-
-        // Test for overflow *before* touching the component. Signed overflow
-        // is undefined behaviour and the 16 bit exponent may wrap around to a
-        // non-negative value, so inspecting the sign afterwards is unreliable.
-        bool fits = false;
-        switch (p) {
-            case Part::A:
-                fits = a <= (INT64_MAX - v) / base;
-                if (fits) {
-                    a = a * base + v;
-                }
-                break;
-            case Part::N:
-                fits = d <= INT64_MAX / base && n <= (INT64_MAX - v) / base;
-                if (fits) {
-                    n = n * base + v;
-                    d = d * base;
-                }
-                break;
-            case Part::E:
-                fits = e <= (INT16_MAX - v) / base;
-                if (fits) {
-                    e = static_cast<int16_t>(e * base + v);
-                }
-                break;
-        }
-
-        if (!fits) {
-            logger.errorAt(ERR_TOO_LARGE, reader);
-            return false;
-        }
-        return true;
-    }
-
-public:
-    /**
-     * Sign and exponent sign (either 1 or -1).
-     */
-    int8_t s, sE;
-
-    /**
-     * Exponent
-     */
-    int16_t e;
-
-    /**
-     * Base value, nominator, denominator
-     */
-    int64_t a, n, d;
-
-    /**
-     * Constructor of the number class, initializes the number to zero.
-     */
-    Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {}
-
-    /**
-     * Returns the represented double value.
-     */
-    double doubleValue() const
-    {
-        const double fraction =
-            static_cast<double>(n) / static_cast<double>(d);
-        return s * (a + fraction) *
-               std::pow(10.0, static_cast<double>(sE * e));
-    }
-
-    /**
-     * Returns the represented integer value. Only a lossless operation, if the
-     * number is an integer (as can be checked via the isInt method), otherwise
-     * the exponent and the fractional value will be truncated.
-     */
-    int64_t intValue() const { return s * a; }
-
-    /**
-     * Returns true, if the number is an integer (has no fractional or
-     * exponential part).
-     */
-    bool isInt() const { return (n == 0) && (d == 1) && (e == 0); }
-
-    /**
-     * Tries to parse the number from the given stream and logs any errors to
-     * the given logger instance. Numbers are terminated by whitespace, by one
-     * of the given delimiters or by the end of the input; the terminating
-     * character is not consumed.
-     *
-     * @param reader is the reader the number is read from.
-     * @param logger is the logger errors are reported to.
-     * @param delims is the set of characters that terminate the number.
-     * @return true if a complete number was read, false otherwise.
-     */
-    bool parse(CharReader &reader, Logger &logger,
-               const std::unordered_set<char> &delims)
-    {
-        State state = State::INIT;
-        char c;
-
-        // Consume the first whitespace characters
-        reader.consumeWhitespace();
-
-        // Iterate over the FSM to extract numbers
-        while (reader.peek(c)) {
-            // Abort, once a delimiter or whitespace is reached
-            if (Utils::isWhitespace(c) || delims.count(c)) {
-                reader.resetPeek();
-                break;
-            }
-
-            // The character is not a whitespace character and not a delimiter
-            switch (state) {
-                case State::INIT:
-                case State::HAS_MINUS:
-                    switch (c) {
-                        case '-':
-                            // Do not allow multiple minus signs
-                            if (state == State::HAS_MINUS) {
-                                logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
-                                return false;
-                            }
-                            state = State::HAS_MINUS;
-                            s = -1;
-                            break;
-                        case '0':
-                            // Remember a leading zero for the detection of "0x"
-                            state = State::LEADING_ZERO;
-                            break;
-                        case '.':
-                            // Remember a leading point as ".eXXX" is invalid
-                            state = State::LEADING_POINT;
-                            break;
-                        default:
-                            state = State::INT;
-                            if (!appendChar(c, 10, Part::A, reader, logger)) {
-                                return false;
-                            }
-                            break;
-                    }
-                    break;
-                case State::LEADING_ZERO:
-                    if (c == 'x' || c == 'X') {
-                        // At least one hex digit has to follow the prefix
-                        state = State::HEX_INIT;
-                        break;
-                    }
-                    // fallthrough
-                case State::INT:
-                    switch (c) {
-                        case '.':
-                            state = State::POINT;
-                            break;
-                        case 'e':
-                        case 'E':
-                            state = State::EXP_INIT;
-                            break;
-                        default:
-                            state = State::INT;
-                            if (!appendChar(c, 10, Part::A, reader, logger)) {
-                                return false;
-                            }
-                            break;
-                    }
-                    break;
-                case State::HEX_INIT:
-                case State::HEX:
-                    state = State::HEX;
-                    if (!appendChar(c, 16, Part::A, reader, logger)) {
-                        return false;
-                    }
-                    break;
-                case State::LEADING_POINT:
-                case State::POINT:
-                    switch (c) {
-                        case 'e':
-                        case 'E':
-                            if (state == State::LEADING_POINT) {
-                                logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
-                                return false;
-                            }
-                            state = State::EXP_INIT;
-                            break;
-                        default:
-                            state = State::POINT;
-                            if (!appendChar(c, 10, Part::N, reader, logger)) {
-                                return false;
-                            }
-                            break;
-                    }
-                    break;
-                case State::EXP_HAS_MINUS:
-                case State::EXP_INIT:
-                    if (c == '-') {
-                        // Do not allow multiple minus signs in the exponent
-                        if (state == State::EXP_HAS_MINUS) {
-                            logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
-                            return false;
-                        }
-                        state = State::EXP_HAS_MINUS;
-                        sE = -1;
-                    } else {
-                        state = State::EXP;
-                        if (!appendChar(c, 10, Part::E, reader, logger)) {
-                            return false;
-                        }
-                    }
-                    break;
-                case State::EXP:
-                    if (!appendChar(c, 10, Part::E, reader, logger)) {
-                        return false;
-                    }
-                    break;
-            }
-            reader.consumePeek();
-        }
-
-        // States in which ending is valid. Log an error in other states (a
-        // lone sign or point, "0x" without digits, "e" without exponent)
-        if (state == State::LEADING_ZERO || state == State::HEX ||
-            state == State::INT || state == State::POINT ||
-            state == State::EXP) {
-            return true;
-        }
-        logger.errorAt(ERR_UNEXPECTED_END, reader);
-        return false;
-    }
-};
-
-/* Class Reader
- *
- * States of the hand-written state machines in the parse functions below:
- *
- *  STATE_INIT          nothing read yet: parseString waits for the opening
- *                      quote, parseArray (without explicit delimiter) waits
- *                      for the opening '['.
- *  STATE_IN_STRING     inside the string content.
- *  STATE_IN_ARRAY      parseArray expects the next array element.
- *  STATE_EXPECT_COMMA  parseArray has read an element and expects a comma.
- *  STATE_ESCAPE        parseString has read a backslash, the next character is
- *                      interpreted as escape code.
- *  STATE_WHITESPACE    parseUnescapedString has read whitespace which is only
- *                      kept if further non-whitespace characters follow.
- *  STATE_RESYNC        parseArray skips input up to the next comma after an
- *                      error.
- */
-
-static const int STATE_INIT = 0;
-static const int STATE_IN_STRING = 1;
-static const int STATE_IN_ARRAY = 2;
-static const int STATE_EXPECT_COMMA = 3;
-static const int STATE_ESCAPE = 4;
-static const int STATE_WHITESPACE = 5;
-static const int STATE_RESYNC = 6;
-
-/**
- * Reports the message err to the logger (together with the reader) and builds
- * the "failed" result pair returned by the parse functions.
- *
- * @param reader is handed to the logger along with the message.
- * @param logger is the logger the error is reported to.
- * @param err is the error message.
- * @param res is the (partial) value that is returned next to the error flag.
- * @return a pair consisting of false and the given value.
- */
-template <class T>
-static std::pair<bool, T> error(CharReader &reader, Logger &logger,
-                                const char *err, T res)
-{
-    logger.errorAt(err, reader);
-    return std::pair<bool, T>(false, std::move(res));
-}
-
-/*
- * Reads a string literal enclosed in single or double quotes and resolves the
- * escape sequences it contains. Returns the success flag together with the
- * text read so far. If delims is given, hitting one of these characters
- * instead of the opening quote is reported as unexpected end rather than as
- * unexpected character.
- */
-std::pair<bool, std::string> Reader::parseString(
-    CharReader &reader, Logger &logger,
-    const std::unordered_set<char> *delims)
-{
-    int state = STATE_INIT;  // current state of the state machine
-    char quote = 0;          // quote character that opened the literal
-    std::string text;        // unescaped content collected so far
-
-    // Skip whitespace in front of the literal
-    reader.consumeWhitespace();
-
-    // TODO: Combination of peeking and consumePeek is stupid as consumePeek is
-    // the default (read and putBack would obviously be better, yet the latter
-    // is not trivial to implement in the current CharReader).
-    char c;
-    while (reader.peek(c)) {
-        if (state == STATE_INIT) {
-            // Only an opening quote may start the literal. It is peeked but
-            // not consumed at this point.
-            if (c == '"' || c == '\'') {
-                quote = c;
-                state = STATE_IN_STRING;
-                continue;
-            }
-            const bool atDelim = delims && delims->count(c);
-            return error(reader, logger,
-                         atDelim ? ERR_UNEXPECTED_END : ERR_UNEXPECTED_CHAR,
-                         text);
-        }
-
-        if (state == STATE_IN_STRING) {
-            // The matching quote terminates the literal
-            if (c == quote) {
-                reader.consumePeek();
-                return std::make_pair(true, text);
-            }
-
-            // Line breaks have to be escaped
-            if (c == '\n') {
-                return error(reader, logger, ERR_UNTERMINATED, text);
-            }
-
-            // A backslash introduces an escape sequence, everything else is
-            // copied verbatim
-            if (c == '\\') {
-                state = STATE_ESCAPE;
-            } else {
-                text += c;
-            }
-            reader.consumePeek();
-            continue;
-        }
-
-        // Remaining state is STATE_ESCAPE: translate the character following
-        // the backslash
-        switch (c) {
-            case 'b': text += '\b'; break;
-            case 'f': text += '\f'; break;
-            case 'n': text += '\n'; break;
-            case 'r': text += '\r'; break;
-            case 't': text += '\t'; break;
-            case 'v': text += '\v'; break;
-            case '\'': text += '\''; break;
-            case '"': text += '"'; break;
-            case '\\': text += '\\'; break;
-            case '\n':
-                // Escaped line break: contributes nothing to the result
-                break;
-            case 'x':
-                // TODO: Parse Latin-1 sequence hex XX
-                break;
-            case 'u':
-                // TODO: Parse 16-Bit unicode character hex XXXX
-                break;
-            default:
-                // TODO: Parse octal 000 sequence (digits are silently skipped
-                // for now), everything else is an invalid escape sequence
-                if (!Utils::isNumeric(c)) {
-                    logger.errorAt(ERR_INVALID_ESCAPE, reader);
-                }
-                break;
-        }
-
-        // Continue with the regular string content
-        state = STATE_IN_STRING;
-        reader.consumePeek();
-    }
-
-    // The input ended before the closing quote was found
-    return error(reader, logger, ERR_UNEXPECTED_END, text);
-}
-
-/*
- * Reads a comma separated list of generic values. If delim is zero the list
- * has to be enclosed in '[' and ']', otherwise the opening bracket is assumed
- * to be consumed already and the list ends at the given delimiter. The
- * closing delimiter is consumed. After an erroneous element the parser skips
- * to the next comma and continues, the returned flag is false in that case.
- */
-std::pair<bool, Variant::arrayType> Reader::parseArray(
-    CharReader &reader, Logger &logger, char delim)
-{
-    Variant::arrayType res;
-    bool hadError = false;
-    int state = delim ? STATE_IN_ARRAY : STATE_INIT;
-    delim = delim ? delim : ']';
-    char c;
-
-    // Consume all whitespace
-    reader.consumeWhitespace();
-
-    // Iterate over the characters, use the parseGeneric function to read the
-    // elements
-    while (reader.peek(c)) {
-        // Generically handle the end of the array
-        if (state != STATE_INIT && c == delim) {
-            reader.consumePeek();
-            return std::make_pair(!hadError, std::move(res));
-        }
-
-        switch (state) {
-            case STATE_INIT:
-                if (c != '[') {
-                    return error(reader, logger, ERR_UNEXPECTED_CHAR, res);
-                }
-                state = STATE_IN_ARRAY;
-                reader.consumePeek();
-                break;
-            case STATE_IN_ARRAY: {
-                // Skip whitespace in front of an element here. Otherwise
-                // whitespace followed by the closing delimiter (as in "[ ]" or
-                // "[1, ]") would be passed to parseGeneric, which skips the
-                // whitespace, runs into the delimiter and reports an error.
-                if (Utils::isWhitespace(c)) {
-                    reader.consumePeek();
-                    break;
-                }
-
-                // Try to read an element using the parseGeneric function
-                reader.resetPeek();
-                auto elem = parseGeneric(reader, logger, {',', delim});
-                res.push_back(std::move(elem.second));
-
-                // If the reader had no error, expect a comma, otherwise skip
-                // to the next comma in the stream
-                if (elem.first) {
-                    state = STATE_EXPECT_COMMA;
-                } else {
-                    state = STATE_RESYNC;
-                    hadError = true;
-                }
-                break;
-            }
-            case STATE_EXPECT_COMMA:
-                // Skip whitespace, anything but a comma is an error
-                if (c == ',') {
-                    state = STATE_IN_ARRAY;
-                } else if (!Utils::isWhitespace(c)) {
-                    hadError = true;
-                    state = STATE_RESYNC;
-                    logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
-                }
-                reader.consumePeek();
-                break;
-            case STATE_RESYNC:
-                // Just wait for another comma to arrive
-                if (c == ',') {
-                    state = STATE_IN_ARRAY;
-                }
-                reader.consumePeek();
-                break;
-        }
-    }
-
-    // The input ended before the closing delimiter was found
-    return error(reader, logger, ERR_UNEXPECTED_END, res);
-}
-
-/*
- * Reads all characters up to (but excluding) the first delimiter or the end
- * of the input. Whitespace between non-whitespace characters is preserved,
- * whitespace at the end is read but not part of the result. This function
- * always reports success.
- */
-std::pair<bool, std::string> Reader::parseUnescapedString(
-    CharReader &reader, Logger &logger,
-    const std::unordered_set<char> &delims)
-{
-    std::string text;     // result
-    std::string pending;  // whitespace read since the last regular character
-    int state = STATE_IN_STRING;
-    char c;
-
-    // Skip whitespace in front of the string
-    reader.consumeWhitespace();
-
-    while (reader.peek(c)) {
-        // A delimiter ends the string and is left in the reader
-        if (delims.count(c)) {
-            reader.resetPeek();
-            break;
-        }
-
-        if (Utils::isWhitespace(c)) {
-            // Hold whitespace back, it may turn out to be trailing
-            pending += c;
-            state = STATE_WHITESPACE;
-        } else {
-            // The whitespace held back was not trailing, flush it
-            if (state == STATE_WHITESPACE) {
-                text += pending;
-                pending.clear();
-                state = STATE_IN_STRING;
-            }
-            text += c;
-        }
-        reader.consumePeek();
-    }
-    return std::make_pair(true, text);
-}
-
-/*
- * Reads a number and succeeds only if it is an integer, i.e. has neither a
- * fractional part nor an exponent. The integral part of the number is
- * returned in any case.
- */
-std::pair<bool, int64_t> Reader::parseInteger(
-    CharReader &reader, Logger &logger,
-    const std::unordered_set<char> &delims)
-{
-    Number number;
-
-    // The number parser has already logged the reason for its failure
-    if (!number.parse(reader, logger, delims)) {
-        return std::make_pair(false, number.intValue());
-    }
-
-    // A syntactically valid number which is not an integer is an error here
-    if (!number.isInt()) {
-        return error(reader, logger, ERR_INVALID_INTEGER, number.intValue());
-    }
-    return std::make_pair(true, number.intValue());
-}
-
-/*
- * Reads a number and returns it as double. The value is built from whatever
- * the number parser has read, even if parsing failed.
- */
-std::pair<bool, double> Reader::parseDouble(
-    CharReader &reader, Logger &logger,
-    const std::unordered_set<char> &delims)
-{
-    Number number;
-    const bool ok = number.parse(reader, logger, delims);
-    return std::pair<bool, double>(ok, number.doubleValue());
-}
-
-/*
- * Reads a value of unknown type. Quoted text is parsed as string, text that
- * starts with a digit or a minus sign is tried as number first and everything
- * else (including failed numbers) is read as unescaped string, where the
- * words "true", "false" and "null" are mapped to the corresponding values.
- * Reaching a delimiter or the end of the input before any value is an error.
- */
-std::pair<bool, Variant> Reader::parseGeneric(
-    CharReader &reader, Logger &logger,
-    const std::unordered_set<char> &delims)
-{
-    char c;
-
-    // Skip all whitespace characters, abort at the end of the input or if a
-    // delimiter is reached
-    reader.consumeWhitespace();
-    if (!reader.peek(c) || delims.count(c)) {
-        return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
-    }
-
-    // The character was only inspected to select a parser. Undo the peek so
-    // the parser called below starts at this character, in the same way
-    // parseArray resets the peek before it calls parseGeneric.
-    // NOTE(review): assumes resetPeek() is a no-op if the parsers reset the
-    // peek position themselves -- confirm against CharReader.
-    reader.resetPeek();
-
-    // Parse a string if a quote is reached
-    if (c == '"' || c == '\'') {
-        auto res = parseString(reader, logger);
-        return std::make_pair(res.first, res.second.c_str());
-    }
-
-    // TODO: Parse struct descriptor if c == '['
-
-    // Try to parse everything that looks like a number as number
-    if (Utils::isNumeric(c) || c == '-') {
-        Number n;
-
-        // Fork the reader, the original is only advanced on success
-        utils::CharReaderFork fork = reader.fork();
-
-        // TODO: Fork logger
-
-        // Try to parse the number
-        if (n.parse(fork, logger, delims)) {
-            // Parsing was successful, advance the reader
-            fork.commit();
-            if (n.isInt()) {
-                return std::make_pair(
-                    true,
-                    Variant{static_cast<Variant::intType>(n.intValue())});
-            } else {
-                return std::make_pair(true, n.doubleValue());
-            }
-        }
-    }
-
-    // Parse an unescaped string in any other case
-    auto res = parseUnescapedString(reader, logger, delims);
-
-    // Handling for special primitive values
-    if (res.first) {
-        if (res.second == "true") {
-            return std::make_pair(true, Variant{true});
-        }
-        if (res.second == "false") {
-            return std::make_pair(true, Variant{false});
-        }
-        if (res.second == "null") {
-            return std::make_pair(true, Variant{nullptr});
-        }
-    }
-    return std::make_pair(res.first, res.second.c_str());
-}
-}
-}
-