diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-01-11 00:55:55 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-01-11 00:55:55 +0100 |
commit | b7d0f6517d19b6f8c544d6fa88a5f17b3c591a03 (patch) | |
tree | aa0c18c7af52210eafc4cc23bf561984ac6977b5 /src/core/common/VariantReader.cpp | |
parent | dd185959b9fff68008d02979fefde602d177ce75 (diff) |
Refactored number class out of VariantReader and added documentation
Diffstat (limited to 'src/core/common/VariantReader.cpp')
-rw-r--r-- | src/core/common/VariantReader.cpp | 297 |
1 files changed, 7 insertions, 290 deletions
diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp index d48e5cc..904713e 100644 --- a/src/core/common/VariantReader.cpp +++ b/src/core/common/VariantReader.cpp @@ -23,21 +23,22 @@ #include <utf8.h> +#include "Number.hpp" #include "VariantReader.hpp" #include "Utils.hpp" namespace ousia { -// TODO: Use custom return value instead of std::pair - /* Error Messages */ +// TODO: Invent common system for error messages which allows localization +// TODO: Possibly adapt the clang error logging system + static const char *ERR_UNEXPECTED_CHAR = "Unexpected character"; static const char *ERR_UNEXPECTED_END = "Unexpected end of literal"; static const char *ERR_UNTERMINATED = "Unterminated literal"; static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; static const char *ERR_INVALID_INTEGER = "Invalid integer value"; -static const char *ERR_TOO_LARGE = "Value too large to represent"; template <class T> static std::pair<bool, T> error(CharReader &reader, Logger &logger, @@ -71,290 +72,6 @@ static std::pair<bool, T> unexpected(CharReader &reader, Logger &logger, return error(reader, logger, unexpectedMsg(expected, got), res); } -/* Class Number */ - -/** - * Class used internally to represent a number (integer or double). The number - * is represented by its components (base value a, nominator n, denominator d, - * exponent e, sign s and exponent sign sE). - */ -class Number { -private: - /** - * State used in the parser state machine - */ - enum class State { - INIT, - HAS_MINUS, - LEADING_ZERO, - LEADING_POINT, - INT, - HEX, - POINT, - EXP_INIT, - EXP_HAS_MINUS, - EXP - }; - - /** - * Returns the numeric value of the given ASCII character (returns 0 for - * '0', 1 for '1', 10 for 'A' and so on). - * - * @param c is the character for which the numeric value should be returned. - * @return the numeric value the character represents. - */ - static int charValue(char c) - { - if (c >= '0' && c <= '9') { - return c & 0x0F; - } - if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) { - return (c & 0x0F) + 9; - } - return -1; - } - -public: - /** - * Reprsents the part of the number: Base value a, nominator n, exponent e. - */ - enum class Part { A, N, E }; - - /** - * Sign and exponent sign. - */ - int8_t s, sE; - - /** - * Exponent - */ - int16_t e; - - /** - * Base value, nominator, denominator - */ - int64_t a, n, d; - - /** - * Constructor of the number class. - */ - Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {} - - /** - * Returns the represented double value. - */ - double doubleValue() - { - return s * (a + ((double)n / (double)d)) * pow(10.0, (double)(sE * e)); - } - - /** - * Returns the represented integer value. Only a lossless operation, if the - * number is an integer (as can be checked via the isInt method), otherwise - * the exponent and the fractional value will be truncated. - */ - int64_t intValue() { return s * a; } - - /** - * Returns true, if the number is an integer (has no fractional or - * exponential part). - */ - bool isInt() { return (n == 0) && (d == 1) && (e == 0); } - - /** - * Appends the value of the character c to the internal number - * representation and reports any errors that might occur. - */ - bool appendChar(char c, int base, Part p, CharReader &reader, - Logger &logger) - { - // Check whether the given character is valid - int v = charValue(c); - if (v < 0 || v >= base) { - logger.error(unexpectedMsg("digit", c), reader); - return false; - } - - // Append the number to the specified part - switch (p) { - case Part::A: - a = a * base + v; - break; - case Part::N: - n = n * base + v; - d = d * base; - break; - case Part::E: - e = e * base + v; - break; - } - - // Check for any overflows - if (a < 0 || n < 0 || d < 0 || e < 0) { - logger.error(ERR_TOO_LARGE, reader); - return false; - } - return true; - } - - /** - * Tries to parse the number from the given stream and loggs any errors to - * the given logger instance. Numbers are terminated by one of the given - * delimiters. - */ - bool parse(CharReader &reader, Logger &logger, - const std::unordered_set<char> &delims); - - bool parseFixedLenInt(CharReader &reader, Logger &logger, int base, - int len); -}; - -bool Number::parse(CharReader &reader, Logger &logger, - const std::unordered_set<char> &delims) -{ - State state = State::INIT; - char c; - - // Consume the first whitespace characters - reader.consumeWhitespace(); - - // Iterate over the FSM to extract numbers - while (reader.peek(c)) { - // Abort, once a delimiter or whitespace is reached - if (Utils::isWhitespace(c) || delims.count(c)) { - reader.resetPeek(); - break; - } - - // The character is not a whitespace character and not a delimiter - switch (state) { - case State::INIT: - case State::HAS_MINUS: - switch (c) { - case '-': - // Do not allow multiple minus signs - if (state == State::HAS_MINUS) { - logger.error(unexpectedMsg("digit", c), reader); - return false; - } - state = State::HAS_MINUS; - s = -1; - break; - case '0': - // Remember a leading zero for the detection of "0x" - state = State::LEADING_ZERO; - break; - case '.': - // Remember a leading point as ".eXXX" is invalid - state = State::LEADING_POINT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::LEADING_ZERO: - if (c == 'x' || c == 'X') { - state = State::HEX; - break; - } - // fallthrough - case State::INT: - switch (c) { - case '.': - state = State::POINT; - break; - case 'e': - case 'E': - state = State::EXP_INIT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::HEX: - if (!appendChar(c, 16, Part::A, reader, logger)) { - return false; - } - break; - case State::LEADING_POINT: - case State::POINT: - switch (c) { - case 'e': - case 'E': - if (state == State::LEADING_POINT) { - logger.error(unexpectedMsg("digit", c), reader); - return false; - } - state = State::EXP_INIT; - break; - default: - state = State::POINT; - if (!appendChar(c, 10, Part::N, reader, logger)) { - return false; - } - break; - } - break; - case State::EXP_HAS_MINUS: - case State::EXP_INIT: - if (c == '-') { - if (state == State::EXP_HAS_MINUS) { - logger.error(unexpectedMsg("digit", c), reader); - return false; - } - state = State::EXP_HAS_MINUS; - sE = -1; - } else { - state = State::EXP; - if (!appendChar(c, 10, Part::E, reader, logger)) { - return false; - } - } - break; - case State::EXP: - if (!appendChar(c, 10, Part::E, reader, logger)) { - return false; - } - break; - } - reader.consumePeek(); - } - - // States in which ending is valid. Log an error in other states - if (state == State::LEADING_ZERO || state == State::HEX || - state == State::INT || state == State::POINT || state == State::EXP) { - return true; - } - logger.error(ERR_UNEXPECTED_END, reader); - return false; -} - -bool Number::parseFixedLenInt(CharReader &reader, Logger &logger, int base, - int len) -{ - char c; - reader.consumePeek(); - for (int i = 0; i < len; i++) { - if (!reader.peek(c)) { - logger.error("Unexpected end of escape sequence", reader); - return false; - } - if (!appendChar(c, base, Number::Part::A, reader, logger)) { - return false; - } - reader.consumePeek(); - } - return true; -} - /* State machine states */ static const int STATE_INIT = 0; @@ -658,7 +375,7 @@ std::pair<bool, std::string> VariantReader::parseString( // Parse Latin-1 sequence \xXX Number n; hadError = - !(n.parseFixedLenInt(reader, logger, 16, 2) && + !(n.parseFixedLenInt(reader, 2, 16, logger) && encodeUtf8(res, reader, logger, n.intValue(), true)) || hadError; @@ -668,7 +385,7 @@ std::pair<bool, std::string> VariantReader::parseString( // Parse Unicode sequence \uXXXX Number n; hadError = - !(n.parseFixedLenInt(reader, logger, 16, 4) && + !(n.parseFixedLenInt(reader, 4, 16, logger) && encodeUtf8(res, reader, logger, n.intValue(), false)) || hadError; @@ -680,7 +397,7 @@ std::pair<bool, std::string> VariantReader::parseString( reader.resetPeek(); Number n; hadError = - !(n.parseFixedLenInt(reader, logger, 8, 3) && + !(n.parseFixedLenInt(reader, 3, 8, logger) && encodeUtf8(res, reader, logger, n.intValue(), true)) || hadError; |