From 3f62168ed0b088eec3cb2903f03966f7d501f564 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Thu, 11 Dec 2014 15:26:50 +0100 Subject: moved to CharReader everywhere --- CMakeLists.txt | 24 +- src/core/BufferedCharReader.cpp | 263 ----------- src/core/BufferedCharReader.hpp | 288 ------------ src/core/CSS.hpp | 2 +- src/core/CodeTokenizer.hpp | 8 +- src/core/Exceptions.cpp | 46 -- src/core/Exceptions.hpp | 162 ------- src/core/Logger.cpp | 161 ------- src/core/Logger.hpp | 609 ------------------------ src/core/Registry.cpp | 3 +- src/core/Tokenizer.cpp | 12 +- src/core/Tokenizer.hpp | 27 +- src/core/Utils.cpp | 59 --- src/core/Utils.hpp | 110 ----- src/core/common/CharReader.cpp | 640 +++++++++++++++++++++++++ src/core/common/CharReader.hpp | 665 ++++++++++++++++++++++++++ src/core/common/Exceptions.cpp | 46 ++ src/core/common/Exceptions.hpp | 162 +++++++ src/core/common/Logger.cpp | 161 +++++++ src/core/common/Logger.hpp | 609 ++++++++++++++++++++++++ src/core/common/Utils.cpp | 59 +++ src/core/common/Utils.hpp | 110 +++++ src/core/common/Variant.cpp | 154 ++++++ src/core/common/Variant.hpp | 761 ++++++++++++++++++++++++++++++ src/core/common/VariantReader.cpp | 625 +++++++++++++++++++++++++ src/core/common/VariantReader.hpp | 166 +++++++ src/core/parser/Parser.hpp | 4 +- src/core/parser/ParserStack.cpp | 4 +- src/core/parser/ParserStack.hpp | 2 +- src/core/utils/CharReader.cpp | 643 -------------------------- src/core/utils/CharReader.hpp | 672 --------------------------- src/core/variant/Reader.cpp | 624 ------------------------- src/core/variant/Reader.hpp | 169 ------- src/core/variant/Variant.cpp | 155 ------- src/core/variant/Variant.hpp | 766 ------------------------------ src/plugins/css/CSSParser.cpp | 18 +- src/plugins/css/CSSParser.hpp | 2 +- src/plugins/xml/XmlParser.cpp | 2 +- test/core/BufferedCharReaderTest.cpp | 185 -------- test/core/CodeTokenizerTest.cpp | 26 +- test/core/LoggerTest.cpp | 74 --- test/core/RegistryTest.cpp | 2 +- 
test/core/TokenizerTest.cpp | 14 +- test/core/UtilsTest.cpp | 43 -- test/core/common/CharReaderTest.cpp | 821 ++++++++++++++++++++++++++++++++ test/core/common/LoggerTest.cpp | 74 +++ test/core/common/UtilsTest.cpp | 43 ++ test/core/common/VariantReaderTest.cpp | 345 ++++++++++++++ test/core/common/VariantTest.cpp | 141 ++++++ test/core/utils/CharReaderTest.cpp | 823 --------------------------------- test/core/variant/ReaderTest.cpp | 345 -------------- test/core/variant/VariantTest.cpp | 141 ------ test/plugins/css/CSSParserTest.cpp | 16 +- 53 files changed, 5662 insertions(+), 6424 deletions(-) delete mode 100644 src/core/BufferedCharReader.cpp delete mode 100644 src/core/BufferedCharReader.hpp delete mode 100644 src/core/Exceptions.cpp delete mode 100644 src/core/Exceptions.hpp delete mode 100644 src/core/Logger.cpp delete mode 100644 src/core/Logger.hpp delete mode 100644 src/core/Utils.cpp delete mode 100644 src/core/Utils.hpp create mode 100644 src/core/common/CharReader.cpp create mode 100644 src/core/common/CharReader.hpp create mode 100644 src/core/common/Exceptions.cpp create mode 100644 src/core/common/Exceptions.hpp create mode 100644 src/core/common/Logger.cpp create mode 100644 src/core/common/Logger.hpp create mode 100644 src/core/common/Utils.cpp create mode 100644 src/core/common/Utils.hpp create mode 100644 src/core/common/Variant.cpp create mode 100644 src/core/common/Variant.hpp create mode 100644 src/core/common/VariantReader.cpp create mode 100644 src/core/common/VariantReader.hpp delete mode 100644 src/core/utils/CharReader.cpp delete mode 100644 src/core/utils/CharReader.hpp delete mode 100644 src/core/variant/Reader.cpp delete mode 100644 src/core/variant/Reader.hpp delete mode 100644 src/core/variant/Variant.cpp delete mode 100644 src/core/variant/Variant.hpp delete mode 100644 test/core/BufferedCharReaderTest.cpp delete mode 100644 test/core/LoggerTest.cpp delete mode 100644 test/core/UtilsTest.cpp create mode 100644 
test/core/common/CharReaderTest.cpp create mode 100644 test/core/common/LoggerTest.cpp create mode 100644 test/core/common/UtilsTest.cpp create mode 100644 test/core/common/VariantReaderTest.cpp create mode 100644 test/core/common/VariantTest.cpp delete mode 100644 test/core/utils/CharReaderTest.cpp delete mode 100644 test/core/variant/ReaderTest.cpp delete mode 100644 test/core/variant/VariantTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d824b86..3cfa185 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,18 +97,20 @@ ADD_DEFINITIONS( ) ADD_LIBRARY(ousia_core - src/core/BufferedCharReader src/core/CodeTokenizer src/core/CSS - src/core/Exceptions - src/core/Logger src/core/Managed src/core/Node src/core/Registry src/core/ResourceLocator src/core/Tokenizer # src/core/Typesystem - src/core/Utils + src/core/common/CharReader + src/core/common/Exceptions + src/core/common/Logger + src/core/common/Utils + src/core/common/Variant + src/core/common/VariantReader src/core/parser/Parser src/core/parser/ParserStack src/core/parser/Scope @@ -116,9 +118,6 @@ ADD_LIBRARY(ousia_core # src/core/script/Object # src/core/script/ScriptEngine # src/core/script/Variant - src/core/utils/CharReader - src/core/variant/Reader - src/core/variant/Variant ) ADD_LIBRARY(ousia_css @@ -155,10 +154,8 @@ IF(TEST) ) ADD_EXECUTABLE(ousia_test_core - test/core/BufferedCharReaderTest test/core/CodeTokenizerTest test/core/CSSTest - test/core/LoggerTest test/core/ManagedTest test/core/ManagedContainersTest test/core/NodeTest @@ -166,14 +163,15 @@ IF(TEST) test/core/RegistryTest test/core/ResourceLocatorTest test/core/TokenizerTest - test/core/UtilsTest + test/core/common/CharReaderTest + test/core/common/LoggerTest + test/core/common/VariantReaderTest + test/core/common/VariantTest + test/core/common/UtilsTest test/core/parser/ParserStackTest # test/core/script/FunctionTest # test/core/script/ObjectTest # test/core/script/VariantTest - test/core/utils/CharReaderTest - 
test/core/variant/ReaderTest - test/core/variant/VariantTest ) TARGET_LINK_LIBRARIES(ousia_test_core diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp deleted file mode 100644 index aeedf12..0000000 --- a/src/core/BufferedCharReader.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include "Utils.hpp" - -#include "BufferedCharReader.hpp" - -namespace ousia { - -// Constants used within the linebreak statemachine. 
-static const uint8_t LB_STATE_NONE = 0x00; -static const uint8_t LB_STATE_ONE = 0x01; -static const uint8_t LB_STATE_LF = 0x10; -static const uint8_t LB_STATE_CR = 0x20; -static const uint8_t LB_STATE_MASK_CNT = 0x0F; -static const uint8_t LB_STATE_MASK_TYPE = 0xF0; - -/* Struct BufferedCharReader::ReadCursor */ - -BufferedCharReader::ReadCursor::ReadCursor(unsigned int line, - unsigned int column, - bool destructive) - : line(line), - column(column), - bufferElem(0), - bufferPos(0), - destructive(destructive), - lbState(LB_STATE_NONE) -{ -} - -void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor) -{ - this->line = cursor.line; - this->column = cursor.column; - this->bufferElem = cursor.bufferElem; - this->bufferPos = cursor.bufferPos; - this->lbState = cursor.lbState; -} - -/* Class BufferedCharReader */ - -BufferedCharReader::BufferedCharReader(int line, int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -BufferedCharReader::BufferedCharReader(const std::string &str, int line, - int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(const std::string &str) - : inputStream(nullptr), - readCursor(1, 1, true), - peekCursor(1, 1, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line, - int column) - : inputStream(&inputStream), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -void BufferedCharReader::feed(const std::string &data) -{ - if (!depleted && !inputStream) { - buffer.push_back(data); - } -} - -void BufferedCharReader::close() -{ - if (!inputStream) { - depleted = true; - } -} - -bool BufferedCharReader::substituteLinebreaks(ReadCursor &cursor, char *c) -{ - // Handle line breaks, inserts breakes 
after the following character - // combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n - if ((*c == '\n') || (*c == '\r')) { - // Determine the type of the current linebreak character - const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR; - - // Read the last count and the last type from the state - const uint8_t lastCount = cursor.lbState & LB_STATE_MASK_CNT; - const uint8_t lastType = cursor.lbState & LB_STATE_MASK_TYPE; - - // Set the current linebreak type and counter in the state - cursor.lbState = ((lastCount + 1) & 1) | type; - - // If either this is the first instance of this character or the same - // return character is repeated - if (!lastCount || (lastType == type)) { - *c = '\n'; - return true; - } - return false; - } - - // Find the state - cursor.lbState = LB_STATE_NONE; - return true; -} - -bool BufferedCharReader::readCharacterAtCursor(ReadCursor &cursor, char *c) -{ - bool hasChar = false; - while (!hasChar) { - // Abort if the current buffer element does not point to a valid entry - // in the buffer -- we must try to feed another data block into the - // internal buffer - if (cursor.bufferElem >= buffer.size()) { - // Abort if there is no more data or no input stream is set - if (depleted || !inputStream) { - return false; - } - - // Read a buffer of the specified size - constexpr std::streamsize BUFFER_SIZE = 1024; - std::array buf; - const std::streamsize cnt = - (*inputStream).read(buf.data(), BUFFER_SIZE).gcount(); - - // If data has been read, append it to the input buffer and try - // again - if (cnt > 0) { - buffer.emplace_back(std::string(buf.data(), cnt)); - continue; - } - - // End of file handling - if (inputStream->fail() || inputStream->eof()) { - depleted = true; - return false; - } - } - - // Fetch the current element the peek pointer points to - const std::string &data = buffer[cursor.bufferElem]; - - // Handle the "no data" case -- either in a destructive or - // non-destructive manner. 
- if (cursor.bufferPos >= data.length()) { - if (cursor.destructive) { - buffer.pop_front(); - } else { - cursor.bufferElem++; - } - cursor.bufferPos = 0; - continue; - } - - // Read the character, advance the buffer position - *c = *(data.data() + cursor.bufferPos); - cursor.bufferPos++; - - // Substitute linebreaks with a single LF (0x0A) - hasChar = substituteLinebreaks(cursor, c); - } - - // Update the position counter - if (*c == '\n') { - cursor.line++; - cursor.column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((*c & 0x80) && !(*c & 0x40))) { - cursor.column++; - } - } - - return true; -} - -bool BufferedCharReader::peek(char *c) -{ - return readCharacterAtCursor(peekCursor, c); -} - -bool BufferedCharReader::read(char *c) -{ - resetPeek(); - return readCharacterAtCursor(readCursor, c); -} - -void BufferedCharReader::consumePeek() -{ - // Remove all no longer needed buffer elements - for (unsigned int i = 0; i < peekCursor.bufferElem; i++) { - buffer.pop_front(); - } - peekCursor.bufferElem = 0; - - // Copy the peek cursor to the read cursor - readCursor.assign(peekCursor); -} - -bool BufferedCharReader::consumeWhitespace() -{ - char c; - while (peek(&c)) { - if (!Utils::isWhitespace(c)) { - resetPeek(); - return true; - } - consumePeek(); - } - return false; -} - -void BufferedCharReader::resetPeek() -{ - // Reset the peek cursor to the read cursor - peekCursor.assign(readCursor); -} - -bool BufferedCharReader::atEnd() const -{ - if (depleted || !inputStream) { - if (buffer.size() <= 0) { - return true; - } else if (buffer.size() == 1) { - return buffer[0].size() == readCursor.bufferPos; - } - } - return false; -} -} - diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp deleted file mode 100644 index e7f3186..0000000 --- a/src/core/BufferedCharReader.hpp +++ /dev/null @@ -1,288 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it 
and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file BufferedCharReader.hpp - * - * Contains the BufferedCharReader class which is used for reading/peeking - * single characters from an input stream or string. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_BUFFERED_CHAR_READER_H_ -#define _OUSIA_BUFFERED_CHAR_READER_H_ - -#include -#include -#include -#include - -namespace ousia { - -// TODO: Better split this class into multiple classes with base class -// BufferedCharReader where each sub class represents one method of supplying -// the input data (feeding, initial string, input stream). - -/** - * The BufferedCharReader class is used for storing incomming data that - * is fed into the pipeline as well as reading/peeking single characters - * from that buffer. Additionally it counts the current column/row - * (with correct handling for UTF-8) and contains an internal state - * machine that handles the detection of linebreaks and converts these to a - * single '\n'. - */ -class BufferedCharReader { -private: - /** - * The ReadCursor structure is responsible for representing the read - * position within the text an all state machine states belonging to the - * cursor. There are two types of read cursors: destructive and - * non-destructive read cursors. - */ - struct ReadCursor { - /** - * The line the cursor currently points to. - */ - unsigned int line; - - /** - * The column the cursor currently points to. 
- */ - unsigned int column; - - /** - * The index of the element in the data buffer we're currently reading - * from. - */ - unsigned int bufferElem; - - /** - * The byte position within this data buffer. - */ - unsigned int bufferPos; - - /** - * Specifies whether this is a destructive cursor (bytes are discarded - * once they were read from the buffer). - */ - const bool destructive; - - /** - * State variable used in the internal state machine of the - * line feed detection. - */ - uint8_t lbState; - - /** - * Constructor of the ReadCursor structure. - * - * @param line is the start line. - * @param column is the start column. - * @param destructive specifies whether the ReadCursor is destructive - * (consumes all read characters, as used in the "read cursor") or - * non-destructive (as used in the "peek cursor"). - */ - ReadCursor(unsigned int line, unsigned int column, bool destructive); - - /** - * Copys the data from another ReadCursor without overriding the - * "destructive" flag. - * - * @param cursor is the cursor that should be copied. - */ - void assign(const ReadCursor &cursor); - }; - - /** - * Pointer at an (optional) input stream used for reading a chunk of data - * whenever the input buffer depletes. - */ - std::istream *inputStream; - - /** - * The read and the peek cursor. - */ - ReadCursor readCursor, peekCursor; - - /** - * Set to true if there is no more input data. - */ - bool depleted; - - /** - * Queue containing the data that has been fed into the char reader. - */ - std::deque buffer; - - /** - * Substitute any combination of linebreaks in the incomming code with "\n". - * Returns true if the current character is meant as output, false - * otherwise. - */ - bool substituteLinebreaks(ReadCursor &cursor, char *c); - - /** - * Reads a character from the input buffer and advances the given read - * cursor. - * - * @param cursor is a reference to the read cursor that should be used - * for reading. 
- * @param hasChar is set to true, if a character is available, false if - * no character is available (e.g. because line breaks are substituted or - * the end of a buffer boundary is reached -- in this case this function - * should be called again with the same parameters.) - * @param c is a output parameter, which will be set to the read character. - * @param returns true if there was enough data in the buffer, false - * otherwise. - */ - bool readCharacterAtCursor(ReadCursor &cursor, char *c); - - /** - * Function that is called for each read character -- updates the row and - * column count. - */ - void updatePositionCounters(const char c); - -public: - - /** - * Constructor of the buffered char reader class with empty buffer as input. - * This operates the BufferedCharReader in a mode where new data has to be - * fed using the "feed" function and explicitly closed using the "close" - * function. - * - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(int line = 1, int column = 1); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(const std::string &str, int line, int column); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - */ - BufferedCharReader(const std::string &str); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param inputStream is the input stream from which incomming data should - * be read. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(std::istream &inputStream, int line = 1, int column = 1); - - /** - * Peeks a single character. If called multiple times, returns the - * character after the previously peeked character. 
- * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char *c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char *c); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Moves the read cursor to the next non-whitespace character. Returns - * false, if the end of the stream was reached. - * - * @return false if the end of the stream was reached, false othrwise. - */ - bool consumeWhitespace(); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Feeds new data into the internal buffer of the BufferedCharReader - * class. Only applicable if the buffered char reader was constructed - * without an input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void feed(const std::string &data); - - /** - * Tells the buffered char reader that no more data will be fed. - * Only applicable if the buffered char reader was constructed without an - * input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void close(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - * - * @return true if there is no more data. - */ - bool atEnd() const; - - /** - * Returns the current line (starting with one). 
- * - * @return the current line number. - */ - int getLine() const { return readCursor.line; } - - /** - * Returns the current column (starting with one). - * - * @return the current column number. - */ - int getColumn() const { return readCursor.column; } -}; -} - -#endif /* _OUSIA_BUFFERED_CHAR_READER_H_ */ - diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 1510f3a..a54d956 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -23,7 +23,7 @@ #include #include -#include +#include #include "Managed.hpp" #include "Node.hpp" diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 43c7abb..4190297 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -22,7 +22,7 @@ #include #include -#include "BufferedCharReader.hpp" +#include #include "Tokenizer.hpp" namespace ousia { @@ -108,8 +108,8 @@ public: /** * - * @param input a BufferedCharReader containing the input for this - * tokenizer, as with a regular tokenizer. + * @param input a CharReader containing the input for this tokenizer, as + * with a regular tokenizer. * @param root a TokenTreeNode representing the root of the TokenTree. * Please note that you have to specify all tokenIDs here that you use * in the descriptors map. @@ -120,7 +120,7 @@ public: * and this CodeTokenizer would recognize the token "//" as starting a * line comment. 
*/ - CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, + CodeTokenizer(CharReader &input, const TokenTreeNode &root, std::map descriptors) : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) { diff --git a/src/core/Exceptions.cpp b/src/core/Exceptions.cpp deleted file mode 100644 index d064f35..0000000 --- a/src/core/Exceptions.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include "Exceptions.hpp" - -namespace ousia { - -/* Class LoggableException */ - -std::string LoggableException::formatMessage(const std::string &msg, - const std::string &file, - int line, int column) -{ - std::stringstream ss; - ss << "error "; - if (!file.empty()) { - ss << "while processing \"" << file << "\" "; - } - if (line >= 0) { - ss << "at line " << line << ", "; - if (column >= 0) { - ss << "column " << column << " "; - } - } - ss << "with message: " << msg; - return ss.str(); -} -} - diff --git a/src/core/Exceptions.hpp b/src/core/Exceptions.hpp deleted file mode 100644 index 00d6106..0000000 --- a/src/core/Exceptions.hpp +++ /dev/null @@ -1,162 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file Exceptions.hpp - * - * Describes basic exception classes which are used throughout Ousía. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_EXCEPTIONS_HPP_ -#define _OUSIA_EXCEPTIONS_HPP_ - -namespace ousia { - -/** - * Base exception class all other Ousía exceptions should derive from. - */ -class OusiaException : public std::exception { -private: - /** - * Error message which will be printed by the runtime environment if the - * exception is not caught and handled in the code. 
- */ - const std::string formatedMessage; - -public: - /** - * Constructor of the OusiaException class. - * - * @param formatedMessage is a formated message that should be printed by - * the runtime environment if the exception is not caught. - */ - OusiaException(std::string formatedMessage) - : formatedMessage(std::move(formatedMessage)) - { - } - - /** - * Virtual destructor. - */ - virtual ~OusiaException() {} - - /** - * Implementation of the std::exception what function and used to retrieve - * the error message that should be printed by the runtime environment. - * - * @return a reference to the formated message string given in the - * constructor. - */ - const char *what() const noexcept override - { - return formatedMessage.c_str(); - } -}; - -/** - * Exception class which can be directly passed to a Logger instance and thus - * makes it simple to handle non-recoverable errors in the code. - */ -class LoggableException : public OusiaException { -private: - /** - * Function used internally to build the formated message that should be - * reported to the runtime environment. - */ - static std::string formatMessage(const std::string &msg, - const std::string &file, int line, - int column); - -public: - /** - * Message describing the error that occured. - */ - const std::string msg; - - /** - * Name of the file in which the error occured. May be empty. - */ - const std::string file; - - /** - * Line at which the exception occured. Negative values are ignored. - */ - const int line; - - /** - * Column at which the exception occured. Negative values are ignored. - */ - const int column; - - /** - * Constructor of the LoggableException class. - * - * @param msg contains the error message. - * @param file provides the context the message refers to. May be empty. - * @param line is the line in the above file the message refers to. - * @param column is the column in the above file the message refers to. 
- */ - LoggableException(std::string msg, std::string file, int line = -1, - int column = -1) - : OusiaException(formatMessage(msg, file, line, column)), - msg(std::move(msg)), - file(std::move(file)), - line(line), - column(column) - { - } - - /** - * Constructor of the LoggableException class with empty file. - * - * @param msg contains the error message. - * @param line is the line in the above file the message refers to. - * @param column is the column in the above file the message refers to. - */ - LoggableException(std::string msg, int line = -1, int column = -1) - : OusiaException(formatMessage(msg, "", line, column)), - msg(std::move(msg)), - line(line), - column(column) - { - } - - /** - * Constructor of the LoggableException class with empty file and an - * position object. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - LoggableException(std::string msg, const PosType &pos) - : OusiaException( - formatMessage(msg, "", pos.getLine(), pos.getColumn())), - msg(std::move(msg)), - line(pos.getLine()), - column(pos.getColumn()) - { - } -}; -} - -#endif /* _OUSIA_EXCEPTIONS_HPP_ */ - diff --git a/src/core/Logger.cpp b/src/core/Logger.cpp deleted file mode 100644 index 17f55a6..0000000 --- a/src/core/Logger.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include -#include - -#include "Logger.hpp" - -namespace ousia { - -/* Class Logger */ - -void Logger::log(Severity severity, const std::string &msg, - const std::string &file, int line, int column) -{ - // Copy the current severity level - if (static_cast(severity) > static_cast(maxEncounteredSeverity)) { - maxEncounteredSeverity = severity; - } - - // Call the actual log message function if the severity is larger or equal - // to the minimum severity - if (static_cast(severity) >= static_cast(minSeverity)) { - process(Message{severity, msg, file, line, column}); - } -} - -unsigned int Logger::pushFilename(const std::string &name) -{ - filenameStack.push(name); - return filenameStack.size(); -} - -unsigned int Logger::popFilename() -{ - filenameStack.pop(); - return filenameStack.size(); -} - -void Logger::unwindFilenameStack(unsigned int pos) -{ - while (filenameStack.size() > pos && !filenameStack.empty()) { - filenameStack.pop(); - } -} - -/* Class TerminalLogger */ - -/** - * Small class used internally for formated terminal output using ANSI/VT100 - * escape codes on supported terminals. - * - * TODO: Deactivate if using windows or use the corresponding API function. - */ -class Terminal { -private: - /** - * If set to false, no control codes are generated. 
- */ - bool active; - -public: - static const int BLACK = 30; - static const int RED = 31; - static const int GREEN = 32; - static const int YELLOW = 33; - static const int BLUE = 34; - static const int MAGENTA = 35; - static const int CYAN = 36; - static const int WHITE = 37; - - Terminal(bool active) : active(active) {} - - std::string color(int color, bool bright = true) const - { - if (!active) { - return std::string{}; - } - std::stringstream ss; - ss << "\x1b["; - if (bright) { - ss << "1;"; - } - ss << color << "m"; - return ss.str(); - } - - std::string reset() const - { - if (!active) { - return std::string{}; - } - return "\x1b[0m"; - } -}; - -void TerminalLogger::process(const Message &msg) -{ - Terminal t(useColor); - - // Print the file name - if (msg.hasFile()) { - os << t.color(Terminal::WHITE, true) << msg.file << t.reset(); - } - - // Print line and column number - if (msg.hasLine()) { - if (msg.hasFile()) { - os << ':'; - } - os << t.color(Terminal::WHITE, true) << msg.line - << t.reset(); - if (msg.hasColumn()) { - os << ':' << msg.column; - } - } - - // Print the optional seperator - if (msg.hasFile() || msg.hasLine()) { - os << ": "; - } - - // Print the severity - switch (msg.severity) { - case Severity::DEBUG: - break; - case Severity::NOTE: - os << t.color(Terminal::CYAN, true) << "note: "; - break; - case Severity::WARNING: - os << t.color(Terminal::MAGENTA, true) << "warning: "; - break; - case Severity::ERROR: - os << t.color(Terminal::RED, true) << "error: "; - break; - case Severity::FATAL_ERROR: - os << t.color(Terminal::RED, true) << "fatal: "; - break; - } - os << t.reset(); - - // Print the actual message - os << msg.msg << std::endl; -} -} - diff --git a/src/core/Logger.hpp b/src/core/Logger.hpp deleted file mode 100644 index e6b97f4..0000000 --- a/src/core/Logger.hpp +++ /dev/null @@ -1,609 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it 
and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file Logger.hpp - * - * Contains classes for logging messages in Ousía. Provides a generic Logger - * class, and TerminalLogger, an extension of Logger which logs do an output - * stream. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_LOGGER_HPP_ -#define _OUSIA_LOGGER_HPP_ - -#include -#include -#include -#include - -#include "Exceptions.hpp" - -namespace ousia { - -/** - * Enum containing the severities used for logging errors and debug messages. - */ -enum class Severity : int { - /** - * Indicates that this message was only printed for debugging. Note that - * in release builds messages with this severity are discarded. - */ - DEBUG = 0, - - /** - * A message which might provide additional information to the user. - */ - NOTE = 1, - - /** - * A message which warns of possible mistakes by the user which might not be - * actual errors but may lead to unintended behaviour. - */ - WARNING = 2, - - /** - * An error occurred while processing, however program execution continues, - * trying to deal with the error situation (graceful degradation). However, - * messages with this severity may be followed up by fatal errors. - */ - ERROR = 3, - - /** - * A fatal error occurred. Program execution cannot continue. 
- */ - FATAL_ERROR = 4 -}; - -#ifdef NDEBUG -static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE; -#else -static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG; -#endif - -/** - * The Logger class is the base class the individual logging systems should - * derive from. It provides a simple interface for logging errors, warnings and - * notes and filters these according to the set minimum severity. Additionally - * a stack of file names is maintained in order to allow simple descent into - * included files. Note however, that this base Logger class simply discards the - * incomming log messages. Use one of the derived classes to actually handle the - * log messages. - */ -class Logger { -public: - /** - * The message struct represents a single log message and all information - * attached to it. - */ - struct Message { - /** - * Severity of the log message. - */ - Severity severity; - - /** - * Actual log message. - */ - std::string msg; - - /** - * Refers to the file which provides the context for this error message. - * May be empty. - */ - std::string file; - - /** - * Line in the above file the error message refers to. Ignored if - * smaller than zero. - */ - int line; - - /** - * Column in the above file the error message refers to. Ignored if - * smaller than zero. - */ - int column; - - /** - * Constructor of the Message struct. - * - * @param severity describes the message severity. - * @param msg contains the actual message. - * @param file provides the context the message refers to. May be empty. - * @param line is the line in the above file the message refers to. - * @param column is the column in the above file the message refers to. - */ - Message(Severity severity, std::string msg, std::string file, int line, - int column) - : severity(severity), - msg(std::move(msg)), - file(std::move(file)), - line(line), - column(column){}; - - /** - * Returns true if the file string is set. - * - * @return true if the file string is set. 
- */ - bool hasFile() const { return !file.empty(); } - - /** - * Returns true if the line is set. - * - * @return true if the line number is a non-negative integer. - */ - bool hasLine() const { return line >= 0; } - - /** - * Returns true if column and line are set (since a column has no - * significance without a line number). - * - * @return true if line number and column number are non-negative - * integers. - */ - bool hasColumn() const { return hasLine() && column >= 0; } - }; - -private: - /** - * Minimum severity a log message should have before it is discarded. - */ - Severity minSeverity; - - /** - * Maximum encountered log message severity. - */ - Severity maxEncounteredSeverity; - - /** - * Stack containing the current file names that have been processed. - */ - std::stack filenameStack; - -protected: - /** - * Function to be overriden by child classes to actually display or store - * the messages. The default implementation just discards all incomming - * messages. - * - * @param msg is an instance of the Message struct containing the data that - * should be logged. - */ - virtual void process(const Message &msg){}; - -public: - /** - * Constructor of the Logger class. - * - * @param minSeverity is the minimum severity a log message should have. - * Messages below this severity are discarded. - */ - Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY) - : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG) - { - } - - Logger(const Logger &) = delete; - - /** - * Virtual destructor. - */ - virtual ~Logger(){}; - - /** - * Logs the given message. Most generic log function. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. 
- * Ignored if negative. - */ - void log(Severity severity, const std::string &msg, const std::string &file, - int line = -1, int column = -1); - - /** - * Logs the given message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void log(Severity severity, const std::string &msg, int line = -1, - int column = -1) - { - log(severity, msg, currentFilename(), line, column); - } - - /** - * Logs the given message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - * @tparam PosType is the actual type of pos and must implement a getLine - * and getColumn function. - */ - template - void logAt(Severity severity, const std::string &msg, const PosType &pos) - { - log(severity, msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs the given loggable exception. - * - * @param ex is the exception that should be logged. - */ - void log(const LoggableException &ex) - { - log(Severity::ERROR, ex.msg, - ex.file.empty() ? currentFilename() : ex.file, ex.line, ex.column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. 
- */ - void debug(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::DEBUG, msg, file, line, column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void debug(const std::string &msg, int line = -1, int column = -1) - { - debug(msg, currentFilename(), line, column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void debugAt(const std::string &msg, const PosType &pos) - { - debug(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void note(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::NOTE, msg, file, line, column); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. 
- */ - void note(const std::string &msg, int line = -1, int column = -1) - { - note(msg, currentFilename(), line, column); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void noteAt(const std::string &msg, const PosType &pos) - { - note(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void warning(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::WARNING, msg, file, line, column); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void warningAt(const std::string &msg, const PosType &pos) - { - warning(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void warning(const std::string &msg, int line = -1, int column = -1) - { - warning(msg, currentFilename(), line, column); - } - - /** - * Logs an error message. 
The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void error(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::ERROR, msg, file, line, column); - } - - /** - * Logs an error message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void error(const std::string &msg, int line = -1, int column = -1) - { - error(msg, currentFilename(), line, column); - } - - /** - * Logs an error message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void errorAt(const std::string &msg, const PosType &pos) - { - error(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. 
- */ - void fatalError(const std::string &msg, const std::string &file, - int line = -1, int column = -1) - { - log(Severity::FATAL_ERROR, msg, file, line, column); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void fatalError(const std::string &msg, int line = -1, int column = -1) - { - fatalError(msg, currentFilename(), line, column); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void fatalErrorAt(const std::string &msg, const PosType &pos) - { - fatalError(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Pushes a new file name onto the internal filename stack. - * - * @param name is the name of the file that should be added to the filename - * stack. - * @return the size of the filename stack. This number can be passed to the - * "unwindFilenameStack" method in order to return the stack to state it was - * in after this function has been called. - */ - unsigned int pushFilename(const std::string &name); - - /** - * Pops the filename from the internal filename stack. - * - * @return the current size of the filename stack. - */ - unsigned int popFilename(); - - /** - * Pops elements from the filename stack while it has more elements than - * the given number and the stack is non-empty. - * - * @param pos is the position the filename stack should be unwound to. Use - * a number returned by pushFilename. - */ - void unwindFilenameStack(unsigned int pos); - - /** - * Returns the topmost filename from the internal filename stack. 
- * - * @return the topmost filename from the filename stack or an empty string - * if the filename stack is empty. - */ - std::string currentFilename() - { - return filenameStack.empty() ? std::string{} : filenameStack.top(); - } - - /** - * Returns the maximum severity that was encountered by the Logger but at - * least Severity::DEBUG. - * - * @return the severity of the most severe log message but at least - * Severity::DEBUG. - */ - Severity getMaxEncounteredSeverity() { return maxEncounteredSeverity; } - - /** - * Returns the minimum severity. Messages with a smaller severity are - * discarded. - * - * @return the minimum severity. - */ - Severity getMinSeverity() { return minSeverity; } - - /** - * Sets the minimum severity. Messages with a smaller severity will be - * discarded. Only new messages will be filtered according to the new value. - * - * @param severity is the minimum severity for new log messages. - */ - void setMinSeverity(Severity severity) { minSeverity = severity; } -}; - -/** - * Class extending the Logger class and printing the log messages to the given - * stream. - */ -class TerminalLogger : public Logger { -private: - /** - * Reference to the target output stream. - */ - std::ostream &os; - - /** - * If true, the TerminalLogger will use colors to make the log messages - * prettier. - */ - bool useColor; - -protected: - /** - * Implements the process function and logs the messages to the output. - */ - void process(const Message &msg) override; - -public: - /** - * Constructor of the TerminalLogger class. - * - * @param os is the output stream the log messages should be logged to. - * Should be set to std::cerr in most cases. - * @param useColor if true, the TerminalLogger class will do its best to - * use ANSI/VT100 control sequences for colored log messages. - * @param minSeverity is the minimum severity below which log messages are - * discarded. 
- */ - TerminalLogger(std::ostream &os, bool useColor = false, - Severity minSeverity = DEFAULT_MIN_SEVERITY) - : Logger(minSeverity), os(os), useColor(useColor) - { - } -}; -} - -#endif /* _OUSIA_LOGGER_HPP_ */ - diff --git a/src/core/Registry.cpp b/src/core/Registry.cpp index 6ff9594..74d1cf8 100644 --- a/src/core/Registry.cpp +++ b/src/core/Registry.cpp @@ -16,8 +16,7 @@ along with this program. If not, see . */ -#include - +#include #include namespace ousia { diff --git a/src/core/Tokenizer.cpp b/src/core/Tokenizer.cpp index b99d1ed..0af5f5a 100644 --- a/src/core/Tokenizer.cpp +++ b/src/core/Tokenizer.cpp @@ -72,7 +72,7 @@ TokenTreeNode::TokenTreeNode(const std::map &inputs) { } -Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root) +Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root) : input(input), root(root) { } @@ -81,10 +81,10 @@ bool Tokenizer::prepare() { std::stringstream buffer; char c; - int startColumn = input.getColumn(); - int startLine = input.getLine(); + uint32_t startColumn = input.getColumn(); + uint32_t startLine = input.getLine(); bool bufEmpty = true; - while (input.peek(&c)) { + while (input.peek(c)) { if (root.children.find(c) != root.children.end()) { // if there might be a special token, keep peeking forward // until we find the token (or we don't). @@ -107,7 +107,7 @@ bool Tokenizer::prepare() input.consumePeek(); } } - if (!input.peek(&c)) { + if (!input.peek(c)) { // if we are at the end we break off the search. break; } @@ -153,7 +153,7 @@ bool Tokenizer::prepare() } } else{ //if we found nothing, read at least one character. 
- input.peek(&c); + input.peek(c); } } buffer << c; diff --git a/src/core/Tokenizer.hpp b/src/core/Tokenizer.hpp index 8f80150..33327cc 100644 --- a/src/core/Tokenizer.hpp +++ b/src/core/Tokenizer.hpp @@ -19,11 +19,12 @@ #ifndef _OUSIA_TOKENIZER_HPP_ #define _OUSIA_TOKENIZER_HPP_ +#include +#include #include #include -#include -#include "BufferedCharReader.hpp" +#include namespace ousia { @@ -120,13 +121,13 @@ static const int TOKEN_TEXT = -2; struct Token { int tokenId; std::string content; - int startColumn; - int startLine; - int endColumn; - int endLine; + uint32_t startColumn; + uint32_t startLine; + uint32_t endColumn; + uint32_t endLine; - Token(int tokenId, std::string content, int startColumn, int startLine, - int endColumn, int endLine) + Token(int tokenId, std::string content, uint32_t startColumn, uint32_t startLine, + uint32_t endColumn, uint32_t endLine) : tokenId(tokenId), content(content), startColumn(startColumn), @@ -160,7 +161,7 @@ struct Token { */ class Tokenizer { private: - BufferedCharReader &input; + CharReader &input; const TokenTreeNode &root; std::deque peeked; unsigned int peekCursor = 0; @@ -185,14 +186,14 @@ protected: public: /** * @param input The input of a Tokenizer is given in the form of a - * BufferedCharReader. Please refer to the respective documentation. + * CharReader. Please refer to the respective documentation. * @param root This is meant to be the root of a TokenTree giving the * specification of user-defined tokens this Tokenizer should recognize. * The Tokenizer promises to not change the TokenTree such that you can * re-use the same specification for multiple inputs. * Please refer to the TokenTreeNode documentation for more information. 
*/ - Tokenizer(BufferedCharReader &input, const TokenTreeNode &root); + Tokenizer(CharReader &input, const TokenTreeNode &root); /** * The next method consumes one Token from the input stream and gives @@ -224,9 +225,9 @@ public: */ void consumePeek(); - const BufferedCharReader &getInput() const { return input; } + const CharReader &getInput() const { return input; } - BufferedCharReader &getInput() { return input; } + CharReader &getInput() { return input; } }; } diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp deleted file mode 100644 index c460ed4..0000000 --- a/src/core/Utils.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include - -#include "Utils.hpp" - -namespace ousia { - -std::string Utils::trim(const std::string &s) -{ - size_t firstNonWhitespace = std::numeric_limits::max(); - size_t lastNonWhitespace = 0; - for (size_t i = 0; i < s.size(); i++) { - if (!isWhitespace(s[i])) { - firstNonWhitespace = std::min(i, firstNonWhitespace); - lastNonWhitespace = std::max(i, lastNonWhitespace); - } - } - - if (firstNonWhitespace < lastNonWhitespace) { - return s.substr(firstNonWhitespace, - lastNonWhitespace - firstNonWhitespace + 1); - } - return std::string{}; -} - -bool Utils::isIdentifier(const std::string &name) -{ - bool first = true; - for (char c : name) { - if (first && !(isAlphabetic(c) || c == '_')) { - return false; - } - if (first && !(isAlphanumeric(c) || c == '_' || c == '-')) { - return false; - } - first = false; - } - return true; -} -} - diff --git a/src/core/Utils.hpp b/src/core/Utils.hpp deleted file mode 100644 index 5332b50..0000000 --- a/src/core/Utils.hpp +++ /dev/null @@ -1,110 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef _OUSIA_UTILS_H_ -#define _OUSIA_UTILS_H_ - -#include -#include - -namespace ousia { - -class Utils { -public: - /** - * Returns true if the given character is in [A-Za-z] - */ - static bool isAlphabetic(const char c) - { - return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); - } - - /** - * Returns true if the given character is in [0-9] - */ - static bool isNumeric(const char c) { return (c >= '0') && (c <= '9'); } - - /** - * Returns true if the given character is in [0-9A-Fa-f] - */ - static bool isHexadecimal(const char c) - { - return ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F')) || - ((c >= 'a') && (c <= 'f')); - } - - /** - * Returns true if the given character is in [A-Za-z0-9] - */ - static bool isAlphanumeric(const char c) - { - return isAlphabetic(c) || isNumeric(c); - } - - /** - * Returns true if the given character is in [A-Za-z_][A-Za-z0-9_-]* - */ - static bool isIdentifier(const std::string &name); - - /** - * Returns true if the given character is a whitespace character. - */ - static bool isWhitespace(const char c) - { - return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'); - } - - /** - * Removes whitespace at the beginning and the end of the given string. - */ - static std::string trim(const std::string &s); - - /** - * Turns the elements of a collection into a string separated by the - * given delimiter. - * - * @param es is an iterable container of elements that can be appended to an - * output stream (the << operator must be implemented). - * @param delim is the delimiter that should be used to separate the items. - * @param start is a character sequence that should be prepended to the - * result. - * @param end is a character sequence that should be appended to the result. 
- */ - template - static std::string join(T es, const std::string &delim, - const std::string &start = "", - const std::string &end = "") - { - std::stringstream res; - bool first = true; - res << start; - for (const auto &e : es) { - if (!first) { - res << delim; - } - res << e; - first = false; - } - res << end; - return res.str(); - } -}; -} - -#endif /* _OUSIA_UTILS_H_ */ - diff --git a/src/core/common/CharReader.cpp b/src/core/common/CharReader.cpp new file mode 100644 index 0000000..373c0c1 --- /dev/null +++ b/src/core/common/CharReader.cpp @@ -0,0 +1,640 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include + +#include "CharReader.hpp" +#include "Utils.hpp" + +namespace ousia { + +/* Helper functions */ + +/** + * istreamReadCallback is used internally by the Buffer calss to stream data + * from an input stream. + * + * @param buf is points a the target memory region. + * @param size is the requested number of bytes. + * @param userData is a pointer at some user defined data. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. 
+ */ +static size_t istreamReadCallback(char *buf, size_t size, void *userData) +{ + return (static_cast(userData))->read(buf, size).gcount(); +} + +/* Class Buffer */ + +Buffer::Buffer(ReadCallback callback, void *userData) + : callback(callback), + userData(userData), + reachedEnd(false), + startBucket(buckets.end()), + endBucket(buckets.end()), + startOffset(0), + firstDead(0) +{ + // Load a first block of data from the stream + stream(); + startBucket = buckets.begin(); +} + +Buffer::Buffer(std::istream &istream) : Buffer(istreamReadCallback, &istream) {} + +Buffer::Buffer(const std::string &str) + : callback(nullptr), + userData(nullptr), + reachedEnd(true), + startBucket(buckets.end()), + endBucket(buckets.end()), + startOffset(0), + firstDead(0) +{ + // Copy the given string into a first buffer and set the start buffer + // correctly + Bucket &bucket = nextBucket(); + bucket.resize(str.size()); + std::copy(str.begin(), str.end(), bucket.begin()); + startBucket = buckets.begin(); +} + +#ifndef NDEBUG +Buffer::~Buffer() +{ + // Make sure all cursors have been deleted + for (bool cursor_alive: alive) { + assert(!cursor_alive); + } +} +#endif + +void Buffer::advance(BucketIterator &it) +{ + it++; + if (it == buckets.end()) { + it = buckets.begin(); + } +} + +void Buffer::advance(BucketList::const_iterator &it) const +{ + it++; + if (it == buckets.cend()) { + it = buckets.cbegin(); + } +} + +Buffer::Bucket &Buffer::nextBucket() +{ + constexpr size_t MAXVAL = std::numeric_limits::max(); + + // Fetch the minimum bucket index + size_t minBucketIdx = MAXVAL; + for (size_t i = 0; i < cursors.size(); i++) { + if (alive[i]) { + // Fetch references to the bucket and the cursor + const Cursor &cur = cursors[i]; + const Bucket &bucket = *(cur.bucket); + + // Increment the bucket index by one, if the cursor is at the end + // of the bucket (only valid if the LOOKBACK_SIZE is set to zero) + size_t bIdx = cur.bucketIdx; + if (LOOKBACK_SIZE == 0 && cur.bucketOffs == 
bucket.size()) { + bIdx++; + } + + // Decrement the bucket index by one, if the previous bucket still + // needs to be reached and cannot be overridden + if (bIdx > 0 && cur.bucketOffs < LOOKBACK_SIZE) { + bIdx--; + } + + // Set the bucket index to the minium + minBucketIdx = std::min(minBucketIdx, bIdx); + } + } + + // If there is space between the current start bucket and the read + // cursor, the start bucket can be safely overridden. + if (minBucketIdx > 0 && minBucketIdx != MAXVAL) { + // All cursor bucket indices will be decreased by one + for (size_t i = 0; i < cursors.size(); i++) { + cursors[i].bucketIdx--; + } + + // Increment the start offset + startOffset += startBucket->size(); + + // The old start bucket is the new end bucket + endBucket = startBucket; + + // Advance the start bucket, wrap around at the end of the list + advance(startBucket); + } else { + // No free bucket, insert a new one before the start bucket + endBucket = buckets.emplace(startBucket); + } + return *endBucket; +} + +Buffer::CursorId Buffer::nextCursor() +{ + bool hasCursor = false; + CursorId res = 0; + + // Search for the next free cursor starting with minNextCursorId + for (size_t i = firstDead; i < alive.size(); i++) { + if (!alive[i]) { + res = i; + hasCursor = true; + break; + } + } + + // Add a new cursor to the cursor list if no cursor is currently free + if (!hasCursor) { + res = cursors.size(); + cursors.resize(res + 1); + alive.resize(res + 1); + } + + // The next dead cursor is at least the next cursor + firstDead = res + 1; + + // Mark the new cursor as alive + alive[res] = true; + + return res; +} + +void Buffer::stream() +{ + // Fetch the bucket into which the data should be inserted, make sure it + // has the correct size + Bucket &tar = nextBucket(); + tar.resize(REQUEST_SIZE); + + // Read data from the stream into the target buffer + size_t size = callback(tar.data(), REQUEST_SIZE, userData); + + // If not enough bytes were returned, we're at the end of the 
stream + if (size < REQUEST_SIZE) { + tar.resize(size); + reachedEnd = true; + } +} + +Buffer::CursorId Buffer::createCursor() +{ + CursorId res = nextCursor(); + cursors[res].bucket = startBucket; + cursors[res].bucketIdx = 0; + cursors[res].bucketOffs = 0; + return res; +} + +Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref) +{ + CursorId res = nextCursor(); + cursors[res] = cursors[ref]; + return res; +} + +void Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to) +{ + cursors[to] = cursors[from]; +} + +void Buffer::deleteCursor(Buffer::CursorId cursor) +{ + alive[cursor] = false; + firstDead = std::min(firstDead, cursor); +} + +size_t Buffer::offset(Buffer::CursorId cursor) const +{ + const Cursor &cur = cursors[cursor]; + size_t offs = startOffset + cur.bucketOffs; + BucketList::const_iterator it = startBucket; + while (it != cur.bucket) { + offs += it->size(); + advance(it); + } + return offs; +} + +size_t Buffer::moveForward(CursorId cursor, size_t relativeOffs) +{ + size_t offs = relativeOffs; + Cursor &cur = cursors[cursor]; + while (offs > 0) { + // Fetch the current bucket of the cursor + Bucket &bucket = *(cur.bucket); + + // If there is enough space in the bucket, simply increment the bucket + // offset by the given relative offset + const size_t space = bucket.size() - cur.bucketOffs; + if (space >= offs) { + cur.bucketOffs += offs; + break; + } else { + // Go to the end of the current bucket otherwise + offs -= space; + cur.bucketOffs = bucket.size(); + + // Go to the next bucket + if (cur.bucket != endBucket) { + // Go to the next bucket + advance(cur.bucket); + cur.bucketIdx++; + cur.bucketOffs = 0; + } else { + // Abort, if there is no more data to stream, otherwise just + // load new data + if (reachedEnd) { + return relativeOffs - offs; + } + stream(); + } + } + } + return relativeOffs; +} + +size_t Buffer::moveBackward(CursorId cursor, size_t relativeOffs) +{ + size_t offs = relativeOffs; + Cursor &cur = cursors[cursor]; + 
while (offs > 0) { + // If there is enough space in the bucket, simply decrement the bucket + // offset by the given relative offset + if (cur.bucketOffs >= offs) { + cur.bucketOffs -= offs; + break; + } else { + // Go to the beginning of the current bucket otherwise + offs -= cur.bucketOffs; + cur.bucketOffs = 0; + + // Abort if there is no more bucket to got back to + if (cur.bucketIdx == 0) { + return relativeOffs - offs; + } + + // Go to the previous bucket (wrap around at the beginning of the + // list) + if (cur.bucket == buckets.begin()) { + cur.bucket = buckets.end(); + } + cur.bucket--; + + // Decrement the bucket index, and set the current offset to the + // end of the new bucket + cur.bucketIdx--; + cur.bucketOffs = cur.bucket->size(); + } + } + return relativeOffs; +} + +ssize_t Buffer::moveCursor(CursorId cursor, ssize_t relativeOffs) +{ + if (relativeOffs > 0) { + return moveForward(cursor, relativeOffs); + } else if (relativeOffs < 0) { + return -moveBackward(cursor, -relativeOffs); + } else { + return 0; + } +} + +bool Buffer::atEnd(Buffer::CursorId cursor) const +{ + const Cursor &c = cursors[cursor]; + return reachedEnd && + (c.bucket == endBucket && c.bucketOffs == endBucket->size()); +} + +bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr) +{ + Cursor &cur = cursors[cursor]; + while (true) { + // Reference at the current bucket + Bucket &bucket = *(cur.bucket); + + // If there is still data in the current bucket, return this data + if (cur.bucketOffs < bucket.size()) { + c = bucket[cur.bucketOffs]; + if (incr) { + cur.bucketOffs++; + } + return true; + } else if (cur.bucket == endBucket) { + // Return false if the end of the stream has been reached, otherwise + // load new data + if (reachedEnd) { + return false; + } + stream(); + } + + // Go to the next bucket + cur.bucketIdx++; + cur.bucketOffs = 0; + advance(cur.bucket); + } +} + +bool Buffer::read(Buffer::CursorId cursor, char &c) +{ + return fetchCharacter(cursor, c, true); +} 
+ +bool Buffer::fetch(CursorId cursor, char &c) +{ + return fetchCharacter(cursor, c, false); +} + +/* CharReader::Cursor class */ + +void CharReader::Cursor::assign(std::shared_ptr buffer, + CharReader::Cursor &cursor) +{ + // Copy the cursor position + buffer->copyCursor(cursor.cursor, this->cursor); + + // Copy the state + line = cursor.line; + column = cursor.column; +} + +/* CharReader class */ + +CharReader::CharReader(std::shared_ptr buffer, size_t line, + size_t column) + : buffer(buffer), + readCursor(buffer->createCursor(), line, column), + peekCursor(buffer->createCursor(), line, column), + coherent(true) +{ +} + +CharReader::CharReader(const std::string &str, size_t line, size_t column) + : CharReader(std::shared_ptr{new Buffer{str}}, line, column) +{ +} + +CharReader::CharReader(std::istream &istream, size_t line, size_t column) + : CharReader(std::shared_ptr{new Buffer{istream}}, line, column) +{ +} + +CharReader::~CharReader() +{ + buffer->deleteCursor(readCursor.cursor); + buffer->deleteCursor(peekCursor.cursor); +} + +bool CharReader::readAtCursor(Cursor &cursor, char &c) +{ + // Return false if we're at the end of the stream + if (!buffer->read(cursor.cursor, c)) { + return false; + } + + // Substitute linebreak sequences with a single '\n' + if (c == '\n' || c == '\r') { + // Output a single \n + c = '\n'; + + // Read ahead one byte; step back unless it completes a two-byte linebreak. + // NOTE(review): c was overwritten with '\n' above, so c2 == c cannot match a repeated '\r' -- confirm + char c2; + if (buffer->read(cursor.cursor, c2)) { + if ((c2 != '\n' && c2 != '\r') || c2 == c) { + buffer->moveCursor(cursor.cursor, -1); + } + } + } + + // Count lines and columns + if (c == '\n') { + // A linebreak was reached, go to the next line + cursor.line++; + cursor.column = 1; + } else { + // Do not count UTF-8 continuation bytes (bit pattern 10xxxxxx) as columns + if (!((c & 0x80) && !(c & 0x40))) { + cursor.column++; + } + } + return true; +} + +bool CharReader::peek(char &c) +{ + // If the reader was coherent, update the peek cursor state + if (coherent) { +
peekCursor.assign(buffer, readCursor); + coherent = false; + } + + // Read a character from the peek cursor + return readAtCursor(peekCursor, c); +} + +bool CharReader::read(char &c) +{ + // Read a character from the buffer at the current read cursor + bool res = readAtCursor(readCursor, c); + + // Resynchronise the peek cursor with the read cursor: copy the full state if + // a peek was pending, otherwise only the buffer position is copied + if (!coherent) { + peekCursor.assign(buffer, readCursor); + coherent = true; + } else { + buffer->copyCursor(readCursor.cursor, peekCursor.cursor); + } + + // Return the result of the read function + return res; +} + +void CharReader::resetPeek() +{ + if (!coherent) { + peekCursor.assign(buffer, readCursor); + coherent = true; + } +} + +void CharReader::consumePeek() +{ + if (!coherent) { + readCursor.assign(buffer, peekCursor); + coherent = true; + } +} + +bool CharReader::consumeWhitespace() +{ + char c; + while (peek(c)) { + if (!Utils::isWhitespace(c)) { + resetPeek(); + return true; + } + consumePeek(); + } + return false; +} + +CharReaderFork CharReader::fork() +{ + return CharReaderFork(buffer, readCursor, peekCursor, coherent); +} + +CharReader::Context CharReader::getContext(ssize_t maxSize) +{ + // Clone the current read cursor + Buffer::CursorId cur = buffer->createCursor(readCursor.cursor); + + // Fetch the start position of the search + ssize_t offs = buffer->offset(cur); + ssize_t start = offs; + ssize_t end = offs; + char c; + + // Search the beginning of the line with the last non-whitespace character + bool hadNonWhitespace = false; + bool foundBegin = false; + for (ssize_t i = 0; i < maxSize; i++) { + // Fetch the character at the current position + if (buffer->fetch(cur, c)) { + // Abort at a linebreak once a non-whitespace character has been seen + hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c); + if (hadNonWhitespace && (c == '\n' || c == '\r')) { + buffer->moveCursor(cur, 1); + start++; + foundBegin = true; + break; + } + } + if (buffer->moveCursor(cur,
-1) == 0) { + foundBegin = true; + break; + } else { + // The cursor moved one byte back, update the start position + start--; + } + } + + // Move back to the original offset and search the end of the line + buffer->moveCursor(cur, offs - start); + bool foundEnd = false; + for (ssize_t i = 0; i < maxSize; i++) { + // Increment the end counter if a character was read, abort if the end + // of the stream has been reached + if (buffer->read(cur, c)) { + end++; + } else { + foundEnd = true; + break; + } + + // Abort on linebreak characters + if (c == '\n' || c == '\r') { + foundEnd = true; + break; + } + } + + // Calculate the truncated start and end position and limit the number of + // characters to the maximum number of characters + ssize_t tStart = start; + ssize_t tEnd = end; + if (tEnd - tStart > maxSize) { + tStart = std::max(offs - maxSize / 2, tStart); + tEnd = tStart + maxSize; + } + + // Try to go to the calculated start position and fetch the actual start + // position + ssize_t aStart = end + buffer->moveCursor(cur, tStart - end); + if (aStart > tStart) { + tEnd = tEnd + (aStart - tStart); + tStart = aStart; + } + + // Read one line + std::stringstream ss; + size_t relPos = 0; + for (ssize_t i = tStart; i < tEnd; i++) { + if (buffer->read(cur, c)) { + // Break once a linebreak is reached + if (c == '\n' || c == '\r') { + break; + } + + // Add the current character to the output + ss << c; + + // Increment the string-relative offset as long as the original + // offset is not reached in the for loop + if (i < offs) { + relPos++; + } + } + } + + // Delete the newly created cursor + buffer->deleteCursor(cur); + + return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start, + !foundEnd || tEnd != end}; +} + +/* Class CharReaderFork */ + +CharReaderFork::CharReaderFork(std::shared_ptr buffer, + CharReader::Cursor &parentReadCursor, + CharReader::Cursor &parentPeekCursor, + bool coherent) + : CharReader(buffer, 1, 1), + parentReadCursor(parentReadCursor), +
parentPeekCursor(parentPeekCursor) +{ + readCursor.assign(buffer, parentReadCursor); + peekCursor.assign(buffer, parentPeekCursor); + this->coherent = coherent; +} + +void CharReaderFork::commit() +{ + parentReadCursor.assign(buffer, readCursor); + parentPeekCursor.assign(buffer, peekCursor); +} +} + diff --git a/src/core/common/CharReader.hpp b/src/core/common/CharReader.hpp new file mode 100644 index 0000000..3cbe4b4 --- /dev/null +++ b/src/core/common/CharReader.hpp @@ -0,0 +1,665 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file CharReader.hpp + * + * Used within all parsers to read single characters from an underlying stream. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_CHAR_READER_HPP_ +#define _OUSIA_CHAR_READER_HPP_ + +#include +#include +#include +#include + +namespace ousia { + +/** + * A chunked ring buffer used in CharReader to provide access to an input stream + * with multiple read cursors. The Buffer automatically expands to the + * size of the range spanned by the read cursors while reusing already allocated + * memory. + */ +class Buffer { +public: + /** + * Callback function which is called whenever new data is requested from the + * input stream. + * + * @param buf points at the target memory region. + * @param size is the requested number of bytes.
+ * @param userData is a pointer at some user defined data given in the + * constructor. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. + */ + using ReadCallback = size_t (*)(char *buf, size_t size, void *userData); + + /** + * Handle used to identify a cursor. + */ + using CursorId = size_t; + +private: + /** + * Number of bytes to request from the input stream. Set to 64 KiB because + * this seems to be a nice value for I/O operations according to multiple + * sources. + */ + static constexpr size_t REQUEST_SIZE = 64 * 1024; + + /** + * Number of bytes the buffer guarantees to be capable of looking back + * for extracting the current context. + */ + static constexpr size_t LOOKBACK_SIZE = 128; + + /** + * Type used internally to represent one chunk of memory. + */ + using Bucket = std::vector; + + /** + * Type used internally to represent a bucket container. + */ + using BucketList = std::list; + + /** + * Type used internally for representing iterators in the bucket list. + */ + using BucketIterator = BucketList::iterator; + + /** + * Type used internally to represent a read cursor. + */ + struct Cursor { + /** + * Iterator pointing at the current bucket. + */ + BucketIterator bucket; + + /** + * Index of the bucket relative to the start bucket. + */ + size_t bucketIdx; + + /** + * Current offset within that bucket. + */ + size_t bucketOffs; + }; + + /** + * List of buckets containing the buffered memory. + */ + BucketList buckets; + + /** + * List of cursors used to access the memory. Note that cursors can be + * marked as inactive and reused lateron (to avoid having to resize the + * vector). + */ + std::vector cursors; + + /** + * Bitfield specifying which of the cursors is actually valid. + */ + std::vector alive; + + /** + * Function to be called whenever new data is needed. 
Set to nullptr if the + * Buffer is not backed by an input stream. + */ + const ReadCallback callback; + + /** + * User data given in the constructor. + */ + void *userData; + + /** + * Set to true if the input stream is at its end. + */ + bool reachedEnd; + + /** + * Iterator pointing at the current start bucket. + */ + BucketIterator startBucket; + + /** + * Iterator pointing at the last bucket. + */ + BucketIterator endBucket; + + /** + * Byte offset of the start bucket relative to the beginning of the stream. + */ + size_t startOffset; + + /** + * Points at the smallest possible available cursor index, yet does not + * guarantee that this cursor index actuall is free. + */ + CursorId firstDead; + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketIterator &it); + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketList::const_iterator &it) const; + + /** + * Internally used to find the next free cursor in the cursors vector. The + * cursor is marked as active. + * + * @return the next free cursor index. + */ + CursorId nextCursor(); + + /** + * Returns a reference at the next bucket into which data should be + * inserted. + * + * @return a bucket into which the data can be inserted. + */ + Bucket &nextBucket(); + + /** + * Reads data from the input stream and places it in the next free buffer. + */ + void stream(); + + /** + * Moves the given cursor forward. + */ + size_t moveForward(CursorId cursor, size_t relativeOffs); + + /** + * Moves the given cursor backward. + */ + size_t moveBackward(CursorId cursor, size_t relativeOffs); + + /** + * Reads a character from the current cursor position and optionally + * advances. + */ + bool fetchCharacter(CursorId cursor, char &c, bool incr); + +public: + /** + * Intializes the Buffer with a reference to a ReadCallback that is used + * to fetch data from an underlying input stream. 
+ * + * @param callback is the function that will be called whenever data is read + * from the ring buffer and the buffer does not hold enough data to fulfill + * this read request. + * @param userData is a pointer to user defined data which will be passed to + * the callback function. + */ + Buffer(ReadCallback callback, void *userData); + + /** + * Initializes the Buffer with a reference to an std::istream from which + * data will be read. + * + * @param istream is the input stream from which the data should be read. + */ + Buffer(std::istream &istream); + + /** + * Initializes the Buffer with the contents of the given string, after + * this operation the Buffer has a fixed size. + * + * @param str is the string containing the data that should be copied into + * the ring buffer. + */ + Buffer(const std::string &str); + +#ifndef NDEBUG + /** + * Destructor of the Buffer class. Makes sure that all cursors have been + * freed. + */ + ~Buffer(); +#endif + + // No copy + Buffer(const Buffer &) = delete; + + // No assign + Buffer &operator=(const Buffer &) = delete; + + /** + * Creates a new read cursor positioned at the smallest possible position + * in the ring buffer. + */ + CursorId createCursor(); + + /** + * Creates a new read cursor positioned at the same position as the given + * read cursor. + * + * @param ref is the read cursor that should be used as reference for the + * new read cursor. + */ + CursorId createCursor(CursorId ref); + + /** + * Copies the position of one cursor to another cursor. + * + * @param from is the cursor id of which the position should be copied. + * @param to is the cursor id to which the position should be copied. + */ + void copyCursor(CursorId from, CursorId to); + + /** + * Deletes the cursor with the given id. The cursor may no longer be used + * after this function has been called. + * + * @param cursor is the id of the cursor that should be freed. 
+ */ + void deleteCursor(CursorId cursor); + + /** + * Moves a cursor by relativeOffs bytes. Note that moving backwards is theoretically + * limited by the LOOKBACK_SIZE of the Buffer, practically it will most + * likely be limited by the REQUEST_SIZE, so you can go at most 64 KiB + * backwards. + * + * @param cursor is the cursor that should be moved. + * @param relativeOffs is a positive or negative integer number specifying + * the number of bytes the cursor should be moved forward (positive numbers) + * or backwards (negative numbers). + * @return the actual number of bytes the cursor was moved. This number is + * smaller in magnitude than relativeOffs if the end of the stream (forward) or the start bucket (backward) was reached first. + */ + ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs); + + /** + * Returns the current byte offset of the given cursor relative to the + * beginning of the stream. + * + * @param cursor is the cursor for which the byte offset relative to the + * beginning of the stream should be returned. + * @return the number of bytes since the beginning of the stream for the + * given cursor. + */ + size_t offset(CursorId cursor) const; + + /** + * Returns true if the given cursor currently is at the end of the stream. + * + * @param cursor is the cursor for which the atEnd flag should be returned. + * @return true if there are no more bytes for this cursor. If false + * is returned, this means that there may be more bytes in the stream, + * nevertheless the end of the stream may be hit once the next read function + * is called. + */ + bool atEnd(CursorId cursor) const; + + /** + * Reads a single character from the ring buffer from the given cursor and + * moves to the next character. + * + * @param cursor specifies the cursor from which the data should be read. + * The cursor will be advanced by one byte. + * @param c is the character into which the data needs to be read. + * @return true if a character was read, false if the end of the stream has + * been reached.
+ */ + bool read(CursorId cursor, char &c); + + /** + * Returns a single character from the ring buffer from the current cursor + * position and stays at that position. + * + * @param cursor specifies the cursor from which the data should be read. + * The cursor is not advanced past the returned character. + * @param c is the character into which the data needs to be read. + * @return true if a character could be fetched, false if the end of the + * stream has been reached. + */ + bool fetch(CursorId cursor, char &c); +}; + +// Forward declaration +class CharReaderFork; + +/** + * Used within parsers for convenient access to single characters in an input + * stream or buffer. It allows reading and peeking single characters from a + * buffer. Additionally it counts the current column/row (with correct handling + * for UTF-8) and contains an internal state machine that handles the detection + * of linebreaks and converts these to a single '\n'. + */ +class CharReader { +public: + /** + * The context struct is used to represent the current context the char + * reader is in. This context can for example be used when building error + * messages. + */ + struct Context { + /** + * Set to the content of the current line. + */ + std::string line; + + /** + * Relative position (in characters) within that line. + */ + size_t relPos; + + /** + * Set to true if the beginning of the line has been truncated (because + * the reader position is too far away from the actual position of the + * line). + */ + bool truncatedStart; + + /** + * Set to true if the end of the line has been truncated (because the + * reader position is too far away from the actual end position of the + * line).
+ */ + bool truncatedEnd; + + Context() + : line(), relPos(0), truncatedStart(false), truncatedEnd(false) + { + } + + Context(std::string line, size_t relPos, bool truncatedStart, + bool truncatedEnd) + : line(std::move(line)), + relPos(relPos), + truncatedStart(truncatedStart), + truncatedEnd(truncatedEnd) + { + } + }; + +protected: + /** + * Internally used cursor structure for managing the read and the peek + * cursor. + */ + struct Cursor { + /** + * Corresponding cursor in the underlying buffer instance. + */ + const Buffer::CursorId cursor; + + /** + * Current line the cursor is in. + */ + uint32_t line; + + /** + * Current column the cursor is in. + */ + uint32_t column; + + /** + * Constructor of the Cursor class. + * + * @param cursor is the underlying cursor in the Buffer instance. + */ + Cursor(Buffer::CursorId cursor, size_t line, size_t column) + : cursor(cursor), line(line), column(column) + { + } + + /** + * Assigns one cursor to another. + * + * @param buffer is the underlying buffer instance the internal cursor + * belongs to. + * @param cursor is the cursor from which the state should be copied. + */ + void assign(std::shared_ptr buffer, Cursor &cursor); + }; + +private: + /** + * Substitutes "\r", "\n\r", "\r\n" with a single "\n". + * + * @param cursor is the cursor from which the character should be read. + * @param c a reference to the character that should be written. + * @return true if another character needs to be read. + */ + bool substituteLinebreaks(Cursor &cursor, char &c); + + /** + * Reads a single character from the given cursor. + * + * @param cursor is the cursor from which the character should be read. + * @param c a reference to the character that should be written. + * @return true if a character was read, false if the end of the stream has + * been reached. + */ + bool readAtCursor(Cursor &cursor, char &c); + +protected: + /** + * Reference pointing at the underlying buffer. 
+ */ + std::shared_ptr buffer; + + /** + * Cursor used for reading. + */ + Cursor readCursor; + + /** + * Cursor used for peeking. + */ + Cursor peekCursor; + + /** + * Set to true as long as the underlying Buffer cursor is at the same position + * for the read and the peek cursor. This is only used for optimization + * purposes and makes consecutive reads a bit faster. + */ + bool coherent; + + /** + * Protected constructor of the CharReader base class. Creates new read + * and peek cursors for the given buffer. + * + * @param buffer is a reference to the underlying Buffer class responsible + * for allowing to read from a single input stream from multiple locations. + */ + CharReader(std::shared_ptr buffer, size_t line, size_t column); + +public: + /** + * Creates a new CharReader instance from a string. + * + * @param str is a string containing the input data. + * @param line is the start line. + * @param column is the start column. + */ + CharReader(const std::string &str, size_t line = 1, size_t column = 1); + + /** + * Creates a new CharReader instance for an input stream. + * + * @param istream is the input stream from which incoming data should be + * read. + * @param line is the start line. + * @param column is the start column. + */ + CharReader(std::istream &istream, size_t line = 1, size_t column = 1); + + /** + * Deletes the used cursors from the underlying buffer instance. + */ + ~CharReader(); + + // No copy -- NOTE(review): the parameter type is Buffer, not CharReader; confirm this is the intended signature + CharReader(const Buffer &) = delete; + + // No assign -- NOTE(review): the operand type is Buffer, not CharReader; confirm as above + CharReader &operator=(const Buffer &) = delete; + + /** + * Peeks a single character. If called multiple times, returns the + * character after the previously peeked character. + * + * @param c is a reference to the character to which the result should be + * written. + * @return true if the character was successfully read, false if there are + * no more characters to be read in the buffer. + */ + bool peek(char &c); + + /** + * Reads a character from the input data.
If "peek" was called + * beforehand, resets the peek pointer. + * + * @param c is a reference to the character to which the result should be + * written. + * @return true if the character was successfully read, false if there are + * no more characters to be read in the buffer. + */ + bool read(char &c); + + /** + * Resets the peek pointer to the "read" pointer. + */ + void resetPeek(); + + /** + * Advances the read pointer to the peek pointer -- so if the "peek" + * function was called, "read" will now return the character after + * the last peeked character. + */ + void consumePeek(); + + /** + * Moves the read cursor to the next non-whitespace character. Returns + * false, if the end of the stream was reached. + * + * @return false if the end of the stream was reached, true otherwise. + */ + bool consumeWhitespace(); + + /** + * Creates a new CharReader located at the same position as this CharReader + * instance, yet the new CharReader can be used independently of this + * CharReader. Use the "commit" function of the returned CharReader to + * copy the state of the forked CharReaderFork to this CharReader. + * + * @return a CharReaderFork instance positioned at the same location as this + * CharReader instance. + */ + CharReaderFork fork(); + + /** + * Returns true if there are no more characters as the stream was + * closed. + * + * @return true if there is no more data. + */ + bool atEnd() const { return buffer->atEnd(readCursor.cursor); } + + /** + * Returns the current line (starting with one). + * + * @return the current line number. + */ + uint32_t getLine() const { return readCursor.line; } + + /** + * Returns the current column (starting with one). + * + * @return the current column number. + */ + uint32_t getColumn() const { return readCursor.column; } + + /** + * Returns the current byte offset of the read cursor. + * + * @return the byte position within the stream.
+ */ + size_t getOffset() const { return buffer->offset(readCursor.cursor); }; + + /** + * Returns the line the read cursor currently is in, but at most the + * given number of characters in the form of a Context structure. + */ + Context getContext(ssize_t maxSize); +}; + +/** + * A CharReaderFork is returned whenever the "fork" function of the CharReader + * class is used. Its "commit" function can be used to move the underlying + * CharReader instance to the location of the CharReaderFork instance. Otherwise + * the read location of the underlying CharReader is left unchanged. + */ +class CharReaderFork : public CharReader { +private: + friend CharReader; + + /** + * The read cursor of the underlying CharReader instance. + */ + CharReader::Cursor &parentReadCursor; + + /** + * The peek cursor of the underlying CharReader instance. + */ + CharReader::Cursor &parentPeekCursor; + + /** + * Constructor of the CharReaderFork class. + * + * @param buffer is a reference at the parent Buffer instance. + * @param parentReadCursor is a reference at the parent read cursor. + * @param parentPeekCursor is a reference at the parent peek cursor. + * @param coherent specifies whether the char reader cursors are initialized + * coherently. + */ + CharReaderFork(std::shared_ptr buffer, + CharReader::Cursor &parentReadCursor, + CharReader::Cursor &parentPeekCursor, bool coherent); + +public: + /** + * Moves the read and peek cursor of the parent CharReader to the location + * of the read and peek cursor in the fork.
+ */ + void commit(); +}; + +} + +#endif /* _OUSIA_CHAR_READER_HPP_ */ + diff --git a/src/core/common/Exceptions.cpp b/src/core/common/Exceptions.cpp new file mode 100644 index 0000000..d064f35 --- /dev/null +++ b/src/core/common/Exceptions.cpp @@ -0,0 +1,46 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "Exceptions.hpp" + +namespace ousia { + +/* Class LoggableException */ + +std::string LoggableException::formatMessage(const std::string &msg, + const std::string &file, + int line, int column) +{ + std::stringstream ss; + ss << "error "; + if (!file.empty()) { + ss << "while processing \"" << file << "\" "; + } + if (line >= 0) { + ss << "at line " << line << ", "; + if (column >= 0) { + ss << "column " << column << " "; + } + } + ss << "with message: " << msg; + return ss.str(); +} +} + diff --git a/src/core/common/Exceptions.hpp b/src/core/common/Exceptions.hpp new file mode 100644 index 0000000..00d6106 --- /dev/null +++ b/src/core/common/Exceptions.hpp @@ -0,0 +1,162 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Exceptions.hpp + * + * Describes basic exception classes which are used throughout Ousía. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_EXCEPTIONS_HPP_ +#define _OUSIA_EXCEPTIONS_HPP_ + +namespace ousia { + +/** + * Base exception class all other Ousía exceptions should derive from. + */ +class OusiaException : public std::exception { +private: + /** + * Error message which will be printed by the runtime environment if the + * exception is not caught and handled in the code. + */ + const std::string formatedMessage; + +public: + /** + * Constructor of the OusiaException class. + * + * @param formatedMessage is a formatted message that should be printed by + * the runtime environment if the exception is not caught. + */ + OusiaException(std::string formatedMessage) + : formatedMessage(std::move(formatedMessage)) + { + } + + /** + * Virtual destructor. + */ + virtual ~OusiaException() {} + + /** + * Implementation of the std::exception::what function, used to retrieve + * the error message that should be printed by the runtime environment. + * + * @return a pointer to the formatted message string given in the + * constructor. + */ + const char *what() const noexcept override + { + return formatedMessage.c_str(); + } +}; + +/** + * Exception class which can be directly passed to a Logger instance and thus + * makes it simple to handle non-recoverable errors in the code. + */ +class LoggableException : public OusiaException { +private: + /** + * Function used internally to build the formatted message that should be + * reported to the runtime environment.
+ */ + static std::string formatMessage(const std::string &msg, + const std::string &file, int line, + int column); + +public: + /** + * Message describing the error that occurred. + */ + const std::string msg; + + /** + * Name of the file in which the error occurred. May be empty. + */ + const std::string file; + + /** + * Line at which the exception occurred. Negative values are ignored. + */ + const int line; + + /** + * Column at which the exception occurred. Negative values are ignored. + */ + const int column; + + /** + * Constructor of the LoggableException class. + * + * @param msg contains the error message. + * @param file provides the context the message refers to. May be empty. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + LoggableException(std::string msg, std::string file, int line = -1, + int column = -1) + : OusiaException(formatMessage(msg, file, line, column)), + msg(std::move(msg)), + file(std::move(file)), + line(line), + column(column) + { + } + + /** + * Constructor of the LoggableException class with empty file. + * + * @param msg contains the error message. + * @param line is the line the message refers to. + * @param column is the column the message refers to. + */ + LoggableException(std::string msg, int line = -1, int column = -1) + : OusiaException(formatMessage(msg, "", line, column)), + msg(std::move(msg)), + line(line), + column(column) + { + } + + /** + * Constructor of the LoggableException class with empty file and a + * position object. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information.
+ */ + template + LoggableException(std::string msg, const PosType &pos) + : OusiaException( + formatMessage(msg, "", pos.getLine(), pos.getColumn())), + msg(std::move(msg)), + line(pos.getLine()), + column(pos.getColumn()) + { + } +}; +} + +#endif /* _OUSIA_EXCEPTIONS_HPP_ */ + diff --git a/src/core/common/Logger.cpp b/src/core/common/Logger.cpp new file mode 100644 index 0000000..17f55a6 --- /dev/null +++ b/src/core/common/Logger.cpp @@ -0,0 +1,161 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include +#include + +#include "Logger.hpp" + +namespace ousia { + +/* Class Logger */ + +void Logger::log(Severity severity, const std::string &msg, + const std::string &file, int line, int column) +{ + // Copy the current severity level + if (static_cast(severity) > static_cast(maxEncounteredSeverity)) { + maxEncounteredSeverity = severity; + } + + // Call the actual log message function if the severity is larger or equal + // to the minimum severity + if (static_cast(severity) >= static_cast(minSeverity)) { + process(Message{severity, msg, file, line, column}); + } +} + +unsigned int Logger::pushFilename(const std::string &name) +{ + filenameStack.push(name); + return filenameStack.size(); +} + +unsigned int Logger::popFilename() +{ + filenameStack.pop(); + return filenameStack.size(); +} + +void Logger::unwindFilenameStack(unsigned int pos) +{ + while (filenameStack.size() > pos && !filenameStack.empty()) { + filenameStack.pop(); + } +} + +/* Class TerminalLogger */ + +/** + * Small class used internally for formated terminal output using ANSI/VT100 + * escape codes on supported terminals. + * + * TODO: Deactivate if using windows or use the corresponding API function. + */ +class Terminal { +private: + /** + * If set to false, no control codes are generated. 
+ */ + bool active; + +public: + static const int BLACK = 30; + static const int RED = 31; + static const int GREEN = 32; + static const int YELLOW = 33; + static const int BLUE = 34; + static const int MAGENTA = 35; + static const int CYAN = 36; + static const int WHITE = 37; + + Terminal(bool active) : active(active) {} + + std::string color(int color, bool bright = true) const + { + if (!active) { + return std::string{}; + } + std::stringstream ss; + ss << "\x1b["; + if (bright) { + ss << "1;"; + } + ss << color << "m"; + return ss.str(); + } + + std::string reset() const + { + if (!active) { + return std::string{}; + } + return "\x1b[0m"; + } +}; + +void TerminalLogger::process(const Message &msg) +{ + Terminal t(useColor); + + // Print the file name + if (msg.hasFile()) { + os << t.color(Terminal::WHITE, true) << msg.file << t.reset(); + } + + // Print line and column number + if (msg.hasLine()) { + if (msg.hasFile()) { + os << ':'; + } + os << t.color(Terminal::WHITE, true) << msg.line + << t.reset(); + if (msg.hasColumn()) { + os << ':' << msg.column; + } + } + + // Print the optional seperator + if (msg.hasFile() || msg.hasLine()) { + os << ": "; + } + + // Print the severity + switch (msg.severity) { + case Severity::DEBUG: + break; + case Severity::NOTE: + os << t.color(Terminal::CYAN, true) << "note: "; + break; + case Severity::WARNING: + os << t.color(Terminal::MAGENTA, true) << "warning: "; + break; + case Severity::ERROR: + os << t.color(Terminal::RED, true) << "error: "; + break; + case Severity::FATAL_ERROR: + os << t.color(Terminal::RED, true) << "fatal: "; + break; + } + os << t.reset(); + + // Print the actual message + os << msg.msg << std::endl; +} +} + diff --git a/src/core/common/Logger.hpp b/src/core/common/Logger.hpp new file mode 100644 index 0000000..e6b97f4 --- /dev/null +++ b/src/core/common/Logger.hpp @@ -0,0 +1,609 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can 
redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Logger.hpp + * + * Contains classes for logging messages in Ousía. Provides a generic Logger + * class, and TerminalLogger, an extension of Logger which logs do an output + * stream. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_LOGGER_HPP_ +#define _OUSIA_LOGGER_HPP_ + +#include +#include +#include +#include + +#include "Exceptions.hpp" + +namespace ousia { + +/** + * Enum containing the severities used for logging errors and debug messages. + */ +enum class Severity : int { + /** + * Indicates that this message was only printed for debugging. Note that + * in release builds messages with this severity are discarded. + */ + DEBUG = 0, + + /** + * A message which might provide additional information to the user. + */ + NOTE = 1, + + /** + * A message which warns of possible mistakes by the user which might not be + * actual errors but may lead to unintended behaviour. + */ + WARNING = 2, + + /** + * An error occurred while processing, however program execution continues, + * trying to deal with the error situation (graceful degradation). However, + * messages with this severity may be followed up by fatal errors. + */ + ERROR = 3, + + /** + * A fatal error occurred. Program execution cannot continue. 
+ */ + FATAL_ERROR = 4 +}; + +#ifdef NDEBUG +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE; +#else +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG; +#endif + +/** + * The Logger class is the base class the individual logging systems should + * derive from. It provides a simple interface for logging errors, warnings and + * notes and filters these according to the set minimum severity. Additionally + * a stack of file names is maintained in order to allow simple descent into + * included files. Note however, that this base Logger class simply discards the + * incomming log messages. Use one of the derived classes to actually handle the + * log messages. + */ +class Logger { +public: + /** + * The message struct represents a single log message and all information + * attached to it. + */ + struct Message { + /** + * Severity of the log message. + */ + Severity severity; + + /** + * Actual log message. + */ + std::string msg; + + /** + * Refers to the file which provides the context for this error message. + * May be empty. + */ + std::string file; + + /** + * Line in the above file the error message refers to. Ignored if + * smaller than zero. + */ + int line; + + /** + * Column in the above file the error message refers to. Ignored if + * smaller than zero. + */ + int column; + + /** + * Constructor of the Message struct. + * + * @param severity describes the message severity. + * @param msg contains the actual message. + * @param file provides the context the message refers to. May be empty. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + Message(Severity severity, std::string msg, std::string file, int line, + int column) + : severity(severity), + msg(std::move(msg)), + file(std::move(file)), + line(line), + column(column){}; + + /** + * Returns true if the file string is set. + * + * @return true if the file string is set. 
+ */ + bool hasFile() const { return !file.empty(); } + + /** + * Returns true if the line is set. + * + * @return true if the line number is a non-negative integer. + */ + bool hasLine() const { return line >= 0; } + + /** + * Returns true if column and line are set (since a column has no + * significance without a line number). + * + * @return true if line number and column number are non-negative + * integers. + */ + bool hasColumn() const { return hasLine() && column >= 0; } + }; + +private: + /** + * Minimum severity a log message should have before it is discarded. + */ + Severity minSeverity; + + /** + * Maximum encountered log message severity. + */ + Severity maxEncounteredSeverity; + + /** + * Stack containing the current file names that have been processed. + */ + std::stack filenameStack; + +protected: + /** + * Function to be overriden by child classes to actually display or store + * the messages. The default implementation just discards all incomming + * messages. + * + * @param msg is an instance of the Message struct containing the data that + * should be logged. + */ + virtual void process(const Message &msg){}; + +public: + /** + * Constructor of the Logger class. + * + * @param minSeverity is the minimum severity a log message should have. + * Messages below this severity are discarded. + */ + Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY) + : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG) + { + } + + Logger(const Logger &) = delete; + + /** + * Virtual destructor. + */ + virtual ~Logger(){}; + + /** + * Logs the given message. Most generic log function. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. 
+ * Ignored if negative. + */ + void log(Severity severity, const std::string &msg, const std::string &file, + int line = -1, int column = -1); + + /** + * Logs the given message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void log(Severity severity, const std::string &msg, int line = -1, + int column = -1) + { + log(severity, msg, currentFilename(), line, column); + } + + /** + * Logs the given message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + * @tparam PosType is the actual type of pos and must implement a getLine + * and getColumn function. + */ + template + void logAt(Severity severity, const std::string &msg, const PosType &pos) + { + log(severity, msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs the given loggable exception. + * + * @param ex is the exception that should be logged. + */ + void log(const LoggableException &ex) + { + log(Severity::ERROR, ex.msg, + ex.file.empty() ? currentFilename() : ex.file, ex.line, ex.column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. 
+ */ + void debug(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::DEBUG, msg, file, line, column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void debug(const std::string &msg, int line = -1, int column = -1) + { + debug(msg, currentFilename(), line, column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void debugAt(const std::string &msg, const PosType &pos) + { + debug(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void note(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::NOTE, msg, file, line, column); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. 
+ */ + void note(const std::string &msg, int line = -1, int column = -1) + { + note(msg, currentFilename(), line, column); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void noteAt(const std::string &msg, const PosType &pos) + { + note(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void warning(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::WARNING, msg, file, line, column); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void warningAt(const std::string &msg, const PosType &pos) + { + warning(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void warning(const std::string &msg, int line = -1, int column = -1) + { + warning(msg, currentFilename(), line, column); + } + + /** + * Logs an error message. 
The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void error(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::ERROR, msg, file, line, column); + } + + /** + * Logs an error message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void error(const std::string &msg, int line = -1, int column = -1) + { + error(msg, currentFilename(), line, column); + } + + /** + * Logs an error message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void errorAt(const std::string &msg, const PosType &pos) + { + error(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. 
+ */ + void fatalError(const std::string &msg, const std::string &file, + int line = -1, int column = -1) + { + log(Severity::FATAL_ERROR, msg, file, line, column); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void fatalError(const std::string &msg, int line = -1, int column = -1) + { + fatalError(msg, currentFilename(), line, column); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void fatalErrorAt(const std::string &msg, const PosType &pos) + { + fatalError(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Pushes a new file name onto the internal filename stack. + * + * @param name is the name of the file that should be added to the filename + * stack. + * @return the size of the filename stack. This number can be passed to the + * "unwindFilenameStack" method in order to return the stack to state it was + * in after this function has been called. + */ + unsigned int pushFilename(const std::string &name); + + /** + * Pops the filename from the internal filename stack. + * + * @return the current size of the filename stack. + */ + unsigned int popFilename(); + + /** + * Pops elements from the filename stack while it has more elements than + * the given number and the stack is non-empty. + * + * @param pos is the position the filename stack should be unwound to. Use + * a number returned by pushFilename. + */ + void unwindFilenameStack(unsigned int pos); + + /** + * Returns the topmost filename from the internal filename stack. 
+ * + * @return the topmost filename from the filename stack or an empty string + * if the filename stack is empty. + */ + std::string currentFilename() + { + return filenameStack.empty() ? std::string{} : filenameStack.top(); + } + + /** + * Returns the maximum severity that was encountered by the Logger but at + * least Severity::DEBUG. + * + * @return the severity of the most severe log message but at least + * Severity::DEBUG. + */ + Severity getMaxEncounteredSeverity() { return maxEncounteredSeverity; } + + /** + * Returns the minimum severity. Messages with a smaller severity are + * discarded. + * + * @return the minimum severity. + */ + Severity getMinSeverity() { return minSeverity; } + + /** + * Sets the minimum severity. Messages with a smaller severity will be + * discarded. Only new messages will be filtered according to the new value. + * + * @param severity is the minimum severity for new log messages. + */ + void setMinSeverity(Severity severity) { minSeverity = severity; } +}; + +/** + * Class extending the Logger class and printing the log messages to the given + * stream. + */ +class TerminalLogger : public Logger { +private: + /** + * Reference to the target output stream. + */ + std::ostream &os; + + /** + * If true, the TerminalLogger will use colors to make the log messages + * prettier. + */ + bool useColor; + +protected: + /** + * Implements the process function and logs the messages to the output. + */ + void process(const Message &msg) override; + +public: + /** + * Constructor of the TerminalLogger class. + * + * @param os is the output stream the log messages should be logged to. + * Should be set to std::cerr in most cases. + * @param useColor if true, the TerminalLogger class will do its best to + * use ANSI/VT100 control sequences for colored log messages. + * @param minSeverity is the minimum severity below which log messages are + * discarded. 
+ */ + TerminalLogger(std::ostream &os, bool useColor = false, + Severity minSeverity = DEFAULT_MIN_SEVERITY) + : Logger(minSeverity), os(os), useColor(useColor) + { + } +}; +} + +#endif /* _OUSIA_LOGGER_HPP_ */ + diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp new file mode 100644 index 0000000..c460ed4 --- /dev/null +++ b/src/core/common/Utils.cpp @@ -0,0 +1,59 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include +#include + +#include "Utils.hpp" + +namespace ousia { + +std::string Utils::trim(const std::string &s) +{ + size_t firstNonWhitespace = std::numeric_limits::max(); + size_t lastNonWhitespace = 0; + for (size_t i = 0; i < s.size(); i++) { + if (!isWhitespace(s[i])) { + firstNonWhitespace = std::min(i, firstNonWhitespace); + lastNonWhitespace = std::max(i, lastNonWhitespace); + } + } + + if (firstNonWhitespace < lastNonWhitespace) { + return s.substr(firstNonWhitespace, + lastNonWhitespace - firstNonWhitespace + 1); + } + return std::string{}; +} + +bool Utils::isIdentifier(const std::string &name) +{ + bool first = true; + for (char c : name) { + if (first && !(isAlphabetic(c) || c == '_')) { + return false; + } + if (first && !(isAlphanumeric(c) || c == '_' || c == '-')) { + return false; + } + first = false; + } + return true; +} +} + diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp new file mode 100644 index 0000000..5332b50 --- /dev/null +++ b/src/core/common/Utils.hpp @@ -0,0 +1,110 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
/*
 * Utils.hpp -- collection of small, stateless character and string helpers.
 */

#ifndef _OUSIA_UTILS_H_
#define _OUSIA_UTILS_H_

#include <sstream>
#include <string>

namespace ousia {

/**
 * Class bundling static helper functions for character classification and
 * simple string operations. All character tests operate on plain ASCII ranges.
 */
class Utils {
public:
    /**
     * Returns true if the given character is in [A-Za-z]
     */
    static bool isAlphabetic(const char c)
    {
        return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'));
    }

    /**
     * Returns true if the given character is in [0-9]
     */
    static bool isNumeric(const char c) { return (c >= '0') && (c <= '9'); }

    /**
     * Returns true if the given character is in [0-9A-Fa-f]
     */
    static bool isHexadecimal(const char c)
    {
        return ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F')) ||
               ((c >= 'a') && (c <= 'f'));
    }

    /**
     * Returns true if the given character is in [A-Za-z0-9]
     */
    static bool isAlphanumeric(const char c)
    {
        return isAlphabetic(c) || isNumeric(c);
    }

    /**
     * Returns true if the given string matches [A-Za-z_][A-Za-z0-9_-]*
     *
     * @param name is the string that should be checked.
     */
    static bool isIdentifier(const std::string &name);

    /**
     * Returns true if the given character is a whitespace character (space,
     * tab, line feed or carriage return).
     */
    static bool isWhitespace(const char c)
    {
        return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
    }

    /**
     * Removes whitespace at the beginning and the end of the given string.
     *
     * @param s is the string that should be trimmed.
     * @return the trimmed copy of s.
     */
    static std::string trim(const std::string &s);

    /**
     * Turns the elements of a collection into a string separated by the
     * given delimiter.
     *
     * @tparam T is the container type; it must be usable in a range-based for
     * loop.
     * @param es is an iterable container of elements that can be appended to an
     * output stream (the << operator must be implemented). Taken by const
     * reference so that the container is not copied.
     * @param delim is the delimiter that should be used to separate the items.
     * @param start is a character sequence that should be prepended to the
     * result.
     * @param end is a character sequence that should be appended to the result.
     * @return start, followed by the delimited elements, followed by end.
     */
    template <class T>
    static std::string join(const T &es, const std::string &delim,
                            const std::string &start = "",
                            const std::string &end = "")
    {
        std::stringstream res;
        bool first = true;
        res << start;
        for (const auto &e : es) {
            // The delimiter goes between elements only
            if (!first) {
                res << delim;
            }
            res << e;
            first = false;
        }
        res << end;
        return res.str();
    }
};
}

#endif /* _OUSIA_UTILS_H_ */
+*/ + +#include + +#include "Utils.hpp" +#include "Variant.hpp" + +namespace ousia { + +/* Class Variant::TypeException */ + +Variant::TypeException::TypeException(Type actualType, Type requestedType) + : OusiaException(std::string("Variant: Requested \"") + + Variant::getTypeName(requestedType) + + std::string("\" but is \"") + + Variant::getTypeName(actualType) + std::string("\"")), + actualType(actualType), + requestedType(requestedType) +{ +} + +/* Class Variant */ + +const char *Variant::getTypeName(Type type) +{ + switch (type) { + case Type::NULLPTR: + return "null"; + case Type::BOOL: + return "boolean"; + case Type::INT: + return "integer"; + case Type::DOUBLE: + return "double"; + case Type::STRING: + return "string"; + case Type::ARRAY: + return "array"; + case Type::MAP: + return "map"; + } + return "unknown"; +} + +Variant::boolType Variant::toBool() const +{ + switch (getType()) { + case Type::NULLPTR: + return false; + case Type::BOOL: + return asBool(); + case Type::INT: + return asInt() != 0; + case Type::DOUBLE: + return asDouble() != 0.0; + case Type::STRING: + return true; + case Type::ARRAY: + return true; + case Type::MAP: + return true; + } + return false; +} + +Variant::intType Variant::toInt() const +{ + switch (getType()) { + case Type::NULLPTR: + return 0; + case Type::BOOL: + return asBool() ? 1 : 0; + case Type::INT: + return asInt(); + case Type::DOUBLE: + return asDouble(); + case Type::STRING: + return 0; // TODO: Parse string as int + case Type::ARRAY: { + const arrayType &a = asArray(); + return (a.size() == 1) ? a[0].toInt() : 0; + } + case Type::MAP: + return 0; + } + return false; +} + +Variant::doubleType Variant::toDouble() const +{ + switch (getType()) { + case Type::NULLPTR: + return 0.0; + case Type::BOOL: + return asBool() ? 
1.0 : 0.0; + case Type::INT: + return asInt(); + case Type::DOUBLE: + return asDouble(); + case Type::STRING: + return 0.0; // TODO: Parse string as double + case Type::ARRAY: { + const arrayType &a = asArray(); + return (a.size() == 1) ? a[0].toDouble() : 0; + } + case Type::MAP: + return 0; + } + return false; +} + +Variant::stringType Variant::toString(bool escape) const +{ + switch (getType()) { + case Type::NULLPTR: + return "null"; + case Type::BOOL: + return asBool() ? "true" : "false"; + case Type::INT: + return std::to_string(asInt()); + case Type::DOUBLE: + return std::to_string(asDouble()); + case Type::STRING: { + // TODO: Use proper serialization function + std::stringstream ss; + ss << "\"" << asString() << "\""; + return ss.str(); + } + case Type::ARRAY: + return Utils::join(asArray(), ", ", "[", "]"); + case Type::MAP: + return Utils::join(asMap(), ", ", "{", "}"); + } + return ""; +} + +} + diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp new file mode 100644 index 0000000..d411fd3 --- /dev/null +++ b/src/core/common/Variant.hpp @@ -0,0 +1,761 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Variant.hpp + * + * The Variant class is used to efficiently represent a variables of varying + * type. 
Variant instances are used to represent data given by the end user and + * to exchange information between the host application and the script clients. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_VARIANT_HPP_ +#define _OUSIA_VARIANT_HPP_ + +#include +#include +#include +#include +#include + +// TODO: Use +// http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html +// later (will allow to use 8 bytes for a variant) + +#include "Exceptions.hpp" + +namespace ousia { + +/** + * Instances of the Variant class represent any kind of data that is exchanged + * between the host application and the script engine. Variants are immutable. + */ +class Variant { +public: + /** + * Enum containing the possible types a variant may have. + */ + enum class Type : int16_t { + NULLPTR, + BOOL, + INT, + DOUBLE, + STRING, + ARRAY, + MAP + }; + + /** + * Exception thrown whenever a variant is accessed via a getter function + * that is not supported for the current variant type. + */ + class TypeException : public OusiaException { + private: + /** + * Internally used string holding the exception message. + */ + const std::string msg; + + public: + /** + * Contains the actual type of the variant. + */ + const Type actualType; + + /** + * Contains the requested type of the variant. + */ + const Type requestedType; + + /** + * Constructor of the TypeException. + * + * @param actualType describes the actual type of the variant. + * @param requestedType describes the type in which the variant was + * requested. + */ + TypeException(Type actualType, Type requestedType); + }; + + using boolType = bool; + using intType = int32_t; + using doubleType = double; + using stringType = std::string; + using arrayType = std::vector; + using mapType = std::map; + +private: + /** + * Used to store the actual type of the variant. 
+ */ + Type type = Type::NULLPTR; + + /** + * Anonymous union containing the possible value of the variant. + */ + union { + /** + * The boolean value. Only valid if type is Type::BOOL. + */ + boolType boolVal; + /** + * The integer value. Only valid if type is Type::INT. + */ + intType intVal; + /** + * The number value. Only valid if type is Type::DOUBLE. + */ + doubleType doubleVal; + /** + * Pointer to the more complex data structures on the free store. Only + * valid if type is one of Type::STRING, Type::ARRAY, + * Type::MAP. + */ + void *ptrVal; + }; + + /** + * Internally used to convert the current pointer value to a reference of + * the specified type. + */ + template + T &asObj(Type requestedType) const + { + const Type actualType = getType(); + if (actualType == requestedType) { + return *(static_cast(ptrVal)); + } + throw TypeException{actualType, requestedType}; + } + + /** + * Used internally to assign the value of another Variant instance to this + * instance. + * + * @param v is the Variant instance that should be copied to this instance. + */ + void copy(const Variant &v) + { + destroy(); + type = v.type; + switch (type) { + case Type::NULLPTR: + break; + case Type::BOOL: + boolVal = v.boolVal; + break; + case Type::INT: + intVal = v.intVal; + break; + case Type::DOUBLE: + doubleVal = v.doubleVal; + break; + case Type::STRING: + ptrVal = new stringType(v.asString()); + break; + case Type::ARRAY: + ptrVal = new arrayType(v.asArray()); + break; + case Type::MAP: + ptrVal = new mapType(v.asMap()); + break; + } + } + + /** + * Used internally to move the value of another Variant instance to this + * instance. + * + * @param v is the Variant instance that should be copied to this instance. 
+ */ + void move(Variant &&v) + { + destroy(); + type = v.type; + switch (type) { + case Type::NULLPTR: + break; + case Type::BOOL: + boolVal = v.boolVal; + break; + case Type::INT: + intVal = v.intVal; + break; + case Type::DOUBLE: + doubleVal = v.doubleVal; + break; + case Type::STRING: + case Type::ARRAY: + case Type::MAP: + ptrVal = v.ptrVal; + v.ptrVal = nullptr; + break; + } + v.type = Type::NULLPTR; + } + + /** + * Used internally to destroy any value that was allocated on the heap. + */ + void destroy() + { + if (ptrVal) { + switch (type) { + case Type::STRING: + delete static_cast(ptrVal); + break; + case Type::ARRAY: + delete static_cast(ptrVal); + break; + case Type::MAP: + delete static_cast(ptrVal); + break; + default: + break; + } + } + } + +public: + /** + * Copy constructor of the Variant class. + * + * @param v is the Variant instance that should be cloned. + */ + Variant(const Variant &v) : ptrVal(nullptr) { copy(v); } + + /** + * Move constructor of the Variant class. + * + * @param v is the reference to the Variant instance that should be moved, + * this instance is invalidated afterwards. + */ + Variant(Variant &&v) : ptrVal(nullptr) { move(std::move(v)); } + + /** + * Default constructor. Type is set to Type:null. + */ + Variant() : ptrVal(nullptr) { setNull(); } + + /** + * Default destructor, frees any memory that was allocated on the heap. + */ + ~Variant() { destroy(); } + + /** + * Constructor for null values. Initializes the variant as null value. + */ + Variant(std::nullptr_t) : ptrVal(nullptr) { setNull(); } + + /** + * Constructor for boolean values. + * + * @param b boolean value. + */ + Variant(boolType b) : ptrVal(nullptr) { setBool(b); } + + /** + * Constructor for integer values. + * + * @param i integer value. + */ + Variant(intType i) : ptrVal(nullptr) { setInt(i); } + + /** + * Constructor for double values. + * + * @param d double value. 
+ */ + Variant(doubleType d) : ptrVal(nullptr) { setDouble(d); } + + /** + * Constructor for string values. The given string is copied and managed by + * the new Variant instance. + * + * @param s is a reference to a C-Style string used as string value. + */ + Variant(const char *s) : ptrVal(nullptr) { setString(s); } + + /** + * Constructor for array values. The given array is copied and managed by + * the new Variant instance. + * + * @param a is a reference to the array + */ + Variant(arrayType a) : ptrVal(nullptr) { setArray(std::move(a)); } + + /** + * Constructor for map values. The given map is copied and managed by the + * new Variant instance. + * + * @param m is a reference to the map. + */ + Variant(mapType m) : ptrVal(nullptr) { setMap(std::move(m)); } + + /** + * Copy assignment operator. + */ + Variant &operator=(const Variant &v) + { + copy(v); + return *this; + } + + /** + * Move assignment operator. + */ + Variant &operator=(Variant &&v) + { + move(std::move(v)); + return *this; + } + + /** + * Assign nullptr_t operator (allows to write Variant v = nullptr). + * + * @param p is an instance of std::nullptr_t. + */ + Variant &operator=(std::nullptr_t) + { + setNull(); + return *this; + } + + /** + * Assign a boolean value. + * + * @param b is the boolean value to which the variant should be set. + */ + Variant &operator=(boolType b) + { + setBool(b); + return *this; + } + + /** + * Assign an integer value. + * + * @param i is the integer value to which the variant should be set. + */ + Variant &operator=(intType i) + { + setInt(i); + return *this; + } + + /** + * Assign a double value. + * + * @param d is the double value to which the variant should be set. + */ + Variant &operator=(doubleType d) + { + setDouble(d); + return *this; + } + + /** + * Assign a zero terminated const char array. + * + * @param s is the zero terminated const char array to which the variant + * should be set. 
+ */ + Variant &operator=(const char *s) + { + setString(s); + return *this; + } + + /** + * Checks whether this Variant instance represents the nullptr. + * + * @return true if the Variant instance represents the nullptr, false + * otherwise. + */ + bool isNull() const { return type == Type::NULLPTR; } + + /** + * Checks whether this Variant instance is a boolean. + * + * @return true if the Variant instance is a boolean, false otherwise. + */ + bool isBool() const { return type == Type::BOOL; } + + /** + * Checks whether this Variant instance is an integer. + * + * @return true if the Variant instance is an integer, false otherwise. + */ + bool isInt() const { return type == Type::INT; } + + /** + * Checks whether this Variant instance is a double. + * + * @return true if the Variant instance is a double, false otherwise. + */ + bool isDouble() const { return type == Type::DOUBLE; } + + /** + * Checks whether this Variant instance is a string. + * + * @return true if the Variant instance is a string, false otherwise. + */ + bool isString() const { return type == Type::STRING; } + + /** + * Checks whether this Variant instance is an array. + * + * @return true if the Variant instance is an array, false otherwise. + */ + bool isArray() const { return type == Type::ARRAY; } + + /** + * Checks whether this Variant instance is a map. + * + * @return true if the Variant instance is a map, false otherwise. + */ + bool isMap() const { return type == Type::MAP; } + + /** + * Returns the Variant boolean value. Performs no type conversion. Throws an + * exception if the underlying type is not a boolean. + * + * @return the boolean value. + */ + boolType asBool() const + { + if (isBool()) { + return boolVal; + } + throw TypeException{getType(), Type::BOOL}; + } + + /** + * Returns the Variant integer value. Performs no type conversion. Throws an + * exception if the underlying type is not an integer. + * + * @return the integer value. 
+ */ + intType asInt() const + { + if (isInt()) { + return intVal; + } + throw TypeException{getType(), Type::INT}; + } + + /** + * Returns the Variant double value. Performs no type conversion. Throws an + * exception if the underlying type is not a double. + * + * @return the double value. + */ + doubleType asDouble() const + { + if (isDouble()) { + return doubleVal; + } + throw TypeException{getType(), Type::DOUBLE}; + } + + /** + * Returns a const reference to the string value. Performs no type + * conversion. Throws an exception if the underlying type is not a string. + * + * @return the string value as const reference. + */ + const stringType &asString() const + { + return asObj(Type::STRING); + } + + /** + * Returns a const reference to the string value. Performs no type + * conversion. Throws an exception if the underlying type is not a string. + * + * @return the string value as reference. + */ + stringType &asString() { return asObj(Type::STRING); } + + /** + * Returns a const reference to the array value. Performs no type + * conversion. Throws an exception if the underlying type is not an array. + * + * @return the array value as const reference. + */ + const arrayType &asArray() const { return asObj(Type::ARRAY); } + + /** + * Returns a const reference to the array value. Performs no type + * conversion. Throws an exception if the underlying type is not an array. + * + * @return the array value as reference. + */ + arrayType &asArray() { return asObj(Type::ARRAY); } + + /** + * Returns a const reference to the map value. Performs no type + * conversion. Throws an exception if the underlying type is not a map. + * + * @return the map value as const reference. + */ + const mapType &asMap() const { return asObj(Type::MAP); } + + /** + * Returns a reference to the map value. Performs no type conversion. + * Throws an exception if the underlying type is not a map. + * + * @return the map value as reference. 
+ */ + mapType &asMap() { return asObj(Type::MAP); } + + /** + * Returns the value of the Variant as boolean, performs type conversion. + * + * @return the Variant value converted to a boolean value. + */ + boolType toBool() const; + + /** + * Returns the value of the Variant as integer, performs type conversion. + * + * @return the Variant value converted to an integer value. + */ + intType toInt() const; + + /** + * Returns the value of the Variant as double, performs type conversion. + * + * @return the Variant value converted to a double value. + */ + doubleType toDouble() const; + + /** + * Returns the value of the Variant as string, performs type conversion. + * + * @return the value of the variant as string. + * @param escape if set to true, adds double quotes to strings and escapes + * them properly (resulting in a more or less JSONesque output). + */ + stringType toString(bool escape = false) const; + + /** + * Sets the variant to null. + */ + void setNull() + { + destroy(); + type = Type::NULLPTR; + ptrVal = nullptr; + } + + /** + * Sets the variant to the given boolean value. + * + * @param b is the new boolean value. + */ + void setBool(boolType b) + { + destroy(); + type = Type::BOOL; + boolVal = b; + } + + /** + * Sets the variant to the given integer value. + * + * @param i is the new integer value. + */ + void setInt(intType i) + { + destroy(); + type = Type::INT; + intVal = i; + } + + /** + * Sets the variant to the given double value. + * + * @param d is the new double value. + */ + void setDouble(doubleType d) + { + destroy(); + type = Type::DOUBLE; + doubleVal = d; + } + + /** + * Sets the variant to the given string value. + * + * @param d is the new string value. + */ + void setString(const char *s) + { + if (isString()) { + asString().assign(s); + } else { + destroy(); + type = Type::STRING; + ptrVal = new stringType(s); + } + } + + /** + * Sets the variant to the given array value. + * + * @param a is the new array value. 
+ */ + void setArray(arrayType a) + { + if (isArray()) { + asArray().swap(a); + } else { + destroy(); + type = Type::ARRAY; + ptrVal = new arrayType(std::move(a)); + } + } + + /** + * Sets the variant to the given map value. + * + * @param m is the new map value. + */ + void setMap(mapType m) + { + if (isMap()) { + asMap().swap(m); + } else { + destroy(); + type = Type::MAP; + ptrVal = new mapType(std::move(m)); + } + } + + /** + * Returns the current type of the Variant. + * + * @return the current type of the Variant. + */ + Type getType() const { return type; } + + /** + * Returns the name of the given variant type as C-style string. + */ + static const char *getTypeName(Type type); + + /** + * Returns the name of the type of this variant instance. + */ + const char *getTypeName() { return Variant::getTypeName(getType()); } + + /** + * Prints the Variant to the output stream. + */ + friend std::ostream &operator<<(std::ostream &os, const Variant &v) + { + return os << v.toString(true); + } + + /** + * Prints a key value pair to the output stream. + */ + friend std::ostream &operator<<(std::ostream &os, + const mapType::value_type &v) + { + // TODO: Use proper serialization function + return os << "\"" << v.first << "\": " << v.second.toString(true); + } + + /* + * Comparison operators. + */ + + friend bool operator<(const Variant &lhs, const Variant &rhs) + { + // If the types do not match, we can not do a meaningful comparison.
+ if (lhs.getType() != rhs.getType()) { + throw TypeException(lhs.getType(), rhs.getType()); + } + switch (lhs.getType()) { + case Type::NULLPTR: + return false; + case Type::BOOL: + return lhs.boolVal < rhs.boolVal; + case Type::INT: + return lhs.intVal < rhs.intVal; + case Type::DOUBLE: + return lhs.doubleVal < rhs.doubleVal; + case Type::STRING: + return lhs.asString() < rhs.asString(); + case Type::ARRAY: + return lhs.asArray() < rhs.asArray(); + case Type::MAP: + return lhs.asMap() < rhs.asMap(); + } + throw OusiaException("Internal Error! Unknown type!"); + } + friend bool operator>(const Variant &lhs, const Variant &rhs) + { + return rhs < lhs; + } + friend bool operator<=(const Variant &lhs, const Variant &rhs) + { + return !(lhs > rhs); + } + friend bool operator>=(const Variant &lhs, const Variant &rhs) + { + return !(lhs < rhs); + } + + friend bool operator==(const Variant &lhs, const Variant &rhs) + { + if (lhs.getType() != rhs.getType()) { + return false; + } + switch (lhs.getType()) { + case Type::NULLPTR: + return true; + case Type::BOOL: + return lhs.boolVal == rhs.boolVal; + case Type::INT: + return lhs.intVal == rhs.intVal; + case Type::DOUBLE: + return lhs.doubleVal == rhs.doubleVal; + case Type::STRING: + return lhs.asString() == rhs.asString(); + case Type::ARRAY: + return lhs.asArray() == rhs.asArray(); + case Type::MAP: + return lhs.asMap() == rhs.asMap(); + } + throw OusiaException("Internal Error! 
Unknown type!"); + } + + friend bool operator!=(const Variant &lhs, const Variant &rhs) + { + return !(lhs == rhs); + } +}; +} + +#endif /* _OUSIA_VARIANT_HPP_ */ + diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp new file mode 100644 index 0000000..e611842 --- /dev/null +++ b/src/core/common/VariantReader.cpp @@ -0,0 +1,625 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#include + +#include +#include + +#include "VariantReader.hpp" +#include "Utils.hpp" + +namespace ousia { + +// TODO: Better error messages (like "Expected 'x' but got 'y'") +// TODO: Replace delims with single char delim where possible +// TODO: Use custom return value instead of std::pair +// TODO: Allow buffered char reader to "fork" +// TODO: Rename CharReader to shorter CharReader +// TODO: Implement context in CharReader (to allow error messages to extract the +// current line) + +/* Error Messages */ + +static const char *ERR_UNEXPECTED_CHAR = "Unexpected character"; +static const char *ERR_UNEXPECTED_END = "Unexpected literal end"; +static const char *ERR_UNTERMINATED = "Unterminated literal"; +static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; +static const char *ERR_INVALID_INTEGER = "Invalid integer value"; +static const char *ERR_TOO_LARGE = "Value too large to represent"; + +/* Class Number */ + +/** + * Class used internally to represent a number (integer or double). The number + * is represented by its components (base value a, numerator n, denominator d, + * exponent e, sign s and exponent sign sE). + */ +class Number { +private: + /** + * Represents the part of the number: Base value a, numerator n, exponent e. + */ + enum class Part { A, N, E }; + + /** + * State used in the parser state machine + */ + enum class State { + INIT, + HAS_MINUS, + LEADING_ZERO, + LEADING_POINT, + INT, + HEX, + POINT, + EXP_INIT, + EXP_HAS_MINUS, + EXP + }; + + /** + * Returns the numeric value of the given ASCII character (returns 0 for + * '0', 1 for '1', 10 for 'A' and so on). + * + * @param c is the character for which the numeric value should be returned. + * @return the numeric value the character represents.
+ */ + static int charValue(char c) + { + if (c >= '0' && c <= '9') { + return c & 0x0F; + } + if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) { + return (c & 0x0F) + 9; + } + return -1; + } + + /** + * Appends the value of the character c to the internal number + * representation and reports any errors that might occur. + */ + bool appendChar(char c, int base, Part p, CharReader &reader, + Logger &logger) + { + // Check whether the given character is valid + int v = charValue(c); + if (v < 0 || v >= base) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + + // Append the number to the specified part + switch (p) { + case Part::A: + a = a * base + v; + break; + case Part::N: + n = n * base + v; + d = d * base; + break; + case Part::E: + e = e * base + v; + break; + } + + // Check for any overflows + if (a < 0 || n < 0 || d < 0 || e < 0) { + logger.errorAt(ERR_TOO_LARGE, reader); + return false; + } + return true; + } + +public: + /** + * Sign and exponent sign. + */ + int8_t s, sE; + + /** + * Exponent + */ + int16_t e; + + /** + * Base value, numerator, denominator + */ + int64_t a, n, d; + + /** + * Constructor of the number class. + */ + Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {} + + /** + * Returns the represented double value. + */ + double doubleValue() + { + return s * (a + ((double)n / (double)d)) * pow(10.0, (double)(sE * e)); + } + + /** + * Returns the represented integer value. Only a lossless operation, if the + * number is an integer (as can be checked via the isInt method), otherwise + * the exponent and the fractional value will be truncated. + */ + int64_t intValue() { return s * a; } + + /** + * Returns true, if the number is an integer (has no fractional or + * exponential part). + */ + bool isInt() { return (n == 0) && (d == 1) && (e == 0); } + + /** + * Tries to parse the number from the given stream and logs any errors to + * the given logger instance. Numbers are terminated by one of the given + * delimiters.
+ */ + bool parse(CharReader &reader, Logger &logger, + const std::unordered_set &delims); +}; + +bool Number::parse(CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + State state = State::INIT; + char c; + + // Consume the first whitespace characters + reader.consumeWhitespace(); + + // Iterate over the FSM to extract numbers + while (reader.peek(c)) { + // Abort, once a delimiter or whitespace is reached + if (Utils::isWhitespace(c) || delims.count(c)) { + reader.resetPeek(); + break; + } + + // The character is not a whitespace character and not a delimiter + switch (state) { + case State::INIT: + case State::HAS_MINUS: + switch (c) { + case '-': + // Do not allow multiple minus signs + if (state == State::HAS_MINUS) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + state = State::HAS_MINUS; + s = -1; + break; + case '0': + // Remember a leading zero for the detection of "0x" + state = State::LEADING_ZERO; + break; + case '.': + // Remember a leading point as ".eXXX" is invalid + state = State::LEADING_POINT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::LEADING_ZERO: + if (c == 'x' || c == 'X') { + state = State::HEX; + break; + } + // fallthrough + case State::INT: + switch (c) { + case '.': + state = State::POINT; + break; + case 'e': + case 'E': + state = State::EXP_INIT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::HEX: + if (!appendChar(c, 16, Part::A, reader, logger)) { + return false; + } + break; + case State::LEADING_POINT: + case State::POINT: + switch (c) { + case 'e': + case 'E': + if (state == State::LEADING_POINT) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + state = State::EXP_INIT; + break; + default: + state = State::POINT; + if (!appendChar(c, 10, Part::N, reader, 
logger)) { + return false; + } + break; + } + break; + case State::EXP_HAS_MINUS: + case State::EXP_INIT: + if (c == '-') { + if (state == State::EXP_HAS_MINUS) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + state = State::EXP_HAS_MINUS; + sE = -1; + } else { + state = State::EXP; + if (!appendChar(c, 10, Part::E, reader, logger)) { + return false; + } + } + break; + case State::EXP: + if (!appendChar(c, 10, Part::E, reader, logger)) { + return false; + } + break; + } + reader.consumePeek(); + } + + // States in which ending is valid. Log an error in other states + if (state == State::LEADING_ZERO || state == State::HEX || + state == State::INT || state == State::POINT || + state == State::EXP) { + return true; + } + logger.errorAt(ERR_UNEXPECTED_END, reader); + return false; +} + + +/* Class Reader */ + +static const int STATE_INIT = 0; +static const int STATE_IN_STRING = 1; +static const int STATE_IN_ARRAY = 2; +static const int STATE_EXPECT_COMMA = 3; +static const int STATE_ESCAPE = 4; +static const int STATE_WHITESPACE = 5; +static const int STATE_RESYNC = 6; + +template +static std::pair error(CharReader &reader, Logger &logger, + const char *err, T res) +{ + logger.errorAt(err, reader); + return std::make_pair(false, std::move(res)); +} + +std::pair VariantReader::parseString( + CharReader &reader, Logger &logger, + const std::unordered_set *delims) +{ + // Initialize the internal state + int state = STATE_INIT; + char quote = 0; + std::stringstream res; + + // Consume all whitespace + reader.consumeWhitespace(); + + // State machine which iterates over each character in the stream + // TODO: Combination of peeking and consumePeek is stupid as consumePeek is + // the default (read and putBack would obviously be better, yet the latter + // is not trivial to implement in the current CharReader).
+ char c; + while (reader.peek(c)) { + switch (state) { + case STATE_INIT: + if (c == '"' || c == '\'') { + quote = c; + state = STATE_IN_STRING; + break; + } else if (delims && delims->count(c)) { + return error(reader, logger, ERR_UNEXPECTED_END, res.str()); + } + return error(reader, logger, ERR_UNEXPECTED_CHAR, res.str()); + case STATE_IN_STRING: + if (c == quote) { + reader.consumePeek(); + return std::make_pair(true, res.str()); + } else if (c == '\\') { + state = STATE_ESCAPE; + reader.consumePeek(); + break; + } else if (c == '\n') { + return error(reader, logger, ERR_UNTERMINATED, res.str()); + } + res << c; + reader.consumePeek(); + break; + case STATE_ESCAPE: + // Handle all possible special escape characters + switch (c) { + case 'b': + res << '\b'; + break; + case 'f': + res << '\f'; + break; + case 'n': + res << '\n'; + break; + case 'r': + res << '\r'; + break; + case 't': + res << '\t'; + break; + case 'v': + res << '\v'; + break; + case '\'': + res << '\''; + break; + case '"': + res << '"'; + break; + case '\\': + res << '\\'; + break; + case '\n': + break; + case 'x': + // TODO: Parse Latin-1 sequence hex XX + break; + case 'u': + // TODO: Parse 16-Bit unicode character hex XXXX + break; + default: + if (Utils::isNumeric(c)) { + // TODO: Parse octal 000 sequence + } else { + logger.errorAt(ERR_INVALID_ESCAPE, reader); + } + break; + } + + // Switch back to the "normal" state + state = STATE_IN_STRING; + reader.consumePeek(); + break; + } + } + return error(reader, logger, ERR_UNEXPECTED_END, res.str()); +} + +std::pair VariantReader::parseArray( + CharReader &reader, Logger &logger, char delim) +{ + Variant::arrayType res; + bool hadError = false; + int state = delim ? STATE_IN_ARRAY : STATE_INIT; + delim = delim ? 
delim : ']'; + char c; + + // Consume all whitespace + reader.consumeWhitespace(); + + // Iterate over the characters, use the parseGeneric function to read the + // pairs + while (reader.peek(c)) { + // Generically handle the end of the array + if (state != STATE_INIT && c == delim) { + reader.consumePeek(); + return std::make_pair(!hadError, res); + } + + switch (state) { + case STATE_INIT: + if (c != '[') { + return error(reader, logger, ERR_UNEXPECTED_CHAR, res); + } + state = STATE_IN_ARRAY; + reader.consumePeek(); + break; + case STATE_IN_ARRAY: { + // Try to read an element using the parseGeneric function + reader.resetPeek(); + auto elem = parseGeneric(reader, logger, {',', delim}); + res.push_back(elem.second); + + // If the reader had no error, expect an comma, otherwise skip + // to the next comma in the stream + if (elem.first) { + state = STATE_EXPECT_COMMA; + } else { + state = STATE_RESYNC; + hadError = true; + } + break; + } + case STATE_EXPECT_COMMA: + // Skip whitespace + if (c == ',') { + state = STATE_IN_ARRAY; + } else if (!Utils::isWhitespace(c)) { + hadError = true; + state = STATE_RESYNC; + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + } + reader.consumePeek(); + break; + case STATE_RESYNC: + // Just wait for another comma to arrive + if (c == ',') { + state = STATE_IN_ARRAY; + } + reader.consumePeek(); + break; + } + } + return error(reader, logger, ERR_UNEXPECTED_END, res); +} + +std::pair VariantReader::parseUnescapedString( + CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + std::stringstream res; + std::stringstream buf; + char c; + + // Consume all whitespace + reader.consumeWhitespace(); + + // Copy all characters, skip whitespace at the end + int state = STATE_IN_STRING; + while (reader.peek(c)) { + if (delims.count(c)) { + reader.resetPeek(); + return std::make_pair(true, res.str()); + } else if (Utils::isWhitespace(c)) { + // Do not add whitespace to the output buffer + state = STATE_WHITESPACE; + buf << 
c; + } else { + // If we just had a sequence of whitespace, append it to the output + // buffer and continue + if (state == STATE_WHITESPACE) { + res << buf.str(); + buf.str(std::string{}); + buf.clear(); + state = STATE_IN_STRING; + } + res << c; + } + reader.consumePeek(); + } + return std::make_pair(true, res.str()); +} + +std::pair VariantReader::parseInteger( + CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + Number n; + if (n.parse(reader, logger, delims)) { + // Only succeed if the parsed number is an integer, otherwise this is an + // error + if (n.isInt()) { + return std::make_pair(true, n.intValue()); + } else { + return error(reader, logger, ERR_INVALID_INTEGER, n.intValue()); + } + } + return std::make_pair(false, n.intValue()); +} + +std::pair VariantReader::parseDouble( + CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + Number n; + bool res = n.parse(reader, logger, delims); + return std::make_pair(res, n.doubleValue()); +} + +std::pair VariantReader::parseGeneric( + CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + char c; + + // Skip all whitespace characters + reader.consumeWhitespace(); + while (reader.peek(c)) { + // Stop if a delimiter is reached + if (delims.count(c)) { + return error(reader, logger, ERR_UNEXPECTED_END, nullptr); + } + + // Parse a string if a quote is reached + if (c == '"' || c == '\'') { + auto res = parseString(reader, logger); + return std::make_pair(res.first, res.second.c_str()); + } + + if (c == '[') { + // TODO: Parse struct descriptor + } + + // Try to parse everything that looks like a number as number + if (Utils::isNumeric(c) || c == '-') { + Number n; + + // Fork the reader + CharReaderFork fork = reader.fork(); + + // TODO: Fork logger + + // Try to parse the number + if (n.parse(fork, logger, delims)) { + // Parsing was successful, advance the reader + fork.commit(); + if (n.isInt()) { + return std::make_pair( + true, +
Variant{static_cast(n.intValue())}); + } else { + return std::make_pair(true, n.doubleValue()); + } + } + } + + // Parse an unescaped string in any other case + auto res = parseUnescapedString(reader, logger, delims); + + // Handling for special primitive values + if (res.first) { + if (res.second == "true") { + return std::make_pair(true, Variant{true}); + } + if (res.second == "false") { + return std::make_pair(true, Variant{false}); + } + if (res.second == "null") { + return std::make_pair(true, Variant{nullptr}); + } + } + return std::make_pair(res.first, res.second.c_str()); + } + return error(reader, logger, ERR_UNEXPECTED_END, nullptr); +} +} + diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp new file mode 100644 index 0000000..5e7c5d2 --- /dev/null +++ b/src/core/common/VariantReader.hpp @@ -0,0 +1,166 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file VariantReader.hpp + * + * Provides parsers for various micro formats. These formats include integers, + * doubles, strings, JSON and the Ousía struct notation.
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_VARIANT_READER_HPP_ +#define _OUSIA_VARIANT_READER_HPP_ + +#include +#include +#include + +#include "CharReader.hpp" +#include "Logger.hpp" +#include "Variant.hpp" + +namespace ousia { + +class VariantReader { +private: + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + * @param delims is an optional set of delimiters after which parsing has to + * be stopped (the delimiters may occur inside the actual string, but not + * outside). If nullptr is given, no delimiter is used and a complete string + * is read. + */ + static std::pair parseString( + CharReader &VariantReader, Logger &logger, + const std::unordered_set *delims); + +public: + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + * @param delims is a set of delimiters after which parsing has to + * be stopped (the delimiters may occur inside the actual string, but not + * outside). 
+ */ + static std::pair parseString( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims) + { + return parseString(VariantReader, logger, &delims); + } + + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + */ + static std::pair parseString(CharReader &VariantReader, + Logger &logger) + { + return parseString(VariantReader, logger, nullptr); + } + + /** + * Extracts an unescaped string from the given buffered char VariantReader + * instance. This function just reads text until one of the given delimiter + * characters is reached. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. + * @param delims is a set of characters which will terminate the string. + * These characters are not included in the result. May not be nullptr. + */ + static std::pair parseUnescapedString( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims); + + /** + * Parses an integer from the given buffered char VariantReader instance + * until one of the given delimiter characters is reached. + * + * @param VariantReader is a reference to the CharReader instance from + * which the character data should been VariantReader. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. 
+ */ + static std::pair parseInteger( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims); + + /** + * Parses an double from the given buffered char VariantReader instance + * until one of the given delimiter characters is reached. + * + * @param VariantReader is a reference to the CharReader instance from + * which the character data should been VariantReader. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. + */ + static std::pair parseDouble( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims); + + /** + * Parses an array of values. + */ + static std::pair parseArray( + CharReader &VariantReader, Logger &logger, char delim = 0); + + /** + * Tries to parse the most specific item from the given stream until one of + * the given delimiters is reached or a meaningful literal has been read. + * The resulting variant represents the value that has been read. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. + * @param delims is a set of characters which will terminate the string. + * These characters are not included in the result. May not be nullptr. 
+ */ + static std::pair parseGeneric( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims); +}; +} + +#endif /* _OUSIA_VARIANT_READER_HPP_ */ + diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index 5dac956..e155cfd 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -32,10 +32,10 @@ #include #include -#include #include -#include #include +#include +#include #include "Scope.hpp" diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp index dca7f35..5e801ee 100644 --- a/src/core/parser/ParserStack.cpp +++ b/src/core/parser/ParserStack.cpp @@ -20,8 +20,8 @@ #include "ParserStack.hpp" -#include -#include +#include +#include namespace ousia { namespace parser { diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp index c5ed4e4..233f4f9 100644 --- a/src/core/parser/ParserStack.hpp +++ b/src/core/parser/ParserStack.hpp @@ -37,7 +37,7 @@ #include #include -#include +#include #include "Parser.hpp" diff --git a/src/core/utils/CharReader.cpp b/src/core/utils/CharReader.cpp deleted file mode 100644 index 61616d7..0000000 --- a/src/core/utils/CharReader.cpp +++ /dev/null @@ -1,643 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include -#include -#include - -#include - -#include "CharReader.hpp" - -namespace ousia { -namespace utils { - -/* Helper functions */ - -/** - * istreamReadCallback is used internally by the Buffer calss to stream data - * from an input stream. - * - * @param buf is points a the target memory region. - * @param size is the requested number of bytes. - * @param userData is a pointer at some user defined data. - * @return the actual number of bytes read. If the result is smaller than - * the requested size, this tells the Buffer that the end of the input - * stream is reached. - */ -static size_t istreamReadCallback(char *buf, size_t size, void *userData) -{ - return (static_cast(userData))->read(buf, size).gcount(); -} - -/* Class Buffer */ - -Buffer::Buffer(ReadCallback callback, void *userData) - : callback(callback), - userData(userData), - reachedEnd(false), - startBucket(buckets.end()), - endBucket(buckets.end()), - startOffset(0), - firstDead(0) -{ - // Load a first block of data from the stream - stream(); - startBucket = buckets.begin(); -} - -Buffer::Buffer(std::istream &istream) : Buffer(istreamReadCallback, &istream) {} - -Buffer::Buffer(const std::string &str) - : callback(nullptr), - userData(nullptr), - reachedEnd(true), - startBucket(buckets.end()), - endBucket(buckets.end()), - startOffset(0), - firstDead(0) -{ - // Copy the given string into a first buffer and set the start buffer - // correctly - Bucket &bucket = nextBucket(); - bucket.resize(str.size()); - std::copy(str.begin(), str.end(), bucket.begin()); - startBucket = buckets.begin(); -} - -#ifndef NDEBUG -Buffer::~Buffer() -{ - // Make sure all cursors have been deleted - for (bool cursor_alive: alive) { - assert(!cursor_alive); - } -} -#endif - -void Buffer::advance(BucketIterator &it) -{ - it++; - if (it == buckets.end()) { - it = buckets.begin(); - } -} - -void Buffer::advance(BucketList::const_iterator &it) const -{ - it++; - if (it == buckets.cend()) { - it = 
buckets.cbegin(); - } -} - -Buffer::Bucket &Buffer::nextBucket() -{ - constexpr size_t MAXVAL = std::numeric_limits::max(); - - // Fetch the minimum bucket index - size_t minBucketIdx = MAXVAL; - for (size_t i = 0; i < cursors.size(); i++) { - if (alive[i]) { - // Fetch references to the bucket and the cursor - const Cursor &cur = cursors[i]; - const Bucket &bucket = *(cur.bucket); - - // Increment the bucket index by one, if the cursor is at the end - // of the bucket (only valid if the LOOKBACK_SIZE is set to zero) - size_t bIdx = cur.bucketIdx; - if (LOOKBACK_SIZE == 0 && cur.bucketOffs == bucket.size()) { - bIdx++; - } - - // Decrement the bucket index by one, if the previous bucket still - // needs to be reached and cannot be overridden - if (bIdx > 0 && cur.bucketOffs < LOOKBACK_SIZE) { - bIdx--; - } - - // Set the bucket index to the minium - minBucketIdx = std::min(minBucketIdx, bIdx); - } - } - - // If there is space between the current start bucket and the read - // cursor, the start bucket can be safely overridden. 
- if (minBucketIdx > 0 && minBucketIdx != MAXVAL) { - // All cursor bucket indices will be decreased by one - for (size_t i = 0; i < cursors.size(); i++) { - cursors[i].bucketIdx--; - } - - // Increment the start offset - startOffset += startBucket->size(); - - // The old start bucket is the new end bucket - endBucket = startBucket; - - // Advance the start bucket, wrap around at the end of the list - advance(startBucket); - } else { - // No free bucket, insert a new one before the start bucket - endBucket = buckets.emplace(startBucket); - } - return *endBucket; -} - -Buffer::CursorId Buffer::nextCursor() -{ - bool hasCursor = false; - CursorId res = 0; - - // Search for the next free cursor starting with minNextCursorId - for (size_t i = firstDead; i < alive.size(); i++) { - if (!alive[i]) { - res = i; - hasCursor = true; - break; - } - } - - // Add a new cursor to the cursor list if no cursor is currently free - if (!hasCursor) { - res = cursors.size(); - cursors.resize(res + 1); - alive.resize(res + 1); - } - - // The next dead cursor is at least the next cursor - firstDead = res + 1; - - // Mark the new cursor as alive - alive[res] = true; - - return res; -} - -void Buffer::stream() -{ - // Fetch the bucket into which the data should be inserted, make sure it - // has the correct size - Bucket &tar = nextBucket(); - tar.resize(REQUEST_SIZE); - - // Read data from the stream into the target buffer - size_t size = callback(tar.data(), REQUEST_SIZE, userData); - - // If not enough bytes were returned, we're at the end of the stream - if (size < REQUEST_SIZE) { - tar.resize(size); - reachedEnd = true; - } -} - -Buffer::CursorId Buffer::createCursor() -{ - CursorId res = nextCursor(); - cursors[res].bucket = startBucket; - cursors[res].bucketIdx = 0; - cursors[res].bucketOffs = 0; - return res; -} - -Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref) -{ - CursorId res = nextCursor(); - cursors[res] = cursors[ref]; - return res; -} - -void 
Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to) -{ - cursors[to] = cursors[from]; -} - -void Buffer::deleteCursor(Buffer::CursorId cursor) -{ - alive[cursor] = false; - firstDead = std::min(firstDead, cursor); -} - -size_t Buffer::offset(Buffer::CursorId cursor) const -{ - const Cursor &cur = cursors[cursor]; - size_t offs = startOffset + cur.bucketOffs; - BucketList::const_iterator it = startBucket; - while (it != cur.bucket) { - offs += it->size(); - advance(it); - } - return offs; -} - -size_t Buffer::moveForward(CursorId cursor, size_t relativeOffs) -{ - size_t offs = relativeOffs; - Cursor &cur = cursors[cursor]; - while (offs > 0) { - // Fetch the current bucket of the cursor - Bucket &bucket = *(cur.bucket); - - // If there is enough space in the bucket, simply increment the bucket - // offset by the given relative offset - const size_t space = bucket.size() - cur.bucketOffs; - if (space >= offs) { - cur.bucketOffs += offs; - break; - } else { - // Go to the end of the current bucket otherwise - offs -= space; - cur.bucketOffs = bucket.size(); - - // Go to the next bucket - if (cur.bucket != endBucket) { - // Go to the next bucket - advance(cur.bucket); - cur.bucketIdx++; - cur.bucketOffs = 0; - } else { - // Abort, if there is no more data to stream, otherwise just - // load new data - if (reachedEnd) { - return relativeOffs - offs; - } - stream(); - } - } - } - return relativeOffs; -} - -size_t Buffer::moveBackward(CursorId cursor, size_t relativeOffs) -{ - size_t offs = relativeOffs; - Cursor &cur = cursors[cursor]; - while (offs > 0) { - // If there is enough space in the bucket, simply decrement the bucket - // offset by the given relative offset - if (cur.bucketOffs >= offs) { - cur.bucketOffs -= offs; - break; - } else { - // Go to the beginning of the current bucket otherwise - offs -= cur.bucketOffs; - cur.bucketOffs = 0; - - // Abort if there is no more bucket to got back to - if (cur.bucketIdx == 0) { - return relativeOffs - offs; - 
} - - // Go to the previous bucket (wrap around at the beginning of the - // list) - if (cur.bucket == buckets.begin()) { - cur.bucket = buckets.end(); - } - cur.bucket--; - - // Decrement the bucket index, and set the current offset to the - // end of the new bucket - cur.bucketIdx--; - cur.bucketOffs = cur.bucket->size(); - } - } - return relativeOffs; -} - -ssize_t Buffer::moveCursor(CursorId cursor, ssize_t relativeOffs) -{ - if (relativeOffs > 0) { - return moveForward(cursor, relativeOffs); - } else if (relativeOffs < 0) { - return -moveBackward(cursor, -relativeOffs); - } else { - return 0; - } -} - -bool Buffer::atEnd(Buffer::CursorId cursor) const -{ - const Cursor &c = cursors[cursor]; - return reachedEnd && - (c.bucket == endBucket && c.bucketOffs == endBucket->size()); -} - -bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr) -{ - Cursor &cur = cursors[cursor]; - while (true) { - // Reference at the current bucket - Bucket &bucket = *(cur.bucket); - - // If there is still data in the current bucket, return this data - if (cur.bucketOffs < bucket.size()) { - c = bucket[cur.bucketOffs]; - if (incr) { - cur.bucketOffs++; - } - return true; - } else if (cur.bucket == endBucket) { - // Return false if the end of the stream has been reached, otherwise - // load new data - if (reachedEnd) { - return false; - } - stream(); - } - - // Go to the next bucket - cur.bucketIdx++; - cur.bucketOffs = 0; - advance(cur.bucket); - } -} - -bool Buffer::read(Buffer::CursorId cursor, char &c) -{ - return fetchCharacter(cursor, c, true); -} - -bool Buffer::fetch(CursorId cursor, char &c) -{ - return fetchCharacter(cursor, c, false); -} - -/* CharReader::Cursor class */ - -void CharReader::Cursor::assign(std::shared_ptr buffer, - CharReader::Cursor &cursor) -{ - // Copy the cursor position - buffer->copyCursor(cursor.cursor, this->cursor); - - // Copy the state - line = cursor.line; - column = cursor.column; -} - -/* CharReader class */ - 
-CharReader::CharReader(std::shared_ptr buffer, size_t line, - size_t column) - : buffer(buffer), - readCursor(buffer->createCursor(), line, column), - peekCursor(buffer->createCursor(), line, column), - coherent(true) -{ -} - -CharReader::CharReader(const std::string &str, size_t line, size_t column) - : CharReader(std::shared_ptr{new Buffer{str}}, line, column) -{ -} - -CharReader::CharReader(std::istream &istream, size_t line, size_t column) - : CharReader(std::shared_ptr{new Buffer{istream}}, line, column) -{ -} - -CharReader::~CharReader() -{ - buffer->deleteCursor(readCursor.cursor); - buffer->deleteCursor(peekCursor.cursor); -} - -bool CharReader::readAtCursor(Cursor &cursor, char &c) -{ - // Return false if we're at the end of the stream - if (!buffer->read(cursor.cursor, c)) { - return false; - } - - // Substitute linebreak sequences with a single '\n' - if (c == '\n' || c == '\r') { - // Output a single \n - c = '\n'; - - // Check whether the next character is a continuation of the - // current character - char c2; - if (buffer->read(cursor.cursor, c2)) { - if ((c2 != '\n' && c2 != '\r') || c2 == c) { - buffer->moveCursor(cursor.cursor, -1); - } - } - } - - // Count lines and columns - if (c == '\n') { - // A linebreak was reached, go to the next line - cursor.line++; - cursor.column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((c & 0x80) && !(c & 0x40))) { - cursor.column++; - } - } - return true; -} - -bool CharReader::peek(char &c) -{ - // If the reader was coherent, update the peek cursor state - if (coherent) { - peekCursor.assign(buffer, readCursor); - coherent = false; - } - - // Read a character from the peek cursor - return readAtCursor(peekCursor, c); -} - -bool CharReader::read(char &c) -{ - // Read a character from the buffer at the current read cursor - bool res = readAtCursor(readCursor, c); - - // Set the peek position to the current read position, if reading was not - // coherent - if (!coherent) { - 
peekCursor.assign(buffer, readCursor); - coherent = true; - } else { - buffer->copyCursor(readCursor.cursor, peekCursor.cursor); - } - - // Return the result of the read function - return res; -} - -void CharReader::resetPeek() -{ - if (!coherent) { - peekCursor.assign(buffer, readCursor); - coherent = true; - } -} - -void CharReader::consumePeek() -{ - if (!coherent) { - readCursor.assign(buffer, peekCursor); - coherent = true; - } -} - -bool CharReader::consumeWhitespace() -{ - char c; - while (peek(c)) { - if (!Utils::isWhitespace(c)) { - resetPeek(); - return true; - } - consumePeek(); - } - return false; -} - -CharReaderFork CharReader::fork() -{ - return CharReaderFork(buffer, readCursor, peekCursor, coherent); -} - -CharReader::Context CharReader::getContext(ssize_t maxSize) -{ - // Clone the current read cursor - Buffer::CursorId cur = buffer->createCursor(readCursor.cursor); - - // Fetch the start position of the search - ssize_t offs = buffer->offset(cur); - ssize_t start = offs; - ssize_t end = offs; - char c; - - // Search the beginning of the line with the last non-whitespace character - bool hadNonWhitespace = false; - bool foundBegin = false; - for (ssize_t i = 0; i < maxSize; i++) { - // Fetch the character at the current position - if (buffer->fetch(cur, c)) { - // Abort, at linebreaks if we found a non-linebreak character - hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c); - if (hadNonWhitespace && (c == '\n' || c == '\r')) { - buffer->moveCursor(cur, 1); - start++; - foundBegin = true; - break; - } - } - if (buffer->moveCursor(cur, -1) == 0) { - foundBegin = true; - break; - } else { - // Update the start position and the hadNonWhitespace flag - start--; - } - } - - // Search the end of the line - buffer->moveCursor(cur, offs - start); - bool foundEnd = false; - for (ssize_t i = 0; i < maxSize; i++) { - // Increment the end counter if a character was read, abort if the end - // of the stream has been reached - if (buffer->read(cur, 
c)) { - end++; - } else { - foundEnd = true; - break; - } - - // Abort on linebreak characters - if (c == '\n' || c == '\r') { - foundEnd = true; - break; - } - } - - // Calculate the truncated start and end position and limit the number of - // characters to the maximum number of characters - ssize_t tStart = start; - ssize_t tEnd = end; - if (tEnd - tStart > maxSize) { - tStart = std::max(offs - maxSize / 2, tStart); - tEnd = tStart + maxSize; - } - - // Try to go to the calculated start position and fetch the actual start - // position - ssize_t aStart = end + buffer->moveCursor(cur, tStart - end); - if (aStart > tStart) { - tEnd = tEnd + (aStart - tStart); - tStart = aStart; - } - - // Read one line - std::stringstream ss; - size_t relPos = 0; - for (ssize_t i = tStart; i < tEnd; i++) { - if (buffer->read(cur, c)) { - // Break once a linebreak is reached - if (c == '\n' || c == '\r') { - break; - } - - // Add the current character to the output - ss << c; - - // Increment the string-relative offset as long as the original - // offset is not reached in the for loop - if (i < offs) { - relPos++; - } - } - } - - // Delete the newly created cursor - buffer->deleteCursor(cur); - - return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start, - !foundEnd || tEnd != end}; -} - -/* Class CharReaderFork */ - -CharReaderFork::CharReaderFork(std::shared_ptr buffer, - CharReader::Cursor &parentReadCursor, - CharReader::Cursor &parentPeekCursor, - bool coherent) - : CharReader(buffer, 1, 1), - parentReadCursor(parentReadCursor), - parentPeekCursor(parentPeekCursor) -{ - readCursor.assign(buffer, parentReadCursor); - peekCursor.assign(buffer, parentPeekCursor); - this->coherent = coherent; -} - -void CharReaderFork::commit() -{ - parentReadCursor.assign(buffer, readCursor); - parentPeekCursor.assign(buffer, peekCursor); -} -} -} - diff --git a/src/core/utils/CharReader.hpp b/src/core/utils/CharReader.hpp deleted file mode 100644 index 1306026..0000000 --- 
a/src/core/utils/CharReader.hpp +++ /dev/null @@ -1,672 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file CharReader.hpp - * - * Used within all parsers to read single characters from an underlying stream. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_CHAR_READER_HPP_ -#define _OUSIA_CHAR_READER_HPP_ - -#include -#include -#include -#include - -namespace ousia { -namespace utils { - -/** - * A chunked ring buffer used in CharReader to provide access to an input stream - * with multiple read cursors. The Buffer automatically expands to the - * size of the spanned by the read cursors while reusing already allocated - * memory. - */ -class Buffer { -public: - /** - * Callback function which is called whenever new data is requested from the - * input stream. - * - * @param buf is points a the target memory region. - * @param size is the requested number of bytes. - * @param userData is a pointer at some user defined data given in the - * constructor. - * @return the actual number of bytes read. If the result is smaller than - * the requested size, this tells the Buffer that the end of the input - * stream is reached. - */ - using ReadCallback = size_t (*)(char *buf, size_t size, void *userData); - - /** - * Handle used to identify a cursor. 
- */ - using CursorId = size_t; - -private: - /** - * Number of bytes to request from the input stream. Set to 64 KiB because - * this seems to be a nice value for I/O operations according to multiple - * sources. - */ - static constexpr size_t REQUEST_SIZE = 64 * 1024; - - /** - * Number of bytes the buffer guarantees to be capable of looking back - * for extracting the current context. - */ - static constexpr size_t LOOKBACK_SIZE = 128; - - /** - * Type used internally to represent one chunk of memory. - */ - using Bucket = std::vector; - - /** - * Type used internally to represent a bucket container. - */ - using BucketList = std::list; - - /** - * Type used internally for representing iterators in the bucket list. - */ - using BucketIterator = BucketList::iterator; - - /** - * Type used internally to represent a read cursor. - */ - struct Cursor { - /** - * Iterator pointing at the current bucket. - */ - BucketIterator bucket; - - /** - * Index of the bucket relative to the start bucket. - */ - size_t bucketIdx; - - /** - * Current offset within that bucket. - */ - size_t bucketOffs; - }; - - /** - * List of buckets containing the buffered memory. - */ - BucketList buckets; - - /** - * List of cursors used to access the memory. Note that cursors can be - * marked as inactive and reused lateron (to avoid having to resize the - * vector). - */ - std::vector cursors; - - /** - * Bitfield specifying which of the cursors is actually valid. - */ - std::vector alive; - - /** - * Function to be called whenever new data is needed. Set to nullptr if the - * Buffer is not backed by an input stream. - */ - const ReadCallback callback; - - /** - * User data given in the constructor. - */ - void *userData; - - /** - * Set to true if the input stream is at its end. - */ - bool reachedEnd; - - /** - * Iterator pointing at the current start bucket. - */ - BucketIterator startBucket; - - /** - * Iterator pointing at the last bucket. 
- */ - BucketIterator endBucket; - - /** - * Byte offset of the start bucket relative to the beginning of the stream. - */ - size_t startOffset; - - /** - * Points at the smallest possible available cursor index, yet does not - * guarantee that this cursor index actuall is free. - */ - CursorId firstDead; - - /** - * Advances the bucket iterator, cares about wrapping around in the ring. - */ - void advance(BucketIterator &it); - - /** - * Advances the bucket iterator, cares about wrapping around in the ring. - */ - void advance(BucketList::const_iterator &it) const; - - /** - * Internally used to find the next free cursor in the cursors vector. The - * cursor is marked as active. - * - * @return the next free cursor index. - */ - CursorId nextCursor(); - - /** - * Returns a reference at the next bucket into which data should be - * inserted. - * - * @return a bucket into which the data can be inserted. - */ - Bucket &nextBucket(); - - /** - * Reads data from the input stream and places it in the next free buffer. - */ - void stream(); - - /** - * Moves the given cursor forward. - */ - size_t moveForward(CursorId cursor, size_t relativeOffs); - - /** - * Moves the given cursor backward. - */ - size_t moveBackward(CursorId cursor, size_t relativeOffs); - - /** - * Reads a character from the current cursor position and optionally - * advances. - */ - bool fetchCharacter(CursorId cursor, char &c, bool incr); - -public: - /** - * Intializes the Buffer with a reference to a ReadCallback that is used - * to fetch data from an underlying input stream. - * - * @param callback is the function that will be called whenever data is read - * from the ring buffer and the buffer does not hold enough data to fulfill - * this read request. - * @param userData is a pointer to user defined data which will be passed to - * the callback function. 
- */ - Buffer(ReadCallback callback, void *userData); - - /** - * Initializes the Buffer with a reference to an std::istream from which - * data will be read. - * - * @param istream is the input stream from which the data should be read. - */ - Buffer(std::istream &istream); - - /** - * Initializes the Buffer with the contents of the given string, after - * this operation the Buffer has a fixed size. - * - * @param str is the string containing the data that should be copied into - * the ring buffer. - */ - Buffer(const std::string &str); - -#ifndef NDEBUG - /** - * Destructor of the Buffer class. Makes sure that all cursors have been - * freed. - */ - ~Buffer(); -#endif - - // No copy - Buffer(const Buffer &) = delete; - - // No assign - Buffer &operator=(const Buffer &) = delete; - - /** - * Creates a new read cursor positioned at the smallest possible position - * in the ring buffer. - */ - CursorId createCursor(); - - /** - * Creates a new read cursor positioned at the same position as the given - * read cursor. - * - * @param ref is the read cursor that should be used as reference for the - * new read cursor. - */ - CursorId createCursor(CursorId ref); - - /** - * Copies the position of one cursor to another cursor. - * - * @param from is the cursor id of which the position should be copied. - * @param to is the cursor id to which the position should be copied. - */ - void copyCursor(CursorId from, CursorId to); - - /** - * Deletes the cursor with the given id. The cursor may no longer be used - * after this function has been called. - * - * @param cursor is the id of the cursor that should be freed. - */ - void deleteCursor(CursorId cursor); - - /** - * Moves a cursor by offs bytes. Note that moving backwards is theoretically - * limited by the LOOKBACK_SIZE of the Buffer, practically it will most - * likely be limited by the REQUEST_SIZE, so you can got at most 64 KiB - * backwards. - * - * @param cursor is the cursor that should be moved. 
- * @param relativeOffs is a positive or negative integer number specifying - * the number of bytes the cursor should be moved forward (positive numbers) - * or backwards (negative numbers). - * @return the actual number of bytes the cursor was moved. This number is - * smaller than the relativeOffs given in the constructor if the - */ - ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs); - - /** - * Returns the current byte offset of the given cursor relative to the - * beginning of the stream. - * - * @param cursor is the cursor for which the byte offset relative to the - * beginning of the stream should be returned. - * @return the number of bytes since the beginning of the stream for the - * given cursor. - */ - size_t offset(CursorId cursor) const; - - /** - * Returns true if the given cursor currently is at the end of the stream. - * - * @param cursor is the cursor for which the atEnd flag should be returned. - * @return true if the there are no more bytes for this cursor. If false - * is returned, this means that there may be more bytes in the stream, - * nevertheless the end of the stream may be hit once the next read function - * is called. - */ - bool atEnd(CursorId cursor) const; - - /** - * Reads a single character from the ring buffer from the given cursor and - * moves to the next character. - * - * @param cursor specifies the cursor from which the data should be read. - * The cursor will be advanced by one byte. - * @param c is the character into which the data needs to be read. - * @return true if a character was read, false if the end of the stream has - * been reached. - */ - bool read(CursorId cursor, char &c); - - /** - * Returns a single character from the ring buffer from the current cursor - * position and stays at that position. - * - * @param cursor specifies the cursor from which the data should be read. - * The cursor will be advanced by one byte. - * @param c is the character into which the data needs to be read. 
- * @return true if a character could be fetched, false if the end of the - * stream has been reached. - */ - bool fetch(CursorId cursor, char &c); -}; - -// Forward declaration -class CharReaderFork; - -/** - * Used within parsers for convenient access to single characters in an input - * stream or buffer. It allows reading and peeking single characters from a - * buffer. Additionally it counts the current column/row (with correct handling - * for UTF-8) and contains an internal state machine that handles the detection - * of linebreaks and converts these to a single '\n'. - */ -class CharReader { -public: - /** - * The context struct is used to represent the current context the char - * reader is in. This context can for example be used when building error - * messages. - */ - struct Context { - /** - * Set to the content of the current line. - */ - std::string line; - - /** - * Relative position (in characters) within that line. - */ - size_t relPos; - - /** - * Set to true if the beginning of the line has been truncated (because - * the reader position is too far away from the actual position of the - * line). - */ - bool truncatedStart; - - /** - * Set to true if the end of the line has been truncated (because the - * reader position is too far away from the actual end position of the - * line. - */ - bool truncatedEnd; - - Context() - : line(), relPos(0), truncatedStart(false), truncatedEnd(false) - { - } - - Context(std::string line, size_t relPos, bool truncatedStart, - bool truncatedEnd) - : line(std::move(line)), - relPos(relPos), - truncatedStart(truncatedStart), - truncatedEnd(truncatedEnd) - { - } - }; - -protected: - /** - * Internally used cursor structure for managing the read and the peek - * cursor. - */ - struct Cursor { - /** - * Corresponding cursor in the underlying buffer instance. - */ - const Buffer::CursorId cursor; - - /** - * Current line the cursor is in. - */ - uint32_t line; - - /** - * Current column the cursor is in. 
- */ - uint32_t column; - - /** - * Constructor of the Cursor class. - * - * @param cursor is the underlying cursor in the Buffer instance. - */ - Cursor(Buffer::CursorId cursor, size_t line, size_t column) - : cursor(cursor), line(line), column(column) - { - } - - /** - * Assigns one cursor to another. - * - * @param buffer is the underlying buffer instance the internal cursor - * belongs to. - * @param cursor is the cursor from which the state should be copied. - */ - void assign(std::shared_ptr buffer, Cursor &cursor); - }; - -private: - /** - * Substitutes "\r", "\n\r", "\r\n" with a single "\n". - * - * @param cursor is the cursor from which the character should be read. - * @param c a reference to the character that should be written. - * @return true if another character needs to be read. - */ - bool substituteLinebreaks(Cursor &cursor, char &c); - - /** - * Reads a single character from the given cursor. - * - * @param cursor is the cursor from which the character should be read. - * @param c a reference to the character that should be written. - * @return true if a character was read, false if the end of the stream has - * been reached. - */ - bool readAtCursor(Cursor &cursor, char &c); - -protected: - /** - * Reference pointing at the underlying buffer. - */ - std::shared_ptr buffer; - - /** - * Cursor used for reading. - */ - Cursor readCursor; - - /** - * Cursor used for peeking. - */ - Cursor peekCursor; - - /** - * Set to true as long the underlying Buffer cursor is at the same position - * for the read and the peek cursor. This is only used for optimization - * purposes and makes consecutive reads a bit faster. - */ - bool coherent; - - /** - * Protected constructor of the CharReader base class. Creates new read - * and peek cursors for the given buffer. - * - * @param buffer is a reference to the underlying Buffer class responsible - * for allowing to read from a single input stream from multiple locations. 
- */ - CharReader(std::shared_ptr buffer, size_t line, size_t column); - -public: - /** - * Creates a new CharReader instance from a string. - * - * @param str is a string containing the input data. - * @param line is the start line. - * @param column is the start column. - */ - CharReader(const std::string &str, size_t line = 1, size_t column = 1); - - /** - * Creates a new CharReader instance for an input stream. - * - * @param istream is the input stream from which incomming data should be - * read. - * @param line is the start line. - * @param column is the start column. - */ - CharReader(std::istream &istream, size_t line = 1, size_t column = 1); - - /** - * Deletes the used cursors from the underlying buffer instance. - */ - ~CharReader(); - - // No copy - CharReader(const Buffer &) = delete; - - // No assign - CharReader &operator=(const Buffer &) = delete; - - /** - * Peeks a single character. If called multiple times, returns the - * character after the previously peeked character. - * - * @param c is a reference to the character to which the result should be - * written. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char &c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. - * - * @param c is a reference to the character to which the result should be - * written. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char &c); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Moves the read cursor to the next non-whitespace character. 
Returns - * false, if the end of the stream was reached. - * - * @return false if the end of the stream was reached, false othrwise. - */ - bool consumeWhitespace(); - - /** - * Creates a new CharReader located at the same position as this CharReader - * instance, yet the new CharReader can be used independently of this - * CharReader. Use the "commit" function of the returned CharReader to - * copy the state of the forked CharReaderFork to this CharReader. - * - * @return a CharReaderFork instance positioned at the same location as this - * CharReader instance. - */ - CharReaderFork fork(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - * - * @return true if there is no more data. - */ - bool atEnd() const { return buffer->atEnd(readCursor.cursor); } - - /** - * Returns the current line (starting with one). - * - * @return the current line number. - */ - uint32_t getLine() const { return readCursor.line; } - - /** - * Returns the current column (starting with one). - * - * @return the current column number. - */ - uint32_t getColumn() const { return readCursor.column; } - - /** - * Returns the current byte offset of the read cursor. - * - * @return the byte position within the stream. - */ - size_t getOffset() const { return buffer->offset(readCursor.cursor); }; - - /** - * Returns the line the read cursor currently is in, but at most the - * given number of characters in the form of a Context structure. - */ - Context getContext(ssize_t maxSize); -}; - -/** - * A CharReaderFork is returned whenever the "fork" function of the CharReader - * class is used. Its "commit" function can be used to move the underlying - * CharReader instance to the location of the CharReaderFork instance. Otherwise - * the read location of the underlying CharReader is left unchanged. - */ -class CharReaderFork : public CharReader { -private: - friend CharReader; - - /** - * The reader cursor of the underlying CharReader instance. 
- */ - CharReader::Cursor &parentReadCursor; - - /** - * The peek cursor of the underlying CharReader instance. - */ - CharReader::Cursor &parentPeekCursor; - - /** - * Constructor of the CharReaderFork class. - * - * @param buffer is a reference at the parent Buffer instance. - * @param parentPeekCursor is a reference at the parent read cursor. - * @param parentPeekCursor is a reference at the parent peek cursor. - * @param coherent specifies whether the char reader cursors are initialized - * coherently. - */ - CharReaderFork(std::shared_ptr buffer, - CharReader::Cursor &parentReadCursor, - CharReader::Cursor &parentPeekCursor, bool coherent); - -public: - /** - * Moves the read and peek cursor of the parent CharReader to the location - * of the read and peek cursor in the fork. - */ - void commit(); -}; -} - -/** - * Alias of the commonly used CharReader class. - */ -using CharReader = utils::CharReader; - -} - -#endif /* _OUSIA_CHAR_READER_HPP_ */ - diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp deleted file mode 100644 index 5c167cd..0000000 --- a/src/core/variant/Reader.cpp +++ /dev/null @@ -1,624 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include -#include - -#include - -#include "Reader.hpp" - -namespace ousia { -namespace variant { - -// TODO: Better error messages (like "Expected 'x' but got 'y'") -// TODO: Replace delims with single char delim where possible -// TODO: Use custom return value instead of std::pair -// TODO: Allow buffered char reader to "fork" -// TODO: Rename CharReader to shorter CharReader -// TODO: Implement context in CharReader (to allow error messages to extract the -// current line) - -/* Error Messages */ - -static const char *ERR_UNEXPECTED_CHAR = "Unexpected character"; -static const char *ERR_UNEXPECTED_END = "Unexpected literal end"; -static const char *ERR_UNTERMINATED = "Unterminated literal"; -static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; -static const char *ERR_INVALID_INTEGER = "Invalid integer value"; -static const char *ERR_TOO_LARGE = "Value too large to represent"; - -/* Class Number */ - -/** - * Class used internally to represent a number (integer or double). The number - * is represented by its components (base value a, nominator n, denominator d, - * exponent e, sign s and exponent sign sE). - */ -class Number { -private: - /** - * Reprsents the part of the number: Base value a, nominator n, exponent e. - */ - enum class Part { A, N, E }; - - /** - * State used in the parser state machine - */ - enum class State { - INIT, - HAS_MINUS, - LEADING_ZERO, - LEADING_POINT, - INT, - HEX, - POINT, - EXP_INIT, - EXP_HAS_MINUS, - EXP - }; - - /** - * Returns the numeric value of the given ASCII character (returns 0 for - * '0', 1 for '1', 10 for 'A' and so on). - * - * @param c is the character for which the numeric value should be returned. - * @return the numeric value the character represents. 
- */ - static int charValue(char c) - { - if (c >= '0' && c <= '9') { - return c & 0x0F; - } - if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) { - return (c & 0x0F) + 9; - } - return -1; - } - - /** - * Appends the value of the character c to the internal number - * representation and reports any errors that might occur. - */ - bool appendChar(char c, int base, Part p, CharReader &reader, - Logger &logger) - { - // Check whether the given character is valid - int v = charValue(c); - if (v < 0 || v >= base) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - - // Append the number to the specified part - switch (p) { - case Part::A: - a = a * base + v; - break; - case Part::N: - n = n * base + v; - d = d * base; - break; - case Part::E: - e = e * base + v; - break; - } - - // Check for any overflows - if (a < 0 || n < 0 || d < 0 || e < 0) { - logger.errorAt(ERR_TOO_LARGE, reader); - return false; - } - return true; - } - -public: - /** - * Sign and exponent sign. - */ - int8_t s, sE; - - /** - * Exponent - */ - int16_t e; - - /** - * Base value, nominator, denominator - */ - int64_t a, n, d; - - /** - * Constructor of the number class. - */ - Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {} - - /** - * Returns the represented double value. - */ - double doubleValue() - { - return s * (a + ((double)n / (double)d)) * pow(10.0, (double)(sE * e)); - } - - /** - * Returns the represented integer value. Only a lossless operation, if the - * number is an integer (as can be checked via the isInt method), otherwise - * the exponent and the fractional value will be truncated. - */ - int64_t intValue() { return s * a; } - - /** - * Returns true, if the number is an integer (has no fractional or - * exponential part). - */ - bool isInt() { return (n == 0) && (d == 1) && (e == 0); } - - /** - * Tries to parse the number from the given stream and loggs any errors to - * the given logger instance. Numbers are terminated by one of the given - * delimiters. 
- */ - bool parse(CharReader &reader, Logger &logger, - const std::unordered_set &delims) - { - State state = State::INIT; - char c; - - // Consume the first whitespace characters - reader.consumeWhitespace(); - - // Iterate over the FSM to extract numbers - while (reader.peek(c)) { - // Abort, once a delimiter or whitespace is reached - if (Utils::isWhitespace(c) || delims.count(c)) { - reader.resetPeek(); - break; - } - - // The character is not a whitespace character and not a delimiter - switch (state) { - case State::INIT: - case State::HAS_MINUS: - switch (c) { - case '-': - // Do not allow multiple minus signs - if (state == State::HAS_MINUS) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::HAS_MINUS; - s = -1; - break; - case '0': - // Remember a leading zero for the detection of "0x" - state = State::LEADING_ZERO; - break; - case '.': - // Remember a leading point as ".eXXX" is invalid - state = State::LEADING_POINT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::LEADING_ZERO: - if (c == 'x' || c == 'X') { - state = State::HEX; - break; - } - // fallthrough - case State::INT: - switch (c) { - case '.': - state = State::POINT; - break; - case 'e': - case 'E': - state = State::EXP_INIT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::HEX: - if (!appendChar(c, 16, Part::A, reader, logger)) { - return false; - } - break; - case State::LEADING_POINT: - case State::POINT: - switch (c) { - case 'e': - case 'E': - if (state == State::LEADING_POINT) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::EXP_INIT; - break; - default: - state = State::POINT; - if (!appendChar(c, 10, Part::N, reader, logger)) { - return false; - } - break; - } - break; - case State::EXP_HAS_MINUS: - case 
State::EXP_INIT: - if (c == '-') { - if (state == State::EXP_HAS_MINUS) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::EXP_HAS_MINUS; - sE = -1; - } else { - state = State::EXP; - if (!appendChar(c, 10, Part::E, reader, logger)) { - return false; - } - } - break; - case State::EXP: - if (!appendChar(c, 10, Part::E, reader, logger)) { - return false; - } - break; - } - reader.consumePeek(); - } - - // States in which ending is valid. Log an error in other states - if (state == State::LEADING_ZERO || state == State::HEX || - state == State::INT || state == State::POINT || - state == State::EXP) { - return true; - } - logger.errorAt(ERR_UNEXPECTED_END, reader); - return false; - } -}; - -/* Class Reader */ - -static const int STATE_INIT = 0; -static const int STATE_IN_STRING = 1; -static const int STATE_IN_ARRAY = 2; -static const int STATE_EXPECT_COMMA = 3; -static const int STATE_ESCAPE = 4; -static const int STATE_WHITESPACE = 5; -static const int STATE_RESYNC = 6; - -template -static std::pair error(CharReader &reader, Logger &logger, - const char *err, T res) -{ - logger.errorAt(err, reader); - return std::make_pair(false, std::move(res)); -} - -std::pair Reader::parseString( - CharReader &reader, Logger &logger, - const std::unordered_set *delims) -{ - // Initialize the internal state - int state = STATE_INIT; - char quote = 0; - std::stringstream res; - - // Consume all whitespace - reader.consumeWhitespace(); - - // Statemachine whic iterates over each character in the stream - // TODO: Combination of peeking and consumePeek is stupid as consumePeek is - // the default (read and putBack would obviously be better, yet the latter - // is not trivial to implement in the current CharReader). 
- char c; - while (reader.peek(c)) { - switch (state) { - case STATE_INIT: - if (c == '"' || c == '\'') { - quote = c; - state = STATE_IN_STRING; - break; - } else if (delims && delims->count(c)) { - return error(reader, logger, ERR_UNEXPECTED_END, res.str()); - } - return error(reader, logger, ERR_UNEXPECTED_CHAR, res.str()); - case STATE_IN_STRING: - if (c == quote) { - reader.consumePeek(); - return std::make_pair(true, res.str()); - } else if (c == '\\') { - state = STATE_ESCAPE; - reader.consumePeek(); - break; - } else if (c == '\n') { - return error(reader, logger, ERR_UNTERMINATED, res.str()); - } - res << c; - reader.consumePeek(); - break; - case STATE_ESCAPE: - // Handle all possible special escape characters - switch (c) { - case 'b': - res << '\b'; - break; - case 'f': - res << '\f'; - break; - case 'n': - res << '\n'; - break; - case 'r': - res << '\r'; - break; - case 't': - res << '\t'; - break; - case 'v': - res << '\v'; - break; - case '\'': - res << '\''; - break; - case '"': - res << '"'; - break; - case '\\': - res << '\\'; - break; - case '\n': - break; - case 'x': - // TODO: Parse Latin-1 sequence hex XX - break; - case 'u': - // TODO: Parse 16-Bit unicode character hex XXXX - break; - default: - if (Utils::isNumeric(c)) { - // TODO: Parse octal 000 sequence - } else { - logger.errorAt(ERR_INVALID_ESCAPE, reader); - } - break; - } - - // Switch back to the "normal" state - state = STATE_IN_STRING; - reader.consumePeek(); - break; - } - } - return error(reader, logger, ERR_UNEXPECTED_END, res.str()); -} - -std::pair Reader::parseArray( - CharReader &reader, Logger &logger, char delim) -{ - Variant::arrayType res; - bool hadError = false; - int state = delim ? STATE_IN_ARRAY : STATE_INIT; - delim = delim ? 
delim : ']'; - char c; - - // Consume all whitespace - reader.consumeWhitespace(); - - // Iterate over the characters, use the parseGeneric function to read the - // pairs - while (reader.peek(c)) { - // Generically handle the end of the array - if (state != STATE_INIT && c == delim) { - reader.consumePeek(); - return std::make_pair(!hadError, res); - } - - switch (state) { - case STATE_INIT: - if (c != '[') { - return error(reader, logger, ERR_UNEXPECTED_CHAR, res); - } - state = STATE_IN_ARRAY; - reader.consumePeek(); - break; - case STATE_IN_ARRAY: { - // Try to read an element using the parseGeneric function - reader.resetPeek(); - auto elem = parseGeneric(reader, logger, {',', delim}); - res.push_back(elem.second); - - // If the reader had no error, expect an comma, otherwise skip - // to the next comma in the stream - if (elem.first) { - state = STATE_EXPECT_COMMA; - } else { - state = STATE_RESYNC; - hadError = true; - } - break; - } - case STATE_EXPECT_COMMA: - // Skip whitespace - if (c == ',') { - state = STATE_IN_ARRAY; - } else if (!Utils::isWhitespace(c)) { - hadError = true; - state = STATE_RESYNC; - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - } - reader.consumePeek(); - break; - case STATE_RESYNC: - // Just wait for another comma to arrive - if (c == ',') { - state = STATE_IN_ARRAY; - } - reader.consumePeek(); - break; - } - } - return error(reader, logger, ERR_UNEXPECTED_END, res); -} - -std::pair Reader::parseUnescapedString( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - std::stringstream res; - std::stringstream buf; - char c; - - // Consume all whitespace - reader.consumeWhitespace(); - - // Copy all characters, skip whitespace at the end - int state = STATE_IN_STRING; - while (reader.peek(c)) { - if (delims.count(c)) { - reader.resetPeek(); - return std::make_pair(true, res.str()); - } else if (Utils::isWhitespace(c)) { - // Do not add whitespace to the output buffer - state = STATE_WHITESPACE; - buf << c; - } 
else { - // If we just hat a sequence of whitespace, append it to the output - // buffer and continue - if (state == STATE_WHITESPACE) { - res << buf.str(); - buf.str(std::string{}); - buf.clear(); - state = STATE_IN_STRING; - } - res << c; - } - reader.consumePeek(); - } - return std::make_pair(true, res.str()); -} - -std::pair Reader::parseInteger( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - Number n; - if (n.parse(reader, logger, delims)) { - // Only succeed if the parsed number is an integer, otherwise this is an - // error - if (n.isInt()) { - return std::make_pair(true, n.intValue()); - } else { - return error(reader, logger, ERR_INVALID_INTEGER, n.intValue()); - } - } - return std::make_pair(false, n.intValue()); -} - -std::pair Reader::parseDouble( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - Number n; - bool res = n.parse(reader, logger, delims); - return std::make_pair(res, n.doubleValue()); -} - -std::pair Reader::parseGeneric( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - char c; - - // Skip all whitespace characters - reader.consumeWhitespace(); - while (reader.peek(c)) { - // Stop if a delimiter is reached - if (delims.count(c)) { - return error(reader, logger, ERR_UNEXPECTED_END, nullptr); - } - - // Parse a string if a quote is reached - if (c == '"' || c == '\'') { - auto res = parseString(reader, logger); - return std::make_pair(res.first, res.second.c_str()); - } - - if (c == '[') { - // TODO: Parse struct descriptor - } - - // Try to parse everything that looks like a number as number - if (Utils::isNumeric(c) || c == '-') { - Number n; - - // Fork the reader - utils::CharReaderFork fork = reader.fork(); - - // TODO: Fork logger - - // Try to parse the number - if (n.parse(fork, logger, delims)) { - // Parsing was successful, advance the reader - fork.commit(); - if (n.isInt()) { - return std::make_pair( - true, - 
Variant{static_cast(n.intValue())}); - } else { - return std::make_pair(true, n.doubleValue()); - } - } - } - - // Parse an unescaped string in any other case - auto res = parseUnescapedString(reader, logger, delims); - - // Handling for special primitive values - if (res.first) { - if (res.second == "true") { - return std::make_pair(true, Variant{true}); - } - if (res.second == "false") { - return std::make_pair(true, Variant{false}); - } - if (res.second == "null") { - return std::make_pair(true, Variant{nullptr}); - } - } - return std::make_pair(res.first, res.second.c_str()); - } - return error(reader, logger, ERR_UNEXPECTED_END, nullptr); -} -} -} - diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp deleted file mode 100644 index 4114d46..0000000 --- a/src/core/variant/Reader.hpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file Reader.hpp - * - * Provides parsers for various micro formats. These formats include integers, - * doubles, strings, JSON and the Ousía struct notation. 
- * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_VARIANT_READER_HPP_ -#define _OUSIA_VARIANT_READER_HPP_ - -#include -#include -#include - -#include -#include - -#include "Variant.hpp" - -namespace ousia { -namespace variant { - -class Reader { -private: - /** - * Parses a string which may either be enclosed by " or ', unescapes - * entities in the string as specified for JavaScript. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting - * character. - * @param logger is the logger instance that should be used to log error - * messages and warnings. - * @param delims is an optional set of delimiters after which parsing has to - * be stopped (the delimiters may occur inside the actual string, but not - * outside). If nullptr is given, no delimiter is used and a complete string - * is read. - */ - static std::pair parseString( - CharReader &reader, Logger &logger, - const std::unordered_set *delims); - -public: - /** - * Parses a string which may either be enclosed by " or ', unescapes - * entities in the string as specified for JavaScript. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting - * character. - * @param logger is the logger instance that should be used to log error - * messages and warnings. - * @param delims is a set of delimiters after which parsing has to - * be stopped (the delimiters may occur inside the actual string, but not - * outside). 
- */ - static std::pair parseString( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) - { - return parseString(reader, logger, &delims); - } - - /** - * Parses a string which may either be enclosed by " or ', unescapes - * entities in the string as specified for JavaScript. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting - * character. - * @param logger is the logger instance that should be used to log error - * messages and warnings. - */ - static std::pair parseString(CharReader &reader, - Logger &logger) - { - return parseString(reader, logger, nullptr); - } - - /** - * Extracts an unescaped string from the given buffered char reader - * instance. This function just reads text until one of the given delimiter - * characters is reached. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. - * @param delims is a set of characters which will terminate the string. - * These characters are not included in the result. May not be nullptr. - */ - static std::pair parseUnescapedString( - CharReader &reader, Logger &logger, - const std::unordered_set &delims); - - /** - * Parses an integer from the given buffered char reader instance until one - * of the given delimiter characters is reached. - * - * @param reader is a reference to the CharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. 
- */ - static std::pair parseInteger( - CharReader &reader, Logger &logger, - const std::unordered_set &delims); - - /** - * Parses an double from the given buffered char reader instance until one - * of the given delimiter characters is reached. - * - * @param reader is a reference to the CharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. - */ - static std::pair parseDouble( - CharReader &reader, Logger &logger, - const std::unordered_set &delims); - - /** - * Parses an array of values. - */ - static std::pair parseArray( - CharReader &reader, Logger &logger, char delim = 0); - - /** - * Tries to parse the most specific item from the given stream until one of - * the given delimiters is reached or a meaningful literal has been read. - * The resulting variant represents the value that has been read. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. - * @param delims is a set of characters which will terminate the string. - * These characters are not included in the result. May not be nullptr. - */ - static std::pair parseGeneric( - CharReader &reader, Logger &logger, - const std::unordered_set &delims); -}; -} -} - -#endif /* _OUSIA_VARIANT_READER_HPP_ */ - diff --git a/src/core/variant/Variant.cpp b/src/core/variant/Variant.cpp deleted file mode 100644 index d33cd4f..0000000 --- a/src/core/variant/Variant.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include - -#include "Variant.hpp" - -namespace ousia { - -/* Class Variant::TypeException */ - -Variant::TypeException::TypeException(Type actualType, Type requestedType) - : OusiaException(std::string("Variant: Requested \"") + - Variant::getTypeName(requestedType) + - std::string("\" but is \"") + - Variant::getTypeName(actualType) + std::string("\"")), - actualType(actualType), - requestedType(requestedType) -{ -} - -/* Class Variant */ - -const char *Variant::getTypeName(Type type) -{ - switch (type) { - case Type::NULLPTR: - return "null"; - case Type::BOOL: - return "boolean"; - case Type::INT: - return "integer"; - case Type::DOUBLE: - return "double"; - case Type::STRING: - return "string"; - case Type::ARRAY: - return "array"; - case Type::MAP: - return "map"; - } - return "unknown"; -} - -Variant::boolType Variant::toBool() const -{ - switch (getType()) { - case Type::NULLPTR: - return false; - case Type::BOOL: - return asBool(); - case Type::INT: - return asInt() != 0; - case Type::DOUBLE: - return asDouble() != 0.0; - case Type::STRING: - return true; - case Type::ARRAY: - return true; - case Type::MAP: - return true; - } - return false; -} - -Variant::intType Variant::toInt() const -{ - switch (getType()) { - case Type::NULLPTR: - return 0; - case Type::BOOL: - return asBool() ? 1 : 0; - case Type::INT: - return asInt(); - case Type::DOUBLE: - return asDouble(); - case Type::STRING: - return 0; // TODO: Parse string as int - case Type::ARRAY: { - const arrayType &a = asArray(); - return (a.size() == 1) ? 
a[0].toInt() : 0; - } - case Type::MAP: - return 0; - } - return false; -} - -Variant::doubleType Variant::toDouble() const -{ - switch (getType()) { - case Type::NULLPTR: - return 0.0; - case Type::BOOL: - return asBool() ? 1.0 : 0.0; - case Type::INT: - return asInt(); - case Type::DOUBLE: - return asDouble(); - case Type::STRING: - return 0.0; // TODO: Parse string as double - case Type::ARRAY: { - const arrayType &a = asArray(); - return (a.size() == 1) ? a[0].toDouble() : 0; - } - case Type::MAP: - return 0; - } - return false; -} - -Variant::stringType Variant::toString(bool escape) const -{ - switch (getType()) { - case Type::NULLPTR: - return "null"; - case Type::BOOL: - return asBool() ? "true" : "false"; - case Type::INT: - return std::to_string(asInt()); - case Type::DOUBLE: - return std::to_string(asDouble()); - case Type::STRING: { - // TODO: Use proper serialization function - std::stringstream ss; - ss << "\"" << asString() << "\""; - return ss.str(); - } - case Type::ARRAY: - return Utils::join(asArray(), ", ", "[", "]"); - case Type::MAP: - return Utils::join(asMap(), ", ", "{", "}"); - } - return ""; -} - -} - diff --git a/src/core/variant/Variant.hpp b/src/core/variant/Variant.hpp deleted file mode 100644 index 1e62644..0000000 --- a/src/core/variant/Variant.hpp +++ /dev/null @@ -1,766 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. 
If not, see . -*/ - -/** - * @file Variant.hpp - * - * The Variant class is used to efficiently represent a variables of varying - * type. Variant instances are used to represent data given by the end user and - * to exchange information between the host application and the script clients. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_VARIANT_HPP_ -#define _OUSIA_VARIANT_HPP_ - -#include -#include -#include -#include -#include - -// TODO: Use -// http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html -// later (will allow to use 8 bytes for a variant) - -#include - -namespace ousia { -namespace variant { - -/** - * Instances of the Variant class represent any kind of data that is exchanged - * between the host application and the script engine. Variants are immutable. - */ -class Variant { -public: - /** - * Enum containing the possible types a variant may have. - */ - enum class Type : int16_t { - NULLPTR, - BOOL, - INT, - DOUBLE, - STRING, - ARRAY, - MAP - }; - - /** - * Exception thrown whenever a variant is accessed via a getter function - * that is not supported for the current variant type. - */ - class TypeException : public OusiaException { - private: - /** - * Internally used string holding the exception message. - */ - const std::string msg; - - public: - /** - * Contains the actual type of the variant. - */ - const Type actualType; - - /** - * Contains the requested type of the variant. - */ - const Type requestedType; - - /** - * Constructor of the TypeException. - * - * @param actualType describes the actual type of the variant. - * @param requestedType describes the type in which the variant was - * requested. 
- */ - TypeException(Type actualType, Type requestedType); - }; - - using boolType = bool; - using intType = int32_t; - using doubleType = double; - using stringType = std::string; - using arrayType = std::vector; - using mapType = std::map; - -private: - /** - * Used to store the actual type of the variant. - */ - Type type = Type::NULLPTR; - - /** - * Anonymous union containing the possible value of the variant. - */ - union { - /** - * The boolean value. Only valid if type is Type::BOOL. - */ - boolType boolVal; - /** - * The integer value. Only valid if type is Type::INT. - */ - intType intVal; - /** - * The number value. Only valid if type is Type::DOUBLE. - */ - doubleType doubleVal; - /** - * Pointer to the more complex data structures on the free store. Only - * valid if type is one of Type::STRING, Type::ARRAY, - * Type::MAP. - */ - void *ptrVal; - }; - - /** - * Internally used to convert the current pointer value to a reference of - * the specified type. - */ - template - T &asObj(Type requestedType) const - { - const Type actualType = getType(); - if (actualType == requestedType) { - return *(static_cast(ptrVal)); - } - throw TypeException{actualType, requestedType}; - } - - /** - * Used internally to assign the value of another Variant instance to this - * instance. - * - * @param v is the Variant instance that should be copied to this instance. - */ - void copy(const Variant &v) - { - destroy(); - type = v.type; - switch (type) { - case Type::NULLPTR: - break; - case Type::BOOL: - boolVal = v.boolVal; - break; - case Type::INT: - intVal = v.intVal; - break; - case Type::DOUBLE: - doubleVal = v.doubleVal; - break; - case Type::STRING: - ptrVal = new stringType(v.asString()); - break; - case Type::ARRAY: - ptrVal = new arrayType(v.asArray()); - break; - case Type::MAP: - ptrVal = new mapType(v.asMap()); - break; - } - } - - /** - * Used internally to move the value of another Variant instance to this - * instance. 
- * - * @param v is the Variant instance that should be copied to this instance. - */ - void move(Variant &&v) - { - destroy(); - type = v.type; - switch (type) { - case Type::NULLPTR: - break; - case Type::BOOL: - boolVal = v.boolVal; - break; - case Type::INT: - intVal = v.intVal; - break; - case Type::DOUBLE: - doubleVal = v.doubleVal; - break; - case Type::STRING: - case Type::ARRAY: - case Type::MAP: - ptrVal = v.ptrVal; - v.ptrVal = nullptr; - break; - } - v.type = Type::NULLPTR; - } - - /** - * Used internally to destroy any value that was allocated on the heap. - */ - void destroy() - { - if (ptrVal) { - switch (type) { - case Type::STRING: - delete static_cast(ptrVal); - break; - case Type::ARRAY: - delete static_cast(ptrVal); - break; - case Type::MAP: - delete static_cast(ptrVal); - break; - default: - break; - } - } - } - -public: - /** - * Copy constructor of the Variant class. - * - * @param v is the Variant instance that should be cloned. - */ - Variant(const Variant &v) : ptrVal(nullptr) { copy(v); } - - /** - * Move constructor of the Variant class. - * - * @param v is the reference to the Variant instance that should be moved, - * this instance is invalidated afterwards. - */ - Variant(Variant &&v) : ptrVal(nullptr) { move(std::move(v)); } - - /** - * Default constructor. Type is set to Type:null. - */ - Variant() : ptrVal(nullptr) { setNull(); } - - /** - * Default destructor, frees any memory that was allocated on the heap. - */ - ~Variant() { destroy(); } - - /** - * Constructor for null values. Initializes the variant as null value. - */ - Variant(std::nullptr_t) : ptrVal(nullptr) { setNull(); } - - /** - * Constructor for boolean values. - * - * @param b boolean value. - */ - Variant(boolType b) : ptrVal(nullptr) { setBool(b); } - - /** - * Constructor for integer values. - * - * @param i integer value. - */ - Variant(intType i) : ptrVal(nullptr) { setInt(i); } - - /** - * Constructor for double values. - * - * @param d double value. 
- */ - Variant(doubleType d) : ptrVal(nullptr) { setDouble(d); } - - /** - * Constructor for string values. The given string is copied and managed by - * the new Variant instance. - * - * @param s is a reference to a C-Style string used as string value. - */ - Variant(const char *s) : ptrVal(nullptr) { setString(s); } - - /** - * Constructor for array values. The given array is copied and managed by - * the new Variant instance. - * - * @param a is a reference to the array - */ - Variant(arrayType a) : ptrVal(nullptr) { setArray(std::move(a)); } - - /** - * Constructor for map values. The given map is copied and managed by the - * new Variant instance. - * - * @param m is a reference to the map. - */ - Variant(mapType m) : ptrVal(nullptr) { setMap(std::move(m)); } - - /** - * Copy assignment operator. - */ - Variant &operator=(const Variant &v) - { - copy(v); - return *this; - } - - /** - * Move assignment operator. - */ - Variant &operator=(Variant &&v) - { - move(std::move(v)); - return *this; - } - - /** - * Assign nullptr_t operator (allows to write Variant v = nullptr). - * - * @param p is an instance of std::nullptr_t. - */ - Variant &operator=(std::nullptr_t) - { - setNull(); - return *this; - } - - /** - * Assign a boolean value. - * - * @param b is the boolean value to which the variant should be set. - */ - Variant &operator=(boolType b) - { - setBool(b); - return *this; - } - - /** - * Assign an integer value. - * - * @param i is the integer value to which the variant should be set. - */ - Variant &operator=(intType i) - { - setInt(i); - return *this; - } - - /** - * Assign a double value. - * - * @param d is the double value to which the variant should be set. - */ - Variant &operator=(doubleType d) - { - setDouble(d); - return *this; - } - - /** - * Assign a zero terminated const char array. - * - * @param s is the zero terminated const char array to which the variant - * should be set. 
- */ - Variant &operator=(const char *s) - { - setString(s); - return *this; - } - - /** - * Checks whether this Variant instance represents the nullptr. - * - * @return true if the Variant instance represents the nullptr, false - * otherwise. - */ - bool isNull() const { return type == Type::NULLPTR; } - - /** - * Checks whether this Variant instance is a boolean. - * - * @return true if the Variant instance is a boolean, false otherwise. - */ - bool isBool() const { return type == Type::BOOL; } - - /** - * Checks whether this Variant instance is an integer. - * - * @return true if the Variant instance is an integer, false otherwise. - */ - bool isInt() const { return type == Type::INT; } - - /** - * Checks whether this Variant instance is a double. - * - * @return true if the Variant instance is a double, false otherwise. - */ - bool isDouble() const { return type == Type::DOUBLE; } - - /** - * Checks whether this Variant instance is a string. - * - * @return true if the Variant instance is a string, false otherwise. - */ - bool isString() const { return type == Type::STRING; } - - /** - * Checks whether this Variant instance is an array. - * - * @return true if the Variant instance is an array, false otherwise. - */ - bool isArray() const { return type == Type::ARRAY; } - - /** - * Checks whether this Variant instance is a map. - * - * @return true if the Variant instance is a map, false otherwise. - */ - bool isMap() const { return type == Type::MAP; } - - /** - * Returns the Variant boolean value. Performs no type conversion. Throws an - * exception if the underlying type is not a boolean. - * - * @return the boolean value. - */ - boolType asBool() const - { - if (isBool()) { - return boolVal; - } - throw TypeException{getType(), Type::BOOL}; - } - - /** - * Returns the Variant integer value. Performs no type conversion. Throws an - * exception if the underlying type is not an integer. - * - * @return the integer value. 
- */ - intType asInt() const - { - if (isInt()) { - return intVal; - } - throw TypeException{getType(), Type::INT}; - } - - /** - * Returns the Variant double value. Performs no type conversion. Throws an - * exception if the underlying type is not a double. - * - * @return the double value. - */ - doubleType asDouble() const - { - if (isDouble()) { - return doubleVal; - } - throw TypeException{getType(), Type::DOUBLE}; - } - - /** - * Returns a const reference to the string value. Performs no type - * conversion. Throws an exception if the underlying type is not a string. - * - * @return the string value as const reference. - */ - const stringType &asString() const - { - return asObj(Type::STRING); - } - - /** - * Returns a const reference to the string value. Performs no type - * conversion. Throws an exception if the underlying type is not a string. - * - * @return the string value as reference. - */ - stringType &asString() { return asObj(Type::STRING); } - - /** - * Returns a const reference to the array value. Performs no type - * conversion. Throws an exception if the underlying type is not an array. - * - * @return the array value as const reference. - */ - const arrayType &asArray() const { return asObj(Type::ARRAY); } - - /** - * Returns a const reference to the array value. Performs no type - * conversion. Throws an exception if the underlying type is not an array. - * - * @return the array value as reference. - */ - arrayType &asArray() { return asObj(Type::ARRAY); } - - /** - * Returns a const reference to the map value. Performs no type - * conversion. Throws an exception if the underlying type is not a map. - * - * @return the map value as const reference. - */ - const mapType &asMap() const { return asObj(Type::MAP); } - - /** - * Returns a reference to the map value. Performs no type conversion. - * Throws an exception if the underlying type is not a map. - * - * @return the map value as reference. 
- */ - mapType &asMap() { return asObj(Type::MAP); } - - /** - * Returns the value of the Variant as boolean, performs type conversion. - * - * @return the Variant value converted to a boolean value. - */ - boolType toBool() const; - - /** - * Returns the value of the Variant as integer, performs type conversion. - * - * @return the Variant value converted to an integer value. - */ - intType toInt() const; - - /** - * Returns the value of the Variant as double, performs type conversion. - * - * @return the Variant value converted to a double value. - */ - doubleType toDouble() const; - - /** - * Returns the value of the Variant as string, performs type conversion. - * - * @return the value of the variant as string. - * @param escape if set to true, adds double quotes to strings and escapes - * them properly (resulting in a more or less JSONesque output). - */ - stringType toString(bool escape = false) const; - - /** - * Sets the variant to null. - */ - void setNull() - { - destroy(); - type = Type::NULLPTR; - ptrVal = nullptr; - } - - /** - * Sets the variant to the given boolean value. - * - * @param b is the new boolean value. - */ - void setBool(boolType b) - { - destroy(); - type = Type::BOOL; - boolVal = b; - } - - /** - * Sets the variant to the given integer value. - * - * @param i is the new integer value. - */ - void setInt(intType i) - { - destroy(); - type = Type::INT; - intVal = i; - } - - /** - * Sets the variant to the given double value. - * - * @param d is the new double value. - */ - void setDouble(doubleType d) - { - destroy(); - type = Type::DOUBLE; - doubleVal = d; - } - - /** - * Sets the variant to the given string value. - * - * @param d is the new string value. - */ - void setString(const char *s) - { - if (isString()) { - asString().assign(s); - } else { - destroy(); - type = Type::STRING; - ptrVal = new stringType(s); - } - } - - /** - * Sets the variant to the given array value. - * - * @param a is the new array value. 
- */ - void setArray(arrayType a) - { - if (isArray()) { - asArray().swap(a); - } else { - destroy(); - type = Type::ARRAY; - ptrVal = new arrayType(std::move(a)); - } - } - - /** - * Sets the variant to the given map value. - * - * @param a is the new map value. - */ - void setMap(mapType m) - { - if (isMap()) { - asMap().swap(m); - } else { - destroy(); - type = Type::MAP; - ptrVal = new mapType(std::move(m)); - } - } - - /** - * Returns the current type of the Variant. - * - * @return the current type of the Variant. - */ - Type getType() const { return type; } - - /** - * Returns the name of the given variant type as C-style string. - */ - static const char *getTypeName(Type type); - - /** - * Returns the name of the type of this variant instance. - */ - const char *getTypeName() { return Variant::getTypeName(getType()); } - - /** - * Prints the Variant to the output stream. - */ - friend std::ostream &operator<<(std::ostream &os, const Variant &v) - { - return os << v.toString(true); - } - - /** - * Prints a key value pair to the output stream. - */ - friend std::ostream &operator<<(std::ostream &os, - const mapType::value_type &v) - { - // TODO: Use proper serialization function - return os << "\"" << v.first << "\": " << v.second.toString(true); - } - - /* - * Comprison operators. - */ - - friend bool operator<(const Variant &lhs, const Variant &rhs) - { - // If the types do not match, we can not do a meaningful comparison. 
- if (lhs.getType() != rhs.getType()) { - throw TypeException(lhs.getType(), rhs.getType()); - } - switch (lhs.getType()) { - case Type::NULLPTR: - return false; - case Type::BOOL: - return lhs.boolVal < rhs.boolVal; - case Type::INT: - return lhs.intVal < rhs.intVal; - case Type::DOUBLE: - return lhs.doubleVal < rhs.doubleVal; - case Type::STRING: - return lhs.asString() < rhs.asString(); - case Type::ARRAY: - return lhs.asArray() < rhs.asArray(); - case Type::MAP: - return lhs.asMap() < rhs.asMap(); - } - throw OusiaException("Internal Error! Unknown type!"); - } - friend bool operator>(const Variant &lhs, const Variant &rhs) - { - return rhs < lhs; - } - friend bool operator<=(const Variant &lhs, const Variant &rhs) - { - return !(lhs > rhs); - } - friend bool operator>=(const Variant &lhs, const Variant &rhs) - { - return !(lhs < rhs); - } - - friend bool operator==(const Variant &lhs, const Variant &rhs) - { - if (lhs.getType() != rhs.getType()) { - return false; - } - switch (lhs.getType()) { - case Type::NULLPTR: - return true; - case Type::BOOL: - return lhs.boolVal == rhs.boolVal; - case Type::INT: - return lhs.intVal == rhs.intVal; - case Type::DOUBLE: - return lhs.doubleVal == rhs.doubleVal; - case Type::STRING: - return lhs.asString() == rhs.asString(); - case Type::ARRAY: - return lhs.asArray() == rhs.asArray(); - case Type::MAP: - return lhs.asMap() == rhs.asMap(); - } - throw OusiaException("Internal Error! 
Unknown type!"); - } - - friend bool operator!=(const Variant &lhs, const Variant &rhs) - { - return !(lhs == rhs); - } -}; -} - -// Alias for the (very often used and unambigous) variant class -using Variant = variant::Variant; -} - -#endif /* _OUSIA_VARIANT_HPP_ */ - diff --git a/src/plugins/css/CSSParser.cpp b/src/plugins/css/CSSParser.cpp index 4cbe93f..5985047 100644 --- a/src/plugins/css/CSSParser.cpp +++ b/src/plugins/css/CSSParser.cpp @@ -18,7 +18,7 @@ #include "CSSParser.hpp" -#include +#include namespace ousia { namespace parser { @@ -77,7 +77,7 @@ static const std::map CSS_DESCRIPTORS = { Rooted CSSParser::parse(std::istream &is, ParserContext &ctx) { - BufferedCharReader input{is}; + CharReader input{is}; CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; tokenizer.ignoreComments = true; tokenizer.ignoreLinebreaks = true; @@ -228,14 +228,14 @@ Rooted CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer, Variant::arrayType args; // we require at least one argument, if parantheses are used // XXX - /*args.push_back(variant::Reader::parseGeneric(tokenizer.getInput(), + args.push_back(VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, - {',', ')'}).second);*/ + {',', ')'}).second); while (expect(COMMA, tokenizer, t, false, ctx)) { // as long as we find commas we expect new arguments. - /*args.push_back( - variant::Reader::parseGeneric( - tokenizer.getInput(), ctx.logger, {',', ')'}).second);*/ + args.push_back( + VariantReader::parseGeneric( + tokenizer.getInput(), ctx.logger, {',', ')'}).second); } expect(PAREN_CLOSE, tokenizer, t, true, ctx); // and we return with the finished Selector. @@ -334,8 +334,8 @@ bool CSSParser::parseRule(CodeTokenizer &tokenizer, ParserContext &ctx, expect(COLON, tokenizer, t, true, ctx); // then the value // TODO: Resolve key for appropriate parsing function here. 
- /*value = variant::Reader::parseGeneric(tokenizer.getInput(), ctx.logger, - {';'}).second;*/ + value = VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, + {';'}).second; // and a ; expect(SEMICOLON, tokenizer, t, true, ctx); return true; diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp index 82f0cd1..eeb5b2c 100644 --- a/src/plugins/css/CSSParser.hpp +++ b/src/plugins/css/CSSParser.hpp @@ -22,9 +22,9 @@ #include #include -#include #include #include +#include #include namespace ousia { diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp index ce2857e..9a7b4d8 100644 --- a/src/plugins/xml/XmlParser.cpp +++ b/src/plugins/xml/XmlParser.cpp @@ -20,7 +20,7 @@ #include -#include +#include #include #include "XmlParser.hpp" diff --git a/test/core/BufferedCharReaderTest.cpp b/test/core/BufferedCharReaderTest.cpp deleted file mode 100644 index b3498f7..0000000 --- a/test/core/BufferedCharReaderTest.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include - -#include "gtest/gtest.h" - -#include - -namespace ousia{ - -TEST(BufferedCharReaderTest, SimpleReadTest) -{ - std::string testStr{"this is a test"}; - char c; - - // Feed a test string into the reader - BufferedCharReader reader{testStr}; - - // Try to read the test string - std::string res; - while (!reader.atEnd()) { - ASSERT_TRUE(reader.read(&c)); - res.append(&c, 1); - } - - // The two strings must equal - ASSERT_STREQ(testStr.c_str(), res.c_str()) ; - - // We must now be at line 1, column 15 - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(testStr.size() + 1, reader.getColumn()); - - // If we call either read or peek, false is returned - ASSERT_FALSE(reader.read(&c)); - ASSERT_FALSE(reader.peek(&c)); -} - -TEST(BufferedCharReaderTest, SimplePeekTest) -{ - std::string testStr{"this is a test"}; - char c; - - // Feed a test string into the reader - BufferedCharReader reader{testStr}; - - // Try to read the test string - std::string res; - while (reader.peek(&c)) { - res.append(&c, 1); - } - - // Peeking does not trigger the "atEnd" flag - ASSERT_FALSE(reader.atEnd()); - - // The two strings must equal - ASSERT_STREQ(testStr.c_str(), res.c_str()); - - // We must now be at line 1, column 1 and NOT at the end of the stream - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); - ASSERT_FALSE(reader.atEnd()); - - // If we consume the peek, we must be at line 1, column 15 and we should be - // at the end of the stream - reader.consumePeek(); - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(testStr.size() + 1, reader.getColumn()); - ASSERT_TRUE(reader.atEnd()); - - // If we call either read or peek, false is returned - ASSERT_FALSE(reader.read(&c)); - ASSERT_FALSE(reader.peek(&c)); -} - -TEST(BufferedCharReaderTest, SplittedPeakTest) -{ - std::string testStr{"this is a test"}; - char c; - - // Feed a test string into the reader - BufferedCharReader reader; - - // Try to peek the test string, feed char after char into the reader - 
std::string res; - for (unsigned int i = 0; i < testStr.length(); i++) { - reader.feed(std::string(&testStr[i], 1)); - while (reader.peek(&c)) { - res.append(&c, 1); - } - } - reader.close(); - - // Consume the peeked data - ASSERT_FALSE(reader.atEnd()); - reader.consumePeek(); - ASSERT_TRUE(reader.atEnd()); - - // The two strings must equal - ASSERT_STREQ(testStr.c_str(), res.c_str()) ; - - // We must now be at line 1, column 15 - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(testStr.size() + 1, reader.getColumn()); - - // If we call either read or peek, false is returned - ASSERT_FALSE(reader.read(&c)); - ASSERT_FALSE(reader.peek(&c)); -} - -TEST(BufferedCharReaderTest, RowColumnCounterTest) -{ - // Feed a test string into the reader - BufferedCharReader reader{"1\n\r2\n3\r\n\n4"}; - - // We should currently be in line 1, column 1 - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); - - // Read two characters - char c; - for (int i = 0; i < 2; i++) reader.read(&c); - ASSERT_EQ(2, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); - - // Read two characters - for (int i = 0; i < 2; i++) reader.read(&c); - ASSERT_EQ(3, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); - - // Read three characters - for (int i = 0; i < 3; i++) reader.read(&c); - ASSERT_EQ(5, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); -} - -TEST(BufferedCharReaderTest, LinebreakSubstitutionTest) -{ - // Feed a test string into the reader - BufferedCharReader reader{"this\n\ris\n\rjust\na test\r\n\rtest\n\r"}; - - // Read all characters from the test string - std::string res; - char c; - while (reader.read(&c)) { - res.append(&c, 1); - } - - // Test for equality - ASSERT_STREQ("this\nis\njust\na test\n\ntest\n", res.c_str()); -} - -TEST(BufferedCharReaderTest, RowColumnCounterUTF8Test) -{ - // Feed a test string with some umlauts into the reader - BufferedCharReader reader{"\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f"}; - - // Read all bytes - char c; - while 
(reader.read(&c)); - - // The sequence above equals 5 UTF-8 characters (so after reading all the - // cursor is at position 6) - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(6, reader.getColumn()); -} - -} - diff --git a/test/core/CodeTokenizerTest.cpp b/test/core/CodeTokenizerTest.cpp index 1432564..4d11622 100644 --- a/test/core/CodeTokenizerTest.cpp +++ b/test/core/CodeTokenizerTest.cpp @@ -32,15 +32,15 @@ static const int CURLY_CLOSE = 41; TEST(CodeTokenizer, testTokenizer) { - BufferedCharReader reader; - reader.feed("/**\n"); // 1 - reader.feed(" * Some Block Comment\n"); // 2 - reader.feed(" */\n"); // 3 - reader.feed("var my_string = 'My \\'String\\'';\n"); // 4 - reader.feed("// and a line comment\n"); // 5 - reader.feed("var my_obj = { a = 4;}"); // 6 - // 123456789012345678901234567890123456789 - // 0 1 2 3 + CharReader reader{ + "/**\n" // 1 + " * Some Block Comment\n" // 2 + " */\n" // 3 + "var my_string = 'My \\'String\\'';\n" // 4 + "// and a line comment\n" // 5 + "var my_obj = { a = 4;}"}; // 6 + // 123456789012345678901234567890123456789 + // 0 1 2 3 TokenTreeNode root{{{"/*", 1}, {"*/", 2}, {"//", 3}, @@ -68,10 +68,10 @@ TEST(CodeTokenizer, testTokenizer) {STRING, "My 'String'", 17, 4, 32, 4}, {TOKEN_TEXT, ";", 32, 4, 33, 4}, {LINEBREAK, "\n", 33, 4, 1, 5}, - //this is slightly counter-intuitive but makes sense if you think about - //it: As a line comment is ended by a line break the line break is - //technically still a part of the line comment and thus the ending - //is in the next line. + // this is slightly counter-intuitive but makes sense if you think about + // it: As a line comment is ended by a line break the line break is + // technically still a part of the line comment and thus the ending + // is in the next line. 
{LINE_COMMENT, " and a line comment", 1, 5, 1, 6}, {TOKEN_TEXT, "var", 1, 6, 4, 6}, {TOKEN_TEXT, "my_obj", 5, 6, 11, 6}, diff --git a/test/core/LoggerTest.cpp b/test/core/LoggerTest.cpp deleted file mode 100644 index abb76de..0000000 --- a/test/core/LoggerTest.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include - -#include - -namespace ousia { - -struct Pos { - int line, column; - Pos(int line, int column) : line(line), column(column){}; - int getLine() const { return line; } - int getColumn() const { return column; } -}; - -TEST(TerminalLogger, log) -{ - // Test for manual visual expection only -- no assertions - TerminalLogger logger{std::cerr, true}; - logger.pushFilename("test.odp"); - - logger.debug("This is a test debug message", 10, 20); - logger.debug("This is a test debug message with no column", 10); - logger.debug("This is a test debug message with no line"); - logger.debug("This is a test debug message with no file", ""); - logger.debug("This is a test debug message with no file but a line", "", - 10); - logger.debug( - "This is a test debug message with no file but a line and a column", "", - 10, 20); - logger.note("This is a test note", 10, 20); - logger.warning("This is a test warning", 10, 20); - logger.error("This is a test error", 10, 20); - logger.fatalError("This is a 
test fatal error!", 10, 20); - - try { - throw LoggableException{"An exception"}; - } - catch (const LoggableException &ex) { - logger.log(ex); - } - - try { - throw LoggableException{"An exception at position", Pos(10, 20)}; - } - catch (const LoggableException &ex) { - logger.log(ex); - } - - logger.logAt(Severity::ERROR, "This is a positioned log message", - Pos(10, 20)); - logger.debugAt("This is a positioned debug message", Pos(10, 20)); - logger.noteAt("This is a positioned log error", Pos(10, 20)); -} -} - diff --git a/test/core/RegistryTest.cpp b/test/core/RegistryTest.cpp index e06011a..22365f2 100644 --- a/test/core/RegistryTest.cpp +++ b/test/core/RegistryTest.cpp @@ -22,7 +22,7 @@ #include -#include +#include namespace ousia { diff --git a/test/core/TokenizerTest.cpp b/test/core/TokenizerTest.cpp index da6b578..2b80662 100644 --- a/test/core/TokenizerTest.cpp +++ b/test/core/TokenizerTest.cpp @@ -18,7 +18,7 @@ #include -#include +#include #include @@ -65,10 +65,9 @@ TEST(Tokenizer, testTokenization) { TokenTreeNode root{{{"/", 1}, {"/*", 2}, {"*/", 3}}}; - BufferedCharReader reader; - reader.feed("Test/Test /* Block Comment */"); - // 12345678901234567890123456789 - // 0 1 2 + CharReader reader{"Test/Test /* Block Comment */"}; + // 12345678901234567890123456789 + // 0 1 2 std::vector expected = { {TOKEN_TEXT, "Test", 1, 1, 5, 1}, @@ -97,10 +96,7 @@ TEST(Tokenizer, testIncompleteTokens) { TokenTreeNode root{{{"ab", 1}, {"c", 2}}}; - BufferedCharReader reader; - reader.feed("ac"); - // 1234567890 - // 0 1 + CharReader reader{"ac"}; std::vector expected = { {TOKEN_TEXT, "a", 1, 1, 2, 1}, diff --git a/test/core/UtilsTest.cpp b/test/core/UtilsTest.cpp deleted file mode 100644 index 0a7d2a3..0000000 --- a/test/core/UtilsTest.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as 
published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include - -namespace ousia { - -TEST(Utils, isIdentifier) -{ - ASSERT_TRUE(Utils::isIdentifier("test")); - ASSERT_TRUE(Utils::isIdentifier("t0-_est")); - ASSERT_TRUE(Utils::isIdentifier("_t0-_EST")); - ASSERT_FALSE(Utils::isIdentifier("-t0-_EST")); - ASSERT_FALSE(Utils::isIdentifier("0t-_EST")); -} - -TEST(Utils, trim) -{ - ASSERT_EQ("hello world", Utils::trim("\t hello world \n\r\t")); - ASSERT_EQ("hello world", Utils::trim("hello world \n\r\t")); - ASSERT_EQ("hello world", Utils::trim(" hello world")); - ASSERT_EQ("hello world", Utils::trim("hello world")); -} - -} - diff --git a/test/core/common/CharReaderTest.cpp b/test/core/common/CharReaderTest.cpp new file mode 100644 index 0000000..06b9d45 --- /dev/null +++ b/test/core/common/CharReaderTest.cpp @@ -0,0 +1,821 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include +#include +#include + +#include "gtest/gtest.h" + +#include + +namespace ousia { + +/* Test data */ + +// Generates some pseudo-random data +// (inspired by "Numerical Recipes, Third Edition", Chapter 7.17) +static std::vector generateData(size_t len) +{ + const uint32_t B1 = 17; + const uint32_t B2 = 15; + const uint32_t B3 = 5; + uint32_t v = 0xF3A99148; + std::vector res; + for (size_t i = 0; i < len; i++) { + while (true) { + // Advance the random seed + v = v ^ (v >> B1); + v = v ^ (v << B2); + v = v ^ (v >> B3); + + // Replace \n and \r in order to avoid line break processing by the + // CharReader + char c = v & 0xFF; + if (c != '\n' && c != '\r') { + res.push_back(c); + break; + } + } + } + return res; +} + +// For performance tests only +// static constexpr size_t DATA_LENGTH = 16 * 1024 * 1024 + 795; +static constexpr size_t DATA_LENGTH = 256 * 1024 + 795; +static const std::vector DATA = generateData(DATA_LENGTH); + +/* Buffer Test */ + +TEST(Buffer, simpleRead) +{ + std::string testStr{"this is a test"}; + + // Create buffer with the test string + char c; + Buffer buf{testStr}; + + // Create a read cursor + Buffer::CursorId cursor = buf.createCursor(); + + // We're not at the end of the stream + ASSERT_FALSE(buf.atEnd(cursor)); + + // The cursor must be at zero + ASSERT_EQ(0U, buf.offset(cursor)); + + // Try to read the test string + std::string res; + while (buf.read(cursor, c)) { + res.append(&c, 1); + } + + // The cursor must be at the end + ASSERT_TRUE(buf.atEnd(cursor)); + + // The cursor must be one byond the last byte + ASSERT_EQ(testStr.size(), buf.offset(cursor)); + + // The two strings must equal + ASSERT_EQ(testStr, res); + + buf.deleteCursor(cursor); +} + +TEST(Buffer, cursorManagement) +{ + Buffer buf{""}; + + Buffer::CursorId c1 = buf.createCursor(); + Buffer::CursorId c2 = buf.createCursor(); + Buffer::CursorId c3 = buf.createCursor(); + + ASSERT_EQ(0U, c1); + ASSERT_EQ(1U, c2); + ASSERT_EQ(2U, c3); + + 
buf.deleteCursor(c2); + Buffer::CursorId c4 = buf.createCursor(); + ASSERT_EQ(1U, c4); + + buf.deleteCursor(c1); + buf.deleteCursor(c3); + buf.deleteCursor(c4); +} + +TEST(Buffer, twoCursors) +{ + std::string testStr{"this is a test"}; + + // Create buffer with the test string + char c; + Buffer buf{testStr}; + + // Create two read cursors + Buffer::CursorId cur1 = buf.createCursor(); + Buffer::CursorId cur2 = buf.createCursor(); + + ASSERT_FALSE(buf.atEnd(cur1)); + ASSERT_FALSE(buf.atEnd(cur2)); + + // Try to read the test string with the first cursor + std::string res1; + while (buf.read(cur1, c)) { + res1.append(&c, 1); + } + + // The first cursor must be at the end + ASSERT_TRUE(buf.atEnd(cur1)); + ASSERT_FALSE(buf.atEnd(cur2)); + + // Try to read the test string with the second cursor + std::string res2; + while (buf.read(cur2, c)) { + res2.append(&c, 1); + } + + // The first cursor must be at the end + ASSERT_TRUE(buf.atEnd(cur1)); + ASSERT_TRUE(buf.atEnd(cur2)); + + // The two strings must equal + ASSERT_EQ(testStr, res1); + ASSERT_EQ(testStr, res2); + + buf.deleteCursor(cur1); + buf.deleteCursor(cur2); +} + +TEST(Buffer, copyCursors) +{ + std::string testStr{"test1 test2 test3"}; + + // Create buffer with the test string + char c; + Buffer buf{testStr}; + + // Create two read cursors + Buffer::CursorId cur1 = buf.createCursor(); + Buffer::CursorId cur2 = buf.createCursor(); + + ASSERT_FALSE(buf.atEnd(cur1)); + ASSERT_FALSE(buf.atEnd(cur2)); + + // Read the first six characters with cursor one + std::string res1; + for (int i = 0; i < 6; i++) { + if (buf.read(cur1, c)) { + res1.append(&c, 1); + } + } + ASSERT_EQ("test1 ", res1); + ASSERT_FALSE(buf.atEnd(cur1)); + + // Copy cur1 to cur2, free cur1 + buf.copyCursor(cur1, cur2); + buf.deleteCursor(cur1); + + std::string res2; + for (int i = 0; i < 6; i++) { + if (buf.read(cur2, c)) { + res2.append(&c, 1); + } + } + ASSERT_EQ("test2 ", res2); + ASSERT_FALSE(buf.atEnd(cur2)); + + // Create a new cursor as copy of 
cur2 + Buffer::CursorId cur3 = buf.createCursor(cur2); + std::string res3; + for (int i = 0; i < 6; i++) { + if (buf.read(cur3, c)) { + res3.append(&c, 1); + } + } + ASSERT_EQ("test3", res3); + + ASSERT_TRUE(buf.atEnd(cur3)); + + buf.deleteCursor(cur1); + buf.deleteCursor(cur2); + buf.deleteCursor(cur3); +} + +TEST(Buffer, moveCursor) +{ + std::string testStr{"test1 test2 test3"}; + + // Create buffer with the test string + char c; + Buffer buf{testStr}; + Buffer::CursorId cursor = buf.createCursor(); + + // Read the first six characters with cursor one + { + std::string res; + for (int i = 0; i < 6; i++) { + if (buf.read(cursor, c)) { + res.append(&c, 1); + } + } + ASSERT_EQ("test1 ", res); + } + + // Move six bytes backward + ASSERT_EQ(-6, buf.moveCursor(cursor, -6)); + { + std::string res; + for (int i = 0; i < 6; i++) { + if (buf.read(cursor, c)) { + res.append(&c, 1); + } + } + ASSERT_EQ("test1 ", res); + } + + // Move more than six bytes backward + ASSERT_EQ(-6, buf.moveCursor(cursor, -1000)); + { + std::string res; + for (int i = 0; i < 6; i++) { + if (buf.read(cursor, c)) { + res.append(&c, 1); + } + } + ASSERT_EQ("test1 ", res); + } + + // Move six bytes forward + ASSERT_EQ(6, buf.moveCursor(cursor, 6)); + { + std::string res; + for (int i = 0; i < 6; i++) { + if (buf.read(cursor, c)) { + res.append(&c, 1); + } + } + ASSERT_EQ("test3", res); + } + + buf.deleteCursor(cursor); +} + +struct VectorReadState { + size_t offs; + const std::vector &data; + + VectorReadState(const std::vector &data) : offs(0), data(data) {} +}; + +static size_t readFromVector(char *buf, size_t size, void *userData) +{ + VectorReadState &state = *(static_cast(userData)); + size_t tar = std::min(state.offs + size, state.data.size()); + for (size_t i = state.offs; i < tar; i++) { + *buf = state.data[i]; + buf++; + } + size_t res = tar - state.offs; + state.offs = tar; + return res; +} + +TEST(Buffer, simpleStream) +{ + VectorReadState state(DATA); + + Buffer buf{readFromVector, 
&state}; + Buffer::CursorId cursor = buf.createCursor(); + + char c; + std::vector res; + while (buf.read(cursor, c)) { + res.push_back(c); + } + + // We must be at the end of the buffer and the cursor offset must be set + // correctly + ASSERT_TRUE(buf.atEnd(cursor)); + ASSERT_EQ(DATA_LENGTH, buf.offset(cursor)); + + // The read data and the original data must be equal + ASSERT_EQ(DATA, res); + + buf.deleteCursor(cursor); +} + +TEST(Buffer, streamTwoCursors) +{ + VectorReadState state(DATA); + + Buffer buf{readFromVector, &state}; + Buffer::CursorId cur1 = buf.createCursor(); + Buffer::CursorId cur2 = buf.createCursor(); + + char c; + + std::vector res1; + while (buf.read(cur1, c)) { + res1.push_back(c); + } + + ASSERT_TRUE(buf.atEnd(cur1)); + ASSERT_FALSE(buf.atEnd(cur2)); + ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); + ASSERT_EQ(0U, buf.offset(cur2)); + + std::vector res2; + while (buf.read(cur2, c)) { + res2.push_back(c); + } + + ASSERT_TRUE(buf.atEnd(cur1)); + ASSERT_TRUE(buf.atEnd(cur2)); + ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); + ASSERT_EQ(DATA_LENGTH, buf.offset(cur2)); + + // The read data and the original data must be equal + ASSERT_EQ(DATA, res1); + ASSERT_EQ(DATA, res2); + + buf.deleteCursor(cur1); + buf.deleteCursor(cur2); +} + +TEST(Buffer, streamTwoCursorsMovingInterleaved) +{ + VectorReadState state(DATA); + + Buffer buf{readFromVector, &state}; + Buffer::CursorId cur1 = buf.createCursor(); + Buffer::CursorId cur2 = buf.createCursor(); + + char c; + + std::vector res1; + std::vector res2; + while (!buf.atEnd(cur1) || !buf.atEnd(cur2)) { + for (int i = 0; i < 100; i++) { + if (buf.read(cur1, c)) { + res1.push_back(c); + } + } + for (int i = 0; i < 120; i++) { + if (buf.read(cur2, c)) { + res2.push_back(c); + } + } + + // Move cur2 120 bytes backward and read the content again + res2.resize(res2.size() - 120); + ASSERT_EQ(-120, buf.moveCursor(cur2, -120)); + for (int i = 0; i < 120; i++) { + if (buf.read(cur2, c)) { + res2.push_back(c); + } + } + + 
// Move cur1 60 bytes forward and backward + buf.moveCursor(cur1, -buf.moveCursor(cur1, 60)); + + // Make sure the cursor position is correct + ASSERT_EQ(res1.size(), buf.offset(cur1)); + ASSERT_EQ(res2.size(), buf.offset(cur2)); + } + + ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); + ASSERT_EQ(DATA_LENGTH, buf.offset(cur2)); + + // The read data and the original data must be equal + ASSERT_EQ(DATA, res1); + ASSERT_EQ(DATA, res2); + + buf.deleteCursor(cur1); + buf.deleteCursor(cur2); +} + +TEST(Buffer, streamMoveForward) +{ + VectorReadState state(DATA); + + std::vector partialData; + partialData.resize(100); + std::copy(DATA.end() - partialData.size(), DATA.end(), partialData.begin()); + + Buffer buf{readFromVector, &state}; + Buffer::CursorId cursor = buf.createCursor(); + ASSERT_EQ(ssize_t(DATA_LENGTH) - 100, + buf.moveCursor(cursor, DATA_LENGTH - 100)); + + char c; + std::vector res; + while (buf.read(cursor, c)) { + res.push_back(c); + } + ASSERT_EQ(partialData, res); + + buf.deleteCursor(cursor); +} + +/* CharReader Test */ + +TEST(CharReader, simpleRead) +{ + std::string testStr{"this is a test"}; + char c; + + // Feed a test string into the reader + CharReader reader{testStr}; + + // Try to read the test string + std::string res; + while (!reader.atEnd()) { + ASSERT_TRUE(reader.read(c)); + res.append(&c, 1); + } + + // The two strings must equal + ASSERT_EQ(testStr, res); + + // We must now be at line 1, column 15 + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(testStr.size() + 1, reader.getColumn()); + + // If we call either read or peek, false is returned + ASSERT_FALSE(reader.read(c)); + ASSERT_FALSE(reader.peek(c)); +} + +TEST(CharReader, simplePeek) +{ + std::string testStr{"this is a test"}; + char c; + + // Feed a test string into the reader + CharReader reader{testStr}; + + // Try to read the test string + std::string res; + while (reader.peek(c)) { + res.append(&c, 1); + } + + // Peeking does not trigger the "atEnd" flag + ASSERT_FALSE(reader.atEnd()); 
+ + // The two strings must equal + ASSERT_EQ(testStr, res); + + // We must now be at line 1, column 1 and NOT at the end of the stream + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + ASSERT_FALSE(reader.atEnd()); + + // If we consume the peek, we must be at line 1, column 15 and we should be + // at the end of the stream + reader.consumePeek(); + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(testStr.size() + 1, reader.getColumn()); + ASSERT_TRUE(reader.atEnd()); + + // If we call either read or peek, false is returned + ASSERT_FALSE(reader.read(c)); + ASSERT_FALSE(reader.peek(c)); +} + +TEST(CharReader, rowColumnCounter) +{ + // Feed a test string into the reader + CharReader reader{"1\n\r2\n3\r\n\n4"}; + + // We should currently be in line 1, column 1 + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read two characters + char c; + for (int i = 0; i < 2; i++) + reader.read(c); + ASSERT_EQ(2U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read two characters + for (int i = 0; i < 2; i++) + reader.read(c); + ASSERT_EQ(3U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read three characters + for (int i = 0; i < 3; i++) + reader.read(c); + ASSERT_EQ(5U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); +} + +TEST(CharReader, rowColumnCounterTest) +{ + // Feed a test string into the reader + CharReader reader{"1\n\r2\n3\r\n\n4", 4, 10}; + + // We should currently be in line 4, column 10 (the start position passed to the constructor) + ASSERT_EQ(4U, reader.getLine()); + ASSERT_EQ(10U, reader.getColumn()); + + // Read two characters + char c; + for (int i = 0; i < 2; i++) + reader.read(c); + ASSERT_EQ(5U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read two characters + for (int i = 0; i < 2; i++) + reader.read(c); + ASSERT_EQ(6U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read three characters + for (int i = 0; i < 3; i++) + reader.read(c); + ASSERT_EQ(8U, reader.getLine()); +
ASSERT_EQ(1U, reader.getColumn()); +} + +TEST(CharReader, linebreakSubstitution) +{ + // Feed a test string into the reader and read all characters back + CharReader reader{"this\n\ris\n\rjust\na test\r\n\rtest\n\r"}; + std::string res; + char c; + while (reader.read(c)) { + res.append(&c, 1); + } + + // Test for equality + ASSERT_EQ("this\nis\njust\na test\n\ntest\n", res); +} + +TEST(CharReader, rowColumnCounterUTF8) +{ + // Feed a test string with some umlauts into the reader + CharReader reader{"\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f"}; + + // Read all bytes + char c; + while (reader.read(c)) { + // Do nothing + } + + // The sequence above equals 5 UTF-8 characters (so after reading all the + // cursor is at position 6) + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(6U, reader.getColumn()); +} + +TEST(CharReader, stream) +{ + // Copy the test data to a string stream + std::stringstream ss; + std::copy(DATA.begin(), DATA.end(), std::ostream_iterator(ss)); + + // Read the data back from the stream + std::vector res; + char c; + CharReader reader{ss}; + while (reader.read(c)) { + res.push_back(c); + } + ASSERT_EQ(DATA_LENGTH, res.size()); + ASSERT_EQ(DATA, res); +} + +TEST(CharReader, fork) +{ + std::string testStr{"first line\n\n\rsecond line\n\rlast line"}; + // 0123456789 0 123456789012 3456789012 + // 0 1 2 3 + + char c; + CharReader reader{testStr}; + + // Read a few characters + for (int i = 0; i < 4; i++) + reader.read(c); + + // Peek a few characters + for (int i = 4; i < 7; i++) + reader.peek(c); + + // Fork the reader + { + CharReaderFork fork = reader.fork(); + + ASSERT_EQ(1U, fork.getLine()); + ASSERT_EQ(5U, fork.getColumn()); + + fork.peek(c); + ASSERT_EQ('i', c); + + fork.read(c); + ASSERT_EQ('t', c); + + ASSERT_EQ(1U, fork.getLine()); + ASSERT_EQ(6U, fork.getColumn()); + + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(5U, reader.getColumn()); + + reader.read(c); + reader.read(c); + ASSERT_EQ(' ', c); + + fork.commit(); + } + ASSERT_EQ(1U, 
reader.getLine()); + ASSERT_EQ(6U, reader.getColumn()); +} + +TEST(CharReaderTest, context) +{ + std::string testStr{"first line\n\n\rsecond line\n\rlast line"}; + // 0123456789 0 123456789012 3456789012 + // 0 1 2 3 + + // Retrieval at beginning of stream + { + CharReader reader{testStr}; + CharReader::Context ctx = reader.getContext(80); + ASSERT_EQ("first line", ctx.line); + ASSERT_EQ(0U, ctx.relPos); + ASSERT_FALSE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } + + // Retrieval in middle of line -- NOTE(review): ctx is fetched before the five read() calls below, so it still describes the start of the stream (relPos 0); confirm whether getContext() was meant to run after the loop + { + CharReader reader{testStr}; + CharReader::Context ctx = reader.getContext(80); + + char c; + for (int i = 0; i < 5; i++) + reader.read(c); + + ASSERT_EQ("first line", ctx.line); + ASSERT_EQ(0U, ctx.relPos); + ASSERT_FALSE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } + + // Retrieval in whitespace sequence + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 11; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(80); + ASSERT_EQ("first line", ctx.line); + ASSERT_EQ(10U, ctx.relPos); + ASSERT_FALSE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } + + // Truncation of text + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 5; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(3); + ASSERT_EQ("t l", ctx.line); + ASSERT_EQ(1U, ctx.relPos); + ASSERT_TRUE(ctx.truncatedStart); + ASSERT_TRUE(ctx.truncatedEnd); + } + + // Second line + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 12; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(80); + ASSERT_EQ("second line", ctx.line); + ASSERT_EQ(0U, ctx.relPos); + ASSERT_FALSE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } + + // End of second line + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 23; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(80); + ASSERT_EQ("second line", ctx.line); + ASSERT_EQ(11U, ctx.relPos); +
ASSERT_FALSE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } + + // Last line + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 24; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(80); + ASSERT_EQ("last line", ctx.line); + ASSERT_EQ(0U, ctx.relPos); + ASSERT_FALSE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } + + // Middle of last line + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 28; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(80); + ASSERT_EQ("last line", ctx.line); + ASSERT_EQ(4U, ctx.relPos); + ASSERT_FALSE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } + + // Middle of last line truncated + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 28; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(3); + ASSERT_EQ("t l", ctx.line); + ASSERT_EQ(1U, ctx.relPos); + ASSERT_TRUE(ctx.truncatedStart); + ASSERT_TRUE(ctx.truncatedEnd); + } + + // End of stream + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 100; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(80); + ASSERT_EQ("last line", ctx.line); + ASSERT_EQ(9U, ctx.relPos); + ASSERT_FALSE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } + + // End of stream truncated + { + CharReader reader{testStr}; + + char c; + for (int i = 0; i < 100; i++) + reader.read(c); + + CharReader::Context ctx = reader.getContext(4); + ASSERT_EQ("line", ctx.line); + ASSERT_EQ(4U, ctx.relPos); + ASSERT_TRUE(ctx.truncatedStart); + ASSERT_FALSE(ctx.truncatedEnd); + } +} +} + diff --git a/test/core/common/LoggerTest.cpp b/test/core/common/LoggerTest.cpp new file mode 100644 index 0000000..54c67f9 --- /dev/null +++ b/test/core/common/LoggerTest.cpp @@ -0,0 +1,74 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the 
GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include + +#include + +namespace ousia { + +struct Pos { + int line, column; + Pos(int line, int column) : line(line), column(column){}; + int getLine() const { return line; } + int getColumn() const { return column; } +}; + +TEST(TerminalLogger, log) +{ + // Test for manual visual inspection only -- no assertions + TerminalLogger logger{std::cerr, true}; + logger.pushFilename("test.odp"); + + logger.debug("This is a test debug message", 10, 20); + logger.debug("This is a test debug message with no column", 10); + logger.debug("This is a test debug message with no line"); + logger.debug("This is a test debug message with no file", ""); + logger.debug("This is a test debug message with no file but a line", "", + 10); + logger.debug( + "This is a test debug message with no file but a line and a column", "", + 10, 20); + logger.note("This is a test note", 10, 20); + logger.warning("This is a test warning", 10, 20); + logger.error("This is a test error", 10, 20); + logger.fatalError("This is a test fatal error!", 10, 20); + + try { + throw LoggableException{"An exception"}; + } + catch (const LoggableException &ex) { + logger.log(ex); + } + + try { + throw LoggableException{"An exception at position", Pos(10, 20)}; + } + catch (const LoggableException &ex) { + logger.log(ex); + } + + logger.logAt(Severity::ERROR, "This is a positioned log message", + Pos(10, 20)); + logger.debugAt("This is a positioned debug message", Pos(10, 20)); + logger.noteAt("This is
a positioned log error", Pos(10, 20)); +} +} + diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp new file mode 100644 index 0000000..2858038 --- /dev/null +++ b/test/core/common/UtilsTest.cpp @@ -0,0 +1,43 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include + +namespace ousia { + +TEST(Utils, isIdentifier) +{ + ASSERT_TRUE(Utils::isIdentifier("test")); + ASSERT_TRUE(Utils::isIdentifier("t0-_est")); + ASSERT_TRUE(Utils::isIdentifier("_t0-_EST")); + ASSERT_FALSE(Utils::isIdentifier("-t0-_EST")); + ASSERT_FALSE(Utils::isIdentifier("0t-_EST")); +} + +TEST(Utils, trim) +{ + ASSERT_EQ("hello world", Utils::trim("\t hello world \n\r\t")); + ASSERT_EQ("hello world", Utils::trim("hello world \n\r\t")); + ASSERT_EQ("hello world", Utils::trim(" hello world")); + ASSERT_EQ("hello world", Utils::trim("hello world")); +} + +} + diff --git a/test/core/common/VariantReaderTest.cpp b/test/core/common/VariantReaderTest.cpp new file mode 100644 index 0000000..d9bb74e --- /dev/null +++ b/test/core/common/VariantReaderTest.cpp @@ -0,0 +1,345 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the 
License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include + +#include + +namespace ousia { +namespace variant { + +//static TerminalLogger logger{std::cerr, true}; +static Logger logger; + +TEST(Reader, readString) +{ + // Simple, double quoted string + { + CharReader reader("\"hello world\""); + auto res = VariantReader::parseString(reader, logger); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Simple, double quoted string with whitespace + { + CharReader reader(" \"hello world\" "); + auto res = VariantReader::parseString(reader, logger); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Simple, single quoted string + { + CharReader reader("'hello world'"); + auto res = VariantReader::parseString(reader, logger); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Escape characters + { + CharReader reader("'\\'\\\"\\b\\f\\n\\r\\t\\v'"); + auto res = VariantReader::parseString(reader, logger); + ASSERT_TRUE(res.first); + ASSERT_EQ("'\"\b\f\n\r\t\v", res.second); + } +} + +TEST(Reader, parseUnescapedString) +{ + // Simple case + { + CharReader reader("hello world;"); + auto res = VariantReader::parseUnescapedString(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Simple case with whitespace + { + CharReader reader(" hello world ; "); + auto res = VariantReader::parseUnescapedString(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } + + // Linebreaks + { + CharReader reader(" hello\nworld ; "); + auto res = 
VariantReader::parseUnescapedString(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello\nworld", res.second); + } + + // End of stream + { + CharReader reader(" hello world "); + auto res = VariantReader::parseUnescapedString(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_EQ("hello world", res.second); + } +} + +static const std::unordered_set noDelim; + +TEST(Reader, parseInteger) +{ + // Valid integers + { + CharReader reader("0 "); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(0, res.second); + } + + { + CharReader reader("42 "); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(42, res.second); + } + + { + CharReader reader("-42"); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(-42, res.second); + } + + { + CharReader reader(" -0x4A2 "); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(-0x4A2, res.second); + } + + { + CharReader reader(" 0Xaffe"); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(0xAFFE, res.second); + } + + { + CharReader reader("0x7FFFFFFFFFFFFFFF"); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(0x7FFFFFFFFFFFFFFFL, res.second); + } + + { + CharReader reader("-0x7FFFFFFFFFFFFFFF"); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(-0x7FFFFFFFFFFFFFFFL, res.second); + } + + // Invalid integers + { + CharReader reader("-"); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_FALSE(res.first); + } + + { + CharReader reader("0a"); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_FALSE(res.first); + } + + { + CharReader reader("-0xag"); + auto res = 
VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_FALSE(res.first); + } + + { + CharReader reader("0x8000000000000000"); + auto res = VariantReader::parseInteger(reader, logger, noDelim); + ASSERT_FALSE(res.first); + } +} + +TEST(Reader, parseDouble) +{ + // Valid doubles + { + CharReader reader("1.25"); + auto res = VariantReader::parseDouble(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(1.25, res.second); + } + + { + CharReader reader(".25"); + auto res = VariantReader::parseDouble(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(.25, res.second); + } + + { + CharReader reader(".25e1"); + auto res = VariantReader::parseDouble(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(2.5, res.second); + } + + { + CharReader reader("-2.5e-1"); + auto res = VariantReader::parseDouble(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(-0.25, res.second); + } + + { + CharReader reader("-50e-2"); + auto res = VariantReader::parseDouble(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(-0.5, res.second); + } + + { + CharReader reader("-1."); + auto res = VariantReader::parseDouble(reader, logger, noDelim); + ASSERT_TRUE(res.first); + ASSERT_EQ(-1., res.second); + } + + { + CharReader reader("-50.e-2"); + auto res = VariantReader::parseDouble(reader, logger, {'.'}); + ASSERT_TRUE(res.first); + ASSERT_EQ(-50, res.second); + } + + // Invalid doubles + { + CharReader reader(".e1"); + auto res = VariantReader::parseDouble(reader, logger, noDelim); + ASSERT_FALSE(res.first); + } + + { + CharReader reader("0e100000"); + auto res = VariantReader::parseDouble(reader, logger, noDelim); + ASSERT_FALSE(res.first); + } +} + +TEST(Reader, parseArray) +{ + // Simple case (only primitive data types) + { + CharReader reader("[\"Hello, World\", unescaped\n string ,\n" + "1234, 0.56, true, false, null]"); + auto res = VariantReader::parseArray(reader, logger); + ASSERT_TRUE(res.first); + + // Make sure array 
has the correct size + ASSERT_EQ(7U, res.second.size()); + + // Check the types + ASSERT_TRUE(res.second[0].isString()); + ASSERT_TRUE(res.second[1].isString()); + ASSERT_TRUE(res.second[2].isInt()); + ASSERT_TRUE(res.second[3].isDouble()); + ASSERT_TRUE(res.second[4].isBool()); + ASSERT_TRUE(res.second[5].isBool()); + ASSERT_TRUE(res.second[6].isNull()); + + // Check the values + ASSERT_EQ("Hello, World", res.second[0].asString()); + ASSERT_EQ("unescaped\n string", res.second[1].asString()); + ASSERT_EQ(1234, res.second[2].asInt()); + ASSERT_EQ(0.56, res.second[3].asDouble()); + ASSERT_TRUE(res.second[4].asBool()); + ASSERT_FALSE(res.second[5].asBool()); + } + + // Ending with comma + { + CharReader reader("[ 'test' ,]"); + auto res = VariantReader::parseArray(reader, logger); + ASSERT_TRUE(res.first); + + // Make sure the array has the correct size + ASSERT_EQ(1U, res.second.size()); + + // Check the types + ASSERT_TRUE(res.second[0].isString()); + + // Check the values + ASSERT_EQ("test", res.second[0].asString()); + } + + // Recovery from invalid values + { + CharReader reader("[ 0invalidNumber, str, 1invalid]"); + auto res = VariantReader::parseArray(reader, logger); + ASSERT_TRUE(res.first); + + // Make sure the array has the correct size + ASSERT_EQ(3U, res.second.size()); + + // Check the types (all must be strings since the numbers are invalid) + ASSERT_TRUE(res.second[0].isString()); + ASSERT_TRUE(res.second[1].isString()); + ASSERT_TRUE(res.second[2].isString()); + + // Check the values + ASSERT_EQ("0invalidNumber", res.second[0].asString()); + ASSERT_EQ("str", res.second[1].asString()); + ASSERT_EQ("1invalid", res.second[2].asString()); + } +} + +TEST(Reader, parseGeneric) +{ + // Simple case, unescaped string + { + CharReader reader("hello world"); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_EQ("hello world", res.second.asString()); + } + + // Simple case, 
double quoted string + { + CharReader reader(" \"hello world\" "); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_EQ("hello world", res.second.asString()); + } + + // Simple case, single quoted string + { + CharReader reader(" 'hello world' "); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); + ASSERT_TRUE(res.first); + ASSERT_TRUE(res.second.isString()); + ASSERT_EQ("hello world", res.second.asString()); + } +} + +} +} + diff --git a/test/core/common/VariantTest.cpp b/test/core/common/VariantTest.cpp new file mode 100644 index 0000000..580846e --- /dev/null +++ b/test/core/common/VariantTest.cpp @@ -0,0 +1,141 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +#include + +#include + +namespace ousia { + +TEST(Variant, nullValue) +{ + Variant v; + ASSERT_TRUE(v.isNull()); + + v = 1; + ASSERT_FALSE(v.isNull()); + + v = nullptr; + ASSERT_TRUE(v.isNull()); + + Variant v2{nullptr}; // a variant constructed from nullptr must be null as well + ASSERT_TRUE(v2.isNull()); +} + +TEST(Variant, booleanValue) +{ + Variant v{true}; + ASSERT_TRUE(v.isBool()); + ASSERT_TRUE(v.asBool()); + + v = false; + ASSERT_TRUE(v.isBool()); + ASSERT_FALSE(v.asBool()); + + v.setBool(true); + ASSERT_TRUE(v.isBool()); + ASSERT_TRUE(v.asBool()); + + v = nullptr; + ASSERT_FALSE(v.isBool()); +} + +TEST(Variant, intValue) +{ + Variant v{42}; + ASSERT_TRUE(v.isInt()); + ASSERT_EQ(42, v.asInt()); + + v = 43; + ASSERT_TRUE(v.isInt()); + ASSERT_EQ(43, v.asInt()); + + v = false; + ASSERT_FALSE(v.isInt()); +} + +TEST(Variant, doubleValue) +{ + Variant v{42.5}; + ASSERT_TRUE(v.isDouble()); + ASSERT_EQ(42.5, v.asDouble()); + + v = 42; + ASSERT_FALSE(v.isDouble()); + + v = 43.5; + ASSERT_TRUE(v.isDouble()); + ASSERT_EQ(43.5, v.asDouble()); +} + +TEST(Variant, stringValue) +{ + Variant v{"Hello World"}; + ASSERT_TRUE(v.isString()); + ASSERT_EQ("Hello World", v.asString()); + + v = "Goodbye World"; + ASSERT_TRUE(v.isString()); + ASSERT_EQ("Goodbye World", v.asString()); + + v = 42; + ASSERT_FALSE(v.isString()); +} + +TEST(Variant, arrayValue) +{ + const Variant v{{"test1", 42}}; + ASSERT_EQ(2U, v.asArray().size()); + ASSERT_EQ("test1", v.asArray()[0].asString()); + ASSERT_EQ(42, v.asArray()[1].asInt()); +} + +TEST(Variant, mapValue) +{ + const Variant v{{{"key1", "entry1"}, {"key2", "entry2"}}}; + + auto map = v.asMap(); + ASSERT_EQ(2U, map.size()); + + ASSERT_EQ("entry1", map.find("key1")->second.asString()); + ASSERT_EQ("entry2", map.find("key2")->second.asString()); + + const Variant v2{{{"key1", Variant::arrayType{1, 2}}, {"key2", "entry2"}}}; + ASSERT_EQ(2, v2.asMap().find("key1")->second.asArray()[1].asInt()); +} + +TEST(Variant, relationalOperators){ + Variant a{4}; + Variant b{4}; + + ASSERT_EQ(a,b); +
+ b.setInt(5); + ASSERT_TRUE(a < b); + + b.setDouble(4); + ASSERT_FALSE(a == b); + + a.setDouble(4); + ASSERT_EQ(a,b); +} + +} + diff --git a/test/core/utils/CharReaderTest.cpp b/test/core/utils/CharReaderTest.cpp deleted file mode 100644 index eb04a8e..0000000 --- a/test/core/utils/CharReaderTest.cpp +++ /dev/null @@ -1,823 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include -#include - -#include "gtest/gtest.h" - -#include - -namespace ousia { -namespace utils { - -/* Test data */ - -// Generates some pseudo-random data -// (inspired by "Numerical Recipes, Third Edition", Chapter 7.17) -static std::vector generateData(size_t len) -{ - const uint32_t B1 = 17; - const uint32_t B2 = 15; - const uint32_t B3 = 5; - uint32_t v = 0xF3A99148; - std::vector res; - for (size_t i = 0; i < len; i++) { - while (true) { - // Advance the random seed - v = v ^ (v >> B1); - v = v ^ (v << B2); - v = v ^ (v >> B3); - - // Replace \n and \r in order to avoid line break processing by the - // CharReader - char c = v & 0xFF; - if (c != '\n' && c != '\r') { - res.push_back(c); - break; - } - } - } - return res; -} - -// For performance tests only -// static constexpr size_t DATA_LENGTH = 16 * 1024 * 1024 + 795; -static constexpr size_t DATA_LENGTH = 256 * 1024 + 795; -static const std::vector DATA = generateData(DATA_LENGTH); - -/* Buffer Test */ - -TEST(Buffer, simpleRead) -{ - std::string testStr{"this is a test"}; - - // Create buffer with the test string - char c; - Buffer buf{testStr}; - - // Create a read cursor - Buffer::CursorId cursor = buf.createCursor(); - - // We're not at the end of the stream - ASSERT_FALSE(buf.atEnd(cursor)); - - // The cursor must be at zero - ASSERT_EQ(0U, buf.offset(cursor)); - - // Try to read the test string - std::string res; - while (buf.read(cursor, c)) { - res.append(&c, 1); - } - - // The cursor must be at the end - ASSERT_TRUE(buf.atEnd(cursor)); - - // The cursor must be one byond the last byte - ASSERT_EQ(testStr.size(), buf.offset(cursor)); - - // The two strings must equal - ASSERT_EQ(testStr, res); - - buf.deleteCursor(cursor); -} - -TEST(Buffer, cursorManagement) -{ - Buffer buf{""}; - - Buffer::CursorId c1 = buf.createCursor(); - Buffer::CursorId c2 = buf.createCursor(); - Buffer::CursorId c3 = buf.createCursor(); - - ASSERT_EQ(0U, c1); - ASSERT_EQ(1U, c2); - ASSERT_EQ(2U, c3); - - 
buf.deleteCursor(c2); - Buffer::CursorId c4 = buf.createCursor(); - ASSERT_EQ(1U, c4); - - buf.deleteCursor(c1); - buf.deleteCursor(c3); - buf.deleteCursor(c4); -} - -TEST(Buffer, twoCursors) -{ - std::string testStr{"this is a test"}; - - // Create buffer with the test string - char c; - Buffer buf{testStr}; - - // Create two read cursors - Buffer::CursorId cur1 = buf.createCursor(); - Buffer::CursorId cur2 = buf.createCursor(); - - ASSERT_FALSE(buf.atEnd(cur1)); - ASSERT_FALSE(buf.atEnd(cur2)); - - // Try to read the test string with the first cursor - std::string res1; - while (buf.read(cur1, c)) { - res1.append(&c, 1); - } - - // The first cursor must be at the end - ASSERT_TRUE(buf.atEnd(cur1)); - ASSERT_FALSE(buf.atEnd(cur2)); - - // Try to read the test string with the second cursor - std::string res2; - while (buf.read(cur2, c)) { - res2.append(&c, 1); - } - - // The first cursor must be at the end - ASSERT_TRUE(buf.atEnd(cur1)); - ASSERT_TRUE(buf.atEnd(cur2)); - - // The two strings must equal - ASSERT_EQ(testStr, res1); - ASSERT_EQ(testStr, res2); - - buf.deleteCursor(cur1); - buf.deleteCursor(cur2); -} - -TEST(Buffer, copyCursors) -{ - std::string testStr{"test1 test2 test3"}; - - // Create buffer with the test string - char c; - Buffer buf{testStr}; - - // Create two read cursors - Buffer::CursorId cur1 = buf.createCursor(); - Buffer::CursorId cur2 = buf.createCursor(); - - ASSERT_FALSE(buf.atEnd(cur1)); - ASSERT_FALSE(buf.atEnd(cur2)); - - // Read the first six characters with cursor one - std::string res1; - for (int i = 0; i < 6; i++) { - if (buf.read(cur1, c)) { - res1.append(&c, 1); - } - } - ASSERT_EQ("test1 ", res1); - ASSERT_FALSE(buf.atEnd(cur1)); - - // Copy cur1 to cur2, free cur1 - buf.copyCursor(cur1, cur2); - buf.deleteCursor(cur1); - - std::string res2; - for (int i = 0; i < 6; i++) { - if (buf.read(cur2, c)) { - res2.append(&c, 1); - } - } - ASSERT_EQ("test2 ", res2); - ASSERT_FALSE(buf.atEnd(cur2)); - - // Create a new cursor as copy of 
cur2 - Buffer::CursorId cur3 = buf.createCursor(cur2); - std::string res3; - for (int i = 0; i < 6; i++) { - if (buf.read(cur3, c)) { - res3.append(&c, 1); - } - } - ASSERT_EQ("test3", res3); - - ASSERT_TRUE(buf.atEnd(cur3)); - - buf.deleteCursor(cur1); - buf.deleteCursor(cur2); - buf.deleteCursor(cur3); -} - -TEST(Buffer, moveCursor) -{ - std::string testStr{"test1 test2 test3"}; - - // Create buffer with the test string - char c; - Buffer buf{testStr}; - Buffer::CursorId cursor = buf.createCursor(); - - // Read the first six characters with cursor one - { - std::string res; - for (int i = 0; i < 6; i++) { - if (buf.read(cursor, c)) { - res.append(&c, 1); - } - } - ASSERT_EQ("test1 ", res); - } - - // Move six bytes backward - ASSERT_EQ(-6, buf.moveCursor(cursor, -6)); - { - std::string res; - for (int i = 0; i < 6; i++) { - if (buf.read(cursor, c)) { - res.append(&c, 1); - } - } - ASSERT_EQ("test1 ", res); - } - - // Move more than six bytes backward - ASSERT_EQ(-6, buf.moveCursor(cursor, -1000)); - { - std::string res; - for (int i = 0; i < 6; i++) { - if (buf.read(cursor, c)) { - res.append(&c, 1); - } - } - ASSERT_EQ("test1 ", res); - } - - // Move six bytes forward - ASSERT_EQ(6, buf.moveCursor(cursor, 6)); - { - std::string res; - for (int i = 0; i < 6; i++) { - if (buf.read(cursor, c)) { - res.append(&c, 1); - } - } - ASSERT_EQ("test3", res); - } - - buf.deleteCursor(cursor); -} - -struct VectorReadState { - size_t offs; - const std::vector &data; - - VectorReadState(const std::vector &data) : offs(0), data(data) {} -}; - -static size_t readFromVector(char *buf, size_t size, void *userData) -{ - VectorReadState &state = *(static_cast(userData)); - size_t tar = std::min(state.offs + size, state.data.size()); - for (size_t i = state.offs; i < tar; i++) { - *buf = state.data[i]; - buf++; - } - size_t res = tar - state.offs; - state.offs = tar; - return res; -} - -TEST(Buffer, simpleStream) -{ - VectorReadState state(DATA); - - Buffer buf{readFromVector, 
&state}; - Buffer::CursorId cursor = buf.createCursor(); - - char c; - std::vector res; - while (buf.read(cursor, c)) { - res.push_back(c); - } - - // We must be at the end of the buffer and the cursor offset must be set - // correctly - ASSERT_TRUE(buf.atEnd(cursor)); - ASSERT_EQ(DATA_LENGTH, buf.offset(cursor)); - - // The read data and the original data must be equal - ASSERT_EQ(DATA, res); - - buf.deleteCursor(cursor); -} - -TEST(Buffer, streamTwoCursors) -{ - VectorReadState state(DATA); - - Buffer buf{readFromVector, &state}; - Buffer::CursorId cur1 = buf.createCursor(); - Buffer::CursorId cur2 = buf.createCursor(); - - char c; - - std::vector res1; - while (buf.read(cur1, c)) { - res1.push_back(c); - } - - ASSERT_TRUE(buf.atEnd(cur1)); - ASSERT_FALSE(buf.atEnd(cur2)); - ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); - ASSERT_EQ(0U, buf.offset(cur2)); - - std::vector res2; - while (buf.read(cur2, c)) { - res2.push_back(c); - } - - ASSERT_TRUE(buf.atEnd(cur1)); - ASSERT_TRUE(buf.atEnd(cur2)); - ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); - ASSERT_EQ(DATA_LENGTH, buf.offset(cur2)); - - // The read data and the original data must be equal - ASSERT_EQ(DATA, res1); - ASSERT_EQ(DATA, res2); - - buf.deleteCursor(cur1); - buf.deleteCursor(cur2); -} - -TEST(Buffer, streamTwoCursorsMovingInterleaved) -{ - VectorReadState state(DATA); - - Buffer buf{readFromVector, &state}; - Buffer::CursorId cur1 = buf.createCursor(); - Buffer::CursorId cur2 = buf.createCursor(); - - char c; - - std::vector res1; - std::vector res2; - while (!buf.atEnd(cur1) || !buf.atEnd(cur2)) { - for (int i = 0; i < 100; i++) { - if (buf.read(cur1, c)) { - res1.push_back(c); - } - } - for (int i = 0; i < 120; i++) { - if (buf.read(cur2, c)) { - res2.push_back(c); - } - } - - // Move cur2 120 bytes backward and read the content again - res2.resize(res2.size() - 120); - ASSERT_EQ(-120, buf.moveCursor(cur2, -120)); - for (int i = 0; i < 120; i++) { - if (buf.read(cur2, c)) { - res2.push_back(c); - } - } - - 
// Move cur1 60 bytes forward and backward - buf.moveCursor(cur1, -buf.moveCursor(cur1, 60)); - - // Make sure the cursor position is correct - ASSERT_EQ(res1.size(), buf.offset(cur1)); - ASSERT_EQ(res2.size(), buf.offset(cur2)); - } - - ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); - ASSERT_EQ(DATA_LENGTH, buf.offset(cur2)); - - // The read data and the original data must be equal - ASSERT_EQ(DATA, res1); - ASSERT_EQ(DATA, res2); - - buf.deleteCursor(cur1); - buf.deleteCursor(cur2); -} - -TEST(Buffer, streamMoveForward) -{ - VectorReadState state(DATA); - - std::vector partialData; - partialData.resize(100); - std::copy(DATA.end() - partialData.size(), DATA.end(), partialData.begin()); - - Buffer buf{readFromVector, &state}; - Buffer::CursorId cursor = buf.createCursor(); - ASSERT_EQ(ssize_t(DATA_LENGTH) - 100, - buf.moveCursor(cursor, DATA_LENGTH - 100)); - - char c; - std::vector res; - while (buf.read(cursor, c)) { - res.push_back(c); - } - ASSERT_EQ(partialData, res); - - buf.deleteCursor(cursor); -} - -/* CharReader Test */ - -TEST(CharReader, simpleRead) -{ - std::string testStr{"this is a test"}; - char c; - - // Feed a test string into the reader - CharReader reader{testStr}; - - // Try to read the test string - std::string res; - while (!reader.atEnd()) { - ASSERT_TRUE(reader.read(c)); - res.append(&c, 1); - } - - // The two strings must equal - ASSERT_EQ(testStr, res); - - // We must now be at line 1, column 15 - ASSERT_EQ(1U, reader.getLine()); - ASSERT_EQ(testStr.size() + 1, reader.getColumn()); - - // If we call either read or peek, false is returned - ASSERT_FALSE(reader.read(c)); - ASSERT_FALSE(reader.peek(c)); -} - -TEST(CharReader, simplePeek) -{ - std::string testStr{"this is a test"}; - char c; - - // Feed a test string into the reader - CharReader reader{testStr}; - - // Try to read the test string - std::string res; - while (reader.peek(c)) { - res.append(&c, 1); - } - - // Peeking does not trigger the "atEnd" flag - ASSERT_FALSE(reader.atEnd()); 
- - // The two strings must equal - ASSERT_EQ(testStr, res); - - // We must now be at line 1, column 1 and NOT at the end of the stream - ASSERT_EQ(1U, reader.getLine()); - ASSERT_EQ(1U, reader.getColumn()); - ASSERT_FALSE(reader.atEnd()); - - // If we consume the peek, we must be at line 1, column 15 and we should be - // at the end of the stream - reader.consumePeek(); - ASSERT_EQ(1U, reader.getLine()); - ASSERT_EQ(testStr.size() + 1, reader.getColumn()); - ASSERT_TRUE(reader.atEnd()); - - // If we call either read or peek, false is returned - ASSERT_FALSE(reader.read(c)); - ASSERT_FALSE(reader.peek(c)); -} - -TEST(CharReader, rowColumnCounter) -{ - // Feed a test string into the reader - CharReader reader{"1\n\r2\n3\r\n\n4"}; - - // We should currently be in line 1, column 1 - ASSERT_EQ(1U, reader.getLine()); - ASSERT_EQ(1U, reader.getColumn()); - - // Read two characters - char c; - for (int i = 0; i < 2; i++) - reader.read(c); - ASSERT_EQ(2U, reader.getLine()); - ASSERT_EQ(1U, reader.getColumn()); - - // Read two characters - for (int i = 0; i < 2; i++) - reader.read(c); - ASSERT_EQ(3U, reader.getLine()); - ASSERT_EQ(1U, reader.getColumn()); - - // Read three characters - for (int i = 0; i < 3; i++) - reader.read(c); - ASSERT_EQ(5U, reader.getLine()); - ASSERT_EQ(1U, reader.getColumn()); -} - -TEST(CharReader, rowColumnCounterTest) -{ - // Feed a test string into the reader - CharReader reader{"1\n\r2\n3\r\n\n4", 4, 10}; - - // We should currently be in line 1, column 1 - ASSERT_EQ(4U, reader.getLine()); - ASSERT_EQ(10U, reader.getColumn()); - - // Read two characters - char c; - for (int i = 0; i < 2; i++) - reader.read(c); - ASSERT_EQ(5U, reader.getLine()); - ASSERT_EQ(1U, reader.getColumn()); - - // Read two characters - for (int i = 0; i < 2; i++) - reader.read(c); - ASSERT_EQ(6U, reader.getLine()); - ASSERT_EQ(1U, reader.getColumn()); - - // Read three characters - for (int i = 0; i < 3; i++) - reader.read(c); - ASSERT_EQ(8U, reader.getLine()); - 
ASSERT_EQ(1U, reader.getColumn()); -} - -TEST(CharReader, linebreakSubstitution) -{ - // Feed a test string into the reader and read all characters back - CharReader reader{"this\n\ris\n\rjust\na test\r\n\rtest\n\r"}; - std::string res; - char c; - while (reader.read(c)) { - res.append(&c, 1); - } - - // Test for equality - ASSERT_EQ("this\nis\njust\na test\n\ntest\n", res); -} - -TEST(CharReader, rowColumnCounterUTF8) -{ - // Feed a test string with some umlauts into the reader - CharReader reader{"\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f"}; - - // Read all bytes - char c; - while (reader.read(c)) { - // Do nothing - } - - // The sequence above equals 5 UTF-8 characters (so after reading all the - // cursor is at position 6) - ASSERT_EQ(1U, reader.getLine()); - ASSERT_EQ(6U, reader.getColumn()); -} - -TEST(CharReader, stream) -{ - // Copy the test data to a string stream - std::stringstream ss; - std::copy(DATA.begin(), DATA.end(), std::ostream_iterator(ss)); - - // Read the data back from the stream - std::vector res; - char c; - CharReader reader{ss}; - while (reader.read(c)) { - res.push_back(c); - } - ASSERT_EQ(DATA_LENGTH, res.size()); - ASSERT_EQ(DATA, res); -} - -TEST(CharReader, fork) -{ - std::string testStr{"first line\n\n\rsecond line\n\rlast line"}; - // 0123456789 0 123456789012 3456789012 - // 0 1 2 3 - - char c; - CharReader reader{testStr}; - - // Read a few characters - for (int i = 0; i < 4; i++) - reader.read(c); - - // Peek a few characters - for (int i = 4; i < 7; i++) - reader.peek(c); - - // Fork the reader - { - CharReaderFork fork = reader.fork(); - - ASSERT_EQ(1U, fork.getLine()); - ASSERT_EQ(5U, fork.getColumn()); - - fork.peek(c); - ASSERT_EQ('i', c); - - fork.read(c); - ASSERT_EQ('t', c); - - ASSERT_EQ(1U, fork.getLine()); - ASSERT_EQ(6U, fork.getColumn()); - - ASSERT_EQ(1U, reader.getLine()); - ASSERT_EQ(5U, reader.getColumn()); - - reader.read(c); - reader.read(c); - ASSERT_EQ(' ', c); - - fork.commit(); - } - ASSERT_EQ(1U, 
reader.getLine()); - ASSERT_EQ(6U, reader.getColumn()); -} - -TEST(CharReaderTest, context) -{ - std::string testStr{"first line\n\n\rsecond line\n\rlast line"}; - // 0123456789 0 123456789012 3456789012 - // 0 1 2 3 - - // Retrieval at beginning of stream - { - CharReader reader{testStr}; - CharReader::Context ctx = reader.getContext(80); - ASSERT_EQ("first line", ctx.line); - ASSERT_EQ(0U, ctx.relPos); - ASSERT_FALSE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } - - // Retrieval in middle of line - { - CharReader reader{testStr}; - CharReader::Context ctx = reader.getContext(80); - - char c; - for (int i = 0; i < 5; i++) - reader.read(c); - - ASSERT_EQ("first line", ctx.line); - ASSERT_EQ(0U, ctx.relPos); - ASSERT_FALSE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } - - // Retrieval in whitespace sequence - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 11; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(80); - ASSERT_EQ("first line", ctx.line); - ASSERT_EQ(10U, ctx.relPos); - ASSERT_FALSE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } - - // Truncation of text - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 5; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(3); - ASSERT_EQ("t l", ctx.line); - ASSERT_EQ(1U, ctx.relPos); - ASSERT_TRUE(ctx.truncatedStart); - ASSERT_TRUE(ctx.truncatedEnd); - } - - // Second line - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 12; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(80); - ASSERT_EQ("second line", ctx.line); - ASSERT_EQ(0U, ctx.relPos); - ASSERT_FALSE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } - - // End of second line - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 23; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(80); - ASSERT_EQ("second line", ctx.line); - ASSERT_EQ(11U, ctx.relPos); - 
ASSERT_FALSE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } - - // Last line - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 24; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(80); - ASSERT_EQ("last line", ctx.line); - ASSERT_EQ(0U, ctx.relPos); - ASSERT_FALSE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } - - // Middle of last line - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 28; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(80); - ASSERT_EQ("last line", ctx.line); - ASSERT_EQ(4U, ctx.relPos); - ASSERT_FALSE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } - - // Middle of last line truncated - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 28; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(3); - ASSERT_EQ("t l", ctx.line); - ASSERT_EQ(1U, ctx.relPos); - ASSERT_TRUE(ctx.truncatedStart); - ASSERT_TRUE(ctx.truncatedEnd); - } - - // End of stream - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 100; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(80); - ASSERT_EQ("last line", ctx.line); - ASSERT_EQ(9U, ctx.relPos); - ASSERT_FALSE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } - - // End of stream truncated - { - CharReader reader{testStr}; - - char c; - for (int i = 0; i < 100; i++) - reader.read(c); - - CharReader::Context ctx = reader.getContext(4); - ASSERT_EQ("line", ctx.line); - ASSERT_EQ(4U, ctx.relPos); - ASSERT_TRUE(ctx.truncatedStart); - ASSERT_FALSE(ctx.truncatedEnd); - } -} -} -} - diff --git a/test/core/variant/ReaderTest.cpp b/test/core/variant/ReaderTest.cpp deleted file mode 100644 index 43e85a5..0000000 --- a/test/core/variant/ReaderTest.cpp +++ /dev/null @@ -1,345 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the 
terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include -#include - -#include - -namespace ousia { -namespace variant { - -//static TerminalLogger logger{std::cerr, true}; -static Logger logger; - -TEST(Reader, readString) -{ - // Simple, double quoted string - { - CharReader reader("\"hello world\""); - auto res = Reader::parseString(reader, logger); - ASSERT_TRUE(res.first); - ASSERT_EQ("hello world", res.second); - } - - // Simple, double quoted string with whitespace - { - CharReader reader(" \"hello world\" "); - auto res = Reader::parseString(reader, logger); - ASSERT_TRUE(res.first); - ASSERT_EQ("hello world", res.second); - } - - // Simple, single quoted string - { - CharReader reader("'hello world'"); - auto res = Reader::parseString(reader, logger); - ASSERT_TRUE(res.first); - ASSERT_EQ("hello world", res.second); - } - - // Escape characters - { - CharReader reader("'\\'\\\"\\b\\f\\n\\r\\t\\v'"); - auto res = Reader::parseString(reader, logger); - ASSERT_TRUE(res.first); - ASSERT_EQ("'\"\b\f\n\r\t\v", res.second); - } -} - -TEST(Reader, parseUnescapedString) -{ - // Simple case - { - CharReader reader("hello world;"); - auto res = Reader::parseUnescapedString(reader, logger, {';'}); - ASSERT_TRUE(res.first); - ASSERT_EQ("hello world", res.second); - } - - // Simple case with whitespace - { - CharReader reader(" hello world ; "); - auto res = Reader::parseUnescapedString(reader, logger, {';'}); - ASSERT_TRUE(res.first); - ASSERT_EQ("hello world", res.second); - } - - // Linebreaks - { - 
CharReader reader(" hello\nworld ; "); - auto res = Reader::parseUnescapedString(reader, logger, {';'}); - ASSERT_TRUE(res.first); - ASSERT_EQ("hello\nworld", res.second); - } - - // End of stream - { - CharReader reader(" hello world "); - auto res = Reader::parseUnescapedString(reader, logger, {';'}); - ASSERT_TRUE(res.first); - ASSERT_EQ("hello world", res.second); - } -} - -static const std::unordered_set noDelim; - -TEST(Reader, parseInteger) -{ - // Valid integers - { - CharReader reader("0 "); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(0, res.second); - } - - { - CharReader reader("42 "); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(42, res.second); - } - - { - CharReader reader("-42"); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(-42, res.second); - } - - { - CharReader reader(" -0x4A2 "); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(-0x4A2, res.second); - } - - { - CharReader reader(" 0Xaffe"); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(0xAFFE, res.second); - } - - { - CharReader reader("0x7FFFFFFFFFFFFFFF"); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(0x7FFFFFFFFFFFFFFFL, res.second); - } - - { - CharReader reader("-0x7FFFFFFFFFFFFFFF"); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(-0x7FFFFFFFFFFFFFFFL, res.second); - } - - // Invalid integers - { - CharReader reader("-"); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_FALSE(res.first); - } - - { - CharReader reader("0a"); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_FALSE(res.first); - } - - { - CharReader reader("-0xag"); - auto res = Reader::parseInteger(reader, logger, noDelim); - 
ASSERT_FALSE(res.first); - } - - { - CharReader reader("0x8000000000000000"); - auto res = Reader::parseInteger(reader, logger, noDelim); - ASSERT_FALSE(res.first); - } -} - -TEST(Reader, parseDouble) -{ - // Valid doubles - { - CharReader reader("1.25"); - auto res = Reader::parseDouble(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(1.25, res.second); - } - - { - CharReader reader(".25"); - auto res = Reader::parseDouble(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(.25, res.second); - } - - { - CharReader reader(".25e1"); - auto res = Reader::parseDouble(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(2.5, res.second); - } - - { - CharReader reader("-2.5e-1"); - auto res = Reader::parseDouble(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(-0.25, res.second); - } - - { - CharReader reader("-50e-2"); - auto res = Reader::parseDouble(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(-0.5, res.second); - } - - { - CharReader reader("-1."); - auto res = Reader::parseDouble(reader, logger, noDelim); - ASSERT_TRUE(res.first); - ASSERT_EQ(-1., res.second); - } - - { - CharReader reader("-50.e-2"); - auto res = Reader::parseDouble(reader, logger, {'.'}); - ASSERT_TRUE(res.first); - ASSERT_EQ(-50, res.second); - } - - // Invalid doubles - { - CharReader reader(".e1"); - auto res = Reader::parseDouble(reader, logger, noDelim); - ASSERT_FALSE(res.first); - } - - { - CharReader reader("0e100000"); - auto res = Reader::parseDouble(reader, logger, noDelim); - ASSERT_FALSE(res.first); - } -} - -TEST(Reader, parseArray) -{ - // Simple case (only primitive data types) - { - CharReader reader("[\"Hello, World\", unescaped\n string ,\n" - "1234, 0.56, true, false, null]"); - auto res = Reader::parseArray(reader, logger); - ASSERT_TRUE(res.first); - - // Make sure array has the correct size - ASSERT_EQ(7U, res.second.size()); - - // Check the types - ASSERT_TRUE(res.second[0].isString()); - 
ASSERT_TRUE(res.second[1].isString()); - ASSERT_TRUE(res.second[2].isInt()); - ASSERT_TRUE(res.second[3].isDouble()); - ASSERT_TRUE(res.second[4].isBool()); - ASSERT_TRUE(res.second[5].isBool()); - ASSERT_TRUE(res.second[6].isNull()); - - // Check the values - ASSERT_EQ("Hello, World", res.second[0].asString()); - ASSERT_EQ("unescaped\n string", res.second[1].asString()); - ASSERT_EQ(1234, res.second[2].asInt()); - ASSERT_EQ(0.56, res.second[3].asDouble()); - ASSERT_TRUE(res.second[4].asBool()); - ASSERT_FALSE(res.second[5].asBool()); - } - - // Ending with comma - { - CharReader reader("[ 'test' ,]"); - auto res = Reader::parseArray(reader, logger); - ASSERT_TRUE(res.first); - - // Make sure the array has the correct size - ASSERT_EQ(1U, res.second.size()); - - // Check the types - ASSERT_TRUE(res.second[0].isString()); - - // Check the values - ASSERT_EQ("test", res.second[0].asString()); - } - - // Recovery from invalid values - { - CharReader reader("[ 0invalidNumber, str, 1invalid]"); - auto res = Reader::parseArray(reader, logger); - ASSERT_TRUE(res.first); - - // Make sure the array has the correct size - ASSERT_EQ(3U, res.second.size()); - - // Check the types (all must be strings since the numbers are invalid) - ASSERT_TRUE(res.second[0].isString()); - ASSERT_TRUE(res.second[1].isString()); - ASSERT_TRUE(res.second[2].isString()); - - // Check the values - ASSERT_EQ("0invalidNumber", res.second[0].asString()); - ASSERT_EQ("str", res.second[1].asString()); - ASSERT_EQ("1invalid", res.second[2].asString()); - } -} - -TEST(Reader, parseGeneric) -{ - // Simple case, unescaped string - { - CharReader reader("hello world"); - auto res = Reader::parseGeneric(reader, logger, {';'}); - ASSERT_TRUE(res.first); - ASSERT_TRUE(res.second.isString()); - ASSERT_EQ("hello world", res.second.asString()); - } - - // Simple case, double quoted string - { - CharReader reader(" \"hello world\" "); - auto res = Reader::parseGeneric(reader, logger, {';'}); - 
ASSERT_TRUE(res.first); - ASSERT_TRUE(res.second.isString()); - ASSERT_EQ("hello world", res.second.asString()); - } - - // Simple case, single quoted string - { - CharReader reader(" 'hello world' "); - auto res = Reader::parseGeneric(reader, logger, {';'}); - ASSERT_TRUE(res.first); - ASSERT_TRUE(res.second.isString()); - ASSERT_EQ("hello world", res.second.asString()); - } -} - -} -} - diff --git a/test/core/variant/VariantTest.cpp b/test/core/variant/VariantTest.cpp deleted file mode 100644 index e51cf36..0000000 --- a/test/core/variant/VariantTest.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include - -#include - -namespace ousia { - -TEST(Variant, nullValue) -{ - Variant v; - ASSERT_TRUE(v.isNull()); - - v = 1; - ASSERT_FALSE(v.isNull()); - - v = nullptr; - ASSERT_TRUE(v.isNull()); - - Variant v2{nullptr}; - ASSERT_TRUE(v.isNull()); -} - -TEST(Variant, booleanValue) -{ - Variant v{true}; - ASSERT_TRUE(v.isBool()); - ASSERT_TRUE(v.asBool()); - - v = false; - ASSERT_TRUE(v.isBool()); - ASSERT_FALSE(v.asBool()); - - v.setBool(true); - ASSERT_TRUE(v.isBool()); - ASSERT_TRUE(v.asBool()); - - v = nullptr; - ASSERT_FALSE(v.isBool()); -} - -TEST(Variant, intValue) -{ - Variant v{42}; - ASSERT_TRUE(v.isInt()); - ASSERT_EQ(42, v.asInt()); - - v = 43; - ASSERT_TRUE(v.isInt()); - ASSERT_EQ(43, v.asInt()); - - v = false; - ASSERT_FALSE(v.isInt()); -} - -TEST(Variant, doubleValue) -{ - Variant v{42.5}; - ASSERT_TRUE(v.isDouble()); - ASSERT_EQ(42.5, v.asDouble()); - - v = 42; - ASSERT_FALSE(v.isDouble()); - - v = 43.5; - ASSERT_TRUE(v.isDouble()); - ASSERT_EQ(43.5, v.asDouble()); -} - -TEST(Variant, stringValue) -{ - Variant v{"Hello World"}; - ASSERT_TRUE(v.isString()); - ASSERT_EQ("Hello World", v.asString()); - - v = "Goodbye World"; - ASSERT_TRUE(v.isString()); - ASSERT_EQ("Goodbye World", v.asString()); - - v = 42; - ASSERT_FALSE(v.isString()); -} - -TEST(Variant, arrayValue) -{ - const Variant v{{"test1", 42}}; - ASSERT_EQ(2, v.asArray().size()); - ASSERT_EQ("test1", v.asArray()[0].asString()); - ASSERT_EQ(42, v.asArray()[1].asInt()); -} - -TEST(Variant, mapValue) -{ - const Variant v{{{"key1", "entry1"}, {"key2", "entry2"}}}; - - auto map = v.asMap(); - ASSERT_EQ(2, map.size()); - - ASSERT_EQ("entry1", map.find("key1")->second.asString()); - ASSERT_EQ("entry2", map.find("key2")->second.asString()); - - const Variant v2{{{"key1", Variant::arrayType{1, 2}}, {"key2", "entry2"}}}; - ASSERT_EQ(2, v2.asMap().find("key1")->second.asArray()[1].asInt()); -} - -TEST(Variant, relationalOperators){ - Variant a{4}; - Variant b{4}; - - ASSERT_EQ(a,b); - 
- b.setInt(5); - ASSERT_TRUE(a < b); - - b.setDouble(4); - ASSERT_FALSE(a == b); - - a.setDouble(4); - ASSERT_EQ(a,b); -} - -} - diff --git a/test/plugins/css/CSSParserTest.cpp b/test/plugins/css/CSSParserTest.cpp index 6499375..3ea3a19 100644 --- a/test/plugins/css/CSSParserTest.cpp +++ b/test/plugins/css/CSSParserTest.cpp @@ -186,11 +186,11 @@ TEST(CSSParser, testParseCSS) { Rooted ruleSet = A->getRuleSet(); ASSERT_EQ(2, ruleSet->getRules().size()); - variant::Variant v = ruleSet->getRules()["ident1"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + Variant v = ruleSet->getRules()["ident1"]; + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val4", v.asString()); v = ruleSet->getRules()["ident2"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val2", v.asString()); } /* @@ -211,8 +211,8 @@ TEST(CSSParser, testParseCSS) { Rooted ruleSet = Aselect->getRuleSet(); ASSERT_EQ(1, ruleSet->getRules().size()); - variant::Variant v = ruleSet->getRules()["ident3"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + Variant v = ruleSet->getRules()["ident3"]; + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val3", v.asString()); } /* @@ -250,11 +250,11 @@ TEST(CSSParser, testParseCSS) { Rooted ruleSet = BA->getRuleSet(); ASSERT_EQ(2, ruleSet->getRules().size()); - variant::Variant v = ruleSet->getRules()["ident1"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + Variant v = ruleSet->getRules()["ident1"]; + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val1", v.asString()); v = ruleSet->getRules()["ident2"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val2", v.asString()); } } -- cgit v1.2.3