From 3f62168ed0b088eec3cb2903f03966f7d501f564 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Thu, 11 Dec 2014 15:26:50 +0100 Subject: moved to CharReader everywhere --- src/core/BufferedCharReader.cpp | 263 ------------- src/core/BufferedCharReader.hpp | 288 -------------- src/core/CSS.hpp | 2 +- src/core/CodeTokenizer.hpp | 8 +- src/core/Exceptions.cpp | 46 --- src/core/Exceptions.hpp | 162 -------- src/core/Logger.cpp | 161 -------- src/core/Logger.hpp | 609 ------------------------------ src/core/Registry.cpp | 3 +- src/core/Tokenizer.cpp | 12 +- src/core/Tokenizer.hpp | 27 +- src/core/Utils.cpp | 59 --- src/core/Utils.hpp | 110 ------ src/core/common/CharReader.cpp | 640 +++++++++++++++++++++++++++++++ src/core/common/CharReader.hpp | 665 +++++++++++++++++++++++++++++++++ src/core/common/Exceptions.cpp | 46 +++ src/core/common/Exceptions.hpp | 162 ++++++++ src/core/common/Logger.cpp | 161 ++++++++ src/core/common/Logger.hpp | 609 ++++++++++++++++++++++++++++++ src/core/common/Utils.cpp | 59 +++ src/core/common/Utils.hpp | 110 ++++++ src/core/common/Variant.cpp | 154 ++++++++ src/core/common/Variant.hpp | 761 +++++++++++++++++++++++++++++++++++++ src/core/common/VariantReader.cpp | 625 +++++++++++++++++++++++++++++++ src/core/common/VariantReader.hpp | 166 +++++++++ src/core/parser/Parser.hpp | 4 +- src/core/parser/ParserStack.cpp | 4 +- src/core/parser/ParserStack.hpp | 2 +- src/core/utils/CharReader.cpp | 643 -------------------------------- src/core/utils/CharReader.hpp | 672 --------------------------------- src/core/variant/Reader.cpp | 624 ------------------------------- src/core/variant/Reader.hpp | 169 --------- src/core/variant/Variant.cpp | 155 -------- src/core/variant/Variant.hpp | 766 -------------------------------------- src/plugins/css/CSSParser.cpp | 18 +- src/plugins/css/CSSParser.hpp | 2 +- src/plugins/xml/XmlParser.cpp | 2 +- 37 files changed, 4200 insertions(+), 4769 deletions(-) delete mode 100644 src/core/BufferedCharReader.cpp 
delete mode 100644 src/core/BufferedCharReader.hpp delete mode 100644 src/core/Exceptions.cpp delete mode 100644 src/core/Exceptions.hpp delete mode 100644 src/core/Logger.cpp delete mode 100644 src/core/Logger.hpp delete mode 100644 src/core/Utils.cpp delete mode 100644 src/core/Utils.hpp create mode 100644 src/core/common/CharReader.cpp create mode 100644 src/core/common/CharReader.hpp create mode 100644 src/core/common/Exceptions.cpp create mode 100644 src/core/common/Exceptions.hpp create mode 100644 src/core/common/Logger.cpp create mode 100644 src/core/common/Logger.hpp create mode 100644 src/core/common/Utils.cpp create mode 100644 src/core/common/Utils.hpp create mode 100644 src/core/common/Variant.cpp create mode 100644 src/core/common/Variant.hpp create mode 100644 src/core/common/VariantReader.cpp create mode 100644 src/core/common/VariantReader.hpp delete mode 100644 src/core/utils/CharReader.cpp delete mode 100644 src/core/utils/CharReader.hpp delete mode 100644 src/core/variant/Reader.cpp delete mode 100644 src/core/variant/Reader.hpp delete mode 100644 src/core/variant/Variant.cpp delete mode 100644 src/core/variant/Variant.hpp (limited to 'src') diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp deleted file mode 100644 index aeedf12..0000000 --- a/src/core/BufferedCharReader.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include "Utils.hpp" - -#include "BufferedCharReader.hpp" - -namespace ousia { - -// Constants used within the linebreak statemachine. -static const uint8_t LB_STATE_NONE = 0x00; -static const uint8_t LB_STATE_ONE = 0x01; -static const uint8_t LB_STATE_LF = 0x10; -static const uint8_t LB_STATE_CR = 0x20; -static const uint8_t LB_STATE_MASK_CNT = 0x0F; -static const uint8_t LB_STATE_MASK_TYPE = 0xF0; - -/* Struct BufferedCharReader::ReadCursor */ - -BufferedCharReader::ReadCursor::ReadCursor(unsigned int line, - unsigned int column, - bool destructive) - : line(line), - column(column), - bufferElem(0), - bufferPos(0), - destructive(destructive), - lbState(LB_STATE_NONE) -{ -} - -void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor) -{ - this->line = cursor.line; - this->column = cursor.column; - this->bufferElem = cursor.bufferElem; - this->bufferPos = cursor.bufferPos; - this->lbState = cursor.lbState; -} - -/* Class BufferedCharReader */ - -BufferedCharReader::BufferedCharReader(int line, int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -BufferedCharReader::BufferedCharReader(const std::string &str, int line, - int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(const std::string &str) - : inputStream(nullptr), - readCursor(1, 1, true), - peekCursor(1, 1, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line, - int column) - : inputStream(&inputStream), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -void BufferedCharReader::feed(const std::string &data) -{ - if 
(!depleted && !inputStream) { - buffer.push_back(data); - } -} - -void BufferedCharReader::close() -{ - if (!inputStream) { - depleted = true; - } -} - -bool BufferedCharReader::substituteLinebreaks(ReadCursor &cursor, char *c) -{ - // Handle line breaks, inserts breakes after the following character - // combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n - if ((*c == '\n') || (*c == '\r')) { - // Determine the type of the current linebreak character - const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR; - - // Read the last count and the last type from the state - const uint8_t lastCount = cursor.lbState & LB_STATE_MASK_CNT; - const uint8_t lastType = cursor.lbState & LB_STATE_MASK_TYPE; - - // Set the current linebreak type and counter in the state - cursor.lbState = ((lastCount + 1) & 1) | type; - - // If either this is the first instance of this character or the same - // return character is repeated - if (!lastCount || (lastType == type)) { - *c = '\n'; - return true; - } - return false; - } - - // Find the state - cursor.lbState = LB_STATE_NONE; - return true; -} - -bool BufferedCharReader::readCharacterAtCursor(ReadCursor &cursor, char *c) -{ - bool hasChar = false; - while (!hasChar) { - // Abort if the current buffer element does not point to a valid entry - // in the buffer -- we must try to feed another data block into the - // internal buffer - if (cursor.bufferElem >= buffer.size()) { - // Abort if there is no more data or no input stream is set - if (depleted || !inputStream) { - return false; - } - - // Read a buffer of the specified size - constexpr std::streamsize BUFFER_SIZE = 1024; - std::array buf; - const std::streamsize cnt = - (*inputStream).read(buf.data(), BUFFER_SIZE).gcount(); - - // If data has been read, append it to the input buffer and try - // again - if (cnt > 0) { - buffer.emplace_back(std::string(buf.data(), cnt)); - continue; - } - - // End of file handling - if (inputStream->fail() || 
inputStream->eof()) { - depleted = true; - return false; - } - } - - // Fetch the current element the peek pointer points to - const std::string &data = buffer[cursor.bufferElem]; - - // Handle the "no data" case -- either in a destructive or - // non-destructive manner. - if (cursor.bufferPos >= data.length()) { - if (cursor.destructive) { - buffer.pop_front(); - } else { - cursor.bufferElem++; - } - cursor.bufferPos = 0; - continue; - } - - // Read the character, advance the buffer position - *c = *(data.data() + cursor.bufferPos); - cursor.bufferPos++; - - // Substitute linebreaks with a single LF (0x0A) - hasChar = substituteLinebreaks(cursor, c); - } - - // Update the position counter - if (*c == '\n') { - cursor.line++; - cursor.column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((*c & 0x80) && !(*c & 0x40))) { - cursor.column++; - } - } - - return true; -} - -bool BufferedCharReader::peek(char *c) -{ - return readCharacterAtCursor(peekCursor, c); -} - -bool BufferedCharReader::read(char *c) -{ - resetPeek(); - return readCharacterAtCursor(readCursor, c); -} - -void BufferedCharReader::consumePeek() -{ - // Remove all no longer needed buffer elements - for (unsigned int i = 0; i < peekCursor.bufferElem; i++) { - buffer.pop_front(); - } - peekCursor.bufferElem = 0; - - // Copy the peek cursor to the read cursor - readCursor.assign(peekCursor); -} - -bool BufferedCharReader::consumeWhitespace() -{ - char c; - while (peek(&c)) { - if (!Utils::isWhitespace(c)) { - resetPeek(); - return true; - } - consumePeek(); - } - return false; -} - -void BufferedCharReader::resetPeek() -{ - // Reset the peek cursor to the read cursor - peekCursor.assign(readCursor); -} - -bool BufferedCharReader::atEnd() const -{ - if (depleted || !inputStream) { - if (buffer.size() <= 0) { - return true; - } else if (buffer.size() == 1) { - return buffer[0].size() == readCursor.bufferPos; - } - } - return false; -} -} - diff --git a/src/core/BufferedCharReader.hpp 
b/src/core/BufferedCharReader.hpp deleted file mode 100644 index e7f3186..0000000 --- a/src/core/BufferedCharReader.hpp +++ /dev/null @@ -1,288 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file BufferedCharReader.hpp - * - * Contains the BufferedCharReader class which is used for reading/peeking - * single characters from an input stream or string. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_BUFFERED_CHAR_READER_H_ -#define _OUSIA_BUFFERED_CHAR_READER_H_ - -#include -#include -#include -#include - -namespace ousia { - -// TODO: Better split this class into multiple classes with base class -// BufferedCharReader where each sub class represents one method of supplying -// the input data (feeding, initial string, input stream). - -/** - * The BufferedCharReader class is used for storing incomming data that - * is fed into the pipeline as well as reading/peeking single characters - * from that buffer. Additionally it counts the current column/row - * (with correct handling for UTF-8) and contains an internal state - * machine that handles the detection of linebreaks and converts these to a - * single '\n'. 
- */ -class BufferedCharReader { -private: - /** - * The ReadCursor structure is responsible for representing the read - * position within the text an all state machine states belonging to the - * cursor. There are two types of read cursors: destructive and - * non-destructive read cursors. - */ - struct ReadCursor { - /** - * The line the cursor currently points to. - */ - unsigned int line; - - /** - * The column the cursor currently points to. - */ - unsigned int column; - - /** - * The index of the element in the data buffer we're currently reading - * from. - */ - unsigned int bufferElem; - - /** - * The byte position within this data buffer. - */ - unsigned int bufferPos; - - /** - * Specifies whether this is a destructive cursor (bytes are discarded - * once they were read from the buffer). - */ - const bool destructive; - - /** - * State variable used in the internal state machine of the - * line feed detection. - */ - uint8_t lbState; - - /** - * Constructor of the ReadCursor structure. - * - * @param line is the start line. - * @param column is the start column. - * @param destructive specifies whether the ReadCursor is destructive - * (consumes all read characters, as used in the "read cursor") or - * non-destructive (as used in the "peek cursor"). - */ - ReadCursor(unsigned int line, unsigned int column, bool destructive); - - /** - * Copys the data from another ReadCursor without overriding the - * "destructive" flag. - * - * @param cursor is the cursor that should be copied. - */ - void assign(const ReadCursor &cursor); - }; - - /** - * Pointer at an (optional) input stream used for reading a chunk of data - * whenever the input buffer depletes. - */ - std::istream *inputStream; - - /** - * The read and the peek cursor. - */ - ReadCursor readCursor, peekCursor; - - /** - * Set to true if there is no more input data. - */ - bool depleted; - - /** - * Queue containing the data that has been fed into the char reader. 
- */ - std::deque buffer; - - /** - * Substitute any combination of linebreaks in the incomming code with "\n". - * Returns true if the current character is meant as output, false - * otherwise. - */ - bool substituteLinebreaks(ReadCursor &cursor, char *c); - - /** - * Reads a character from the input buffer and advances the given read - * cursor. - * - * @param cursor is a reference to the read cursor that should be used - * for reading. - * @param hasChar is set to true, if a character is available, false if - * no character is available (e.g. because line breaks are substituted or - * the end of a buffer boundary is reached -- in this case this function - * should be called again with the same parameters.) - * @param c is a output parameter, which will be set to the read character. - * @param returns true if there was enough data in the buffer, false - * otherwise. - */ - bool readCharacterAtCursor(ReadCursor &cursor, char *c); - - /** - * Function that is called for each read character -- updates the row and - * column count. - */ - void updatePositionCounters(const char c); - -public: - - /** - * Constructor of the buffered char reader class with empty buffer as input. - * This operates the BufferedCharReader in a mode where new data has to be - * fed using the "feed" function and explicitly closed using the "close" - * function. - * - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(int line = 1, int column = 1); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(const std::string &str, int line, int column); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. 
- */ - BufferedCharReader(const std::string &str); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param inputStream is the input stream from which incomming data should - * be read. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(std::istream &inputStream, int line = 1, int column = 1); - - /** - * Peeks a single character. If called multiple times, returns the - * character after the previously peeked character. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char *c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char *c); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Moves the read cursor to the next non-whitespace character. Returns - * false, if the end of the stream was reached. - * - * @return false if the end of the stream was reached, false othrwise. - */ - bool consumeWhitespace(); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Feeds new data into the internal buffer of the BufferedCharReader - * class. Only applicable if the buffered char reader was constructed - * without an input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. 
- */ - void feed(const std::string &data); - - /** - * Tells the buffered char reader that no more data will be fed. - * Only applicable if the buffered char reader was constructed without an - * input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void close(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - * - * @return true if there is no more data. - */ - bool atEnd() const; - - /** - * Returns the current line (starting with one). - * - * @return the current line number. - */ - int getLine() const { return readCursor.line; } - - /** - * Returns the current column (starting with one). - * - * @return the current column number. - */ - int getColumn() const { return readCursor.column; } -}; -} - -#endif /* _OUSIA_BUFFERED_CHAR_READER_H_ */ - diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 1510f3a..a54d956 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -23,7 +23,7 @@ #include #include -#include +#include #include "Managed.hpp" #include "Node.hpp" diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 43c7abb..4190297 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -22,7 +22,7 @@ #include #include -#include "BufferedCharReader.hpp" +#include #include "Tokenizer.hpp" namespace ousia { @@ -108,8 +108,8 @@ public: /** * - * @param input a BufferedCharReader containing the input for this - * tokenizer, as with a regular tokenizer. + * @param input a CharReader containing the input for this tokenizer, as + * with a regular tokenizer. * @param root a TokenTreeNode representing the root of the TokenTree. * Please note that you have to specify all tokenIDs here that you use * in the descriptors map. @@ -120,7 +120,7 @@ public: * and this CodeTokenizer would recognize the token "//" as starting a * line comment. 
*/ - CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, + CodeTokenizer(CharReader &input, const TokenTreeNode &root, std::map descriptors) : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) { diff --git a/src/core/Exceptions.cpp b/src/core/Exceptions.cpp deleted file mode 100644 index d064f35..0000000 --- a/src/core/Exceptions.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include "Exceptions.hpp" - -namespace ousia { - -/* Class LoggableException */ - -std::string LoggableException::formatMessage(const std::string &msg, - const std::string &file, - int line, int column) -{ - std::stringstream ss; - ss << "error "; - if (!file.empty()) { - ss << "while processing \"" << file << "\" "; - } - if (line >= 0) { - ss << "at line " << line << ", "; - if (column >= 0) { - ss << "column " << column << " "; - } - } - ss << "with message: " << msg; - return ss.str(); -} -} - diff --git a/src/core/Exceptions.hpp b/src/core/Exceptions.hpp deleted file mode 100644 index 00d6106..0000000 --- a/src/core/Exceptions.hpp +++ /dev/null @@ -1,162 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file Exceptions.hpp - * - * Describes basic exception classes which are used throughout Ousía. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_EXCEPTIONS_HPP_ -#define _OUSIA_EXCEPTIONS_HPP_ - -namespace ousia { - -/** - * Base exception class all other Ousía exceptions should derive from. - */ -class OusiaException : public std::exception { -private: - /** - * Error message which will be printed by the runtime environment if the - * exception is not caught and handled in the code. 
- */ - const std::string formatedMessage; - -public: - /** - * Constructor of the OusiaException class. - * - * @param formatedMessage is a formated message that should be printed by - * the runtime environment if the exception is not caught. - */ - OusiaException(std::string formatedMessage) - : formatedMessage(std::move(formatedMessage)) - { - } - - /** - * Virtual destructor. - */ - virtual ~OusiaException() {} - - /** - * Implementation of the std::exception what function and used to retrieve - * the error message that should be printed by the runtime environment. - * - * @return a reference to the formated message string given in the - * constructor. - */ - const char *what() const noexcept override - { - return formatedMessage.c_str(); - } -}; - -/** - * Exception class which can be directly passed to a Logger instance and thus - * makes it simple to handle non-recoverable errors in the code. - */ -class LoggableException : public OusiaException { -private: - /** - * Function used internally to build the formated message that should be - * reported to the runtime environment. - */ - static std::string formatMessage(const std::string &msg, - const std::string &file, int line, - int column); - -public: - /** - * Message describing the error that occured. - */ - const std::string msg; - - /** - * Name of the file in which the error occured. May be empty. - */ - const std::string file; - - /** - * Line at which the exception occured. Negative values are ignored. - */ - const int line; - - /** - * Column at which the exception occured. Negative values are ignored. - */ - const int column; - - /** - * Constructor of the LoggableException class. - * - * @param msg contains the error message. - * @param file provides the context the message refers to. May be empty. - * @param line is the line in the above file the message refers to. - * @param column is the column in the above file the message refers to. 
- */ - LoggableException(std::string msg, std::string file, int line = -1, - int column = -1) - : OusiaException(formatMessage(msg, file, line, column)), - msg(std::move(msg)), - file(std::move(file)), - line(line), - column(column) - { - } - - /** - * Constructor of the LoggableException class with empty file. - * - * @param msg contains the error message. - * @param line is the line in the above file the message refers to. - * @param column is the column in the above file the message refers to. - */ - LoggableException(std::string msg, int line = -1, int column = -1) - : OusiaException(formatMessage(msg, "", line, column)), - msg(std::move(msg)), - line(line), - column(column) - { - } - - /** - * Constructor of the LoggableException class with empty file and an - * position object. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - LoggableException(std::string msg, const PosType &pos) - : OusiaException( - formatMessage(msg, "", pos.getLine(), pos.getColumn())), - msg(std::move(msg)), - line(pos.getLine()), - column(pos.getColumn()) - { - } -}; -} - -#endif /* _OUSIA_EXCEPTIONS_HPP_ */ - diff --git a/src/core/Logger.cpp b/src/core/Logger.cpp deleted file mode 100644 index 17f55a6..0000000 --- a/src/core/Logger.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include -#include - -#include "Logger.hpp" - -namespace ousia { - -/* Class Logger */ - -void Logger::log(Severity severity, const std::string &msg, - const std::string &file, int line, int column) -{ - // Copy the current severity level - if (static_cast(severity) > static_cast(maxEncounteredSeverity)) { - maxEncounteredSeverity = severity; - } - - // Call the actual log message function if the severity is larger or equal - // to the minimum severity - if (static_cast(severity) >= static_cast(minSeverity)) { - process(Message{severity, msg, file, line, column}); - } -} - -unsigned int Logger::pushFilename(const std::string &name) -{ - filenameStack.push(name); - return filenameStack.size(); -} - -unsigned int Logger::popFilename() -{ - filenameStack.pop(); - return filenameStack.size(); -} - -void Logger::unwindFilenameStack(unsigned int pos) -{ - while (filenameStack.size() > pos && !filenameStack.empty()) { - filenameStack.pop(); - } -} - -/* Class TerminalLogger */ - -/** - * Small class used internally for formated terminal output using ANSI/VT100 - * escape codes on supported terminals. - * - * TODO: Deactivate if using windows or use the corresponding API function. - */ -class Terminal { -private: - /** - * If set to false, no control codes are generated. 
- */ - bool active; - -public: - static const int BLACK = 30; - static const int RED = 31; - static const int GREEN = 32; - static const int YELLOW = 33; - static const int BLUE = 34; - static const int MAGENTA = 35; - static const int CYAN = 36; - static const int WHITE = 37; - - Terminal(bool active) : active(active) {} - - std::string color(int color, bool bright = true) const - { - if (!active) { - return std::string{}; - } - std::stringstream ss; - ss << "\x1b["; - if (bright) { - ss << "1;"; - } - ss << color << "m"; - return ss.str(); - } - - std::string reset() const - { - if (!active) { - return std::string{}; - } - return "\x1b[0m"; - } -}; - -void TerminalLogger::process(const Message &msg) -{ - Terminal t(useColor); - - // Print the file name - if (msg.hasFile()) { - os << t.color(Terminal::WHITE, true) << msg.file << t.reset(); - } - - // Print line and column number - if (msg.hasLine()) { - if (msg.hasFile()) { - os << ':'; - } - os << t.color(Terminal::WHITE, true) << msg.line - << t.reset(); - if (msg.hasColumn()) { - os << ':' << msg.column; - } - } - - // Print the optional seperator - if (msg.hasFile() || msg.hasLine()) { - os << ": "; - } - - // Print the severity - switch (msg.severity) { - case Severity::DEBUG: - break; - case Severity::NOTE: - os << t.color(Terminal::CYAN, true) << "note: "; - break; - case Severity::WARNING: - os << t.color(Terminal::MAGENTA, true) << "warning: "; - break; - case Severity::ERROR: - os << t.color(Terminal::RED, true) << "error: "; - break; - case Severity::FATAL_ERROR: - os << t.color(Terminal::RED, true) << "fatal: "; - break; - } - os << t.reset(); - - // Print the actual message - os << msg.msg << std::endl; -} -} - diff --git a/src/core/Logger.hpp b/src/core/Logger.hpp deleted file mode 100644 index e6b97f4..0000000 --- a/src/core/Logger.hpp +++ /dev/null @@ -1,609 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it 
and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file Logger.hpp - * - * Contains classes for logging messages in Ousía. Provides a generic Logger - * class, and TerminalLogger, an extension of Logger which logs do an output - * stream. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_LOGGER_HPP_ -#define _OUSIA_LOGGER_HPP_ - -#include -#include -#include -#include - -#include "Exceptions.hpp" - -namespace ousia { - -/** - * Enum containing the severities used for logging errors and debug messages. - */ -enum class Severity : int { - /** - * Indicates that this message was only printed for debugging. Note that - * in release builds messages with this severity are discarded. - */ - DEBUG = 0, - - /** - * A message which might provide additional information to the user. - */ - NOTE = 1, - - /** - * A message which warns of possible mistakes by the user which might not be - * actual errors but may lead to unintended behaviour. - */ - WARNING = 2, - - /** - * An error occurred while processing, however program execution continues, - * trying to deal with the error situation (graceful degradation). However, - * messages with this severity may be followed up by fatal errors. - */ - ERROR = 3, - - /** - * A fatal error occurred. Program execution cannot continue. 
- */ - FATAL_ERROR = 4 -}; - -#ifdef NDEBUG -static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE; -#else -static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG; -#endif - -/** - * The Logger class is the base class the individual logging systems should - * derive from. It provides a simple interface for logging errors, warnings and - * notes and filters these according to the set minimum severity. Additionally - * a stack of file names is maintained in order to allow simple descent into - * included files. Note however, that this base Logger class simply discards the - * incomming log messages. Use one of the derived classes to actually handle the - * log messages. - */ -class Logger { -public: - /** - * The message struct represents a single log message and all information - * attached to it. - */ - struct Message { - /** - * Severity of the log message. - */ - Severity severity; - - /** - * Actual log message. - */ - std::string msg; - - /** - * Refers to the file which provides the context for this error message. - * May be empty. - */ - std::string file; - - /** - * Line in the above file the error message refers to. Ignored if - * smaller than zero. - */ - int line; - - /** - * Column in the above file the error message refers to. Ignored if - * smaller than zero. - */ - int column; - - /** - * Constructor of the Message struct. - * - * @param severity describes the message severity. - * @param msg contains the actual message. - * @param file provides the context the message refers to. May be empty. - * @param line is the line in the above file the message refers to. - * @param column is the column in the above file the message refers to. - */ - Message(Severity severity, std::string msg, std::string file, int line, - int column) - : severity(severity), - msg(std::move(msg)), - file(std::move(file)), - line(line), - column(column){}; - - /** - * Returns true if the file string is set. - * - * @return true if the file string is set. 
- */ - bool hasFile() const { return !file.empty(); } - - /** - * Returns true if the line is set. - * - * @return true if the line number is a non-negative integer. - */ - bool hasLine() const { return line >= 0; } - - /** - * Returns true if column and line are set (since a column has no - * significance without a line number). - * - * @return true if line number and column number are non-negative - * integers. - */ - bool hasColumn() const { return hasLine() && column >= 0; } - }; - -private: - /** - * Minimum severity a log message should have before it is discarded. - */ - Severity minSeverity; - - /** - * Maximum encountered log message severity. - */ - Severity maxEncounteredSeverity; - - /** - * Stack containing the current file names that have been processed. - */ - std::stack filenameStack; - -protected: - /** - * Function to be overriden by child classes to actually display or store - * the messages. The default implementation just discards all incomming - * messages. - * - * @param msg is an instance of the Message struct containing the data that - * should be logged. - */ - virtual void process(const Message &msg){}; - -public: - /** - * Constructor of the Logger class. - * - * @param minSeverity is the minimum severity a log message should have. - * Messages below this severity are discarded. - */ - Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY) - : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG) - { - } - - Logger(const Logger &) = delete; - - /** - * Virtual destructor. - */ - virtual ~Logger(){}; - - /** - * Logs the given message. Most generic log function. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. 
- * Ignored if negative. - */ - void log(Severity severity, const std::string &msg, const std::string &file, - int line = -1, int column = -1); - - /** - * Logs the given message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void log(Severity severity, const std::string &msg, int line = -1, - int column = -1) - { - log(severity, msg, currentFilename(), line, column); - } - - /** - * Logs the given message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - * @tparam PosType is the actual type of pos and must implement a getLine - * and getColumn function. - */ - template - void logAt(Severity severity, const std::string &msg, const PosType &pos) - { - log(severity, msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs the given loggable exception. - * - * @param ex is the exception that should be logged. - */ - void log(const LoggableException &ex) - { - log(Severity::ERROR, ex.msg, - ex.file.empty() ? currentFilename() : ex.file, ex.line, ex.column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. 
- */ - void debug(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::DEBUG, msg, file, line, column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void debug(const std::string &msg, int line = -1, int column = -1) - { - debug(msg, currentFilename(), line, column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void debugAt(const std::string &msg, const PosType &pos) - { - debug(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void note(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::NOTE, msg, file, line, column); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. 
- */ - void note(const std::string &msg, int line = -1, int column = -1) - { - note(msg, currentFilename(), line, column); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void noteAt(const std::string &msg, const PosType &pos) - { - note(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void warning(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::WARNING, msg, file, line, column); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void warningAt(const std::string &msg, const PosType &pos) - { - warning(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void warning(const std::string &msg, int line = -1, int column = -1) - { - warning(msg, currentFilename(), line, column); - } - - /** - * Logs an error message. 
The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void error(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::ERROR, msg, file, line, column); - } - - /** - * Logs an error message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void error(const std::string &msg, int line = -1, int column = -1) - { - error(msg, currentFilename(), line, column); - } - - /** - * Logs an error message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void errorAt(const std::string &msg, const PosType &pos) - { - error(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. 
- */ - void fatalError(const std::string &msg, const std::string &file, - int line = -1, int column = -1) - { - log(Severity::FATAL_ERROR, msg, file, line, column); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void fatalError(const std::string &msg, int line = -1, int column = -1) - { - fatalError(msg, currentFilename(), line, column); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template - void fatalErrorAt(const std::string &msg, const PosType &pos) - { - fatalError(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Pushes a new file name onto the internal filename stack. - * - * @param name is the name of the file that should be added to the filename - * stack. - * @return the size of the filename stack. This number can be passed to the - * "unwindFilenameStack" method in order to return the stack to state it was - * in after this function has been called. - */ - unsigned int pushFilename(const std::string &name); - - /** - * Pops the filename from the internal filename stack. - * - * @return the current size of the filename stack. - */ - unsigned int popFilename(); - - /** - * Pops elements from the filename stack while it has more elements than - * the given number and the stack is non-empty. - * - * @param pos is the position the filename stack should be unwound to. Use - * a number returned by pushFilename. - */ - void unwindFilenameStack(unsigned int pos); - - /** - * Returns the topmost filename from the internal filename stack. 
- * - * @return the topmost filename from the filename stack or an empty string - * if the filename stack is empty. - */ - std::string currentFilename() - { - return filenameStack.empty() ? std::string{} : filenameStack.top(); - } - - /** - * Returns the maximum severity that was encountered by the Logger but at - * least Severity::DEBUG. - * - * @return the severity of the most severe log message but at least - * Severity::DEBUG. - */ - Severity getMaxEncounteredSeverity() { return maxEncounteredSeverity; } - - /** - * Returns the minimum severity. Messages with a smaller severity are - * discarded. - * - * @return the minimum severity. - */ - Severity getMinSeverity() { return minSeverity; } - - /** - * Sets the minimum severity. Messages with a smaller severity will be - * discarded. Only new messages will be filtered according to the new value. - * - * @param severity is the minimum severity for new log messages. - */ - void setMinSeverity(Severity severity) { minSeverity = severity; } -}; - -/** - * Class extending the Logger class and printing the log messages to the given - * stream. - */ -class TerminalLogger : public Logger { -private: - /** - * Reference to the target output stream. - */ - std::ostream &os; - - /** - * If true, the TerminalLogger will use colors to make the log messages - * prettier. - */ - bool useColor; - -protected: - /** - * Implements the process function and logs the messages to the output. - */ - void process(const Message &msg) override; - -public: - /** - * Constructor of the TerminalLogger class. - * - * @param os is the output stream the log messages should be logged to. - * Should be set to std::cerr in most cases. - * @param useColor if true, the TerminalLogger class will do its best to - * use ANSI/VT100 control sequences for colored log messages. - * @param minSeverity is the minimum severity below which log messages are - * discarded. 
- */ - TerminalLogger(std::ostream &os, bool useColor = false, - Severity minSeverity = DEFAULT_MIN_SEVERITY) - : Logger(minSeverity), os(os), useColor(useColor) - { - } -}; -} - -#endif /* _OUSIA_LOGGER_HPP_ */ - diff --git a/src/core/Registry.cpp b/src/core/Registry.cpp index 6ff9594..74d1cf8 100644 --- a/src/core/Registry.cpp +++ b/src/core/Registry.cpp @@ -16,8 +16,7 @@ along with this program. If not, see . */ -#include - +#include #include namespace ousia { diff --git a/src/core/Tokenizer.cpp b/src/core/Tokenizer.cpp index b99d1ed..0af5f5a 100644 --- a/src/core/Tokenizer.cpp +++ b/src/core/Tokenizer.cpp @@ -72,7 +72,7 @@ TokenTreeNode::TokenTreeNode(const std::map &inputs) { } -Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root) +Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root) : input(input), root(root) { } @@ -81,10 +81,10 @@ bool Tokenizer::prepare() { std::stringstream buffer; char c; - int startColumn = input.getColumn(); - int startLine = input.getLine(); + uint32_t startColumn = input.getColumn(); + uint32_t startLine = input.getLine(); bool bufEmpty = true; - while (input.peek(&c)) { + while (input.peek(c)) { if (root.children.find(c) != root.children.end()) { // if there might be a special token, keep peeking forward // until we find the token (or we don't). @@ -107,7 +107,7 @@ bool Tokenizer::prepare() input.consumePeek(); } } - if (!input.peek(&c)) { + if (!input.peek(c)) { // if we are at the end we break off the search. break; } @@ -153,7 +153,7 @@ bool Tokenizer::prepare() } } else{ //if we found nothing, read at least one character. 
- input.peek(&c); + input.peek(c); } } buffer << c; diff --git a/src/core/Tokenizer.hpp b/src/core/Tokenizer.hpp index 8f80150..33327cc 100644 --- a/src/core/Tokenizer.hpp +++ b/src/core/Tokenizer.hpp @@ -19,11 +19,12 @@ #ifndef _OUSIA_TOKENIZER_HPP_ #define _OUSIA_TOKENIZER_HPP_ +#include +#include #include #include -#include -#include "BufferedCharReader.hpp" +#include namespace ousia { @@ -120,13 +121,13 @@ static const int TOKEN_TEXT = -2; struct Token { int tokenId; std::string content; - int startColumn; - int startLine; - int endColumn; - int endLine; + uint32_t startColumn; + uint32_t startLine; + uint32_t endColumn; + uint32_t endLine; - Token(int tokenId, std::string content, int startColumn, int startLine, - int endColumn, int endLine) + Token(int tokenId, std::string content, uint32_t startColumn, uint32_t startLine, + uint32_t endColumn, uint32_t endLine) : tokenId(tokenId), content(content), startColumn(startColumn), @@ -160,7 +161,7 @@ struct Token { */ class Tokenizer { private: - BufferedCharReader &input; + CharReader &input; const TokenTreeNode &root; std::deque peeked; unsigned int peekCursor = 0; @@ -185,14 +186,14 @@ protected: public: /** * @param input The input of a Tokenizer is given in the form of a - * BufferedCharReader. Please refer to the respective documentation. + * CharReader. Please refer to the respective documentation. * @param root This is meant to be the root of a TokenTree giving the * specification of user-defined tokens this Tokenizer should recognize. * The Tokenizer promises to not change the TokenTree such that you can * re-use the same specification for multiple inputs. * Please refer to the TokenTreeNode documentation for more information. 
*/ - Tokenizer(BufferedCharReader &input, const TokenTreeNode &root); + Tokenizer(CharReader &input, const TokenTreeNode &root); /** * The next method consumes one Token from the input stream and gives @@ -224,9 +225,9 @@ public: */ void consumePeek(); - const BufferedCharReader &getInput() const { return input; } + const CharReader &getInput() const { return input; } - BufferedCharReader &getInput() { return input; } + CharReader &getInput() { return input; } }; } diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp deleted file mode 100644 index c460ed4..0000000 --- a/src/core/Utils.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include - -#include "Utils.hpp" - -namespace ousia { - -std::string Utils::trim(const std::string &s) -{ - size_t firstNonWhitespace = std::numeric_limits::max(); - size_t lastNonWhitespace = 0; - for (size_t i = 0; i < s.size(); i++) { - if (!isWhitespace(s[i])) { - firstNonWhitespace = std::min(i, firstNonWhitespace); - lastNonWhitespace = std::max(i, lastNonWhitespace); - } - } - - if (firstNonWhitespace < lastNonWhitespace) { - return s.substr(firstNonWhitespace, - lastNonWhitespace - firstNonWhitespace + 1); - } - return std::string{}; -} - -bool Utils::isIdentifier(const std::string &name) -{ - bool first = true; - for (char c : name) { - if (first && !(isAlphabetic(c) || c == '_')) { - return false; - } - if (first && !(isAlphanumeric(c) || c == '_' || c == '-')) { - return false; - } - first = false; - } - return true; -} -} - diff --git a/src/core/Utils.hpp b/src/core/Utils.hpp deleted file mode 100644 index 5332b50..0000000 --- a/src/core/Utils.hpp +++ /dev/null @@ -1,110 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef _OUSIA_UTILS_H_ -#define _OUSIA_UTILS_H_ - -#include -#include - -namespace ousia { - -class Utils { -public: - /** - * Returns true if the given character is in [A-Za-z] - */ - static bool isAlphabetic(const char c) - { - return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); - } - - /** - * Returns true if the given character is in [0-9] - */ - static bool isNumeric(const char c) { return (c >= '0') && (c <= '9'); } - - /** - * Returns true if the given character is in [0-9A-Fa-f] - */ - static bool isHexadecimal(const char c) - { - return ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F')) || - ((c >= 'a') && (c <= 'f')); - } - - /** - * Returns true if the given character is in [A-Za-z0-9] - */ - static bool isAlphanumeric(const char c) - { - return isAlphabetic(c) || isNumeric(c); - } - - /** - * Returns true if the given character is in [A-Za-z_][A-Za-z0-9_-]* - */ - static bool isIdentifier(const std::string &name); - - /** - * Returns true if the given character is a whitespace character. - */ - static bool isWhitespace(const char c) - { - return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'); - } - - /** - * Removes whitespace at the beginning and the end of the given string. - */ - static std::string trim(const std::string &s); - - /** - * Turns the elements of a collection into a string separated by the - * given delimiter. - * - * @param es is an iterable container of elements that can be appended to an - * output stream (the << operator must be implemented). - * @param delim is the delimiter that should be used to separate the items. - * @param start is a character sequence that should be prepended to the - * result. - * @param end is a character sequence that should be appended to the result. 
- */ - template - static std::string join(T es, const std::string &delim, - const std::string &start = "", - const std::string &end = "") - { - std::stringstream res; - bool first = true; - res << start; - for (const auto &e : es) { - if (!first) { - res << delim; - } - res << e; - first = false; - } - res << end; - return res.str(); - } -}; -} - -#endif /* _OUSIA_UTILS_H_ */ - diff --git a/src/core/common/CharReader.cpp b/src/core/common/CharReader.cpp new file mode 100644 index 0000000..373c0c1 --- /dev/null +++ b/src/core/common/CharReader.cpp @@ -0,0 +1,640 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include + +#include "CharReader.hpp" +#include "Utils.hpp" + +namespace ousia { + +/* Helper functions */ + +/** + * istreamReadCallback is used internally by the Buffer calss to stream data + * from an input stream. + * + * @param buf is points a the target memory region. + * @param size is the requested number of bytes. + * @param userData is a pointer at some user defined data. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. 
+ */ +static size_t istreamReadCallback(char *buf, size_t size, void *userData) +{ + return (static_cast(userData))->read(buf, size).gcount(); +} + +/* Class Buffer */ + +Buffer::Buffer(ReadCallback callback, void *userData) + : callback(callback), + userData(userData), + reachedEnd(false), + startBucket(buckets.end()), + endBucket(buckets.end()), + startOffset(0), + firstDead(0) +{ + // Load a first block of data from the stream + stream(); + startBucket = buckets.begin(); +} + +Buffer::Buffer(std::istream &istream) : Buffer(istreamReadCallback, &istream) {} + +Buffer::Buffer(const std::string &str) + : callback(nullptr), + userData(nullptr), + reachedEnd(true), + startBucket(buckets.end()), + endBucket(buckets.end()), + startOffset(0), + firstDead(0) +{ + // Copy the given string into a first buffer and set the start buffer + // correctly + Bucket &bucket = nextBucket(); + bucket.resize(str.size()); + std::copy(str.begin(), str.end(), bucket.begin()); + startBucket = buckets.begin(); +} + +#ifndef NDEBUG +Buffer::~Buffer() +{ + // Make sure all cursors have been deleted + for (bool cursor_alive: alive) { + assert(!cursor_alive); + } +} +#endif + +void Buffer::advance(BucketIterator &it) +{ + it++; + if (it == buckets.end()) { + it = buckets.begin(); + } +} + +void Buffer::advance(BucketList::const_iterator &it) const +{ + it++; + if (it == buckets.cend()) { + it = buckets.cbegin(); + } +} + +Buffer::Bucket &Buffer::nextBucket() +{ + constexpr size_t MAXVAL = std::numeric_limits::max(); + + // Fetch the minimum bucket index + size_t minBucketIdx = MAXVAL; + for (size_t i = 0; i < cursors.size(); i++) { + if (alive[i]) { + // Fetch references to the bucket and the cursor + const Cursor &cur = cursors[i]; + const Bucket &bucket = *(cur.bucket); + + // Increment the bucket index by one, if the cursor is at the end + // of the bucket (only valid if the LOOKBACK_SIZE is set to zero) + size_t bIdx = cur.bucketIdx; + if (LOOKBACK_SIZE == 0 && cur.bucketOffs == 
bucket.size()) { + bIdx++; + } + + // Decrement the bucket index by one, if the previous bucket still + // needs to be reached and cannot be overridden + if (bIdx > 0 && cur.bucketOffs < LOOKBACK_SIZE) { + bIdx--; + } + + // Set the bucket index to the minium + minBucketIdx = std::min(minBucketIdx, bIdx); + } + } + + // If there is space between the current start bucket and the read + // cursor, the start bucket can be safely overridden. + if (minBucketIdx > 0 && minBucketIdx != MAXVAL) { + // All cursor bucket indices will be decreased by one + for (size_t i = 0; i < cursors.size(); i++) { + cursors[i].bucketIdx--; + } + + // Increment the start offset + startOffset += startBucket->size(); + + // The old start bucket is the new end bucket + endBucket = startBucket; + + // Advance the start bucket, wrap around at the end of the list + advance(startBucket); + } else { + // No free bucket, insert a new one before the start bucket + endBucket = buckets.emplace(startBucket); + } + return *endBucket; +} + +Buffer::CursorId Buffer::nextCursor() +{ + bool hasCursor = false; + CursorId res = 0; + + // Search for the next free cursor starting with minNextCursorId + for (size_t i = firstDead; i < alive.size(); i++) { + if (!alive[i]) { + res = i; + hasCursor = true; + break; + } + } + + // Add a new cursor to the cursor list if no cursor is currently free + if (!hasCursor) { + res = cursors.size(); + cursors.resize(res + 1); + alive.resize(res + 1); + } + + // The next dead cursor is at least the next cursor + firstDead = res + 1; + + // Mark the new cursor as alive + alive[res] = true; + + return res; +} + +void Buffer::stream() +{ + // Fetch the bucket into which the data should be inserted, make sure it + // has the correct size + Bucket &tar = nextBucket(); + tar.resize(REQUEST_SIZE); + + // Read data from the stream into the target buffer + size_t size = callback(tar.data(), REQUEST_SIZE, userData); + + // If not enough bytes were returned, we're at the end of the 
stream + if (size < REQUEST_SIZE) { + tar.resize(size); + reachedEnd = true; + } +} + +Buffer::CursorId Buffer::createCursor() +{ + CursorId res = nextCursor(); + cursors[res].bucket = startBucket; + cursors[res].bucketIdx = 0; + cursors[res].bucketOffs = 0; + return res; +} + +Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref) +{ + CursorId res = nextCursor(); + cursors[res] = cursors[ref]; + return res; +} + +void Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to) +{ + cursors[to] = cursors[from]; +} + +void Buffer::deleteCursor(Buffer::CursorId cursor) +{ + alive[cursor] = false; + firstDead = std::min(firstDead, cursor); +} + +size_t Buffer::offset(Buffer::CursorId cursor) const +{ + const Cursor &cur = cursors[cursor]; + size_t offs = startOffset + cur.bucketOffs; + BucketList::const_iterator it = startBucket; + while (it != cur.bucket) { + offs += it->size(); + advance(it); + } + return offs; +} + +size_t Buffer::moveForward(CursorId cursor, size_t relativeOffs) +{ + size_t offs = relativeOffs; + Cursor &cur = cursors[cursor]; + while (offs > 0) { + // Fetch the current bucket of the cursor + Bucket &bucket = *(cur.bucket); + + // If there is enough space in the bucket, simply increment the bucket + // offset by the given relative offset + const size_t space = bucket.size() - cur.bucketOffs; + if (space >= offs) { + cur.bucketOffs += offs; + break; + } else { + // Go to the end of the current bucket otherwise + offs -= space; + cur.bucketOffs = bucket.size(); + + // Go to the next bucket + if (cur.bucket != endBucket) { + // Go to the next bucket + advance(cur.bucket); + cur.bucketIdx++; + cur.bucketOffs = 0; + } else { + // Abort, if there is no more data to stream, otherwise just + // load new data + if (reachedEnd) { + return relativeOffs - offs; + } + stream(); + } + } + } + return relativeOffs; +} + +size_t Buffer::moveBackward(CursorId cursor, size_t relativeOffs) +{ + size_t offs = relativeOffs; + Cursor &cur = cursors[cursor]; + 
while (offs > 0) { + // If there is enough space in the bucket, simply decrement the bucket + // offset by the given relative offset + if (cur.bucketOffs >= offs) { + cur.bucketOffs -= offs; + break; + } else { + // Go to the beginning of the current bucket otherwise + offs -= cur.bucketOffs; + cur.bucketOffs = 0; + + // Abort if there is no more bucket to got back to + if (cur.bucketIdx == 0) { + return relativeOffs - offs; + } + + // Go to the previous bucket (wrap around at the beginning of the + // list) + if (cur.bucket == buckets.begin()) { + cur.bucket = buckets.end(); + } + cur.bucket--; + + // Decrement the bucket index, and set the current offset to the + // end of the new bucket + cur.bucketIdx--; + cur.bucketOffs = cur.bucket->size(); + } + } + return relativeOffs; +} + +ssize_t Buffer::moveCursor(CursorId cursor, ssize_t relativeOffs) +{ + if (relativeOffs > 0) { + return moveForward(cursor, relativeOffs); + } else if (relativeOffs < 0) { + return -moveBackward(cursor, -relativeOffs); + } else { + return 0; + } +} + +bool Buffer::atEnd(Buffer::CursorId cursor) const +{ + const Cursor &c = cursors[cursor]; + return reachedEnd && + (c.bucket == endBucket && c.bucketOffs == endBucket->size()); +} + +bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr) +{ + Cursor &cur = cursors[cursor]; + while (true) { + // Reference at the current bucket + Bucket &bucket = *(cur.bucket); + + // If there is still data in the current bucket, return this data + if (cur.bucketOffs < bucket.size()) { + c = bucket[cur.bucketOffs]; + if (incr) { + cur.bucketOffs++; + } + return true; + } else if (cur.bucket == endBucket) { + // Return false if the end of the stream has been reached, otherwise + // load new data + if (reachedEnd) { + return false; + } + stream(); + } + + // Go to the next bucket + cur.bucketIdx++; + cur.bucketOffs = 0; + advance(cur.bucket); + } +} + +bool Buffer::read(Buffer::CursorId cursor, char &c) +{ + return fetchCharacter(cursor, c, true); +} 
+ +bool Buffer::fetch(CursorId cursor, char &c) +{ + return fetchCharacter(cursor, c, false); +} + +/* CharReader::Cursor class */ + +void CharReader::Cursor::assign(std::shared_ptr buffer, + CharReader::Cursor &cursor) +{ + // Copy the cursor position + buffer->copyCursor(cursor.cursor, this->cursor); + + // Copy the state + line = cursor.line; + column = cursor.column; +} + +/* CharReader class */ + +CharReader::CharReader(std::shared_ptr buffer, size_t line, + size_t column) + : buffer(buffer), + readCursor(buffer->createCursor(), line, column), + peekCursor(buffer->createCursor(), line, column), + coherent(true) +{ +} + +CharReader::CharReader(const std::string &str, size_t line, size_t column) + : CharReader(std::shared_ptr{new Buffer{str}}, line, column) +{ +} + +CharReader::CharReader(std::istream &istream, size_t line, size_t column) + : CharReader(std::shared_ptr{new Buffer{istream}}, line, column) +{ +} + +CharReader::~CharReader() +{ + buffer->deleteCursor(readCursor.cursor); + buffer->deleteCursor(peekCursor.cursor); +} + +bool CharReader::readAtCursor(Cursor &cursor, char &c) +{ + // Return false if we're at the end of the stream + if (!buffer->read(cursor.cursor, c)) { + return false; + } + + // Substitute linebreak sequences with a single '\n' + if (c == '\n' || c == '\r') { + // Output a single \n + c = '\n'; + + // Check whether the next character is a continuation of the + // current character + char c2; + if (buffer->read(cursor.cursor, c2)) { + if ((c2 != '\n' && c2 != '\r') || c2 == c) { + buffer->moveCursor(cursor.cursor, -1); + } + } + } + + // Count lines and columns + if (c == '\n') { + // A linebreak was reached, go to the next line + cursor.line++; + cursor.column = 1; + } else { + // Ignore UTF-8 continuation bytes + if (!((c & 0x80) && !(c & 0x40))) { + cursor.column++; + } + } + return true; +} + +bool CharReader::peek(char &c) +{ + // If the reader was coherent, update the peek cursor state + if (coherent) { + 
peekCursor.assign(buffer, readCursor); + coherent = false; + } + + // Read a character from the peek cursor + return readAtCursor(peekCursor, c); +} + +bool CharReader::read(char &c) +{ + // Read a character from the buffer at the current read cursor + bool res = readAtCursor(readCursor, c); + + // Set the peek position to the current read position, if reading was not + // coherent + if (!coherent) { + peekCursor.assign(buffer, readCursor); + coherent = true; + } else { + buffer->copyCursor(readCursor.cursor, peekCursor.cursor); + } + + // Return the result of the read function + return res; +} + +void CharReader::resetPeek() +{ + if (!coherent) { + peekCursor.assign(buffer, readCursor); + coherent = true; + } +} + +void CharReader::consumePeek() +{ + if (!coherent) { + readCursor.assign(buffer, peekCursor); + coherent = true; + } +} + +bool CharReader::consumeWhitespace() +{ + char c; + while (peek(c)) { + if (!Utils::isWhitespace(c)) { + resetPeek(); + return true; + } + consumePeek(); + } + return false; +} + +CharReaderFork CharReader::fork() +{ + return CharReaderFork(buffer, readCursor, peekCursor, coherent); +} + +CharReader::Context CharReader::getContext(ssize_t maxSize) +{ + // Clone the current read cursor + Buffer::CursorId cur = buffer->createCursor(readCursor.cursor); + + // Fetch the start position of the search + ssize_t offs = buffer->offset(cur); + ssize_t start = offs; + ssize_t end = offs; + char c; + + // Search the beginning of the line with the last non-whitespace character + bool hadNonWhitespace = false; + bool foundBegin = false; + for (ssize_t i = 0; i < maxSize; i++) { + // Fetch the character at the current position + if (buffer->fetch(cur, c)) { + // Abort, at linebreaks if we found a non-linebreak character + hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c); + if (hadNonWhitespace && (c == '\n' || c == '\r')) { + buffer->moveCursor(cur, 1); + start++; + foundBegin = true; + break; + } + } + if (buffer->moveCursor(cur, 
-1) == 0) { + foundBegin = true; + break; + } else { + // Update the start position and the hadNonWhitespace flag + start--; + } + } + + // Search the end of the line + buffer->moveCursor(cur, offs - start); + bool foundEnd = false; + for (ssize_t i = 0; i < maxSize; i++) { + // Increment the end counter if a character was read, abort if the end + // of the stream has been reached + if (buffer->read(cur, c)) { + end++; + } else { + foundEnd = true; + break; + } + + // Abort on linebreak characters + if (c == '\n' || c == '\r') { + foundEnd = true; + break; + } + } + + // Calculate the truncated start and end position and limit the number of + // characters to the maximum number of characters + ssize_t tStart = start; + ssize_t tEnd = end; + if (tEnd - tStart > maxSize) { + tStart = std::max(offs - maxSize / 2, tStart); + tEnd = tStart + maxSize; + } + + // Try to go to the calculated start position and fetch the actual start + // position + ssize_t aStart = end + buffer->moveCursor(cur, tStart - end); + if (aStart > tStart) { + tEnd = tEnd + (aStart - tStart); + tStart = aStart; + } + + // Read one line + std::stringstream ss; + size_t relPos = 0; + for (ssize_t i = tStart; i < tEnd; i++) { + if (buffer->read(cur, c)) { + // Break once a linebreak is reached + if (c == '\n' || c == '\r') { + break; + } + + // Add the current character to the output + ss << c; + + // Increment the string-relative offset as long as the original + // offset is not reached in the for loop + if (i < offs) { + relPos++; + } + } + } + + // Delete the newly created cursor + buffer->deleteCursor(cur); + + return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start, + !foundEnd || tEnd != end}; +} + +/* Class CharReaderFork */ + +CharReaderFork::CharReaderFork(std::shared_ptr buffer, + CharReader::Cursor &parentReadCursor, + CharReader::Cursor &parentPeekCursor, + bool coherent) + : CharReader(buffer, 1, 1), + parentReadCursor(parentReadCursor), + 
parentPeekCursor(parentPeekCursor) +{ + readCursor.assign(buffer, parentReadCursor); + peekCursor.assign(buffer, parentPeekCursor); + this->coherent = coherent; +} + +void CharReaderFork::commit() +{ + parentReadCursor.assign(buffer, readCursor); + parentPeekCursor.assign(buffer, peekCursor); +} +} + diff --git a/src/core/common/CharReader.hpp b/src/core/common/CharReader.hpp new file mode 100644 index 0000000..3cbe4b4 --- /dev/null +++ b/src/core/common/CharReader.hpp @@ -0,0 +1,665 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file CharReader.hpp + * + * Used within all parsers to read single characters from an underlying stream. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_CHAR_READER_HPP_ +#define _OUSIA_CHAR_READER_HPP_ + +#include +#include +#include +#include + +namespace ousia { + +/** + * A chunked ring buffer used in CharReader to provide access to an input stream + * with multiple read cursors. The Buffer automatically expands to the + * size of the spanned by the read cursors while reusing already allocated + * memory. + */ +class Buffer { +public: + /** + * Callback function which is called whenever new data is requested from the + * input stream. + * + * @param buf is points a the target memory region. + * @param size is the requested number of bytes. 
+ * @param userData is a pointer at some user defined data given in the + * constructor. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. + */ + using ReadCallback = size_t (*)(char *buf, size_t size, void *userData); + + /** + * Handle used to identify a cursor. + */ + using CursorId = size_t; + +private: + /** + * Number of bytes to request from the input stream. Set to 64 KiB because + * this seems to be a nice value for I/O operations according to multiple + * sources. + */ + static constexpr size_t REQUEST_SIZE = 64 * 1024; + + /** + * Number of bytes the buffer guarantees to be capable of looking back + * for extracting the current context. + */ + static constexpr size_t LOOKBACK_SIZE = 128; + + /** + * Type used internally to represent one chunk of memory. + */ + using Bucket = std::vector; + + /** + * Type used internally to represent a bucket container. + */ + using BucketList = std::list; + + /** + * Type used internally for representing iterators in the bucket list. + */ + using BucketIterator = BucketList::iterator; + + /** + * Type used internally to represent a read cursor. + */ + struct Cursor { + /** + * Iterator pointing at the current bucket. + */ + BucketIterator bucket; + + /** + * Index of the bucket relative to the start bucket. + */ + size_t bucketIdx; + + /** + * Current offset within that bucket. + */ + size_t bucketOffs; + }; + + /** + * List of buckets containing the buffered memory. + */ + BucketList buckets; + + /** + * List of cursors used to access the memory. Note that cursors can be + * marked as inactive and reused lateron (to avoid having to resize the + * vector). + */ + std::vector cursors; + + /** + * Bitfield specifying which of the cursors is actually valid. + */ + std::vector alive; + + /** + * Function to be called whenever new data is needed. 
Set to nullptr if the + * Buffer is not backed by an input stream. + */ + const ReadCallback callback; + + /** + * User data given in the constructor. + */ + void *userData; + + /** + * Set to true if the input stream is at its end. + */ + bool reachedEnd; + + /** + * Iterator pointing at the current start bucket. + */ + BucketIterator startBucket; + + /** + * Iterator pointing at the last bucket. + */ + BucketIterator endBucket; + + /** + * Byte offset of the start bucket relative to the beginning of the stream. + */ + size_t startOffset; + + /** + * Points at the smallest possible available cursor index, yet does not + * guarantee that this cursor index actuall is free. + */ + CursorId firstDead; + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketIterator &it); + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketList::const_iterator &it) const; + + /** + * Internally used to find the next free cursor in the cursors vector. The + * cursor is marked as active. + * + * @return the next free cursor index. + */ + CursorId nextCursor(); + + /** + * Returns a reference at the next bucket into which data should be + * inserted. + * + * @return a bucket into which the data can be inserted. + */ + Bucket &nextBucket(); + + /** + * Reads data from the input stream and places it in the next free buffer. + */ + void stream(); + + /** + * Moves the given cursor forward. + */ + size_t moveForward(CursorId cursor, size_t relativeOffs); + + /** + * Moves the given cursor backward. + */ + size_t moveBackward(CursorId cursor, size_t relativeOffs); + + /** + * Reads a character from the current cursor position and optionally + * advances. + */ + bool fetchCharacter(CursorId cursor, char &c, bool incr); + +public: + /** + * Intializes the Buffer with a reference to a ReadCallback that is used + * to fetch data from an underlying input stream. 
+ * + * @param callback is the function that will be called whenever data is read + * from the ring buffer and the buffer does not hold enough data to fulfill + * this read request. + * @param userData is a pointer to user defined data which will be passed to + * the callback function. + */ + Buffer(ReadCallback callback, void *userData); + + /** + * Initializes the Buffer with a reference to an std::istream from which + * data will be read. + * + * @param istream is the input stream from which the data should be read. + */ + Buffer(std::istream &istream); + + /** + * Initializes the Buffer with the contents of the given string, after + * this operation the Buffer has a fixed size. + * + * @param str is the string containing the data that should be copied into + * the ring buffer. + */ + Buffer(const std::string &str); + +#ifndef NDEBUG + /** + * Destructor of the Buffer class. Makes sure that all cursors have been + * freed. + */ + ~Buffer(); +#endif + + // No copy + Buffer(const Buffer &) = delete; + + // No assign + Buffer &operator=(const Buffer &) = delete; + + /** + * Creates a new read cursor positioned at the smallest possible position + * in the ring buffer. + */ + CursorId createCursor(); + + /** + * Creates a new read cursor positioned at the same position as the given + * read cursor. + * + * @param ref is the read cursor that should be used as reference for the + * new read cursor. + */ + CursorId createCursor(CursorId ref); + + /** + * Copies the position of one cursor to another cursor. + * + * @param from is the cursor id of which the position should be copied. + * @param to is the cursor id to which the position should be copied. + */ + void copyCursor(CursorId from, CursorId to); + + /** + * Deletes the cursor with the given id. The cursor may no longer be used + * after this function has been called. + * + * @param cursor is the id of the cursor that should be freed. 
+ */ + void deleteCursor(CursorId cursor); + + /** + * Moves a cursor by offs bytes. Note that moving backwards is theoretically + * limited by the LOOKBACK_SIZE of the Buffer, practically it will most + * likely be limited by the REQUEST_SIZE, so you can got at most 64 KiB + * backwards. + * + * @param cursor is the cursor that should be moved. + * @param relativeOffs is a positive or negative integer number specifying + * the number of bytes the cursor should be moved forward (positive numbers) + * or backwards (negative numbers). + * @return the actual number of bytes the cursor was moved. This number is + * smaller than the relativeOffs given in the constructor if the + */ + ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs); + + /** + * Returns the current byte offset of the given cursor relative to the + * beginning of the stream. + * + * @param cursor is the cursor for which the byte offset relative to the + * beginning of the stream should be returned. + * @return the number of bytes since the beginning of the stream for the + * given cursor. + */ + size_t offset(CursorId cursor) const; + + /** + * Returns true if the given cursor currently is at the end of the stream. + * + * @param cursor is the cursor for which the atEnd flag should be returned. + * @return true if the there are no more bytes for this cursor. If false + * is returned, this means that there may be more bytes in the stream, + * nevertheless the end of the stream may be hit once the next read function + * is called. + */ + bool atEnd(CursorId cursor) const; + + /** + * Reads a single character from the ring buffer from the given cursor and + * moves to the next character. + * + * @param cursor specifies the cursor from which the data should be read. + * The cursor will be advanced by one byte. + * @param c is the character into which the data needs to be read. + * @return true if a character was read, false if the end of the stream has + * been reached. 
+ */ + bool read(CursorId cursor, char &c); + + /** + * Returns a single character from the ring buffer from the current cursor + * position and stays at that position. + * + * @param cursor specifies the cursor from which the data should be read. + * The cursor will be advanced by one byte. + * @param c is the character into which the data needs to be read. + * @return true if a character could be fetched, false if the end of the + * stream has been reached. + */ + bool fetch(CursorId cursor, char &c); +}; + +// Forward declaration +class CharReaderFork; + +/** + * Used within parsers for convenient access to single characters in an input + * stream or buffer. It allows reading and peeking single characters from a + * buffer. Additionally it counts the current column/row (with correct handling + * for UTF-8) and contains an internal state machine that handles the detection + * of linebreaks and converts these to a single '\n'. + */ +class CharReader { +public: + /** + * The context struct is used to represent the current context the char + * reader is in. This context can for example be used when building error + * messages. + */ + struct Context { + /** + * Set to the content of the current line. + */ + std::string line; + + /** + * Relative position (in characters) within that line. + */ + size_t relPos; + + /** + * Set to true if the beginning of the line has been truncated (because + * the reader position is too far away from the actual position of the + * line). + */ + bool truncatedStart; + + /** + * Set to true if the end of the line has been truncated (because the + * reader position is too far away from the actual end position of the + * line. 
+ */ + bool truncatedEnd; + + Context() + : line(), relPos(0), truncatedStart(false), truncatedEnd(false) + { + } + + Context(std::string line, size_t relPos, bool truncatedStart, + bool truncatedEnd) + : line(std::move(line)), + relPos(relPos), + truncatedStart(truncatedStart), + truncatedEnd(truncatedEnd) + { + } + }; + +protected: + /** + * Internally used cursor structure for managing the read and the peek + * cursor. + */ + struct Cursor { + /** + * Corresponding cursor in the underlying buffer instance. + */ + const Buffer::CursorId cursor; + + /** + * Current line the cursor is in. + */ + uint32_t line; + + /** + * Current column the cursor is in. + */ + uint32_t column; + + /** + * Constructor of the Cursor class. + * + * @param cursor is the underlying cursor in the Buffer instance. + */ + Cursor(Buffer::CursorId cursor, size_t line, size_t column) + : cursor(cursor), line(line), column(column) + { + } + + /** + * Assigns one cursor to another. + * + * @param buffer is the underlying buffer instance the internal cursor + * belongs to. + * @param cursor is the cursor from which the state should be copied. + */ + void assign(std::shared_ptr buffer, Cursor &cursor); + }; + +private: + /** + * Substitutes "\r", "\n\r", "\r\n" with a single "\n". + * + * @param cursor is the cursor from which the character should be read. + * @param c a reference to the character that should be written. + * @return true if another character needs to be read. + */ + bool substituteLinebreaks(Cursor &cursor, char &c); + + /** + * Reads a single character from the given cursor. + * + * @param cursor is the cursor from which the character should be read. + * @param c a reference to the character that should be written. + * @return true if a character was read, false if the end of the stream has + * been reached. + */ + bool readAtCursor(Cursor &cursor, char &c); + +protected: + /** + * Reference pointing at the underlying buffer. 
+ */ + std::shared_ptr buffer; + + /** + * Cursor used for reading. + */ + Cursor readCursor; + + /** + * Cursor used for peeking. + */ + Cursor peekCursor; + + /** + * Set to true as long the underlying Buffer cursor is at the same position + * for the read and the peek cursor. This is only used for optimization + * purposes and makes consecutive reads a bit faster. + */ + bool coherent; + + /** + * Protected constructor of the CharReader base class. Creates new read + * and peek cursors for the given buffer. + * + * @param buffer is a reference to the underlying Buffer class responsible + * for allowing to read from a single input stream from multiple locations. + */ + CharReader(std::shared_ptr buffer, size_t line, size_t column); + +public: + /** + * Creates a new CharReader instance from a string. + * + * @param str is a string containing the input data. + * @param line is the start line. + * @param column is the start column. + */ + CharReader(const std::string &str, size_t line = 1, size_t column = 1); + + /** + * Creates a new CharReader instance for an input stream. + * + * @param istream is the input stream from which incomming data should be + * read. + * @param line is the start line. + * @param column is the start column. + */ + CharReader(std::istream &istream, size_t line = 1, size_t column = 1); + + /** + * Deletes the used cursors from the underlying buffer instance. + */ + ~CharReader(); + + // No copy + CharReader(const Buffer &) = delete; + + // No assign + CharReader &operator=(const Buffer &) = delete; + + /** + * Peeks a single character. If called multiple times, returns the + * character after the previously peeked character. + * + * @param c is a reference to the character to which the result should be + * written. + * @return true if the character was successfully read, false if there are + * no more characters to be read in the buffer. + */ + bool peek(char &c); + + /** + * Reads a character from the input data. 
If "peek" was called + * beforehand resets the peek pointer. + * + * @param c is a reference to the character to which the result should be + * written. + * @return true if the character was successfully read, false if there are + * no more characters to be read in the buffer. + */ + bool read(char &c); + + /** + * Resets the peek pointer to the "read" pointer. + */ + void resetPeek(); + + /** + * Advances the read pointer to the peek pointer -- so if the "peek" + * function was called, "read" will now return the character after + * the last peeked character. + */ + void consumePeek(); + + /** + * Moves the read cursor to the next non-whitespace character. Returns + * false, if the end of the stream was reached. + * + * @return false if the end of the stream was reached, false othrwise. + */ + bool consumeWhitespace(); + + /** + * Creates a new CharReader located at the same position as this CharReader + * instance, yet the new CharReader can be used independently of this + * CharReader. Use the "commit" function of the returned CharReader to + * copy the state of the forked CharReaderFork to this CharReader. + * + * @return a CharReaderFork instance positioned at the same location as this + * CharReader instance. + */ + CharReaderFork fork(); + + /** + * Returns true if there are no more characters as the stream was + * closed. + * + * @return true if there is no more data. + */ + bool atEnd() const { return buffer->atEnd(readCursor.cursor); } + + /** + * Returns the current line (starting with one). + * + * @return the current line number. + */ + uint32_t getLine() const { return readCursor.line; } + + /** + * Returns the current column (starting with one). + * + * @return the current column number. + */ + uint32_t getColumn() const { return readCursor.column; } + + /** + * Returns the current byte offset of the read cursor. + * + * @return the byte position within the stream. 
+ */ + size_t getOffset() const { return buffer->offset(readCursor.cursor); }; + + /** + * Returns the line the read cursor currently is in, but at most the + * given number of characters in the form of a Context structure. + */ + Context getContext(ssize_t maxSize); +}; + +/** + * A CharReaderFork is returned whenever the "fork" function of the CharReader + * class is used. Its "commit" function can be used to move the underlying + * CharReader instance to the location of the CharReaderFork instance. Otherwise + * the read location of the underlying CharReader is left unchanged. + */ +class CharReaderFork : public CharReader { +private: + friend CharReader; + + /** + * The reader cursor of the underlying CharReader instance. + */ + CharReader::Cursor &parentReadCursor; + + /** + * The peek cursor of the underlying CharReader instance. + */ + CharReader::Cursor &parentPeekCursor; + + /** + * Constructor of the CharReaderFork class. + * + * @param buffer is a reference at the parent Buffer instance. + * @param parentPeekCursor is a reference at the parent read cursor. + * @param parentPeekCursor is a reference at the parent peek cursor. + * @param coherent specifies whether the char reader cursors are initialized + * coherently. + */ + CharReaderFork(std::shared_ptr buffer, + CharReader::Cursor &parentReadCursor, + CharReader::Cursor &parentPeekCursor, bool coherent); + +public: + /** + * Moves the read and peek cursor of the parent CharReader to the location + * of the read and peek cursor in the fork. 
+ */ + void commit(); +}; + +} + +#endif /* _OUSIA_CHAR_READER_HPP_ */ + diff --git a/src/core/common/Exceptions.cpp b/src/core/common/Exceptions.cpp new file mode 100644 index 0000000..d064f35 --- /dev/null +++ b/src/core/common/Exceptions.cpp @@ -0,0 +1,46 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "Exceptions.hpp" + +namespace ousia { + +/* Class LoggableException */ + +std::string LoggableException::formatMessage(const std::string &msg, + const std::string &file, + int line, int column) +{ + std::stringstream ss; + ss << "error "; + if (!file.empty()) { + ss << "while processing \"" << file << "\" "; + } + if (line >= 0) { + ss << "at line " << line << ", "; + if (column >= 0) { + ss << "column " << column << " "; + } + } + ss << "with message: " << msg; + return ss.str(); +} +} + diff --git a/src/core/common/Exceptions.hpp b/src/core/common/Exceptions.hpp new file mode 100644 index 0000000..00d6106 --- /dev/null +++ b/src/core/common/Exceptions.hpp @@ -0,0 +1,162 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Exceptions.hpp + * + * Describes basic exception classes which are used throughout Ousía. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_EXCEPTIONS_HPP_ +#define _OUSIA_EXCEPTIONS_HPP_ + +namespace ousia { + +/** + * Base exception class all other Ousía exceptions should derive from. + */ +class OusiaException : public std::exception { +private: + /** + * Error message which will be printed by the runtime environment if the + * exception is not caught and handled in the code. + */ + const std::string formatedMessage; + +public: + /** + * Constructor of the OusiaException class. + * + * @param formatedMessage is a formated message that should be printed by + * the runtime environment if the exception is not caught. + */ + OusiaException(std::string formatedMessage) + : formatedMessage(std::move(formatedMessage)) + { + } + + /** + * Virtual destructor. + */ + virtual ~OusiaException() {} + + /** + * Implementation of the std::exception what function and used to retrieve + * the error message that should be printed by the runtime environment. + * + * @return a reference to the formated message string given in the + * constructor. + */ + const char *what() const noexcept override + { + return formatedMessage.c_str(); + } +}; + +/** + * Exception class which can be directly passed to a Logger instance and thus + * makes it simple to handle non-recoverable errors in the code. + */ +class LoggableException : public OusiaException { +private: + /** + * Function used internally to build the formated message that should be + * reported to the runtime environment. 
+ */ + static std::string formatMessage(const std::string &msg, + const std::string &file, int line, + int column); + +public: + /** + * Message describing the error that occured. + */ + const std::string msg; + + /** + * Name of the file in which the error occured. May be empty. + */ + const std::string file; + + /** + * Line at which the exception occured. Negative values are ignored. + */ + const int line; + + /** + * Column at which the exception occured. Negative values are ignored. + */ + const int column; + + /** + * Constructor of the LoggableException class. + * + * @param msg contains the error message. + * @param file provides the context the message refers to. May be empty. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + LoggableException(std::string msg, std::string file, int line = -1, + int column = -1) + : OusiaException(formatMessage(msg, file, line, column)), + msg(std::move(msg)), + file(std::move(file)), + line(line), + column(column) + { + } + + /** + * Constructor of the LoggableException class with empty file. + * + * @param msg contains the error message. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + LoggableException(std::string msg, int line = -1, int column = -1) + : OusiaException(formatMessage(msg, "", line, column)), + msg(std::move(msg)), + line(line), + column(column) + { + } + + /** + * Constructor of the LoggableException class with empty file and an + * position object. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. 
+ */ + template + LoggableException(std::string msg, const PosType &pos) + : OusiaException( + formatMessage(msg, "", pos.getLine(), pos.getColumn())), + msg(std::move(msg)), + line(pos.getLine()), + column(pos.getColumn()) + { + } +}; +} + +#endif /* _OUSIA_EXCEPTIONS_HPP_ */ + diff --git a/src/core/common/Logger.cpp b/src/core/common/Logger.cpp new file mode 100644 index 0000000..17f55a6 --- /dev/null +++ b/src/core/common/Logger.cpp @@ -0,0 +1,161 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include +#include + +#include "Logger.hpp" + +namespace ousia { + +/* Class Logger */ + +void Logger::log(Severity severity, const std::string &msg, + const std::string &file, int line, int column) +{ + // Copy the current severity level + if (static_cast(severity) > static_cast(maxEncounteredSeverity)) { + maxEncounteredSeverity = severity; + } + + // Call the actual log message function if the severity is larger or equal + // to the minimum severity + if (static_cast(severity) >= static_cast(minSeverity)) { + process(Message{severity, msg, file, line, column}); + } +} + +unsigned int Logger::pushFilename(const std::string &name) +{ + filenameStack.push(name); + return filenameStack.size(); +} + +unsigned int Logger::popFilename() +{ + filenameStack.pop(); + return filenameStack.size(); +} + +void Logger::unwindFilenameStack(unsigned int pos) +{ + while (filenameStack.size() > pos && !filenameStack.empty()) { + filenameStack.pop(); + } +} + +/* Class TerminalLogger */ + +/** + * Small class used internally for formated terminal output using ANSI/VT100 + * escape codes on supported terminals. + * + * TODO: Deactivate if using windows or use the corresponding API function. + */ +class Terminal { +private: + /** + * If set to false, no control codes are generated. 
+ */ + bool active; + +public: + static const int BLACK = 30; + static const int RED = 31; + static const int GREEN = 32; + static const int YELLOW = 33; + static const int BLUE = 34; + static const int MAGENTA = 35; + static const int CYAN = 36; + static const int WHITE = 37; + + Terminal(bool active) : active(active) {} + + std::string color(int color, bool bright = true) const + { + if (!active) { + return std::string{}; + } + std::stringstream ss; + ss << "\x1b["; + if (bright) { + ss << "1;"; + } + ss << color << "m"; + return ss.str(); + } + + std::string reset() const + { + if (!active) { + return std::string{}; + } + return "\x1b[0m"; + } +}; + +void TerminalLogger::process(const Message &msg) +{ + Terminal t(useColor); + + // Print the file name + if (msg.hasFile()) { + os << t.color(Terminal::WHITE, true) << msg.file << t.reset(); + } + + // Print line and column number + if (msg.hasLine()) { + if (msg.hasFile()) { + os << ':'; + } + os << t.color(Terminal::WHITE, true) << msg.line + << t.reset(); + if (msg.hasColumn()) { + os << ':' << msg.column; + } + } + + // Print the optional seperator + if (msg.hasFile() || msg.hasLine()) { + os << ": "; + } + + // Print the severity + switch (msg.severity) { + case Severity::DEBUG: + break; + case Severity::NOTE: + os << t.color(Terminal::CYAN, true) << "note: "; + break; + case Severity::WARNING: + os << t.color(Terminal::MAGENTA, true) << "warning: "; + break; + case Severity::ERROR: + os << t.color(Terminal::RED, true) << "error: "; + break; + case Severity::FATAL_ERROR: + os << t.color(Terminal::RED, true) << "fatal: "; + break; + } + os << t.reset(); + + // Print the actual message + os << msg.msg << std::endl; +} +} + diff --git a/src/core/common/Logger.hpp b/src/core/common/Logger.hpp new file mode 100644 index 0000000..e6b97f4 --- /dev/null +++ b/src/core/common/Logger.hpp @@ -0,0 +1,609 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can 
redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Logger.hpp + * + * Contains classes for logging messages in Ousía. Provides a generic Logger + * class, and TerminalLogger, an extension of Logger which logs do an output + * stream. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_LOGGER_HPP_ +#define _OUSIA_LOGGER_HPP_ + +#include +#include +#include +#include + +#include "Exceptions.hpp" + +namespace ousia { + +/** + * Enum containing the severities used for logging errors and debug messages. + */ +enum class Severity : int { + /** + * Indicates that this message was only printed for debugging. Note that + * in release builds messages with this severity are discarded. + */ + DEBUG = 0, + + /** + * A message which might provide additional information to the user. + */ + NOTE = 1, + + /** + * A message which warns of possible mistakes by the user which might not be + * actual errors but may lead to unintended behaviour. + */ + WARNING = 2, + + /** + * An error occurred while processing, however program execution continues, + * trying to deal with the error situation (graceful degradation). However, + * messages with this severity may be followed up by fatal errors. + */ + ERROR = 3, + + /** + * A fatal error occurred. Program execution cannot continue. 
+ */ + FATAL_ERROR = 4 +}; + +#ifdef NDEBUG +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE; +#else +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG; +#endif + +/** + * The Logger class is the base class the individual logging systems should + * derive from. It provides a simple interface for logging errors, warnings and + * notes and filters these according to the set minimum severity. Additionally + * a stack of file names is maintained in order to allow simple descent into + * included files. Note however, that this base Logger class simply discards the + * incomming log messages. Use one of the derived classes to actually handle the + * log messages. + */ +class Logger { +public: + /** + * The message struct represents a single log message and all information + * attached to it. + */ + struct Message { + /** + * Severity of the log message. + */ + Severity severity; + + /** + * Actual log message. + */ + std::string msg; + + /** + * Refers to the file which provides the context for this error message. + * May be empty. + */ + std::string file; + + /** + * Line in the above file the error message refers to. Ignored if + * smaller than zero. + */ + int line; + + /** + * Column in the above file the error message refers to. Ignored if + * smaller than zero. + */ + int column; + + /** + * Constructor of the Message struct. + * + * @param severity describes the message severity. + * @param msg contains the actual message. + * @param file provides the context the message refers to. May be empty. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + Message(Severity severity, std::string msg, std::string file, int line, + int column) + : severity(severity), + msg(std::move(msg)), + file(std::move(file)), + line(line), + column(column){}; + + /** + * Returns true if the file string is set. + * + * @return true if the file string is set. 
+ */ + bool hasFile() const { return !file.empty(); } + + /** + * Returns true if the line is set. + * + * @return true if the line number is a non-negative integer. + */ + bool hasLine() const { return line >= 0; } + + /** + * Returns true if column and line are set (since a column has no + * significance without a line number). + * + * @return true if line number and column number are non-negative + * integers. + */ + bool hasColumn() const { return hasLine() && column >= 0; } + }; + +private: + /** + * Minimum severity a log message should have before it is discarded. + */ + Severity minSeverity; + + /** + * Maximum encountered log message severity. + */ + Severity maxEncounteredSeverity; + + /** + * Stack containing the current file names that have been processed. + */ + std::stack filenameStack; + +protected: + /** + * Function to be overriden by child classes to actually display or store + * the messages. The default implementation just discards all incomming + * messages. + * + * @param msg is an instance of the Message struct containing the data that + * should be logged. + */ + virtual void process(const Message &msg){}; + +public: + /** + * Constructor of the Logger class. + * + * @param minSeverity is the minimum severity a log message should have. + * Messages below this severity are discarded. + */ + Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY) + : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG) + { + } + + Logger(const Logger &) = delete; + + /** + * Virtual destructor. + */ + virtual ~Logger(){}; + + /** + * Logs the given message. Most generic log function. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. 
+ * Ignored if negative. + */ + void log(Severity severity, const std::string &msg, const std::string &file, + int line = -1, int column = -1); + + /** + * Logs the given message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void log(Severity severity, const std::string &msg, int line = -1, + int column = -1) + { + log(severity, msg, currentFilename(), line, column); + } + + /** + * Logs the given message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + * @tparam PosType is the actual type of pos and must implement a getLine + * and getColumn function. + */ + template + void logAt(Severity severity, const std::string &msg, const PosType &pos) + { + log(severity, msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs the given loggable exception. + * + * @param ex is the exception that should be logged. + */ + void log(const LoggableException &ex) + { + log(Severity::ERROR, ex.msg, + ex.file.empty() ? currentFilename() : ex.file, ex.line, ex.column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. 
+ */ + void debug(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::DEBUG, msg, file, line, column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void debug(const std::string &msg, int line = -1, int column = -1) + { + debug(msg, currentFilename(), line, column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void debugAt(const std::string &msg, const PosType &pos) + { + debug(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void note(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::NOTE, msg, file, line, column); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. 
+ */ + void note(const std::string &msg, int line = -1, int column = -1) + { + note(msg, currentFilename(), line, column); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void noteAt(const std::string &msg, const PosType &pos) + { + note(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void warning(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::WARNING, msg, file, line, column); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void warningAt(const std::string &msg, const PosType &pos) + { + warning(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void warning(const std::string &msg, int line = -1, int column = -1) + { + warning(msg, currentFilename(), line, column); + } + + /** + * Logs an error message. 
The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void error(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::ERROR, msg, file, line, column); + } + + /** + * Logs an error message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void error(const std::string &msg, int line = -1, int column = -1) + { + error(msg, currentFilename(), line, column); + } + + /** + * Logs an error message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void errorAt(const std::string &msg, const PosType &pos) + { + error(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. 
+ */ + void fatalError(const std::string &msg, const std::string &file, + int line = -1, int column = -1) + { + log(Severity::FATAL_ERROR, msg, file, line, column); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void fatalError(const std::string &msg, int line = -1, int column = -1) + { + fatalError(msg, currentFilename(), line, column); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template + void fatalErrorAt(const std::string &msg, const PosType &pos) + { + fatalError(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Pushes a new file name onto the internal filename stack. + * + * @param name is the name of the file that should be added to the filename + * stack. + * @return the size of the filename stack. This number can be passed to the + * "unwindFilenameStack" method in order to return the stack to state it was + * in after this function has been called. + */ + unsigned int pushFilename(const std::string &name); + + /** + * Pops the filename from the internal filename stack. + * + * @return the current size of the filename stack. + */ + unsigned int popFilename(); + + /** + * Pops elements from the filename stack while it has more elements than + * the given number and the stack is non-empty. + * + * @param pos is the position the filename stack should be unwound to. Use + * a number returned by pushFilename. + */ + void unwindFilenameStack(unsigned int pos); + + /** + * Returns the topmost filename from the internal filename stack. 
+ * + * @return the topmost filename from the filename stack or an empty string + * if the filename stack is empty. + */ + std::string currentFilename() + { + return filenameStack.empty() ? std::string{} : filenameStack.top(); + } + + /** + * Returns the maximum severity that was encountered by the Logger but at + * least Severity::DEBUG. + * + * @return the severity of the most severe log message but at least + * Severity::DEBUG. + */ + Severity getMaxEncounteredSeverity() { return maxEncounteredSeverity; } + + /** + * Returns the minimum severity. Messages with a smaller severity are + * discarded. + * + * @return the minimum severity. + */ + Severity getMinSeverity() { return minSeverity; } + + /** + * Sets the minimum severity. Messages with a smaller severity will be + * discarded. Only new messages will be filtered according to the new value. + * + * @param severity is the minimum severity for new log messages. + */ + void setMinSeverity(Severity severity) { minSeverity = severity; } +}; + +/** + * Class extending the Logger class and printing the log messages to the given + * stream. + */ +class TerminalLogger : public Logger { +private: + /** + * Reference to the target output stream. + */ + std::ostream &os; + + /** + * If true, the TerminalLogger will use colors to make the log messages + * prettier. + */ + bool useColor; + +protected: + /** + * Implements the process function and logs the messages to the output. + */ + void process(const Message &msg) override; + +public: + /** + * Constructor of the TerminalLogger class. + * + * @param os is the output stream the log messages should be logged to. + * Should be set to std::cerr in most cases. + * @param useColor if true, the TerminalLogger class will do its best to + * use ANSI/VT100 control sequences for colored log messages. + * @param minSeverity is the minimum severity below which log messages are + * discarded. 
+ */ + TerminalLogger(std::ostream &os, bool useColor = false, + Severity minSeverity = DEFAULT_MIN_SEVERITY) + : Logger(minSeverity), os(os), useColor(useColor) + { + } +}; +} + +#endif /* _OUSIA_LOGGER_HPP_ */ + diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp new file mode 100644 index 0000000..c460ed4 --- /dev/null +++ b/src/core/common/Utils.cpp @@ -0,0 +1,59 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
/* ---- Declarations: src/core/common/Utils.hpp ---- */

#ifndef _OUSIA_UTILS_H_
#define _OUSIA_UTILS_H_

#include <sstream>
#include <string>

namespace ousia {

/**
 * Collection of static helper functions for classifying ASCII characters and
 * for simple string manipulation.
 */
class Utils {
public:
    /**
     * Returns true if the given character is in [A-Za-z]
     */
    static bool isAlphabetic(const char c)
    {
        return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'));
    }

    /**
     * Returns true if the given character is in [0-9]
     */
    static bool isNumeric(const char c) { return (c >= '0') && (c <= '9'); }

    /**
     * Returns true if the given character is in [0-9A-Fa-f]
     */
    static bool isHexadecimal(const char c)
    {
        return ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F')) ||
               ((c >= 'a') && (c <= 'f'));
    }

    /**
     * Returns true if the given character is in [A-Za-z0-9]
     */
    static bool isAlphanumeric(const char c)
    {
        return isAlphabetic(c) || isNumeric(c);
    }

    /**
     * Returns true if the given string matches [A-Za-z_][A-Za-z0-9_-]*, i.e.
     * it is non-empty, starts with a letter or an underscore and continues
     * with letters, digits, underscores or dashes only.
     *
     * @param name is the string that should be checked.
     * @return true if the string is a valid identifier, false otherwise.
     */
    static bool isIdentifier(const std::string &name);

    /**
     * Returns true if the given character is a whitespace character (space,
     * tab, line feed or carriage return).
     */
    static bool isWhitespace(const char c)
    {
        return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
    }

    /**
     * Removes whitespace at the beginning and the end of the given string.
     *
     * @param s is the string that should be trimmed.
     * @return a copy of s without leading and trailing whitespace. The result
     * is empty if s is empty or consists of whitespace only.
     */
    static std::string trim(const std::string &s);

    /**
     * Turns the elements of a collection into a string separated by the
     * given delimiter.
     *
     * @param es is an iterable container of elements that can be appended to
     * an output stream (the << operator must be implemented).
     * @param delim is the delimiter that should be used to separate the
     * items.
     * @param start is a character sequence that should be prepended to the
     * result.
     * @param end is a character sequence that should be appended to the
     * result.
     * @return start, followed by the delimiter separated elements, followed
     * by end.
     */
    template <class T>
    static std::string join(const T &es, const std::string &delim,
                            const std::string &start = "",
                            const std::string &end = "")
    {
        std::stringstream res;
        bool first = true;
        res << start;
        for (const auto &e : es) {
            // The delimiter is only written between two elements
            if (!first) {
                res << delim;
            }
            res << e;
            first = false;
        }
        res << end;
        return res.str();
    }
};
}

#endif /* _OUSIA_UTILS_H_ */

/* ---- Definitions: src/core/common/Utils.cpp ---- */

#include <algorithm>
#include <limits>

namespace ousia {

std::string Utils::trim(const std::string &s)
{
    // Narrow the half-open range [begin, end) from both sides until it is
    // either empty or delimited by non-whitespace characters. A string with
    // a single non-whitespace character yields a range of length one.
    size_t begin = 0;
    size_t end = s.size();
    while (begin < end && isWhitespace(s[begin])) {
        ++begin;
    }
    while (end > begin && isWhitespace(s[end - 1])) {
        --end;
    }
    return s.substr(begin, end - begin);
}

bool Utils::isIdentifier(const std::string &name)
{
    // The pattern requires at least one character
    if (name.empty()) {
        return false;
    }

    // First character: [A-Za-z_]
    if (!(isAlphabetic(name[0]) || name[0] == '_')) {
        return false;
    }

    // Every following character: [A-Za-z0-9_-]
    for (size_t i = 1; i < name.size(); ++i) {
        const char c = name[i];
        if (!(isAlphanumeric(c) || c == '_' || c == '-')) {
            return false;
        }
    }
    return true;
}
}
+*/ + +#include + +#include "Utils.hpp" +#include "Variant.hpp" + +namespace ousia { + +/* Class Variant::TypeException */ + +Variant::TypeException::TypeException(Type actualType, Type requestedType) + : OusiaException(std::string("Variant: Requested \"") + + Variant::getTypeName(requestedType) + + std::string("\" but is \"") + + Variant::getTypeName(actualType) + std::string("\"")), + actualType(actualType), + requestedType(requestedType) +{ +} + +/* Class Variant */ + +const char *Variant::getTypeName(Type type) +{ + switch (type) { + case Type::NULLPTR: + return "null"; + case Type::BOOL: + return "boolean"; + case Type::INT: + return "integer"; + case Type::DOUBLE: + return "double"; + case Type::STRING: + return "string"; + case Type::ARRAY: + return "array"; + case Type::MAP: + return "map"; + } + return "unknown"; +} + +Variant::boolType Variant::toBool() const +{ + switch (getType()) { + case Type::NULLPTR: + return false; + case Type::BOOL: + return asBool(); + case Type::INT: + return asInt() != 0; + case Type::DOUBLE: + return asDouble() != 0.0; + case Type::STRING: + return true; + case Type::ARRAY: + return true; + case Type::MAP: + return true; + } + return false; +} + +Variant::intType Variant::toInt() const +{ + switch (getType()) { + case Type::NULLPTR: + return 0; + case Type::BOOL: + return asBool() ? 1 : 0; + case Type::INT: + return asInt(); + case Type::DOUBLE: + return asDouble(); + case Type::STRING: + return 0; // TODO: Parse string as int + case Type::ARRAY: { + const arrayType &a = asArray(); + return (a.size() == 1) ? a[0].toInt() : 0; + } + case Type::MAP: + return 0; + } + return false; +} + +Variant::doubleType Variant::toDouble() const +{ + switch (getType()) { + case Type::NULLPTR: + return 0.0; + case Type::BOOL: + return asBool() ? 
1.0 : 0.0; + case Type::INT: + return asInt(); + case Type::DOUBLE: + return asDouble(); + case Type::STRING: + return 0.0; // TODO: Parse string as double + case Type::ARRAY: { + const arrayType &a = asArray(); + return (a.size() == 1) ? a[0].toDouble() : 0; + } + case Type::MAP: + return 0; + } + return false; +} + +Variant::stringType Variant::toString(bool escape) const +{ + switch (getType()) { + case Type::NULLPTR: + return "null"; + case Type::BOOL: + return asBool() ? "true" : "false"; + case Type::INT: + return std::to_string(asInt()); + case Type::DOUBLE: + return std::to_string(asDouble()); + case Type::STRING: { + // TODO: Use proper serialization function + std::stringstream ss; + ss << "\"" << asString() << "\""; + return ss.str(); + } + case Type::ARRAY: + return Utils::join(asArray(), ", ", "[", "]"); + case Type::MAP: + return Utils::join(asMap(), ", ", "{", "}"); + } + return ""; +} + +} + diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp new file mode 100644 index 0000000..d411fd3 --- /dev/null +++ b/src/core/common/Variant.hpp @@ -0,0 +1,761 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Variant.hpp + * + * The Variant class is used to efficiently represent a variables of varying + * type. 
Variant instances are used to represent data given by the end user and + * to exchange information between the host application and the script clients. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_VARIANT_HPP_ +#define _OUSIA_VARIANT_HPP_ + +#include +#include +#include +#include +#include + +// TODO: Use +// http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html +// later (will allow to use 8 bytes for a variant) + +#include "Exceptions.hpp" + +namespace ousia { + +/** + * Instances of the Variant class represent any kind of data that is exchanged + * between the host application and the script engine. Variants are immutable. + */ +class Variant { +public: + /** + * Enum containing the possible types a variant may have. + */ + enum class Type : int16_t { + NULLPTR, + BOOL, + INT, + DOUBLE, + STRING, + ARRAY, + MAP + }; + + /** + * Exception thrown whenever a variant is accessed via a getter function + * that is not supported for the current variant type. + */ + class TypeException : public OusiaException { + private: + /** + * Internally used string holding the exception message. + */ + const std::string msg; + + public: + /** + * Contains the actual type of the variant. + */ + const Type actualType; + + /** + * Contains the requested type of the variant. + */ + const Type requestedType; + + /** + * Constructor of the TypeException. + * + * @param actualType describes the actual type of the variant. + * @param requestedType describes the type in which the variant was + * requested. + */ + TypeException(Type actualType, Type requestedType); + }; + + using boolType = bool; + using intType = int32_t; + using doubleType = double; + using stringType = std::string; + using arrayType = std::vector; + using mapType = std::map; + +private: + /** + * Used to store the actual type of the variant. 
+ */ + Type type = Type::NULLPTR; + + /** + * Anonymous union containing the possible value of the variant. + */ + union { + /** + * The boolean value. Only valid if type is Type::BOOL. + */ + boolType boolVal; + /** + * The integer value. Only valid if type is Type::INT. + */ + intType intVal; + /** + * The number value. Only valid if type is Type::DOUBLE. + */ + doubleType doubleVal; + /** + * Pointer to the more complex data structures on the free store. Only + * valid if type is one of Type::STRING, Type::ARRAY, + * Type::MAP. + */ + void *ptrVal; + }; + + /** + * Internally used to convert the current pointer value to a reference of + * the specified type. + */ + template + T &asObj(Type requestedType) const + { + const Type actualType = getType(); + if (actualType == requestedType) { + return *(static_cast(ptrVal)); + } + throw TypeException{actualType, requestedType}; + } + + /** + * Used internally to assign the value of another Variant instance to this + * instance. + * + * @param v is the Variant instance that should be copied to this instance. + */ + void copy(const Variant &v) + { + destroy(); + type = v.type; + switch (type) { + case Type::NULLPTR: + break; + case Type::BOOL: + boolVal = v.boolVal; + break; + case Type::INT: + intVal = v.intVal; + break; + case Type::DOUBLE: + doubleVal = v.doubleVal; + break; + case Type::STRING: + ptrVal = new stringType(v.asString()); + break; + case Type::ARRAY: + ptrVal = new arrayType(v.asArray()); + break; + case Type::MAP: + ptrVal = new mapType(v.asMap()); + break; + } + } + + /** + * Used internally to move the value of another Variant instance to this + * instance. + * + * @param v is the Variant instance that should be copied to this instance. 
+ */ + void move(Variant &&v) + { + destroy(); + type = v.type; + switch (type) { + case Type::NULLPTR: + break; + case Type::BOOL: + boolVal = v.boolVal; + break; + case Type::INT: + intVal = v.intVal; + break; + case Type::DOUBLE: + doubleVal = v.doubleVal; + break; + case Type::STRING: + case Type::ARRAY: + case Type::MAP: + ptrVal = v.ptrVal; + v.ptrVal = nullptr; + break; + } + v.type = Type::NULLPTR; + } + + /** + * Used internally to destroy any value that was allocated on the heap. + */ + void destroy() + { + if (ptrVal) { + switch (type) { + case Type::STRING: + delete static_cast(ptrVal); + break; + case Type::ARRAY: + delete static_cast(ptrVal); + break; + case Type::MAP: + delete static_cast(ptrVal); + break; + default: + break; + } + } + } + +public: + /** + * Copy constructor of the Variant class. + * + * @param v is the Variant instance that should be cloned. + */ + Variant(const Variant &v) : ptrVal(nullptr) { copy(v); } + + /** + * Move constructor of the Variant class. + * + * @param v is the reference to the Variant instance that should be moved, + * this instance is invalidated afterwards. + */ + Variant(Variant &&v) : ptrVal(nullptr) { move(std::move(v)); } + + /** + * Default constructor. Type is set to Type:null. + */ + Variant() : ptrVal(nullptr) { setNull(); } + + /** + * Default destructor, frees any memory that was allocated on the heap. + */ + ~Variant() { destroy(); } + + /** + * Constructor for null values. Initializes the variant as null value. + */ + Variant(std::nullptr_t) : ptrVal(nullptr) { setNull(); } + + /** + * Constructor for boolean values. + * + * @param b boolean value. + */ + Variant(boolType b) : ptrVal(nullptr) { setBool(b); } + + /** + * Constructor for integer values. + * + * @param i integer value. + */ + Variant(intType i) : ptrVal(nullptr) { setInt(i); } + + /** + * Constructor for double values. + * + * @param d double value. 
+ */ + Variant(doubleType d) : ptrVal(nullptr) { setDouble(d); } + + /** + * Constructor for string values. The given string is copied and managed by + * the new Variant instance. + * + * @param s is a pointer to a zero-terminated C-style string used as string value. + */ + Variant(const char *s) : ptrVal(nullptr) { setString(s); } + + /** + * Constructor for array values. The array is taken by value and moved into + * the new Variant instance. + * + * @param a is the array that should be stored in the variant. + */ + Variant(arrayType a) : ptrVal(nullptr) { setArray(std::move(a)); } + + /** + * Constructor for map values. The map is taken by value and moved into the + * new Variant instance. + * + * @param m is the map that should be stored in the variant. + */ + Variant(mapType m) : ptrVal(nullptr) { setMap(std::move(m)); } + + /** + * Copy assignment operator. NOTE(review): forwards to copy() without a self-assignment check. + */ + Variant &operator=(const Variant &v) + { + copy(v); + return *this; + } + + /** + * Move assignment operator. + */ + Variant &operator=(Variant &&v) + { + move(std::move(v)); + return *this; + } + + /** + * Assign nullptr_t operator (allows to write Variant v = nullptr). + * + * @param p is an instance of std::nullptr_t. + */ + Variant &operator=(std::nullptr_t) + { + setNull(); + return *this; + } + + /** + * Assign a boolean value. + * + * @param b is the boolean value to which the variant should be set. + */ + Variant &operator=(boolType b) + { + setBool(b); + return *this; + } + + /** + * Assign an integer value. + * + * @param i is the integer value to which the variant should be set. + */ + Variant &operator=(intType i) + { + setInt(i); + return *this; + } + + /** + * Assign a double value. + * + * @param d is the double value to which the variant should be set. + */ + Variant &operator=(doubleType d) + { + setDouble(d); + return *this; + } + + /** + * Assign a zero terminated const char array. + * + * @param s is the zero terminated const char array to which the variant + * should be set. 
+ */ + Variant &operator=(const char *s) + { + setString(s); + return *this; + } + + /** + * Checks whether this Variant instance represents the nullptr. + * + * @return true if the Variant instance represents the nullptr, false + * otherwise. + */ + bool isNull() const { return type == Type::NULLPTR; } + + /** + * Checks whether this Variant instance is a boolean. + * + * @return true if the Variant instance is a boolean, false otherwise. + */ + bool isBool() const { return type == Type::BOOL; } + + /** + * Checks whether this Variant instance is an integer. + * + * @return true if the Variant instance is an integer, false otherwise. + */ + bool isInt() const { return type == Type::INT; } + + /** + * Checks whether this Variant instance is a double. + * + * @return true if the Variant instance is a double, false otherwise. + */ + bool isDouble() const { return type == Type::DOUBLE; } + + /** + * Checks whether this Variant instance is a string. + * + * @return true if the Variant instance is a string, false otherwise. + */ + bool isString() const { return type == Type::STRING; } + + /** + * Checks whether this Variant instance is an array. + * + * @return true if the Variant instance is an array, false otherwise. + */ + bool isArray() const { return type == Type::ARRAY; } + + /** + * Checks whether this Variant instance is a map. + * + * @return true if the Variant instance is a map, false otherwise. + */ + bool isMap() const { return type == Type::MAP; } + + /** + * Returns the Variant boolean value. Performs no type conversion. Throws an + * exception if the underlying type is not a boolean. + * + * @return the boolean value. + */ + boolType asBool() const + { + if (isBool()) { + return boolVal; + } + throw TypeException{getType(), Type::BOOL}; + } + + /** + * Returns the Variant integer value. Performs no type conversion. Throws an + * exception if the underlying type is not an integer. + * + * @return the integer value. 
+ */ + intType asInt() const + { + if (isInt()) { + return intVal; + } + throw TypeException{getType(), Type::INT}; + } + + /** + * Returns the Variant double value. Performs no type conversion. Throws an + * exception if the underlying type is not a double. + * + * @return the double value. + */ + doubleType asDouble() const + { + if (isDouble()) { + return doubleVal; + } + throw TypeException{getType(), Type::DOUBLE}; + } + + /** + * Returns a const reference to the string value. Performs no type + * conversion. Throws an exception if the underlying type is not a string. + * + * @return the string value as const reference. + */ + const stringType &asString() const + { + return asObj(Type::STRING); + } + + /** + * Returns a reference to the string value. Performs no type + * conversion. Throws an exception if the underlying type is not a string. + * + * @return the string value as reference. + */ + stringType &asString() { return asObj(Type::STRING); } + + /** + * Returns a const reference to the array value. Performs no type + * conversion. Throws an exception if the underlying type is not an array. + * + * @return the array value as const reference. + */ + const arrayType &asArray() const { return asObj(Type::ARRAY); } + + /** + * Returns a reference to the array value. Performs no type + * conversion. Throws an exception if the underlying type is not an array. + * + * @return the array value as reference. + */ + arrayType &asArray() { return asObj(Type::ARRAY); } + + /** + * Returns a const reference to the map value. Performs no type + * conversion. Throws an exception if the underlying type is not a map. + * + * @return the map value as const reference. + */ + const mapType &asMap() const { return asObj(Type::MAP); } + + /** + * Returns a reference to the map value. Performs no type conversion. + * Throws an exception if the underlying type is not a map. + * + * @return the map value as reference. 
+ */ + mapType &asMap() { return asObj(Type::MAP); } + + /** + * Returns the value of the Variant as boolean, performs type conversion. + * + * @return the Variant value converted to a boolean value. + */ + boolType toBool() const; + + /** + * Returns the value of the Variant as integer, performs type conversion. + * + * @return the Variant value converted to an integer value. + */ + intType toInt() const; + + /** + * Returns the value of the Variant as double, performs type conversion. + * + * @return the Variant value converted to a double value. + */ + doubleType toDouble() const; + + /** + * Returns the value of the Variant as string, performs type conversion. + * + * @return the value of the variant as string. + * @param escape if set to true, adds double quotes to strings and escapes + * them properly (resulting in a more or less JSONesque output). + */ + stringType toString(bool escape = false) const; + + /** + * Sets the variant to null. + */ + void setNull() + { + destroy(); + type = Type::NULLPTR; + ptrVal = nullptr; + } + + /** + * Sets the variant to the given boolean value. + * + * @param b is the new boolean value. + */ + void setBool(boolType b) + { + destroy(); + type = Type::BOOL; + boolVal = b; + } + + /** + * Sets the variant to the given integer value. + * + * @param i is the new integer value. + */ + void setInt(intType i) + { + destroy(); + type = Type::INT; + intVal = i; + } + + /** + * Sets the variant to the given double value. + * + * @param d is the new double value. + */ + void setDouble(doubleType d) + { + destroy(); + type = Type::DOUBLE; + doubleVal = d; + } + + /** + * Sets the variant to the given string value. + * + * @param s is the new string value; the characters are copied. + */ + void setString(const char *s) + { + if (isString()) { + asString().assign(s); + } else { + destroy(); + type = Type::STRING; + ptrVal = new stringType(s); + } + } + + /** + * Sets the variant to the given array value. + * + * @param a is the new array value. 
+ */ + void setArray(arrayType a) + { + if (isArray()) { + asArray().swap(a); + } else { + destroy(); + type = Type::ARRAY; + ptrVal = new arrayType(std::move(a)); + } + } + + /** + * Sets the variant to the given map value. + * + * @param m is the new map value. + */ + void setMap(mapType m) + { + if (isMap()) { + asMap().swap(m); + } else { + destroy(); + type = Type::MAP; + ptrVal = new mapType(std::move(m)); + } + } + + /** + * Returns the current type of the Variant. + * + * @return the current type of the Variant. + */ + Type getType() const { return type; } + + /** + * Returns the name of the given variant type as C-style string. + */ + static const char *getTypeName(Type type); + + /** + * Returns the name of the type of this variant instance. + */ + const char *getTypeName() { return Variant::getTypeName(getType()); } + + /** + * Prints the Variant to the output stream. + */ + friend std::ostream &operator<<(std::ostream &os, const Variant &v) + { + return os << v.toString(true); + } + + /** + * Prints a key value pair to the output stream. + */ + friend std::ostream &operator<<(std::ostream &os, + const mapType::value_type &v) + { + // TODO: Use proper serialization function + return os << "\"" << v.first << "\": " << v.second.toString(true); + } + + /* + * Comparison operators. + */ + + friend bool operator<(const Variant &lhs, const Variant &rhs) + { + // If the types do not match, we can not do a meaningful comparison. 
+ if (lhs.getType() != rhs.getType()) { + throw TypeException(lhs.getType(), rhs.getType()); + } + switch (lhs.getType()) { + case Type::NULLPTR: + return false; + case Type::BOOL: + return lhs.boolVal < rhs.boolVal; + case Type::INT: + return lhs.intVal < rhs.intVal; + case Type::DOUBLE: + return lhs.doubleVal < rhs.doubleVal; + case Type::STRING: + return lhs.asString() < rhs.asString(); + case Type::ARRAY: + return lhs.asArray() < rhs.asArray(); + case Type::MAP: + return lhs.asMap() < rhs.asMap(); + } + throw OusiaException("Internal Error! Unknown type!"); + } + friend bool operator>(const Variant &lhs, const Variant &rhs) + { + return rhs < lhs; + } + friend bool operator<=(const Variant &lhs, const Variant &rhs) + { + return !(lhs > rhs); + } + friend bool operator>=(const Variant &lhs, const Variant &rhs) + { + return !(lhs < rhs); + } + + friend bool operator==(const Variant &lhs, const Variant &rhs) + { + if (lhs.getType() != rhs.getType()) { + return false; + } + switch (lhs.getType()) { + case Type::NULLPTR: + return true; + case Type::BOOL: + return lhs.boolVal == rhs.boolVal; + case Type::INT: + return lhs.intVal == rhs.intVal; + case Type::DOUBLE: + return lhs.doubleVal == rhs.doubleVal; + case Type::STRING: + return lhs.asString() == rhs.asString(); + case Type::ARRAY: + return lhs.asArray() == rhs.asArray(); + case Type::MAP: + return lhs.asMap() == rhs.asMap(); + } + throw OusiaException("Internal Error! 
Unknown type!"); + } + + friend bool operator!=(const Variant &lhs, const Variant &rhs) + { + return !(lhs == rhs); + } +}; +} + +#endif /* _OUSIA_VARIANT_HPP_ */ + diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp new file mode 100644 index 0000000..e611842 --- /dev/null +++ b/src/core/common/VariantReader.cpp @@ -0,0 +1,625 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +#include +#include + +#include "VariantReader.hpp" +#include "Utils.hpp" + +namespace ousia { + +// TODO: Better error messages (like "Expected 'x' but got 'y'") +// TODO: Replace delims with single char delim where possible +// TODO: Use custom return value instead of std::pair +// TODO: Allow buffered char reader to "fork" +// TODO: Rename CharReader to shorter CharReader +// TODO: Implement context in CharReader (to allow error messages to extract the +// current line) + +/* Error Messages */ + +static const char *ERR_UNEXPECTED_CHAR = "Unexpected character"; +static const char *ERR_UNEXPECTED_END = "Unexpected literal end"; +static const char *ERR_UNTERMINATED = "Unterminated literal"; +static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; +static const char *ERR_INVALID_INTEGER = "Invalid integer value"; +static const char *ERR_TOO_LARGE = "Value too large to represent"; + +/* Class Number */ + +/** + * Class used internally to represent a number (integer or double). The number + * is represented by its components (base value a, numerator n, denominator d, + * exponent e, sign s and exponent sign sE). + */ +class Number { +private: + /** + * Represents the part of the number: Base value a, numerator n, exponent e. + */ + enum class Part { A, N, E }; + + /** + * State used in the parser state machine + */ + enum class State { + INIT, + HAS_MINUS, + LEADING_ZERO, + LEADING_POINT, + INT, + HEX, + POINT, + EXP_INIT, + EXP_HAS_MINUS, + EXP + }; + + /** + * Returns the numeric value of the given ASCII character (returns 0 for + * '0', 1 for '1', 10 for 'A' and so on). + * + * @param c is the character for which the numeric value should be returned. + * @return the numeric value the character represents, or -1 if the character + * is neither a digit nor a letter in the ranges A-O / a-o. 
+ */ + static int charValue(char c) + { + if (c >= '0' && c <= '9') { + return c & 0x0F; + } + if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) { + return (c & 0x0F) + 9; + } + return -1; + } + + /** + * Appends the value of the character c to the internal number + * representation and reports any errors that might occur. + */ + bool appendChar(char c, int base, Part p, CharReader &reader, + Logger &logger) + { + // Check whether the given character is valid + int v = charValue(c); + if (v < 0 || v >= base) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + + // Append the number to the specified part + switch (p) { + case Part::A: + a = a * base + v; + break; + case Part::N: + n = n * base + v; + d = d * base; + break; + case Part::E: + e = e * base + v; + break; + } + + // Check for any overflows. NOTE(review): this only notices a wrapped, + // negative result; signed overflow of the int64_t parts is undefined + // behaviour in C++, so the check is not reliable. + if (a < 0 || n < 0 || d < 0 || e < 0) { + logger.errorAt(ERR_TOO_LARGE, reader); + return false; + } + return true; + } + +public: + /** + * Sign and exponent sign. + */ + int8_t s, sE; + + /** + * Exponent + */ + int16_t e; + + /** + * Base value, numerator, denominator + */ + int64_t a, n, d; + + /** + * Constructor of the number class. + */ + Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {} + + /** + * Returns the represented double value. + */ + double doubleValue() + { + return s * (a + ((double)n / (double)d)) * pow(10.0, (double)(sE * e)); + } + + /** + * Returns the represented integer value. Only a lossless operation, if the + * number is an integer (as can be checked via the isInt method), otherwise + * the exponent and the fractional value will be truncated. + */ + int64_t intValue() { return s * a; } + + /** + * Returns true, if the number is an integer (has no fractional or + * exponential part). + */ + bool isInt() { return (n == 0) && (d == 1) && (e == 0); } + + /** + * Tries to parse the number from the given stream and logs any errors to + * the given logger instance. Numbers are terminated by one of the given + * delimiters. 
+ */ + bool parse(CharReader &reader, Logger &logger, + const std::unordered_set &delims); +}; + +bool Number::parse(CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + State state = State::INIT; + char c; + + // Consume the first whitespace characters + reader.consumeWhitespace(); + + // Iterate over the FSM to extract numbers + while (reader.peek(c)) { + // Abort, once a delimiter or whitespace is reached + if (Utils::isWhitespace(c) || delims.count(c)) { + reader.resetPeek(); + break; + } + + // The character is not a whitespace character and not a delimiter + switch (state) { + case State::INIT: + case State::HAS_MINUS: + switch (c) { + case '-': + // Do not allow multiple minus signs + if (state == State::HAS_MINUS) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + state = State::HAS_MINUS; + s = -1; + break; + case '0': + // Remember a leading zero for the detection of "0x" + state = State::LEADING_ZERO; + break; + case '.': + // Remember a leading point as ".eXXX" is invalid + state = State::LEADING_POINT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::LEADING_ZERO: + if (c == 'x' || c == 'X') { + state = State::HEX; + break; + } + // fallthrough + case State::INT: + switch (c) { + case '.': + state = State::POINT; + break; + case 'e': + case 'E': + state = State::EXP_INIT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::HEX: + if (!appendChar(c, 16, Part::A, reader, logger)) { + return false; + } + break; + case State::LEADING_POINT: + case State::POINT: + switch (c) { + case 'e': + case 'E': + if (state == State::LEADING_POINT) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + state = State::EXP_INIT; + break; + default: + state = State::POINT; + if (!appendChar(c, 10, Part::N, reader, 
logger)) { + return false; + } + break; + } + break; + case State::EXP_HAS_MINUS: + case State::EXP_INIT: + if (c == '-') { + if (state == State::EXP_HAS_MINUS) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + state = State::EXP_HAS_MINUS; + sE = -1; + } else { + state = State::EXP; + if (!appendChar(c, 10, Part::E, reader, logger)) { + return false; + } + } + break; + case State::EXP: + if (!appendChar(c, 10, Part::E, reader, logger)) { + return false; + } + break; + } + reader.consumePeek(); + } + + // States in which ending is valid. Log an error in other states + if (state == State::LEADING_ZERO || state == State::HEX || + state == State::INT || state == State::POINT || + state == State::EXP) { + return true; + } + logger.errorAt(ERR_UNEXPECTED_END, reader); + return false; +} + + +/* Class VariantReader */ + +static const int STATE_INIT = 0; +static const int STATE_IN_STRING = 1; +static const int STATE_IN_ARRAY = 2; +static const int STATE_EXPECT_COMMA = 3; +static const int STATE_ESCAPE = 4; +static const int STATE_WHITESPACE = 5; +static const int STATE_RESYNC = 6; + +template +static std::pair error(CharReader &reader, Logger &logger, + const char *err, T res) +{ + logger.errorAt(err, reader); + return std::make_pair(false, std::move(res)); +} + +std::pair VariantReader::parseString( + CharReader &reader, Logger &logger, + const std::unordered_set *delims) +{ + // Initialize the internal state + int state = STATE_INIT; + char quote = 0; + std::stringstream res; + + // Consume all whitespace + reader.consumeWhitespace(); + + // State machine which iterates over each character in the stream + // TODO: Combination of peeking and consumePeek is stupid as consumePeek is + // the default (read and putBack would obviously be better, yet the latter + // is not trivial to implement in the current CharReader). 
+ char c; + while (reader.peek(c)) { + switch (state) { + case STATE_INIT: + if (c == '"' || c == '\'') { + quote = c; + state = STATE_IN_STRING; + break; + } else if (delims && delims->count(c)) { + return error(reader, logger, ERR_UNEXPECTED_END, res.str()); + } + return error(reader, logger, ERR_UNEXPECTED_CHAR, res.str()); + case STATE_IN_STRING: + if (c == quote) { + reader.consumePeek(); + return std::make_pair(true, res.str()); + } else if (c == '\\') { + state = STATE_ESCAPE; + reader.consumePeek(); + break; + } else if (c == '\n') { + return error(reader, logger, ERR_UNTERMINATED, res.str()); + } + res << c; + reader.consumePeek(); + break; + case STATE_ESCAPE: + // Handle all possible special escape characters + switch (c) { + case 'b': + res << '\b'; + break; + case 'f': + res << '\f'; + break; + case 'n': + res << '\n'; + break; + case 'r': + res << '\r'; + break; + case 't': + res << '\t'; + break; + case 'v': + res << '\v'; + break; + case '\'': + res << '\''; + break; + case '"': + res << '"'; + break; + case '\\': + res << '\\'; + break; + case '\n': + break; + case 'x': + // TODO: Parse Latin-1 sequence hex XX + break; + case 'u': + // TODO: Parse 16-Bit unicode character hex XXXX + break; + default: + if (Utils::isNumeric(c)) { + // TODO: Parse octal 000 sequence + } else { + logger.errorAt(ERR_INVALID_ESCAPE, reader); + } + break; + } + + // Switch back to the "normal" state + state = STATE_IN_STRING; + reader.consumePeek(); + break; + } + } + return error(reader, logger, ERR_UNEXPECTED_END, res.str()); +} + +std::pair VariantReader::parseArray( + CharReader &reader, Logger &logger, char delim) +{ + Variant::arrayType res; + bool hadError = false; + int state = delim ? STATE_IN_ARRAY : STATE_INIT; + delim = delim ? 
delim : ']'; + char c; + + // Consume all whitespace + reader.consumeWhitespace(); + + // Iterate over the characters, use the parseGeneric function to read the + // elements + while (reader.peek(c)) { + // Generically handle the end of the array + if (state != STATE_INIT && c == delim) { + reader.consumePeek(); + return std::make_pair(!hadError, res); + } + + switch (state) { + case STATE_INIT: + if (c != '[') { + return error(reader, logger, ERR_UNEXPECTED_CHAR, res); + } + state = STATE_IN_ARRAY; + reader.consumePeek(); + break; + case STATE_IN_ARRAY: { + // Try to read an element using the parseGeneric function + reader.resetPeek(); + auto elem = parseGeneric(reader, logger, {',', delim}); + res.push_back(elem.second); + + // If the reader had no error, expect a comma, otherwise skip + // to the next comma in the stream + if (elem.first) { + state = STATE_EXPECT_COMMA; + } else { + state = STATE_RESYNC; + hadError = true; + } + break; + } + case STATE_EXPECT_COMMA: + // Skip whitespace + if (c == ',') { + state = STATE_IN_ARRAY; + } else if (!Utils::isWhitespace(c)) { + hadError = true; + state = STATE_RESYNC; + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + } + reader.consumePeek(); + break; + case STATE_RESYNC: + // Just wait for another comma to arrive + if (c == ',') { + state = STATE_IN_ARRAY; + } + reader.consumePeek(); + break; + } + } + return error(reader, logger, ERR_UNEXPECTED_END, res); +} + +std::pair VariantReader::parseUnescapedString( + CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + std::stringstream res; + std::stringstream buf; + char c; + + // Consume all whitespace + reader.consumeWhitespace(); + + // Copy all characters, skip whitespace at the end + int state = STATE_IN_STRING; + while (reader.peek(c)) { + if (delims.count(c)) { + reader.resetPeek(); + return std::make_pair(true, res.str()); + } else if (Utils::isWhitespace(c)) { + // Do not add whitespace to the output buffer + state = STATE_WHITESPACE; + buf << 
c; + } else { + // If we just had a sequence of whitespace, append it to the output + // buffer and continue + if (state == STATE_WHITESPACE) { + res << buf.str(); + buf.str(std::string{}); + buf.clear(); + state = STATE_IN_STRING; + } + res << c; + } + reader.consumePeek(); + } + return std::make_pair(true, res.str()); +} + +std::pair VariantReader::parseInteger( + CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + Number n; + if (n.parse(reader, logger, delims)) { + // Only succeed if the parsed number is an integer, otherwise this is an + // error + if (n.isInt()) { + return std::make_pair(true, n.intValue()); + } else { + return error(reader, logger, ERR_INVALID_INTEGER, n.intValue()); + } + } + return std::make_pair(false, n.intValue()); +} + +std::pair VariantReader::parseDouble( + CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + Number n; + bool res = n.parse(reader, logger, delims); + return std::make_pair(res, n.doubleValue()); +} + +std::pair VariantReader::parseGeneric( + CharReader &reader, Logger &logger, + const std::unordered_set &delims) +{ + char c; + + // Skip all whitespace characters + reader.consumeWhitespace(); + while (reader.peek(c)) { + // Stop if a delimiter is reached + if (delims.count(c)) { + return error(reader, logger, ERR_UNEXPECTED_END, nullptr); + } + + // Parse a string if a quote is reached + if (c == '"' || c == '\'') { + auto res = parseString(reader, logger); + return std::make_pair(res.first, res.second.c_str()); + } + + if (c == '[') { + // TODO: Parse struct descriptor + } + + // Try to parse everything that looks like a number as number + if (Utils::isNumeric(c) || c == '-') { + Number n; + + // Fork the reader + CharReaderFork fork = reader.fork(); + + // TODO: Fork logger + + // Try to parse the number + if (n.parse(fork, logger, delims)) { + // Parsing was successful, advance the reader + fork.commit(); + if (n.isInt()) { + return std::make_pair( + true, + 
Variant{static_cast(n.intValue())}); + } else { + return std::make_pair(true, n.doubleValue()); + } + } + } + + // Parse an unescaped string in any other case + auto res = parseUnescapedString(reader, logger, delims); + + // Handling for special primitive values + if (res.first) { + if (res.second == "true") { + return std::make_pair(true, Variant{true}); + } + if (res.second == "false") { + return std::make_pair(true, Variant{false}); + } + if (res.second == "null") { + return std::make_pair(true, Variant{nullptr}); + } + } + return std::make_pair(res.first, res.second.c_str()); + } + return error(reader, logger, ERR_UNEXPECTED_END, nullptr); +} +} + diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp new file mode 100644 index 0000000..5e7c5d2 --- /dev/null +++ b/src/core/common/VariantReader.hpp @@ -0,0 +1,166 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file VariantReader.hpp + * + * Provides parsers for various micro formats. These formats include integers, + * doubles, strings, JSON and the Ousía struct notation. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_VARIANT_READER_HPP_ +#define _OUSIA_VARIANT_READER_HPP_ + +#include +#include +#include + +#include "CharReader.hpp" +#include "Logger.hpp" +#include "Variant.hpp" + +namespace ousia { + +class VariantReader { +private: + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + * @param delims is an optional set of delimiters after which parsing has to + * be stopped (the delimiters may occur inside the actual string, but not + * outside). If nullptr is given, no delimiter is used and a complete string + * is read. + */ + static std::pair parseString( + CharReader &VariantReader, Logger &logger, + const std::unordered_set *delims); + +public: + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + * @param delims is a set of delimiters after which parsing has to + * be stopped (the delimiters may occur inside the actual string, but not + * outside). 
+ */ + static std::pair parseString( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims) + { + return parseString(VariantReader, logger, &delims); + } + + /** + * Parses a string which may either be enclosed by " or ', unescapes + * entities in the string as specified for JavaScript. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting + * character. + * @param logger is the logger instance that should be used to log error + * messages and warnings. + */ + static std::pair parseString(CharReader &VariantReader, + Logger &logger) + { + return parseString(VariantReader, logger, nullptr); + } + + /** + * Extracts an unescaped string from the given CharReader + * instance. This function just reads text until one of the given delimiter + * characters is reached. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. + * @param delims is a set of characters which will terminate the string. + * These characters are not included in the result. May not be nullptr. + */ + static std::pair parseUnescapedString( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims); + + /** + * Parses an integer from the given CharReader instance + * until one of the given delimiter characters is reached. + * + * @param VariantReader is a reference to the CharReader instance from + * which the character data should be read. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. 
+ */ + static std::pair parseInteger( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims); + + /** + * Parses a double from the given CharReader instance + * until one of the given delimiter characters is reached. + * + * @param VariantReader is a reference to the CharReader instance from + * which the character data should be read. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the double. + */ + static std::pair parseDouble( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims); + + /** + * Parses an array of values. If delim is 0 (the default) the array must + * start with '[' and is terminated by ']'; otherwise elements are read + * immediately and the array ends at the given delim character. + */ + static std::pair parseArray( + CharReader &VariantReader, Logger &logger, char delim = 0); + + /** + * Tries to parse the most specific item from the given stream until one of + * the given delimiters is reached or a meaningful literal has been read. + * The resulting variant represents the value that has been read. + * + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. + * @param delims is a set of characters which will terminate the string. + * These characters are not included in the result. May not be nullptr. 
+ */ + static std::pair parseGeneric( + CharReader &VariantReader, Logger &logger, + const std::unordered_set &delims); +}; +} + +#endif /* _OUSIA_VARIANT_READER_HPP_ */ + diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index 5dac956..e155cfd 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -32,10 +32,10 @@ #include #include -#include #include -#include #include +#include +#include #include "Scope.hpp" diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp index dca7f35..5e801ee 100644 --- a/src/core/parser/ParserStack.cpp +++ b/src/core/parser/ParserStack.cpp @@ -20,8 +20,8 @@ #include "ParserStack.hpp" -#include -#include +#include +#include namespace ousia { namespace parser { diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp index c5ed4e4..233f4f9 100644 --- a/src/core/parser/ParserStack.hpp +++ b/src/core/parser/ParserStack.hpp @@ -37,7 +37,7 @@ #include #include -#include +#include #include "Parser.hpp" diff --git a/src/core/utils/CharReader.cpp b/src/core/utils/CharReader.cpp deleted file mode 100644 index 61616d7..0000000 --- a/src/core/utils/CharReader.cpp +++ /dev/null @@ -1,643 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include -#include -#include - -#include - -#include "CharReader.hpp" - -namespace ousia { -namespace utils { - -/* Helper functions */ - -/** - * istreamReadCallback is used internally by the Buffer calss to stream data - * from an input stream. - * - * @param buf is points a the target memory region. - * @param size is the requested number of bytes. - * @param userData is a pointer at some user defined data. - * @return the actual number of bytes read. If the result is smaller than - * the requested size, this tells the Buffer that the end of the input - * stream is reached. - */ -static size_t istreamReadCallback(char *buf, size_t size, void *userData) -{ - return (static_cast(userData))->read(buf, size).gcount(); -} - -/* Class Buffer */ - -Buffer::Buffer(ReadCallback callback, void *userData) - : callback(callback), - userData(userData), - reachedEnd(false), - startBucket(buckets.end()), - endBucket(buckets.end()), - startOffset(0), - firstDead(0) -{ - // Load a first block of data from the stream - stream(); - startBucket = buckets.begin(); -} - -Buffer::Buffer(std::istream &istream) : Buffer(istreamReadCallback, &istream) {} - -Buffer::Buffer(const std::string &str) - : callback(nullptr), - userData(nullptr), - reachedEnd(true), - startBucket(buckets.end()), - endBucket(buckets.end()), - startOffset(0), - firstDead(0) -{ - // Copy the given string into a first buffer and set the start buffer - // correctly - Bucket &bucket = nextBucket(); - bucket.resize(str.size()); - std::copy(str.begin(), str.end(), bucket.begin()); - startBucket = buckets.begin(); -} - -#ifndef NDEBUG -Buffer::~Buffer() -{ - // Make sure all cursors have been deleted - for (bool cursor_alive: alive) { - assert(!cursor_alive); - } -} -#endif - -void Buffer::advance(BucketIterator &it) -{ - it++; - if (it == buckets.end()) { - it = buckets.begin(); - } -} - -void Buffer::advance(BucketList::const_iterator &it) const -{ - it++; - if (it == buckets.cend()) { - it = 
buckets.cbegin(); - } -} - -Buffer::Bucket &Buffer::nextBucket() -{ - constexpr size_t MAXVAL = std::numeric_limits::max(); - - // Fetch the minimum bucket index - size_t minBucketIdx = MAXVAL; - for (size_t i = 0; i < cursors.size(); i++) { - if (alive[i]) { - // Fetch references to the bucket and the cursor - const Cursor &cur = cursors[i]; - const Bucket &bucket = *(cur.bucket); - - // Increment the bucket index by one, if the cursor is at the end - // of the bucket (only valid if the LOOKBACK_SIZE is set to zero) - size_t bIdx = cur.bucketIdx; - if (LOOKBACK_SIZE == 0 && cur.bucketOffs == bucket.size()) { - bIdx++; - } - - // Decrement the bucket index by one, if the previous bucket still - // needs to be reached and cannot be overridden - if (bIdx > 0 && cur.bucketOffs < LOOKBACK_SIZE) { - bIdx--; - } - - // Set the bucket index to the minium - minBucketIdx = std::min(minBucketIdx, bIdx); - } - } - - // If there is space between the current start bucket and the read - // cursor, the start bucket can be safely overridden. 
- if (minBucketIdx > 0 && minBucketIdx != MAXVAL) { - // All cursor bucket indices will be decreased by one - for (size_t i = 0; i < cursors.size(); i++) { - cursors[i].bucketIdx--; - } - - // Increment the start offset - startOffset += startBucket->size(); - - // The old start bucket is the new end bucket - endBucket = startBucket; - - // Advance the start bucket, wrap around at the end of the list - advance(startBucket); - } else { - // No free bucket, insert a new one before the start bucket - endBucket = buckets.emplace(startBucket); - } - return *endBucket; -} - -Buffer::CursorId Buffer::nextCursor() -{ - bool hasCursor = false; - CursorId res = 0; - - // Search for the next free cursor starting with minNextCursorId - for (size_t i = firstDead; i < alive.size(); i++) { - if (!alive[i]) { - res = i; - hasCursor = true; - break; - } - } - - // Add a new cursor to the cursor list if no cursor is currently free - if (!hasCursor) { - res = cursors.size(); - cursors.resize(res + 1); - alive.resize(res + 1); - } - - // The next dead cursor is at least the next cursor - firstDead = res + 1; - - // Mark the new cursor as alive - alive[res] = true; - - return res; -} - -void Buffer::stream() -{ - // Fetch the bucket into which the data should be inserted, make sure it - // has the correct size - Bucket &tar = nextBucket(); - tar.resize(REQUEST_SIZE); - - // Read data from the stream into the target buffer - size_t size = callback(tar.data(), REQUEST_SIZE, userData); - - // If not enough bytes were returned, we're at the end of the stream - if (size < REQUEST_SIZE) { - tar.resize(size); - reachedEnd = true; - } -} - -Buffer::CursorId Buffer::createCursor() -{ - CursorId res = nextCursor(); - cursors[res].bucket = startBucket; - cursors[res].bucketIdx = 0; - cursors[res].bucketOffs = 0; - return res; -} - -Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref) -{ - CursorId res = nextCursor(); - cursors[res] = cursors[ref]; - return res; -} - -void 
Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to) -{ - cursors[to] = cursors[from]; -} - -void Buffer::deleteCursor(Buffer::CursorId cursor) -{ - alive[cursor] = false; - firstDead = std::min(firstDead, cursor); -} - -size_t Buffer::offset(Buffer::CursorId cursor) const -{ - const Cursor &cur = cursors[cursor]; - size_t offs = startOffset + cur.bucketOffs; - BucketList::const_iterator it = startBucket; - while (it != cur.bucket) { - offs += it->size(); - advance(it); - } - return offs; -} - -size_t Buffer::moveForward(CursorId cursor, size_t relativeOffs) -{ - size_t offs = relativeOffs; - Cursor &cur = cursors[cursor]; - while (offs > 0) { - // Fetch the current bucket of the cursor - Bucket &bucket = *(cur.bucket); - - // If there is enough space in the bucket, simply increment the bucket - // offset by the given relative offset - const size_t space = bucket.size() - cur.bucketOffs; - if (space >= offs) { - cur.bucketOffs += offs; - break; - } else { - // Go to the end of the current bucket otherwise - offs -= space; - cur.bucketOffs = bucket.size(); - - // Go to the next bucket - if (cur.bucket != endBucket) { - // Go to the next bucket - advance(cur.bucket); - cur.bucketIdx++; - cur.bucketOffs = 0; - } else { - // Abort, if there is no more data to stream, otherwise just - // load new data - if (reachedEnd) { - return relativeOffs - offs; - } - stream(); - } - } - } - return relativeOffs; -} - -size_t Buffer::moveBackward(CursorId cursor, size_t relativeOffs) -{ - size_t offs = relativeOffs; - Cursor &cur = cursors[cursor]; - while (offs > 0) { - // If there is enough space in the bucket, simply decrement the bucket - // offset by the given relative offset - if (cur.bucketOffs >= offs) { - cur.bucketOffs -= offs; - break; - } else { - // Go to the beginning of the current bucket otherwise - offs -= cur.bucketOffs; - cur.bucketOffs = 0; - - // Abort if there is no more bucket to got back to - if (cur.bucketIdx == 0) { - return relativeOffs - offs; - 
} - - // Go to the previous bucket (wrap around at the beginning of the - // list) - if (cur.bucket == buckets.begin()) { - cur.bucket = buckets.end(); - } - cur.bucket--; - - // Decrement the bucket index, and set the current offset to the - // end of the new bucket - cur.bucketIdx--; - cur.bucketOffs = cur.bucket->size(); - } - } - return relativeOffs; -} - -ssize_t Buffer::moveCursor(CursorId cursor, ssize_t relativeOffs) -{ - if (relativeOffs > 0) { - return moveForward(cursor, relativeOffs); - } else if (relativeOffs < 0) { - return -moveBackward(cursor, -relativeOffs); - } else { - return 0; - } -} - -bool Buffer::atEnd(Buffer::CursorId cursor) const -{ - const Cursor &c = cursors[cursor]; - return reachedEnd && - (c.bucket == endBucket && c.bucketOffs == endBucket->size()); -} - -bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr) -{ - Cursor &cur = cursors[cursor]; - while (true) { - // Reference at the current bucket - Bucket &bucket = *(cur.bucket); - - // If there is still data in the current bucket, return this data - if (cur.bucketOffs < bucket.size()) { - c = bucket[cur.bucketOffs]; - if (incr) { - cur.bucketOffs++; - } - return true; - } else if (cur.bucket == endBucket) { - // Return false if the end of the stream has been reached, otherwise - // load new data - if (reachedEnd) { - return false; - } - stream(); - } - - // Go to the next bucket - cur.bucketIdx++; - cur.bucketOffs = 0; - advance(cur.bucket); - } -} - -bool Buffer::read(Buffer::CursorId cursor, char &c) -{ - return fetchCharacter(cursor, c, true); -} - -bool Buffer::fetch(CursorId cursor, char &c) -{ - return fetchCharacter(cursor, c, false); -} - -/* CharReader::Cursor class */ - -void CharReader::Cursor::assign(std::shared_ptr buffer, - CharReader::Cursor &cursor) -{ - // Copy the cursor position - buffer->copyCursor(cursor.cursor, this->cursor); - - // Copy the state - line = cursor.line; - column = cursor.column; -} - -/* CharReader class */ - 
-CharReader::CharReader(std::shared_ptr buffer, size_t line, - size_t column) - : buffer(buffer), - readCursor(buffer->createCursor(), line, column), - peekCursor(buffer->createCursor(), line, column), - coherent(true) -{ -} - -CharReader::CharReader(const std::string &str, size_t line, size_t column) - : CharReader(std::shared_ptr{new Buffer{str}}, line, column) -{ -} - -CharReader::CharReader(std::istream &istream, size_t line, size_t column) - : CharReader(std::shared_ptr{new Buffer{istream}}, line, column) -{ -} - -CharReader::~CharReader() -{ - buffer->deleteCursor(readCursor.cursor); - buffer->deleteCursor(peekCursor.cursor); -} - -bool CharReader::readAtCursor(Cursor &cursor, char &c) -{ - // Return false if we're at the end of the stream - if (!buffer->read(cursor.cursor, c)) { - return false; - } - - // Substitute linebreak sequences with a single '\n' - if (c == '\n' || c == '\r') { - // Output a single \n - c = '\n'; - - // Check whether the next character is a continuation of the - // current character - char c2; - if (buffer->read(cursor.cursor, c2)) { - if ((c2 != '\n' && c2 != '\r') || c2 == c) { - buffer->moveCursor(cursor.cursor, -1); - } - } - } - - // Count lines and columns - if (c == '\n') { - // A linebreak was reached, go to the next line - cursor.line++; - cursor.column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((c & 0x80) && !(c & 0x40))) { - cursor.column++; - } - } - return true; -} - -bool CharReader::peek(char &c) -{ - // If the reader was coherent, update the peek cursor state - if (coherent) { - peekCursor.assign(buffer, readCursor); - coherent = false; - } - - // Read a character from the peek cursor - return readAtCursor(peekCursor, c); -} - -bool CharReader::read(char &c) -{ - // Read a character from the buffer at the current read cursor - bool res = readAtCursor(readCursor, c); - - // Set the peek position to the current read position, if reading was not - // coherent - if (!coherent) { - 
peekCursor.assign(buffer, readCursor); - coherent = true; - } else { - buffer->copyCursor(readCursor.cursor, peekCursor.cursor); - } - - // Return the result of the read function - return res; -} - -void CharReader::resetPeek() -{ - if (!coherent) { - peekCursor.assign(buffer, readCursor); - coherent = true; - } -} - -void CharReader::consumePeek() -{ - if (!coherent) { - readCursor.assign(buffer, peekCursor); - coherent = true; - } -} - -bool CharReader::consumeWhitespace() -{ - char c; - while (peek(c)) { - if (!Utils::isWhitespace(c)) { - resetPeek(); - return true; - } - consumePeek(); - } - return false; -} - -CharReaderFork CharReader::fork() -{ - return CharReaderFork(buffer, readCursor, peekCursor, coherent); -} - -CharReader::Context CharReader::getContext(ssize_t maxSize) -{ - // Clone the current read cursor - Buffer::CursorId cur = buffer->createCursor(readCursor.cursor); - - // Fetch the start position of the search - ssize_t offs = buffer->offset(cur); - ssize_t start = offs; - ssize_t end = offs; - char c; - - // Search the beginning of the line with the last non-whitespace character - bool hadNonWhitespace = false; - bool foundBegin = false; - for (ssize_t i = 0; i < maxSize; i++) { - // Fetch the character at the current position - if (buffer->fetch(cur, c)) { - // Abort, at linebreaks if we found a non-linebreak character - hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c); - if (hadNonWhitespace && (c == '\n' || c == '\r')) { - buffer->moveCursor(cur, 1); - start++; - foundBegin = true; - break; - } - } - if (buffer->moveCursor(cur, -1) == 0) { - foundBegin = true; - break; - } else { - // Update the start position and the hadNonWhitespace flag - start--; - } - } - - // Search the end of the line - buffer->moveCursor(cur, offs - start); - bool foundEnd = false; - for (ssize_t i = 0; i < maxSize; i++) { - // Increment the end counter if a character was read, abort if the end - // of the stream has been reached - if (buffer->read(cur, 
c)) { - end++; - } else { - foundEnd = true; - break; - } - - // Abort on linebreak characters - if (c == '\n' || c == '\r') { - foundEnd = true; - break; - } - } - - // Calculate the truncated start and end position and limit the number of - // characters to the maximum number of characters - ssize_t tStart = start; - ssize_t tEnd = end; - if (tEnd - tStart > maxSize) { - tStart = std::max(offs - maxSize / 2, tStart); - tEnd = tStart + maxSize; - } - - // Try to go to the calculated start position and fetch the actual start - // position - ssize_t aStart = end + buffer->moveCursor(cur, tStart - end); - if (aStart > tStart) { - tEnd = tEnd + (aStart - tStart); - tStart = aStart; - } - - // Read one line - std::stringstream ss; - size_t relPos = 0; - for (ssize_t i = tStart; i < tEnd; i++) { - if (buffer->read(cur, c)) { - // Break once a linebreak is reached - if (c == '\n' || c == '\r') { - break; - } - - // Add the current character to the output - ss << c; - - // Increment the string-relative offset as long as the original - // offset is not reached in the for loop - if (i < offs) { - relPos++; - } - } - } - - // Delete the newly created cursor - buffer->deleteCursor(cur); - - return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start, - !foundEnd || tEnd != end}; -} - -/* Class CharReaderFork */ - -CharReaderFork::CharReaderFork(std::shared_ptr buffer, - CharReader::Cursor &parentReadCursor, - CharReader::Cursor &parentPeekCursor, - bool coherent) - : CharReader(buffer, 1, 1), - parentReadCursor(parentReadCursor), - parentPeekCursor(parentPeekCursor) -{ - readCursor.assign(buffer, parentReadCursor); - peekCursor.assign(buffer, parentPeekCursor); - this->coherent = coherent; -} - -void CharReaderFork::commit() -{ - parentReadCursor.assign(buffer, readCursor); - parentPeekCursor.assign(buffer, peekCursor); -} -} -} - diff --git a/src/core/utils/CharReader.hpp b/src/core/utils/CharReader.hpp deleted file mode 100644 index 1306026..0000000 --- 
a/src/core/utils/CharReader.hpp +++ /dev/null @@ -1,672 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file CharReader.hpp - * - * Used within all parsers to read single characters from an underlying stream. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_CHAR_READER_HPP_ -#define _OUSIA_CHAR_READER_HPP_ - -#include -#include -#include -#include - -namespace ousia { -namespace utils { - -/** - * A chunked ring buffer used in CharReader to provide access to an input stream - * with multiple read cursors. The Buffer automatically expands to the - * size of the spanned by the read cursors while reusing already allocated - * memory. - */ -class Buffer { -public: - /** - * Callback function which is called whenever new data is requested from the - * input stream. - * - * @param buf is points a the target memory region. - * @param size is the requested number of bytes. - * @param userData is a pointer at some user defined data given in the - * constructor. - * @return the actual number of bytes read. If the result is smaller than - * the requested size, this tells the Buffer that the end of the input - * stream is reached. - */ - using ReadCallback = size_t (*)(char *buf, size_t size, void *userData); - - /** - * Handle used to identify a cursor. 
- */ - using CursorId = size_t; - -private: - /** - * Number of bytes to request from the input stream. Set to 64 KiB because - * this seems to be a nice value for I/O operations according to multiple - * sources. - */ - static constexpr size_t REQUEST_SIZE = 64 * 1024; - - /** - * Number of bytes the buffer guarantees to be capable of looking back - * for extracting the current context. - */ - static constexpr size_t LOOKBACK_SIZE = 128; - - /** - * Type used internally to represent one chunk of memory. - */ - using Bucket = std::vector; - - /** - * Type used internally to represent a bucket container. - */ - using BucketList = std::list; - - /** - * Type used internally for representing iterators in the bucket list. - */ - using BucketIterator = BucketList::iterator; - - /** - * Type used internally to represent a read cursor. - */ - struct Cursor { - /** - * Iterator pointing at the current bucket. - */ - BucketIterator bucket; - - /** - * Index of the bucket relative to the start bucket. - */ - size_t bucketIdx; - - /** - * Current offset within that bucket. - */ - size_t bucketOffs; - }; - - /** - * List of buckets containing the buffered memory. - */ - BucketList buckets; - - /** - * List of cursors used to access the memory. Note that cursors can be - * marked as inactive and reused lateron (to avoid having to resize the - * vector). - */ - std::vector cursors; - - /** - * Bitfield specifying which of the cursors is actually valid. - */ - std::vector alive; - - /** - * Function to be called whenever new data is needed. Set to nullptr if the - * Buffer is not backed by an input stream. - */ - const ReadCallback callback; - - /** - * User data given in the constructor. - */ - void *userData; - - /** - * Set to true if the input stream is at its end. - */ - bool reachedEnd; - - /** - * Iterator pointing at the current start bucket. - */ - BucketIterator startBucket; - - /** - * Iterator pointing at the last bucket. 
- */ - BucketIterator endBucket; - - /** - * Byte offset of the start bucket relative to the beginning of the stream. - */ - size_t startOffset; - - /** - * Points at the smallest possible available cursor index, yet does not - * guarantee that this cursor index actuall is free. - */ - CursorId firstDead; - - /** - * Advances the bucket iterator, cares about wrapping around in the ring. - */ - void advance(BucketIterator &it); - - /** - * Advances the bucket iterator, cares about wrapping around in the ring. - */ - void advance(BucketList::const_iterator &it) const; - - /** - * Internally used to find the next free cursor in the cursors vector. The - * cursor is marked as active. - * - * @return the next free cursor index. - */ - CursorId nextCursor(); - - /** - * Returns a reference at the next bucket into which data should be - * inserted. - * - * @return a bucket into which the data can be inserted. - */ - Bucket &nextBucket(); - - /** - * Reads data from the input stream and places it in the next free buffer. - */ - void stream(); - - /** - * Moves the given cursor forward. - */ - size_t moveForward(CursorId cursor, size_t relativeOffs); - - /** - * Moves the given cursor backward. - */ - size_t moveBackward(CursorId cursor, size_t relativeOffs); - - /** - * Reads a character from the current cursor position and optionally - * advances. - */ - bool fetchCharacter(CursorId cursor, char &c, bool incr); - -public: - /** - * Intializes the Buffer with a reference to a ReadCallback that is used - * to fetch data from an underlying input stream. - * - * @param callback is the function that will be called whenever data is read - * from the ring buffer and the buffer does not hold enough data to fulfill - * this read request. - * @param userData is a pointer to user defined data which will be passed to - * the callback function. 
- */ - Buffer(ReadCallback callback, void *userData); - - /** - * Initializes the Buffer with a reference to an std::istream from which - * data will be read. - * - * @param istream is the input stream from which the data should be read. - */ - Buffer(std::istream &istream); - - /** - * Initializes the Buffer with the contents of the given string, after - * this operation the Buffer has a fixed size. - * - * @param str is the string containing the data that should be copied into - * the ring buffer. - */ - Buffer(const std::string &str); - -#ifndef NDEBUG - /** - * Destructor of the Buffer class. Makes sure that all cursors have been - * freed. - */ - ~Buffer(); -#endif - - // No copy - Buffer(const Buffer &) = delete; - - // No assign - Buffer &operator=(const Buffer &) = delete; - - /** - * Creates a new read cursor positioned at the smallest possible position - * in the ring buffer. - */ - CursorId createCursor(); - - /** - * Creates a new read cursor positioned at the same position as the given - * read cursor. - * - * @param ref is the read cursor that should be used as reference for the - * new read cursor. - */ - CursorId createCursor(CursorId ref); - - /** - * Copies the position of one cursor to another cursor. - * - * @param from is the cursor id of which the position should be copied. - * @param to is the cursor id to which the position should be copied. - */ - void copyCursor(CursorId from, CursorId to); - - /** - * Deletes the cursor with the given id. The cursor may no longer be used - * after this function has been called. - * - * @param cursor is the id of the cursor that should be freed. - */ - void deleteCursor(CursorId cursor); - - /** - * Moves a cursor by offs bytes. Note that moving backwards is theoretically - * limited by the LOOKBACK_SIZE of the Buffer, practically it will most - * likely be limited by the REQUEST_SIZE, so you can got at most 64 KiB - * backwards. - * - * @param cursor is the cursor that should be moved. 
- * @param relativeOffs is a positive or negative integer number specifying - * the number of bytes the cursor should be moved forward (positive numbers) - * or backwards (negative numbers). - * @return the actual number of bytes the cursor was moved. This number is - * smaller than the relativeOffs given in the constructor if the - */ - ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs); - - /** - * Returns the current byte offset of the given cursor relative to the - * beginning of the stream. - * - * @param cursor is the cursor for which the byte offset relative to the - * beginning of the stream should be returned. - * @return the number of bytes since the beginning of the stream for the - * given cursor. - */ - size_t offset(CursorId cursor) const; - - /** - * Returns true if the given cursor currently is at the end of the stream. - * - * @param cursor is the cursor for which the atEnd flag should be returned. - * @return true if the there are no more bytes for this cursor. If false - * is returned, this means that there may be more bytes in the stream, - * nevertheless the end of the stream may be hit once the next read function - * is called. - */ - bool atEnd(CursorId cursor) const; - - /** - * Reads a single character from the ring buffer from the given cursor and - * moves to the next character. - * - * @param cursor specifies the cursor from which the data should be read. - * The cursor will be advanced by one byte. - * @param c is the character into which the data needs to be read. - * @return true if a character was read, false if the end of the stream has - * been reached. - */ - bool read(CursorId cursor, char &c); - - /** - * Returns a single character from the ring buffer from the current cursor - * position and stays at that position. - * - * @param cursor specifies the cursor from which the data should be read. - * The cursor will be advanced by one byte. - * @param c is the character into which the data needs to be read. 
- * @return true if a character could be fetched, false if the end of the - * stream has been reached. - */ - bool fetch(CursorId cursor, char &c); -}; - -// Forward declaration -class CharReaderFork; - -/** - * Used within parsers for convenient access to single characters in an input - * stream or buffer. It allows reading and peeking single characters from a - * buffer. Additionally it counts the current column/row (with correct handling - * for UTF-8) and contains an internal state machine that handles the detection - * of linebreaks and converts these to a single '\n'. - */ -class CharReader { -public: - /** - * The context struct is used to represent the current context the char - * reader is in. This context can for example be used when building error - * messages. - */ - struct Context { - /** - * Set to the content of the current line. - */ - std::string line; - - /** - * Relative position (in characters) within that line. - */ - size_t relPos; - - /** - * Set to true if the beginning of the line has been truncated (because - * the reader position is too far away from the actual position of the - * line). - */ - bool truncatedStart; - - /** - * Set to true if the end of the line has been truncated (because the - * reader position is too far away from the actual end position of the - * line. - */ - bool truncatedEnd; - - Context() - : line(), relPos(0), truncatedStart(false), truncatedEnd(false) - { - } - - Context(std::string line, size_t relPos, bool truncatedStart, - bool truncatedEnd) - : line(std::move(line)), - relPos(relPos), - truncatedStart(truncatedStart), - truncatedEnd(truncatedEnd) - { - } - }; - -protected: - /** - * Internally used cursor structure for managing the read and the peek - * cursor. - */ - struct Cursor { - /** - * Corresponding cursor in the underlying buffer instance. - */ - const Buffer::CursorId cursor; - - /** - * Current line the cursor is in. - */ - uint32_t line; - - /** - * Current column the cursor is in. 
- */ - uint32_t column; - - /** - * Constructor of the Cursor class. - * - * @param cursor is the underlying cursor in the Buffer instance. - */ - Cursor(Buffer::CursorId cursor, size_t line, size_t column) - : cursor(cursor), line(line), column(column) - { - } - - /** - * Assigns one cursor to another. - * - * @param buffer is the underlying buffer instance the internal cursor - * belongs to. - * @param cursor is the cursor from which the state should be copied. - */ - void assign(std::shared_ptr buffer, Cursor &cursor); - }; - -private: - /** - * Substitutes "\r", "\n\r", "\r\n" with a single "\n". - * - * @param cursor is the cursor from which the character should be read. - * @param c a reference to the character that should be written. - * @return true if another character needs to be read. - */ - bool substituteLinebreaks(Cursor &cursor, char &c); - - /** - * Reads a single character from the given cursor. - * - * @param cursor is the cursor from which the character should be read. - * @param c a reference to the character that should be written. - * @return true if a character was read, false if the end of the stream has - * been reached. - */ - bool readAtCursor(Cursor &cursor, char &c); - -protected: - /** - * Reference pointing at the underlying buffer. - */ - std::shared_ptr buffer; - - /** - * Cursor used for reading. - */ - Cursor readCursor; - - /** - * Cursor used for peeking. - */ - Cursor peekCursor; - - /** - * Set to true as long the underlying Buffer cursor is at the same position - * for the read and the peek cursor. This is only used for optimization - * purposes and makes consecutive reads a bit faster. - */ - bool coherent; - - /** - * Protected constructor of the CharReader base class. Creates new read - * and peek cursors for the given buffer. - * - * @param buffer is a reference to the underlying Buffer class responsible - * for allowing to read from a single input stream from multiple locations. 
- */ - CharReader(std::shared_ptr buffer, size_t line, size_t column); - -public: - /** - * Creates a new CharReader instance from a string. - * - * @param str is a string containing the input data. - * @param line is the start line. - * @param column is the start column. - */ - CharReader(const std::string &str, size_t line = 1, size_t column = 1); - - /** - * Creates a new CharReader instance for an input stream. - * - * @param istream is the input stream from which incomming data should be - * read. - * @param line is the start line. - * @param column is the start column. - */ - CharReader(std::istream &istream, size_t line = 1, size_t column = 1); - - /** - * Deletes the used cursors from the underlying buffer instance. - */ - ~CharReader(); - - // No copy - CharReader(const Buffer &) = delete; - - // No assign - CharReader &operator=(const Buffer &) = delete; - - /** - * Peeks a single character. If called multiple times, returns the - * character after the previously peeked character. - * - * @param c is a reference to the character to which the result should be - * written. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char &c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. - * - * @param c is a reference to the character to which the result should be - * written. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char &c); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Moves the read cursor to the next non-whitespace character. 
Returns - * false, if the end of the stream was reached. - * - * @return false if the end of the stream was reached, false othrwise. - */ - bool consumeWhitespace(); - - /** - * Creates a new CharReader located at the same position as this CharReader - * instance, yet the new CharReader can be used independently of this - * CharReader. Use the "commit" function of the returned CharReader to - * copy the state of the forked CharReaderFork to this CharReader. - * - * @return a CharReaderFork instance positioned at the same location as this - * CharReader instance. - */ - CharReaderFork fork(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - * - * @return true if there is no more data. - */ - bool atEnd() const { return buffer->atEnd(readCursor.cursor); } - - /** - * Returns the current line (starting with one). - * - * @return the current line number. - */ - uint32_t getLine() const { return readCursor.line; } - - /** - * Returns the current column (starting with one). - * - * @return the current column number. - */ - uint32_t getColumn() const { return readCursor.column; } - - /** - * Returns the current byte offset of the read cursor. - * - * @return the byte position within the stream. - */ - size_t getOffset() const { return buffer->offset(readCursor.cursor); }; - - /** - * Returns the line the read cursor currently is in, but at most the - * given number of characters in the form of a Context structure. - */ - Context getContext(ssize_t maxSize); -}; - -/** - * A CharReaderFork is returned whenever the "fork" function of the CharReader - * class is used. Its "commit" function can be used to move the underlying - * CharReader instance to the location of the CharReaderFork instance. Otherwise - * the read location of the underlying CharReader is left unchanged. - */ -class CharReaderFork : public CharReader { -private: - friend CharReader; - - /** - * The reader cursor of the underlying CharReader instance. 
- */ - CharReader::Cursor &parentReadCursor; - - /** - * The peek cursor of the underlying CharReader instance. - */ - CharReader::Cursor &parentPeekCursor; - - /** - * Constructor of the CharReaderFork class. - * - * @param buffer is a reference at the parent Buffer instance. - * @param parentPeekCursor is a reference at the parent read cursor. - * @param parentPeekCursor is a reference at the parent peek cursor. - * @param coherent specifies whether the char reader cursors are initialized - * coherently. - */ - CharReaderFork(std::shared_ptr buffer, - CharReader::Cursor &parentReadCursor, - CharReader::Cursor &parentPeekCursor, bool coherent); - -public: - /** - * Moves the read and peek cursor of the parent CharReader to the location - * of the read and peek cursor in the fork. - */ - void commit(); -}; -} - -/** - * Alias of the commonly used CharReader class. - */ -using CharReader = utils::CharReader; - -} - -#endif /* _OUSIA_CHAR_READER_HPP_ */ - diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp deleted file mode 100644 index 5c167cd..0000000 --- a/src/core/variant/Reader.cpp +++ /dev/null @@ -1,624 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include -#include - -#include - -#include "Reader.hpp" - -namespace ousia { -namespace variant { - -// TODO: Better error messages (like "Expected 'x' but got 'y'") -// TODO: Replace delims with single char delim where possible -// TODO: Use custom return value instead of std::pair -// TODO: Allow buffered char reader to "fork" -// TODO: Rename CharReader to shorter CharReader -// TODO: Implement context in CharReader (to allow error messages to extract the -// current line) - -/* Error Messages */ - -static const char *ERR_UNEXPECTED_CHAR = "Unexpected character"; -static const char *ERR_UNEXPECTED_END = "Unexpected literal end"; -static const char *ERR_UNTERMINATED = "Unterminated literal"; -static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; -static const char *ERR_INVALID_INTEGER = "Invalid integer value"; -static const char *ERR_TOO_LARGE = "Value too large to represent"; - -/* Class Number */ - -/** - * Class used internally to represent a number (integer or double). The number - * is represented by its components (base value a, nominator n, denominator d, - * exponent e, sign s and exponent sign sE). - */ -class Number { -private: - /** - * Reprsents the part of the number: Base value a, nominator n, exponent e. - */ - enum class Part { A, N, E }; - - /** - * State used in the parser state machine - */ - enum class State { - INIT, - HAS_MINUS, - LEADING_ZERO, - LEADING_POINT, - INT, - HEX, - POINT, - EXP_INIT, - EXP_HAS_MINUS, - EXP - }; - - /** - * Returns the numeric value of the given ASCII character (returns 0 for - * '0', 1 for '1', 10 for 'A' and so on). - * - * @param c is the character for which the numeric value should be returned. - * @return the numeric value the character represents. 
- */ - static int charValue(char c) - { - if (c >= '0' && c <= '9') { - return c & 0x0F; - } - if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) { - return (c & 0x0F) + 9; - } - return -1; - } - - /** - * Appends the value of the character c to the internal number - * representation and reports any errors that might occur. - */ - bool appendChar(char c, int base, Part p, CharReader &reader, - Logger &logger) - { - // Check whether the given character is valid - int v = charValue(c); - if (v < 0 || v >= base) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - - // Append the number to the specified part - switch (p) { - case Part::A: - a = a * base + v; - break; - case Part::N: - n = n * base + v; - d = d * base; - break; - case Part::E: - e = e * base + v; - break; - } - - // Check for any overflows - if (a < 0 || n < 0 || d < 0 || e < 0) { - logger.errorAt(ERR_TOO_LARGE, reader); - return false; - } - return true; - } - -public: - /** - * Sign and exponent sign. - */ - int8_t s, sE; - - /** - * Exponent - */ - int16_t e; - - /** - * Base value, nominator, denominator - */ - int64_t a, n, d; - - /** - * Constructor of the number class. - */ - Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {} - - /** - * Returns the represented double value. - */ - double doubleValue() - { - return s * (a + ((double)n / (double)d)) * pow(10.0, (double)(sE * e)); - } - - /** - * Returns the represented integer value. Only a lossless operation, if the - * number is an integer (as can be checked via the isInt method), otherwise - * the exponent and the fractional value will be truncated. - */ - int64_t intValue() { return s * a; } - - /** - * Returns true, if the number is an integer (has no fractional or - * exponential part). - */ - bool isInt() { return (n == 0) && (d == 1) && (e == 0); } - - /** - * Tries to parse the number from the given stream and loggs any errors to - * the given logger instance. Numbers are terminated by one of the given - * delimiters. 
- */ - bool parse(CharReader &reader, Logger &logger, - const std::unordered_set &delims) - { - State state = State::INIT; - char c; - - // Consume the first whitespace characters - reader.consumeWhitespace(); - - // Iterate over the FSM to extract numbers - while (reader.peek(c)) { - // Abort, once a delimiter or whitespace is reached - if (Utils::isWhitespace(c) || delims.count(c)) { - reader.resetPeek(); - break; - } - - // The character is not a whitespace character and not a delimiter - switch (state) { - case State::INIT: - case State::HAS_MINUS: - switch (c) { - case '-': - // Do not allow multiple minus signs - if (state == State::HAS_MINUS) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::HAS_MINUS; - s = -1; - break; - case '0': - // Remember a leading zero for the detection of "0x" - state = State::LEADING_ZERO; - break; - case '.': - // Remember a leading point as ".eXXX" is invalid - state = State::LEADING_POINT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::LEADING_ZERO: - if (c == 'x' || c == 'X') { - state = State::HEX; - break; - } - // fallthrough - case State::INT: - switch (c) { - case '.': - state = State::POINT; - break; - case 'e': - case 'E': - state = State::EXP_INIT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::HEX: - if (!appendChar(c, 16, Part::A, reader, logger)) { - return false; - } - break; - case State::LEADING_POINT: - case State::POINT: - switch (c) { - case 'e': - case 'E': - if (state == State::LEADING_POINT) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::EXP_INIT; - break; - default: - state = State::POINT; - if (!appendChar(c, 10, Part::N, reader, logger)) { - return false; - } - break; - } - break; - case State::EXP_HAS_MINUS: - case 
State::EXP_INIT: - if (c == '-') { - if (state == State::EXP_HAS_MINUS) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::EXP_HAS_MINUS; - sE = -1; - } else { - state = State::EXP; - if (!appendChar(c, 10, Part::E, reader, logger)) { - return false; - } - } - break; - case State::EXP: - if (!appendChar(c, 10, Part::E, reader, logger)) { - return false; - } - break; - } - reader.consumePeek(); - } - - // States in which ending is valid. Log an error in other states - if (state == State::LEADING_ZERO || state == State::HEX || - state == State::INT || state == State::POINT || - state == State::EXP) { - return true; - } - logger.errorAt(ERR_UNEXPECTED_END, reader); - return false; - } -}; - -/* Class Reader */ - -static const int STATE_INIT = 0; -static const int STATE_IN_STRING = 1; -static const int STATE_IN_ARRAY = 2; -static const int STATE_EXPECT_COMMA = 3; -static const int STATE_ESCAPE = 4; -static const int STATE_WHITESPACE = 5; -static const int STATE_RESYNC = 6; - -template -static std::pair error(CharReader &reader, Logger &logger, - const char *err, T res) -{ - logger.errorAt(err, reader); - return std::make_pair(false, std::move(res)); -} - -std::pair Reader::parseString( - CharReader &reader, Logger &logger, - const std::unordered_set *delims) -{ - // Initialize the internal state - int state = STATE_INIT; - char quote = 0; - std::stringstream res; - - // Consume all whitespace - reader.consumeWhitespace(); - - // Statemachine whic iterates over each character in the stream - // TODO: Combination of peeking and consumePeek is stupid as consumePeek is - // the default (read and putBack would obviously be better, yet the latter - // is not trivial to implement in the current CharReader). 
- char c; - while (reader.peek(c)) { - switch (state) { - case STATE_INIT: - if (c == '"' || c == '\'') { - quote = c; - state = STATE_IN_STRING; - break; - } else if (delims && delims->count(c)) { - return error(reader, logger, ERR_UNEXPECTED_END, res.str()); - } - return error(reader, logger, ERR_UNEXPECTED_CHAR, res.str()); - case STATE_IN_STRING: - if (c == quote) { - reader.consumePeek(); - return std::make_pair(true, res.str()); - } else if (c == '\\') { - state = STATE_ESCAPE; - reader.consumePeek(); - break; - } else if (c == '\n') { - return error(reader, logger, ERR_UNTERMINATED, res.str()); - } - res << c; - reader.consumePeek(); - break; - case STATE_ESCAPE: - // Handle all possible special escape characters - switch (c) { - case 'b': - res << '\b'; - break; - case 'f': - res << '\f'; - break; - case 'n': - res << '\n'; - break; - case 'r': - res << '\r'; - break; - case 't': - res << '\t'; - break; - case 'v': - res << '\v'; - break; - case '\'': - res << '\''; - break; - case '"': - res << '"'; - break; - case '\\': - res << '\\'; - break; - case '\n': - break; - case 'x': - // TODO: Parse Latin-1 sequence hex XX - break; - case 'u': - // TODO: Parse 16-Bit unicode character hex XXXX - break; - default: - if (Utils::isNumeric(c)) { - // TODO: Parse octal 000 sequence - } else { - logger.errorAt(ERR_INVALID_ESCAPE, reader); - } - break; - } - - // Switch back to the "normal" state - state = STATE_IN_STRING; - reader.consumePeek(); - break; - } - } - return error(reader, logger, ERR_UNEXPECTED_END, res.str()); -} - -std::pair Reader::parseArray( - CharReader &reader, Logger &logger, char delim) -{ - Variant::arrayType res; - bool hadError = false; - int state = delim ? STATE_IN_ARRAY : STATE_INIT; - delim = delim ? 
delim : ']'; - char c; - - // Consume all whitespace - reader.consumeWhitespace(); - - // Iterate over the characters, use the parseGeneric function to read the - // pairs - while (reader.peek(c)) { - // Generically handle the end of the array - if (state != STATE_INIT && c == delim) { - reader.consumePeek(); - return std::make_pair(!hadError, res); - } - - switch (state) { - case STATE_INIT: - if (c != '[') { - return error(reader, logger, ERR_UNEXPECTED_CHAR, res); - } - state = STATE_IN_ARRAY; - reader.consumePeek(); - break; - case STATE_IN_ARRAY: { - // Try to read an element using the parseGeneric function - reader.resetPeek(); - auto elem = parseGeneric(reader, logger, {',', delim}); - res.push_back(elem.second); - - // If the reader had no error, expect an comma, otherwise skip - // to the next comma in the stream - if (elem.first) { - state = STATE_EXPECT_COMMA; - } else { - state = STATE_RESYNC; - hadError = true; - } - break; - } - case STATE_EXPECT_COMMA: - // Skip whitespace - if (c == ',') { - state = STATE_IN_ARRAY; - } else if (!Utils::isWhitespace(c)) { - hadError = true; - state = STATE_RESYNC; - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - } - reader.consumePeek(); - break; - case STATE_RESYNC: - // Just wait for another comma to arrive - if (c == ',') { - state = STATE_IN_ARRAY; - } - reader.consumePeek(); - break; - } - } - return error(reader, logger, ERR_UNEXPECTED_END, res); -} - -std::pair Reader::parseUnescapedString( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - std::stringstream res; - std::stringstream buf; - char c; - - // Consume all whitespace - reader.consumeWhitespace(); - - // Copy all characters, skip whitespace at the end - int state = STATE_IN_STRING; - while (reader.peek(c)) { - if (delims.count(c)) { - reader.resetPeek(); - return std::make_pair(true, res.str()); - } else if (Utils::isWhitespace(c)) { - // Do not add whitespace to the output buffer - state = STATE_WHITESPACE; - buf << c; - } 
else { - // If we just hat a sequence of whitespace, append it to the output - // buffer and continue - if (state == STATE_WHITESPACE) { - res << buf.str(); - buf.str(std::string{}); - buf.clear(); - state = STATE_IN_STRING; - } - res << c; - } - reader.consumePeek(); - } - return std::make_pair(true, res.str()); -} - -std::pair Reader::parseInteger( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - Number n; - if (n.parse(reader, logger, delims)) { - // Only succeed if the parsed number is an integer, otherwise this is an - // error - if (n.isInt()) { - return std::make_pair(true, n.intValue()); - } else { - return error(reader, logger, ERR_INVALID_INTEGER, n.intValue()); - } - } - return std::make_pair(false, n.intValue()); -} - -std::pair Reader::parseDouble( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - Number n; - bool res = n.parse(reader, logger, delims); - return std::make_pair(res, n.doubleValue()); -} - -std::pair Reader::parseGeneric( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) -{ - char c; - - // Skip all whitespace characters - reader.consumeWhitespace(); - while (reader.peek(c)) { - // Stop if a delimiter is reached - if (delims.count(c)) { - return error(reader, logger, ERR_UNEXPECTED_END, nullptr); - } - - // Parse a string if a quote is reached - if (c == '"' || c == '\'') { - auto res = parseString(reader, logger); - return std::make_pair(res.first, res.second.c_str()); - } - - if (c == '[') { - // TODO: Parse struct descriptor - } - - // Try to parse everything that looks like a number as number - if (Utils::isNumeric(c) || c == '-') { - Number n; - - // Fork the reader - utils::CharReaderFork fork = reader.fork(); - - // TODO: Fork logger - - // Try to parse the number - if (n.parse(fork, logger, delims)) { - // Parsing was successful, advance the reader - fork.commit(); - if (n.isInt()) { - return std::make_pair( - true, - 
Variant{static_cast(n.intValue())}); - } else { - return std::make_pair(true, n.doubleValue()); - } - } - } - - // Parse an unescaped string in any other case - auto res = parseUnescapedString(reader, logger, delims); - - // Handling for special primitive values - if (res.first) { - if (res.second == "true") { - return std::make_pair(true, Variant{true}); - } - if (res.second == "false") { - return std::make_pair(true, Variant{false}); - } - if (res.second == "null") { - return std::make_pair(true, Variant{nullptr}); - } - } - return std::make_pair(res.first, res.second.c_str()); - } - return error(reader, logger, ERR_UNEXPECTED_END, nullptr); -} -} -} - diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp deleted file mode 100644 index 4114d46..0000000 --- a/src/core/variant/Reader.hpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file Reader.hpp - * - * Provides parsers for various micro formats. These formats include integers, - * doubles, strings, JSON and the Ousía struct notation. 
- * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_VARIANT_READER_HPP_ -#define _OUSIA_VARIANT_READER_HPP_ - -#include -#include -#include - -#include -#include - -#include "Variant.hpp" - -namespace ousia { -namespace variant { - -class Reader { -private: - /** - * Parses a string which may either be enclosed by " or ', unescapes - * entities in the string as specified for JavaScript. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting - * character. - * @param logger is the logger instance that should be used to log error - * messages and warnings. - * @param delims is an optional set of delimiters after which parsing has to - * be stopped (the delimiters may occur inside the actual string, but not - * outside). If nullptr is given, no delimiter is used and a complete string - * is read. - */ - static std::pair parseString( - CharReader &reader, Logger &logger, - const std::unordered_set *delims); - -public: - /** - * Parses a string which may either be enclosed by " or ', unescapes - * entities in the string as specified for JavaScript. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting - * character. - * @param logger is the logger instance that should be used to log error - * messages and warnings. - * @param delims is a set of delimiters after which parsing has to - * be stopped (the delimiters may occur inside the actual string, but not - * outside). 
- */ - static std::pair parseString( - CharReader &reader, Logger &logger, - const std::unordered_set &delims) - { - return parseString(reader, logger, &delims); - } - - /** - * Parses a string which may either be enclosed by " or ', unescapes - * entities in the string as specified for JavaScript. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting - * character. - * @param logger is the logger instance that should be used to log error - * messages and warnings. - */ - static std::pair parseString(CharReader &reader, - Logger &logger) - { - return parseString(reader, logger, nullptr); - } - - /** - * Extracts an unescaped string from the given buffered char reader - * instance. This function just reads text until one of the given delimiter - * characters is reached. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. - * @param delims is a set of characters which will terminate the string. - * These characters are not included in the result. May not be nullptr. - */ - static std::pair parseUnescapedString( - CharReader &reader, Logger &logger, - const std::unordered_set &delims); - - /** - * Parses an integer from the given buffered char reader instance until one - * of the given delimiter characters is reached. - * - * @param reader is a reference to the CharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. 
- */ - static std::pair parseInteger( - CharReader &reader, Logger &logger, - const std::unordered_set &delims); - - /** - * Parses an double from the given buffered char reader instance until one - * of the given delimiter characters is reached. - * - * @param reader is a reference to the CharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. - */ - static std::pair parseDouble( - CharReader &reader, Logger &logger, - const std::unordered_set &delims); - - /** - * Parses an array of values. - */ - static std::pair parseArray( - CharReader &reader, Logger &logger, char delim = 0); - - /** - * Tries to parse the most specific item from the given stream until one of - * the given delimiters is reached or a meaningful literal has been read. - * The resulting variant represents the value that has been read. - * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. - * @param delims is a set of characters which will terminate the string. - * These characters are not included in the result. May not be nullptr. - */ - static std::pair parseGeneric( - CharReader &reader, Logger &logger, - const std::unordered_set &delims); -}; -} -} - -#endif /* _OUSIA_VARIANT_READER_HPP_ */ - diff --git a/src/core/variant/Variant.cpp b/src/core/variant/Variant.cpp deleted file mode 100644 index d33cd4f..0000000 --- a/src/core/variant/Variant.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include - -#include "Variant.hpp" - -namespace ousia { - -/* Class Variant::TypeException */ - -Variant::TypeException::TypeException(Type actualType, Type requestedType) - : OusiaException(std::string("Variant: Requested \"") + - Variant::getTypeName(requestedType) + - std::string("\" but is \"") + - Variant::getTypeName(actualType) + std::string("\"")), - actualType(actualType), - requestedType(requestedType) -{ -} - -/* Class Variant */ - -const char *Variant::getTypeName(Type type) -{ - switch (type) { - case Type::NULLPTR: - return "null"; - case Type::BOOL: - return "boolean"; - case Type::INT: - return "integer"; - case Type::DOUBLE: - return "double"; - case Type::STRING: - return "string"; - case Type::ARRAY: - return "array"; - case Type::MAP: - return "map"; - } - return "unknown"; -} - -Variant::boolType Variant::toBool() const -{ - switch (getType()) { - case Type::NULLPTR: - return false; - case Type::BOOL: - return asBool(); - case Type::INT: - return asInt() != 0; - case Type::DOUBLE: - return asDouble() != 0.0; - case Type::STRING: - return true; - case Type::ARRAY: - return true; - case Type::MAP: - return true; - } - return false; -} - -Variant::intType Variant::toInt() const -{ - switch (getType()) { - case Type::NULLPTR: - return 0; - case Type::BOOL: - return asBool() ? 1 : 0; - case Type::INT: - return asInt(); - case Type::DOUBLE: - return asDouble(); - case Type::STRING: - return 0; // TODO: Parse string as int - case Type::ARRAY: { - const arrayType &a = asArray(); - return (a.size() == 1) ? 
a[0].toInt() : 0; - } - case Type::MAP: - return 0; - } - return false; -} - -Variant::doubleType Variant::toDouble() const -{ - switch (getType()) { - case Type::NULLPTR: - return 0.0; - case Type::BOOL: - return asBool() ? 1.0 : 0.0; - case Type::INT: - return asInt(); - case Type::DOUBLE: - return asDouble(); - case Type::STRING: - return 0.0; // TODO: Parse string as double - case Type::ARRAY: { - const arrayType &a = asArray(); - return (a.size() == 1) ? a[0].toDouble() : 0; - } - case Type::MAP: - return 0; - } - return false; -} - -Variant::stringType Variant::toString(bool escape) const -{ - switch (getType()) { - case Type::NULLPTR: - return "null"; - case Type::BOOL: - return asBool() ? "true" : "false"; - case Type::INT: - return std::to_string(asInt()); - case Type::DOUBLE: - return std::to_string(asDouble()); - case Type::STRING: { - // TODO: Use proper serialization function - std::stringstream ss; - ss << "\"" << asString() << "\""; - return ss.str(); - } - case Type::ARRAY: - return Utils::join(asArray(), ", ", "[", "]"); - case Type::MAP: - return Utils::join(asMap(), ", ", "{", "}"); - } - return ""; -} - -} - diff --git a/src/core/variant/Variant.hpp b/src/core/variant/Variant.hpp deleted file mode 100644 index 1e62644..0000000 --- a/src/core/variant/Variant.hpp +++ /dev/null @@ -1,766 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. 
If not, see . -*/ - -/** - * @file Variant.hpp - * - * The Variant class is used to efficiently represent a variables of varying - * type. Variant instances are used to represent data given by the end user and - * to exchange information between the host application and the script clients. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_VARIANT_HPP_ -#define _OUSIA_VARIANT_HPP_ - -#include -#include -#include -#include -#include - -// TODO: Use -// http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html -// later (will allow to use 8 bytes for a variant) - -#include - -namespace ousia { -namespace variant { - -/** - * Instances of the Variant class represent any kind of data that is exchanged - * between the host application and the script engine. Variants are immutable. - */ -class Variant { -public: - /** - * Enum containing the possible types a variant may have. - */ - enum class Type : int16_t { - NULLPTR, - BOOL, - INT, - DOUBLE, - STRING, - ARRAY, - MAP - }; - - /** - * Exception thrown whenever a variant is accessed via a getter function - * that is not supported for the current variant type. - */ - class TypeException : public OusiaException { - private: - /** - * Internally used string holding the exception message. - */ - const std::string msg; - - public: - /** - * Contains the actual type of the variant. - */ - const Type actualType; - - /** - * Contains the requested type of the variant. - */ - const Type requestedType; - - /** - * Constructor of the TypeException. - * - * @param actualType describes the actual type of the variant. - * @param requestedType describes the type in which the variant was - * requested. 
- */ - TypeException(Type actualType, Type requestedType); - }; - - using boolType = bool; - using intType = int32_t; - using doubleType = double; - using stringType = std::string; - using arrayType = std::vector; - using mapType = std::map; - -private: - /** - * Used to store the actual type of the variant. - */ - Type type = Type::NULLPTR; - - /** - * Anonymous union containing the possible value of the variant. - */ - union { - /** - * The boolean value. Only valid if type is Type::BOOL. - */ - boolType boolVal; - /** - * The integer value. Only valid if type is Type::INT. - */ - intType intVal; - /** - * The number value. Only valid if type is Type::DOUBLE. - */ - doubleType doubleVal; - /** - * Pointer to the more complex data structures on the free store. Only - * valid if type is one of Type::STRING, Type::ARRAY, - * Type::MAP. - */ - void *ptrVal; - }; - - /** - * Internally used to convert the current pointer value to a reference of - * the specified type. - */ - template - T &asObj(Type requestedType) const - { - const Type actualType = getType(); - if (actualType == requestedType) { - return *(static_cast(ptrVal)); - } - throw TypeException{actualType, requestedType}; - } - - /** - * Used internally to assign the value of another Variant instance to this - * instance. - * - * @param v is the Variant instance that should be copied to this instance. - */ - void copy(const Variant &v) - { - destroy(); - type = v.type; - switch (type) { - case Type::NULLPTR: - break; - case Type::BOOL: - boolVal = v.boolVal; - break; - case Type::INT: - intVal = v.intVal; - break; - case Type::DOUBLE: - doubleVal = v.doubleVal; - break; - case Type::STRING: - ptrVal = new stringType(v.asString()); - break; - case Type::ARRAY: - ptrVal = new arrayType(v.asArray()); - break; - case Type::MAP: - ptrVal = new mapType(v.asMap()); - break; - } - } - - /** - * Used internally to move the value of another Variant instance to this - * instance. 
- * - * @param v is the Variant instance that should be copied to this instance. - */ - void move(Variant &&v) - { - destroy(); - type = v.type; - switch (type) { - case Type::NULLPTR: - break; - case Type::BOOL: - boolVal = v.boolVal; - break; - case Type::INT: - intVal = v.intVal; - break; - case Type::DOUBLE: - doubleVal = v.doubleVal; - break; - case Type::STRING: - case Type::ARRAY: - case Type::MAP: - ptrVal = v.ptrVal; - v.ptrVal = nullptr; - break; - } - v.type = Type::NULLPTR; - } - - /** - * Used internally to destroy any value that was allocated on the heap. - */ - void destroy() - { - if (ptrVal) { - switch (type) { - case Type::STRING: - delete static_cast(ptrVal); - break; - case Type::ARRAY: - delete static_cast(ptrVal); - break; - case Type::MAP: - delete static_cast(ptrVal); - break; - default: - break; - } - } - } - -public: - /** - * Copy constructor of the Variant class. - * - * @param v is the Variant instance that should be cloned. - */ - Variant(const Variant &v) : ptrVal(nullptr) { copy(v); } - - /** - * Move constructor of the Variant class. - * - * @param v is the reference to the Variant instance that should be moved, - * this instance is invalidated afterwards. - */ - Variant(Variant &&v) : ptrVal(nullptr) { move(std::move(v)); } - - /** - * Default constructor. Type is set to Type:null. - */ - Variant() : ptrVal(nullptr) { setNull(); } - - /** - * Default destructor, frees any memory that was allocated on the heap. - */ - ~Variant() { destroy(); } - - /** - * Constructor for null values. Initializes the variant as null value. - */ - Variant(std::nullptr_t) : ptrVal(nullptr) { setNull(); } - - /** - * Constructor for boolean values. - * - * @param b boolean value. - */ - Variant(boolType b) : ptrVal(nullptr) { setBool(b); } - - /** - * Constructor for integer values. - * - * @param i integer value. - */ - Variant(intType i) : ptrVal(nullptr) { setInt(i); } - - /** - * Constructor for double values. - * - * @param d double value. 
- */ - Variant(doubleType d) : ptrVal(nullptr) { setDouble(d); } - - /** - * Constructor for string values. The given string is copied and managed by - * the new Variant instance. - * - * @param s is a reference to a C-Style string used as string value. - */ - Variant(const char *s) : ptrVal(nullptr) { setString(s); } - - /** - * Constructor for array values. The given array is copied and managed by - * the new Variant instance. - * - * @param a is a reference to the array - */ - Variant(arrayType a) : ptrVal(nullptr) { setArray(std::move(a)); } - - /** - * Constructor for map values. The given map is copied and managed by the - * new Variant instance. - * - * @param m is a reference to the map. - */ - Variant(mapType m) : ptrVal(nullptr) { setMap(std::move(m)); } - - /** - * Copy assignment operator. - */ - Variant &operator=(const Variant &v) - { - copy(v); - return *this; - } - - /** - * Move assignment operator. - */ - Variant &operator=(Variant &&v) - { - move(std::move(v)); - return *this; - } - - /** - * Assign nullptr_t operator (allows to write Variant v = nullptr). - * - * @param p is an instance of std::nullptr_t. - */ - Variant &operator=(std::nullptr_t) - { - setNull(); - return *this; - } - - /** - * Assign a boolean value. - * - * @param b is the boolean value to which the variant should be set. - */ - Variant &operator=(boolType b) - { - setBool(b); - return *this; - } - - /** - * Assign an integer value. - * - * @param i is the integer value to which the variant should be set. - */ - Variant &operator=(intType i) - { - setInt(i); - return *this; - } - - /** - * Assign a double value. - * - * @param d is the double value to which the variant should be set. - */ - Variant &operator=(doubleType d) - { - setDouble(d); - return *this; - } - - /** - * Assign a zero terminated const char array. - * - * @param s is the zero terminated const char array to which the variant - * should be set. 
- */ - Variant &operator=(const char *s) - { - setString(s); - return *this; - } - - /** - * Checks whether this Variant instance represents the nullptr. - * - * @return true if the Variant instance represents the nullptr, false - * otherwise. - */ - bool isNull() const { return type == Type::NULLPTR; } - - /** - * Checks whether this Variant instance is a boolean. - * - * @return true if the Variant instance is a boolean, false otherwise. - */ - bool isBool() const { return type == Type::BOOL; } - - /** - * Checks whether this Variant instance is an integer. - * - * @return true if the Variant instance is an integer, false otherwise. - */ - bool isInt() const { return type == Type::INT; } - - /** - * Checks whether this Variant instance is a double. - * - * @return true if the Variant instance is a double, false otherwise. - */ - bool isDouble() const { return type == Type::DOUBLE; } - - /** - * Checks whether this Variant instance is a string. - * - * @return true if the Variant instance is a string, false otherwise. - */ - bool isString() const { return type == Type::STRING; } - - /** - * Checks whether this Variant instance is an array. - * - * @return true if the Variant instance is an array, false otherwise. - */ - bool isArray() const { return type == Type::ARRAY; } - - /** - * Checks whether this Variant instance is a map. - * - * @return true if the Variant instance is a map, false otherwise. - */ - bool isMap() const { return type == Type::MAP; } - - /** - * Returns the Variant boolean value. Performs no type conversion. Throws an - * exception if the underlying type is not a boolean. - * - * @return the boolean value. - */ - boolType asBool() const - { - if (isBool()) { - return boolVal; - } - throw TypeException{getType(), Type::BOOL}; - } - - /** - * Returns the Variant integer value. Performs no type conversion. Throws an - * exception if the underlying type is not an integer. - * - * @return the integer value. 
- */ - intType asInt() const - { - if (isInt()) { - return intVal; - } - throw TypeException{getType(), Type::INT}; - } - - /** - * Returns the Variant double value. Performs no type conversion. Throws an - * exception if the underlying type is not a double. - * - * @return the double value. - */ - doubleType asDouble() const - { - if (isDouble()) { - return doubleVal; - } - throw TypeException{getType(), Type::DOUBLE}; - } - - /** - * Returns a const reference to the string value. Performs no type - * conversion. Throws an exception if the underlying type is not a string. - * - * @return the string value as const reference. - */ - const stringType &asString() const - { - return asObj(Type::STRING); - } - - /** - * Returns a const reference to the string value. Performs no type - * conversion. Throws an exception if the underlying type is not a string. - * - * @return the string value as reference. - */ - stringType &asString() { return asObj(Type::STRING); } - - /** - * Returns a const reference to the array value. Performs no type - * conversion. Throws an exception if the underlying type is not an array. - * - * @return the array value as const reference. - */ - const arrayType &asArray() const { return asObj(Type::ARRAY); } - - /** - * Returns a const reference to the array value. Performs no type - * conversion. Throws an exception if the underlying type is not an array. - * - * @return the array value as reference. - */ - arrayType &asArray() { return asObj(Type::ARRAY); } - - /** - * Returns a const reference to the map value. Performs no type - * conversion. Throws an exception if the underlying type is not a map. - * - * @return the map value as const reference. - */ - const mapType &asMap() const { return asObj(Type::MAP); } - - /** - * Returns a reference to the map value. Performs no type conversion. - * Throws an exception if the underlying type is not a map. - * - * @return the map value as reference. 
- */ - mapType &asMap() { return asObj(Type::MAP); } - - /** - * Returns the value of the Variant as boolean, performs type conversion. - * - * @return the Variant value converted to a boolean value. - */ - boolType toBool() const; - - /** - * Returns the value of the Variant as integer, performs type conversion. - * - * @return the Variant value converted to an integer value. - */ - intType toInt() const; - - /** - * Returns the value of the Variant as double, performs type conversion. - * - * @return the Variant value converted to a double value. - */ - doubleType toDouble() const; - - /** - * Returns the value of the Variant as string, performs type conversion. - * - * @return the value of the variant as string. - * @param escape if set to true, adds double quotes to strings and escapes - * them properly (resulting in a more or less JSONesque output). - */ - stringType toString(bool escape = false) const; - - /** - * Sets the variant to null. - */ - void setNull() - { - destroy(); - type = Type::NULLPTR; - ptrVal = nullptr; - } - - /** - * Sets the variant to the given boolean value. - * - * @param b is the new boolean value. - */ - void setBool(boolType b) - { - destroy(); - type = Type::BOOL; - boolVal = b; - } - - /** - * Sets the variant to the given integer value. - * - * @param i is the new integer value. - */ - void setInt(intType i) - { - destroy(); - type = Type::INT; - intVal = i; - } - - /** - * Sets the variant to the given double value. - * - * @param d is the new double value. - */ - void setDouble(doubleType d) - { - destroy(); - type = Type::DOUBLE; - doubleVal = d; - } - - /** - * Sets the variant to the given string value. - * - * @param d is the new string value. - */ - void setString(const char *s) - { - if (isString()) { - asString().assign(s); - } else { - destroy(); - type = Type::STRING; - ptrVal = new stringType(s); - } - } - - /** - * Sets the variant to the given array value. - * - * @param a is the new array value. 
- */ - void setArray(arrayType a) - { - if (isArray()) { - asArray().swap(a); - } else { - destroy(); - type = Type::ARRAY; - ptrVal = new arrayType(std::move(a)); - } - } - - /** - * Sets the variant to the given map value. - * - * @param a is the new map value. - */ - void setMap(mapType m) - { - if (isMap()) { - asMap().swap(m); - } else { - destroy(); - type = Type::MAP; - ptrVal = new mapType(std::move(m)); - } - } - - /** - * Returns the current type of the Variant. - * - * @return the current type of the Variant. - */ - Type getType() const { return type; } - - /** - * Returns the name of the given variant type as C-style string. - */ - static const char *getTypeName(Type type); - - /** - * Returns the name of the type of this variant instance. - */ - const char *getTypeName() { return Variant::getTypeName(getType()); } - - /** - * Prints the Variant to the output stream. - */ - friend std::ostream &operator<<(std::ostream &os, const Variant &v) - { - return os << v.toString(true); - } - - /** - * Prints a key value pair to the output stream. - */ - friend std::ostream &operator<<(std::ostream &os, - const mapType::value_type &v) - { - // TODO: Use proper serialization function - return os << "\"" << v.first << "\": " << v.second.toString(true); - } - - /* - * Comprison operators. - */ - - friend bool operator<(const Variant &lhs, const Variant &rhs) - { - // If the types do not match, we can not do a meaningful comparison. 
- if (lhs.getType() != rhs.getType()) { - throw TypeException(lhs.getType(), rhs.getType()); - } - switch (lhs.getType()) { - case Type::NULLPTR: - return false; - case Type::BOOL: - return lhs.boolVal < rhs.boolVal; - case Type::INT: - return lhs.intVal < rhs.intVal; - case Type::DOUBLE: - return lhs.doubleVal < rhs.doubleVal; - case Type::STRING: - return lhs.asString() < rhs.asString(); - case Type::ARRAY: - return lhs.asArray() < rhs.asArray(); - case Type::MAP: - return lhs.asMap() < rhs.asMap(); - } - throw OusiaException("Internal Error! Unknown type!"); - } - friend bool operator>(const Variant &lhs, const Variant &rhs) - { - return rhs < lhs; - } - friend bool operator<=(const Variant &lhs, const Variant &rhs) - { - return !(lhs > rhs); - } - friend bool operator>=(const Variant &lhs, const Variant &rhs) - { - return !(lhs < rhs); - } - - friend bool operator==(const Variant &lhs, const Variant &rhs) - { - if (lhs.getType() != rhs.getType()) { - return false; - } - switch (lhs.getType()) { - case Type::NULLPTR: - return true; - case Type::BOOL: - return lhs.boolVal == rhs.boolVal; - case Type::INT: - return lhs.intVal == rhs.intVal; - case Type::DOUBLE: - return lhs.doubleVal == rhs.doubleVal; - case Type::STRING: - return lhs.asString() == rhs.asString(); - case Type::ARRAY: - return lhs.asArray() == rhs.asArray(); - case Type::MAP: - return lhs.asMap() == rhs.asMap(); - } - throw OusiaException("Internal Error! 
Unknown type!"); - } - - friend bool operator!=(const Variant &lhs, const Variant &rhs) - { - return !(lhs == rhs); - } -}; -} - -// Alias for the (very often used and unambigous) variant class -using Variant = variant::Variant; -} - -#endif /* _OUSIA_VARIANT_HPP_ */ - diff --git a/src/plugins/css/CSSParser.cpp b/src/plugins/css/CSSParser.cpp index 4cbe93f..5985047 100644 --- a/src/plugins/css/CSSParser.cpp +++ b/src/plugins/css/CSSParser.cpp @@ -18,7 +18,7 @@ #include "CSSParser.hpp" -#include +#include namespace ousia { namespace parser { @@ -77,7 +77,7 @@ static const std::map CSS_DESCRIPTORS = { Rooted CSSParser::parse(std::istream &is, ParserContext &ctx) { - BufferedCharReader input{is}; + CharReader input{is}; CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; tokenizer.ignoreComments = true; tokenizer.ignoreLinebreaks = true; @@ -228,14 +228,14 @@ Rooted CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer, Variant::arrayType args; // we require at least one argument, if parantheses are used // XXX - /*args.push_back(variant::Reader::parseGeneric(tokenizer.getInput(), + args.push_back(VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, - {',', ')'}).second);*/ + {',', ')'}).second); while (expect(COMMA, tokenizer, t, false, ctx)) { // as long as we find commas we expect new arguments. - /*args.push_back( - variant::Reader::parseGeneric( - tokenizer.getInput(), ctx.logger, {',', ')'}).second);*/ + args.push_back( + VariantReader::parseGeneric( + tokenizer.getInput(), ctx.logger, {',', ')'}).second); } expect(PAREN_CLOSE, tokenizer, t, true, ctx); // and we return with the finished Selector. @@ -334,8 +334,8 @@ bool CSSParser::parseRule(CodeTokenizer &tokenizer, ParserContext &ctx, expect(COLON, tokenizer, t, true, ctx); // then the value // TODO: Resolve key for appropriate parsing function here. 
- /*value = variant::Reader::parseGeneric(tokenizer.getInput(), ctx.logger, - {';'}).second;*/ + value = VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, + {';'}).second; // and a ; expect(SEMICOLON, tokenizer, t, true, ctx); return true; diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp index 82f0cd1..eeb5b2c 100644 --- a/src/plugins/css/CSSParser.hpp +++ b/src/plugins/css/CSSParser.hpp @@ -22,9 +22,9 @@ #include #include -#include #include #include +#include #include namespace ousia { diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp index ce2857e..9a7b4d8 100644 --- a/src/plugins/xml/XmlParser.cpp +++ b/src/plugins/xml/XmlParser.cpp @@ -20,7 +20,7 @@ #include -#include +#include #include #include "XmlParser.hpp" -- cgit v1.2.3