diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/BufferedCharReader.cpp | 263 | ||||
-rw-r--r-- | src/core/BufferedCharReader.hpp | 288 | ||||
-rw-r--r-- | src/core/CSS.hpp | 2 | ||||
-rw-r--r-- | src/core/CodeTokenizer.hpp | 8 | ||||
-rw-r--r-- | src/core/Logger.hpp | 609 | ||||
-rw-r--r-- | src/core/Registry.cpp | 3 | ||||
-rw-r--r-- | src/core/Tokenizer.cpp | 12 | ||||
-rw-r--r-- | src/core/Tokenizer.hpp | 27 | ||||
-rw-r--r-- | src/core/common/CharReader.cpp (renamed from src/core/utils/CharReader.cpp) | 20 | ||||
-rw-r--r-- | src/core/common/CharReader.hpp (renamed from src/core/utils/CharReader.hpp) | 95 | ||||
-rw-r--r-- | src/core/common/Exceptions.cpp (renamed from src/core/Exceptions.cpp) | 18 | ||||
-rw-r--r-- | src/core/common/Exceptions.hpp (renamed from src/core/Exceptions.hpp) | 77 | ||||
-rw-r--r-- | src/core/common/Logger.cpp (renamed from src/core/Logger.cpp) | 122 | ||||
-rw-r--r-- | src/core/common/Logger.hpp | 602 | ||||
-rw-r--r-- | src/core/common/TextCursor.hpp | 168 | ||||
-rw-r--r-- | src/core/common/Utils.cpp (renamed from src/core/Utils.cpp) | 0 | ||||
-rw-r--r-- | src/core/common/Utils.hpp (renamed from src/core/Utils.hpp) | 0 | ||||
-rw-r--r-- | src/core/common/Variant.cpp (renamed from src/core/variant/Variant.cpp) | 3 | ||||
-rw-r--r-- | src/core/common/Variant.hpp (renamed from src/core/variant/Variant.hpp) | 7 | ||||
-rw-r--r-- | src/core/common/VariantReader.cpp (renamed from src/core/variant/Reader.cpp) | 303 | ||||
-rw-r--r-- | src/core/common/VariantReader.hpp (renamed from src/core/variant/Reader.hpp) | 87 | ||||
-rw-r--r-- | src/core/parser/Parser.hpp | 4 | ||||
-rw-r--r-- | src/core/parser/ParserStack.cpp | 4 | ||||
-rw-r--r-- | src/core/parser/ParserStack.hpp | 2 | ||||
-rw-r--r-- | src/plugins/css/CSSParser.cpp | 11 | ||||
-rw-r--r-- | src/plugins/css/CSSParser.hpp | 2 | ||||
-rw-r--r-- | src/plugins/xml/XmlParser.cpp | 12 |
27 files changed, 1199 insertions, 1550 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp deleted file mode 100644 index aeedf12..0000000 --- a/src/core/BufferedCharReader.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <array> - -#include "Utils.hpp" - -#include "BufferedCharReader.hpp" - -namespace ousia { - -// Constants used within the linebreak statemachine. 
-static const uint8_t LB_STATE_NONE = 0x00; -static const uint8_t LB_STATE_ONE = 0x01; -static const uint8_t LB_STATE_LF = 0x10; -static const uint8_t LB_STATE_CR = 0x20; -static const uint8_t LB_STATE_MASK_CNT = 0x0F; -static const uint8_t LB_STATE_MASK_TYPE = 0xF0; - -/* Struct BufferedCharReader::ReadCursor */ - -BufferedCharReader::ReadCursor::ReadCursor(unsigned int line, - unsigned int column, - bool destructive) - : line(line), - column(column), - bufferElem(0), - bufferPos(0), - destructive(destructive), - lbState(LB_STATE_NONE) -{ -} - -void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor) -{ - this->line = cursor.line; - this->column = cursor.column; - this->bufferElem = cursor.bufferElem; - this->bufferPos = cursor.bufferPos; - this->lbState = cursor.lbState; -} - -/* Class BufferedCharReader */ - -BufferedCharReader::BufferedCharReader(int line, int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -BufferedCharReader::BufferedCharReader(const std::string &str, int line, - int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(const std::string &str) - : inputStream(nullptr), - readCursor(1, 1, true), - peekCursor(1, 1, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line, - int column) - : inputStream(&inputStream), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -void BufferedCharReader::feed(const std::string &data) -{ - if (!depleted && !inputStream) { - buffer.push_back(data); - } -} - -void BufferedCharReader::close() -{ - if (!inputStream) { - depleted = true; - } -} - -bool BufferedCharReader::substituteLinebreaks(ReadCursor &cursor, char *c) -{ - // Handle line breaks, inserts breakes 
after the following character - // combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n - if ((*c == '\n') || (*c == '\r')) { - // Determine the type of the current linebreak character - const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR; - - // Read the last count and the last type from the state - const uint8_t lastCount = cursor.lbState & LB_STATE_MASK_CNT; - const uint8_t lastType = cursor.lbState & LB_STATE_MASK_TYPE; - - // Set the current linebreak type and counter in the state - cursor.lbState = ((lastCount + 1) & 1) | type; - - // If either this is the first instance of this character or the same - // return character is repeated - if (!lastCount || (lastType == type)) { - *c = '\n'; - return true; - } - return false; - } - - // Find the state - cursor.lbState = LB_STATE_NONE; - return true; -} - -bool BufferedCharReader::readCharacterAtCursor(ReadCursor &cursor, char *c) -{ - bool hasChar = false; - while (!hasChar) { - // Abort if the current buffer element does not point to a valid entry - // in the buffer -- we must try to feed another data block into the - // internal buffer - if (cursor.bufferElem >= buffer.size()) { - // Abort if there is no more data or no input stream is set - if (depleted || !inputStream) { - return false; - } - - // Read a buffer of the specified size - constexpr std::streamsize BUFFER_SIZE = 1024; - std::array<char, BUFFER_SIZE> buf; - const std::streamsize cnt = - (*inputStream).read(buf.data(), BUFFER_SIZE).gcount(); - - // If data has been read, append it to the input buffer and try - // again - if (cnt > 0) { - buffer.emplace_back(std::string(buf.data(), cnt)); - continue; - } - - // End of file handling - if (inputStream->fail() || inputStream->eof()) { - depleted = true; - return false; - } - } - - // Fetch the current element the peek pointer points to - const std::string &data = buffer[cursor.bufferElem]; - - // Handle the "no data" case -- either in a destructive or - // non-destructive 
manner. - if (cursor.bufferPos >= data.length()) { - if (cursor.destructive) { - buffer.pop_front(); - } else { - cursor.bufferElem++; - } - cursor.bufferPos = 0; - continue; - } - - // Read the character, advance the buffer position - *c = *(data.data() + cursor.bufferPos); - cursor.bufferPos++; - - // Substitute linebreaks with a single LF (0x0A) - hasChar = substituteLinebreaks(cursor, c); - } - - // Update the position counter - if (*c == '\n') { - cursor.line++; - cursor.column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((*c & 0x80) && !(*c & 0x40))) { - cursor.column++; - } - } - - return true; -} - -bool BufferedCharReader::peek(char *c) -{ - return readCharacterAtCursor(peekCursor, c); -} - -bool BufferedCharReader::read(char *c) -{ - resetPeek(); - return readCharacterAtCursor(readCursor, c); -} - -void BufferedCharReader::consumePeek() -{ - // Remove all no longer needed buffer elements - for (unsigned int i = 0; i < peekCursor.bufferElem; i++) { - buffer.pop_front(); - } - peekCursor.bufferElem = 0; - - // Copy the peek cursor to the read cursor - readCursor.assign(peekCursor); -} - -bool BufferedCharReader::consumeWhitespace() -{ - char c; - while (peek(&c)) { - if (!Utils::isWhitespace(c)) { - resetPeek(); - return true; - } - consumePeek(); - } - return false; -} - -void BufferedCharReader::resetPeek() -{ - // Reset the peek cursor to the read cursor - peekCursor.assign(readCursor); -} - -bool BufferedCharReader::atEnd() const -{ - if (depleted || !inputStream) { - if (buffer.size() <= 0) { - return true; - } else if (buffer.size() == 1) { - return buffer[0].size() == readCursor.bufferPos; - } - } - return false; -} -} - diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp deleted file mode 100644 index e7f3186..0000000 --- a/src/core/BufferedCharReader.hpp +++ /dev/null @@ -1,288 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can 
redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file BufferedCharReader.hpp - * - * Contains the BufferedCharReader class which is used for reading/peeking - * single characters from an input stream or string. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_BUFFERED_CHAR_READER_H_ -#define _OUSIA_BUFFERED_CHAR_READER_H_ - -#include <deque> -#include <string> -#include <istream> -#include <cstdint> - -namespace ousia { - -// TODO: Better split this class into multiple classes with base class -// BufferedCharReader where each sub class represents one method of supplying -// the input data (feeding, initial string, input stream). - -/** - * The BufferedCharReader class is used for storing incomming data that - * is fed into the pipeline as well as reading/peeking single characters - * from that buffer. Additionally it counts the current column/row - * (with correct handling for UTF-8) and contains an internal state - * machine that handles the detection of linebreaks and converts these to a - * single '\n'. - */ -class BufferedCharReader { -private: - /** - * The ReadCursor structure is responsible for representing the read - * position within the text an all state machine states belonging to the - * cursor. There are two types of read cursors: destructive and - * non-destructive read cursors. - */ - struct ReadCursor { - /** - * The line the cursor currently points to. 
- */ - unsigned int line; - - /** - * The column the cursor currently points to. - */ - unsigned int column; - - /** - * The index of the element in the data buffer we're currently reading - * from. - */ - unsigned int bufferElem; - - /** - * The byte position within this data buffer. - */ - unsigned int bufferPos; - - /** - * Specifies whether this is a destructive cursor (bytes are discarded - * once they were read from the buffer). - */ - const bool destructive; - - /** - * State variable used in the internal state machine of the - * line feed detection. - */ - uint8_t lbState; - - /** - * Constructor of the ReadCursor structure. - * - * @param line is the start line. - * @param column is the start column. - * @param destructive specifies whether the ReadCursor is destructive - * (consumes all read characters, as used in the "read cursor") or - * non-destructive (as used in the "peek cursor"). - */ - ReadCursor(unsigned int line, unsigned int column, bool destructive); - - /** - * Copys the data from another ReadCursor without overriding the - * "destructive" flag. - * - * @param cursor is the cursor that should be copied. - */ - void assign(const ReadCursor &cursor); - }; - - /** - * Pointer at an (optional) input stream used for reading a chunk of data - * whenever the input buffer depletes. - */ - std::istream *inputStream; - - /** - * The read and the peek cursor. - */ - ReadCursor readCursor, peekCursor; - - /** - * Set to true if there is no more input data. - */ - bool depleted; - - /** - * Queue containing the data that has been fed into the char reader. - */ - std::deque<std::string> buffer; - - /** - * Substitute any combination of linebreaks in the incomming code with "\n". - * Returns true if the current character is meant as output, false - * otherwise. - */ - bool substituteLinebreaks(ReadCursor &cursor, char *c); - - /** - * Reads a character from the input buffer and advances the given read - * cursor. 
- * - * @param cursor is a reference to the read cursor that should be used - * for reading. - * @param hasChar is set to true, if a character is available, false if - * no character is available (e.g. because line breaks are substituted or - * the end of a buffer boundary is reached -- in this case this function - * should be called again with the same parameters.) - * @param c is a output parameter, which will be set to the read character. - * @param returns true if there was enough data in the buffer, false - * otherwise. - */ - bool readCharacterAtCursor(ReadCursor &cursor, char *c); - - /** - * Function that is called for each read character -- updates the row and - * column count. - */ - void updatePositionCounters(const char c); - -public: - - /** - * Constructor of the buffered char reader class with empty buffer as input. - * This operates the BufferedCharReader in a mode where new data has to be - * fed using the "feed" function and explicitly closed using the "close" - * function. - * - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(int line = 1, int column = 1); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(const std::string &str, int line, int column); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - */ - BufferedCharReader(const std::string &str); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param inputStream is the input stream from which incomming data should - * be read. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(std::istream &inputStream, int line = 1, int column = 1); - - /** - * Peeks a single character. 
If called multiple times, returns the - * character after the previously peeked character. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char *c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char *c); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Moves the read cursor to the next non-whitespace character. Returns - * false, if the end of the stream was reached. - * - * @return false if the end of the stream was reached, false othrwise. - */ - bool consumeWhitespace(); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Feeds new data into the internal buffer of the BufferedCharReader - * class. Only applicable if the buffered char reader was constructed - * without an input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void feed(const std::string &data); - - /** - * Tells the buffered char reader that no more data will be fed. - * Only applicable if the buffered char reader was constructed without an - * input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void close(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - * - * @return true if there is no more data. 
- */ - bool atEnd() const; - - /** - * Returns the current line (starting with one). - * - * @return the current line number. - */ - int getLine() const { return readCursor.line; } - - /** - * Returns the current column (starting with one). - * - * @return the current column number. - */ - int getColumn() const { return readCursor.column; } -}; -} - -#endif /* _OUSIA_BUFFERED_CHAR_READER_H_ */ - diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 1510f3a..a54d956 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -23,7 +23,7 @@ #include <vector> #include <tuple> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp> #include "Managed.hpp" #include "Node.hpp" diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 43c7abb..4190297 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -22,7 +22,7 @@ #include <map> #include <sstream> -#include "BufferedCharReader.hpp" +#include <core/common/CharReader.hpp> #include "Tokenizer.hpp" namespace ousia { @@ -108,8 +108,8 @@ public: /** * - * @param input a BufferedCharReader containing the input for this - * tokenizer, as with a regular tokenizer. + * @param input a CharReader containing the input for this tokenizer, as + * with a regular tokenizer. * @param root a TokenTreeNode representing the root of the TokenTree. * Please note that you have to specify all tokenIDs here that you use * in the descriptors map. @@ -120,7 +120,7 @@ public: * and this CodeTokenizer would recognize the token "//" as starting a * line comment. 
*/ - CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, + CodeTokenizer(CharReader &input, const TokenTreeNode &root, std::map<int, CodeTokenDescriptor> descriptors) : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) { diff --git a/src/core/Logger.hpp b/src/core/Logger.hpp deleted file mode 100644 index e6b97f4..0000000 --- a/src/core/Logger.hpp +++ /dev/null @@ -1,609 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file Logger.hpp - * - * Contains classes for logging messages in Ousía. Provides a generic Logger - * class, and TerminalLogger, an extension of Logger which logs do an output - * stream. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_LOGGER_HPP_ -#define _OUSIA_LOGGER_HPP_ - -#include <ostream> -#include <stack> -#include <string> -#include <vector> - -#include "Exceptions.hpp" - -namespace ousia { - -/** - * Enum containing the severities used for logging errors and debug messages. - */ -enum class Severity : int { - /** - * Indicates that this message was only printed for debugging. Note that - * in release builds messages with this severity are discarded. - */ - DEBUG = 0, - - /** - * A message which might provide additional information to the user. 
- */ - NOTE = 1, - - /** - * A message which warns of possible mistakes by the user which might not be - * actual errors but may lead to unintended behaviour. - */ - WARNING = 2, - - /** - * An error occurred while processing, however program execution continues, - * trying to deal with the error situation (graceful degradation). However, - * messages with this severity may be followed up by fatal errors. - */ - ERROR = 3, - - /** - * A fatal error occurred. Program execution cannot continue. - */ - FATAL_ERROR = 4 -}; - -#ifdef NDEBUG -static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE; -#else -static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG; -#endif - -/** - * The Logger class is the base class the individual logging systems should - * derive from. It provides a simple interface for logging errors, warnings and - * notes and filters these according to the set minimum severity. Additionally - * a stack of file names is maintained in order to allow simple descent into - * included files. Note however, that this base Logger class simply discards the - * incomming log messages. Use one of the derived classes to actually handle the - * log messages. - */ -class Logger { -public: - /** - * The message struct represents a single log message and all information - * attached to it. - */ - struct Message { - /** - * Severity of the log message. - */ - Severity severity; - - /** - * Actual log message. - */ - std::string msg; - - /** - * Refers to the file which provides the context for this error message. - * May be empty. - */ - std::string file; - - /** - * Line in the above file the error message refers to. Ignored if - * smaller than zero. - */ - int line; - - /** - * Column in the above file the error message refers to. Ignored if - * smaller than zero. - */ - int column; - - /** - * Constructor of the Message struct. - * - * @param severity describes the message severity. - * @param msg contains the actual message. 
- * @param file provides the context the message refers to. May be empty. - * @param line is the line in the above file the message refers to. - * @param column is the column in the above file the message refers to. - */ - Message(Severity severity, std::string msg, std::string file, int line, - int column) - : severity(severity), - msg(std::move(msg)), - file(std::move(file)), - line(line), - column(column){}; - - /** - * Returns true if the file string is set. - * - * @return true if the file string is set. - */ - bool hasFile() const { return !file.empty(); } - - /** - * Returns true if the line is set. - * - * @return true if the line number is a non-negative integer. - */ - bool hasLine() const { return line >= 0; } - - /** - * Returns true if column and line are set (since a column has no - * significance without a line number). - * - * @return true if line number and column number are non-negative - * integers. - */ - bool hasColumn() const { return hasLine() && column >= 0; } - }; - -private: - /** - * Minimum severity a log message should have before it is discarded. - */ - Severity minSeverity; - - /** - * Maximum encountered log message severity. - */ - Severity maxEncounteredSeverity; - - /** - * Stack containing the current file names that have been processed. - */ - std::stack<std::string> filenameStack; - -protected: - /** - * Function to be overriden by child classes to actually display or store - * the messages. The default implementation just discards all incomming - * messages. - * - * @param msg is an instance of the Message struct containing the data that - * should be logged. - */ - virtual void process(const Message &msg){}; - -public: - /** - * Constructor of the Logger class. - * - * @param minSeverity is the minimum severity a log message should have. - * Messages below this severity are discarded. 
- */ - Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY) - : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG) - { - } - - Logger(const Logger &) = delete; - - /** - * Virtual destructor. - */ - virtual ~Logger(){}; - - /** - * Logs the given message. Most generic log function. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void log(Severity severity, const std::string &msg, const std::string &file, - int line = -1, int column = -1); - - /** - * Logs the given message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void log(Severity severity, const std::string &msg, int line = -1, - int column = -1) - { - log(severity, msg, currentFilename(), line, column); - } - - /** - * Logs the given message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - * @tparam PosType is the actual type of pos and must implement a getLine - * and getColumn function. - */ - template <class PosType> - void logAt(Severity severity, const std::string &msg, const PosType &pos) - { - log(severity, msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs the given loggable exception. 
- * - * @param ex is the exception that should be logged. - */ - void log(const LoggableException &ex) - { - log(Severity::ERROR, ex.msg, - ex.file.empty() ? currentFilename() : ex.file, ex.line, ex.column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void debug(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::DEBUG, msg, file, line, column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void debug(const std::string &msg, int line = -1, int column = -1) - { - debug(msg, currentFilename(), line, column); - } - - /** - * Logs a debug message. The file name is set to the topmost file name on - * the file name stack. - * - * @param severity is the severity of the log message. - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template <class PosType> - void debugAt(const std::string &msg, const PosType &pos) - { - debug(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. 
- * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void note(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::NOTE, msg, file, line, column); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void note(const std::string &msg, int line = -1, int column = -1) - { - note(msg, currentFilename(), line, column); - } - - /** - * Logs a note. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template <class PosType> - void noteAt(const std::string &msg, const PosType &pos) - { - note(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void warning(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::WARNING, msg, file, line, column); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. 
- * @param pos is a const reference to a variable which provides position - * information. - */ - template <class PosType> - void warningAt(const std::string &msg, const PosType &pos) - { - warning(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a warning. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void warning(const std::string &msg, int line = -1, int column = -1) - { - warning(msg, currentFilename(), line, column); - } - - /** - * Logs an error message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void error(const std::string &msg, const std::string &file, int line = -1, - int column = -1) - { - log(Severity::ERROR, msg, file, line, column); - } - - /** - * Logs an error message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void error(const std::string &msg, int line = -1, int column = -1) - { - error(msg, currentFilename(), line, column); - } - - /** - * Logs an error message. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. 
- * @param pos is a const reference to a variable which provides position - * information. - */ - template <class PosType> - void errorAt(const std::string &msg, const PosType &pos) - { - error(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param file is the name of the file the message refers to. May be empty. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void fatalError(const std::string &msg, const std::string &file, - int line = -1, int column = -1) - { - log(Severity::FATAL_ERROR, msg, file, line, column); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param line is the line in the above file at which the error occured. - * Ignored if negative. - * @param column is the column in the above file at which the error occured. - * Ignored if negative. - */ - void fatalError(const std::string &msg, int line = -1, int column = -1) - { - fatalError(msg, currentFilename(), line, column); - } - - /** - * Logs a fatal error. The file name is set to the topmost file name on - * the file name stack. - * - * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. - */ - template <class PosType> - void fatalErrorAt(const std::string &msg, const PosType &pos) - { - fatalError(msg, pos.getLine(), pos.getColumn()); - } - - /** - * Pushes a new file name onto the internal filename stack. - * - * @param name is the name of the file that should be added to the filename - * stack. - * @return the size of the filename stack. 
This number can be passed to the - * "unwindFilenameStack" method in order to return the stack to state it was - * in after this function has been called. - */ - unsigned int pushFilename(const std::string &name); - - /** - * Pops the filename from the internal filename stack. - * - * @return the current size of the filename stack. - */ - unsigned int popFilename(); - - /** - * Pops elements from the filename stack while it has more elements than - * the given number and the stack is non-empty. - * - * @param pos is the position the filename stack should be unwound to. Use - * a number returned by pushFilename. - */ - void unwindFilenameStack(unsigned int pos); - - /** - * Returns the topmost filename from the internal filename stack. - * - * @return the topmost filename from the filename stack or an empty string - * if the filename stack is empty. - */ - std::string currentFilename() - { - return filenameStack.empty() ? std::string{} : filenameStack.top(); - } - - /** - * Returns the maximum severity that was encountered by the Logger but at - * least Severity::DEBUG. - * - * @return the severity of the most severe log message but at least - * Severity::DEBUG. - */ - Severity getMaxEncounteredSeverity() { return maxEncounteredSeverity; } - - /** - * Returns the minimum severity. Messages with a smaller severity are - * discarded. - * - * @return the minimum severity. - */ - Severity getMinSeverity() { return minSeverity; } - - /** - * Sets the minimum severity. Messages with a smaller severity will be - * discarded. Only new messages will be filtered according to the new value. - * - * @param severity is the minimum severity for new log messages. - */ - void setMinSeverity(Severity severity) { minSeverity = severity; } -}; - -/** - * Class extending the Logger class and printing the log messages to the given - * stream. - */ -class TerminalLogger : public Logger { -private: - /** - * Reference to the target output stream. 
- */ - std::ostream &os; - - /** - * If true, the TerminalLogger will use colors to make the log messages - * prettier. - */ - bool useColor; - -protected: - /** - * Implements the process function and logs the messages to the output. - */ - void process(const Message &msg) override; - -public: - /** - * Constructor of the TerminalLogger class. - * - * @param os is the output stream the log messages should be logged to. - * Should be set to std::cerr in most cases. - * @param useColor if true, the TerminalLogger class will do its best to - * use ANSI/VT100 control sequences for colored log messages. - * @param minSeverity is the minimum severity below which log messages are - * discarded. - */ - TerminalLogger(std::ostream &os, bool useColor = false, - Severity minSeverity = DEFAULT_MIN_SEVERITY) - : Logger(minSeverity), os(os), useColor(useColor) - { - } -}; -} - -#endif /* _OUSIA_LOGGER_HPP_ */ - diff --git a/src/core/Registry.cpp b/src/core/Registry.cpp index 6ff9594..74d1cf8 100644 --- a/src/core/Registry.cpp +++ b/src/core/Registry.cpp @@ -16,8 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <core/Logger.hpp> - +#include <core/common/Logger.hpp> #include <core/parser/Parser.hpp> namespace ousia { diff --git a/src/core/Tokenizer.cpp b/src/core/Tokenizer.cpp index b99d1ed..0af5f5a 100644 --- a/src/core/Tokenizer.cpp +++ b/src/core/Tokenizer.cpp @@ -72,7 +72,7 @@ TokenTreeNode::TokenTreeNode(const std::map<std::string, int> &inputs) { } -Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root) +Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root) : input(input), root(root) { } @@ -81,10 +81,10 @@ bool Tokenizer::prepare() { std::stringstream buffer; char c; - int startColumn = input.getColumn(); - int startLine = input.getLine(); + uint32_t startColumn = input.getColumn(); + uint32_t startLine = input.getLine(); bool bufEmpty = true; - while (input.peek(&c)) { + while (input.peek(c)) { if (root.children.find(c) != root.children.end()) { // if there might be a special token, keep peeking forward // until we find the token (or we don't). @@ -107,7 +107,7 @@ bool Tokenizer::prepare() input.consumePeek(); } } - if (!input.peek(&c)) { + if (!input.peek(c)) { // if we are at the end we break off the search. break; } @@ -153,7 +153,7 @@ bool Tokenizer::prepare() } } else{ //if we found nothing, read at least one character. 
- input.peek(&c); + input.peek(c); } } buffer << c; diff --git a/src/core/Tokenizer.hpp b/src/core/Tokenizer.hpp index 8f80150..33327cc 100644 --- a/src/core/Tokenizer.hpp +++ b/src/core/Tokenizer.hpp @@ -19,11 +19,12 @@ #ifndef _OUSIA_TOKENIZER_HPP_ #define _OUSIA_TOKENIZER_HPP_ +#include <cstdint> +#include <deque> #include <istream> #include <map> -#include <deque> -#include "BufferedCharReader.hpp" +#include <core/common/CharReader.hpp> namespace ousia { @@ -120,13 +121,13 @@ static const int TOKEN_TEXT = -2; struct Token { int tokenId; std::string content; - int startColumn; - int startLine; - int endColumn; - int endLine; + uint32_t startColumn; + uint32_t startLine; + uint32_t endColumn; + uint32_t endLine; - Token(int tokenId, std::string content, int startColumn, int startLine, - int endColumn, int endLine) + Token(int tokenId, std::string content, uint32_t startColumn, uint32_t startLine, + uint32_t endColumn, uint32_t endLine) : tokenId(tokenId), content(content), startColumn(startColumn), @@ -160,7 +161,7 @@ struct Token { */ class Tokenizer { private: - BufferedCharReader &input; + CharReader &input; const TokenTreeNode &root; std::deque<Token> peeked; unsigned int peekCursor = 0; @@ -185,14 +186,14 @@ protected: public: /** * @param input The input of a Tokenizer is given in the form of a - * BufferedCharReader. Please refer to the respective documentation. + * CharReader. Please refer to the respective documentation. * @param root This is meant to be the root of a TokenTree giving the * specification of user-defined tokens this Tokenizer should recognize. * The Tokenizer promises to not change the TokenTree such that you can * re-use the same specification for multiple inputs. * Please refer to the TokenTreeNode documentation for more information. 
*/ - Tokenizer(BufferedCharReader &input, const TokenTreeNode &root); + Tokenizer(CharReader &input, const TokenTreeNode &root); /** * The next method consumes one Token from the input stream and gives @@ -224,9 +225,9 @@ public: */ void consumePeek(); - const BufferedCharReader &getInput() const { return input; } + const CharReader &getInput() const { return input; } - BufferedCharReader &getInput() { return input; } + CharReader &getInput() { return input; } }; } diff --git a/src/core/utils/CharReader.cpp b/src/core/common/CharReader.cpp index c661b6f..4bd81ed 100644 --- a/src/core/utils/CharReader.cpp +++ b/src/core/common/CharReader.cpp @@ -17,15 +17,14 @@ */ #include <algorithm> +#include <cassert> #include <limits> #include <sstream> -#include <core/Utils.hpp> - #include "CharReader.hpp" +#include "Utils.hpp" namespace ousia { -namespace utils { /* Helper functions */ @@ -80,6 +79,16 @@ Buffer::Buffer(const std::string &str) startBucket = buckets.begin(); } +#ifndef NDEBUG +Buffer::~Buffer() +{ + // Make sure all cursors have been deleted + for (bool cursor_alive : alive) { + assert(!cursor_alive); + } +} +#endif + void Buffer::advance(BucketIterator &it) { it++; @@ -507,7 +516,7 @@ CharReaderFork CharReader::fork() return CharReaderFork(buffer, readCursor, peekCursor, coherent); } -CharReader::Context CharReader::getContext(ssize_t maxSize) +TextCursor::Context CharReader::getContext(ssize_t maxSize) { // Clone the current read cursor Buffer::CursorId cur = buffer->createCursor(readCursor.cursor); @@ -603,7 +612,7 @@ CharReader::Context CharReader::getContext(ssize_t maxSize) // Delete the newly created cursor buffer->deleteCursor(cur); - return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start, + return TextCursor::Context{ss.str(), relPos, !foundBegin || tStart != start, !foundEnd || tEnd != end}; } @@ -628,5 +637,4 @@ void CharReaderFork::commit() parentPeekCursor.assign(buffer, peekCursor); } } -} diff --git 
a/src/core/utils/CharReader.hpp b/src/core/common/CharReader.hpp index 5daa21d..7be5e08 100644 --- a/src/core/utils/CharReader.hpp +++ b/src/core/common/CharReader.hpp @@ -32,8 +32,9 @@ #include <memory> #include <vector> +#include "TextCursor.hpp" + namespace ousia { -namespace utils { /** * A chunked ring buffer used in CharReader to provide access to an input stream @@ -242,6 +243,14 @@ public: */ Buffer(const std::string &str); +#ifndef NDEBUG + /** + * Destructor of the Buffer class. Makes sure that all cursors have been + * freed. + */ + ~Buffer(); +#endif + // No copy Buffer(const Buffer &) = delete; @@ -352,52 +361,6 @@ class CharReaderFork; * of linebreaks and converts these to a single '\n'. */ class CharReader { -public: - /** - * The context struct is used to represent the current context the char - * reader is in. This context can for example be used when building error - * messages. - */ - struct Context { - /** - * Set to the content of the current line. - */ - std::string line; - - /** - * Relative position (in characters) within that line. - */ - size_t relPos; - - /** - * Set to true if the beginning of the line has been truncated (because - * the reader position is too far away from the actual position of the - * line). - */ - bool truncatedStart; - - /** - * Set to true if the end of the line has been truncated (because the - * reader position is too far away from the actual end position of the - * line. - */ - bool truncatedEnd; - - Context() - : line(), relPos(0), truncatedStart(false), truncatedEnd(false) - { - } - - Context(std::string line, size_t relPos, bool truncatedStart, - bool truncatedEnd) - : line(std::move(line)), - relPos(relPos), - truncatedStart(truncatedStart), - truncatedEnd(truncatedEnd) - { - } - }; - protected: /** * Internally used cursor structure for managing the read and the peek @@ -412,19 +375,20 @@ protected: /** * Current line the cursor is in. 
*/ - uint32_t line; + TextCursor::PosType line; /** * Current column the cursor is in. */ - uint32_t column; + TextCursor::PosType column; /** * Constructor of the Cursor class. * * @param cursor is the underlying cursor in the Buffer instance. */ - Cursor(Buffer::CursorId cursor, size_t line, size_t column) + Cursor(Buffer::CursorId cursor, TextCursor::PosType line, + TextCursor::PosType column) : cursor(cursor), line(line), column(column) { } @@ -584,31 +548,35 @@ public: bool atEnd() const { return buffer->atEnd(readCursor.cursor); } /** - * Returns the current line (starting with one). - * - * @return the current line number. + * Returns the offset of the read cursor in bytes. */ - uint32_t getLine() const { return readCursor.line; } + size_t getOffset() const { return buffer->offset(readCursor.cursor); } /** - * Returns the current column (starting with one). - * - * @return the current column number. + * Returns the line number the read cursor currently is at. */ - uint32_t getColumn() const { return readCursor.column; } + TextCursor::PosType getLine() const { return readCursor.line; } /** - * Returns the current byte offset of the read cursor. - * - * @return the byte position within the stream. + * Returns the column the read cursor currently is at. + */ + TextCursor::PosType getColumn() const { return readCursor.column; } + + /** + * Returns the current position of the read cursor (line and column). */ - size_t getOffset() const { return buffer->offset(readCursor.cursor); }; + TextCursor::Position getPosition() const + { + return TextCursor::Position(getLine(), getColumn(), getOffset()); + } /** * Returns the line the read cursor currently is in, but at most the * given number of characters in the form of a Context structure. 
+ * + * @param maxSize is the maximum length of the extracted context */ - Context getContext(ssize_t maxSize); + TextCursor::Context getContext(ssize_t maxSize = 60); }; /** @@ -652,7 +620,6 @@ public: void commit(); }; } -} #endif /* _OUSIA_CHAR_READER_HPP_ */ diff --git a/src/core/Exceptions.cpp b/src/core/common/Exceptions.cpp index d064f35..30c5626 100644 --- a/src/core/Exceptions.cpp +++ b/src/core/common/Exceptions.cpp @@ -25,21 +25,21 @@ namespace ousia { /* Class LoggableException */ std::string LoggableException::formatMessage(const std::string &msg, - const std::string &file, - int line, int column) + const TextCursor::Position &pos, + const TextCursor::Context &ctx) { std::stringstream ss; ss << "error "; - if (!file.empty()) { - ss << "while processing \"" << file << "\" "; - } - if (line >= 0) { - ss << "at line " << line << ", "; - if (column >= 0) { - ss << "column " << column << " "; + if (pos.hasLine()) { + ss << "at line " << pos.line << ", "; + if (pos.hasColumn()) { + ss << "column " << pos.column << " "; } } ss << "with message: " << msg; + if (ctx.valid()) { + ss << " in context \"" << ctx.text << "\""; + } return ss.str(); } } diff --git a/src/core/Exceptions.hpp b/src/core/common/Exceptions.hpp index 00d6106..443c176 100644 --- a/src/core/Exceptions.hpp +++ b/src/core/common/Exceptions.hpp @@ -27,6 +27,8 @@ #ifndef _OUSIA_EXCEPTIONS_HPP_ #define _OUSIA_EXCEPTIONS_HPP_ +#include "TextCursor.hpp" + namespace ousia { /** @@ -81,80 +83,81 @@ private: * reported to the runtime environment. */ static std::string formatMessage(const std::string &msg, - const std::string &file, int line, - int column); + const TextCursor::Position &pos, + const TextCursor::Context &ctx); public: /** - * Message describing the error that occured. + * Reported error message. */ const std::string msg; /** - * Name of the file in which the error occured. May be empty. + * Position in the document at which the exception occurred. 
*/ - const std::string file; + const TextCursor::Position pos; /** - * Line at which the exception occured. Negative values are ignored. + * Context in the document text in which the exception occurred. */ - const int line; - - /** - * Column at which the exception occured. Negative values are ignored. - */ - const int column; + const TextCursor::Context ctx; /** * Constructor of the LoggableException class. * * @param msg contains the error message. - * @param file provides the context the message refers to. May be empty. - * @param line is the line in the above file the message refers to. - * @param column is the column in the above file the message refers to. + * @param pos is the position at which the error occured. + * @param ctx describes the context in which the error occured. */ - LoggableException(std::string msg, std::string file, int line = -1, - int column = -1) - : OusiaException(formatMessage(msg, file, line, column)), + LoggableException(std::string msg, + TextCursor::Position pos = TextCursor::Position{}, + TextCursor::Context ctx = TextCursor::Context{}) + : OusiaException(formatMessage(msg, pos, ctx)), msg(std::move(msg)), - file(std::move(file)), - line(line), - column(column) + pos(std::move(pos)), + ctx(std::move(ctx)) { } /** - * Constructor of the LoggableException class with empty file. + * Constructor of the LoggableException class. * * @param msg contains the error message. * @param line is the line in the above file the message refers to. * @param column is the column in the above file the message refers to. + * @param offs is the byte offset. 
*/ - LoggableException(std::string msg, int line = -1, int column = -1) - : OusiaException(formatMessage(msg, "", line, column)), - msg(std::move(msg)), - line(line), - column(column) + LoggableException(std::string msg, TextCursor::PosType line, + TextCursor::PosType column, size_t offs) + : LoggableException(msg, TextCursor::Position(line, column, offs)) { } /** - * Constructor of the LoggableException class with empty file and an - * position object. + * Constructor of LoggableException for arbitrary position objects. * * @param msg is the actual log message. - * @param pos is a const reference to a variable which provides position - * information. + * @param pos is a reference to a variable with position and context data. */ template <class PosType> - LoggableException(std::string msg, const PosType &pos) - : OusiaException( - formatMessage(msg, "", pos.getLine(), pos.getColumn())), - msg(std::move(msg)), - line(pos.getLine()), - column(pos.getColumn()) + LoggableException(std::string msg, PosType &pos) + : LoggableException(std::move(msg), pos.getPosition(), pos.getContext()) { } + + /** + * Returns the position at which the exception occured in the text. + * + * @return the position descriptor. + */ + TextCursor::Position getPosition() const { return pos; } + + /** + * Returns the context in which the exception occured in the text. + * + * @return the context descriptor. 
+ */ + TextCursor::Context getContext() const { return ctx; } }; } diff --git a/src/core/Logger.cpp b/src/core/common/Logger.cpp index 17f55a6..c1d6343 100644 --- a/src/core/Logger.cpp +++ b/src/core/common/Logger.cpp @@ -25,48 +25,66 @@ namespace ousia { /* Class Logger */ -void Logger::log(Severity severity, const std::string &msg, - const std::string &file, int line, int column) +void Logger::log(Severity severity, std::string msg, TextCursor::Position pos, + TextCursor::Context ctx) { - // Copy the current severity level + // Update the maximum encountered severity level if (static_cast<int>(severity) > static_cast<int>(maxEncounteredSeverity)) { maxEncounteredSeverity = severity; } - // Call the actual log message function if the severity is larger or equal - // to the minimum severity + // Only process the message if its severity is larger than the + // set minimum severity. if (static_cast<int>(severity) >= static_cast<int>(minSeverity)) { - process(Message{severity, msg, file, line, column}); + processMessage( + Message{severity, std::move(msg), std::move(pos), std::move(ctx)}); } } -unsigned int Logger::pushFilename(const std::string &name) +LoggerFork Logger::fork() { return LoggerFork{this, minSeverity}; } + +/* Class LoggerFork */ + +void LoggerFork::processMessage(Message msg) +{ + calls.push_back(Call(CallType::MESSAGE, messages.size())); + messages.push_back(msg); +} + +void LoggerFork::processPushFile(File file) { - filenameStack.push(name); - return filenameStack.size(); + calls.push_back(Call(CallType::PUSH_FILE, files.size())); + files.push_back(file); } -unsigned int Logger::popFilename() +void LoggerFork::processPopFile() { - filenameStack.pop(); - return filenameStack.size(); + calls.push_back(Call(CallType::POP_FILE, 0)); } -void Logger::unwindFilenameStack(unsigned int pos) +void LoggerFork::commit() { - while (filenameStack.size() > pos && !filenameStack.empty()) { - filenameStack.pop(); + for (const Call &call : calls) { + switch 
(call.type) { + case CallType::MESSAGE: { + const Message &msg = messages[call.dataIdx]; + parent->log(msg.severity, msg.msg, msg.pos, msg.ctx); + break; + } + case CallType::PUSH_FILE: { + const File &file = files[call.dataIdx]; + parent->pushFile(file.file, file.pos, file.ctx); + break; + } + case CallType::POP_FILE: + parent->popFile(); + break; + } } } -/* Class TerminalLogger */ +/* Class Terminal */ -/** - * Small class used internally for formated terminal output using ANSI/VT100 - * escape codes on supported terminals. - * - * TODO: Deactivate if using windows or use the corresponding API function. - */ class Terminal { private: /** @@ -109,29 +127,47 @@ public: } }; -void TerminalLogger::process(const Message &msg) +/* Class TerminalLogger */ + +/** + * Small class used internally for formated terminal output using ANSI/VT100 + * escape codes on supported terminals. + * + * TODO: Deactivate if using windows or use the corresponding API function. + */ + +std::string TerminalLogger::currentFilename() +{ + if (!files.empty()) { + return files.top().file; + } + return std::string{}; +} + +void TerminalLogger::processMessage(Message msg) { Terminal t(useColor); // Print the file name - if (msg.hasFile()) { - os << t.color(Terminal::WHITE, true) << msg.file << t.reset(); + std::string filename = currentFilename(); + bool hasFile = !filename.empty(); + if (hasFile) { + os << t.color(Terminal::WHITE, true) << filename << t.reset(); } // Print line and column number - if (msg.hasLine()) { - if (msg.hasFile()) { + if (msg.pos.hasLine()) { + if (hasFile) { os << ':'; } - os << t.color(Terminal::WHITE, true) << msg.line - << t.reset(); - if (msg.hasColumn()) { - os << ':' << msg.column; + os << t.color(Terminal::WHITE, true) << msg.pos.line << t.reset(); + if (msg.pos.hasColumn()) { + os << ':' << msg.pos.column; } } // Print the optional seperator - if (msg.hasFile() || msg.hasLine()) { + if (hasFile || msg.pos.hasLine()) { os << ": "; } @@ -156,6 +192,28 @@ void 
TerminalLogger::process(const Message &msg) // Print the actual message os << msg.msg << std::endl; + + // Print the error message context if available + if (msg.ctx.valid()) { + size_t relPos = msg.ctx.relPos; + if (msg.ctx.truncatedStart) { + os << "[...] "; + relPos += 6; + } + os << msg.ctx.text; + if (msg.ctx.truncatedEnd) { + os << " [...]"; + } + os << std::endl; + for (size_t i = 0; i < relPos; i++) { + os << ' '; + } + os << t.color(Terminal::GREEN) << '^' << t.reset() << std::endl; + } } + +void TerminalLogger::processPushFile(File file) { files.push(file); } + +void TerminalLogger::processPopFile() { files.pop(); } } diff --git a/src/core/common/Logger.hpp b/src/core/common/Logger.hpp new file mode 100644 index 0000000..be82ea0 --- /dev/null +++ b/src/core/common/Logger.hpp @@ -0,0 +1,602 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Logger.hpp + * + * Contains classes for logging messages in Ousía. Provides a generic Logger + * class, and TerminalLogger, an extension of Logger which logs do an output + * stream. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_LOGGER_HPP_ +#define _OUSIA_LOGGER_HPP_ + +#include <ostream> +#include <stack> +#include <string> +#include <vector> + +#include "Exceptions.hpp" +#include "TextCursor.hpp" + +namespace ousia { + +/** + * Enum containing the severities used for logging errors and debug messages. + */ +enum class Severity : int { + /** + * Indicates that this message was only printed for debugging. Note that + * in release builds messages with this severity are discarded. + */ + DEBUG = 0, + + /** + * A message which might provide additional information to the user. + */ + NOTE = 1, + + /** + * A message which warns of possible mistakes by the user which might not be + * actual errors but may lead to unintended behaviour. + */ + WARNING = 2, + + /** + * An error occurred while processing, however program execution continues, + * trying to deal with the error situation (graceful degradation). However, + * messages with this severity may be followed up by fatal errors. + */ + ERROR = 3, + + /** + * A fatal error occurred. Program execution cannot continue. + */ + FATAL_ERROR = 4 +}; + +#ifdef NDEBUG +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE; +#else +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG; +#endif + +// Forward declaration +class LoggerFork; + +/** + * The Logger class is the base class the individual logging systems should + * derive from. It provides a simple interface for logging errors, warnings and + * notes and filters these according to the set minimum severity. Additionally + * a stack of file names is maintained in order to allow simple descent into + * included files. Note however, that this base Logger class simply discards the + * incomming log messages. Use one of the derived classes to actually handle the + * log messages. + */ +class Logger { +public: + /** + * Describes an included file. 
+ */ + struct File { + /** + * Is the name of the file. + */ + std::string file; + + /** + * Position at which the file was included. + */ + TextCursor::Position pos; + + /** + * Context in which the file was included. + */ + TextCursor::Context ctx; + + /** + * Constructor of the File struct. + * + * @param file is the name of the included file. + * @param pos is the position in the parent file, at which this file + * was included. + * @param ctx is the context in which the feil was included. + */ + File(std::string file, TextCursor::Position pos, + TextCursor::Context ctx) + : file(file), pos(pos), ctx(ctx) + { + } + }; + + /** + * The message struct represents a single log message and all information + * attached to it. + */ + struct Message { + /** + * Severity of the log message. + */ + Severity severity; + + /** + * Actual log message. + */ + std::string msg; + + /** + * Position in the text the message refers to. + */ + TextCursor::Position pos; + + /** + * Context the message refers to. + */ + TextCursor::Context ctx; + + /** + * Constructor of the Message struct. + * + * @param severity describes the message severity. + * @param msg contains the actual message. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + Message(Severity severity, std::string msg, TextCursor::Position pos, + TextCursor::Context ctx) + : severity(severity), + msg(std::move(msg)), + pos(std::move(pos)), + ctx(std::move(ctx)){}; + }; + +protected: + /** + * Minimum severity a log message should have before it is discarded. + */ + const Severity minSeverity; + + /** + * Maximum encountered log message severity. + */ + Severity maxEncounteredSeverity; + + /** + * Function to be overriden by child classes to actually display or store + * the messages. The default implementation just discards all incomming + * messages. 
+ * + * @param msg is an instance of the Message struct containing the data that + * should be logged. + */ + virtual void processMessage(Message msg) {} + + /** + * Called whenever a new file is pushed onto the stack. + * + * @param file is the file that should be pushed onto the stack. + */ + virtual void processPushFile(File file) {} + + /** + * Called whenever a file is popped from the stack. + */ + virtual void processPopFile() {} + +public: + /** + * Constructor of the Logger class. + * + * @param minSeverity is the minimum severity a log message should have. + * Messages below this severity are discarded. + */ + Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY) + : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG) + { + } + + /** + * Virtual destructor. + */ + virtual ~Logger(){}; + + // No copy + Logger(const Logger &) = delete; + + // No assign + Logger &operator=(const Logger &) = delete; + + /** + * Logs the given message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param pos is the position the log message refers to. + * @param ctx describes the context of the log message. + */ + void log(Severity severity, std::string msg, + TextCursor::Position pos = TextCursor::Position{}, + TextCursor::Context ctx = TextCursor::Context{}); + + /** + * Logs the given loggable exception. + * + * @param ex is the exception that should be logged. + */ + void log(const LoggableException &ex) + { + log(Severity::ERROR, ex.msg, ex.getPosition(), ex.getContext()); + } + + /** + * Logs the given message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param pos is a reference to a variable which provides position and + * context information. 
+ */ + template <class PosType> + void logAt(Severity severity, std::string msg, PosType &pos) + { + log(severity, std::move(msg), pos.getPosition(), pos.getContext()); + } + + /** + * Logs a debug message. Debug messages will be discarded if the software + * is compiled in the release mode (with the NDEBUG flag). + * + * @param msg is the actual log message. + * @param pos describes the position of the debug message. + * @param ctx describes the context of the debug message. + */ + void debug(std::string msg, + TextCursor::Position pos = TextCursor::Position{}, + TextCursor::Context ctx = TextCursor::Context{}) + { +#ifndef NDEBUG + log(Severity::DEBUG, std::move(msg), std::move(pos), std::move(ctx)); +#endif + } + + /** + * Logs a debug message. Debug messages will be discarded if the software + * is compiled in the release mode. + * + * @param msg is the actual log message. + * @param pos is a reference to a variable which provides position and + * context information. + */ + template <class PosType> + void debug(std::string msg, PosType &pos) + { +#ifndef NDEBUG + logAt(Severity::DEBUG, std::move(msg), pos); +#endif + } + + /** + * Logs a note. + * + * @param msg is the actual log message. + * @param pos describes the position of the note. + * @param ctx describes the context of the note. + */ + void note(std::string msg, + TextCursor::Position pos = TextCursor::Position{}, + TextCursor::Context ctx = TextCursor::Context{}) + { + log(Severity::NOTE, std::move(msg), std::move(pos), std::move(ctx)); + } + + /** + * Logs a note. + * + * @param msg is the actual log message. + * @param pos is a reference to a variable which provides position and + * context information. + */ + template <class PosType> + void note(std::string msg, PosType &pos) + { + logAt(Severity::NOTE, std::move(msg), pos); + } + + /** + * Logs a warning. + * + * @param msg is the actual log message. + * @param pos describes the position of the warning. 
+ * @param ctx describes the context of the warning. + */ + void warning(std::string msg, + TextCursor::Position pos = TextCursor::Position{}, + TextCursor::Context ctx = TextCursor::Context{}) + { + log(Severity::WARNING, std::move(msg), std::move(pos), std::move(ctx)); + } + + /** + * Logs a warning. + * + * @param msg is the actual log message. + * @param pos is a reference to a variable which provides position and + * context information. + */ + template <class PosType> + void warning(std::string msg, PosType &pos) + { + logAt(Severity::WARNING, std::move(msg), pos); + } + + /** + * Logs an error message. + * + * @param msg is the actual log message. + * @param pos is the position at which the error occurred. + * @param ctx describes the context in which the error occurred. + */ + void error(std::string msg, + TextCursor::Position pos = TextCursor::Position{}, + TextCursor::Context ctx = TextCursor::Context{}) + { + log(Severity::ERROR, std::move(msg), std::move(pos), std::move(ctx)); + } + + /** + * Logs an error message. + * + * @param msg is the actual log message. + * @param pos is a reference to a variable which provides position and + * context information. + */ + template <class PosType> + void error(std::string msg, PosType &pos) + { + logAt(Severity::ERROR, std::move(msg), pos); + } + + /** + * Logs a fatal error message. + * + * @param msg is the actual log message. + * @param pos is the position at which the error occurred. + * @param ctx describes the context in which the error occurred. + */ + void fatalError(std::string msg, + TextCursor::Position pos = TextCursor::Position{}, + TextCursor::Context ctx = TextCursor::Context{}) + { + log(Severity::FATAL_ERROR, std::move(msg), std::move(pos), + std::move(ctx)); + } + + /** + * Logs a fatal error message. + * + * @param msg is the actual log message. + * @param pos is a reference to a variable which provides position and + * context information.
+ */ + template <class PosType> + void fatalError(std::string msg, PosType &pos) + { + logAt(Severity::FATAL_ERROR, std::move(msg), pos); + } + + /** + * Pushes a new file name onto the internal filename stack. + * + * @param name is the name of the file to be added to the stack. + * @param pos is the position from which the new file is included. + * @param ctx is the context in which the new file is included. + */ + void pushFile(std::string name, + TextCursor::Position pos = TextCursor::Position{}, + TextCursor::Context ctx = TextCursor::Context{}) + { + processPushFile(File(std::move(name), std::move(pos), std::move(ctx))); + } + + /** + * Pops the filename from the internal filename stack. + * + * Does not return a value; the call is forwarded to processPopFile(). + */ + void popFile() { processPopFile(); } + + /** + * Returns the maximum severity that was encountered by the Logger but at + * least Severity::DEBUG. + * + * @return the severity of the most severe log message but at least + * Severity::DEBUG. + */ + Severity getMaxEncounteredSeverity() { return maxEncounteredSeverity; } + + /** + * Returns the minimum severity. Messages with a smaller severity are + * discarded. + * + * @return the minimum severity. + */ + Severity getMinSeverity() { return minSeverity; } + + /** + * Returns a forked logger instance which can be used to collect log + * messages for which it is not sure whether they will be used. + */ + LoggerFork fork(); +}; + +/** + * Fork of the Logger -- stores all logged messages without actually pushing + * them to the underlying logger instance. + */ +class LoggerFork : public Logger { +private: + friend Logger; + + /** + * Internally used to store the incoming function calls. + */ + enum class CallType { MESSAGE, PUSH_FILE, POP_FILE }; + + /** + * Data structure used to represent a logger function call. + */ + struct Call { + /** + * Type of the function call. + */ + CallType type; + + /** + * Index of the associated data in the type-specific vector.
+ */ + size_t dataIdx; + + /** + * Constructor of the Call structure. + * + * @param type is the type of the call. + * @param dataIdx is the index of the associated data in the type + * specific data vector. + */ + Call(CallType type, size_t dataIdx) : type(type), dataIdx(dataIdx) {} + }; + + /** + * Vector storing all incoming calls. + */ + std::vector<Call> calls; + + /** + * Vector storing all incoming messages. + */ + std::vector<Message> messages; + + /** + * Vector storing all incoming pushed files. + */ + std::vector<File> files; + + /** + * Parent logger instance. + */ + Logger *parent; + + /** + * Constructor of the LoggerFork class. + * + * @param minSeverity is the minimum severity a message should have to be + * stored. + * @param parent is the parent logger instance. + */ + LoggerFork(Logger *parent, Severity minSeverity) + : Logger(minSeverity), parent(parent) + { + } + +protected: + void processMessage(Message msg) override; + void processPushFile(File file) override; + void processPopFile() override; + +public: + /** + * Commits all collected messages to the parent Logger instance. + */ + void commit(); + + /** + * Explicitly declared move constructor. + */ + LoggerFork(LoggerFork &&l) + : Logger(l.getMinSeverity()), + calls(std::move(l.calls)), + messages(std::move(l.messages)), + files(std::move(l.files)), + parent(std::move(l.parent)) + { + } +}; + +/** + * Class extending the Logger class and printing the log messages to the given + * stream. + */ +class TerminalLogger : public Logger { +private: + /** + * Reference to the target output stream. + */ + std::ostream &os; + + /** + * If true, the TerminalLogger will use colors to make the log messages + * prettier. + */ + bool useColor; + + /** + * Stack used to keep the file references. + */ + std::stack<File> files; + + /** + * The size of the stack the last time a file backtrace was printed.
+ */ + size_t lastFilePrinted = 0; + +protected: + void processMessage(Message msg) override; + void processPushFile(File file) override; + void processPopFile() override; + +public: + /** + * Constructor of the TerminalLogger class. + * + * @param os is the output stream the log messages should be logged to. + * Should be set to std::cerr in most cases. + * @param useColor if true, the TerminalLogger class will do its best to + * use ANSI/VT100 control sequences for colored log messages. + * @param minSeverity is the minimum severity below which log messages are + * discarded. + */ + TerminalLogger(std::ostream &os, bool useColor = false, + Severity minSeverity = DEFAULT_MIN_SEVERITY) + : Logger(minSeverity), os(os), useColor(useColor) + { + } + + /** + * Returns the name of the topmost file. + */ + std::string currentFilename(); +}; +} + +#endif /* _OUSIA_LOGGER_HPP_ */ + diff --git a/src/core/common/TextCursor.hpp b/src/core/common/TextCursor.hpp new file mode 100644 index 0000000..2633345 --- /dev/null +++ b/src/core/common/TextCursor.hpp @@ -0,0 +1,168 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_TEXT_CURSOR_HPP_ +#define _OUSIA_TEXT_CURSOR_HPP_ + +namespace ousia { +namespace TextCursor { + +/** + * Type used for representing line or column positions. 
+ */ +using PosType = unsigned int; + +/** + * Struct representing a position within the text. A position is defined by a + * byte offset (which is always reproducible), a line number and a column + * number. + */ +struct Position { + /** + * Current line, starting with one. + */ + PosType line; + + /** + * Current column, starting with one. + */ + PosType column; + + /** + * Current byte offset. + */ + size_t offs; + + /** + * Default constructor of the Position struct, initializes all members + * with zero. + */ + Position() : line(0), column(0), offs(0) {} + + /** + * Creates a new Position struct with only a line and no column. + * The column and the byte offset are set to zero. + * + * @param line is the line number. + */ + Position(PosType line) : line(line), column(0), offs(0) {} + + /** + * Creates a new Position struct with a line and column. + * + * @param line is the line number. + * @param column is the column number. + */ + Position(PosType line, PosType column) : line(line), column(column), offs(0) + { + } + + /** + * Creates a new Position struct with a line, column and byte offset. + * + * @param line is the line number. + * @param column is the column number. + * @param offs is the byte offset. + */ + Position(PosType line, PosType column, size_t offs) + : line(line), column(column), offs(offs) + { + } + + /** + * Returns true, if the line number is valid, false otherwise. + * + * @return true for valid line numbers. + */ + bool hasLine() const { return line > 0; } + + /** + * Returns true, if the column number is valid, false otherwise. + * + * @return true for valid column numbers. + */ + bool hasColumn() const { return column > 0; } +}; + +/** + * Represents the current context a CharReader is in. Used for building error + * messages. + */ +struct Context { + /** + * Set to the content of the current line. + */ + std::string text; + + /** + * Relative position (in characters) within that line. May point to + * locations beyond the text content.
+ */ + PosType relPos; + + /** + * Set to true if the beginning of the line has been truncated (because + * the reader position is too far away from the actual position of the + * line). + */ + bool truncatedStart; + + /** + * Set to true if the end of the line has been truncated (because the + * reader position is too far away from the actual end position of the + * line). + */ + bool truncatedEnd; + + /** + * Default constructor, initializes all members with zero values. + */ + Context() : text(), relPos(0), truncatedStart(false), truncatedEnd(false) {} + + /** + * Constructor of the Context class. + * + * @param text is the current line the text cursor is at. + * @param relPos is the relative position of the text cursor within that + * line. + * @param truncatedStart specifies whether the text was truncated at the + * beginning. + * @param truncatedEnd specifies whether the text was truncated at the + * end. + */ + Context(std::string text, size_t relPos, bool truncatedStart, + bool truncatedEnd) + : text(std::move(text)), + relPos(relPos), + truncatedStart(truncatedStart), + truncatedEnd(truncatedEnd) + { + } + + /** + * Returns true if the context text is not empty. + * + * @return true if the context is valid and e.g. should be printed.
+ */ + bool valid() const { return !text.empty(); } +}; +} +} + +#endif /* _OUSIA_TEXT_CURSOR_HPP_ */ + diff --git a/src/core/Utils.cpp b/src/core/common/Utils.cpp index c460ed4..c460ed4 100644 --- a/src/core/Utils.cpp +++ b/src/core/common/Utils.cpp diff --git a/src/core/Utils.hpp b/src/core/common/Utils.hpp index 5332b50..5332b50 100644 --- a/src/core/Utils.hpp +++ b/src/core/common/Utils.hpp diff --git a/src/core/variant/Variant.cpp b/src/core/common/Variant.cpp index d33cd4f..27fc6e7 100644 --- a/src/core/variant/Variant.cpp +++ b/src/core/common/Variant.cpp @@ -18,8 +18,7 @@ #include <sstream> -#include <core/Utils.hpp> - +#include "Utils.hpp" #include "Variant.hpp" namespace ousia { diff --git a/src/core/variant/Variant.hpp b/src/core/common/Variant.hpp index 1e62644..d411fd3 100644 --- a/src/core/variant/Variant.hpp +++ b/src/core/common/Variant.hpp @@ -39,10 +39,9 @@ // http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html // later (will allow to use 8 bytes for a variant) -#include <core/Exceptions.hpp> +#include "Exceptions.hpp" namespace ousia { -namespace variant { /** * Instances of the Variant class represent any kind of data that is exchanged @@ -758,9 +757,5 @@ public: }; } -// Alias for the (very often used and unambigous) variant class -using Variant = variant::Variant; -} - #endif /* _OUSIA_VARIANT_HPP_ */ diff --git a/src/core/variant/Reader.cpp b/src/core/common/VariantReader.cpp index ba857af..a31a658 100644 --- a/src/core/variant/Reader.cpp +++ b/src/core/common/VariantReader.cpp @@ -21,18 +21,16 @@ #include <cmath> #include <sstream> -#include <core/Utils.hpp> - -#include "Reader.hpp" +#include "VariantReader.hpp" +#include "Utils.hpp" namespace ousia { -namespace variant { // TODO: Better error messages (like "Expected 'x' but got 'y'") // TODO: Replace delims with single char delim where possible // TODO: Use custom return value instead of std::pair // TODO: Allow buffered char reader to "fork" 
-// TODO: Rename BufferedCharReader to shorter CharReader +// TODO: Rename CharReader to shorter CharReader // TODO: Implement context in CharReader (to allow error messages to extract the // current line) @@ -97,13 +95,13 @@ private: * Appends the value of the character c to the internal number * representation and reports any errors that might occur. */ - bool appendChar(char c, int base, Part p, BufferedCharReader &reader, + bool appendChar(char c, int base, Part p, CharReader &reader, Logger &logger) { // Check whether the given character is valid int v = charValue(c); if (v < 0 || v >= base) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + logger.error(ERR_UNEXPECTED_CHAR, reader); return false; } @@ -123,7 +121,7 @@ private: // Check for any overflows if (a < 0 || n < 0 || d < 0 || e < 0) { - logger.errorAt(ERR_TOO_LARGE, reader); + logger.error(ERR_TOO_LARGE, reader); return false; } return true; @@ -176,135 +174,139 @@ public: * the given logger instance. Numbers are terminated by one of the given * delimiters. 
*/ - bool parse(BufferedCharReader &reader, Logger &logger, - const std::unordered_set<char> &delims) - { - State state = State::INIT; - char c; + bool parse(CharReader &reader, Logger &logger, + const std::unordered_set<char> &delims); +}; - // Consume the first whitespace characters - reader.consumeWhitespace(); +bool Number::parse(CharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) +{ + State state = State::INIT; + char c; - // Iterate over the FSM to extract numbers - while (reader.peek(&c)) { - // Abort, once a delimiter or whitespace is reached - if (Utils::isWhitespace(c) || delims.count(c)) { - reader.resetPeek(); - break; - } + // Consume the first whitespace characters + reader.consumeWhitespace(); - // The character is not a whitespace character and not a delimiter - switch (state) { - case State::INIT: - case State::HAS_MINUS: - switch (c) { - case '-': - // Do not allow multiple minus signs - if (state == State::HAS_MINUS) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::HAS_MINUS; - s = -1; - break; - case '0': - // Remember a leading zero for the detection of "0x" - state = State::LEADING_ZERO; - break; - case '.': - // Remember a leading point as ".eXXX" is invalid - state = State::LEADING_POINT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::LEADING_ZERO: - if (c == 'x' || c == 'X') { - state = State::HEX; + // Iterate over the FSM to extract numbers + while (reader.peek(c)) { + // Abort, once a delimiter or whitespace is reached + if (Utils::isWhitespace(c) || delims.count(c)) { + reader.resetPeek(); + break; + } + + // The character is not a whitespace character and not a delimiter + switch (state) { + case State::INIT: + case State::HAS_MINUS: + switch (c) { + case '-': + // Do not allow multiple minus signs + if (state == State::HAS_MINUS) { + logger.error(ERR_UNEXPECTED_CHAR, 
reader); + return false; + } + state = State::HAS_MINUS; + s = -1; break; - } - // fallthrough - case State::INT: - switch (c) { - case '.': - state = State::POINT; - break; - case 'e': - case 'E': - state = State::EXP_INIT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::HEX: - if (!appendChar(c, 16, Part::A, reader, logger)) { - return false; - } - break; - case State::LEADING_POINT: - case State::POINT: - switch (c) { - case 'e': - case 'E': - if (state == State::LEADING_POINT) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::EXP_INIT; - break; - default: - state = State::POINT; - if (!appendChar(c, 10, Part::N, reader, logger)) { - return false; - } - break; - } + case '0': + // Remember a leading zero for the detection of "0x" + state = State::LEADING_ZERO; + break; + case '.': + // Remember a leading point as ".eXXX" is invalid + state = State::LEADING_POINT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::LEADING_ZERO: + if (c == 'x' || c == 'X') { + state = State::HEX; break; - case State::EXP_HAS_MINUS: - case State::EXP_INIT: - if (c == '-') { - if (state == State::EXP_HAS_MINUS) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + } + // fallthrough + case State::INT: + switch (c) { + case '.': + state = State::POINT; + break; + case 'e': + case 'E': + state = State::EXP_INIT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::HEX: + if (!appendChar(c, 16, Part::A, reader, logger)) { + return false; + } + break; + case State::LEADING_POINT: + case State::POINT: + switch (c) { + case 'e': + case 'E': + if (state == State::LEADING_POINT) { + logger.error(ERR_UNEXPECTED_CHAR, reader); return false; } - state = 
State::EXP_HAS_MINUS; - sE = -1; - } else { - state = State::EXP; - if (!appendChar(c, 10, Part::E, reader, logger)) { + state = State::EXP_INIT; + break; + default: + state = State::POINT; + if (!appendChar(c, 10, Part::N, reader, logger)) { return false; } + break; + } + break; + case State::EXP_HAS_MINUS: + case State::EXP_INIT: + if (c == '-') { + if (state == State::EXP_HAS_MINUS) { + logger.error(ERR_UNEXPECTED_CHAR, reader); + return false; } - break; - case State::EXP: + state = State::EXP_HAS_MINUS; + sE = -1; + } else { + state = State::EXP; if (!appendChar(c, 10, Part::E, reader, logger)) { return false; } - break; - } - reader.consumePeek(); + } + break; + case State::EXP: + if (!appendChar(c, 10, Part::E, reader, logger)) { + return false; + } + break; } + reader.consumePeek(); + } - // States in which ending is valid. Log an error in other states - if (state == State::LEADING_ZERO || state == State::HEX || - state == State::INT || state == State::POINT || - state == State::EXP) { - return true; - } - logger.errorAt(ERR_UNEXPECTED_END, reader); - return false; + // States in which ending is valid. 
Log an error in other states + if (state == State::LEADING_ZERO || state == State::HEX || + state == State::INT || state == State::POINT || + state == State::EXP) { + return true; } -}; + logger.error(ERR_UNEXPECTED_END, reader); + return false; +} + /* Class Reader */ @@ -317,15 +319,15 @@ static const int STATE_WHITESPACE = 5; static const int STATE_RESYNC = 6; template <class T> -static std::pair<bool, T> error(BufferedCharReader &reader, Logger &logger, +static std::pair<bool, T> error(CharReader &reader, Logger &logger, const char *err, T res) { - logger.errorAt(err, reader); + logger.error(err, reader); return std::make_pair(false, std::move(res)); } -std::pair<bool, std::string> Reader::parseString( - BufferedCharReader &reader, Logger &logger, +std::pair<bool, std::string> VariantReader::parseString( + CharReader &reader, Logger &logger, const std::unordered_set<char> *delims) { // Initialize the internal state @@ -339,9 +341,9 @@ std::pair<bool, std::string> Reader::parseString( // Statemachine whic iterates over each character in the stream // TODO: Combination of peeking and consumePeek is stupid as consumePeek is // the default (read and putBack would obviously be better, yet the latter - // is not trivial to implement in the current BufferedCharReader). + // is not trivial to implement in the current CharReader). 
char c; - while (reader.peek(&c)) { + while (reader.peek(c)) { switch (state) { case STATE_INIT: if (c == '"' || c == '\'') { @@ -408,7 +410,7 @@ std::pair<bool, std::string> Reader::parseString( if (Utils::isNumeric(c)) { // TODO: Parse octal 000 sequence } else { - logger.errorAt(ERR_INVALID_ESCAPE, reader); + logger.error(ERR_INVALID_ESCAPE, reader); } break; } @@ -422,8 +424,8 @@ std::pair<bool, std::string> Reader::parseString( return error(reader, logger, ERR_UNEXPECTED_END, res.str()); } -std::pair<bool, Variant::arrayType> Reader::parseArray( - BufferedCharReader &reader, Logger &logger, char delim) +std::pair<bool, Variant::arrayType> VariantReader::parseArray( + CharReader &reader, Logger &logger, char delim) { Variant::arrayType res; bool hadError = false; @@ -436,7 +438,7 @@ std::pair<bool, Variant::arrayType> Reader::parseArray( // Iterate over the characters, use the parseGeneric function to read the // pairs - while (reader.peek(&c)) { + while (reader.peek(c)) { // Generically handle the end of the array if (state != STATE_INIT && c == delim) { reader.consumePeek(); @@ -474,7 +476,7 @@ std::pair<bool, Variant::arrayType> Reader::parseArray( } else if (!Utils::isWhitespace(c)) { hadError = true; state = STATE_RESYNC; - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + logger.error(ERR_UNEXPECTED_CHAR, reader); } reader.consumePeek(); break; @@ -490,8 +492,8 @@ std::pair<bool, Variant::arrayType> Reader::parseArray( return error(reader, logger, ERR_UNEXPECTED_END, res); } -std::pair<bool, std::string> Reader::parseUnescapedString( - BufferedCharReader &reader, Logger &logger, +std::pair<bool, std::string> VariantReader::parseUnescapedString( + CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { std::stringstream res; @@ -503,7 +505,7 @@ std::pair<bool, std::string> Reader::parseUnescapedString( // Copy all characters, skip whitespace at the end int state = STATE_IN_STRING; - while (reader.peek(&c)) { + while (reader.peek(c)) { if 
(delims.count(c)) { reader.resetPeek(); return std::make_pair(true, res.str()); @@ -527,8 +529,8 @@ std::pair<bool, std::string> Reader::parseUnescapedString( return std::make_pair(true, res.str()); } -std::pair<bool, int64_t> Reader::parseInteger( - BufferedCharReader &reader, Logger &logger, +std::pair<bool, int64_t> VariantReader::parseInteger( + CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { Number n; @@ -544,8 +546,8 @@ std::pair<bool, int64_t> Reader::parseInteger( return std::make_pair(false, n.intValue()); } -std::pair<bool, double> Reader::parseDouble( - BufferedCharReader &reader, Logger &logger, +std::pair<bool, double> VariantReader::parseDouble( + CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { Number n; @@ -553,15 +555,15 @@ std::pair<bool, double> Reader::parseDouble( return std::make_pair(res, n.doubleValue()); } -std::pair<bool, Variant> Reader::parseGeneric( - BufferedCharReader &reader, Logger &logger, +std::pair<bool, Variant> VariantReader::parseGeneric( + CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { char c; // Skip all whitespace characters reader.consumeWhitespace(); - while (reader.peek(&c)) { + while (reader.peek(c)) { // Stop if a delimiter is reached if (delims.count(c)) { return error(reader, logger, ERR_UNEXPECTED_END, nullptr); @@ -577,11 +579,19 @@ std::pair<bool, Variant> Reader::parseGeneric( // TODO: Parse struct descriptor } - // Try to parse a number if a character in [0-9-] is reached + // Try to parse everything that looks like a number as number if (Utils::isNumeric(c) || c == '-') { - reader.resetPeek(); Number n; - if (n.parse(reader, logger, delims)) { + + // Fork the reader + CharReaderFork fork = reader.fork(); + + // TODO: Fork logger + + // Try to parse the number + if (n.parse(fork, logger, delims)) { + // Parsing was successful, advance the reader + fork.commit(); if (n.isInt()) { return std::make_pair( true, @@ -589,8 +599,6 
@@ std::pair<bool, Variant> Reader::parseGeneric( } else { return std::make_pair(true, n.doubleValue()); } - } else { - return std::make_pair(false, n.doubleValue()); } } @@ -614,5 +622,4 @@ std::pair<bool, Variant> Reader::parseGeneric( return error(reader, logger, ERR_UNEXPECTED_END, nullptr); } } -} diff --git a/src/core/variant/Reader.hpp b/src/core/common/VariantReader.hpp index 710f7c4..5e7c5d2 100644 --- a/src/core/variant/Reader.hpp +++ b/src/core/common/VariantReader.hpp @@ -17,7 +17,7 @@ */ /** - * @file Reader.hpp + * @file VariantReader.hpp * * Provides parsers for various micro formats. These formats include integers, * doubles, strings, JSON and the Ousía struct notation. @@ -32,23 +32,21 @@ #include <unordered_set> #include <utility> -#include <core/BufferedCharReader.hpp> -#include <core/Logger.hpp> - +#include "CharReader.hpp" +#include "Logger.hpp" #include "Variant.hpp" namespace ousia { -namespace variant { -class Reader { +class VariantReader { private: /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the BufferedCharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. @@ -58,7 +56,7 @@ private: * is read. 
*/ static std::pair<bool, std::string> parseString( - BufferedCharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> *delims); public: @@ -66,9 +64,9 @@ public: * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the BufferedCharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. @@ -77,93 +75,92 @@ public: * outside). */ static std::pair<bool, std::string> parseString( - BufferedCharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims) { - return parseString(reader, logger, &delims); + return parseString(VariantReader, logger, &delims); } /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the BufferedCharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. 
*/ - static std::pair<bool, std::string> parseString(BufferedCharReader &reader, + static std::pair<bool, std::string> parseString(CharReader &VariantReader, Logger &logger) { - return parseString(reader, logger, nullptr); + return parseString(VariantReader, logger, nullptr); } /** - * Extracts an unescaped string from the given buffered char reader + * Extracts an unescaped string from the given buffered char VariantReader * instance. This function just reads text until one of the given delimiter * characters is reached. * - * @param reader is a reference to the BufferedCharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. * @param delims is a set of characters which will terminate the string. * These characters are not included in the result. May not be nullptr. */ static std::pair<bool, std::string> parseUnescapedString( - BufferedCharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** - * Parses an integer from the given buffered char reader instance until one - * of the given delimiter characters is reached. + * Parses an integer from the given buffered char VariantReader instance + * until one of the given delimiter characters is reached. * - * @param reader is a reference to the BufferedCharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. + * @param VariantReader is a reference to the CharReader instance from + * which the character data should been VariantReader. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. 
*/ static std::pair<bool, int64_t> parseInteger( - BufferedCharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** - * Parses an double from the given buffered char reader instance until one - * of the given delimiter characters is reached. + * Parses an double from the given buffered char VariantReader instance + * until one of the given delimiter characters is reached. * - * @param reader is a reference to the BufferedCharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. + * @param VariantReader is a reference to the CharReader instance from + * which the character data should been VariantReader. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. */ static std::pair<bool, double> parseDouble( - BufferedCharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** * Parses an array of values. */ static std::pair<bool, Variant::arrayType> parseArray( - BufferedCharReader &reader, Logger &logger, char delim = 0); + CharReader &VariantReader, Logger &logger, char delim = 0); /** * Tries to parse the most specific item from the given stream until one of * the given delimiters is reached or a meaningful literal has been read. * The resulting variant represents the value that has been read. * - * @param reader is a reference to the BufferedCharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. * @param delims is a set of characters which will terminate the string. 
* These characters are not included in the result. May not be nullptr. */ static std::pair<bool, Variant> parseGeneric( - BufferedCharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); }; } -} #endif /* _OUSIA_VARIANT_READER_HPP_ */ diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index 5dac956..e155cfd 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -32,10 +32,10 @@ #include <set> #include <string> -#include <core/Exceptions.hpp> #include <core/Node.hpp> -#include <core/Logger.hpp> #include <core/Registry.hpp> +#include <core/common/Exceptions.hpp> +#include <core/common/Logger.hpp> #include "Scope.hpp" diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp index dca7f35..5e801ee 100644 --- a/src/core/parser/ParserStack.cpp +++ b/src/core/parser/ParserStack.cpp @@ -20,8 +20,8 @@ #include "ParserStack.hpp" -#include <core/Utils.hpp> -#include <core/Exceptions.hpp> +#include <core/common/Utils.hpp> +#include <core/common/Exceptions.hpp> namespace ousia { namespace parser { diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp index c5ed4e4..233f4f9 100644 --- a/src/core/parser/ParserStack.hpp +++ b/src/core/parser/ParserStack.hpp @@ -37,7 +37,7 @@ #include <stack> #include <vector> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp> #include "Parser.hpp" diff --git a/src/plugins/css/CSSParser.cpp b/src/plugins/css/CSSParser.cpp index 4bbcc18..5985047 100644 --- a/src/plugins/css/CSSParser.cpp +++ b/src/plugins/css/CSSParser.cpp @@ -18,7 +18,7 @@ #include "CSSParser.hpp" -#include <core/variant/Reader.hpp> +#include <core/common/VariantReader.hpp> namespace ousia { namespace parser { @@ -77,7 +77,7 @@ static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = { Rooted<Node> CSSParser::parse(std::istream &is, ParserContext &ctx) { - BufferedCharReader input{is}; + CharReader 
input{is}; CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; tokenizer.ignoreComments = true; tokenizer.ignoreLinebreaks = true; @@ -227,13 +227,14 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer, // parse the argument list. Variant::arrayType args; // we require at least one argument, if parantheses are used - args.push_back(variant::Reader::parseGeneric(tokenizer.getInput(), + // XXX + args.push_back(VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, {',', ')'}).second); while (expect(COMMA, tokenizer, t, false, ctx)) { // as long as we find commas we expect new arguments. args.push_back( - variant::Reader::parseGeneric( + VariantReader::parseGeneric( tokenizer.getInput(), ctx.logger, {',', ')'}).second); } expect(PAREN_CLOSE, tokenizer, t, true, ctx); @@ -333,7 +334,7 @@ bool CSSParser::parseRule(CodeTokenizer &tokenizer, ParserContext &ctx, expect(COLON, tokenizer, t, true, ctx); // then the value // TODO: Resolve key for appropriate parsing function here. 
- value = variant::Reader::parseGeneric(tokenizer.getInput(), ctx.logger, + value = VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, {';'}).second; // and a ; expect(SEMICOLON, tokenizer, t, true, ctx); diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp index 82f0cd1..eeb5b2c 100644 --- a/src/plugins/css/CSSParser.hpp +++ b/src/plugins/css/CSSParser.hpp @@ -22,9 +22,9 @@ #include <vector> #include <utility> -#include <core/BufferedCharReader.hpp> #include <core/CodeTokenizer.hpp> #include <core/CSS.hpp> +#include <core/common/CharReader.hpp> #include <core/parser/Parser.hpp> namespace ousia { diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp index ce2857e..824219a 100644 --- a/src/plugins/xml/XmlParser.cpp +++ b/src/plugins/xml/XmlParser.cpp @@ -20,7 +20,7 @@ #include <expat.h> -#include <core/Utils.hpp> +#include <core/common/Utils.hpp> #include <core/parser/ParserStack.hpp> #include "XmlParser.hpp" @@ -204,11 +204,15 @@ Rooted<Node> XmlParser::parse(std::istream &is, ParserContext &ctx) // Parse the data and handle any XML error if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) { - const int line = XML_GetCurrentLineNumber(&p); - const int column = XML_GetCurrentColumnNumber(&p); + const TextCursor::PosType line = + static_cast<TextCursor::PosType>(XML_GetCurrentLineNumber(&p)); + const TextCursor::PosType column = static_cast<TextCursor::PosType>( + XML_GetCurrentColumnNumber(&p)); + const size_t offs = XML_GetCurrentByteIndex(&p); const XML_Error code = XML_GetErrorCode(&p); const std::string msg = std::string{XML_ErrorString(code)}; - throw ParserException{"XML Syntax Error: " + msg, line, column}; + throw ParserException{"XML Syntax Error: " + msg, line, column, + offs}; } // Abort once there are no more bytes in the stream |