diff options
Diffstat (limited to 'src/core/common')
-rw-r--r-- | src/core/common/CharReader.cpp | 640 | ||||
-rw-r--r-- | src/core/common/CharReader.hpp | 665 | ||||
-rw-r--r-- | src/core/common/Exceptions.cpp | 46 | ||||
-rw-r--r-- | src/core/common/Exceptions.hpp | 162 | ||||
-rw-r--r-- | src/core/common/Logger.cpp | 161 | ||||
-rw-r--r-- | src/core/common/Logger.hpp | 609 | ||||
-rw-r--r-- | src/core/common/Utils.cpp | 59 | ||||
-rw-r--r-- | src/core/common/Utils.hpp | 110 | ||||
-rw-r--r-- | src/core/common/Variant.cpp | 154 | ||||
-rw-r--r-- | src/core/common/Variant.hpp | 761 | ||||
-rw-r--r-- | src/core/common/VariantReader.cpp | 625 | ||||
-rw-r--r-- | src/core/common/VariantReader.hpp | 166 |
12 files changed, 4158 insertions, 0 deletions
diff --git a/src/core/common/CharReader.cpp b/src/core/common/CharReader.cpp new file mode 100644 index 0000000..373c0c1 --- /dev/null +++ b/src/core/common/CharReader.cpp @@ -0,0 +1,640 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <algorithm> +#include <cassert> +#include <limits> +#include <sstream> + +#include "CharReader.hpp" +#include "Utils.hpp" + +namespace ousia { + +/* Helper functions */ + +/** + * istreamReadCallback is used internally by the Buffer calss to stream data + * from an input stream. + * + * @param buf is points a the target memory region. + * @param size is the requested number of bytes. + * @param userData is a pointer at some user defined data. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. 
+ */ +static size_t istreamReadCallback(char *buf, size_t size, void *userData) +{ + return (static_cast<std::istream *>(userData))->read(buf, size).gcount(); +} + +/* Class Buffer */ + +Buffer::Buffer(ReadCallback callback, void *userData) + : callback(callback), + userData(userData), + reachedEnd(false), + startBucket(buckets.end()), + endBucket(buckets.end()), + startOffset(0), + firstDead(0) +{ + // Load a first block of data from the stream + stream(); + startBucket = buckets.begin(); +} + +Buffer::Buffer(std::istream &istream) : Buffer(istreamReadCallback, &istream) {} + +Buffer::Buffer(const std::string &str) + : callback(nullptr), + userData(nullptr), + reachedEnd(true), + startBucket(buckets.end()), + endBucket(buckets.end()), + startOffset(0), + firstDead(0) +{ + // Copy the given string into a first buffer and set the start buffer + // correctly + Bucket &bucket = nextBucket(); + bucket.resize(str.size()); + std::copy(str.begin(), str.end(), bucket.begin()); + startBucket = buckets.begin(); +} + +#ifndef NDEBUG +Buffer::~Buffer() +{ + // Make sure all cursors have been deleted + for (bool cursor_alive: alive) { + assert(!cursor_alive); + } +} +#endif + +void Buffer::advance(BucketIterator &it) +{ + it++; + if (it == buckets.end()) { + it = buckets.begin(); + } +} + +void Buffer::advance(BucketList::const_iterator &it) const +{ + it++; + if (it == buckets.cend()) { + it = buckets.cbegin(); + } +} + +Buffer::Bucket &Buffer::nextBucket() +{ + constexpr size_t MAXVAL = std::numeric_limits<size_t>::max(); + + // Fetch the minimum bucket index + size_t minBucketIdx = MAXVAL; + for (size_t i = 0; i < cursors.size(); i++) { + if (alive[i]) { + // Fetch references to the bucket and the cursor + const Cursor &cur = cursors[i]; + const Bucket &bucket = *(cur.bucket); + + // Increment the bucket index by one, if the cursor is at the end + // of the bucket (only valid if the LOOKBACK_SIZE is set to zero) + size_t bIdx = cur.bucketIdx; + if (LOOKBACK_SIZE == 0 && 
cur.bucketOffs == bucket.size()) { + bIdx++; + } + + // Decrement the bucket index by one, if the previous bucket still + // needs to be reached and cannot be overridden + if (bIdx > 0 && cur.bucketOffs < LOOKBACK_SIZE) { + bIdx--; + } + + // Set the bucket index to the minium + minBucketIdx = std::min(minBucketIdx, bIdx); + } + } + + // If there is space between the current start bucket and the read + // cursor, the start bucket can be safely overridden. + if (minBucketIdx > 0 && minBucketIdx != MAXVAL) { + // All cursor bucket indices will be decreased by one + for (size_t i = 0; i < cursors.size(); i++) { + cursors[i].bucketIdx--; + } + + // Increment the start offset + startOffset += startBucket->size(); + + // The old start bucket is the new end bucket + endBucket = startBucket; + + // Advance the start bucket, wrap around at the end of the list + advance(startBucket); + } else { + // No free bucket, insert a new one before the start bucket + endBucket = buckets.emplace(startBucket); + } + return *endBucket; +} + +Buffer::CursorId Buffer::nextCursor() +{ + bool hasCursor = false; + CursorId res = 0; + + // Search for the next free cursor starting with minNextCursorId + for (size_t i = firstDead; i < alive.size(); i++) { + if (!alive[i]) { + res = i; + hasCursor = true; + break; + } + } + + // Add a new cursor to the cursor list if no cursor is currently free + if (!hasCursor) { + res = cursors.size(); + cursors.resize(res + 1); + alive.resize(res + 1); + } + + // The next dead cursor is at least the next cursor + firstDead = res + 1; + + // Mark the new cursor as alive + alive[res] = true; + + return res; +} + +void Buffer::stream() +{ + // Fetch the bucket into which the data should be inserted, make sure it + // has the correct size + Bucket &tar = nextBucket(); + tar.resize(REQUEST_SIZE); + + // Read data from the stream into the target buffer + size_t size = callback(tar.data(), REQUEST_SIZE, userData); + + // If not enough bytes were returned, we're at 
the end of the stream + if (size < REQUEST_SIZE) { + tar.resize(size); + reachedEnd = true; + } +} + +Buffer::CursorId Buffer::createCursor() +{ + CursorId res = nextCursor(); + cursors[res].bucket = startBucket; + cursors[res].bucketIdx = 0; + cursors[res].bucketOffs = 0; + return res; +} + +Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref) +{ + CursorId res = nextCursor(); + cursors[res] = cursors[ref]; + return res; +} + +void Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to) +{ + cursors[to] = cursors[from]; +} + +void Buffer::deleteCursor(Buffer::CursorId cursor) +{ + alive[cursor] = false; + firstDead = std::min(firstDead, cursor); +} + +size_t Buffer::offset(Buffer::CursorId cursor) const +{ + const Cursor &cur = cursors[cursor]; + size_t offs = startOffset + cur.bucketOffs; + BucketList::const_iterator it = startBucket; + while (it != cur.bucket) { + offs += it->size(); + advance(it); + } + return offs; +} + +size_t Buffer::moveForward(CursorId cursor, size_t relativeOffs) +{ + size_t offs = relativeOffs; + Cursor &cur = cursors[cursor]; + while (offs > 0) { + // Fetch the current bucket of the cursor + Bucket &bucket = *(cur.bucket); + + // If there is enough space in the bucket, simply increment the bucket + // offset by the given relative offset + const size_t space = bucket.size() - cur.bucketOffs; + if (space >= offs) { + cur.bucketOffs += offs; + break; + } else { + // Go to the end of the current bucket otherwise + offs -= space; + cur.bucketOffs = bucket.size(); + + // Go to the next bucket + if (cur.bucket != endBucket) { + // Go to the next bucket + advance(cur.bucket); + cur.bucketIdx++; + cur.bucketOffs = 0; + } else { + // Abort, if there is no more data to stream, otherwise just + // load new data + if (reachedEnd) { + return relativeOffs - offs; + } + stream(); + } + } + } + return relativeOffs; +} + +size_t Buffer::moveBackward(CursorId cursor, size_t relativeOffs) +{ + size_t offs = relativeOffs; + Cursor &cur = 
cursors[cursor]; + while (offs > 0) { + // If there is enough space in the bucket, simply decrement the bucket + // offset by the given relative offset + if (cur.bucketOffs >= offs) { + cur.bucketOffs -= offs; + break; + } else { + // Go to the beginning of the current bucket otherwise + offs -= cur.bucketOffs; + cur.bucketOffs = 0; + + // Abort if there is no more bucket to got back to + if (cur.bucketIdx == 0) { + return relativeOffs - offs; + } + + // Go to the previous bucket (wrap around at the beginning of the + // list) + if (cur.bucket == buckets.begin()) { + cur.bucket = buckets.end(); + } + cur.bucket--; + + // Decrement the bucket index, and set the current offset to the + // end of the new bucket + cur.bucketIdx--; + cur.bucketOffs = cur.bucket->size(); + } + } + return relativeOffs; +} + +ssize_t Buffer::moveCursor(CursorId cursor, ssize_t relativeOffs) +{ + if (relativeOffs > 0) { + return moveForward(cursor, relativeOffs); + } else if (relativeOffs < 0) { + return -moveBackward(cursor, -relativeOffs); + } else { + return 0; + } +} + +bool Buffer::atEnd(Buffer::CursorId cursor) const +{ + const Cursor &c = cursors[cursor]; + return reachedEnd && + (c.bucket == endBucket && c.bucketOffs == endBucket->size()); +} + +bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr) +{ + Cursor &cur = cursors[cursor]; + while (true) { + // Reference at the current bucket + Bucket &bucket = *(cur.bucket); + + // If there is still data in the current bucket, return this data + if (cur.bucketOffs < bucket.size()) { + c = bucket[cur.bucketOffs]; + if (incr) { + cur.bucketOffs++; + } + return true; + } else if (cur.bucket == endBucket) { + // Return false if the end of the stream has been reached, otherwise + // load new data + if (reachedEnd) { + return false; + } + stream(); + } + + // Go to the next bucket + cur.bucketIdx++; + cur.bucketOffs = 0; + advance(cur.bucket); + } +} + +bool Buffer::read(Buffer::CursorId cursor, char &c) +{ + return 
fetchCharacter(cursor, c, true); +} + +bool Buffer::fetch(CursorId cursor, char &c) +{ + return fetchCharacter(cursor, c, false); +} + +/* CharReader::Cursor class */ + +void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer, + CharReader::Cursor &cursor) +{ + // Copy the cursor position + buffer->copyCursor(cursor.cursor, this->cursor); + + // Copy the state + line = cursor.line; + column = cursor.column; +} + +/* CharReader class */ + +CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line, + size_t column) + : buffer(buffer), + readCursor(buffer->createCursor(), line, column), + peekCursor(buffer->createCursor(), line, column), + coherent(true) +{ +} + +CharReader::CharReader(const std::string &str, size_t line, size_t column) + : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column) +{ +} + +CharReader::CharReader(std::istream &istream, size_t line, size_t column) + : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column) +{ +} + +CharReader::~CharReader() +{ + buffer->deleteCursor(readCursor.cursor); + buffer->deleteCursor(peekCursor.cursor); +} + +bool CharReader::readAtCursor(Cursor &cursor, char &c) +{ + // Return false if we're at the end of the stream + if (!buffer->read(cursor.cursor, c)) { + return false; + } + + // Substitute linebreak sequences with a single '\n' + if (c == '\n' || c == '\r') { + // Output a single \n + c = '\n'; + + // Check whether the next character is a continuation of the + // current character + char c2; + if (buffer->read(cursor.cursor, c2)) { + if ((c2 != '\n' && c2 != '\r') || c2 == c) { + buffer->moveCursor(cursor.cursor, -1); + } + } + } + + // Count lines and columns + if (c == '\n') { + // A linebreak was reached, go to the next line + cursor.line++; + cursor.column = 1; + } else { + // Ignore UTF-8 continuation bytes + if (!((c & 0x80) && !(c & 0x40))) { + cursor.column++; + } + } + return true; +} + +bool CharReader::peek(char &c) +{ + // If the reader was coherent, 
update the peek cursor state + if (coherent) { + peekCursor.assign(buffer, readCursor); + coherent = false; + } + + // Read a character from the peek cursor + return readAtCursor(peekCursor, c); +} + +bool CharReader::read(char &c) +{ + // Read a character from the buffer at the current read cursor + bool res = readAtCursor(readCursor, c); + + // Set the peek position to the current read position, if reading was not + // coherent + if (!coherent) { + peekCursor.assign(buffer, readCursor); + coherent = true; + } else { + buffer->copyCursor(readCursor.cursor, peekCursor.cursor); + } + + // Return the result of the read function + return res; +} + +void CharReader::resetPeek() +{ + if (!coherent) { + peekCursor.assign(buffer, readCursor); + coherent = true; + } +} + +void CharReader::consumePeek() +{ + if (!coherent) { + readCursor.assign(buffer, peekCursor); + coherent = true; + } +} + +bool CharReader::consumeWhitespace() +{ + char c; + while (peek(c)) { + if (!Utils::isWhitespace(c)) { + resetPeek(); + return true; + } + consumePeek(); + } + return false; +} + +CharReaderFork CharReader::fork() +{ + return CharReaderFork(buffer, readCursor, peekCursor, coherent); +} + +CharReader::Context CharReader::getContext(ssize_t maxSize) +{ + // Clone the current read cursor + Buffer::CursorId cur = buffer->createCursor(readCursor.cursor); + + // Fetch the start position of the search + ssize_t offs = buffer->offset(cur); + ssize_t start = offs; + ssize_t end = offs; + char c; + + // Search the beginning of the line with the last non-whitespace character + bool hadNonWhitespace = false; + bool foundBegin = false; + for (ssize_t i = 0; i < maxSize; i++) { + // Fetch the character at the current position + if (buffer->fetch(cur, c)) { + // Abort, at linebreaks if we found a non-linebreak character + hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c); + if (hadNonWhitespace && (c == '\n' || c == '\r')) { + buffer->moveCursor(cur, 1); + start++; + foundBegin = true; 
+ break; + } + } + if (buffer->moveCursor(cur, -1) == 0) { + foundBegin = true; + break; + } else { + // Update the start position and the hadNonWhitespace flag + start--; + } + } + + // Search the end of the line + buffer->moveCursor(cur, offs - start); + bool foundEnd = false; + for (ssize_t i = 0; i < maxSize; i++) { + // Increment the end counter if a character was read, abort if the end + // of the stream has been reached + if (buffer->read(cur, c)) { + end++; + } else { + foundEnd = true; + break; + } + + // Abort on linebreak characters + if (c == '\n' || c == '\r') { + foundEnd = true; + break; + } + } + + // Calculate the truncated start and end position and limit the number of + // characters to the maximum number of characters + ssize_t tStart = start; + ssize_t tEnd = end; + if (tEnd - tStart > maxSize) { + tStart = std::max(offs - maxSize / 2, tStart); + tEnd = tStart + maxSize; + } + + // Try to go to the calculated start position and fetch the actual start + // position + ssize_t aStart = end + buffer->moveCursor(cur, tStart - end); + if (aStart > tStart) { + tEnd = tEnd + (aStart - tStart); + tStart = aStart; + } + + // Read one line + std::stringstream ss; + size_t relPos = 0; + for (ssize_t i = tStart; i < tEnd; i++) { + if (buffer->read(cur, c)) { + // Break once a linebreak is reached + if (c == '\n' || c == '\r') { + break; + } + + // Add the current character to the output + ss << c; + + // Increment the string-relative offset as long as the original + // offset is not reached in the for loop + if (i < offs) { + relPos++; + } + } + } + + // Delete the newly created cursor + buffer->deleteCursor(cur); + + return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start, + !foundEnd || tEnd != end}; +} + +/* Class CharReaderFork */ + +CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer, + CharReader::Cursor &parentReadCursor, + CharReader::Cursor &parentPeekCursor, + bool coherent) + : CharReader(buffer, 1, 1), + 
parentReadCursor(parentReadCursor), + parentPeekCursor(parentPeekCursor) +{ + readCursor.assign(buffer, parentReadCursor); + peekCursor.assign(buffer, parentPeekCursor); + this->coherent = coherent; +} + +void CharReaderFork::commit() +{ + parentReadCursor.assign(buffer, readCursor); + parentPeekCursor.assign(buffer, peekCursor); +} +} + diff --git a/src/core/common/CharReader.hpp b/src/core/common/CharReader.hpp new file mode 100644 index 0000000..3cbe4b4 --- /dev/null +++ b/src/core/common/CharReader.hpp @@ -0,0 +1,665 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file CharReader.hpp + * + * Used within all parsers to read single characters from an underlying stream. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_CHAR_READER_HPP_ +#define _OUSIA_CHAR_READER_HPP_ + +#include <istream> +#include <list> +#include <memory> +#include <vector> + +namespace ousia { + +/** + * A chunked ring buffer used in CharReader to provide access to an input stream + * with multiple read cursors. The Buffer automatically expands to the + * size of the spanned by the read cursors while reusing already allocated + * memory. + */ +class Buffer { +public: + /** + * Callback function which is called whenever new data is requested from the + * input stream. 
+ * + * @param buf is points a the target memory region. + * @param size is the requested number of bytes. + * @param userData is a pointer at some user defined data given in the + * constructor. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. + */ + using ReadCallback = size_t (*)(char *buf, size_t size, void *userData); + + /** + * Handle used to identify a cursor. + */ + using CursorId = size_t; + +private: + /** + * Number of bytes to request from the input stream. Set to 64 KiB because + * this seems to be a nice value for I/O operations according to multiple + * sources. + */ + static constexpr size_t REQUEST_SIZE = 64 * 1024; + + /** + * Number of bytes the buffer guarantees to be capable of looking back + * for extracting the current context. + */ + static constexpr size_t LOOKBACK_SIZE = 128; + + /** + * Type used internally to represent one chunk of memory. + */ + using Bucket = std::vector<char>; + + /** + * Type used internally to represent a bucket container. + */ + using BucketList = std::list<Bucket>; + + /** + * Type used internally for representing iterators in the bucket list. + */ + using BucketIterator = BucketList::iterator; + + /** + * Type used internally to represent a read cursor. + */ + struct Cursor { + /** + * Iterator pointing at the current bucket. + */ + BucketIterator bucket; + + /** + * Index of the bucket relative to the start bucket. + */ + size_t bucketIdx; + + /** + * Current offset within that bucket. + */ + size_t bucketOffs; + }; + + /** + * List of buckets containing the buffered memory. + */ + BucketList buckets; + + /** + * List of cursors used to access the memory. Note that cursors can be + * marked as inactive and reused lateron (to avoid having to resize the + * vector). + */ + std::vector<Cursor> cursors; + + /** + * Bitfield specifying which of the cursors is actually valid. 
+ */ + std::vector<bool> alive; + + /** + * Function to be called whenever new data is needed. Set to nullptr if the + * Buffer is not backed by an input stream. + */ + const ReadCallback callback; + + /** + * User data given in the constructor. + */ + void *userData; + + /** + * Set to true if the input stream is at its end. + */ + bool reachedEnd; + + /** + * Iterator pointing at the current start bucket. + */ + BucketIterator startBucket; + + /** + * Iterator pointing at the last bucket. + */ + BucketIterator endBucket; + + /** + * Byte offset of the start bucket relative to the beginning of the stream. + */ + size_t startOffset; + + /** + * Points at the smallest possible available cursor index, yet does not + * guarantee that this cursor index actuall is free. + */ + CursorId firstDead; + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketIterator &it); + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketList::const_iterator &it) const; + + /** + * Internally used to find the next free cursor in the cursors vector. The + * cursor is marked as active. + * + * @return the next free cursor index. + */ + CursorId nextCursor(); + + /** + * Returns a reference at the next bucket into which data should be + * inserted. + * + * @return a bucket into which the data can be inserted. + */ + Bucket &nextBucket(); + + /** + * Reads data from the input stream and places it in the next free buffer. + */ + void stream(); + + /** + * Moves the given cursor forward. + */ + size_t moveForward(CursorId cursor, size_t relativeOffs); + + /** + * Moves the given cursor backward. + */ + size_t moveBackward(CursorId cursor, size_t relativeOffs); + + /** + * Reads a character from the current cursor position and optionally + * advances. 
+ */ + bool fetchCharacter(CursorId cursor, char &c, bool incr); + +public: + /** + * Intializes the Buffer with a reference to a ReadCallback that is used + * to fetch data from an underlying input stream. + * + * @param callback is the function that will be called whenever data is read + * from the ring buffer and the buffer does not hold enough data to fulfill + * this read request. + * @param userData is a pointer to user defined data which will be passed to + * the callback function. + */ + Buffer(ReadCallback callback, void *userData); + + /** + * Initializes the Buffer with a reference to an std::istream from which + * data will be read. + * + * @param istream is the input stream from which the data should be read. + */ + Buffer(std::istream &istream); + + /** + * Initializes the Buffer with the contents of the given string, after + * this operation the Buffer has a fixed size. + * + * @param str is the string containing the data that should be copied into + * the ring buffer. + */ + Buffer(const std::string &str); + +#ifndef NDEBUG + /** + * Destructor of the Buffer class. Makes sure that all cursors have been + * freed. + */ + ~Buffer(); +#endif + + // No copy + Buffer(const Buffer &) = delete; + + // No assign + Buffer &operator=(const Buffer &) = delete; + + /** + * Creates a new read cursor positioned at the smallest possible position + * in the ring buffer. + */ + CursorId createCursor(); + + /** + * Creates a new read cursor positioned at the same position as the given + * read cursor. + * + * @param ref is the read cursor that should be used as reference for the + * new read cursor. + */ + CursorId createCursor(CursorId ref); + + /** + * Copies the position of one cursor to another cursor. + * + * @param from is the cursor id of which the position should be copied. + * @param to is the cursor id to which the position should be copied. + */ + void copyCursor(CursorId from, CursorId to); + + /** + * Deletes the cursor with the given id. 
The cursor may no longer be used + * after this function has been called. + * + * @param cursor is the id of the cursor that should be freed. + */ + void deleteCursor(CursorId cursor); + + /** + * Moves a cursor by offs bytes. Note that moving backwards is theoretically + * limited by the LOOKBACK_SIZE of the Buffer, practically it will most + * likely be limited by the REQUEST_SIZE, so you can got at most 64 KiB + * backwards. + * + * @param cursor is the cursor that should be moved. + * @param relativeOffs is a positive or negative integer number specifying + * the number of bytes the cursor should be moved forward (positive numbers) + * or backwards (negative numbers). + * @return the actual number of bytes the cursor was moved. This number is + * smaller than the relativeOffs given in the constructor if the + */ + ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs); + + /** + * Returns the current byte offset of the given cursor relative to the + * beginning of the stream. + * + * @param cursor is the cursor for which the byte offset relative to the + * beginning of the stream should be returned. + * @return the number of bytes since the beginning of the stream for the + * given cursor. + */ + size_t offset(CursorId cursor) const; + + /** + * Returns true if the given cursor currently is at the end of the stream. + * + * @param cursor is the cursor for which the atEnd flag should be returned. + * @return true if the there are no more bytes for this cursor. If false + * is returned, this means that there may be more bytes in the stream, + * nevertheless the end of the stream may be hit once the next read function + * is called. + */ + bool atEnd(CursorId cursor) const; + + /** + * Reads a single character from the ring buffer from the given cursor and + * moves to the next character. + * + * @param cursor specifies the cursor from which the data should be read. + * The cursor will be advanced by one byte. 
+ * @param c is the character into which the data needs to be read. + * @return true if a character was read, false if the end of the stream has + * been reached. + */ + bool read(CursorId cursor, char &c); + + /** + * Returns a single character from the ring buffer from the current cursor + * position and stays at that position. + * + * @param cursor specifies the cursor from which the data should be read. + * The cursor will be advanced by one byte. + * @param c is the character into which the data needs to be read. + * @return true if a character could be fetched, false if the end of the + * stream has been reached. + */ + bool fetch(CursorId cursor, char &c); +}; + +// Forward declaration +class CharReaderFork; + +/** + * Used within parsers for convenient access to single characters in an input + * stream or buffer. It allows reading and peeking single characters from a + * buffer. Additionally it counts the current column/row (with correct handling + * for UTF-8) and contains an internal state machine that handles the detection + * of linebreaks and converts these to a single '\n'. + */ +class CharReader { +public: + /** + * The context struct is used to represent the current context the char + * reader is in. This context can for example be used when building error + * messages. + */ + struct Context { + /** + * Set to the content of the current line. + */ + std::string line; + + /** + * Relative position (in characters) within that line. + */ + size_t relPos; + + /** + * Set to true if the beginning of the line has been truncated (because + * the reader position is too far away from the actual position of the + * line). + */ + bool truncatedStart; + + /** + * Set to true if the end of the line has been truncated (because the + * reader position is too far away from the actual end position of the + * line. 
+ */ + bool truncatedEnd; + + Context() + : line(), relPos(0), truncatedStart(false), truncatedEnd(false) + { + } + + Context(std::string line, size_t relPos, bool truncatedStart, + bool truncatedEnd) + : line(std::move(line)), + relPos(relPos), + truncatedStart(truncatedStart), + truncatedEnd(truncatedEnd) + { + } + }; + +protected: + /** + * Internally used cursor structure for managing the read and the peek + * cursor. + */ + struct Cursor { + /** + * Corresponding cursor in the underlying buffer instance. + */ + const Buffer::CursorId cursor; + + /** + * Current line the cursor is in. + */ + uint32_t line; + + /** + * Current column the cursor is in. + */ + uint32_t column; + + /** + * Constructor of the Cursor class. + * + * @param cursor is the underlying cursor in the Buffer instance. + */ + Cursor(Buffer::CursorId cursor, size_t line, size_t column) + : cursor(cursor), line(line), column(column) + { + } + + /** + * Assigns one cursor to another. + * + * @param buffer is the underlying buffer instance the internal cursor + * belongs to. + * @param cursor is the cursor from which the state should be copied. + */ + void assign(std::shared_ptr<Buffer> buffer, Cursor &cursor); + }; + +private: + /** + * Substitutes "\r", "\n\r", "\r\n" with a single "\n". + * + * @param cursor is the cursor from which the character should be read. + * @param c a reference to the character that should be written. + * @return true if another character needs to be read. + */ + bool substituteLinebreaks(Cursor &cursor, char &c); + + /** + * Reads a single character from the given cursor. + * + * @param cursor is the cursor from which the character should be read. + * @param c a reference to the character that should be written. + * @return true if a character was read, false if the end of the stream has + * been reached. + */ + bool readAtCursor(Cursor &cursor, char &c); + +protected: + /** + * Reference pointing at the underlying buffer. 
+ */ + std::shared_ptr<Buffer> buffer; + + /** + * Cursor used for reading. + */ + Cursor readCursor; + + /** + * Cursor used for peeking. + */ + Cursor peekCursor; + + /** + * Set to true as long the underlying Buffer cursor is at the same position + * for the read and the peek cursor. This is only used for optimization + * purposes and makes consecutive reads a bit faster. + */ + bool coherent; + + /** + * Protected constructor of the CharReader base class. Creates new read + * and peek cursors for the given buffer. + * + * @param buffer is a reference to the underlying Buffer class responsible + * for allowing to read from a single input stream from multiple locations. + */ + CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column); + +public: + /** + * Creates a new CharReader instance from a string. + * + * @param str is a string containing the input data. + * @param line is the start line. + * @param column is the start column. + */ + CharReader(const std::string &str, size_t line = 1, size_t column = 1); + + /** + * Creates a new CharReader instance for an input stream. + * + * @param istream is the input stream from which incomming data should be + * read. + * @param line is the start line. + * @param column is the start column. + */ + CharReader(std::istream &istream, size_t line = 1, size_t column = 1); + + /** + * Deletes the used cursors from the underlying buffer instance. + */ + ~CharReader(); + + // No copy + CharReader(const Buffer &) = delete; + + // No assign + CharReader &operator=(const Buffer &) = delete; + + /** + * Peeks a single character. If called multiple times, returns the + * character after the previously peeked character. + * + * @param c is a reference to the character to which the result should be + * written. + * @return true if the character was successfully read, false if there are + * no more characters to be read in the buffer. + */ + bool peek(char &c); + + /** + * Reads a character from the input data. 
If "peek" was called + * beforehand resets the peek pointer. + * + * @param c is a reference to the character to which the result should be + * written. + * @return true if the character was successfully read, false if there are + * no more characters to be read in the buffer. + */ + bool read(char &c); + + /** + * Resets the peek pointer to the "read" pointer. + */ + void resetPeek(); + + /** + * Advances the read pointer to the peek pointer -- so if the "peek" + * function was called, "read" will now return the character after + * the last peeked character. + */ + void consumePeek(); + + /** + * Moves the read cursor to the next non-whitespace character. Returns + * false, if the end of the stream was reached. + * + * @return false if the end of the stream was reached, false othrwise. + */ + bool consumeWhitespace(); + + /** + * Creates a new CharReader located at the same position as this CharReader + * instance, yet the new CharReader can be used independently of this + * CharReader. Use the "commit" function of the returned CharReader to + * copy the state of the forked CharReaderFork to this CharReader. + * + * @return a CharReaderFork instance positioned at the same location as this + * CharReader instance. + */ + CharReaderFork fork(); + + /** + * Returns true if there are no more characters as the stream was + * closed. + * + * @return true if there is no more data. + */ + bool atEnd() const { return buffer->atEnd(readCursor.cursor); } + + /** + * Returns the current line (starting with one). + * + * @return the current line number. + */ + uint32_t getLine() const { return readCursor.line; } + + /** + * Returns the current column (starting with one). + * + * @return the current column number. + */ + uint32_t getColumn() const { return readCursor.column; } + + /** + * Returns the current byte offset of the read cursor. + * + * @return the byte position within the stream. 
+ */ + size_t getOffset() const { return buffer->offset(readCursor.cursor); }; + + /** + * Returns the line the read cursor currently is in, but at most the + * given number of characters in the form of a Context structure. + */ + Context getContext(ssize_t maxSize); +}; + +/** + * A CharReaderFork is returned whenever the "fork" function of the CharReader + * class is used. Its "commit" function can be used to move the underlying + * CharReader instance to the location of the CharReaderFork instance. Otherwise + * the read location of the underlying CharReader is left unchanged. + */ +class CharReaderFork : public CharReader { +private: + friend CharReader; + + /** + * The reader cursor of the underlying CharReader instance. + */ + CharReader::Cursor &parentReadCursor; + + /** + * The peek cursor of the underlying CharReader instance. + */ + CharReader::Cursor &parentPeekCursor; + + /** + * Constructor of the CharReaderFork class. + * + * @param buffer is a reference at the parent Buffer instance. + * @param parentPeekCursor is a reference at the parent read cursor. + * @param parentPeekCursor is a reference at the parent peek cursor. + * @param coherent specifies whether the char reader cursors are initialized + * coherently. + */ + CharReaderFork(std::shared_ptr<Buffer> buffer, + CharReader::Cursor &parentReadCursor, + CharReader::Cursor &parentPeekCursor, bool coherent); + +public: + /** + * Moves the read and peek cursor of the parent CharReader to the location + * of the read and peek cursor in the fork. 
+ */ + void commit(); +}; + +} + +#endif /* _OUSIA_CHAR_READER_HPP_ */ + diff --git a/src/core/common/Exceptions.cpp b/src/core/common/Exceptions.cpp new file mode 100644 index 0000000..d064f35 --- /dev/null +++ b/src/core/common/Exceptions.cpp @@ -0,0 +1,46 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <sstream> + +#include "Exceptions.hpp" + +namespace ousia { + +/* Class LoggableException */ + +std::string LoggableException::formatMessage(const std::string &msg, + const std::string &file, + int line, int column) +{ + std::stringstream ss; + ss << "error "; + if (!file.empty()) { + ss << "while processing \"" << file << "\" "; + } + if (line >= 0) { + ss << "at line " << line << ", "; + if (column >= 0) { + ss << "column " << column << " "; + } + } + ss << "with message: " << msg; + return ss.str(); +} +} + diff --git a/src/core/common/Exceptions.hpp b/src/core/common/Exceptions.hpp new file mode 100644 index 0000000..00d6106 --- /dev/null +++ b/src/core/common/Exceptions.hpp @@ -0,0 +1,162 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any 
/**
 * Common base class of all exceptions thrown by Ousía. It stores a single,
 * already formatted message string and exposes it through the standard
 * std::exception interface.
 */
class OusiaException : public std::exception {
public:
    /**
     * Creates the exception and takes ownership of the given message.
     *
     * @param formatedMessage is the final text that is returned by what(),
     * e.g. printed by the runtime environment if the exception is not caught.
     */
    OusiaException(std::string formatedMessage)
        : formatedMessage(std::move(formatedMessage))
    {
    }

    /**
     * Virtual destructor, allows deletion through a base class pointer.
     */
    virtual ~OusiaException() = default;

    /**
     * Overrides std::exception::what.
     *
     * @return a pointer to the message passed to the constructor. The pointer
     * refers to storage owned by this exception instance.
     */
    const char *what() const noexcept override
    {
        return this->formatedMessage.c_str();
    }

private:
    /**
     * Message handed out by what().
     */
    const std::string formatedMessage;
};
+ */ +class LoggableException : public OusiaException { +private: + /** + * Function used internally to build the formated message that should be + * reported to the runtime environment. + */ + static std::string formatMessage(const std::string &msg, + const std::string &file, int line, + int column); + +public: + /** + * Message describing the error that occured. + */ + const std::string msg; + + /** + * Name of the file in which the error occured. May be empty. + */ + const std::string file; + + /** + * Line at which the exception occured. Negative values are ignored. + */ + const int line; + + /** + * Column at which the exception occured. Negative values are ignored. + */ + const int column; + + /** + * Constructor of the LoggableException class. + * + * @param msg contains the error message. + * @param file provides the context the message refers to. May be empty. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + LoggableException(std::string msg, std::string file, int line = -1, + int column = -1) + : OusiaException(formatMessage(msg, file, line, column)), + msg(std::move(msg)), + file(std::move(file)), + line(line), + column(column) + { + } + + /** + * Constructor of the LoggableException class with empty file. + * + * @param msg contains the error message. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + LoggableException(std::string msg, int line = -1, int column = -1) + : OusiaException(formatMessage(msg, "", line, column)), + msg(std::move(msg)), + line(line), + column(column) + { + } + + /** + * Constructor of the LoggableException class with empty file and an + * position object. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. 
+ */ + template <class PosType> + LoggableException(std::string msg, const PosType &pos) + : OusiaException( + formatMessage(msg, "", pos.getLine(), pos.getColumn())), + msg(std::move(msg)), + line(pos.getLine()), + column(pos.getColumn()) + { + } +}; +} + +#endif /* _OUSIA_EXCEPTIONS_HPP_ */ + diff --git a/src/core/common/Logger.cpp b/src/core/common/Logger.cpp new file mode 100644 index 0000000..17f55a6 --- /dev/null +++ b/src/core/common/Logger.cpp @@ -0,0 +1,161 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <iostream> +#include <sstream> + +#include "Logger.hpp" + +namespace ousia { + +/* Class Logger */ + +void Logger::log(Severity severity, const std::string &msg, + const std::string &file, int line, int column) +{ + // Copy the current severity level + if (static_cast<int>(severity) > static_cast<int>(maxEncounteredSeverity)) { + maxEncounteredSeverity = severity; + } + + // Call the actual log message function if the severity is larger or equal + // to the minimum severity + if (static_cast<int>(severity) >= static_cast<int>(minSeverity)) { + process(Message{severity, msg, file, line, column}); + } +} + +unsigned int Logger::pushFilename(const std::string &name) +{ + filenameStack.push(name); + return filenameStack.size(); +} + +unsigned int Logger::popFilename() +{ + filenameStack.pop(); + return filenameStack.size(); +} + +void Logger::unwindFilenameStack(unsigned int pos) +{ + while (filenameStack.size() > pos && !filenameStack.empty()) { + filenameStack.pop(); + } +} + +/* Class TerminalLogger */ + +/** + * Small class used internally for formated terminal output using ANSI/VT100 + * escape codes on supported terminals. + * + * TODO: Deactivate if using windows or use the corresponding API function. + */ +class Terminal { +private: + /** + * If set to false, no control codes are generated. 
+ */ + bool active; + +public: + static const int BLACK = 30; + static const int RED = 31; + static const int GREEN = 32; + static const int YELLOW = 33; + static const int BLUE = 34; + static const int MAGENTA = 35; + static const int CYAN = 36; + static const int WHITE = 37; + + Terminal(bool active) : active(active) {} + + std::string color(int color, bool bright = true) const + { + if (!active) { + return std::string{}; + } + std::stringstream ss; + ss << "\x1b["; + if (bright) { + ss << "1;"; + } + ss << color << "m"; + return ss.str(); + } + + std::string reset() const + { + if (!active) { + return std::string{}; + } + return "\x1b[0m"; + } +}; + +void TerminalLogger::process(const Message &msg) +{ + Terminal t(useColor); + + // Print the file name + if (msg.hasFile()) { + os << t.color(Terminal::WHITE, true) << msg.file << t.reset(); + } + + // Print line and column number + if (msg.hasLine()) { + if (msg.hasFile()) { + os << ':'; + } + os << t.color(Terminal::WHITE, true) << msg.line + << t.reset(); + if (msg.hasColumn()) { + os << ':' << msg.column; + } + } + + // Print the optional seperator + if (msg.hasFile() || msg.hasLine()) { + os << ": "; + } + + // Print the severity + switch (msg.severity) { + case Severity::DEBUG: + break; + case Severity::NOTE: + os << t.color(Terminal::CYAN, true) << "note: "; + break; + case Severity::WARNING: + os << t.color(Terminal::MAGENTA, true) << "warning: "; + break; + case Severity::ERROR: + os << t.color(Terminal::RED, true) << "error: "; + break; + case Severity::FATAL_ERROR: + os << t.color(Terminal::RED, true) << "fatal: "; + break; + } + os << t.reset(); + + // Print the actual message + os << msg.msg << std::endl; +} +} + diff --git a/src/core/common/Logger.hpp b/src/core/common/Logger.hpp new file mode 100644 index 0000000..e6b97f4 --- /dev/null +++ b/src/core/common/Logger.hpp @@ -0,0 +1,609 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can 
redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Logger.hpp + * + * Contains classes for logging messages in Ousía. Provides a generic Logger + * class, and TerminalLogger, an extension of Logger which logs do an output + * stream. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_LOGGER_HPP_ +#define _OUSIA_LOGGER_HPP_ + +#include <ostream> +#include <stack> +#include <string> +#include <vector> + +#include "Exceptions.hpp" + +namespace ousia { + +/** + * Enum containing the severities used for logging errors and debug messages. + */ +enum class Severity : int { + /** + * Indicates that this message was only printed for debugging. Note that + * in release builds messages with this severity are discarded. + */ + DEBUG = 0, + + /** + * A message which might provide additional information to the user. + */ + NOTE = 1, + + /** + * A message which warns of possible mistakes by the user which might not be + * actual errors but may lead to unintended behaviour. + */ + WARNING = 2, + + /** + * An error occurred while processing, however program execution continues, + * trying to deal with the error situation (graceful degradation). However, + * messages with this severity may be followed up by fatal errors. + */ + ERROR = 3, + + /** + * A fatal error occurred. Program execution cannot continue. 
+ */ + FATAL_ERROR = 4 +}; + +#ifdef NDEBUG +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE; +#else +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG; +#endif + +/** + * The Logger class is the base class the individual logging systems should + * derive from. It provides a simple interface for logging errors, warnings and + * notes and filters these according to the set minimum severity. Additionally + * a stack of file names is maintained in order to allow simple descent into + * included files. Note however, that this base Logger class simply discards the + * incomming log messages. Use one of the derived classes to actually handle the + * log messages. + */ +class Logger { +public: + /** + * The message struct represents a single log message and all information + * attached to it. + */ + struct Message { + /** + * Severity of the log message. + */ + Severity severity; + + /** + * Actual log message. + */ + std::string msg; + + /** + * Refers to the file which provides the context for this error message. + * May be empty. + */ + std::string file; + + /** + * Line in the above file the error message refers to. Ignored if + * smaller than zero. + */ + int line; + + /** + * Column in the above file the error message refers to. Ignored if + * smaller than zero. + */ + int column; + + /** + * Constructor of the Message struct. + * + * @param severity describes the message severity. + * @param msg contains the actual message. + * @param file provides the context the message refers to. May be empty. + * @param line is the line in the above file the message refers to. + * @param column is the column in the above file the message refers to. + */ + Message(Severity severity, std::string msg, std::string file, int line, + int column) + : severity(severity), + msg(std::move(msg)), + file(std::move(file)), + line(line), + column(column){}; + + /** + * Returns true if the file string is set. + * + * @return true if the file string is set. 
+ */ + bool hasFile() const { return !file.empty(); } + + /** + * Returns true if the line is set. + * + * @return true if the line number is a non-negative integer. + */ + bool hasLine() const { return line >= 0; } + + /** + * Returns true if column and line are set (since a column has no + * significance without a line number). + * + * @return true if line number and column number are non-negative + * integers. + */ + bool hasColumn() const { return hasLine() && column >= 0; } + }; + +private: + /** + * Minimum severity a log message should have before it is discarded. + */ + Severity minSeverity; + + /** + * Maximum encountered log message severity. + */ + Severity maxEncounteredSeverity; + + /** + * Stack containing the current file names that have been processed. + */ + std::stack<std::string> filenameStack; + +protected: + /** + * Function to be overriden by child classes to actually display or store + * the messages. The default implementation just discards all incomming + * messages. + * + * @param msg is an instance of the Message struct containing the data that + * should be logged. + */ + virtual void process(const Message &msg){}; + +public: + /** + * Constructor of the Logger class. + * + * @param minSeverity is the minimum severity a log message should have. + * Messages below this severity are discarded. + */ + Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY) + : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG) + { + } + + Logger(const Logger &) = delete; + + /** + * Virtual destructor. + */ + virtual ~Logger(){}; + + /** + * Logs the given message. Most generic log function. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. 
+ * Ignored if negative. + */ + void log(Severity severity, const std::string &msg, const std::string &file, + int line = -1, int column = -1); + + /** + * Logs the given message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void log(Severity severity, const std::string &msg, int line = -1, + int column = -1) + { + log(severity, msg, currentFilename(), line, column); + } + + /** + * Logs the given message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + * @tparam PosType is the actual type of pos and must implement a getLine + * and getColumn function. + */ + template <class PosType> + void logAt(Severity severity, const std::string &msg, const PosType &pos) + { + log(severity, msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs the given loggable exception. + * + * @param ex is the exception that should be logged. + */ + void log(const LoggableException &ex) + { + log(Severity::ERROR, ex.msg, + ex.file.empty() ? currentFilename() : ex.file, ex.line, ex.column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. 
+ */ + void debug(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::DEBUG, msg, file, line, column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void debug(const std::string &msg, int line = -1, int column = -1) + { + debug(msg, currentFilename(), line, column); + } + + /** + * Logs a debug message. The file name is set to the topmost file name on + * the file name stack. + * + * @param severity is the severity of the log message. + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template <class PosType> + void debugAt(const std::string &msg, const PosType &pos) + { + debug(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void note(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::NOTE, msg, file, line, column); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. 
+ * Ignored if negative. + */ + void note(const std::string &msg, int line = -1, int column = -1) + { + note(msg, currentFilename(), line, column); + } + + /** + * Logs a note. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template <class PosType> + void noteAt(const std::string &msg, const PosType &pos) + { + note(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void warning(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::WARNING, msg, file, line, column); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template <class PosType> + void warningAt(const std::string &msg, const PosType &pos) + { + warning(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a warning. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. 
+ */ + void warning(const std::string &msg, int line = -1, int column = -1) + { + warning(msg, currentFilename(), line, column); + } + + /** + * Logs an error message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void error(const std::string &msg, const std::string &file, int line = -1, + int column = -1) + { + log(Severity::ERROR, msg, file, line, column); + } + + /** + * Logs an error message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void error(const std::string &msg, int line = -1, int column = -1) + { + error(msg, currentFilename(), line, column); + } + + /** + * Logs an error message. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template <class PosType> + void errorAt(const std::string &msg, const PosType &pos) + { + error(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param file is the name of the file the message refers to. May be empty. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. 
+ * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void fatalError(const std::string &msg, const std::string &file, + int line = -1, int column = -1) + { + log(Severity::FATAL_ERROR, msg, file, line, column); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param line is the line in the above file at which the error occured. + * Ignored if negative. + * @param column is the column in the above file at which the error occured. + * Ignored if negative. + */ + void fatalError(const std::string &msg, int line = -1, int column = -1) + { + fatalError(msg, currentFilename(), line, column); + } + + /** + * Logs a fatal error. The file name is set to the topmost file name on + * the file name stack. + * + * @param msg is the actual log message. + * @param pos is a const reference to a variable which provides position + * information. + */ + template <class PosType> + void fatalErrorAt(const std::string &msg, const PosType &pos) + { + fatalError(msg, pos.getLine(), pos.getColumn()); + } + + /** + * Pushes a new file name onto the internal filename stack. + * + * @param name is the name of the file that should be added to the filename + * stack. + * @return the size of the filename stack. This number can be passed to the + * "unwindFilenameStack" method in order to return the stack to state it was + * in after this function has been called. + */ + unsigned int pushFilename(const std::string &name); + + /** + * Pops the filename from the internal filename stack. + * + * @return the current size of the filename stack. + */ + unsigned int popFilename(); + + /** + * Pops elements from the filename stack while it has more elements than + * the given number and the stack is non-empty. + * + * @param pos is the position the filename stack should be unwound to. Use + * a number returned by pushFilename. 
+ */ + void unwindFilenameStack(unsigned int pos); + + /** + * Returns the topmost filename from the internal filename stack. + * + * @return the topmost filename from the filename stack or an empty string + * if the filename stack is empty. + */ + std::string currentFilename() + { + return filenameStack.empty() ? std::string{} : filenameStack.top(); + } + + /** + * Returns the maximum severity that was encountered by the Logger but at + * least Severity::DEBUG. + * + * @return the severity of the most severe log message but at least + * Severity::DEBUG. + */ + Severity getMaxEncounteredSeverity() { return maxEncounteredSeverity; } + + /** + * Returns the minimum severity. Messages with a smaller severity are + * discarded. + * + * @return the minimum severity. + */ + Severity getMinSeverity() { return minSeverity; } + + /** + * Sets the minimum severity. Messages with a smaller severity will be + * discarded. Only new messages will be filtered according to the new value. + * + * @param severity is the minimum severity for new log messages. + */ + void setMinSeverity(Severity severity) { minSeverity = severity; } +}; + +/** + * Class extending the Logger class and printing the log messages to the given + * stream. + */ +class TerminalLogger : public Logger { +private: + /** + * Reference to the target output stream. + */ + std::ostream &os; + + /** + * If true, the TerminalLogger will use colors to make the log messages + * prettier. + */ + bool useColor; + +protected: + /** + * Implements the process function and logs the messages to the output. + */ + void process(const Message &msg) override; + +public: + /** + * Constructor of the TerminalLogger class. + * + * @param os is the output stream the log messages should be logged to. + * Should be set to std::cerr in most cases. + * @param useColor if true, the TerminalLogger class will do its best to + * use ANSI/VT100 control sequences for colored log messages. 
+ * @param minSeverity is the minimum severity below which log messages are + * discarded. + */ + TerminalLogger(std::ostream &os, bool useColor = false, + Severity minSeverity = DEFAULT_MIN_SEVERITY) + : Logger(minSeverity), os(os), useColor(useColor) + { + } +}; +} + +#endif /* _OUSIA_LOGGER_HPP_ */ + diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp new file mode 100644 index 0000000..c460ed4 --- /dev/null +++ b/src/core/common/Utils.cpp @@ -0,0 +1,59 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <algorithm> +#include <limits> + +#include "Utils.hpp" + +namespace ousia { + +std::string Utils::trim(const std::string &s) +{ + size_t firstNonWhitespace = std::numeric_limits<size_t>::max(); + size_t lastNonWhitespace = 0; + for (size_t i = 0; i < s.size(); i++) { + if (!isWhitespace(s[i])) { + firstNonWhitespace = std::min(i, firstNonWhitespace); + lastNonWhitespace = std::max(i, lastNonWhitespace); + } + } + + if (firstNonWhitespace < lastNonWhitespace) { + return s.substr(firstNonWhitespace, + lastNonWhitespace - firstNonWhitespace + 1); + } + return std::string{}; +} + +bool Utils::isIdentifier(const std::string &name) +{ + bool first = true; + for (char c : name) { + if (first && !(isAlphabetic(c) || c == '_')) { + return false; + } + if (first && !(isAlphanumeric(c) || c == '_' || c == '-')) { + return false; + } + first = false; + } + return true; +} +} + diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp new file mode 100644 index 0000000..5332b50 --- /dev/null +++ b/src/core/common/Utils.hpp @@ -0,0 +1,110 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
/**
 * @file Utils.hpp
 *
 * Declares the Utils class, a collection of static helper functions for
 * character classification and simple string handling.
 */

#ifndef _OUSIA_UTILS_H_
#define _OUSIA_UTILS_H_

#include <sstream>
#include <string>

namespace ousia {

/**
 * Collection of static helper functions. The character classification
 * functions only compare against fixed ASCII ranges.
 */
class Utils {
public:
    /**
     * Returns true if the given character is in [A-Za-z]
     */
    static bool isAlphabetic(const char c)
    {
        return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'));
    }

    /**
     * Returns true if the given character is in [0-9]
     */
    static bool isNumeric(const char c) { return (c >= '0') && (c <= '9'); }

    /**
     * Returns true if the given character is in [0-9A-Fa-f]
     */
    static bool isHexadecimal(const char c)
    {
        return ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F')) ||
               ((c >= 'a') && (c <= 'f'));
    }

    /**
     * Returns true if the given character is in [A-Za-z0-9]
     */
    static bool isAlphanumeric(const char c)
    {
        return isAlphabetic(c) || isNumeric(c);
    }

    /**
     * Returns true if the given string matches [A-Za-z_][A-Za-z0-9_-]*
     *
     * @param name is the string that should be checked.
     */
    static bool isIdentifier(const std::string &name);

    /**
     * Returns true if the given character is a whitespace character (space,
     * horizontal tab, line feed or carriage return).
     */
    static bool isWhitespace(const char c)
    {
        return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
    }

    /**
     * Removes whitespace at the beginning and the end of the given string.
     *
     * @param s is the string that should be trimmed.
     * @return the trimmed copy of s.
     */
    static std::string trim(const std::string &s);

    /**
     * Turns the elements of a collection into a string separated by the
     * given delimiter.
     *
     * @param es is an iterable container of elements that can be appended to an
     * output stream (the << operator must be implemented). The container is
     * taken by const reference, so it is neither copied nor modified.
     * @param delim is the delimiter that should be used to separate the items.
     * @param start is a character sequence that should be prepended to the
     * result.
     * @param end is a character sequence that should be appended to the result.
     * @return start, followed by the delimiter-separated elements, followed by
     * end. For an empty container the result is start directly followed by
     * end.
     */
    template <class T>
    static std::string join(const T &es, const std::string &delim,
                            const std::string &start = "",
                            const std::string &end = "")
    {
        std::stringstream res;
        bool first = true;
        res << start;
        for (const auto &e : es) {
            // The delimiter goes between elements, not in front of the first
            if (!first) {
                res << delim;
            }
            res << e;
            first = false;
        }
        res << end;
        return res.str();
    }
};
}

#endif /* _OUSIA_UTILS_H_ */
+*/ + +#include <sstream> + +#include "Utils.hpp" +#include "Variant.hpp" + +namespace ousia { + +/* Class Variant::TypeException */ + +Variant::TypeException::TypeException(Type actualType, Type requestedType) + : OusiaException(std::string("Variant: Requested \"") + + Variant::getTypeName(requestedType) + + std::string("\" but is \"") + + Variant::getTypeName(actualType) + std::string("\"")), + actualType(actualType), + requestedType(requestedType) +{ +} + +/* Class Variant */ + +const char *Variant::getTypeName(Type type) +{ + switch (type) { + case Type::NULLPTR: + return "null"; + case Type::BOOL: + return "boolean"; + case Type::INT: + return "integer"; + case Type::DOUBLE: + return "double"; + case Type::STRING: + return "string"; + case Type::ARRAY: + return "array"; + case Type::MAP: + return "map"; + } + return "unknown"; +} + +Variant::boolType Variant::toBool() const +{ + switch (getType()) { + case Type::NULLPTR: + return false; + case Type::BOOL: + return asBool(); + case Type::INT: + return asInt() != 0; + case Type::DOUBLE: + return asDouble() != 0.0; + case Type::STRING: + return true; + case Type::ARRAY: + return true; + case Type::MAP: + return true; + } + return false; +} + +Variant::intType Variant::toInt() const +{ + switch (getType()) { + case Type::NULLPTR: + return 0; + case Type::BOOL: + return asBool() ? 1 : 0; + case Type::INT: + return asInt(); + case Type::DOUBLE: + return asDouble(); + case Type::STRING: + return 0; // TODO: Parse string as int + case Type::ARRAY: { + const arrayType &a = asArray(); + return (a.size() == 1) ? a[0].toInt() : 0; + } + case Type::MAP: + return 0; + } + return false; +} + +Variant::doubleType Variant::toDouble() const +{ + switch (getType()) { + case Type::NULLPTR: + return 0.0; + case Type::BOOL: + return asBool() ? 
1.0 : 0.0; + case Type::INT: + return asInt(); + case Type::DOUBLE: + return asDouble(); + case Type::STRING: + return 0.0; // TODO: Parse string as double + case Type::ARRAY: { + const arrayType &a = asArray(); + return (a.size() == 1) ? a[0].toDouble() : 0; + } + case Type::MAP: + return 0; + } + return false; +} + +Variant::stringType Variant::toString(bool escape) const +{ + switch (getType()) { + case Type::NULLPTR: + return "null"; + case Type::BOOL: + return asBool() ? "true" : "false"; + case Type::INT: + return std::to_string(asInt()); + case Type::DOUBLE: + return std::to_string(asDouble()); + case Type::STRING: { + // TODO: Use proper serialization function + std::stringstream ss; + ss << "\"" << asString() << "\""; + return ss.str(); + } + case Type::ARRAY: + return Utils::join(asArray(), ", ", "[", "]"); + case Type::MAP: + return Utils::join(asMap(), ", ", "{", "}"); + } + return ""; +} + +} + diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp new file mode 100644 index 0000000..d411fd3 --- /dev/null +++ b/src/core/common/Variant.hpp @@ -0,0 +1,761 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Variant.hpp + * + * The Variant class is used to efficiently represent a variables of varying + * type. 
Variant instances are used to represent data given by the end user and + * to exchange information between the host application and the script clients. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_VARIANT_HPP_ +#define _OUSIA_VARIANT_HPP_ + +#include <cstdint> +#include <map> +#include <string> +#include <vector> +#include <ostream> + +// TODO: Use +// http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html +// later (will allow to use 8 bytes for a variant) + +#include "Exceptions.hpp" + +namespace ousia { + +/** + * Instances of the Variant class represent any kind of data that is exchanged + * between the host application and the script engine. Variants are immutable. + */ +class Variant { +public: + /** + * Enum containing the possible types a variant may have. + */ + enum class Type : int16_t { + NULLPTR, + BOOL, + INT, + DOUBLE, + STRING, + ARRAY, + MAP + }; + + /** + * Exception thrown whenever a variant is accessed via a getter function + * that is not supported for the current variant type. + */ + class TypeException : public OusiaException { + private: + /** + * Internally used string holding the exception message. + */ + const std::string msg; + + public: + /** + * Contains the actual type of the variant. + */ + const Type actualType; + + /** + * Contains the requested type of the variant. + */ + const Type requestedType; + + /** + * Constructor of the TypeException. + * + * @param actualType describes the actual type of the variant. + * @param requestedType describes the type in which the variant was + * requested. + */ + TypeException(Type actualType, Type requestedType); + }; + + using boolType = bool; + using intType = int32_t; + using doubleType = double; + using stringType = std::string; + using arrayType = std::vector<Variant>; + using mapType = std::map<std::string, Variant>; + +private: + /** + * Used to store the actual type of the variant. 
+ */ + Type type = Type::NULLPTR; + + /** + * Anonymous union containing the possible value of the variant. + */ + union { + /** + * The boolean value. Only valid if type is Type::BOOL. + */ + boolType boolVal; + /** + * The integer value. Only valid if type is Type::INT. + */ + intType intVal; + /** + * The number value. Only valid if type is Type::DOUBLE. + */ + doubleType doubleVal; + /** + * Pointer to the more complex data structures on the free store. Only + * valid if type is one of Type::STRING, Type::ARRAY, + * Type::MAP. + */ + void *ptrVal; + }; + + /** + * Internally used to convert the current pointer value to a reference of + * the specified type. + */ + template <typename T> + T &asObj(Type requestedType) const + { + const Type actualType = getType(); + if (actualType == requestedType) { + return *(static_cast<T *>(ptrVal)); + } + throw TypeException{actualType, requestedType}; + } + + /** + * Used internally to assign the value of another Variant instance to this + * instance. + * + * @param v is the Variant instance that should be copied to this instance. + */ + void copy(const Variant &v) + { + destroy(); + type = v.type; + switch (type) { + case Type::NULLPTR: + break; + case Type::BOOL: + boolVal = v.boolVal; + break; + case Type::INT: + intVal = v.intVal; + break; + case Type::DOUBLE: + doubleVal = v.doubleVal; + break; + case Type::STRING: + ptrVal = new stringType(v.asString()); + break; + case Type::ARRAY: + ptrVal = new arrayType(v.asArray()); + break; + case Type::MAP: + ptrVal = new mapType(v.asMap()); + break; + } + } + + /** + * Used internally to move the value of another Variant instance to this + * instance. + * + * @param v is the Variant instance that should be copied to this instance. 
+ */ + void move(Variant &&v) + { + destroy(); + type = v.type; + switch (type) { + case Type::NULLPTR: + break; + case Type::BOOL: + boolVal = v.boolVal; + break; + case Type::INT: + intVal = v.intVal; + break; + case Type::DOUBLE: + doubleVal = v.doubleVal; + break; + case Type::STRING: + case Type::ARRAY: + case Type::MAP: + ptrVal = v.ptrVal; + v.ptrVal = nullptr; + break; + } + v.type = Type::NULLPTR; + } + + /** + * Used internally to destroy any value that was allocated on the heap. + */ + void destroy() + { + if (ptrVal) { + switch (type) { + case Type::STRING: + delete static_cast<stringType *>(ptrVal); + break; + case Type::ARRAY: + delete static_cast<arrayType *>(ptrVal); + break; + case Type::MAP: + delete static_cast<mapType *>(ptrVal); + break; + default: + break; + } + } + } + +public: + /** + * Copy constructor of the Variant class. + * + * @param v is the Variant instance that should be cloned. + */ + Variant(const Variant &v) : ptrVal(nullptr) { copy(v); } + + /** + * Move constructor of the Variant class. + * + * @param v is the reference to the Variant instance that should be moved, + * this instance is invalidated afterwards. + */ + Variant(Variant &&v) : ptrVal(nullptr) { move(std::move(v)); } + + /** + * Default constructor. Type is set to Type:null. + */ + Variant() : ptrVal(nullptr) { setNull(); } + + /** + * Default destructor, frees any memory that was allocated on the heap. + */ + ~Variant() { destroy(); } + + /** + * Constructor for null values. Initializes the variant as null value. + */ + Variant(std::nullptr_t) : ptrVal(nullptr) { setNull(); } + + /** + * Constructor for boolean values. + * + * @param b boolean value. + */ + Variant(boolType b) : ptrVal(nullptr) { setBool(b); } + + /** + * Constructor for integer values. + * + * @param i integer value. + */ + Variant(intType i) : ptrVal(nullptr) { setInt(i); } + + /** + * Constructor for double values. + * + * @param d double value. 
+ */ + Variant(doubleType d) : ptrVal(nullptr) { setDouble(d); } + + /** + * Constructor for string values. The given string is copied and managed by + * the new Variant instance. + * + * @param s is a reference to a C-Style string used as string value. + */ + Variant(const char *s) : ptrVal(nullptr) { setString(s); } + + /** + * Constructor for array values. The given array is copied and managed by + * the new Variant instance. + * + * @param a is a reference to the array + */ + Variant(arrayType a) : ptrVal(nullptr) { setArray(std::move(a)); } + + /** + * Constructor for map values. The given map is copied and managed by the + * new Variant instance. + * + * @param m is a reference to the map. + */ + Variant(mapType m) : ptrVal(nullptr) { setMap(std::move(m)); } + + /** + * Copy assignment operator. + */ + Variant &operator=(const Variant &v) + { + copy(v); + return *this; + } + + /** + * Move assignment operator. + */ + Variant &operator=(Variant &&v) + { + move(std::move(v)); + return *this; + } + + /** + * Assign nullptr_t operator (allows to write Variant v = nullptr). + * + * @param p is an instance of std::nullptr_t. + */ + Variant &operator=(std::nullptr_t) + { + setNull(); + return *this; + } + + /** + * Assign a boolean value. + * + * @param b is the boolean value to which the variant should be set. + */ + Variant &operator=(boolType b) + { + setBool(b); + return *this; + } + + /** + * Assign an integer value. + * + * @param i is the integer value to which the variant should be set. + */ + Variant &operator=(intType i) + { + setInt(i); + return *this; + } + + /** + * Assign a double value. + * + * @param d is the double value to which the variant should be set. + */ + Variant &operator=(doubleType d) + { + setDouble(d); + return *this; + } + + /** + * Assign a zero terminated const char array. + * + * @param s is the zero terminated const char array to which the variant + * should be set. 
+ */ + Variant &operator=(const char *s) + { + setString(s); + return *this; + } + + /** + * Checks whether this Variant instance represents the nullptr. + * + * @return true if the Variant instance represents the nullptr, false + * otherwise. + */ + bool isNull() const { return type == Type::NULLPTR; } + + /** + * Checks whether this Variant instance is a boolean. + * + * @return true if the Variant instance is a boolean, false otherwise. + */ + bool isBool() const { return type == Type::BOOL; } + + /** + * Checks whether this Variant instance is an integer. + * + * @return true if the Variant instance is an integer, false otherwise. + */ + bool isInt() const { return type == Type::INT; } + + /** + * Checks whether this Variant instance is a double. + * + * @return true if the Variant instance is a double, false otherwise. + */ + bool isDouble() const { return type == Type::DOUBLE; } + + /** + * Checks whether this Variant instance is a string. + * + * @return true if the Variant instance is a string, false otherwise. + */ + bool isString() const { return type == Type::STRING; } + + /** + * Checks whether this Variant instance is an array. + * + * @return true if the Variant instance is an array, false otherwise. + */ + bool isArray() const { return type == Type::ARRAY; } + + /** + * Checks whether this Variant instance is a map. + * + * @return true if the Variant instance is a map, false otherwise. + */ + bool isMap() const { return type == Type::MAP; } + + /** + * Returns the Variant boolean value. Performs no type conversion. Throws an + * exception if the underlying type is not a boolean. + * + * @return the boolean value. + */ + boolType asBool() const + { + if (isBool()) { + return boolVal; + } + throw TypeException{getType(), Type::BOOL}; + } + + /** + * Returns the Variant integer value. Performs no type conversion. Throws an + * exception if the underlying type is not an integer. + * + * @return the integer value. 
+ */ + intType asInt() const + { + if (isInt()) { + return intVal; + } + throw TypeException{getType(), Type::INT}; + } + + /** + * Returns the Variant double value. Performs no type conversion. Throws an + * exception if the underlying type is not a double. + * + * @return the double value. + */ + doubleType asDouble() const + { + if (isDouble()) { + return doubleVal; + } + throw TypeException{getType(), Type::DOUBLE}; + } + + /** + * Returns a const reference to the string value. Performs no type + * conversion. Throws an exception if the underlying type is not a string. + * + * @return the string value as const reference. + */ + const stringType &asString() const + { + return asObj<stringType>(Type::STRING); + } + + /** + * Returns a const reference to the string value. Performs no type + * conversion. Throws an exception if the underlying type is not a string. + * + * @return the string value as reference. + */ + stringType &asString() { return asObj<stringType>(Type::STRING); } + + /** + * Returns a const reference to the array value. Performs no type + * conversion. Throws an exception if the underlying type is not an array. + * + * @return the array value as const reference. + */ + const arrayType &asArray() const { return asObj<arrayType>(Type::ARRAY); } + + /** + * Returns a const reference to the array value. Performs no type + * conversion. Throws an exception if the underlying type is not an array. + * + * @return the array value as reference. + */ + arrayType &asArray() { return asObj<arrayType>(Type::ARRAY); } + + /** + * Returns a const reference to the map value. Performs no type + * conversion. Throws an exception if the underlying type is not a map. + * + * @return the map value as const reference. + */ + const mapType &asMap() const { return asObj<mapType>(Type::MAP); } + + /** + * Returns a reference to the map value. Performs no type conversion. + * Throws an exception if the underlying type is not a map. 
+ * + * @return the map value as reference. + */ + mapType &asMap() { return asObj<mapType>(Type::MAP); } + + /** + * Returns the value of the Variant as boolean, performs type conversion. + * + * @return the Variant value converted to a boolean value. + */ + boolType toBool() const; + + /** + * Returns the value of the Variant as integer, performs type conversion. + * + * @return the Variant value converted to an integer value. + */ + intType toInt() const; + + /** + * Returns the value of the Variant as double, performs type conversion. + * + * @return the Variant value converted to a double value. + */ + doubleType toDouble() const; + + /** + * Returns the value of the Variant as string, performs type conversion. + * + * @return the value of the variant as string. + * @param escape if set to true, adds double quotes to strings and escapes + * them properly (resulting in a more or less JSONesque output). + */ + stringType toString(bool escape = false) const; + + /** + * Sets the variant to null. + */ + void setNull() + { + destroy(); + type = Type::NULLPTR; + ptrVal = nullptr; + } + + /** + * Sets the variant to the given boolean value. + * + * @param b is the new boolean value. + */ + void setBool(boolType b) + { + destroy(); + type = Type::BOOL; + boolVal = b; + } + + /** + * Sets the variant to the given integer value. + * + * @param i is the new integer value. + */ + void setInt(intType i) + { + destroy(); + type = Type::INT; + intVal = i; + } + + /** + * Sets the variant to the given double value. + * + * @param d is the new double value. + */ + void setDouble(doubleType d) + { + destroy(); + type = Type::DOUBLE; + doubleVal = d; + } + + /** + * Sets the variant to the given string value. + * + * @param d is the new string value. + */ + void setString(const char *s) + { + if (isString()) { + asString().assign(s); + } else { + destroy(); + type = Type::STRING; + ptrVal = new stringType(s); + } + } + + /** + * Sets the variant to the given array value. 
+ * + * @param a is the new array value. + */ + void setArray(arrayType a) + { + if (isArray()) { + asArray().swap(a); + } else { + destroy(); + type = Type::ARRAY; + ptrVal = new arrayType(std::move(a)); + } + } + + /** + * Sets the variant to the given map value. + * + * @param a is the new map value. + */ + void setMap(mapType m) + { + if (isMap()) { + asMap().swap(m); + } else { + destroy(); + type = Type::MAP; + ptrVal = new mapType(std::move(m)); + } + } + + /** + * Returns the current type of the Variant. + * + * @return the current type of the Variant. + */ + Type getType() const { return type; } + + /** + * Returns the name of the given variant type as C-style string. + */ + static const char *getTypeName(Type type); + + /** + * Returns the name of the type of this variant instance. + */ + const char *getTypeName() { return Variant::getTypeName(getType()); } + + /** + * Prints the Variant to the output stream. + */ + friend std::ostream &operator<<(std::ostream &os, const Variant &v) + { + return os << v.toString(true); + } + + /** + * Prints a key value pair to the output stream. + */ + friend std::ostream &operator<<(std::ostream &os, + const mapType::value_type &v) + { + // TODO: Use proper serialization function + return os << "\"" << v.first << "\": " << v.second.toString(true); + } + + /* + * Comprison operators. + */ + + friend bool operator<(const Variant &lhs, const Variant &rhs) + { + // If the types do not match, we can not do a meaningful comparison. 
+ if (lhs.getType() != rhs.getType()) { + throw TypeException(lhs.getType(), rhs.getType()); + } + switch (lhs.getType()) { + case Type::NULLPTR: + return false; + case Type::BOOL: + return lhs.boolVal < rhs.boolVal; + case Type::INT: + return lhs.intVal < rhs.intVal; + case Type::DOUBLE: + return lhs.doubleVal < rhs.doubleVal; + case Type::STRING: + return lhs.asString() < rhs.asString(); + case Type::ARRAY: + return lhs.asArray() < rhs.asArray(); + case Type::MAP: + return lhs.asMap() < rhs.asMap(); + } + throw OusiaException("Internal Error! Unknown type!"); + } + friend bool operator>(const Variant &lhs, const Variant &rhs) + { + return rhs < lhs; + } + friend bool operator<=(const Variant &lhs, const Variant &rhs) + { + return !(lhs > rhs); + } + friend bool operator>=(const Variant &lhs, const Variant &rhs) + { + return !(lhs < rhs); + } + + friend bool operator==(const Variant &lhs, const Variant &rhs) + { + if (lhs.getType() != rhs.getType()) { + return false; + } + switch (lhs.getType()) { + case Type::NULLPTR: + return true; + case Type::BOOL: + return lhs.boolVal == rhs.boolVal; + case Type::INT: + return lhs.intVal == rhs.intVal; + case Type::DOUBLE: + return lhs.doubleVal == rhs.doubleVal; + case Type::STRING: + return lhs.asString() == rhs.asString(); + case Type::ARRAY: + return lhs.asArray() == rhs.asArray(); + case Type::MAP: + return lhs.asMap() == rhs.asMap(); + } + throw OusiaException("Internal Error! 
Unknown type!"); + } + + friend bool operator!=(const Variant &lhs, const Variant &rhs) + { + return !(lhs == rhs); + } +}; +} + +#endif /* _OUSIA_VARIANT_HPP_ */ + diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp new file mode 100644 index 0000000..e611842 --- /dev/null +++ b/src/core/common/VariantReader.cpp @@ -0,0 +1,625 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <iostream> + +#include <cmath> +#include <sstream> + +#include "VariantReader.hpp" +#include "Utils.hpp" + +namespace ousia { + +// TODO: Better error messages (like "Expected 'x' but got 'y'") +// TODO: Replace delims with single char delim where possible +// TODO: Use custom return value instead of std::pair +// TODO: Allow buffered char reader to "fork" +// TODO: Rename CharReader to shorter CharReader +// TODO: Implement context in CharReader (to allow error messages to extract the +// current line) + +/* Error Messages */ + +static const char *ERR_UNEXPECTED_CHAR = "Unexpected character"; +static const char *ERR_UNEXPECTED_END = "Unexpected literal end"; +static const char *ERR_UNTERMINATED = "Unterminated literal"; +static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; +static const char *ERR_INVALID_INTEGER = "Invalid integer value"; +static const char *ERR_TOO_LARGE = "Value too large to represent"; + +/* Class Number */ + +/** + * Class used internally to represent a number (integer or double). The number + * is represented by its components (base value a, nominator n, denominator d, + * exponent e, sign s and exponent sign sE). + */ +class Number { +private: + /** + * Reprsents the part of the number: Base value a, nominator n, exponent e. + */ + enum class Part { A, N, E }; + + /** + * State used in the parser state machine + */ + enum class State { + INIT, + HAS_MINUS, + LEADING_ZERO, + LEADING_POINT, + INT, + HEX, + POINT, + EXP_INIT, + EXP_HAS_MINUS, + EXP + }; + + /** + * Returns the numeric value of the given ASCII character (returns 0 for + * '0', 1 for '1', 10 for 'A' and so on). + * + * @param c is the character for which the numeric value should be returned. + * @return the numeric value the character represents. 
+ */ + static int charValue(char c) + { + if (c >= '0' && c <= '9') { + return c & 0x0F; + } + if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) { + return (c & 0x0F) + 9; + } + return -1; + } + + /** + * Appends the value of the character c to the internal number + * representation and reports any errors that might occur. + */ + bool appendChar(char c, int base, Part p, CharReader &reader, + Logger &logger) + { + // Check whether the given character is valid + int v = charValue(c); + if (v < 0 || v >= base) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + + // Append the number to the specified part + switch (p) { + case Part::A: + a = a * base + v; + break; + case Part::N: + n = n * base + v; + d = d * base; + break; + case Part::E: + e = e * base + v; + break; + } + + // Check for any overflows + if (a < 0 || n < 0 || d < 0 || e < 0) { + logger.errorAt(ERR_TOO_LARGE, reader); + return false; + } + return true; + } + +public: + /** + * Sign and exponent sign. + */ + int8_t s, sE; + + /** + * Exponent + */ + int16_t e; + + /** + * Base value, nominator, denominator + */ + int64_t a, n, d; + + /** + * Constructor of the number class. + */ + Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {} + + /** + * Returns the represented double value. + */ + double doubleValue() + { + return s * (a + ((double)n / (double)d)) * pow(10.0, (double)(sE * e)); + } + + /** + * Returns the represented integer value. Only a lossless operation, if the + * number is an integer (as can be checked via the isInt method), otherwise + * the exponent and the fractional value will be truncated. + */ + int64_t intValue() { return s * a; } + + /** + * Returns true, if the number is an integer (has no fractional or + * exponential part). + */ + bool isInt() { return (n == 0) && (d == 1) && (e == 0); } + + /** + * Tries to parse the number from the given stream and loggs any errors to + * the given logger instance. Numbers are terminated by one of the given + * delimiters. 
+     */
+    bool parse(CharReader &reader, Logger &logger,
+               const std::unordered_set<char> &delims);
+};
+
+/**
+ * Runs the number state machine over the given reader. Leading whitespace is
+ * skipped; parsing stops at the first whitespace character or delimiter,
+ * which is left unconsumed.
+ *
+ * @param reader is the character source.
+ * @param logger is used to report unexpected characters and a premature end.
+ * @param delims is the set of characters which terminate the number.
+ * @return true if the consumed characters form a complete number, false
+ * otherwise.
+ */
+bool Number::parse(CharReader &reader, Logger &logger,
+                   const std::unordered_set<char> &delims)
+{
+    State state = State::INIT;
+    // Set once a digit followed the "0x" prefix; a bare "0x" is no number
+    bool hasHexDigit = false;
+    char c;
+
+    // Consume the first whitespace characters
+    reader.consumeWhitespace();
+
+    // Iterate over the FSM to extract numbers
+    while (reader.peek(c)) {
+        // Abort, once a delimiter or whitespace is reached
+        if (Utils::isWhitespace(c) || delims.count(c)) {
+            reader.resetPeek();
+            break;
+        }
+
+        // The character is not a whitespace character and not a delimiter
+        switch (state) {
+            case State::INIT:
+            case State::HAS_MINUS:
+                switch (c) {
+                    case '-':
+                        // Do not allow multiple minus signs
+                        if (state == State::HAS_MINUS) {
+                            logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+                            return false;
+                        }
+                        state = State::HAS_MINUS;
+                        s = -1;
+                        break;
+                    case '0':
+                        // Remember a leading zero for the detection of "0x"
+                        state = State::LEADING_ZERO;
+                        break;
+                    case '.':
+                        // Remember a leading point as ".eXXX" is invalid
+                        state = State::LEADING_POINT;
+                        break;
+                    default:
+                        state = State::INT;
+                        if (!appendChar(c, 10, Part::A, reader, logger)) {
+                            return false;
+                        }
+                        break;
+                }
+                break;
+            case State::LEADING_ZERO:
+                if (c == 'x' || c == 'X') {
+                    state = State::HEX;
+                    break;
+                }
+            // fallthrough
+            case State::INT:
+                switch (c) {
+                    case '.':
+                        state = State::POINT;
+                        break;
+                    case 'e':
+                    case 'E':
+                        state = State::EXP_INIT;
+                        break;
+                    default:
+                        state = State::INT;
+                        if (!appendChar(c, 10, Part::A, reader, logger)) {
+                            return false;
+                        }
+                        break;
+                }
+                break;
+            case State::HEX:
+                if (!appendChar(c, 16, Part::A, reader, logger)) {
+                    return false;
+                }
+                hasHexDigit = true;
+                break;
+            case State::LEADING_POINT:
+            case State::POINT:
+                switch (c) {
+                    case 'e':
+                    case 'E':
+                        // An exponent directly after a leading point has no
+                        // mantissa digits
+                        if (state == State::LEADING_POINT) {
+                            logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+                            return false;
+                        }
+                        state = State::EXP_INIT;
+                        break;
+                    default:
+                        state = State::POINT;
+                        if (!appendChar(c, 10, Part::N, reader, logger)) {
+                            return false;
+                        }
+                        break;
+                }
+                break;
+            case State::EXP_HAS_MINUS:
+            case State::EXP_INIT:
+                if (c == '-') {
+                    // Do not allow multiple minus signs in the exponent
+                    if (state == State::EXP_HAS_MINUS) {
+                        logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+                        return false;
+                    }
+                    state = State::EXP_HAS_MINUS;
+                    sE = -1;
+                } else {
+                    state = State::EXP;
+                    if (!appendChar(c, 10, Part::E, reader, logger)) {
+                        return false;
+                    }
+                }
+                break;
+            case State::EXP:
+                if (!appendChar(c, 10, Part::E, reader, logger)) {
+                    return false;
+                }
+                break;
+        }
+        reader.consumePeek();
+    }
+
+    // States in which ending is valid. The HEX state only counts once at
+    // least one digit followed the "0x" prefix. Log an error in other states
+    const bool complete =
+        state == State::LEADING_ZERO || state == State::INT ||
+        state == State::POINT || state == State::EXP ||
+        (state == State::HEX && hasHexDigit);
+    if (complete) {
+        return true;
+    }
+    logger.errorAt(ERR_UNEXPECTED_END, reader);
+    return false;
+}
+
+
+/* Class Reader */
+
+// States shared by the state machines of the parse functions below
+static const int STATE_INIT = 0;
+static const int STATE_IN_STRING = 1;
+static const int STATE_IN_ARRAY = 2;
+static const int STATE_EXPECT_COMMA = 3;
+static const int STATE_ESCAPE = 4;
+static const int STATE_WHITESPACE = 5;
+static const int STATE_RESYNC = 6;
+
+/**
+ * Logs the given error message at the current reader position and builds the
+ * failure result returned by the parse functions.
+ *
+ * @param err is the error message that is passed to the logger.
+ * @param res is the (partial) result that is returned alongside the error.
+ * @return a pair with the first element set to false and res as second
+ * element.
+ */
+template <class T>
+static std::pair<bool, T> error(CharReader &reader, Logger &logger,
+                                const char *err, T res)
+{
+    logger.errorAt(err, reader);
+    return std::make_pair(false, std::move(res));
+}
+
+/**
+ * Reads a string enclosed by " or '. A raw line break inside the string is
+ * reported as ERR_UNTERMINATED, a backslash followed by a line break is
+ * dropped. The \x, \u and octal escape sequences are recognised but not
+ * decoded yet (see the TODOs below).
+ *
+ * @param delims may be nullptr. It is only consulted before the opening
+ * quote: a delimiter found there is reported as ERR_UNEXPECTED_END, any other
+ * character as ERR_UNEXPECTED_CHAR.
+ * @return a pair of a success flag and the text read so far.
+ */
+std::pair<bool, std::string> VariantReader::parseString(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> *delims)
+{
+    // Initialize the internal state
+    int state = STATE_INIT;
+    char quote = 0;
+    std::stringstream res;
+
+    // Consume all whitespace
+    reader.consumeWhitespace();
+
+    // State machine which iterates over each character in the stream
+    // TODO: Combination of peeking and consumePeek is stupid as consumePeek is
+    // the default (read and putBack would obviously be better, yet the latter
+    // is not trivial to implement in the current CharReader).
+    char c;
+    while (reader.peek(c)) {
+        switch (state) {
+            case STATE_INIT:
+                if (c == '"' || c == '\'') {
+                    // Remember which quote has to terminate the string
+                    quote = c;
+                    state = STATE_IN_STRING;
+                    break;
+                } else if (delims && delims->count(c)) {
+                    return error(reader, logger, ERR_UNEXPECTED_END, res.str());
+                }
+                return error(reader, logger, ERR_UNEXPECTED_CHAR, res.str());
+            case STATE_IN_STRING:
+                if (c == quote) {
+                    reader.consumePeek();
+                    return std::make_pair(true, res.str());
+                } else if (c == '\\') {
+                    state = STATE_ESCAPE;
+                    reader.consumePeek();
+                    break;
+                } else if (c == '\n') {
+                    return error(reader, logger, ERR_UNTERMINATED, res.str());
+                }
+                res << c;
+                reader.consumePeek();
+                break;
+            case STATE_ESCAPE:
+                // Handle all possible special escape characters
+                switch (c) {
+                    case 'b':
+                        res << '\b';
+                        break;
+                    case 'f':
+                        res << '\f';
+                        break;
+                    case 'n':
+                        res << '\n';
+                        break;
+                    case 'r':
+                        res << '\r';
+                        break;
+                    case 't':
+                        res << '\t';
+                        break;
+                    case 'v':
+                        res << '\v';
+                        break;
+                    case '\'':
+                        res << '\'';
+                        break;
+                    case '"':
+                        res << '"';
+                        break;
+                    case '\\':
+                        res << '\\';
+                        break;
+                    case '\n':
+                        // Escaped line break: nothing is appended
+                        break;
+                    case 'x':
+                        // TODO: Parse Latin-1 sequence hex XX
+                        break;
+                    case 'u':
+                        // TODO: Parse 16-Bit unicode character hex XXXX
+                        break;
+                    default:
+                        if (Utils::isNumeric(c)) {
+                            // TODO: Parse octal 000 sequence
+                        } else {
+                            logger.errorAt(ERR_INVALID_ESCAPE, reader);
+                        }
+                        break;
+                }
+
+                // Switch back to the "normal" state
+                state = STATE_IN_STRING;
+                reader.consumePeek();
+                break;
+        }
+    }
+    return error(reader, logger, ERR_UNEXPECTED_END, res.str());
+}
+
+/**
+ * Reads a comma separated list of values, each parsed by parseGeneric. If an
+ * element cannot be parsed, its (partial) value is still stored, the parser
+ * skips to the next comma and the overall result is marked as failed.
+ *
+ * @param delim is the character that ends the array. If zero is given, the
+ * array has to start with '[' and ends with ']'; otherwise the reader is
+ * expected to be positioned inside the array already.
+ * @return a pair of a success flag and the elements read so far.
+ */
+std::pair<bool, Variant::arrayType> VariantReader::parseArray(
+    CharReader &reader, Logger &logger, char delim)
+{
+    Variant::arrayType res;
+    bool hadError = false;
+    int state = delim ? STATE_IN_ARRAY : STATE_INIT;
+    delim = delim ? delim : ']';
+    char c;
+
+    // Consume all whitespace
+    reader.consumeWhitespace();
+
+    // Iterate over the characters, use the parseGeneric function to read the
+    // elements
+    while (reader.peek(c)) {
+        // Generically handle the end of the array
+        if (state != STATE_INIT && c == delim) {
+            reader.consumePeek();
+            return std::make_pair(!hadError, std::move(res));
+        }
+
+        switch (state) {
+            case STATE_INIT:
+                if (c != '[') {
+                    return error(reader, logger, ERR_UNEXPECTED_CHAR,
+                                 std::move(res));
+                }
+                state = STATE_IN_ARRAY;
+                reader.consumePeek();
+                break;
+            case STATE_IN_ARRAY: {
+                // Skip whitespace in front of an element here, so that a
+                // closing delimiter following it ("[ ]", "[1, ]") reaches the
+                // end-of-array check above instead of being reported by
+                // parseGeneric as a missing element
+                if (Utils::isWhitespace(c)) {
+                    reader.consumePeek();
+                    break;
+                }
+
+                // Try to read an element using the parseGeneric function
+                reader.resetPeek();
+                auto elem = parseGeneric(reader, logger, {',', delim});
+                res.push_back(std::move(elem.second));
+
+                // If the reader had no error, expect a comma, otherwise skip
+                // to the next comma in the stream
+                if (elem.first) {
+                    state = STATE_EXPECT_COMMA;
+                } else {
+                    state = STATE_RESYNC;
+                    hadError = true;
+                }
+                break;
+            }
+            case STATE_EXPECT_COMMA:
+                // Skip whitespace, anything else than a comma is an error
+                if (c == ',') {
+                    state = STATE_IN_ARRAY;
+                } else if (!Utils::isWhitespace(c)) {
+                    hadError = true;
+                    state = STATE_RESYNC;
+                    logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+                }
+                reader.consumePeek();
+                break;
+            case STATE_RESYNC:
+                // Just wait for another comma to arrive
+                if (c == ',') {
+                    state = STATE_IN_ARRAY;
+                }
+                reader.consumePeek();
+                break;
+        }
+    }
+    return error(reader, logger, ERR_UNEXPECTED_END, std::move(res));
+}
+
+/**
+ * Copies characters until a delimiter or the end of the input is reached.
+ * Leading whitespace is skipped and whitespace directly in front of the
+ * delimiter (or the end of the input) is dropped; whitespace between other
+ * characters is kept. The delimiter is not consumed.
+ *
+ * @return a pair of a success flag, which is always true, and the text.
+ */
+std::pair<bool, std::string> VariantReader::parseUnescapedString(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
+{
+    std::stringstream res;
+    std::stringstream buf;
+    char c;
+
+    // Consume all whitespace
+    reader.consumeWhitespace();
+
+    // Copy all characters, skip whitespace at the end
+    int state = STATE_IN_STRING;
+    while (reader.peek(c)) {
+        if (delims.count(c)) {
+            reader.resetPeek();
+            return std::make_pair(true, res.str());
+        } else if (Utils::isWhitespace(c)) {
+            // Do not add whitespace to the output buffer yet, it is only
+            // kept if another non-whitespace character follows
+            state = STATE_WHITESPACE;
+            buf << c;
+        } else {
+            // If we just had a sequence of whitespace, append it to the
+            // output buffer and continue
+            if (state == STATE_WHITESPACE) {
+                res << buf.str();
+                buf.str(std::string{});
+                buf.clear();
+                state = STATE_IN_STRING;
+            }
+            res << c;
+        }
+        reader.consumePeek();
+    }
+    return std::make_pair(true, res.str());
+}
+
+/**
+ * Parses a number and requires it to be an integer.
+ *
+ * @return a pair of a success flag and the integer value of the number. A
+ * number which is not an integer is reported as ERR_INVALID_INTEGER.
+ */
+std::pair<bool, int64_t> VariantReader::parseInteger(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
+{
+    Number n;
+    if (n.parse(reader, logger, delims)) {
+        // Only succeed if the parsed number is an integer, otherwise this is
+        // an error
+        if (n.isInt()) {
+            return std::make_pair(true, n.intValue());
+        } else {
+            return error(reader, logger, ERR_INVALID_INTEGER, n.intValue());
+        }
+    }
+    return std::make_pair(false, n.intValue());
+}
+
+/**
+ * Parses a number and returns it as double.
+ *
+ * @return a pair of the result of Number::parse and the double value of the
+ * number.
+ */
+std::pair<bool, double> VariantReader::parseDouble(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
+{
+    Number n;
+    bool res = n.parse(reader, logger, delims);
+    return std::make_pair(res, n.doubleValue());
+}
+
+/**
+ * Reads the most specific value found at the reader position: a quoted
+ * string, a number, or an unescaped string in which the literals "true",
+ * "false" and "null" are mapped to the corresponding variant values.
+ *
+ * @return a pair of a success flag and the value. Reaching a delimiter or the
+ * end of the input before any value is reported as ERR_UNEXPECTED_END.
+ */
+std::pair<bool, Variant> VariantReader::parseGeneric(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
+{
+    char c;
+
+    // Skip all whitespace characters, abort at the end of the input or if a
+    // delimiter is reached
+    reader.consumeWhitespace();
+    if (!reader.peek(c) || delims.count(c)) {
+        return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
+    }
+
+    // The character was only needed to select a parser. Drop the look-ahead
+    // so the delegate starts at the current read position, as parseArray
+    // does before calling this function
+    reader.resetPeek();
+
+    // Parse a string if a quote is reached
+    if (c == '"' || c == '\'') {
+        auto res = parseString(reader, logger);
+        return std::make_pair(res.first, res.second.c_str());
+    }
+
+    // TODO: Parse struct descriptor if c == '['
+
+    // Try to parse everything that looks like a number as number
+    if (Utils::isNumeric(c) || c == '-') {
+        Number n;
+
+        // Fork the reader, the input is only consumed on success
+        CharReaderFork fork = reader.fork();
+
+        // TODO: Fork logger
+
+        // Try to parse the number
+        if (n.parse(fork, logger, delims)) {
+            // Parsing was successful, advance the reader
+            fork.commit();
+            if (n.isInt()) {
+                return std::make_pair(
+                    true,
+                    Variant{static_cast<Variant::intType>(n.intValue())});
+            } else {
+                return std::make_pair(true, n.doubleValue());
+            }
+        }
+    }
+
+    // Parse an unescaped string in any other case
+    auto res = parseUnescapedString(reader, logger, delims);
+
+    // Handling for special primitive values
+    if (res.first) {
+        if (res.second == "true") {
+            return std::make_pair(true, Variant{true});
+        }
+        if (res.second == "false") {
+            return std::make_pair(true, Variant{false});
+        }
+        if (res.second == "null") {
+            return std::make_pair(true, Variant{nullptr});
+        }
+    }
+    return std::make_pair(res.first, res.second.c_str());
+}
+}
+
diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp
new file mode 100644
index 0000000..5e7c5d2
--- /dev/null
+++ b/src/core/common/VariantReader.hpp
@@ -0,0 +1,166 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file VariantReader.hpp
+ *
+ * Provides parsers for various micro formats. These formats include integers,
+ * doubles, strings, JSON and the Ousía struct notation.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_VARIANT_READER_HPP_
+#define _OUSIA_VARIANT_READER_HPP_
+
+#include <cstdint>
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+#include "CharReader.hpp"
+#include "Logger.hpp"
+#include "Variant.hpp"
+
+namespace ousia {
+
+/**
+ * Collection of static parse functions. Every function returns a pair whose
+ * first element tells whether parsing succeeded and whose second element is
+ * the (possibly partial) value that has been read.
+ */
+class VariantReader {
+private:
+    /**
+     * Parses a string which may either be enclosed by " or ', unescapes
+     * entities in the string as specified for JavaScript.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. On success the reader is positioned
+     * after the terminating quote character.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is an optional set of delimiters (the delimiters may
+     * occur inside the actual string, but not outside). If nullptr is given,
+     * no delimiter is used and a complete string is read.
+     * @return a pair of a success flag and the unescaped string.
+     */
+    static std::pair<bool, std::string> parseString(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> *delims);
+
+public:
+    /**
+     * Parses a string which may either be enclosed by " or ', unescapes
+     * entities in the string as specified for JavaScript.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. On success the reader is positioned
+     * after the terminating quote character.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of delimiters (the delimiters may occur inside
+     * the actual string, but not outside).
+     * @return a pair of a success flag and the unescaped string.
+     */
+    static std::pair<bool, std::string> parseString(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims)
+    {
+        return parseString(reader, logger, &delims);
+    }
+
+    /**
+     * Parses a string which may either be enclosed by " or ', unescapes
+     * entities in the string as specified for JavaScript. No delimiters are
+     * used.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. On success the reader is positioned
+     * after the terminating quote character.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @return a pair of a success flag and the unescaped string.
+     */
+    static std::pair<bool, std::string> parseString(CharReader &reader,
+                                                    Logger &logger)
+    {
+        return parseString(reader, logger, nullptr);
+    }
+
+    /**
+     * Extracts an unescaped string from the given CharReader instance. This
+     * function just reads text until one of the given delimiter characters or
+     * the end of the input is reached. Whitespace at the beginning and at the
+     * end of the text is not part of the result.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. The reader will be positioned at the
+     * terminating delimiting character.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of characters which will terminate the string.
+     * These characters are not included in the result.
+     * @return a pair of a success flag and the extracted string.
+     */
+    static std::pair<bool, std::string> parseUnescapedString(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims);
+
+    /**
+     * Parses an integer from the given CharReader instance until one of the
+     * given delimiter characters is reached.
+     *
+     * @param reader is a reference to the CharReader instance from which the
+     * character data should be read. The reader will be positioned at the
+     * terminating delimiting character or directly after the integer.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of characters which will terminate the number.
+     * @return a pair of a success flag and the integer value. A number which
+     * is not an integer is treated as an error.
+     */
+    static std::pair<bool, int64_t> parseInteger(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims);
+
+    /**
+     * Parses a double from the given CharReader instance until one of the
+     * given delimiter characters is reached.
+     *
+     * @param reader is a reference to the CharReader instance from which the
+     * character data should be read. The reader will be positioned at the
+     * terminating delimiting character or directly after the number.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of characters which will terminate the number.
+     * @return a pair of a success flag and the double value.
+     */
+    static std::pair<bool, double> parseDouble(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims);
+
+    /**
+     * Parses an array of comma separated values, each of which is read using
+     * parseGeneric. If an element cannot be parsed, parsing continues after
+     * the next comma and the result is marked as failed.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. On success the reader is positioned
+     * after the character which ends the array.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delim is the character which ends the array. If zero is given
+     * (the default), the array has to start with '[' and ends with ']'.
+     * Otherwise no opening bracket is expected and the array ends with the
+     * given character.
+     * @return a pair of a success flag and the elements that have been read.
+     */
+    static std::pair<bool, Variant::arrayType> parseArray(
+        CharReader &reader, Logger &logger, char delim = 0);
+
+    /**
+     * Tries to parse the most specific item from the given stream until one
+     * of the given delimiters is reached or a meaningful literal has been
+     * read. Quoted strings, numbers and unescaped strings are recognised; the
+     * unescaped strings "true", "false" and "null" are converted to the
+     * corresponding values. The resulting variant represents the value that
+     * has been read.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of characters which will terminate the item.
+     * These characters are not included in the result.
+     * @return a pair of a success flag and the variant that has been read.
+     */
+    static std::pair<bool, Variant> parseGeneric(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims);
+};
+}
+
+#endif /* _OUSIA_VARIANT_READER_HPP_ */
+
|