summaryrefslogtreecommitdiff
path: root/src/core/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/common')
-rw-r--r--src/core/common/CharReader.cpp640
-rw-r--r--src/core/common/CharReader.hpp665
-rw-r--r--src/core/common/Exceptions.cpp46
-rw-r--r--src/core/common/Exceptions.hpp162
-rw-r--r--src/core/common/Logger.cpp161
-rw-r--r--src/core/common/Logger.hpp609
-rw-r--r--src/core/common/Utils.cpp59
-rw-r--r--src/core/common/Utils.hpp110
-rw-r--r--src/core/common/Variant.cpp154
-rw-r--r--src/core/common/Variant.hpp761
-rw-r--r--src/core/common/VariantReader.cpp625
-rw-r--r--src/core/common/VariantReader.hpp166
12 files changed, 4158 insertions, 0 deletions
diff --git a/src/core/common/CharReader.cpp b/src/core/common/CharReader.cpp
new file mode 100644
index 0000000..373c0c1
--- /dev/null
+++ b/src/core/common/CharReader.cpp
@@ -0,0 +1,640 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <sstream>
+
+#include "CharReader.hpp"
+#include "Utils.hpp"
+
+namespace ousia {
+
+/* Helper functions */
+
+/**
+ * Read callback used internally by the Buffer class to pull data out of an
+ * std::istream.
+ *
+ * @param buf points at the target memory region.
+ * @param size is the requested number of bytes.
+ * @param userData is the pointer handed to the Buffer; it is cast back to the
+ * std::istream the data is read from.
+ * @return the actual number of bytes read. If the result is smaller than
+ * the requested size, this tells the Buffer that the end of the input
+ * stream is reached.
+ */
+static size_t istreamReadCallback(char *buf, size_t size, void *userData)
+{
+    std::istream *const input = static_cast<std::istream *>(userData);
+    input->read(buf, size);
+    return input->gcount();
+}
+
+/* Class Buffer */
+
+/**
+ * Creates a Buffer that pulls its data through the given callback. Both
+ * bucket iterators start out at the end of the (still empty) bucket list.
+ */
+Buffer::Buffer(ReadCallback callback, void *userData)
+    : callback(callback),
+      userData(userData),
+      reachedEnd(false),
+      startBucket(buckets.end()),
+      endBucket(buckets.end()),
+      startOffset(0),
+      firstDead(0)
+{
+    // Eagerly request the first chunk so that there is a bucket the start
+    // iterator can point at
+    stream();
+    startBucket = buckets.begin();
+}
+
+// Delegates to the callback constructor, using the stream itself as user data
+Buffer::Buffer(std::istream &istream)
+    : Buffer(istreamReadCallback, &istream)
+{
+}
+
+/**
+ * Creates a Buffer holding a copy of the given string. No callback is
+ * installed and the end of the data is flagged as reached right away.
+ */
+Buffer::Buffer(const std::string &str)
+    : callback(nullptr),
+      userData(nullptr),
+      reachedEnd(true),
+      startBucket(buckets.end()),
+      endBucket(buckets.end()),
+      startOffset(0),
+      firstDead(0)
+{
+    // The whole string lives in a single bucket, which is both the first and
+    // the last bucket of the ring
+    Bucket &target = nextBucket();
+    target.assign(str.begin(), str.end());
+    startBucket = buckets.begin();
+}
+
+#ifndef NDEBUG
+Buffer::~Buffer()
+{
+    // Every cursor must have been released via deleteCursor by now
+    assert(std::none_of(alive.begin(), alive.end(),
+                        [](bool cursorAlive) { return cursorAlive; }));
+}
+#endif
+
+// Steps to the next bucket, wrapping around to the first bucket of the list
+void Buffer::advance(BucketIterator &it)
+{
+    if (++it == buckets.end()) {
+        it = buckets.begin();
+    }
+}
+
+// Const-iterator variant: steps to the next bucket, wrapping around to the
+// first bucket of the list
+void Buffer::advance(BucketList::const_iterator &it) const
+{
+    if (++it == buckets.cend()) {
+        it = buckets.cbegin();
+    }
+}
+
+/**
+ * Selects the bucket into which the next chunk of data is to be written and
+ * makes endBucket point at it. If every living cursor has moved past the
+ * current start bucket (taking LOOKBACK_SIZE into account), the start bucket is
+ * recycled; otherwise a new bucket is inserted in front of the start bucket.
+ *
+ * @return a reference to the bucket that endBucket now points at.
+ */
+Buffer::Bucket &Buffer::nextBucket()
+{
+ // Sentinel meaning "no living cursor was seen"
+ constexpr size_t MAXVAL = std::numeric_limits<size_t>::max();
+
+ // Fetch the minimum bucket index
+ size_t minBucketIdx = MAXVAL;
+ for (size_t i = 0; i < cursors.size(); i++) {
+ if (alive[i]) {
+ // Fetch references to the bucket and the cursor
+ const Cursor &cur = cursors[i];
+ const Bucket &bucket = *(cur.bucket);
+
+ // Increment the bucket index by one, if the cursor is at the end
+ // of the bucket (only valid if the LOOKBACK_SIZE is set to zero)
+ size_t bIdx = cur.bucketIdx;
+ if (LOOKBACK_SIZE == 0 && cur.bucketOffs == bucket.size()) {
+ bIdx++;
+ }
+
+ // Decrement the bucket index by one, if the previous bucket still
+ // needs to be reached and cannot be overridden
+ if (bIdx > 0 && cur.bucketOffs < LOOKBACK_SIZE) {
+ bIdx--;
+ }
+
+ // Set the bucket index to the minimum
+ minBucketIdx = std::min(minBucketIdx, bIdx);
+ }
+ }
+
+ // If there is space between the current start bucket and the read
+ // cursor, the start bucket can be safely overridden.
+ if (minBucketIdx > 0 && minBucketIdx != MAXVAL) {
+ // All cursor bucket indices will be decreased by one (this loop also
+ // touches the slots of cursors that are not alive)
+ for (size_t i = 0; i < cursors.size(); i++) {
+ cursors[i].bucketIdx--;
+ }
+
+ // Increment the start offset
+ startOffset += startBucket->size();
+
+ // The old start bucket is the new end bucket
+ endBucket = startBucket;
+
+ // Advance the start bucket, wrap around at the end of the list
+ advance(startBucket);
+ } else {
+ // No free bucket, insert a new one before the start bucket
+ endBucket = buckets.emplace(startBucket);
+ }
+ return *endBucket;
+}
+
+/**
+ * Finds a free slot in the cursor table, growing the table if there is none,
+ * and marks that slot as alive.
+ *
+ * @return the index of the slot that was claimed.
+ */
+Buffer::CursorId Buffer::nextCursor()
+{
+    // Scan for a dead slot, starting at the firstDead hint
+    CursorId res = firstDead;
+    while (res < alive.size() && alive[res]) {
+        ++res;
+    }
+
+    // No dead slot available: append a new one to both tables
+    if (res >= alive.size()) {
+        res = cursors.size();
+        cursors.resize(res + 1);
+        alive.resize(res + 1);
+    }
+
+    // Any slot that is still dead must lie behind the one just claimed
+    firstDead = res + 1;
+    alive[res] = true;
+    return res;
+}
+
+// Requests REQUEST_SIZE bytes from the callback and stores them in the next
+// bucket. A short read marks the end of the input.
+void Buffer::stream()
+{
+    // Make room for a full request in the bucket that receives the data
+    Bucket &target = nextBucket();
+    target.resize(REQUEST_SIZE);
+
+    const size_t received = callback(target.data(), REQUEST_SIZE, userData);
+    if (received >= REQUEST_SIZE) {
+        return;
+    }
+
+    // Fewer bytes than requested: shrink the bucket to what was actually
+    // delivered and remember that the end of the stream has been reached
+    target.resize(received);
+    reachedEnd = true;
+}
+
+// Claims a cursor slot and places the cursor at the very beginning of the
+// start bucket
+Buffer::CursorId Buffer::createCursor()
+{
+    const CursorId id = nextCursor();
+    Cursor &cur = cursors[id];
+    cur.bucket = startBucket;
+    cur.bucketIdx = 0;
+    cur.bucketOffs = 0;
+    return id;
+}
+
+// Claims a cursor slot and initialises it as a copy of the reference cursor
+Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref)
+{
+    const CursorId id = nextCursor();
+    cursors[id] = cursors[ref];
+    return id;
+}
+
+// Overwrites the position of cursor "to" with the position of cursor "from"
+void Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to)
+{
+    const Cursor &source = cursors[from];
+    cursors[to] = source;
+}
+
+// Marks the slot as dead and lowers the firstDead hint if this slot lies
+// in front of it
+void Buffer::deleteCursor(Buffer::CursorId cursor)
+{
+    alive[cursor] = false;
+    if (cursor < firstDead) {
+        firstDead = cursor;
+    }
+}
+
+// Byte offset of the cursor: the offset of the start bucket, plus the size of
+// every bucket between the start bucket and the bucket of the cursor, plus the
+// position inside that bucket
+size_t Buffer::offset(Buffer::CursorId cursor) const
+{
+    const Cursor &cur = cursors[cursor];
+    size_t total = startOffset + cur.bucketOffs;
+    for (BucketList::const_iterator it = startBucket; it != cur.bucket;
+         advance(it)) {
+        total += it->size();
+    }
+    return total;
+}
+
+/**
+ * Moves the given cursor forward by up to relativeOffs bytes, requesting more
+ * data via stream() whenever the cursor runs past the end bucket.
+ *
+ * @param cursor is the cursor that should be moved.
+ * @param relativeOffs is the number of bytes the cursor should be advanced.
+ * @return the number of bytes the cursor was actually moved. This is smaller
+ * than relativeOffs if the end of the input was reached first.
+ */
+size_t Buffer::moveForward(CursorId cursor, size_t relativeOffs)
+{
+ // Number of bytes that still have to be skipped
+ size_t offs = relativeOffs;
+ Cursor &cur = cursors[cursor];
+ while (offs > 0) {
+ // Fetch the current bucket of the cursor
+ Bucket &bucket = *(cur.bucket);
+
+ // If there is enough space in the bucket, simply increment the bucket
+ // offset by the given relative offset
+ const size_t space = bucket.size() - cur.bucketOffs;
+ if (space >= offs) {
+ cur.bucketOffs += offs;
+ break;
+ } else {
+ // Go to the end of the current bucket otherwise
+ offs -= space;
+ cur.bucketOffs = bucket.size();
+
+ // Go to the next bucket
+ if (cur.bucket != endBucket) {
+ // Go to the next bucket
+ advance(cur.bucket);
+ cur.bucketIdx++;
+ cur.bucketOffs = 0;
+ } else {
+ // Abort, if there is no more data to stream, otherwise just
+ // load new data
+ if (reachedEnd) {
+ return relativeOffs - offs;
+ }
+ // The cursor itself is not moved here; the loop simply runs
+ // again once the new data has been loaded
+ stream();
+ }
+ }
+ }
+ return relativeOffs;
+}
+
+/**
+ * Moves the given cursor backward by up to relativeOffs bytes.
+ *
+ * @param cursor is the cursor that should be moved.
+ * @param relativeOffs is the number of bytes the cursor should be moved back.
+ * @return the number of bytes the cursor was actually moved. This is smaller
+ * than relativeOffs if the cursor arrived at the start bucket first.
+ */
+size_t Buffer::moveBackward(CursorId cursor, size_t relativeOffs)
+{
+    size_t remaining = relativeOffs;
+    Cursor &cur = cursors[cursor];
+    while (remaining > 0) {
+        // The rest of the distance fits into the current bucket
+        if (cur.bucketOffs >= remaining) {
+            cur.bucketOffs -= remaining;
+            return relativeOffs;
+        }
+
+        // Consume everything up to the beginning of the current bucket
+        remaining -= cur.bucketOffs;
+        cur.bucketOffs = 0;
+
+        // The start bucket has no predecessor that could be entered
+        if (cur.bucketIdx == 0) {
+            return relativeOffs - remaining;
+        }
+
+        // Step to the previous bucket of the ring, wrapping around at the
+        // front of the list
+        if (cur.bucket == buckets.begin()) {
+            cur.bucket = buckets.end();
+        }
+        --cur.bucket;
+
+        // Continue from the end of the bucket that was just entered
+        --cur.bucketIdx;
+        cur.bucketOffs = cur.bucket->size();
+    }
+    return relativeOffs;
+}
+
+/**
+ * Moves the given cursor by a signed number of bytes.
+ *
+ * @param cursor is the cursor that should be moved.
+ * @param relativeOffs is the number of bytes to move: positive values move
+ * forward, negative values move backward, zero leaves the cursor untouched.
+ * @return the signed number of bytes the cursor was actually moved.
+ */
+ssize_t Buffer::moveCursor(CursorId cursor, ssize_t relativeOffs)
+{
+    if (relativeOffs > 0) {
+        const size_t dist = static_cast<size_t>(relativeOffs);
+        return static_cast<ssize_t>(moveForward(cursor, dist));
+    }
+    if (relativeOffs < 0) {
+        // Negate in the unsigned domain: negating the most negative ssize_t
+        // value directly would be a signed overflow
+        const size_t dist =
+            static_cast<size_t>(0) - static_cast<size_t>(relativeOffs);
+
+        // Convert to the signed type before negating, instead of negating a
+        // size_t and relying on the wrapped value converting back
+        return -static_cast<ssize_t>(moveBackward(cursor, dist));
+    }
+    return 0;
+}
+
+// True only if the input is exhausted and the cursor sits behind the last byte
+// of the end bucket
+bool Buffer::atEnd(Buffer::CursorId cursor) const
+{
+    if (!reachedEnd) {
+        return false;
+    }
+    const Cursor &cur = cursors[cursor];
+    return cur.bucket == endBucket && cur.bucketOffs == endBucket->size();
+}
+
+/**
+ * Reads the character at the position of the given cursor, loading more data
+ * via stream() if the cursor is behind the last buffered byte.
+ *
+ * @param cursor is the cursor from which the character should be read.
+ * @param c is set to the character at the cursor position.
+ * @param incr specifies whether the cursor is moved behind the character
+ * that was read.
+ * @return true if a character was stored in c, false if the end of the input
+ * has been reached.
+ */
+bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr)
+{
+ Cursor &cur = cursors[cursor];
+ while (true) {
+ // Reference at the current bucket
+ Bucket &bucket = *(cur.bucket);
+
+ // If there is still data in the current bucket, return this data
+ if (cur.bucketOffs < bucket.size()) {
+ c = bucket[cur.bucketOffs];
+ if (incr) {
+ cur.bucketOffs++;
+ }
+ return true;
+ } else if (cur.bucket == endBucket) {
+ // Return false if the end of the stream has been reached, otherwise
+ // load new data
+ if (reachedEnd) {
+ return false;
+ }
+ stream();
+ }
+
+ // Go to the next bucket (reached both when the current bucket is
+ // exhausted and after new data has been loaded)
+ cur.bucketIdx++;
+ cur.bucketOffs = 0;
+ advance(cur.bucket);
+ }
+}
+
+// Reads the character at the cursor and moves the cursor behind it
+bool Buffer::read(Buffer::CursorId cursor, char &c)
+{
+    const bool moveBehind = true;
+    return fetchCharacter(cursor, c, moveBehind);
+}
+
+// Reads the character at the cursor without moving the cursor behind it
+bool Buffer::fetch(CursorId cursor, char &c)
+{
+    const bool moveBehind = false;
+    return fetchCharacter(cursor, c, moveBehind);
+}
+
+/* CharReader::Cursor class */
+
+// Makes this cursor a copy of the given cursor: same buffer position, same
+// line and column
+void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer,
+                                CharReader::Cursor &cursor)
+{
+    // The position lives inside the Buffer and is copied there
+    buffer->copyCursor(cursor.cursor, this->cursor);
+
+    // Line and column are tracked by the CharReader cursor itself
+    this->line = cursor.line;
+    this->column = cursor.column;
+}
+
+/* CharReader class */
+
+// Creates a read and a peek cursor in the given buffer; both start with the
+// given line and column and the reader starts out coherent
+CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line,
+                       size_t column)
+    : buffer(buffer),
+      readCursor(buffer->createCursor(), line, column),
+      peekCursor(buffer->createCursor(), line, column),
+      coherent(true)
+{
+}
+
+// Wraps the string into a newly allocated Buffer and delegates to the
+// buffer-based constructor
+CharReader::CharReader(const std::string &str, size_t line, size_t column)
+    : CharReader(std::make_shared<Buffer>(str), line, column)
+{
+}
+
+// Wraps the stream into a newly allocated Buffer and delegates to the
+// buffer-based constructor
+CharReader::CharReader(std::istream &istream, size_t line, size_t column)
+    : CharReader(std::make_shared<Buffer>(istream), line, column)
+{
+}
+
+// Hands both cursors of this reader back to the buffer
+CharReader::~CharReader()
+{
+    Buffer &buf = *buffer;
+    buf.deleteCursor(readCursor.cursor);
+    buf.deleteCursor(peekCursor.cursor);
+}
+
+/**
+ * Reads one character at the given cursor, folds line break sequences into a
+ * single '\n' and keeps the line and column counters of the cursor up to date.
+ *
+ * @param cursor is the cursor from which the character should be read.
+ * @param c is set to the character that was read.
+ * @return true if a character was read, false if the end of the stream has
+ * been reached.
+ */
+bool CharReader::readAtCursor(Cursor &cursor, char &c)
+{
+    // Return false if we're at the end of the stream
+    if (!buffer->read(cursor.cursor, c)) {
+        return false;
+    }
+
+    if (c == '\n' || c == '\r') {
+        // Remember which line break character was actually read before it is
+        // normalised. Comparing against the already normalised value made
+        // "\r\r" collapse into one line break while "\n\n" produced two.
+        const char first = c;
+        c = '\n';
+
+        // "\r\n" and "\n\r" form a single line break, so their second
+        // character is swallowed. Anything else -- an ordinary character or a
+        // repetition of the same line break character -- belongs to the next
+        // read and is put back.
+        char c2;
+        if (buffer->read(cursor.cursor, c2)) {
+            const bool isLinebreak = (c2 == '\n' || c2 == '\r');
+            if (!isLinebreak || c2 == first) {
+                buffer->moveCursor(cursor.cursor, -1);
+            }
+        }
+
+        // A linebreak was reached, go to the next line
+        cursor.line++;
+        cursor.column = 1;
+    } else if (!((c & 0x80) && !(c & 0x40))) {
+        // Bytes of the form 10xxxxxx are UTF-8 continuation bytes and do not
+        // start a new column
+        cursor.column++;
+    }
+    return true;
+}
+
+// Reads the next character at the peek cursor; the read cursor stays put
+bool CharReader::peek(char &c)
+{
+    // The first peek after a coherent state starts out at the read position
+    if (coherent) {
+        peekCursor.assign(buffer, readCursor);
+        coherent = false;
+    }
+    return readAtCursor(peekCursor, c);
+}
+
+// Reads the next character at the read cursor and pulls the peek cursor to
+// the new read position
+bool CharReader::read(char &c)
+{
+    const bool ok = readAtCursor(readCursor, c);
+
+    if (coherent) {
+        // Already coherent: only the buffer position is mirrored
+        buffer->copyCursor(readCursor.cursor, peekCursor.cursor);
+    } else {
+        // A peek was pending: discard it by copying the complete read cursor
+        peekCursor.assign(buffer, readCursor);
+        coherent = true;
+    }
+    return ok;
+}
+
+// Throws away everything that was peeked: the peek cursor returns to the read
+// cursor
+void CharReader::resetPeek()
+{
+    if (coherent) {
+        return;
+    }
+    peekCursor.assign(buffer, readCursor);
+    coherent = true;
+}
+
+// Accepts everything that was peeked: the read cursor jumps to the peek cursor
+void CharReader::consumePeek()
+{
+    if (coherent) {
+        return;
+    }
+    readCursor.assign(buffer, peekCursor);
+    coherent = true;
+}
+
+// Skips whitespace. Returns true once a non-whitespace character is ahead of
+// the read cursor, false if the input ended first.
+bool CharReader::consumeWhitespace()
+{
+    // Each whitespace character is consumed after it has been inspected
+    for (char c; peek(c); consumePeek()) {
+        if (!Utils::isWhitespace(c)) {
+            // Put the non-whitespace character back
+            resetPeek();
+            return true;
+        }
+    }
+    return false;
+}
+
+// Creates a fork that shares the buffer and refers back to the cursors of
+// this reader
+CharReaderFork CharReader::fork()
+{
+    return CharReaderFork(
+        buffer, readCursor, peekCursor, coherent);
+}
+
+/**
+ * Extracts the line surrounding the current read position, limited to maxSize
+ * bytes, using a temporary buffer cursor that is deleted again before
+ * returning.
+ *
+ * @param maxSize is the maximum number of bytes to search in each direction
+ * and the maximum length of the extracted line.
+ * @return a Context holding the extracted text, the position of the read
+ * cursor within that text and flags telling whether the beginning or the end
+ * of the line were cut off.
+ */
+CharReader::Context CharReader::getContext(ssize_t maxSize)
+{
+ // Clone the current read cursor
+ Buffer::CursorId cur = buffer->createCursor(readCursor.cursor);
+
+ // Fetch the start position of the search
+ ssize_t offs = buffer->offset(cur);
+ ssize_t start = offs;
+ ssize_t end = offs;
+ char c;
+
+ // Search the beginning of the line with the last non-whitespace character
+ bool hadNonWhitespace = false;
+ bool foundBegin = false;
+ for (ssize_t i = 0; i < maxSize; i++) {
+ // Fetch the character at the current position
+ if (buffer->fetch(cur, c)) {
+ // Abort at linebreaks once a non-whitespace character was seen
+ hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c);
+ if (hadNonWhitespace && (c == '\n' || c == '\r')) {
+ buffer->moveCursor(cur, 1);
+ start++;
+ foundBegin = true;
+ break;
+ }
+ }
+ // A return value of zero means the cursor could not be moved back
+ if (buffer->moveCursor(cur, -1) == 0) {
+ foundBegin = true;
+ break;
+ } else {
+ // Update the start position
+ start--;
+ }
+ }
+
+ // Search the end of the line
+ buffer->moveCursor(cur, offs - start);
+ bool foundEnd = false;
+ for (ssize_t i = 0; i < maxSize; i++) {
+ // Increment the end counter if a character was read, abort if the end
+ // of the stream has been reached
+ if (buffer->read(cur, c)) {
+ end++;
+ } else {
+ foundEnd = true;
+ break;
+ }
+
+ // Abort on linebreak characters
+ if (c == '\n' || c == '\r') {
+ foundEnd = true;
+ break;
+ }
+ }
+
+ // Calculate the truncated start and end position and limit the number of
+ // characters to the maximum number of characters
+ ssize_t tStart = start;
+ ssize_t tEnd = end;
+ if (tEnd - tStart > maxSize) {
+ tStart = std::max(offs - maxSize / 2, tStart);
+ tEnd = tStart + maxSize;
+ }
+
+ // Try to go to the calculated start position and fetch the actual start
+ // position
+ ssize_t aStart = end + buffer->moveCursor(cur, tStart - end);
+ if (aStart > tStart) {
+ tEnd = tEnd + (aStart - tStart);
+ tStart = aStart;
+ }
+
+ // Read one line
+ std::stringstream ss;
+ size_t relPos = 0;
+ for (ssize_t i = tStart; i < tEnd; i++) {
+ if (buffer->read(cur, c)) {
+ // Break once a linebreak is reached
+ if (c == '\n' || c == '\r') {
+ break;
+ }
+
+ // Add the current character to the output
+ ss << c;
+
+ // Increment the string-relative offset as long as the original
+ // offset is not reached in the for loop
+ if (i < offs) {
+ relPos++;
+ }
+ }
+ }
+
+ // Delete the newly created cursor
+ buffer->deleteCursor(cur);
+
+ // The start/end count as truncated if the respective search gave up or if
+ // the extracted range had to be narrowed
+ return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start,
+ !foundEnd || tEnd != end};
+}
+
+/* Class CharReaderFork */
+
+// Creates a reader with cursors of its own, then moves those cursors to the
+// positions of the parent cursors and takes over the coherent flag
+CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer,
+                               CharReader::Cursor &parentReadCursor,
+                               CharReader::Cursor &parentPeekCursor,
+                               bool coherent)
+    : CharReader(buffer, 1, 1),
+      parentReadCursor(parentReadCursor),
+      parentPeekCursor(parentPeekCursor)
+{
+    // Line and column passed to the base class above are placeholders, they
+    // are overwritten by the following assignments
+    this->readCursor.assign(buffer, parentReadCursor);
+    this->peekCursor.assign(buffer, parentPeekCursor);
+    this->coherent = coherent;
+}
+
+/**
+ * Copies the state of the fork back to the cursors of the parent CharReader.
+ */
+void CharReaderFork::commit()
+{
+    parentReadCursor.assign(buffer, readCursor);
+
+    // While the fork is coherent, CharReader::read() only mirrors the buffer
+    // position into peekCursor and leaves its line/column untouched. Both
+    // cursors are at the same buffer position in that state, so the read
+    // cursor is the one carrying the correct line/column.
+    parentPeekCursor.assign(buffer, coherent ? readCursor : peekCursor);
+
+    // NOTE(review): the coherent flag of the parent is not reachable from
+    // here and is therefore left as it was when the fork was created --
+    // confirm whether it should be synchronised as well.
+}
+}
+
diff --git a/src/core/common/CharReader.hpp b/src/core/common/CharReader.hpp
new file mode 100644
index 0000000..3cbe4b4
--- /dev/null
+++ b/src/core/common/CharReader.hpp
@@ -0,0 +1,665 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file CharReader.hpp
+ *
+ * Used within all parsers to read single characters from an underlying stream.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_CHAR_READER_HPP_
+#define _OUSIA_CHAR_READER_HPP_
+
+#include <istream>
+#include <list>
+#include <memory>
+#include <vector>
+
+namespace ousia {
+
+/**
+ * A chunked ring buffer used in CharReader to provide access to an input stream
+ * with multiple read cursors. The Buffer automatically expands to the
+ * size spanned by the read cursors while reusing already allocated
+ * memory.
+ */
+class Buffer {
+public:
+ /**
+ * Callback function which is called whenever new data is requested from the
+ * input stream.
+ *
+ * @param buf points at the target memory region.
+ * @param size is the requested number of bytes.
+ * @param userData is a pointer at some user defined data given in the
+ * constructor.
+ * @return the actual number of bytes read. If the result is smaller than
+ * the requested size, this tells the Buffer that the end of the input
+ * stream is reached.
+ */
+ using ReadCallback = size_t (*)(char *buf, size_t size, void *userData);
+
+ /**
+ * Handle used to identify a cursor.
+ */
+ using CursorId = size_t;
+
+private:
+ /**
+ * Number of bytes to request from the input stream. Set to 64 KiB because
+ * this seems to be a nice value for I/O operations according to multiple
+ * sources.
+ */
+ static constexpr size_t REQUEST_SIZE = 64 * 1024;
+
+ /**
+ * Number of bytes the buffer guarantees to be capable of looking back
+ * for extracting the current context.
+ */
+ static constexpr size_t LOOKBACK_SIZE = 128;
+
+ /**
+ * Type used internally to represent one chunk of memory.
+ */
+ using Bucket = std::vector<char>;
+
+ /**
+ * Type used internally to represent a bucket container.
+ */
+ using BucketList = std::list<Bucket>;
+
+ /**
+ * Type used internally for representing iterators in the bucket list.
+ */
+ using BucketIterator = BucketList::iterator;
+
+ /**
+ * Type used internally to represent a read cursor.
+ */
+ struct Cursor {
+ /**
+ * Iterator pointing at the current bucket.
+ */
+ BucketIterator bucket;
+
+ /**
+ * Index of the bucket relative to the start bucket.
+ */
+ size_t bucketIdx;
+
+ /**
+ * Current offset within that bucket.
+ */
+ size_t bucketOffs;
+ };
+
+ /**
+ * List of buckets containing the buffered memory.
+ */
+ BucketList buckets;
+
+ /**
+ * List of cursors used to access the memory. Note that cursors can be
+ * marked as inactive and reused later on (to avoid having to resize the
+ * vector).
+ */
+ std::vector<Cursor> cursors;
+
+ /**
+ * Bitfield specifying which of the cursors is actually valid.
+ */
+ std::vector<bool> alive;
+
+ /**
+ * Function to be called whenever new data is needed. Set to nullptr if the
+ * Buffer is not backed by an input stream.
+ */
+ const ReadCallback callback;
+
+ /**
+ * User data given in the constructor.
+ */
+ void *userData;
+
+ /**
+ * Set to true if the input stream is at its end.
+ */
+ bool reachedEnd;
+
+ /**
+ * Iterator pointing at the current start bucket.
+ */
+ BucketIterator startBucket;
+
+ /**
+ * Iterator pointing at the last bucket.
+ */
+ BucketIterator endBucket;
+
+ /**
+ * Byte offset of the start bucket relative to the beginning of the stream.
+ */
+ size_t startOffset;
+
+ /**
+ * Points at the smallest possible available cursor index, yet does not
+ * guarantee that this cursor index actually is free.
+ */
+ CursorId firstDead;
+
+ /**
+ * Advances the bucket iterator, cares about wrapping around in the ring.
+ */
+ void advance(BucketIterator &it);
+
+ /**
+ * Advances the bucket iterator, cares about wrapping around in the ring.
+ */
+ void advance(BucketList::const_iterator &it) const;
+
+ /**
+ * Internally used to find the next free cursor in the cursors vector. The
+ * cursor is marked as active.
+ *
+ * @return the next free cursor index.
+ */
+ CursorId nextCursor();
+
+ /**
+ * Returns a reference at the next bucket into which data should be
+ * inserted.
+ *
+ * @return a bucket into which the data can be inserted.
+ */
+ Bucket &nextBucket();
+
+ /**
+ * Reads data from the input stream and places it in the next free buffer.
+ */
+ void stream();
+
+ /**
+ * Moves the given cursor forward.
+ *
+ * @param cursor is the cursor that should be moved.
+ * @param relativeOffs is the number of bytes to move forward.
+ * @return the number of bytes the cursor was actually moved.
+ */
+ size_t moveForward(CursorId cursor, size_t relativeOffs);
+
+ /**
+ * Moves the given cursor backward.
+ *
+ * @param cursor is the cursor that should be moved.
+ * @param relativeOffs is the number of bytes to move backward.
+ * @return the number of bytes the cursor was actually moved.
+ */
+ size_t moveBackward(CursorId cursor, size_t relativeOffs);
+
+ /**
+ * Reads a character from the current cursor position and optionally
+ * advances.
+ *
+ * @param cursor is the cursor from which the character should be read.
+ * @param c is the character into which the data needs to be read.
+ * @param incr specifies whether the cursor should be advanced by one byte.
+ * @return true if a character was read, false if the end of the stream has
+ * been reached.
+ */
+ bool fetchCharacter(CursorId cursor, char &c, bool incr);
+
+public:
+ /**
+ * Initializes the Buffer with a reference to a ReadCallback that is used
+ * to fetch data from an underlying input stream.
+ *
+ * @param callback is the function that will be called whenever data is read
+ * from the ring buffer and the buffer does not hold enough data to fulfill
+ * this read request.
+ * @param userData is a pointer to user defined data which will be passed to
+ * the callback function.
+ */
+ Buffer(ReadCallback callback, void *userData);
+
+ /**
+ * Initializes the Buffer with a reference to an std::istream from which
+ * data will be read.
+ *
+ * @param istream is the input stream from which the data should be read.
+ */
+ Buffer(std::istream &istream);
+
+ /**
+ * Initializes the Buffer with the contents of the given string, after
+ * this operation the Buffer has a fixed size.
+ *
+ * @param str is the string containing the data that should be copied into
+ * the ring buffer.
+ */
+ Buffer(const std::string &str);
+
+#ifndef NDEBUG
+ /**
+ * Destructor of the Buffer class. Makes sure that all cursors have been
+ * freed.
+ */
+ ~Buffer();
+#endif
+
+ // No copy
+ Buffer(const Buffer &) = delete;
+
+ // No assign
+ Buffer &operator=(const Buffer &) = delete;
+
+ /**
+ * Creates a new read cursor positioned at the smallest possible position
+ * in the ring buffer.
+ *
+ * @return the id of the newly created cursor.
+ */
+ CursorId createCursor();
+
+ /**
+ * Creates a new read cursor positioned at the same position as the given
+ * read cursor.
+ *
+ * @param ref is the read cursor that should be used as reference for the
+ * new read cursor.
+ * @return the id of the newly created cursor.
+ */
+ CursorId createCursor(CursorId ref);
+
+ /**
+ * Copies the position of one cursor to another cursor.
+ *
+ * @param from is the cursor id of which the position should be copied.
+ * @param to is the cursor id to which the position should be copied.
+ */
+ void copyCursor(CursorId from, CursorId to);
+
+ /**
+ * Deletes the cursor with the given id. The cursor may no longer be used
+ * after this function has been called.
+ *
+ * @param cursor is the id of the cursor that should be freed.
+ */
+ void deleteCursor(CursorId cursor);
+
+ /**
+ * Moves a cursor by offs bytes. Note that moving backwards is theoretically
+ * limited by the LOOKBACK_SIZE of the Buffer, practically it will most
+ * likely be limited by the REQUEST_SIZE, so you can go back at most 64 KiB.
+ *
+ * @param cursor is the cursor that should be moved.
+ * @param relativeOffs is a positive or negative integer number specifying
+ * the number of bytes the cursor should be moved forward (positive numbers)
+ * or backwards (negative numbers).
+ * @return the actual number of bytes the cursor was moved. The magnitude of
+ * this number is smaller than that of relativeOffs if the cursor hit the
+ * end of the stream or the beginning of the buffered data first.
+ */
+ ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs);
+
+ /**
+ * Returns the current byte offset of the given cursor relative to the
+ * beginning of the stream.
+ *
+ * @param cursor is the cursor for which the byte offset relative to the
+ * beginning of the stream should be returned.
+ * @return the number of bytes since the beginning of the stream for the
+ * given cursor.
+ */
+ size_t offset(CursorId cursor) const;
+
+ /**
+ * Returns true if the given cursor currently is at the end of the stream.
+ *
+ * @param cursor is the cursor for which the atEnd flag should be returned.
+ * @return true if there are no more bytes for this cursor. If false
+ * is returned, this means that there may be more bytes in the stream,
+ * nevertheless the end of the stream may be hit once the next read function
+ * is called.
+ */
+ bool atEnd(CursorId cursor) const;
+
+ /**
+ * Reads a single character from the ring buffer from the given cursor and
+ * moves to the next character.
+ *
+ * @param cursor specifies the cursor from which the data should be read.
+ * The cursor will be advanced by one byte.
+ * @param c is the character into which the data needs to be read.
+ * @return true if a character was read, false if the end of the stream has
+ * been reached.
+ */
+ bool read(CursorId cursor, char &c);
+
+ /**
+ * Returns a single character from the ring buffer from the current cursor
+ * position and stays at that position.
+ *
+ * @param cursor specifies the cursor from which the data should be read.
+ * The cursor is not advanced past the character.
+ * @param c is the character into which the data needs to be read.
+ * @return true if a character could be fetched, false if the end of the
+ * stream has been reached.
+ */
+ bool fetch(CursorId cursor, char &c);
+};
+
+// Forward declaration
+class CharReaderFork;
+
+/**
+ * Used within parsers for convenient access to single characters in an input
+ * stream or buffer. It allows reading and peeking single characters from a
+ * buffer. Additionally it counts the current column/row (with correct handling
+ * for UTF-8) and contains an internal state machine that handles the detection
+ * of linebreaks and converts these to a single '\n'.
+ */
+class CharReader {
+public:
+ /**
+ * The context struct is used to represent the current context the char
+ * reader is in. This context can for example be used when building error
+ * messages.
+ */
+ struct Context {
+ /**
+ * Set to the content of the current line.
+ */
+ std::string line;
+
+ /**
+ * Relative position (in characters) within that line.
+ */
+ size_t relPos;
+
+ /**
+ * Set to true if the beginning of the line has been truncated (because
+ * the reader position is too far away from the actual position of the
+ * line).
+ */
+ bool truncatedStart;
+
+ /**
+ * Set to true if the end of the line has been truncated (because the
+ * reader position is too far away from the actual end position of the
+ * line).
+ */
+ bool truncatedEnd;
+
+ /**
+ * Creates an empty context: no text, position zero, nothing truncated.
+ */
+ Context()
+ : line(), relPos(0), truncatedStart(false), truncatedEnd(false)
+ {
+ }
+
+ /**
+ * Creates a context from the given values.
+ *
+ * @param line is the content of the current line.
+ * @param relPos is the relative position within that line.
+ * @param truncatedStart specifies whether the beginning of the line
+ * has been truncated.
+ * @param truncatedEnd specifies whether the end of the line has been
+ * truncated.
+ */
+ Context(std::string line, size_t relPos, bool truncatedStart,
+ bool truncatedEnd)
+ : line(std::move(line)),
+ relPos(relPos),
+ truncatedStart(truncatedStart),
+ truncatedEnd(truncatedEnd)
+ {
+ }
+ };
+
+protected:
+ /**
+ * Internally used cursor structure for managing the read and the peek
+ * cursor.
+ */
+ struct Cursor {
+ /**
+ * Corresponding cursor in the underlying buffer instance.
+ */
+ const Buffer::CursorId cursor;
+
+ /**
+ * Current line the cursor is in.
+ */
+ uint32_t line;
+
+ /**
+ * Current column the cursor is in.
+ */
+ uint32_t column;
+
+ /**
+ * Constructor of the Cursor class.
+ *
+ * @param cursor is the underlying cursor in the Buffer instance.
+ * @param line is the initial line; it is stored as uint32_t.
+ * @param column is the initial column; it is stored as uint32_t.
+ */
+ Cursor(Buffer::CursorId cursor, size_t line, size_t column)
+ : cursor(cursor), line(line), column(column)
+ {
+ }
+
+ /**
+ * Assigns one cursor to another.
+ *
+ * @param buffer is the underlying buffer instance the internal cursor
+ * belongs to.
+ * @param cursor is the cursor from which the state should be copied.
+ */
+ void assign(std::shared_ptr<Buffer> buffer, Cursor &cursor);
+ };
+
+private:
+ /**
+ * Substitutes "\r", "\n\r", "\r\n" with a single "\n".
+ *
+ * NOTE(review): CharReader.cpp as added by this change contains no
+ * definition of this function; the substitution is done inside
+ * readAtCursor. Presumably a leftover declaration -- confirm.
+ *
+ * @param cursor is the cursor from which the character should be read.
+ * @param c a reference to the character that should be written.
+ * @return true if another character needs to be read.
+ */
+ bool substituteLinebreaks(Cursor &cursor, char &c);
+
+ /**
+ * Reads a single character from the given cursor.
+ *
+ * @param cursor is the cursor from which the character should be read.
+ * @param c a reference to the character that should be written.
+ * @return true if a character was read, false if the end of the stream has
+ * been reached.
+ */
+ bool readAtCursor(Cursor &cursor, char &c);
+
+protected:
+ /**
+ * Reference pointing at the underlying buffer.
+ */
+ std::shared_ptr<Buffer> buffer;
+
+ /**
+ * Cursor used for reading.
+ */
+ Cursor readCursor;
+
+ /**
+ * Cursor used for peeking.
+ */
+ Cursor peekCursor;
+
+ /**
+ * Set to true as long as the underlying Buffer cursor is at the same
+ * position for the read and the peek cursor. This is only used for
+ * optimization purposes and makes consecutive reads a bit faster.
+ */
+ bool coherent;
+
+ /**
+ * Protected constructor of the CharReader base class. Creates new read
+ * and peek cursors for the given buffer.
+ *
+ * @param buffer is a reference to the underlying Buffer class responsible
+ * for allowing to read from a single input stream from multiple locations.
+ * @param line is the start line.
+ * @param column is the start column.
+ */
+ CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column);
+
+public:
+ /**
+ * Creates a new CharReader instance from a string.
+ *
+ * @param str is a string containing the input data.
+ * @param line is the start line.
+ * @param column is the start column.
+ */
+ CharReader(const std::string &str, size_t line = 1, size_t column = 1);
+
+ /**
+ * Creates a new CharReader instance for an input stream.
+ *
+ * @param istream is the input stream from which incoming data should be
+ * read.
+ * @param line is the start line.
+ * @param column is the start column.
+ */
+ CharReader(std::istream &istream, size_t line = 1, size_t column = 1);
+
+ /**
+ * Deletes the used cursors from the underlying buffer instance.
+ */
+ ~CharReader();
+
+ // NOTE(review): the two deleted members below take a "const Buffer &",
+ // not a "const CharReader &", so they do not delete the copy operations of
+ // CharReader itself. Presumably "const CharReader &" was intended --
+ // confirm before changing, since fork() returns a CharReaderFork by value.
+
+ // No copy
+ CharReader(const Buffer &) = delete;
+
+ // No assign
+ CharReader &operator=(const Buffer &) = delete;
+
+ /**
+ * Peeks a single character. If called multiple times, returns the
+ * character after the previously peeked character.
+ *
+ * @param c is a reference to the character to which the result should be
+ * written.
+ * @return true if the character was successfully read, false if there are
+ * no more characters to be read in the buffer.
+ */
+ bool peek(char &c);
+
+ /**
+ * Reads a character from the input data. If "peek" was called
+ * beforehand resets the peek pointer.
+ *
+ * @param c is a reference to the character to which the result should be
+ * written.
+ * @return true if the character was successfully read, false if there are
+ * no more characters to be read in the buffer.
+ */
+ bool read(char &c);
+
+ /**
+ * Resets the peek pointer to the "read" pointer.
+ */
+ void resetPeek();
+
+ /**
+ * Advances the read pointer to the peek pointer -- so if the "peek"
+ * function was called, "read" will now return the character after
+ * the last peeked character.
+ */
+ void consumePeek();
+
+ /**
+ * Moves the read cursor to the next non-whitespace character. Returns
+ * false, if the end of the stream was reached.
+ *
+ * @return false if the end of the stream was reached, true otherwise.
+ */
+ bool consumeWhitespace();
+
+ /**
+ * Creates a new CharReader located at the same position as this CharReader
+ * instance, yet the new CharReader can be used independently of this
+ * CharReader. Use the "commit" function of the returned CharReader to
+ * copy the state of the forked CharReaderFork to this CharReader.
+ *
+ * @return a CharReaderFork instance positioned at the same location as this
+ * CharReader instance.
+ */
+ CharReaderFork fork();
+
+ /**
+ * Returns true if there are no more characters as the stream was
+ * closed.
+ *
+ * @return true if there is no more data.
+ */
+ bool atEnd() const { return buffer->atEnd(readCursor.cursor); }
+
+ /**
+ * Returns the current line (starting with one).
+ *
+ * @return the current line number.
+ */
+ uint32_t getLine() const { return readCursor.line; }
+
+ /**
+ * Returns the current column (starting with one).
+ *
+ * @return the current column number.
+ */
+ uint32_t getColumn() const { return readCursor.column; }
+
+ /**
+ * Returns the current byte offset of the read cursor.
+ *
+ * @return the byte position within the stream.
+ */
+ size_t getOffset() const { return buffer->offset(readCursor.cursor); };
+
+ /**
+ * Returns the line the read cursor currently is in, but at most the
+ * given number of characters in the form of a Context structure.
+ *
+ * @param maxSize is the maximum number of characters to return.
+ * @return the Context describing the current line.
+ */
+ Context getContext(ssize_t maxSize);
+};
+
+/**
+ * A CharReaderFork is returned whenever the "fork" function of the CharReader
+ * class is used. Its "commit" function can be used to move the underlying
+ * CharReader instance to the location of the CharReaderFork instance. Otherwise
+ * the read location of the underlying CharReader is left unchanged.
+ */
+class CharReaderFork : public CharReader {
+private:
+ // CharReader::fork() needs access to the private constructor below
+ friend CharReader;
+
+ /**
+ * The reader cursor of the underlying CharReader instance.
+ */
+ CharReader::Cursor &parentReadCursor;
+
+ /**
+ * The peek cursor of the underlying CharReader instance.
+ */
+ CharReader::Cursor &parentPeekCursor;
+
+ /**
+ * Constructor of the CharReaderFork class.
+ *
+ * @param buffer is a reference at the parent Buffer instance.
+ * @param parentReadCursor is a reference at the parent read cursor.
+ * @param parentPeekCursor is a reference at the parent peek cursor.
+ * @param coherent specifies whether the char reader cursors are initialized
+ * coherently.
+ */
+ CharReaderFork(std::shared_ptr<Buffer> buffer,
+ CharReader::Cursor &parentReadCursor,
+ CharReader::Cursor &parentPeekCursor, bool coherent);
+
+public:
+ /**
+ * Moves the read and peek cursor of the parent CharReader to the location
+ * of the read and peek cursor in the fork.
+ */
+ void commit();
+};
+
+}
+
+#endif /* _OUSIA_CHAR_READER_HPP_ */
+
diff --git a/src/core/common/Exceptions.cpp b/src/core/common/Exceptions.cpp
new file mode 100644
index 0000000..d064f35
--- /dev/null
+++ b/src/core/common/Exceptions.cpp
@@ -0,0 +1,46 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <sstream>
+
+#include "Exceptions.hpp"
+
+namespace ousia {
+
+/* Class LoggableException */
+
+/**
+ * Assembles the human readable text that is handed to the OusiaException base
+ * class. The result has the form
+ *
+ *   error [while processing "<file>" ][at line <line>[, column <column>] ]with
+ *   message: <msg>
+ *
+ * @param msg is the actual error message.
+ * @param file is the name of the file the error refers to. The "while
+ * processing" part is omitted if it is empty.
+ * @param line is the line the error refers to. The position part is omitted
+ * if it is negative.
+ * @param column is the column the error refers to. It is only printed if both
+ * line and column are non-negative.
+ * @return the assembled message.
+ */
+std::string LoggableException::formatMessage(const std::string &msg,
+                                             const std::string &file,
+                                             int line, int column)
+{
+    std::stringstream ss;
+    ss << "error ";
+    if (!file.empty()) {
+        ss << "while processing \"" << file << "\" ";
+    }
+    if (line >= 0) {
+        ss << "at line " << line;
+        // The comma only separates line and column, so it must not be
+        // emitted when there is no column to follow it.
+        if (column >= 0) {
+            ss << ", column " << column;
+        }
+        ss << " ";
+    }
+    ss << "with message: " << msg;
+    return ss.str();
+}
+}
+
diff --git a/src/core/common/Exceptions.hpp b/src/core/common/Exceptions.hpp
new file mode 100644
index 0000000..00d6106
--- /dev/null
+++ b/src/core/common/Exceptions.hpp
@@ -0,0 +1,162 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Exceptions.hpp
+ *
+ * Describes basic exception classes which are used throughout Ousía.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_EXCEPTIONS_HPP_
+#define _OUSIA_EXCEPTIONS_HPP_
+
+namespace ousia {
+
+/**
+ * Root of the Ousía exception hierarchy. All other exception types defined by
+ * Ousía are expected to inherit from this class.
+ */
+class OusiaException : public std::exception {
+private:
+    /**
+     * Text handed out by what(), i.e. the message the runtime environment
+     * prints if nobody catches and handles the exception.
+     */
+    const std::string formatedMessage;
+
+public:
+    /**
+     * Creates the exception and stores the given message.
+     *
+     * @param formatedMessage is the already formatted message that is
+     * reported by what().
+     */
+    OusiaException(std::string formatedMessage)
+        : formatedMessage(std::move(formatedMessage))
+    {
+    }
+
+    /**
+     * Virtual destructor.
+     */
+    virtual ~OusiaException() = default;
+
+    /**
+     * Implements std::exception::what.
+     *
+     * @return a pointer to the zero-terminated message given in the
+     * constructor. The pointer refers to storage owned by this exception
+     * object.
+     */
+    const char *what() const noexcept override
+    {
+        const char *const text = formatedMessage.c_str();
+        return text;
+    }
+};
+
+/**
+ * Exception class which can be directly passed to a Logger instance and thus
+ * makes it simple to handle non-recoverable errors in the code.
+ */
+class LoggableException : public OusiaException {
+private:
+    /**
+     * Function used internally to build the formatted message that is passed
+     * to the OusiaException base class.
+     *
+     * @param msg is the actual error message.
+     * @param file is the name of the file the error refers to. May be empty.
+     * @param line is the line the error refers to.
+     * @param column is the column the error refers to.
+     * @return the formatted message.
+     */
+    static std::string formatMessage(const std::string &msg,
+                                     const std::string &file, int line,
+                                     int column);
+
+public:
+    /**
+     * Message describing the error that occurred.
+     */
+    const std::string msg;
+
+    /**
+     * Name of the file in which the error occurred. May be empty.
+     */
+    const std::string file;
+
+    /**
+     * Line at which the exception occurred. Negative values are ignored.
+     */
+    const int line;
+
+    /**
+     * Column at which the exception occurred. Negative values are ignored.
+     */
+    const int column;
+
+    /**
+     * Constructor of the LoggableException class.
+     *
+     * @param msg contains the error message.
+     * @param file provides the context the message refers to. May be empty.
+     * @param line is the line in the above file the message refers to.
+     * @param column is the column in the above file the message refers to.
+     */
+    LoggableException(std::string msg, std::string file, int line = -1,
+                      int column = -1)
+        : OusiaException(formatMessage(msg, file, line, column)),
+          msg(std::move(msg)),
+          file(std::move(file)),
+          line(line),
+          column(column)
+    {
+    }
+
+    /**
+     * Constructor of the LoggableException class with empty file.
+     *
+     * @param msg contains the error message.
+     * @param line is the line the message refers to.
+     * @param column is the column the message refers to.
+     */
+    LoggableException(std::string msg, int line = -1, int column = -1)
+        : OusiaException(formatMessage(msg, "", line, column)),
+          msg(std::move(msg)),
+          line(line),
+          column(column)
+    {
+    }
+
+    /**
+     * Constructor of the LoggableException class with empty file and a
+     * position object.
+     *
+     * The two defaulted template parameters remove this overload from
+     * overload resolution unless PosType really offers getLine() and
+     * getColumn(). Without them a call such as
+     * LoggableException("msg", "file") would select this template (exact
+     * match for the character array) instead of the (msg, file) constructor
+     * and then fail to compile.
+     *
+     * @param msg is the actual log message.
+     * @param pos is a const reference to a variable which provides position
+     * information.
+     * @tparam PosType is the actual type of pos and must implement a getLine
+     * and a getColumn function.
+     */
+    template <class PosType,
+              class = decltype(
+                  static_cast<const PosType *>(nullptr)->getLine()),
+              class = decltype(
+                  static_cast<const PosType *>(nullptr)->getColumn())>
+    LoggableException(std::string msg, const PosType &pos)
+        : OusiaException(
+              formatMessage(msg, "", pos.getLine(), pos.getColumn())),
+          msg(std::move(msg)),
+          line(pos.getLine()),
+          column(pos.getColumn())
+    {
+    }
+};
+}
+
+#endif /* _OUSIA_EXCEPTIONS_HPP_ */
+
diff --git a/src/core/common/Logger.cpp b/src/core/common/Logger.cpp
new file mode 100644
index 0000000..17f55a6
--- /dev/null
+++ b/src/core/common/Logger.cpp
@@ -0,0 +1,161 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+#include <sstream>
+
+#include "Logger.hpp"
+
+namespace ousia {
+
+/* Class Logger */
+
+/**
+ * Most generic log function: records the severity of the message and forwards
+ * the message to process() unless it is filtered out by the minimum severity.
+ */
+void Logger::log(Severity severity, const std::string &msg,
+                 const std::string &file, int line, int column)
+{
+    const int level = static_cast<int>(severity);
+
+    // Keep track of the most severe message seen so far. This happens before
+    // filtering, so discarded messages count as well.
+    if (level > static_cast<int>(maxEncounteredSeverity)) {
+        maxEncounteredSeverity = severity;
+    }
+
+    // Messages below the minimum severity are dropped
+    if (level < static_cast<int>(minSeverity)) {
+        return;
+    }
+    process(Message{severity, msg, file, line, column});
+}
+
+/**
+ * Puts the given file name on top of the filename stack and reports the
+ * resulting stack size.
+ */
+unsigned int Logger::pushFilename(const std::string &name)
+{
+    filenameStack.push(name);
+    const unsigned int depth = filenameStack.size();
+    return depth;
+}
+
+/**
+ * Removes the topmost file name from the filename stack. Calling this function
+ * while the stack is empty is a no-op (std::stack::pop on an empty stack would
+ * be undefined behaviour).
+ *
+ * @return the size of the filename stack after the operation.
+ */
+unsigned int Logger::popFilename()
+{
+    if (!filenameStack.empty()) {
+        filenameStack.pop();
+    }
+    return filenameStack.size();
+}
+
+/**
+ * Shrinks the filename stack until it holds at most "pos" entries.
+ */
+void Logger::unwindFilenameStack(unsigned int pos)
+{
+    // A size larger than the (unsigned) target implies a non-empty stack, so
+    // no separate emptiness test is needed before popping.
+    while (filenameStack.size() > pos) {
+        filenameStack.pop();
+    }
+}
+
+/* Class TerminalLogger */
+
+/**
+ * Small helper used internally to produce ANSI/VT100 escape sequences for
+ * formatted terminal output. When constructed as inactive, all functions
+ * return empty strings.
+ *
+ * TODO: Deactivate if using windows or use the corresponding API function.
+ */
+class Terminal {
+private:
+    /**
+     * If set to false, no control codes are generated.
+     */
+    bool active;
+
+public:
+    static const int BLACK = 30;
+    static const int RED = 31;
+    static const int GREEN = 32;
+    static const int YELLOW = 33;
+    static const int BLUE = 34;
+    static const int MAGENTA = 35;
+    static const int CYAN = 36;
+    static const int WHITE = 37;
+
+    Terminal(bool active) : active(active) {}
+
+    /**
+     * Builds the escape sequence which selects the given foreground color.
+     *
+     * @param color is the numeric color code, e.g. one of the constants above.
+     * @param bright prepends the "1;" attribute to the color code if true.
+     * @return the escape sequence or an empty string if inactive.
+     */
+    std::string color(int color, bool bright = true) const
+    {
+        std::stringstream sequence;
+        if (active) {
+            sequence << "\x1b[" << (bright ? "1;" : "") << color << "m";
+        }
+        return sequence.str();
+    }
+
+    /**
+     * Builds the escape sequence which resets all attributes.
+     *
+     * @return the escape sequence or an empty string if inactive.
+     */
+    std::string reset() const
+    {
+        return active ? std::string{"\x1b[0m"} : std::string{};
+    }
+};
+
+/**
+ * Writes a single message to the output stream in the form
+ * "file:line:column: severity: message", leaving out the parts that are not
+ * set. Color sequences are only produced if useColor is true.
+ */
+void TerminalLogger::process(const Message &msg)
+{
+    const Terminal t(useColor);
+    const bool withFile = msg.hasFile();
+    const bool withLine = msg.hasLine();
+
+    // File name
+    if (withFile) {
+        os << t.color(Terminal::WHITE, true) << msg.file << t.reset();
+    }
+
+    // Line and column, separated from the file name by a colon
+    if (withLine) {
+        if (withFile) {
+            os << ':';
+        }
+        os << t.color(Terminal::WHITE, true) << msg.line << t.reset();
+        if (msg.hasColumn()) {
+            os << ':' << msg.column;
+        }
+    }
+
+    // Separator between the position and the remainder of the message
+    if (withFile || withLine) {
+        os << ": ";
+    }
+
+    // Pick color and label for the severity; debug messages have no label
+    int tint = 0;
+    const char *label = nullptr;
+    switch (msg.severity) {
+        case Severity::DEBUG:
+            break;
+        case Severity::NOTE:
+            tint = Terminal::CYAN;
+            label = "note: ";
+            break;
+        case Severity::WARNING:
+            tint = Terminal::MAGENTA;
+            label = "warning: ";
+            break;
+        case Severity::ERROR:
+            tint = Terminal::RED;
+            label = "error: ";
+            break;
+        case Severity::FATAL_ERROR:
+            tint = Terminal::RED;
+            label = "fatal: ";
+            break;
+    }
+    if (label != nullptr) {
+        os << t.color(tint, true) << label;
+    }
+    os << t.reset();
+
+    // The actual message; std::endl also flushes the stream
+    os << msg.msg << std::endl;
+}
+}
+
diff --git a/src/core/common/Logger.hpp b/src/core/common/Logger.hpp
new file mode 100644
index 0000000..e6b97f4
--- /dev/null
+++ b/src/core/common/Logger.hpp
@@ -0,0 +1,609 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Logger.hpp
+ *
+ * Contains classes for logging messages in Ousía. Provides a generic Logger
+ * class, and TerminalLogger, an extension of Logger which logs to an output
+ * stream.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_LOGGER_HPP_
+#define _OUSIA_LOGGER_HPP_
+
+#include <ostream>
+#include <stack>
+#include <string>
+#include <vector>
+
+#include "Exceptions.hpp"
+
+namespace ousia {
+
+/**
+ * Enum containing the severities used for logging errors and debug messages.
+ * The numeric values are ordered from least to most severe; Logger::log
+ * compares them as integers when filtering messages and when tracking the
+ * maximum encountered severity.
+ */
+enum class Severity : int {
+ /**
+ * Indicates that this message was only printed for debugging. Note that
+ * with the default minimum severity (DEFAULT_MIN_SEVERITY) messages with
+ * this severity are discarded if NDEBUG is defined.
+ */
+ DEBUG = 0,
+
+ /**
+ * A message which might provide additional information to the user.
+ */
+ NOTE = 1,
+
+ /**
+ * A message which warns of possible mistakes by the user which might not be
+ * actual errors but may lead to unintended behaviour.
+ */
+ WARNING = 2,
+
+ /**
+ * An error occurred while processing, however program execution continues,
+ * trying to deal with the error situation (graceful degradation). However,
+ * messages with this severity may be followed up by fatal errors.
+ */
+ ERROR = 3,
+
+ /**
+ * A fatal error occurred. Program execution cannot continue.
+ */
+ FATAL_ERROR = 4
+};
+
+/**
+ * Default minimum severity used by the Logger constructors: Severity::NOTE if
+ * NDEBUG is defined, Severity::DEBUG otherwise.
+ */
+#ifdef NDEBUG
+static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE;
+#else
+static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG;
+#endif
+
+/**
+ * The Logger class is the base class the individual logging systems should
+ * derive from. It provides a simple interface for logging errors, warnings and
+ * notes and filters these according to the set minimum severity. Additionally
+ * a stack of file names is maintained in order to allow simple descent into
+ * included files. Note however, that this base Logger class simply discards the
+ * incoming log messages. Use one of the derived classes to actually handle the
+ * log messages.
+ */
+class Logger {
+public:
+    /**
+     * The Message struct represents a single log message and all information
+     * attached to it.
+     */
+    struct Message {
+        /**
+         * Severity of the log message.
+         */
+        Severity severity;
+
+        /**
+         * Actual log message.
+         */
+        std::string msg;
+
+        /**
+         * Refers to the file which provides the context for this error
+         * message. May be empty.
+         */
+        std::string file;
+
+        /**
+         * Line in the above file the error message refers to. Ignored if
+         * smaller than zero.
+         */
+        int line;
+
+        /**
+         * Column in the above file the error message refers to. Ignored if
+         * smaller than zero.
+         */
+        int column;
+
+        /**
+         * Constructor of the Message struct.
+         *
+         * @param severity describes the message severity.
+         * @param msg contains the actual message.
+         * @param file provides the context the message refers to. May be
+         * empty.
+         * @param line is the line in the above file the message refers to.
+         * @param column is the column in the above file the message refers to.
+         */
+        Message(Severity severity, std::string msg, std::string file, int line,
+                int column)
+            : severity(severity),
+              msg(std::move(msg)),
+              file(std::move(file)),
+              line(line),
+              column(column)
+        {
+        }
+
+        /**
+         * Returns true if the file string is set.
+         *
+         * @return true if the file string is non-empty.
+         */
+        bool hasFile() const { return !file.empty(); }
+
+        /**
+         * Returns true if the line is set.
+         *
+         * @return true if the line number is a non-negative integer.
+         */
+        bool hasLine() const { return line >= 0; }
+
+        /**
+         * Returns true if column and line are set (since a column has no
+         * significance without a line number).
+         *
+         * @return true if line number and column number are non-negative
+         * integers.
+         */
+        bool hasColumn() const { return hasLine() && column >= 0; }
+    };
+
+private:
+    /**
+     * Minimum severity a log message must have in order to be passed to
+     * process(). Messages with a smaller severity are discarded.
+     */
+    Severity minSeverity;
+
+    /**
+     * Maximum encountered log message severity.
+     */
+    Severity maxEncounteredSeverity;
+
+    /**
+     * Stack containing the current file names that have been processed.
+     */
+    std::stack<std::string> filenameStack;
+
+protected:
+    /**
+     * Function to be overridden by child classes to actually display or store
+     * the messages. The default implementation just discards all incoming
+     * messages.
+     *
+     * @param msg is an instance of the Message struct containing the data that
+     * should be logged.
+     */
+    virtual void process(const Message &msg) {}
+
+public:
+    /**
+     * Constructor of the Logger class.
+     *
+     * @param minSeverity is the minimum severity a log message should have.
+     * Messages below this severity are discarded.
+     */
+    Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY)
+        : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG)
+    {
+    }
+
+    /**
+     * Logger instances cannot be copy-constructed.
+     */
+    Logger(const Logger &) = delete;
+
+    /**
+     * Virtual destructor.
+     */
+    virtual ~Logger() {}
+
+    /**
+     * Logs the given message. Most generic log function.
+     *
+     * @param severity is the severity of the log message.
+     * @param msg is the actual log message.
+     * @param file is the name of the file the message refers to. May be empty.
+     * @param line is the line in the above file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the above file at which the error
+     * occurred. Ignored if negative.
+     */
+    void log(Severity severity, const std::string &msg, const std::string &file,
+             int line = -1, int column = -1);
+
+    /**
+     * Logs the given message. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param severity is the severity of the log message.
+     * @param msg is the actual log message.
+     * @param line is the line in the current file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the current file at which the error
+     * occurred. Ignored if negative.
+     */
+    void log(Severity severity, const std::string &msg, int line = -1,
+             int column = -1)
+    {
+        log(severity, msg, currentFilename(), line, column);
+    }
+
+    /**
+     * Logs the given message. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param severity is the severity of the log message.
+     * @param msg is the actual log message.
+     * @param pos is a const reference to a variable which provides position
+     * information.
+     * @tparam PosType is the actual type of pos and must implement a getLine
+     * and getColumn function.
+     */
+    template <class PosType>
+    void logAt(Severity severity, const std::string &msg, const PosType &pos)
+    {
+        log(severity, msg, pos.getLine(), pos.getColumn());
+    }
+
+    /**
+     * Logs the given loggable exception with Severity::ERROR. If the exception
+     * carries no file name, the topmost file name on the file name stack is
+     * used instead.
+     *
+     * @param ex is the exception that should be logged.
+     */
+    void log(const LoggableException &ex)
+    {
+        log(Severity::ERROR, ex.msg,
+            ex.file.empty() ? currentFilename() : ex.file, ex.line, ex.column);
+    }
+
+    /**
+     * Logs a debug message for an explicitly given file.
+     *
+     * @param msg is the actual log message.
+     * @param file is the name of the file the message refers to. May be empty.
+     * @param line is the line in the above file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the above file at which the error
+     * occurred. Ignored if negative.
+     */
+    void debug(const std::string &msg, const std::string &file, int line = -1,
+               int column = -1)
+    {
+        log(Severity::DEBUG, msg, file, line, column);
+    }
+
+    /**
+     * Logs a debug message. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param line is the line in the current file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the current file at which the error
+     * occurred. Ignored if negative.
+     */
+    void debug(const std::string &msg, int line = -1, int column = -1)
+    {
+        debug(msg, currentFilename(), line, column);
+    }
+
+    /**
+     * Logs a debug message. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param pos is a const reference to a variable which provides position
+     * information.
+     * @tparam PosType is the actual type of pos and must implement a getLine
+     * and getColumn function.
+     */
+    template <class PosType>
+    void debugAt(const std::string &msg, const PosType &pos)
+    {
+        debug(msg, pos.getLine(), pos.getColumn());
+    }
+
+    /**
+     * Logs a note for an explicitly given file.
+     *
+     * @param msg is the actual log message.
+     * @param file is the name of the file the message refers to. May be empty.
+     * @param line is the line in the above file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the above file at which the error
+     * occurred. Ignored if negative.
+     */
+    void note(const std::string &msg, const std::string &file, int line = -1,
+              int column = -1)
+    {
+        log(Severity::NOTE, msg, file, line, column);
+    }
+
+    /**
+     * Logs a note. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param line is the line in the current file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the current file at which the error
+     * occurred. Ignored if negative.
+     */
+    void note(const std::string &msg, int line = -1, int column = -1)
+    {
+        note(msg, currentFilename(), line, column);
+    }
+
+    /**
+     * Logs a note. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param pos is a const reference to a variable which provides position
+     * information.
+     * @tparam PosType is the actual type of pos and must implement a getLine
+     * and getColumn function.
+     */
+    template <class PosType>
+    void noteAt(const std::string &msg, const PosType &pos)
+    {
+        note(msg, pos.getLine(), pos.getColumn());
+    }
+
+    /**
+     * Logs a warning for an explicitly given file.
+     *
+     * @param msg is the actual log message.
+     * @param file is the name of the file the message refers to. May be empty.
+     * @param line is the line in the above file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the above file at which the error
+     * occurred. Ignored if negative.
+     */
+    void warning(const std::string &msg, const std::string &file, int line = -1,
+                 int column = -1)
+    {
+        log(Severity::WARNING, msg, file, line, column);
+    }
+
+    /**
+     * Logs a warning. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param pos is a const reference to a variable which provides position
+     * information.
+     * @tparam PosType is the actual type of pos and must implement a getLine
+     * and getColumn function.
+     */
+    template <class PosType>
+    void warningAt(const std::string &msg, const PosType &pos)
+    {
+        warning(msg, pos.getLine(), pos.getColumn());
+    }
+
+    /**
+     * Logs a warning. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param line is the line in the current file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the current file at which the error
+     * occurred. Ignored if negative.
+     */
+    void warning(const std::string &msg, int line = -1, int column = -1)
+    {
+        warning(msg, currentFilename(), line, column);
+    }
+
+    /**
+     * Logs an error message for an explicitly given file.
+     *
+     * @param msg is the actual log message.
+     * @param file is the name of the file the message refers to. May be empty.
+     * @param line is the line in the above file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the above file at which the error
+     * occurred. Ignored if negative.
+     */
+    void error(const std::string &msg, const std::string &file, int line = -1,
+               int column = -1)
+    {
+        log(Severity::ERROR, msg, file, line, column);
+    }
+
+    /**
+     * Logs an error message. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param line is the line in the current file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the current file at which the error
+     * occurred. Ignored if negative.
+     */
+    void error(const std::string &msg, int line = -1, int column = -1)
+    {
+        error(msg, currentFilename(), line, column);
+    }
+
+    /**
+     * Logs an error message. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param pos is a const reference to a variable which provides position
+     * information.
+     * @tparam PosType is the actual type of pos and must implement a getLine
+     * and getColumn function.
+     */
+    template <class PosType>
+    void errorAt(const std::string &msg, const PosType &pos)
+    {
+        error(msg, pos.getLine(), pos.getColumn());
+    }
+
+    /**
+     * Logs a fatal error for an explicitly given file.
+     *
+     * @param msg is the actual log message.
+     * @param file is the name of the file the message refers to. May be empty.
+     * @param line is the line in the above file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the above file at which the error
+     * occurred. Ignored if negative.
+     */
+    void fatalError(const std::string &msg, const std::string &file,
+                    int line = -1, int column = -1)
+    {
+        log(Severity::FATAL_ERROR, msg, file, line, column);
+    }
+
+    /**
+     * Logs a fatal error. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param line is the line in the current file at which the error occurred.
+     * Ignored if negative.
+     * @param column is the column in the current file at which the error
+     * occurred. Ignored if negative.
+     */
+    void fatalError(const std::string &msg, int line = -1, int column = -1)
+    {
+        fatalError(msg, currentFilename(), line, column);
+    }
+
+    /**
+     * Logs a fatal error. The file name is set to the topmost file name on
+     * the file name stack.
+     *
+     * @param msg is the actual log message.
+     * @param pos is a const reference to a variable which provides position
+     * information.
+     * @tparam PosType is the actual type of pos and must implement a getLine
+     * and getColumn function.
+     */
+    template <class PosType>
+    void fatalErrorAt(const std::string &msg, const PosType &pos)
+    {
+        fatalError(msg, pos.getLine(), pos.getColumn());
+    }
+
+    /**
+     * Pushes a new file name onto the internal filename stack.
+     *
+     * @param name is the name of the file that should be added to the filename
+     * stack.
+     * @return the size of the filename stack. This number can be passed to the
+     * "unwindFilenameStack" method in order to return the stack to the state
+     * it was in after this function has been called.
+     */
+    unsigned int pushFilename(const std::string &name);
+
+    /**
+     * Pops the filename from the internal filename stack.
+     *
+     * @return the current size of the filename stack.
+     */
+    unsigned int popFilename();
+
+    /**
+     * Pops elements from the filename stack while it has more elements than
+     * the given number and the stack is non-empty.
+     *
+     * @param pos is the position the filename stack should be unwound to. Use
+     * a number returned by pushFilename.
+     */
+    void unwindFilenameStack(unsigned int pos);
+
+    /**
+     * Returns the topmost filename from the internal filename stack.
+     *
+     * @return a copy of the topmost filename from the filename stack or an
+     * empty string if the filename stack is empty.
+     */
+    std::string currentFilename() const
+    {
+        return filenameStack.empty() ? std::string{} : filenameStack.top();
+    }
+
+    /**
+     * Returns the maximum severity that was encountered by the Logger but at
+     * least Severity::DEBUG.
+     *
+     * @return the severity of the most severe log message but at least
+     * Severity::DEBUG.
+     */
+    Severity getMaxEncounteredSeverity() const
+    {
+        return maxEncounteredSeverity;
+    }
+
+    /**
+     * Returns the minimum severity. Messages with a smaller severity are
+     * discarded.
+     *
+     * @return the minimum severity.
+     */
+    Severity getMinSeverity() const { return minSeverity; }
+
+    /**
+     * Sets the minimum severity. Messages with a smaller severity will be
+     * discarded. Only new messages will be filtered according to the new value.
+     *
+     * @param severity is the minimum severity for new log messages.
+     */
+    void setMinSeverity(Severity severity) { minSeverity = severity; }
+};
+
+/**
+ * Class extending the Logger class and printing the log messages to the given
+ * stream.
+ */
+class TerminalLogger : public Logger {
+private:
+ /**
+ * Reference to the target output stream. Only the reference is stored, so
+ * the stream has to stay alive for as long as the logger is used.
+ */
+ std::ostream &os;
+
+ /**
+ * If true, the TerminalLogger will use colors to make the log messages
+ * prettier.
+ */
+ bool useColor;
+
+protected:
+ /**
+ * Implements the process function and logs the messages to the output.
+ *
+ * @param msg is the message that should be written to the output stream.
+ */
+ void process(const Message &msg) override;
+
+public:
+ /**
+ * Constructor of the TerminalLogger class.
+ *
+ * @param os is the output stream the log messages should be logged to.
+ * Should be set to std::cerr in most cases.
+ * @param useColor if true, the TerminalLogger class will do its best to
+ * use ANSI/VT100 control sequences for colored log messages.
+ * @param minSeverity is the minimum severity below which log messages are
+ * discarded.
+ */
+ TerminalLogger(std::ostream &os, bool useColor = false,
+ Severity minSeverity = DEFAULT_MIN_SEVERITY)
+ : Logger(minSeverity), os(os), useColor(useColor)
+ {
+ }
+};
+}
+
+#endif /* _OUSIA_LOGGER_HPP_ */
+
diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
new file mode 100644
index 0000000..c460ed4
--- /dev/null
+++ b/src/core/common/Utils.cpp
@@ -0,0 +1,59 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+#include <limits>
+
+#include "Utils.hpp"
+
+namespace ousia {
+
+/**
+ * Removes whitespace (as defined by Utils::isWhitespace) at the beginning and
+ * the end of the given string.
+ *
+ * @param s is the string that should be trimmed.
+ * @return a copy of s without leading and trailing whitespace. The result is
+ * empty if s is empty or consists of whitespace only.
+ */
+std::string Utils::trim(const std::string &s)
+{
+    // Index of the first character that is kept
+    size_t first = 0;
+    while (first < s.size() && isWhitespace(s[first])) {
+        first++;
+    }
+
+    // Index one past the last character that is kept. Never moves below
+    // "first", so a string with a single non-whitespace character is
+    // returned correctly instead of being reduced to an empty string.
+    size_t last = s.size();
+    while (last > first && isWhitespace(s[last - 1])) {
+        last--;
+    }
+
+    return s.substr(first, last - first);
+}
+
+/**
+ * Checks whether the given string matches [A-Za-z_][A-Za-z0-9_-]*.
+ *
+ * @param name is the string that should be checked.
+ * @return true if name is non-empty, starts with a letter or an underscore
+ * and all following characters are letters, digits, underscores or hyphens.
+ */
+bool Utils::isIdentifier(const std::string &name)
+{
+    // The pattern requires at least one character
+    if (name.empty()) {
+        return false;
+    }
+
+    // First character: letters and underscore only
+    if (!(isAlphabetic(name[0]) || name[0] == '_')) {
+        return false;
+    }
+
+    // Remaining characters: digits and hyphen are allowed in addition
+    for (size_t i = 1; i < name.size(); i++) {
+        const char c = name[i];
+        if (!(isAlphanumeric(c) || c == '_' || c == '-')) {
+            return false;
+        }
+    }
+    return true;
+}
+}
+
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
new file mode 100644
index 0000000..5332b50
--- /dev/null
+++ b/src/core/common/Utils.hpp
@@ -0,0 +1,110 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_UTILS_H_
+#define _OUSIA_UTILS_H_
+
+#include <sstream>
+#include <string>
+
+namespace ousia {
+
+/**
+ * Collection of static helper functions for character classification and
+ * simple string handling.
+ */
+class Utils {
+public:
+    /**
+     * Returns true if the given character is in [A-Za-z]
+     */
+    static bool isAlphabetic(const char c)
+    {
+        return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'));
+    }
+
+    /**
+     * Returns true if the given character is in [0-9]
+     */
+    static bool isNumeric(const char c) { return (c >= '0') && (c <= '9'); }
+
+    /**
+     * Returns true if the given character is in [0-9A-Fa-f]
+     */
+    static bool isHexadecimal(const char c)
+    {
+        return ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F')) ||
+               ((c >= 'a') && (c <= 'f'));
+    }
+
+    /**
+     * Returns true if the given character is in [A-Za-z0-9]
+     */
+    static bool isAlphanumeric(const char c)
+    {
+        return isAlphabetic(c) || isNumeric(c);
+    }
+
+    /**
+     * Returns true if the given string matches [A-Za-z_][A-Za-z0-9_-]*
+     */
+    static bool isIdentifier(const std::string &name);
+
+    /**
+     * Returns true if the given character is a whitespace character (space,
+     * tab, line feed or carriage return).
+     */
+    static bool isWhitespace(const char c)
+    {
+        return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
+    }
+
+    /**
+     * Removes whitespace at the beginning and the end of the given string.
+     */
+    static std::string trim(const std::string &s);
+
+    /**
+     * Turns the elements of a collection into a string separated by the
+     * given delimiter.
+     *
+     * The collection is taken by const reference, so it is not copied and
+     * built-in arrays can be passed as well.
+     *
+     * @param es is an iterable container of elements that can be appended to an
+     * output stream (the << operator must be implemented).
+     * @param delim is the delimiter that should be used to separate the items.
+     * @param start is a character sequence that should be prepended to the
+     * result.
+     * @param end is a character sequence that should be appended to the result.
+     * @return start, followed by the delimiter-separated elements, followed by
+     * end.
+     */
+    template <class T>
+    static std::string join(const T &es, const std::string &delim,
+                            const std::string &start = "",
+                            const std::string &end = "")
+    {
+        std::stringstream res;
+        bool first = true;
+        res << start;
+        for (const auto &e : es) {
+            // The delimiter goes between elements, not in front of the first
+            if (!first) {
+                res << delim;
+            }
+            res << e;
+            first = false;
+        }
+        res << end;
+        return res.str();
+    }
+};
+}
+
+#endif /* _OUSIA_UTILS_H_ */
+
diff --git a/src/core/common/Variant.cpp b/src/core/common/Variant.cpp
new file mode 100644
index 0000000..27fc6e7
--- /dev/null
+++ b/src/core/common/Variant.cpp
@@ -0,0 +1,154 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <sstream>
+
+#include "Utils.hpp"
+#include "Variant.hpp"
+
+namespace ousia {
+
+/* Class Variant::TypeException */
+
+/**
+ * Builds the exception message from the names of the requested and the actual
+ * type and stores both types in the exception instance.
+ *
+ * @param actualType is the type the variant actually has.
+ * @param requestedType is the type that was requested by the caller.
+ */
+Variant::TypeException::TypeException(Type actualType, Type requestedType)
+    : OusiaException(std::string{"Variant: Requested \""} +
+                     Variant::getTypeName(requestedType) + "\" but is \"" +
+                     Variant::getTypeName(actualType) + "\""),
+      actualType(actualType),
+      requestedType(requestedType)
+{
+}
+
+/* Class Variant */
+
+/**
+ * Maps the given type to its human readable name.
+ *
+ * @param type is the type for which the name should be returned.
+ * @return a pointer to a string literal containing the name, "unknown" if the
+ * value is not one of the enumerators of Variant::Type.
+ */
+const char *Variant::getTypeName(Type type)
+{
+    // Used if the switch below does not match any enumerator
+    const char *name = "unknown";
+    switch (type) {
+        case Type::NULLPTR:
+            name = "null";
+            break;
+        case Type::BOOL:
+            name = "boolean";
+            break;
+        case Type::INT:
+            name = "integer";
+            break;
+        case Type::DOUBLE:
+            name = "double";
+            break;
+        case Type::STRING:
+            name = "string";
+            break;
+        case Type::ARRAY:
+            name = "array";
+            break;
+        case Type::MAP:
+            name = "map";
+            break;
+    }
+    return name;
+}
+
+/**
+ * Converts the variant to a boolean value. Null converts to false, booleans
+ * are returned as they are, numbers convert to true if they are not zero.
+ * Strings, arrays and maps always convert to true, independent of their
+ * content.
+ *
+ * @return the boolean representation of the variant.
+ */
+Variant::boolType Variant::toBool() const
+{
+    switch (getType()) {
+        case Type::BOOL:
+            return asBool();
+        case Type::INT:
+            return asInt() != 0;
+        case Type::DOUBLE:
+            return asDouble() != 0.0;
+        case Type::STRING:
+        case Type::ARRAY:
+        case Type::MAP:
+            return true;
+        case Type::NULLPTR:
+            break;
+    }
+    return false;
+}
+
+/**
+ * Converts the variant to an integer value. Null and maps convert to zero,
+ * booleans to zero or one, doubles are truncated. An array with exactly one
+ * element converts to the integer value of that element, all other arrays
+ * convert to zero. Strings are not parsed yet and convert to zero.
+ *
+ * @return the integer representation of the variant.
+ */
+Variant::intType Variant::toInt() const
+{
+    switch (getType()) {
+        case Type::NULLPTR:
+            return 0;
+        case Type::BOOL:
+            return asBool() ? 1 : 0;
+        case Type::INT:
+            return asInt();
+        case Type::DOUBLE:
+            // Explicit narrowing conversion, the fractional part is discarded
+            return static_cast<intType>(asDouble());
+        case Type::STRING:
+            return 0;  // TODO: Parse string as int
+        case Type::ARRAY: {
+            const arrayType &a = asArray();
+            return (a.size() == 1) ? a[0].toInt() : 0;
+        }
+        case Type::MAP:
+            return 0;
+    }
+    // Only reached if the type is not one of the known enumerators
+    return 0;
+}
+
+/**
+ * Converts the variant to a double value. Null and maps convert to zero,
+ * booleans to zero or one, integers are widened. An array with exactly one
+ * element converts to the double value of that element, all other arrays
+ * convert to zero. Strings are not parsed yet and convert to zero.
+ *
+ * @return the double representation of the variant.
+ */
+Variant::doubleType Variant::toDouble() const
+{
+    switch (getType()) {
+        case Type::NULLPTR:
+            return 0.0;
+        case Type::BOOL:
+            return asBool() ? 1.0 : 0.0;
+        case Type::INT:
+            return static_cast<doubleType>(asInt());
+        case Type::DOUBLE:
+            return asDouble();
+        case Type::STRING:
+            return 0.0;  // TODO: Parse string as double
+        case Type::ARRAY: {
+            const arrayType &a = asArray();
+            return (a.size() == 1) ? a[0].toDouble() : 0.0;
+        }
+        case Type::MAP:
+            return 0.0;
+    }
+    // Only reached if the type is not one of the known enumerators
+    return 0.0;
+}
+
+/**
+ * Converts the variant to a string.
+ *
+ * @param escape if set to true, string values are surrounded by double
+ * quotes, otherwise the string value is returned as it is. Special characters
+ * inside the string are not escaped yet (see TODO below). Elements of arrays
+ * and maps are written via the stream operator and thus always quoted.
+ * @return the string representation of the variant.
+ */
+Variant::stringType Variant::toString(bool escape) const
+{
+    switch (getType()) {
+        case Type::NULLPTR:
+            return "null";
+        case Type::BOOL:
+            return asBool() ? "true" : "false";
+        case Type::INT:
+            return std::to_string(asInt());
+        case Type::DOUBLE:
+            return std::to_string(asDouble());
+        case Type::STRING: {
+            // Without escaping the raw string value is the result
+            if (!escape) {
+                return asString();
+            }
+            // TODO: Use proper serialization function
+            std::stringstream ss;
+            ss << "\"" << asString() << "\"";
+            return ss.str();
+        }
+        case Type::ARRAY:
+            return Utils::join(asArray(), ", ", "[", "]");
+        case Type::MAP:
+            return Utils::join(asMap(), ", ", "{", "}");
+    }
+    return "";
+}
+
+}
+
diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp
new file mode 100644
index 0000000..d411fd3
--- /dev/null
+++ b/src/core/common/Variant.hpp
@@ -0,0 +1,761 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Variant.hpp
+ *
+ * The Variant class is used to efficiently represent variables of varying
+ * type. Variant instances are used to represent data given by the end user and
+ * to exchange information between the host application and the script clients.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_VARIANT_HPP_
+#define _OUSIA_VARIANT_HPP_
+
+#include <cstdint>
+#include <map>
+#include <string>
+#include <vector>
+#include <ostream>
+
+// TODO: Use
+// http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html
+// later (will allow to use 8 bytes for a variant)
+
+#include "Exceptions.hpp"
+
+namespace ousia {
+
+/**
+ * Instances of the Variant class represent any kind of data that is exchanged
+ * between the host application and the script engine. A Variant stores exactly
+ * one value of one of the types listed in Variant::Type. Strings, arrays and
+ * maps are kept on the free store and are owned by the Variant instance.
+ */
+class Variant {
+public:
+    /**
+     * Enum containing the possible types a variant may have.
+     */
+    enum class Type : int16_t {
+        NULLPTR,
+        BOOL,
+        INT,
+        DOUBLE,
+        STRING,
+        ARRAY,
+        MAP
+    };
+
+    /**
+     * Exception thrown whenever a variant is accessed via a getter function
+     * that is not supported for the current variant type.
+     */
+    class TypeException : public OusiaException {
+    private:
+        /**
+         * Internally used string holding the exception message.
+         */
+        const std::string msg;
+
+    public:
+        /**
+         * Contains the actual type of the variant.
+         */
+        const Type actualType;
+
+        /**
+         * Contains the requested type of the variant.
+         */
+        const Type requestedType;
+
+        /**
+         * Constructor of the TypeException.
+         *
+         * @param actualType describes the actual type of the variant.
+         * @param requestedType describes the type in which the variant was
+         * requested.
+         */
+        TypeException(Type actualType, Type requestedType);
+    };
+
+    using boolType = bool;
+    using intType = int32_t;
+    using doubleType = double;
+    using stringType = std::string;
+    using arrayType = std::vector<Variant>;
+    using mapType = std::map<std::string, Variant>;
+
+private:
+    /**
+     * Used to store the actual type of the variant.
+     */
+    Type type = Type::NULLPTR;
+
+    /**
+     * Anonymous union containing the possible value of the variant.
+     */
+    union {
+        /**
+         * The boolean value. Only valid if type is Type::BOOL.
+         */
+        boolType boolVal;
+        /**
+         * The integer value. Only valid if type is Type::INT.
+         */
+        intType intVal;
+        /**
+         * The number value. Only valid if type is Type::DOUBLE.
+         */
+        doubleType doubleVal;
+        /**
+         * Pointer to the more complex data structures on the free store. Only
+         * valid if type is one of Type::STRING, Type::ARRAY,
+         * Type::MAP.
+         */
+        void *ptrVal;
+    };
+
+    /**
+     * Internally used to convert the current pointer value to a reference of
+     * the specified type.
+     *
+     * @param requestedType is the type the variant must currently have.
+     * @return a reference to the object ptrVal points at.
+     * @throws TypeException if the variant is not of the requested type.
+     */
+    template <typename T>
+    T &asObj(Type requestedType) const
+    {
+        const Type actualType = getType();
+        if (actualType != requestedType) {
+            throw TypeException{actualType, requestedType};
+        }
+        return *(static_cast<T *>(ptrVal));
+    }
+
+    /**
+     * Used internally to release the current value and to take ownership of
+     * the given, already allocated object.
+     *
+     * @param newType is the type of the object, one of Type::STRING,
+     * Type::ARRAY or Type::MAP.
+     * @param obj is the pointer to the object that should be owned by this
+     * instance from now on.
+     */
+    void adopt(Type newType, void *obj) noexcept
+    {
+        destroy();
+        type = newType;
+        ptrVal = obj;
+    }
+
+    /**
+     * Used internally to assign the value of another Variant instance to this
+     * instance. Does nothing if v is this instance. The value of v is read
+     * (and, for strings, arrays and maps, cloned) before the old value of
+     * this instance is released. Thus v may be an element of the array or map
+     * stored in this instance, and this instance is left untouched if the
+     * allocation of the clone throws.
+     *
+     * @param v is the Variant instance that should be copied to this instance.
+     */
+    void copy(const Variant &v)
+    {
+        if (this == &v) {
+            return;
+        }
+        switch (v.type) {
+            case Type::NULLPTR:
+                setNull();
+                break;
+            case Type::BOOL:
+                setBool(v.boolVal);
+                break;
+            case Type::INT:
+                setInt(v.intVal);
+                break;
+            case Type::DOUBLE:
+                setDouble(v.doubleVal);
+                break;
+            case Type::STRING:
+                adopt(Type::STRING, new stringType(v.asString()));
+                break;
+            case Type::ARRAY:
+                adopt(Type::ARRAY, new arrayType(v.asArray()));
+                break;
+            case Type::MAP:
+                adopt(Type::MAP, new mapType(v.asMap()));
+                break;
+        }
+    }
+
+    /**
+     * Used internally to move the value of another Variant instance to this
+     * instance. Does nothing if v is this instance. The value is detached
+     * from v before the old value of this instance is released, v is not
+     * accessed afterwards. Thus v may be an element of the array or map
+     * stored in this instance.
+     *
+     * @param v is the Variant instance that should be moved to this instance.
+     * It represents null afterwards.
+     */
+    void move(Variant &&v) noexcept
+    {
+        if (this == &v) {
+            return;
+        }
+        switch (v.type) {
+            case Type::NULLPTR:
+                setNull();
+                break;
+            case Type::BOOL: {
+                const boolType b = v.boolVal;
+                v.destroy();
+                setBool(b);
+                break;
+            }
+            case Type::INT: {
+                const intType i = v.intVal;
+                v.destroy();
+                setInt(i);
+                break;
+            }
+            case Type::DOUBLE: {
+                const doubleType d = v.doubleVal;
+                v.destroy();
+                setDouble(d);
+                break;
+            }
+            case Type::STRING:
+            case Type::ARRAY:
+            case Type::MAP: {
+                // Steal the pointer, v must no longer own the object
+                const Type movedType = v.type;
+                void *obj = v.ptrVal;
+                v.type = Type::NULLPTR;
+                v.ptrVal = nullptr;
+                adopt(movedType, obj);
+                break;
+            }
+        }
+    }
+
+    /**
+     * Used internally to destroy any value that was allocated on the heap.
+     * The variant represents null afterwards. Only the type tag decides
+     * whether ptrVal is deleted, so ptrVal is never read while one of the
+     * scalar union members is in use.
+     */
+    void destroy() noexcept
+    {
+        switch (type) {
+            case Type::STRING:
+                delete static_cast<stringType *>(ptrVal);
+                break;
+            case Type::ARRAY:
+                delete static_cast<arrayType *>(ptrVal);
+                break;
+            case Type::MAP:
+                delete static_cast<mapType *>(ptrVal);
+                break;
+            default:
+                break;
+        }
+        type = Type::NULLPTR;
+        ptrVal = nullptr;
+    }
+
+public:
+    /**
+     * Copy constructor of the Variant class.
+     *
+     * @param v is the Variant instance that should be cloned.
+     */
+    Variant(const Variant &v) : ptrVal(nullptr) { copy(v); }
+
+    /**
+     * Move constructor of the Variant class.
+     *
+     * @param v is the reference to the Variant instance that should be moved,
+     * this instance represents null afterwards.
+     */
+    Variant(Variant &&v) noexcept : ptrVal(nullptr) { move(std::move(v)); }
+
+    /**
+     * Default constructor. Type is set to Type::NULLPTR.
+     */
+    Variant() : ptrVal(nullptr) { setNull(); }
+
+    /**
+     * Default destructor, frees any memory that was allocated on the heap.
+     */
+    ~Variant() { destroy(); }
+
+    /**
+     * Constructor for null values. Initializes the variant as null value.
+     */
+    Variant(std::nullptr_t) : ptrVal(nullptr) { setNull(); }
+
+    /**
+     * Constructor for boolean values.
+     *
+     * @param b boolean value.
+     */
+    Variant(boolType b) : ptrVal(nullptr) { setBool(b); }
+
+    /**
+     * Constructor for integer values.
+     *
+     * @param i integer value.
+     */
+    Variant(intType i) : ptrVal(nullptr) { setInt(i); }
+
+    /**
+     * Constructor for double values.
+     *
+     * @param d double value.
+     */
+    Variant(doubleType d) : ptrVal(nullptr) { setDouble(d); }
+
+    /**
+     * Constructor for string values. The given string is copied and managed by
+     * the new Variant instance.
+     *
+     * @param s is a pointer to a zero terminated C-Style string used as string
+     * value.
+     */
+    Variant(const char *s) : ptrVal(nullptr) { setString(s); }
+
+    /**
+     * Constructor for array values. The array is passed by value and moved
+     * into the new Variant instance.
+     *
+     * @param a is the array.
+     */
+    Variant(arrayType a) : ptrVal(nullptr) { setArray(std::move(a)); }
+
+    /**
+     * Constructor for map values. The map is passed by value and moved into
+     * the new Variant instance.
+     *
+     * @param m is the map.
+     */
+    Variant(mapType m) : ptrVal(nullptr) { setMap(std::move(m)); }
+
+    /**
+     * Copy assignment operator. Assigning a variant to itself is a no-op.
+     */
+    Variant &operator=(const Variant &v)
+    {
+        copy(v);
+        return *this;
+    }
+
+    /**
+     * Move assignment operator. Moving a variant to itself is a no-op.
+     */
+    Variant &operator=(Variant &&v) noexcept
+    {
+        move(std::move(v));
+        return *this;
+    }
+
+    /**
+     * Assign nullptr_t operator (allows to write Variant v = nullptr).
+     */
+    Variant &operator=(std::nullptr_t)
+    {
+        setNull();
+        return *this;
+    }
+
+    /**
+     * Assign a boolean value.
+     *
+     * @param b is the boolean value to which the variant should be set.
+     */
+    Variant &operator=(boolType b)
+    {
+        setBool(b);
+        return *this;
+    }
+
+    /**
+     * Assign an integer value.
+     *
+     * @param i is the integer value to which the variant should be set.
+     */
+    Variant &operator=(intType i)
+    {
+        setInt(i);
+        return *this;
+    }
+
+    /**
+     * Assign a double value.
+     *
+     * @param d is the double value to which the variant should be set.
+     */
+    Variant &operator=(doubleType d)
+    {
+        setDouble(d);
+        return *this;
+    }
+
+    /**
+     * Assign a zero terminated const char array.
+     *
+     * @param s is the zero terminated const char array to which the variant
+     * should be set.
+     */
+    Variant &operator=(const char *s)
+    {
+        setString(s);
+        return *this;
+    }
+
+    /**
+     * Checks whether this Variant instance represents the nullptr.
+     *
+     * @return true if the Variant instance represents the nullptr, false
+     * otherwise.
+     */
+    bool isNull() const { return type == Type::NULLPTR; }
+
+    /**
+     * Checks whether this Variant instance is a boolean.
+     *
+     * @return true if the Variant instance is a boolean, false otherwise.
+     */
+    bool isBool() const { return type == Type::BOOL; }
+
+    /**
+     * Checks whether this Variant instance is an integer.
+     *
+     * @return true if the Variant instance is an integer, false otherwise.
+     */
+    bool isInt() const { return type == Type::INT; }
+
+    /**
+     * Checks whether this Variant instance is a double.
+     *
+     * @return true if the Variant instance is a double, false otherwise.
+     */
+    bool isDouble() const { return type == Type::DOUBLE; }
+
+    /**
+     * Checks whether this Variant instance is a string.
+     *
+     * @return true if the Variant instance is a string, false otherwise.
+     */
+    bool isString() const { return type == Type::STRING; }
+
+    /**
+     * Checks whether this Variant instance is an array.
+     *
+     * @return true if the Variant instance is an array, false otherwise.
+     */
+    bool isArray() const { return type == Type::ARRAY; }
+
+    /**
+     * Checks whether this Variant instance is a map.
+     *
+     * @return true if the Variant instance is a map, false otherwise.
+     */
+    bool isMap() const { return type == Type::MAP; }
+
+    /**
+     * Returns the Variant boolean value. Performs no type conversion. Throws a
+     * TypeException if the underlying type is not a boolean.
+     *
+     * @return the boolean value.
+     */
+    boolType asBool() const
+    {
+        if (isBool()) {
+            return boolVal;
+        }
+        throw TypeException{getType(), Type::BOOL};
+    }
+
+    /**
+     * Returns the Variant integer value. Performs no type conversion. Throws a
+     * TypeException if the underlying type is not an integer.
+     *
+     * @return the integer value.
+     */
+    intType asInt() const
+    {
+        if (isInt()) {
+            return intVal;
+        }
+        throw TypeException{getType(), Type::INT};
+    }
+
+    /**
+     * Returns the Variant double value. Performs no type conversion. Throws a
+     * TypeException if the underlying type is not a double.
+     *
+     * @return the double value.
+     */
+    doubleType asDouble() const
+    {
+        if (isDouble()) {
+            return doubleVal;
+        }
+        throw TypeException{getType(), Type::DOUBLE};
+    }
+
+    /**
+     * Returns a const reference to the string value. Performs no type
+     * conversion. Throws a TypeException if the underlying type is not a
+     * string.
+     *
+     * @return the string value as const reference.
+     */
+    const stringType &asString() const
+    {
+        return asObj<stringType>(Type::STRING);
+    }
+
+    /**
+     * Returns a reference to the string value. Performs no type conversion.
+     * Throws a TypeException if the underlying type is not a string.
+     *
+     * @return the string value as reference.
+     */
+    stringType &asString() { return asObj<stringType>(Type::STRING); }
+
+    /**
+     * Returns a const reference to the array value. Performs no type
+     * conversion. Throws a TypeException if the underlying type is not an
+     * array.
+     *
+     * @return the array value as const reference.
+     */
+    const arrayType &asArray() const { return asObj<arrayType>(Type::ARRAY); }
+
+    /**
+     * Returns a reference to the array value. Performs no type conversion.
+     * Throws a TypeException if the underlying type is not an array.
+     *
+     * @return the array value as reference.
+     */
+    arrayType &asArray() { return asObj<arrayType>(Type::ARRAY); }
+
+    /**
+     * Returns a const reference to the map value. Performs no type
+     * conversion. Throws a TypeException if the underlying type is not a map.
+     *
+     * @return the map value as const reference.
+     */
+    const mapType &asMap() const { return asObj<mapType>(Type::MAP); }
+
+    /**
+     * Returns a reference to the map value. Performs no type conversion.
+     * Throws a TypeException if the underlying type is not a map.
+     *
+     * @return the map value as reference.
+     */
+    mapType &asMap() { return asObj<mapType>(Type::MAP); }
+
+    /**
+     * Returns the value of the Variant as boolean, performs type conversion.
+     *
+     * @return the Variant value converted to a boolean value.
+     */
+    boolType toBool() const;
+
+    /**
+     * Returns the value of the Variant as integer, performs type conversion.
+     *
+     * @return the Variant value converted to an integer value.
+     */
+    intType toInt() const;
+
+    /**
+     * Returns the value of the Variant as double, performs type conversion.
+     *
+     * @return the Variant value converted to a double value.
+     */
+    doubleType toDouble() const;
+
+    /**
+     * Returns the value of the Variant as string, performs type conversion.
+     *
+     * @param escape if set to true, adds double quotes to strings and escapes
+     * them properly (resulting in a more or less JSONesque output).
+     * @return the value of the variant as string.
+     */
+    stringType toString(bool escape = false) const;
+
+    /**
+     * Sets the variant to null.
+     */
+    void setNull() { destroy(); }
+
+    /**
+     * Sets the variant to the given boolean value.
+     *
+     * @param b is the new boolean value.
+     */
+    void setBool(boolType b)
+    {
+        destroy();
+        type = Type::BOOL;
+        boolVal = b;
+    }
+
+    /**
+     * Sets the variant to the given integer value.
+     *
+     * @param i is the new integer value.
+     */
+    void setInt(intType i)
+    {
+        destroy();
+        type = Type::INT;
+        intVal = i;
+    }
+
+    /**
+     * Sets the variant to the given double value.
+     *
+     * @param d is the new double value.
+     */
+    void setDouble(doubleType d)
+    {
+        destroy();
+        type = Type::DOUBLE;
+        doubleVal = d;
+    }
+
+    /**
+     * Sets the variant to the given string value. An already existing string
+     * object is reused, otherwise a new one is allocated before the old value
+     * is released.
+     *
+     * @param s is the new string value, a zero terminated C-Style string.
+     */
+    void setString(const char *s)
+    {
+        if (isString()) {
+            asString().assign(s);
+        } else {
+            adopt(Type::STRING, new stringType(s));
+        }
+    }
+
+    /**
+     * Sets the variant to the given array value. An already existing array
+     * object is reused, otherwise a new one is allocated before the old value
+     * is released.
+     *
+     * @param a is the new array value.
+     */
+    void setArray(arrayType a)
+    {
+        if (isArray()) {
+            asArray().swap(a);
+        } else {
+            adopt(Type::ARRAY, new arrayType(std::move(a)));
+        }
+    }
+
+    /**
+     * Sets the variant to the given map value. An already existing map object
+     * is reused, otherwise a new one is allocated before the old value is
+     * released.
+     *
+     * @param m is the new map value.
+     */
+    void setMap(mapType m)
+    {
+        if (isMap()) {
+            asMap().swap(m);
+        } else {
+            adopt(Type::MAP, new mapType(std::move(m)));
+        }
+    }
+
+    /**
+     * Returns the current type of the Variant.
+     *
+     * @return the current type of the Variant.
+     */
+    Type getType() const { return type; }
+
+    /**
+     * Returns the name of the given variant type as C-style string.
+     */
+    static const char *getTypeName(Type type);
+
+    /**
+     * Returns the name of the type of this variant instance.
+     */
+    const char *getTypeName() const { return Variant::getTypeName(getType()); }
+
+    /**
+     * Prints the Variant to the output stream.
+     */
+    friend std::ostream &operator<<(std::ostream &os, const Variant &v)
+    {
+        return os << v.toString(true);
+    }
+
+    /**
+     * Prints a key value pair to the output stream.
+     */
+    friend std::ostream &operator<<(std::ostream &os,
+                                    const mapType::value_type &v)
+    {
+        // TODO: Use proper serialization function
+        return os << "\"" << v.first << "\": " << v.second.toString(true);
+    }
+
+    /*
+     * Comparison operators.
+     */
+
+    /**
+     * Compares two variants of the same type. Throws a TypeException if the
+     * types of the two variants differ. Two null variants are never less than
+     * each other.
+     */
+    friend bool operator<(const Variant &lhs, const Variant &rhs)
+    {
+        // If the types do not match, we can not do a meaningful comparison.
+        if (lhs.getType() != rhs.getType()) {
+            throw TypeException(lhs.getType(), rhs.getType());
+        }
+        switch (lhs.getType()) {
+            case Type::NULLPTR:
+                return false;
+            case Type::BOOL:
+                return lhs.boolVal < rhs.boolVal;
+            case Type::INT:
+                return lhs.intVal < rhs.intVal;
+            case Type::DOUBLE:
+                return lhs.doubleVal < rhs.doubleVal;
+            case Type::STRING:
+                return lhs.asString() < rhs.asString();
+            case Type::ARRAY:
+                return lhs.asArray() < rhs.asArray();
+            case Type::MAP:
+                return lhs.asMap() < rhs.asMap();
+        }
+        throw OusiaException("Internal Error! Unknown type!");
+    }
+    friend bool operator>(const Variant &lhs, const Variant &rhs)
+    {
+        return rhs < lhs;
+    }
+    friend bool operator<=(const Variant &lhs, const Variant &rhs)
+    {
+        return !(lhs > rhs);
+    }
+    friend bool operator>=(const Variant &lhs, const Variant &rhs)
+    {
+        return !(lhs < rhs);
+    }
+
+    /**
+     * Two variants are equal if they have the same type and the same value.
+     * Variants of different type are never equal, no exception is thrown.
+     */
+    friend bool operator==(const Variant &lhs, const Variant &rhs)
+    {
+        if (lhs.getType() != rhs.getType()) {
+            return false;
+        }
+        switch (lhs.getType()) {
+            case Type::NULLPTR:
+                return true;
+            case Type::BOOL:
+                return lhs.boolVal == rhs.boolVal;
+            case Type::INT:
+                return lhs.intVal == rhs.intVal;
+            case Type::DOUBLE:
+                return lhs.doubleVal == rhs.doubleVal;
+            case Type::STRING:
+                return lhs.asString() == rhs.asString();
+            case Type::ARRAY:
+                return lhs.asArray() == rhs.asArray();
+            case Type::MAP:
+                return lhs.asMap() == rhs.asMap();
+        }
+        throw OusiaException("Internal Error! Unknown type!");
+    }
+
+    friend bool operator!=(const Variant &lhs, const Variant &rhs)
+    {
+        return !(lhs == rhs);
+    }
+};
+}
+
+#endif /* _OUSIA_VARIANT_HPP_ */
+
diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp
new file mode 100644
index 0000000..e611842
--- /dev/null
+++ b/src/core/common/VariantReader.cpp
@@ -0,0 +1,625 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <sstream>
+
+#include "VariantReader.hpp"
+#include "Utils.hpp"
+
+namespace ousia {
+
+// TODO: Better error messages (like "Expected 'x' but got 'y'")
+// TODO: Replace delims with single char delim where possible
+// TODO: Use custom return value instead of std::pair
+// TODO: Allow buffered char reader to "fork"
+// TODO: Rename CharReader to shorter CharReader
+// TODO: Implement context in CharReader (to allow error messages to extract the
+// current line)
+
+/* Error Messages */
+
+// Message texts that are handed to the logger by the parsing code in this file
+
+// A character was read that is not allowed at the current position
+static const char *ERR_UNEXPECTED_CHAR = "Unexpected character";
+// The input (or the literal) ended although more characters were expected
+static const char *ERR_UNEXPECTED_END = "Unexpected literal end";
+// A line break was found inside a quoted string
+static const char *ERR_UNTERMINATED = "Unterminated literal";
+// A backslash inside a string was followed by an unsupported character
+static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence";
+static const char *ERR_INVALID_INTEGER = "Invalid integer value";
+// A part of a number does not fit into its internal representation
+static const char *ERR_TOO_LARGE = "Value too large to represent";
+
+/* Class Number */
+
+/**
+ * Class used internally to represent a number (integer or double). The number
+ * is represented by its components (base value a, nominator n, denominator d,
+ * exponent e, sign s and exponent sign sE).
+ */
+class Number {
+private:
+    /**
+     * Represents the part of the number: Base value a, nominator n, exponent e.
+     */
+    enum class Part { A, N, E };
+
+    /**
+     * State used in the parser state machine
+     */
+    enum class State {
+        INIT,
+        HAS_MINUS,
+        LEADING_ZERO,
+        LEADING_POINT,
+        INT,
+        HEX,
+        POINT,
+        EXP_INIT,
+        EXP_HAS_MINUS,
+        EXP
+    };
+
+    /**
+     * Returns the numeric value of the given ASCII character (returns 0 for
+     * '0', 1 for '1', 10 for 'A' and so on).
+     *
+     * @param c is the character for which the numeric value should be returned.
+     * @return the numeric value the character represents or -1 if the
+     * character is neither a digit nor a letter in the range A-O or a-o.
+     */
+    static int charValue(char c)
+    {
+        if (c >= '0' && c <= '9') {
+            return c & 0x0F;
+        }
+        if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) {
+            // The lower four bits of 'A'/'a' are 1, thus 'A' maps to 10
+            return (c & 0x0F) + 9;
+        }
+        return -1;
+    }
+
+    /**
+     * Checks whether "value * base + digit" can be calculated without
+     * exceeding the given maximum. The check itself can not overflow.
+     *
+     * @param value is the current, non-negative value.
+     * @param base is the positive base of the number system.
+     * @param digit is the digit that should be appended, 0 <= digit < base.
+     * @param max is the largest value that may be represented.
+     * @return true if the result is smaller or equal to max.
+     */
+    static bool fits(int64_t value, int base, int digit, int64_t max)
+    {
+        return value <= (max - digit) / base;
+    }
+
+    /**
+     * Appends the value of the character c to the internal number
+     * representation and reports any errors that might occur.
+     *
+     * @param c is the character that should be appended.
+     * @param base is the base in which the character should be interpreted.
+     * @param p specifies the part of the number the character belongs to.
+     * @param reader is the reader used for error reporting.
+     * @param logger is the logger to which errors are reported.
+     * @return true if the character was appended, false if the character is
+     * not a valid digit for the given base or if the part would become too
+     * large. In the latter cases the number is left unchanged.
+     */
+    bool appendChar(char c, int base, Part p, CharReader &reader,
+                    Logger &logger)
+    {
+        // Check whether the given character is valid
+        const int v = charValue(c);
+        if (v < 0 || v >= base) {
+            logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+            return false;
+        }
+
+        // Append the number to the specified part. The range is checked
+        // before the calculation: a signed overflow is undefined for the
+        // 64 bit parts and the 16 bit exponent might silently wrap around to
+        // a positive value again.
+        constexpr int64_t MAX64 = std::numeric_limits<int64_t>::max();
+        constexpr int64_t MAX16 = std::numeric_limits<int16_t>::max();
+        bool ok = false;
+        switch (p) {
+            case Part::A:
+                ok = fits(a, base, v, MAX64);
+                if (ok) {
+                    a = a * base + v;
+                }
+                break;
+            case Part::N:
+                ok = fits(n, base, v, MAX64) && fits(d, base, 0, MAX64);
+                if (ok) {
+                    n = n * base + v;
+                    d = d * base;
+                }
+                break;
+            case Part::E:
+                ok = fits(e, base, v, MAX16);
+                if (ok) {
+                    e = static_cast<int16_t>(e * base + v);
+                }
+                break;
+        }
+
+        // Report values which can not be represented
+        if (!ok) {
+            logger.errorAt(ERR_TOO_LARGE, reader);
+            return false;
+        }
+        return true;
+    }
+
+public:
+    /**
+     * Sign and exponent sign.
+     */
+    int8_t s, sE;
+
+    /**
+     * Exponent
+     */
+    int16_t e;
+
+    /**
+     * Base value, nominator, denominator
+     */
+    int64_t a, n, d;
+
+    /**
+     * Constructor of the number class. Initializes the number to positive
+     * zero.
+     */
+    Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {}
+
+    /**
+     * Returns the represented double value.
+     */
+    double doubleValue() const
+    {
+        return s * (a + (static_cast<double>(n) / static_cast<double>(d))) *
+               pow(10.0, static_cast<double>(sE * e));
+    }
+
+    /**
+     * Returns the represented integer value. Only a lossless operation, if the
+     * number is an integer (as can be checked via the isInt method), otherwise
+     * the exponent and the fractional value will be truncated.
+     */
+    int64_t intValue() const { return s * a; }
+
+    /**
+     * Returns true, if the number is an integer (has no fractional or
+     * exponential part).
+     */
+    bool isInt() const { return (n == 0) && (d == 1) && (e == 0); }
+
+    /**
+     * Tries to parse the number from the given stream and logs any errors to
+     * the given logger instance. Numbers are terminated by one of the given
+     * delimiters.
+     */
+    bool parse(CharReader &reader, Logger &logger,
+               const std::unordered_set<char> &delims);
+};
+
+/**
+ * Parses a number from the given reader using a state machine.
+ *
+ * Whitespace at the current reader position is consumed first. The characters
+ * are then peeked one by one until a whitespace character, one of the given
+ * delimiters or the end of the input is reached. The following forms are
+ * handled: an optional leading '-', a decimal integer part, a hexadecimal
+ * number introduced by "0x" or "0X", a fractional part introduced by '.' and
+ * an exponent introduced by 'e' or 'E' which may start with a '-'.
+ *
+ * @param reader is the CharReader from which the characters are read.
+ * @param logger is the Logger to which errors are reported.
+ * @param delims is the set of characters at which the number ends.
+ * @return true if the number ended in one of the states LEADING_ZERO, HEX,
+ * INT, POINT or EXP, false if an error was logged.
+ */
+bool Number::parse(CharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims)
+{
+ State state = State::INIT;
+ char c;
+
+ // Consume the first whitespace characters
+ reader.consumeWhitespace();
+
+ // Iterate over the FSM to extract numbers
+ while (reader.peek(c)) {
+ // Abort, once a delimiter or whitespace is reached
+ if (Utils::isWhitespace(c) || delims.count(c)) {
+ reader.resetPeek();
+ break;
+ }
+
+ // The character is not a whitespace character and not a delimiter
+ switch (state) {
+ case State::INIT:
+ case State::HAS_MINUS:
+ switch (c) {
+ case '-':
+ // Do not allow multiple minus signs
+ if (state == State::HAS_MINUS) {
+ logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+ return false;
+ }
+ state = State::HAS_MINUS;
+ s = -1;
+ break;
+ case '0':
+ // Remember a leading zero for the detection of "0x"
+ state = State::LEADING_ZERO;
+ break;
+ case '.':
+ // Remember a leading point as ".eXXX" is invalid
+ state = State::LEADING_POINT;
+ break;
+ default:
+ // Any other character must be a decimal digit
+ state = State::INT;
+ if (!appendChar(c, 10, Part::A, reader, logger)) {
+ return false;
+ }
+ break;
+ }
+ break;
+ case State::LEADING_ZERO:
+ if (c == 'x' || c == 'X') {
+ state = State::HEX;
+ break;
+ }
+ // fallthrough
+ case State::INT:
+ switch (c) {
+ case '.':
+ state = State::POINT;
+ break;
+ case 'e':
+ case 'E':
+ state = State::EXP_INIT;
+ break;
+ default:
+ state = State::INT;
+ if (!appendChar(c, 10, Part::A, reader, logger)) {
+ return false;
+ }
+ break;
+ }
+ break;
+ case State::HEX:
+ // Hexadecimal numbers have neither a fractional part nor an
+ // exponent, every character must be a hexadecimal digit
+ if (!appendChar(c, 16, Part::A, reader, logger)) {
+ return false;
+ }
+ break;
+ case State::LEADING_POINT:
+ case State::POINT:
+ switch (c) {
+ case 'e':
+ case 'E':
+ // At least one digit must follow a leading point
+ if (state == State::LEADING_POINT) {
+ logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+ return false;
+ }
+ state = State::EXP_INIT;
+ break;
+ default:
+ state = State::POINT;
+ if (!appendChar(c, 10, Part::N, reader, logger)) {
+ return false;
+ }
+ break;
+ }
+ break;
+ case State::EXP_HAS_MINUS:
+ case State::EXP_INIT:
+ if (c == '-') {
+ // Do not allow multiple minus signs in the exponent
+ if (state == State::EXP_HAS_MINUS) {
+ logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+ return false;
+ }
+ state = State::EXP_HAS_MINUS;
+ sE = -1;
+ } else {
+ state = State::EXP;
+ if (!appendChar(c, 10, Part::E, reader, logger)) {
+ return false;
+ }
+ }
+ break;
+ case State::EXP:
+ if (!appendChar(c, 10, Part::E, reader, logger)) {
+ return false;
+ }
+ break;
+ }
+ reader.consumePeek();
+ }
+
+ // States in which ending is valid. Log an error in other states
+ // NOTE(review): HEX is entered directly after "0x", thus a "0x" without
+ // any following digit is accepted here and results in the value zero
+ if (state == State::LEADING_ZERO || state == State::HEX ||
+ state == State::INT || state == State::POINT ||
+ state == State::EXP) {
+ return true;
+ }
+ logger.errorAt(ERR_UNEXPECTED_END, reader);
+ return false;
+}
+
+
+/* Class Reader */
+
+// States of the state machines in the parse functions below. Each function
+// only uses a subset of these states.
+
+// Initial state of parseString and parseArray (no opening character read yet)
+static const int STATE_INIT = 0;
+// Inside a string (parseString, parseUnescapedString)
+static const int STATE_IN_STRING = 1;
+// Inside an array, the next element is expected (parseArray)
+static const int STATE_IN_ARRAY = 2;
+// An array element was read, a comma is expected (parseArray)
+static const int STATE_EXPECT_COMMA = 3;
+// A backslash was read inside a string (parseString)
+static const int STATE_ESCAPE = 4;
+// The last characters were whitespace (parseUnescapedString)
+static const int STATE_WHITESPACE = 5;
+// An error occurred, skip to the next comma (parseArray)
+static const int STATE_RESYNC = 6;
+
+/**
+ * Helper function which logs the given error message at the current position
+ * of the reader and builds the return value of a failed parse function.
+ *
+ * @param reader is the reader at which the error occurred.
+ * @param logger is the logger to which the error is reported.
+ * @param err is the error message.
+ * @param res is the (partial) result that should be returned anyway.
+ * @return a pair with "false" as first and res as second element.
+ */
+template <class T>
+static std::pair<bool, T> error(CharReader &reader, Logger &logger,
+                                const char *err, T res)
+{
+    logger.errorAt(err, reader);
+    return std::pair<bool, T>(false, std::move(res));
+}
+
+/**
+ * Parses a string which is enclosed by single or double quotes.
+ *
+ * Whitespace at the current reader position is consumed first. The string
+ * ends at the first unescaped occurrence of the quote character it was
+ * started with. A line break inside the string is reported as an error.
+ *
+ * @param reader is the CharReader from which the characters are read.
+ * @param logger is the Logger to which errors are reported.
+ * @param delims is an optional set of delimiters. It is only used to select
+ * the error message if the first character is not a quote.
+ * @return a pair of a success flag and the string read so far. The flag is
+ * true if the closing quote was found. Note that an invalid escape sequence
+ * is logged but does not change the flag.
+ */
+std::pair<bool, std::string> VariantReader::parseString(
+ CharReader &reader, Logger &logger,
+ const std::unordered_set<char> *delims)
+{
+ // Initialize the internal state
+ int state = STATE_INIT;
+ char quote = 0;
+ std::stringstream res;
+
+ // Consume all whitespace
+ reader.consumeWhitespace();
+
+ // State machine which iterates over each character in the stream
+ // TODO: Combination of peeking and consumePeek is stupid as consumePeek is
+ // the default (read and putBack would obviously be better, yet the latter
+ // is not trivial to implement in the current CharReader).
+ char c;
+ while (reader.peek(c)) {
+ switch (state) {
+ case STATE_INIT:
+ // The first character must be the opening quote, remember it
+ if (c == '"' || c == '\'') {
+ quote = c;
+ state = STATE_IN_STRING;
+ break;
+ } else if (delims && delims->count(c)) {
+ return error(reader, logger, ERR_UNEXPECTED_END, res.str());
+ }
+ return error(reader, logger, ERR_UNEXPECTED_CHAR, res.str());
+ case STATE_IN_STRING:
+ if (c == quote) {
+ // Closing quote, the string is complete
+ reader.consumePeek();
+ return std::make_pair(true, res.str());
+ } else if (c == '\\') {
+ state = STATE_ESCAPE;
+ reader.consumePeek();
+ break;
+ } else if (c == '\n') {
+ return error(reader, logger, ERR_UNTERMINATED, res.str());
+ }
+ res << c;
+ reader.consumePeek();
+ break;
+ case STATE_ESCAPE:
+ // Handle all possible special escape characters
+ switch (c) {
+ case 'b':
+ res << '\b';
+ break;
+ case 'f':
+ res << '\f';
+ break;
+ case 'n':
+ res << '\n';
+ break;
+ case 'r':
+ res << '\r';
+ break;
+ case 't':
+ res << '\t';
+ break;
+ case 'v':
+ res << '\v';
+ break;
+ case '\'':
+ res << '\'';
+ break;
+ case '"':
+ res << '"';
+ break;
+ case '\\':
+ res << '\\';
+ break;
+ case '\n':
+ // An escaped line break adds nothing to the result
+ break;
+ case 'x':
+ // TODO: Parse Latin-1 sequence hex XX
+ // (currently only the 'x' itself is skipped)
+ break;
+ case 'u':
+ // TODO: Parse 16-Bit unicode character hex XXXX
+ // (currently only the 'u' itself is skipped)
+ break;
+ default:
+ if (Utils::isNumeric(c)) {
+ // TODO: Parse octal 000 sequence
+ } else {
+ logger.errorAt(ERR_INVALID_ESCAPE, reader);
+ }
+ break;
+ }
+
+ // Switch back to the "normal" state
+ state = STATE_IN_STRING;
+ reader.consumePeek();
+ break;
+ }
+ }
+ // The input ended before the closing quote was found
+ return error(reader, logger, ERR_UNEXPECTED_END, res.str());
+}
+
+/**
+ * Parses a comma separated list of values as array.
+ *
+ * If delim is zero, the array must start with '[' and ends with ']'. If a
+ * delimiter is given, no opening character is expected and the array ends
+ * with the given delimiter. The elements are read via parseGeneric. If an
+ * element can not be read, the characters up to the next comma are skipped.
+ *
+ * @param reader is the CharReader from which the characters are read.
+ * @param logger is the Logger to which errors are reported.
+ * @param delim is the character at which the array ends or zero.
+ * @return a pair of a success flag and the elements read so far. The flag is
+ * true if the end of the array was reached and no error occurred.
+ */
+std::pair<bool, Variant::arrayType> VariantReader::parseArray(
+ CharReader &reader, Logger &logger, char delim)
+{
+ Variant::arrayType res;
+ bool hadError = false;
+ int state = delim ? STATE_IN_ARRAY : STATE_INIT;
+ delim = delim ? delim : ']';
+ char c;
+
+ // Consume all whitespace
+ reader.consumeWhitespace();
+
+ // Iterate over the characters, use the parseGeneric function to read the
+ // elements
+ while (reader.peek(c)) {
+ // Generically handle the end of the array
+ if (state != STATE_INIT && c == delim) {
+ reader.consumePeek();
+ return std::make_pair(!hadError, res);
+ }
+
+ switch (state) {
+ case STATE_INIT:
+ if (c != '[') {
+ return error(reader, logger, ERR_UNEXPECTED_CHAR, res);
+ }
+ state = STATE_IN_ARRAY;
+ reader.consumePeek();
+ break;
+ case STATE_IN_ARRAY: {
+ // Try to read an element using the parseGeneric function
+ reader.resetPeek();
+ auto elem = parseGeneric(reader, logger, {',', delim});
+ // The element is stored even if parseGeneric reported an error
+ res.push_back(elem.second);
+
+ // If the reader had no error, expect a comma, otherwise skip
+ // to the next comma in the stream
+ if (elem.first) {
+ state = STATE_EXPECT_COMMA;
+ } else {
+ state = STATE_RESYNC;
+ hadError = true;
+ }
+ break;
+ }
+ case STATE_EXPECT_COMMA:
+ // Skip whitespace
+ if (c == ',') {
+ state = STATE_IN_ARRAY;
+ } else if (!Utils::isWhitespace(c)) {
+ hadError = true;
+ state = STATE_RESYNC;
+ logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+ }
+ reader.consumePeek();
+ break;
+ case STATE_RESYNC:
+ // Just wait for another comma to arrive
+ if (c == ',') {
+ state = STATE_IN_ARRAY;
+ }
+ reader.consumePeek();
+ break;
+ }
+ }
+ // The input ended before the end of the array was found
+ return error(reader, logger, ERR_UNEXPECTED_END, res);
+}
+
+/**
+ * Reads characters until one of the given delimiters or the end of the input
+ * is reached. Leading and trailing whitespace is consumed, but is not part of
+ * the result. Whitespace between non-whitespace characters is preserved.
+ *
+ * @param reader is the CharReader instance providing the character data. If a
+ * delimiter is found, it is not consumed.
+ * @param logger is not used by this function.
+ * @param delims is the set of characters which terminate the string.
+ * @return a pair with the first element always set to true and the second
+ * element containing the text that was read.
+ */
+std::pair<bool, std::string> VariantReader::parseUnescapedString(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
+{
+    // Text that is part of the result
+    std::string text;
+    // Whitespace read since the last non-whitespace character. It is only
+    // appended to the text once another non-whitespace character follows.
+    std::string pending;
+    char c;
+
+    // Skip leading whitespace
+    reader.consumeWhitespace();
+
+    while (reader.peek(c)) {
+        // Stop at a delimiter and undo the peek, so the delimiter stays
+        // unconsumed
+        if (delims.count(c)) {
+            reader.resetPeek();
+            break;
+        }
+
+        if (Utils::isWhitespace(c)) {
+            pending.push_back(c);
+        } else {
+            // Flush whitespace that turned out not to be trailing
+            text.append(pending);
+            pending.clear();
+            text.push_back(c);
+        }
+        reader.consumePeek();
+    }
+    return std::make_pair(true, text);
+}
+
+/**
+ * Reads a number from the given reader and makes sure it is an integer.
+ *
+ * @param reader is the CharReader instance providing the character data.
+ * @param logger is the logger instance to which errors are written.
+ * @param delims is the set of characters which terminate the number.
+ * @return a pair of a success flag and the integer value of the number. The
+ * flag is false if the number could not be parsed. If the number was parsed
+ * but is not an integer, the result of error() is returned.
+ */
+std::pair<bool, int64_t> VariantReader::parseInteger(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
+{
+    Number n;
+
+    // Abort if no number could be read at all
+    if (!n.parse(reader, logger, delims)) {
+        return std::make_pair(false, n.intValue());
+    }
+
+    // A valid number which is not an integer is an error
+    if (!n.isInt()) {
+        return error(reader, logger, ERR_INVALID_INTEGER, n.intValue());
+    }
+    return std::make_pair(true, n.intValue());
+}
+
+/**
+ * Reads a number from the given reader and returns it as double.
+ *
+ * @param reader is the CharReader instance providing the character data.
+ * @param logger is the logger instance to which errors are written.
+ * @param delims is the set of characters which terminate the number.
+ * @return a pair of the result of the number parser and the double value of
+ * the number.
+ */
+std::pair<bool, double> VariantReader::parseDouble(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
+{
+    Number n;
+    std::pair<bool, double> result;
+
+    // The number has to be parsed before its value is fetched
+    result.first = n.parse(reader, logger, delims);
+    result.second = n.doubleValue();
+    return result;
+}
+
+/**
+ * Reads the most specific value that can be found at the current position:
+ * a quoted string, a number, one of the literals "true", "false" and "null",
+ * or, in any other case, an unescaped string.
+ *
+ * @param reader is the CharReader instance providing the character data.
+ * @param logger is the logger instance to which errors are written.
+ * @param delims is the set of characters which terminate the value.
+ * @return a pair of a success flag and the variant that was read. If the
+ * input ends or a delimiter is found before any value, the result of error()
+ * is returned.
+ */
+std::pair<bool, Variant> VariantReader::parseGeneric(
+    CharReader &reader, Logger &logger,
+    const std::unordered_set<char> &delims)
+{
+    char c;
+
+    // Skip leading whitespace and look at the first character of the value
+    reader.consumeWhitespace();
+    if (!reader.peek(c)) {
+        return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
+    }
+
+    // A delimiter right at the start means that there is no value at all
+    if (delims.count(c)) {
+        return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
+    }
+
+    // A quote character starts an escaped string
+    if (c == '"' || c == '\'') {
+        auto str = parseString(reader, logger);
+        return std::make_pair(str.first, str.second.c_str());
+    }
+
+    // TODO: Parse struct descriptor if c is '['
+
+    // Anything starting with a digit or a minus sign may be a number
+    if (Utils::isNumeric(c) || c == '-') {
+        Number n;
+
+        // Parse on a fork of the reader, the fork is only committed if the
+        // number could be parsed
+        CharReaderFork fork = reader.fork();
+
+        // TODO: Fork logger
+
+        if (n.parse(fork, logger, delims)) {
+            fork.commit();
+            if (!n.isInt()) {
+                return std::make_pair(true, n.doubleValue());
+            }
+            return std::make_pair(
+                true, Variant{static_cast<Variant::intType>(n.intValue())});
+        }
+    }
+
+    // Everything else is read as unescaped string
+    auto str = parseUnescapedString(reader, logger, delims);
+
+    // Map the literals for boolean values and null to their variant
+    if (str.first) {
+        const std::string &text = str.second;
+        if (text == "true") {
+            return std::make_pair(true, Variant{true});
+        }
+        if (text == "false") {
+            return std::make_pair(true, Variant{false});
+        }
+        if (text == "null") {
+            return std::make_pair(true, Variant{nullptr});
+        }
+    }
+    return std::make_pair(str.first, str.second.c_str());
+}
+}
+
diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp
new file mode 100644
index 0000000..5e7c5d2
--- /dev/null
+++ b/src/core/common/VariantReader.hpp
@@ -0,0 +1,166 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file VariantReader.hpp
+ *
+ * Provides parsers for various micro formats. These formats include integers,
+ * doubles, strings, JSON and the Ousía struct notation.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_VARIANT_READER_HPP_
+#define _OUSIA_VARIANT_READER_HPP_
+
+#include <cstdint>
+#include <unordered_set>
+#include <utility>
+
+#include "CharReader.hpp"
+#include "Logger.hpp"
+#include "Variant.hpp"
+
+namespace ousia {
+
+/**
+ * The VariantReader class provides static functions for reading strings,
+ * numbers, arrays and generic values from a CharReader. All functions return
+ * a pair: the first element indicates whether the value was read
+ * successfully, the second element contains the value that was read.
+ */
+class VariantReader {
+private:
+    /**
+     * Parses a string which may either be enclosed by " or ', unescapes
+     * entities in the string as specified for JavaScript.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. The reader will be positioned after the
+     * terminating quote character or at the terminating delimiting character.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is an optional set of delimiters after which parsing has
+     * to be stopped (the delimiters may occur inside the actual string, but
+     * not outside). If nullptr is given, no delimiter is used and a complete
+     * string is read.
+     * @return a pair of a success flag and the unescaped string.
+     */
+    static std::pair<bool, std::string> parseString(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> *delims);
+
+public:
+    /**
+     * Parses a string which may either be enclosed by " or ', unescapes
+     * entities in the string as specified for JavaScript.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. The reader will be positioned after the
+     * terminating quote character or at the terminating delimiting character.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of delimiters after which parsing has to be
+     * stopped (the delimiters may occur inside the actual string, but not
+     * outside).
+     * @return a pair of a success flag and the unescaped string.
+     */
+    static std::pair<bool, std::string> parseString(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims)
+    {
+        return parseString(reader, logger, &delims);
+    }
+
+    /**
+     * Parses a string which may either be enclosed by " or ', unescapes
+     * entities in the string as specified for JavaScript. No delimiters are
+     * used, a complete string is read.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. The reader will be positioned after the
+     * terminating quote character.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @return a pair of a success flag and the unescaped string.
+     */
+    static std::pair<bool, std::string> parseString(CharReader &reader,
+                                                    Logger &logger)
+    {
+        return parseString(reader, logger, nullptr);
+    }
+
+    /**
+     * Extracts an unescaped string from the given CharReader instance. This
+     * function just reads text until one of the given delimiter characters is
+     * reached. Whitespace at the beginning and the end of the text is not
+     * included in the result.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. The reader will be positioned at the
+     * terminating delimiting character.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of characters which will terminate the string.
+     * These characters are not included in the result.
+     * @return a pair of a success flag and the string that was read.
+     */
+    static std::pair<bool, std::string> parseUnescapedString(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims);
+
+    /**
+     * Parses an integer from the given CharReader instance until one of the
+     * given delimiter characters is reached.
+     *
+     * @param reader is a reference to the CharReader instance from which the
+     * character data should be read. The reader will be positioned at the
+     * terminating delimiting character or directly after the integer.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of characters which will terminate the integer.
+     * @return a pair of a success flag and the integer that was read. Reading
+     * a number which is not an integer is treated as an error.
+     */
+    static std::pair<bool, int64_t> parseInteger(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims);
+
+    /**
+     * Parses a double from the given CharReader instance until one of the
+     * given delimiter characters is reached.
+     *
+     * @param reader is a reference to the CharReader instance from which the
+     * character data should be read. The reader will be positioned at the
+     * terminating delimiting character or directly after the number.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of characters which will terminate the number.
+     * @return a pair of a success flag and the double that was read.
+     */
+    static std::pair<bool, double> parseDouble(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims);
+
+    /**
+     * Parses an array of comma separated values. The individual values are
+     * read using parseGeneric.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delim is the character which terminates the array. If zero is
+     * given (the default), the array has to be enclosed in '[' and ']'.
+     * Otherwise the elements are read right away, without expecting an
+     * opening bracket, until the given character is reached.
+     * @return a pair of a success flag and the array elements that were read.
+     */
+    static std::pair<bool, Variant::arrayType> parseArray(
+        CharReader &reader, Logger &logger, char delim = 0);
+
+    /**
+     * Tries to parse the most specific item from the given stream until one of
+     * the given delimiters is reached or a meaningful literal has been read.
+     * The resulting variant represents the value that has been read: a quoted
+     * string, a number, one of the literals "true", "false" and "null", or an
+     * unescaped string.
+     *
+     * @param reader is a reference to the CharReader instance which is the
+     * source for the character data. The reader will be positioned at the
+     * terminating delimiting character or directly after the value.
+     * @param logger is the logger instance that should be used to log error
+     * messages and warnings.
+     * @param delims is a set of characters which will terminate the value.
+     * These characters are not included in the result.
+     * @return a pair of a success flag and the variant that was read.
+     */
+    static std::pair<bool, Variant> parseGeneric(
+        CharReader &reader, Logger &logger,
+        const std::unordered_set<char> &delims);
+};
+}
+
+#endif /* _OUSIA_VARIANT_READER_HPP_ */
+