diff options
Diffstat (limited to 'src/core/common')
| -rw-r--r-- | src/core/common/CharReader.cpp | 640 | ||||
| -rw-r--r-- | src/core/common/CharReader.hpp | 625 | ||||
| -rw-r--r-- | src/core/common/Exceptions.cpp | 46 | ||||
| -rw-r--r-- | src/core/common/Exceptions.hpp | 165 | ||||
| -rw-r--r-- | src/core/common/Logger.cpp | 219 | ||||
| -rw-r--r-- | src/core/common/Logger.hpp | 602 | ||||
| -rw-r--r-- | src/core/common/TextCursor.hpp | 168 | ||||
| -rw-r--r-- | src/core/common/Utils.cpp | 59 | ||||
| -rw-r--r-- | src/core/common/Utils.hpp | 110 | ||||
| -rw-r--r-- | src/core/common/Variant.cpp | 154 | ||||
| -rw-r--r-- | src/core/common/Variant.hpp | 761 | ||||
| -rw-r--r-- | src/core/common/VariantReader.cpp | 625 | ||||
| -rw-r--r-- | src/core/common/VariantReader.hpp | 166 | 
13 files changed, 4340 insertions, 0 deletions
diff --git a/src/core/common/CharReader.cpp b/src/core/common/CharReader.cpp new file mode 100644 index 0000000..4bd81ed --- /dev/null +++ b/src/core/common/CharReader.cpp @@ -0,0 +1,640 @@ +/* +    Ousía +    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <algorithm> +#include <cassert> +#include <limits> +#include <sstream> + +#include "CharReader.hpp" +#include "Utils.hpp" + +namespace ousia { + +/* Helper functions */ + +/** + * istreamReadCallback is used internally by the Buffer class to stream data + * from an input stream. + * + * @param buf points at the target memory region. + * @param size is the requested number of bytes. + * @param userData is a pointer at some user defined data. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. 
+ */ +static size_t istreamReadCallback(char *buf, size_t size, void *userData) +{ +	return (static_cast<std::istream *>(userData))->read(buf, size).gcount(); +} + +/* Class Buffer */ + +Buffer::Buffer(ReadCallback callback, void *userData) +    : callback(callback), +      userData(userData), +      reachedEnd(false), +      startBucket(buckets.end()), +      endBucket(buckets.end()), +      startOffset(0), +      firstDead(0) +{ +	// Load a first block of data from the stream +	stream(); +	startBucket = buckets.begin(); +} + +Buffer::Buffer(std::istream &istream) : Buffer(istreamReadCallback, &istream) {} + +Buffer::Buffer(const std::string &str) +    : callback(nullptr), +      userData(nullptr), +      reachedEnd(true), +      startBucket(buckets.end()), +      endBucket(buckets.end()), +      startOffset(0), +      firstDead(0) +{ +	// Copy the given string into a first buffer and set the start buffer +	// correctly +	Bucket &bucket = nextBucket(); +	bucket.resize(str.size()); +	std::copy(str.begin(), str.end(), bucket.begin()); +	startBucket = buckets.begin(); +} + +#ifndef NDEBUG +Buffer::~Buffer() +{ +	// Make sure all cursors have been deleted +	for (bool cursor_alive : alive) { +		assert(!cursor_alive); +	} +} +#endif + +void Buffer::advance(BucketIterator &it) +{ +	it++; +	if (it == buckets.end()) { +		it = buckets.begin(); +	} +} + +void Buffer::advance(BucketList::const_iterator &it) const +{ +	it++; +	if (it == buckets.cend()) { +		it = buckets.cbegin(); +	} +} + +Buffer::Bucket &Buffer::nextBucket() +{ +	constexpr size_t MAXVAL = std::numeric_limits<size_t>::max(); + +	// Fetch the minimum bucket index +	size_t minBucketIdx = MAXVAL; +	for (size_t i = 0; i < cursors.size(); i++) { +		if (alive[i]) { +			// Fetch references to the bucket and the cursor +			const Cursor &cur = cursors[i]; +			const Bucket &bucket = *(cur.bucket); + +			// Increment the bucket index by one, if the cursor is at the end +			// of the bucket (only valid if the 
LOOKBACK_SIZE is set to zero) +			size_t bIdx = cur.bucketIdx; +			if (LOOKBACK_SIZE == 0 && cur.bucketOffs == bucket.size()) { +				bIdx++; +			} + +			// Decrement the bucket index by one, if the previous bucket still +			// needs to be reached and cannot be overridden +			if (bIdx > 0 && cur.bucketOffs < LOOKBACK_SIZE) { +				bIdx--; +			} + +			// Set the bucket index to the minium +			minBucketIdx = std::min(minBucketIdx, bIdx); +		} +	} + +	// If there is space between the current start bucket and the read +	// cursor, the start bucket can be safely overridden. +	if (minBucketIdx > 0 && minBucketIdx != MAXVAL) { +		// All cursor bucket indices will be decreased by one +		for (size_t i = 0; i < cursors.size(); i++) { +			cursors[i].bucketIdx--; +		} + +		// Increment the start offset +		startOffset += startBucket->size(); + +		// The old start bucket is the new end bucket +		endBucket = startBucket; + +		// Advance the start bucket, wrap around at the end of the list +		advance(startBucket); +	} else { +		// No free bucket, insert a new one before the start bucket +		endBucket = buckets.emplace(startBucket); +	} +	return *endBucket; +} + +Buffer::CursorId Buffer::nextCursor() +{ +	bool hasCursor = false; +	CursorId res = 0; + +	// Search for the next free cursor starting with minNextCursorId +	for (size_t i = firstDead; i < alive.size(); i++) { +		if (!alive[i]) { +			res = i; +			hasCursor = true; +			break; +		} +	} + +	// Add a new cursor to the cursor list if no cursor is currently free +	if (!hasCursor) { +		res = cursors.size(); +		cursors.resize(res + 1); +		alive.resize(res + 1); +	} + +	// The next dead cursor is at least the next cursor +	firstDead = res + 1; + +	// Mark the new cursor as alive +	alive[res] = true; + +	return res; +} + +void Buffer::stream() +{ +	// Fetch the bucket into which the data should be inserted, make sure it +	// has the correct size +	Bucket &tar = nextBucket(); +	tar.resize(REQUEST_SIZE); + +	// Read data from the stream 
into the target buffer +	size_t size = callback(tar.data(), REQUEST_SIZE, userData); + +	// If not enough bytes were returned, we're at the end of the stream +	if (size < REQUEST_SIZE) { +		tar.resize(size); +		reachedEnd = true; +	} +} + +Buffer::CursorId Buffer::createCursor() +{ +	CursorId res = nextCursor(); +	cursors[res].bucket = startBucket; +	cursors[res].bucketIdx = 0; +	cursors[res].bucketOffs = 0; +	return res; +} + +Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref) +{ +	CursorId res = nextCursor(); +	cursors[res] = cursors[ref]; +	return res; +} + +void Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to) +{ +	cursors[to] = cursors[from]; +} + +void Buffer::deleteCursor(Buffer::CursorId cursor) +{ +	alive[cursor] = false; +	firstDead = std::min(firstDead, cursor); +} + +size_t Buffer::offset(Buffer::CursorId cursor) const +{ +	const Cursor &cur = cursors[cursor]; +	size_t offs = startOffset + cur.bucketOffs; +	BucketList::const_iterator it = startBucket; +	while (it != cur.bucket) { +		offs += it->size(); +		advance(it); +	} +	return offs; +} + +size_t Buffer::moveForward(CursorId cursor, size_t relativeOffs) +{ +	size_t offs = relativeOffs; +	Cursor &cur = cursors[cursor]; +	while (offs > 0) { +		// Fetch the current bucket of the cursor +		Bucket &bucket = *(cur.bucket); + +		// If there is enough space in the bucket, simply increment the bucket +		// offset by the given relative offset +		const size_t space = bucket.size() - cur.bucketOffs; +		if (space >= offs) { +			cur.bucketOffs += offs; +			break; +		} else { +			// Go to the end of the current bucket otherwise +			offs -= space; +			cur.bucketOffs = bucket.size(); + +			// Go to the next bucket +			if (cur.bucket != endBucket) { +				// Go to the next bucket +				advance(cur.bucket); +				cur.bucketIdx++; +				cur.bucketOffs = 0; +			} else { +				// Abort, if there is no more data to stream, otherwise just +				// load new data +				if (reachedEnd) { +					return 
relativeOffs - offs; +				} +				stream(); +			} +		} +	} +	return relativeOffs; +} + +size_t Buffer::moveBackward(CursorId cursor, size_t relativeOffs) +{ +	size_t offs = relativeOffs; +	Cursor &cur = cursors[cursor]; +	while (offs > 0) { +		// If there is enough space in the bucket, simply decrement the bucket +		// offset by the given relative offset +		if (cur.bucketOffs >= offs) { +			cur.bucketOffs -= offs; +			break; +		} else { +			// Go to the beginning of the current bucket otherwise +			offs -= cur.bucketOffs; +			cur.bucketOffs = 0; + +			// Abort if there is no more bucket to got back to +			if (cur.bucketIdx == 0) { +				return relativeOffs - offs; +			} + +			// Go to the previous bucket (wrap around at the beginning of the +			// list) +			if (cur.bucket == buckets.begin()) { +				cur.bucket = buckets.end(); +			} +			cur.bucket--; + +			// Decrement the bucket index, and set the current offset to the +			// end of the new bucket +			cur.bucketIdx--; +			cur.bucketOffs = cur.bucket->size(); +		} +	} +	return relativeOffs; +} + +ssize_t Buffer::moveCursor(CursorId cursor, ssize_t relativeOffs) +{ +	if (relativeOffs > 0) { +		return moveForward(cursor, relativeOffs); +	} else if (relativeOffs < 0) { +		return -moveBackward(cursor, -relativeOffs); +	} else { +		return 0; +	} +} + +bool Buffer::atEnd(Buffer::CursorId cursor) const +{ +	const Cursor &c = cursors[cursor]; +	return reachedEnd && +	       (c.bucket == endBucket && c.bucketOffs == endBucket->size()); +} + +bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr) +{ +	Cursor &cur = cursors[cursor]; +	while (true) { +		// Reference at the current bucket +		Bucket &bucket = *(cur.bucket); + +		// If there is still data in the current bucket, return this data +		if (cur.bucketOffs < bucket.size()) { +			c = bucket[cur.bucketOffs]; +			if (incr) { +				cur.bucketOffs++; +			} +			return true; +		} else if (cur.bucket == endBucket) { +			// Return false if the end of the stream has been 
reached, otherwise +			// load new data +			if (reachedEnd) { +				return false; +			} +			stream(); +		} + +		// Go to the next bucket +		cur.bucketIdx++; +		cur.bucketOffs = 0; +		advance(cur.bucket); +	} +} + +bool Buffer::read(Buffer::CursorId cursor, char &c) +{ +	return fetchCharacter(cursor, c, true); +} + +bool Buffer::fetch(CursorId cursor, char &c) +{ +	return fetchCharacter(cursor, c, false); +} + +/* CharReader::Cursor class */ + +void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer, +                                CharReader::Cursor &cursor) +{ +	// Copy the cursor position +	buffer->copyCursor(cursor.cursor, this->cursor); + +	// Copy the state +	line = cursor.line; +	column = cursor.column; +} + +/* CharReader class */ + +CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line, +                       size_t column) +    : buffer(buffer), +      readCursor(buffer->createCursor(), line, column), +      peekCursor(buffer->createCursor(), line, column), +      coherent(true) +{ +} + +CharReader::CharReader(const std::string &str, size_t line, size_t column) +    : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column) +{ +} + +CharReader::CharReader(std::istream &istream, size_t line, size_t column) +    : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column) +{ +} + +CharReader::~CharReader() +{ +	buffer->deleteCursor(readCursor.cursor); +	buffer->deleteCursor(peekCursor.cursor); +} + +bool CharReader::readAtCursor(Cursor &cursor, char &c) +{ +	// Return false if we're at the end of the stream +	if (!buffer->read(cursor.cursor, c)) { +		return false; +	} + +	// Substitute linebreak sequences with a single '\n' +	if (c == '\n' || c == '\r') { +		// Remember the raw character before normalising it to a single \n +		const char first = c; +		c = '\n'; +		// Swallow the next character only if it is the other linebreak +		// character, i.e. completes a "\r\n" or "\n\r" pair +		char c2; +		if (buffer->read(cursor.cursor, c2)) { +			if ((c2 != '\n' && c2 != '\r') || c2 == first) { +				
buffer->moveCursor(cursor.cursor, -1); +			} +		} +	} + +	// Count lines and columns +	if (c == '\n') { +		// A linebreak was reached, go to the next line +		cursor.line++; +		cursor.column = 1; +	} else { +		// Ignore UTF-8 continuation bytes +		if (!((c & 0x80) && !(c & 0x40))) { +			cursor.column++; +		} +	} +	return true; +} + +bool CharReader::peek(char &c) +{ +	// If the reader was coherent, update the peek cursor state +	if (coherent) { +		peekCursor.assign(buffer, readCursor); +		coherent = false; +	} + +	// Read a character from the peek cursor +	return readAtCursor(peekCursor, c); +} + +bool CharReader::read(char &c) +{ +	// Read a character from the buffer at the current read cursor +	bool res = readAtCursor(readCursor, c); + +	// Set the peek position to the current read position, if reading was not +	// coherent +	if (!coherent) { +		peekCursor.assign(buffer, readCursor); +		coherent = true; +	} else { +		buffer->copyCursor(readCursor.cursor, peekCursor.cursor); +	} + +	// Return the result of the read function +	return res; +} + +void CharReader::resetPeek() +{ +	if (!coherent) { +		peekCursor.assign(buffer, readCursor); +		coherent = true; +	} +} + +void CharReader::consumePeek() +{ +	if (!coherent) { +		readCursor.assign(buffer, peekCursor); +		coherent = true; +	} +} + +bool CharReader::consumeWhitespace() +{ +	char c; +	while (peek(c)) { +		if (!Utils::isWhitespace(c)) { +			resetPeek(); +			return true; +		} +		consumePeek(); +	} +	return false; +} + +CharReaderFork CharReader::fork() +{ +	return CharReaderFork(buffer, readCursor, peekCursor, coherent); +} + +TextCursor::Context CharReader::getContext(ssize_t maxSize) +{ +	// Clone the current read cursor +	Buffer::CursorId cur = buffer->createCursor(readCursor.cursor); + +	// Fetch the start position of the search +	ssize_t offs = buffer->offset(cur); +	ssize_t start = offs; +	ssize_t end = offs; +	char c; + +	// Search the beginning of the line with the last non-whitespace character +	bool 
hadNonWhitespace = false; +	bool foundBegin = false; +	for (ssize_t i = 0; i < maxSize; i++) { +		// Fetch the character at the current position +		if (buffer->fetch(cur, c)) { +			// Abort, at linebreaks if we found a non-linebreak character +			hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c); +			if (hadNonWhitespace && (c == '\n' || c == '\r')) { +				buffer->moveCursor(cur, 1); +				start++; +				foundBegin = true; +				break; +			} +		} +		if (buffer->moveCursor(cur, -1) == 0) { +			foundBegin = true; +			break; +		} else { +			// Update the start position and the hadNonWhitespace flag +			start--; +		} +	} + +	// Search the end of the line +	buffer->moveCursor(cur, offs - start); +	bool foundEnd = false; +	for (ssize_t i = 0; i < maxSize; i++) { +		// Increment the end counter if a character was read, abort if the end +		// of the stream has been reached +		if (buffer->read(cur, c)) { +			end++; +		} else { +			foundEnd = true; +			break; +		} + +		// Abort on linebreak characters +		if (c == '\n' || c == '\r') { +			foundEnd = true; +			break; +		} +	} + +	// Calculate the truncated start and end position and limit the number of +	// characters to the maximum number of characters +	ssize_t tStart = start; +	ssize_t tEnd = end; +	if (tEnd - tStart > maxSize) { +		tStart = std::max(offs - maxSize / 2, tStart); +		tEnd = tStart + maxSize; +	} + +	// Try to go to the calculated start position and fetch the actual start +	// position +	ssize_t aStart = end + buffer->moveCursor(cur, tStart - end); +	if (aStart > tStart) { +		tEnd = tEnd + (aStart - tStart); +		tStart = aStart; +	} + +	// Read one line +	std::stringstream ss; +	size_t relPos = 0; +	for (ssize_t i = tStart; i < tEnd; i++) { +		if (buffer->read(cur, c)) { +			// Break once a linebreak is reached +			if (c == '\n' || c == '\r') { +				break; +			} + +			// Add the current character to the output +			ss << c; + +			// Increment the string-relative offset as long as the original +			// 
offset is not reached in the for loop +			if (i < offs) { +				relPos++; +			} +		} +	} + +	// Delete the newly created cursor +	buffer->deleteCursor(cur); + +	return TextCursor::Context{ss.str(), relPos, !foundBegin || tStart != start, +	                           !foundEnd || tEnd != end}; +} + +/* Class CharReaderFork */ + +CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer, +                               CharReader::Cursor &parentReadCursor, +                               CharReader::Cursor &parentPeekCursor, +                               bool coherent) +    : CharReader(buffer, 1, 1), +      parentReadCursor(parentReadCursor), +      parentPeekCursor(parentPeekCursor) +{ +	readCursor.assign(buffer, parentReadCursor); +	peekCursor.assign(buffer, parentPeekCursor); +	this->coherent = coherent; +} + +void CharReaderFork::commit() +{ +	parentReadCursor.assign(buffer, readCursor); +	parentPeekCursor.assign(buffer, peekCursor); +} +} + diff --git a/src/core/common/CharReader.hpp b/src/core/common/CharReader.hpp new file mode 100644 index 0000000..7be5e08 --- /dev/null +++ b/src/core/common/CharReader.hpp @@ -0,0 +1,625 @@ +/* +    Ousía +    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/ + +/** + * @file CharReader.hpp + * + * Used within all parsers to read single characters from an underlying stream. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_CHAR_READER_HPP_ +#define _OUSIA_CHAR_READER_HPP_ + +#include <istream> +#include <list> +#include <memory> +#include <vector> + +#include "TextCursor.hpp" + +namespace ousia { + +/** + * A chunked ring buffer used in CharReader to provide access to an input stream + * with multiple read cursors. The Buffer automatically expands to the + * size of the spanned by the read cursors while reusing already allocated + * memory. + */ +class Buffer { +public: +	/** +	 * Callback function which is called whenever new data is requested from the +	 * input stream. +	 * +	 * @param buf is points a the target memory region. +	 * @param size is the requested number of bytes. +	 * @param userData is a pointer at some user defined data given in the +	 * constructor. +	 * @return the actual number of bytes read. If the result is smaller than +	 * the requested size, this tells the Buffer that the end of the input +	 * stream is reached. +	 */ +	using ReadCallback = size_t (*)(char *buf, size_t size, void *userData); + +	/** +	 * Handle used to identify a cursor. +	 */ +	using CursorId = size_t; + +private: +	/** +	 * Number of bytes to request from the input stream. Set to 64 KiB because +	 * this seems to be a nice value for I/O operations according to multiple +	 * sources. +	 */ +	static constexpr size_t REQUEST_SIZE = 64 * 1024; + +	/** +	 * Number of bytes the buffer guarantees to be capable of looking back +	 * for extracting the current context. +	 */ +	static constexpr size_t LOOKBACK_SIZE = 128; + +	/** +	 * Type used internally to represent one chunk of memory. +	 */ +	using Bucket = std::vector<char>; + +	/** +	 * Type used internally to represent a bucket container. 
+	 */ +	using BucketList = std::list<Bucket>; + +	/** +	 * Type used internally for representing iterators in the bucket list. +	 */ +	using BucketIterator = BucketList::iterator; + +	/** +	 * Type used internally to represent a read cursor. +	 */ +	struct Cursor { +		/** +		 * Iterator pointing at the current bucket. +		 */ +		BucketIterator bucket; + +		/** +		 * Index of the bucket relative to the start bucket. +		 */ +		size_t bucketIdx; + +		/** +		 * Current offset within that bucket. +		 */ +		size_t bucketOffs; +	}; + +	/** +	 * List of buckets containing the buffered memory. +	 */ +	BucketList buckets; + +	/** +	 * List of cursors used to access the memory. Note that cursors can be +	 * marked as inactive and reused lateron (to avoid having to resize the +	 * vector). +	 */ +	std::vector<Cursor> cursors; + +	/** +	 * Bitfield specifying which of the cursors is actually valid. +	 */ +	std::vector<bool> alive; + +	/** +	 * Function to be called whenever new data is needed. Set to nullptr if the +	 * Buffer is not backed by an input stream. +	 */ +	const ReadCallback callback; + +	/** +	 * User data given in the constructor. +	 */ +	void *userData; + +	/** +	 * Set to true if the input stream is at its end. +	 */ +	bool reachedEnd; + +	/** +	 * Iterator pointing at the current start bucket. +	 */ +	BucketIterator startBucket; + +	/** +	 * Iterator pointing at the last bucket. +	 */ +	BucketIterator endBucket; + +	/** +	 * Byte offset of the start bucket relative to the beginning of the stream. +	 */ +	size_t startOffset; + +	/** +	 * Points at the smallest possible available cursor index, yet does not +	 * guarantee that this cursor index actuall is free. +	 */ +	CursorId firstDead; + +	/** +	 * Advances the bucket iterator, cares about wrapping around in the ring. +	 */ +	void advance(BucketIterator &it); + +	/** +	 * Advances the bucket iterator, cares about wrapping around in the ring. 
+	 */ +	void advance(BucketList::const_iterator &it) const; + +	/** +	 * Internally used to find the next free cursor in the cursors vector. The +	 * cursor is marked as active. +	 * +	 * @return the next free cursor index. +	 */ +	CursorId nextCursor(); + +	/** +	 * Returns a reference at the next bucket into which data should be +	 * inserted. +	 * +	 * @return a bucket into which the data can be inserted. +	 */ +	Bucket &nextBucket(); + +	/** +	 * Reads data from the input stream and places it in the next free buffer. +	 */ +	void stream(); + +	/** +	 * Moves the given cursor forward. +	 */ +	size_t moveForward(CursorId cursor, size_t relativeOffs); + +	/** +	 * Moves the given cursor backward. +	 */ +	size_t moveBackward(CursorId cursor, size_t relativeOffs); + +	/** +	 * Reads a character from the current cursor position and optionally +	 * advances. +	 */ +	bool fetchCharacter(CursorId cursor, char &c, bool incr); + +public: +	/** +	 * Intializes the Buffer with a reference to a ReadCallback that is used +	 * to fetch data from an underlying input stream. +	 * +	 * @param callback is the function that will be called whenever data is read +	 * from the ring buffer and the buffer does not hold enough data to fulfill +	 * this read request. +	 * @param userData is a pointer to user defined data which will be passed to +	 * the callback function. +	 */ +	Buffer(ReadCallback callback, void *userData); + +	/** +	 * Initializes the Buffer with a reference to an std::istream from which +	 * data will be read. +	 * +	 * @param istream is the input stream from which the data should be read. +	 */ +	Buffer(std::istream &istream); + +	/** +	 * Initializes the Buffer with the contents of the given string, after +	 * this operation the Buffer has a fixed size. +	 * +	 * @param str is the string containing the data that should be copied into +	 * the ring buffer. +	 */ +	Buffer(const std::string &str); + +#ifndef NDEBUG +	/** +	 * Destructor of the Buffer class. 
Makes sure that all cursors have been +	 * freed. +	 */ +	~Buffer(); +#endif + +	// No copy +	Buffer(const Buffer &) = delete; + +	// No assign +	Buffer &operator=(const Buffer &) = delete; + +	/** +	 * Creates a new read cursor positioned at the smallest possible position +	 * in the ring buffer. +	 */ +	CursorId createCursor(); + +	/** +	 * Creates a new read cursor positioned at the same position as the given +	 * read cursor. +	 * +	 * @param ref is the read cursor that should be used as reference for the +	 * new read cursor. +	 */ +	CursorId createCursor(CursorId ref); + +	/** +	 * Copies the position of one cursor to another cursor. +	 * +	 * @param from is the cursor id of which the position should be copied. +	 * @param to is the cursor id to which the position should be copied. +	 */ +	void copyCursor(CursorId from, CursorId to); + +	/** +	 * Deletes the cursor with the given id. The cursor may no longer be used +	 * after this function has been called. +	 * +	 * @param cursor is the id of the cursor that should be freed. +	 */ +	void deleteCursor(CursorId cursor); + +	/** +	 * Moves a cursor by offs bytes. Note that moving backwards is theoretically +	 * limited by the LOOKBACK_SIZE of the Buffer, practically it will most +	 * likely be limited by the REQUEST_SIZE, so you can go at most 64 KiB +	 * backwards. +	 * +	 * @param cursor is the cursor that should be moved. +	 * @param relativeOffs is a positive or negative integer number specifying +	 * the number of bytes the cursor should be moved forward (positive numbers) +	 * or backwards (negative numbers). +	 * @return the actual number of bytes the cursor was moved. Its magnitude is +	 * smaller than that of relativeOffs if a boundary of the buffer was hit. +	 */ +	ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs); + +	/** +	 * Returns the current byte offset of the given cursor relative to the +	 * beginning of the stream. 
+	 * +	 * @param cursor is the cursor for which the byte offset relative to the +	 * beginning of the stream should be returned. +	 * @return the number of bytes since the beginning of the stream for the +	 * given cursor. +	 */ +	size_t offset(CursorId cursor) const; + +	/** +	 * Returns true if the given cursor currently is at the end of the stream. +	 * +	 * @param cursor is the cursor for which the atEnd flag should be returned. +	 * @return true if there are no more bytes for this cursor. If false +	 * is returned, this means that there may be more bytes in the stream, +	 * nevertheless the end of the stream may be hit once the next read function +	 * is called. +	 */ +	bool atEnd(CursorId cursor) const; + +	/** +	 * Reads a single character from the ring buffer from the given cursor and +	 * moves to the next character. +	 * +	 * @param cursor specifies the cursor from which the data should be read. +	 * The cursor will be advanced by one byte. +	 * @param c is the character into which the data needs to be read. +	 * @return true if a character was read, false if the end of the stream has +	 * been reached. +	 */ +	bool read(CursorId cursor, char &c); + +	/** +	 * Returns a single character from the ring buffer from the current cursor +	 * position and stays at that position. +	 * +	 * @param cursor specifies the cursor from which the data should be read. +	 * The cursor is not advanced past the returned character. +	 * @param c is the character into which the data needs to be read. +	 * @return true if a character could be fetched, false if the end of the +	 * stream has been reached. +	 */ +	bool fetch(CursorId cursor, char &c); +}; + +// Forward declaration +class CharReaderFork; + +/** + * Used within parsers for convenient access to single characters in an input + * stream or buffer. It allows reading and peeking single characters from a + * buffer. 
Additionally it counts the current column/row (with correct handling + * for UTF-8) and contains an internal state machine that handles the detection + * of linebreaks and converts these to a single '\n'. + */ +class CharReader { +protected: +	/** +	 * Internally used cursor structure for managing the read and the peek +	 * cursor. +	 */ +	struct Cursor { +		/** +		 * Corresponding cursor in the underlying buffer instance. +		 */ +		const Buffer::CursorId cursor; + +		/** +		 * Current line the cursor is in. +		 */ +		TextCursor::PosType line; + +		/** +		 * Current column the cursor is in. +		 */ +		TextCursor::PosType column; + +		/** +		 * Constructor of the Cursor class. +		 * +		 * @param cursor is the underlying cursor in the Buffer instance. +		 */ +		Cursor(Buffer::CursorId cursor, TextCursor::PosType line, +		       TextCursor::PosType column) +		    : cursor(cursor), line(line), column(column) +		{ +		} + +		/** +		 * Assigns one cursor to another. +		 * +		 * @param buffer is the underlying buffer instance the internal cursor +		 * belongs to. +		 * @param cursor is the cursor from which the state should be copied. +		 */ +		void assign(std::shared_ptr<Buffer> buffer, Cursor &cursor); +	}; + +private: +	/** +	 * Substitutes "\r", "\n\r", "\r\n" with a single "\n". +	 * +	 * @param cursor is the cursor from which the character should be read. +	 * @param c a reference to the character that should be written. +	 * @return true if another character needs to be read. +	 */ +	bool substituteLinebreaks(Cursor &cursor, char &c); + +	/** +	 * Reads a single character from the given cursor. +	 * +	 * @param cursor is the cursor from which the character should be read. +	 * @param c a reference to the character that should be written. +	 * @return true if a character was read, false if the end of the stream has +	 * been reached. +	 */ +	bool readAtCursor(Cursor &cursor, char &c); + +protected: +	/** +	 * Reference pointing at the underlying buffer. 
+	 */ +	std::shared_ptr<Buffer> buffer; + +	/** +	 * Cursor used for reading. +	 */ +	Cursor readCursor; + +	/** +	 * Cursor used for peeking. +	 */ +	Cursor peekCursor; + +	/** +	 * Set to true as long the underlying Buffer cursor is at the same position +	 * for the read and the peek cursor. This is only used for optimization +	 * purposes and makes consecutive reads a bit faster. +	 */ +	bool coherent; + +	/** +	 * Protected constructor of the CharReader base class. Creates new read +	 * and peek cursors for the given buffer. +	 * +	 * @param buffer is a reference to the underlying Buffer class responsible +	 * for allowing to read from a single input stream from multiple locations. +	 */ +	CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column); + +public: +	/** +	 * Creates a new CharReader instance from a string. +	 * +	 * @param str is a string containing the input data. +	 * @param line is the start line. +	 * @param column is the start column. +	 */ +	CharReader(const std::string &str, size_t line = 1, size_t column = 1); + +	/** +	 * Creates a new CharReader instance for an input stream. +	 * +	 * @param istream is the input stream from which incomming data should be +	 * read. +	 * @param line is the start line. +	 * @param column is the start column. +	 */ +	CharReader(std::istream &istream, size_t line = 1, size_t column = 1); + +	/** +	 * Deletes the used cursors from the underlying buffer instance. +	 */ +	~CharReader(); + +	// No copy +	CharReader(const Buffer &) = delete; + +	// No assign +	CharReader &operator=(const Buffer &) = delete; + +	/** +	 * Peeks a single character. If called multiple times, returns the +	 * character after the previously peeked character. +	 * +	 * @param c is a reference to the character to which the result should be +	 * written. +	 * @return true if the character was successfully read, false if there are +	 * no more characters to be read in the buffer. 
+	 */ +	bool peek(char &c); + +	/** +	 * Reads a character from the input data. If "peek" was called +	 * beforehand resets the peek pointer. +	 * +	 * @param c is a reference to the character to which the result should be +	 * written. +	 * @return true if the character was successfully read, false if there are +	 * no more characters to be read in the buffer. +	 */ +	bool read(char &c); + +	/** +	 * Resets the peek pointer to the "read" pointer. +	 */ +	void resetPeek(); + +	/** +	 * Advances the read pointer to the peek pointer -- so if the "peek" +	 * function was called, "read" will now return the character after +	 * the last peeked character. +	 */ +	void consumePeek(); + +	/** +	 * Moves the read cursor to the next non-whitespace character. Returns +	 * false, if the end of the stream was reached. +	 * +	 * @return false if the end of the stream was reached, true otherwise. +	 */ +	bool consumeWhitespace(); + +	/** +	 * Creates a new CharReader located at the same position as this CharReader +	 * instance, yet the new CharReader can be used independently of this +	 * CharReader. Use the "commit" function of the returned CharReader to +	 * copy the state of the forked CharReaderFork to this CharReader. +	 * +	 * @return a CharReaderFork instance positioned at the same location as this +	 * CharReader instance. +	 */ +	CharReaderFork fork(); + +	/** +	 * Returns true if there are no more characters as the stream was +	 * closed. +	 * +	 * @return true if there is no more data. +	 */ +	bool atEnd() const { return buffer->atEnd(readCursor.cursor); } + +	/** +	 * Returns the offset of the read cursor in bytes. +	 */ +	size_t getOffset() const { return buffer->offset(readCursor.cursor); } + +	/** +	 * Returns the line number the read cursor currently is at. +	 */ +	TextCursor::PosType getLine() const { return readCursor.line; } + +	/** +	 * Returns the column the read cursor currently is at. 
+	 */ +	TextCursor::PosType getColumn() const { return readCursor.column; } + +	/** +	 * Returns the current position of the read cursor (line, column and byte +	 * offset). +	 */ +	TextCursor::Position getPosition() const +	{ +		return TextCursor::Position(getLine(), getColumn(), getOffset()); +	} + +	/** +	 * Returns the line the read cursor currently is in, but at most the +	 * given number of characters in the form of a Context structure. +	 * +	 * @param maxSize is the maximum length of the extracted context +	 */ +	TextCursor::Context getContext(ssize_t maxSize = 60); +}; + +/** + * A CharReaderFork is returned whenever the "fork" function of the CharReader + * class is used. Its "commit" function can be used to move the underlying + * CharReader instance to the location of the CharReaderFork instance. Otherwise + * the read location of the underlying CharReader is left unchanged. + */ +class CharReaderFork : public CharReader { +private: +	friend CharReader; + +	/** +	 * The read cursor of the underlying CharReader instance. +	 */ +	CharReader::Cursor &parentReadCursor; + +	/** +	 * The peek cursor of the underlying CharReader instance. +	 */ +	CharReader::Cursor &parentPeekCursor; + +	/** +	 * Constructor of the CharReaderFork class. +	 * +	 * @param buffer is a shared pointer to the parent Buffer instance. +	 * @param parentReadCursor is a reference to the parent read cursor. +	 * @param parentPeekCursor is a reference to the parent peek cursor. +	 * @param coherent specifies whether the char reader cursors are initialized +	 * coherently. +	 */ +	CharReaderFork(std::shared_ptr<Buffer> buffer, +	               CharReader::Cursor &parentReadCursor, +	               CharReader::Cursor &parentPeekCursor, bool coherent); + +public: +	/** +	 * Moves the read and peek cursor of the parent CharReader to the location +	 * of the read and peek cursor in the fork. 
+	 */ +	void commit(); +}; +} + +#endif /* _OUSIA_CHAR_READER_HPP_ */ + diff --git a/src/core/common/Exceptions.cpp b/src/core/common/Exceptions.cpp new file mode 100644 index 0000000..30c5626 --- /dev/null +++ b/src/core/common/Exceptions.cpp @@ -0,0 +1,46 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <sstream> + +#include "Exceptions.hpp" + +namespace ousia { + +/* Class LoggableException */ + +std::string LoggableException::formatMessage(const std::string &msg, +                                             const TextCursor::Position &pos, +                                             const TextCursor::Context &ctx) +{ +	std::stringstream ss; +	ss << "error "; +	if (pos.hasLine()) { +		ss << "at line " << pos.line << ", "; +		if (pos.hasColumn()) { +			ss << "column " << pos.column << " "; +		} +	} +	ss << "with message: " << msg; +	if (ctx.valid()) { +		ss << " in context \"" << ctx.text << "\""; +	} +	return ss.str(); +} +} + diff --git a/src/core/common/Exceptions.hpp b/src/core/common/Exceptions.hpp new file mode 100644 index 0000000..443c176 --- /dev/null +++ b/src/core/common/Exceptions.hpp @@ -0,0 +1,165 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Exceptions.hpp + * + * Describes basic exception classes which are used throughout Ousía. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_EXCEPTIONS_HPP_ +#define _OUSIA_EXCEPTIONS_HPP_ + +#include "TextCursor.hpp" + +namespace ousia { + +/** + * Base exception class all other Ousía exceptions should derive from. 
+ */ +class OusiaException : public std::exception { +private: +	/** +	 * Error message which will be printed by the runtime environment if the +	 * exception is not caught and handled in the code. +	 */ +	const std::string formatedMessage; + +public: +	/** +	 * Constructor of the OusiaException class. +	 * +	 * @param formatedMessage is a formated message that should be printed by +	 * the runtime environment if the exception is not caught. +	 */ +	OusiaException(std::string formatedMessage) +	    : formatedMessage(std::move(formatedMessage)) +	{ +	} + +	/** +	 * Virtual destructor. +	 */ +	virtual ~OusiaException() {} + +	/** +	 * Implementation of the std::exception what function and used to retrieve +	 * the error message that should be printed by the runtime environment. +	 * +	 * @return a reference to the formated message string given in the +	 * constructor. +	 */ +	const char *what() const noexcept override +	{ +		return formatedMessage.c_str(); +	} +}; + +/** + * Exception class which can be directly passed to a Logger instance and thus + * makes it simple to handle non-recoverable errors in the code. + */ +class LoggableException : public OusiaException { +private: +	/** +	 * Function used internally to build the formated message that should be +	 * reported to the runtime environment. +	 */ +	static std::string formatMessage(const std::string &msg, +	                                 const TextCursor::Position &pos, +	                                 const TextCursor::Context &ctx); + +public: +	/** +	 * Reported error message. +	 */ +	const std::string msg; + +	/** +	 * Position in the document at which the exception occurred. +	 */ +	const TextCursor::Position pos; + +	/** +	 * Context in the document text in which the exception occurred. +	 */ +	const TextCursor::Context ctx; + +	/** +	 * Constructor of the LoggableException class. +	 * +	 * @param msg contains the error message. +	 * @param pos is the position at which the error occured. 
+	 * @param ctx describes the context in which the error occured. +	 */ +	LoggableException(std::string msg, +	                  TextCursor::Position pos = TextCursor::Position{}, +	                  TextCursor::Context ctx = TextCursor::Context{}) +	    : OusiaException(formatMessage(msg, pos, ctx)), +	      msg(std::move(msg)), +	      pos(std::move(pos)), +	      ctx(std::move(ctx)) +	{ +	} + +	/** +	 * Constructor of the LoggableException class. +	 * +	 * @param msg contains the error message. +	 * @param line is the line in the above file the message refers to. +	 * @param column is the column in the above file the message refers to. +	 * @param offs is the byte offset. +	 */ +	LoggableException(std::string msg, TextCursor::PosType line, +	                  TextCursor::PosType column, size_t offs) +	    : LoggableException(msg, TextCursor::Position(line, column, offs)) +	{ +	} + +	/** +	 * Constructor of LoggableException for arbitrary position objects. +	 * +	 * @param msg is the actual log message. +	 * @param pos is a reference to a variable with position and context data. +	 */ +	template <class PosType> +	LoggableException(std::string msg, PosType &pos) +	    : LoggableException(std::move(msg), pos.getPosition(), pos.getContext()) +	{ +	} + +	/** +	 * Returns the position at which the exception occured in the text. +	 * +	 * @return the position descriptor. +	 */ +	TextCursor::Position getPosition() const { return pos; } + +	/** +	 * Returns the context in which the exception occured in the text. +	 * +	 * @return the context descriptor. 
+	 */ +	TextCursor::Context getContext() const { return ctx; } +}; +} + +#endif /* _OUSIA_EXCEPTIONS_HPP_ */ + diff --git a/src/core/common/Logger.cpp b/src/core/common/Logger.cpp new file mode 100644 index 0000000..c1d6343 --- /dev/null +++ b/src/core/common/Logger.cpp @@ -0,0 +1,219 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <iostream> +#include <sstream> + +#include "Logger.hpp" + +namespace ousia { + +/* Class Logger */ + +void Logger::log(Severity severity, std::string msg, TextCursor::Position pos, +                 TextCursor::Context ctx) +{ +	// Update the maximum encountered severity level +	if (static_cast<int>(severity) > static_cast<int>(maxEncounteredSeverity)) { +		maxEncounteredSeverity = severity; +	} + +	// Only process the message if its severity is larger than the +	// set minimum severity. 
+	if (static_cast<int>(severity) >= static_cast<int>(minSeverity)) { +		processMessage( +		    Message{severity, std::move(msg), std::move(pos), std::move(ctx)}); +	} +} + +LoggerFork Logger::fork() { return LoggerFork{this, minSeverity}; } + +/* Class LoggerFork */ + +void LoggerFork::processMessage(Message msg) +{ +	calls.push_back(Call(CallType::MESSAGE, messages.size())); +	messages.push_back(msg); +} + +void LoggerFork::processPushFile(File file) +{ +	calls.push_back(Call(CallType::PUSH_FILE, files.size())); +	files.push_back(file); +} + +void LoggerFork::processPopFile() +{ +	calls.push_back(Call(CallType::POP_FILE, 0)); +} + +void LoggerFork::commit() +{ +	for (const Call &call : calls) { +		switch (call.type) { +			case CallType::MESSAGE: { +				const Message &msg = messages[call.dataIdx]; +				parent->log(msg.severity, msg.msg, msg.pos, msg.ctx); +				break; +			} +			case CallType::PUSH_FILE: { +				const File &file = files[call.dataIdx]; +				parent->pushFile(file.file, file.pos, file.ctx); +				break; +			} +			case CallType::POP_FILE: +				parent->popFile(); +				break; +		} +	} +} + +/* Class Terminal */ + +class Terminal { +private: +	/** +	 * If set to false, no control codes are generated. 
+	 */ +	bool active; + +public: +	static const int BLACK = 30; +	static const int RED = 31; +	static const int GREEN = 32; +	static const int YELLOW = 33; +	static const int BLUE = 34; +	static const int MAGENTA = 35; +	static const int CYAN = 36; +	static const int WHITE = 37; + +	Terminal(bool active) : active(active) {} + +	std::string color(int color, bool bright = true) const +	{ +		if (!active) { +			return std::string{}; +		} +		std::stringstream ss; +		ss << "\x1b["; +		if (bright) { +			ss << "1;"; +		} +		ss << color << "m"; +		return ss.str(); +	} + +	std::string reset() const +	{ +		if (!active) { +			return std::string{}; +		} +		return "\x1b[0m"; +	} +}; + +/* Class TerminalLogger */ + +/** + * Small class used internally for formated terminal output using ANSI/VT100 + * escape codes on supported terminals. + * + * TODO: Deactivate if using windows or use the corresponding API function. + */ + +std::string TerminalLogger::currentFilename() +{ +	if (!files.empty()) { +		return files.top().file; +	} +	return std::string{}; +} + +void TerminalLogger::processMessage(Message msg) +{ +	Terminal t(useColor); + +	// Print the file name +	std::string filename = currentFilename(); +	bool hasFile = !filename.empty(); +	if (hasFile) { +		os << t.color(Terminal::WHITE, true) << filename << t.reset(); +	} + +	// Print line and column number +	if (msg.pos.hasLine()) { +		if (hasFile) { +			os << ':'; +		} +		os << t.color(Terminal::WHITE, true) << msg.pos.line << t.reset(); +		if (msg.pos.hasColumn()) { +			os << ':' << msg.pos.column; +		} +	} + +	// Print the optional seperator +	if (hasFile || msg.pos.hasLine()) { +		os << ": "; +	} + +	// Print the severity +	switch (msg.severity) { +		case Severity::DEBUG: +			break; +		case Severity::NOTE: +			os << t.color(Terminal::CYAN, true) << "note: "; +			break; +		case Severity::WARNING: +			os << t.color(Terminal::MAGENTA, true) << "warning: "; +			break; +		case Severity::ERROR: +			os << t.color(Terminal::RED, true) 
<< "error: "; +			break; +		case Severity::FATAL_ERROR: +			os << t.color(Terminal::RED, true) << "fatal: "; +			break; +	} +	os << t.reset(); + +	// Print the actual message +	os << msg.msg << std::endl; + +	// Print the error message context if available +	if (msg.ctx.valid()) { +		size_t relPos = msg.ctx.relPos; +		if (msg.ctx.truncatedStart) { +			os << "[...] "; +			relPos += 6; +		} +		os << msg.ctx.text; +		if (msg.ctx.truncatedEnd) { +			os << " [...]"; +		} +		os << std::endl; +		for (size_t i = 0; i < relPos; i++) { +			os << ' '; +		} +		os << t.color(Terminal::GREEN) << '^' << t.reset() << std::endl; +	} +} + +void TerminalLogger::processPushFile(File file) { files.push(file); } + +void TerminalLogger::processPopFile() { files.pop(); } +} + diff --git a/src/core/common/Logger.hpp b/src/core/common/Logger.hpp new file mode 100644 index 0000000..be82ea0 --- /dev/null +++ b/src/core/common/Logger.hpp @@ -0,0 +1,602 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Logger.hpp + * + * Contains classes for logging messages in Ousía. Provides a generic Logger + * class, and TerminalLogger, an extension of Logger which logs do an output + * stream. 
+ * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_LOGGER_HPP_ +#define _OUSIA_LOGGER_HPP_ + +#include <ostream> +#include <stack> +#include <string> +#include <vector> + +#include "Exceptions.hpp" +#include "TextCursor.hpp" + +namespace ousia { + +/** + * Enum containing the severities used for logging errors and debug messages. + */ +enum class Severity : int { +	/** +     * Indicates that this message was only printed for debugging. Note that +     * in release builds messages with this severity are discarded. +     */ +	DEBUG = 0, + +	/** +     * A message which might provide additional information to the user. +     */ +	NOTE = 1, + +	/** +     * A message which warns of possible mistakes by the user which might not be +     * actual errors but may lead to unintended behaviour. +     */ +	WARNING = 2, + +	/** +     * An error occurred while processing, however program execution continues, +     * trying to deal with the error situation (graceful degradation). However, +     * messages with this severity may be followed up by fatal errors. +     */ +	ERROR = 3, + +	/** +     * A fatal error occurred. Program execution cannot continue. +     */ +	FATAL_ERROR = 4 +}; + +#ifdef NDEBUG +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::NOTE; +#else +static constexpr Severity DEFAULT_MIN_SEVERITY = Severity::DEBUG; +#endif + +// Forward declaration +class LoggerFork; + +/** + * The Logger class is the base class the individual logging systems should + * derive from. It provides a simple interface for logging errors, warnings and + * notes and filters these according to the set minimum severity. Additionally + * a stack of file names is maintained in order to allow simple descent into + * included files. Note however, that this base Logger class simply discards the + * incomming log messages. Use one of the derived classes to actually handle the + * log messages. 
+ */ +class Logger { +public: +	/** +	 * Describes an included file. +	 */ +	struct File { +		/** +		 * Is the name of the file. +		 */ +		std::string file; + +		/** +		 * Position at which the file was included. +		 */ +		TextCursor::Position pos; + +		/** +		 * Context in which the file was included. +		 */ +		TextCursor::Context ctx; + +		/** +		 * Constructor of the File struct. The arguments are taken by value and +		 * moved into the members. +		 * +		 * @param file is the name of the included file. +		 * @param pos is the position in the parent file, at which this file +		 * was included. +		 * @param ctx is the context in which the file was included. +		 */ +		File(std::string file, TextCursor::Position pos, +		     TextCursor::Context ctx) +		    : file(std::move(file)), pos(std::move(pos)), ctx(std::move(ctx)) +		{ +		} +	}; + +	/** +	 * The message struct represents a single log message and all information +	 * attached to it. +	 */ +	struct Message { +		/** +		 * Severity of the log message. +		 */ +		Severity severity; + +		/** +		 * Actual log message. +		 */ +		std::string msg; + +		/** +		 * Position in the text the message refers to. +		 */ +		TextCursor::Position pos; + +		/** +		 * Context the message refers to. +		 */ +		TextCursor::Context ctx; + +		/** +		 * Constructor of the Message struct. +		 * +		 * @param severity describes the message severity. +		 * @param msg contains the actual message. +		 * @param pos is the position in the text the message refers to. +		 * @param ctx is the context the message refers to. +		 */ +		Message(Severity severity, std::string msg, TextCursor::Position pos, +		        TextCursor::Context ctx) +		    : severity(severity), +		      msg(std::move(msg)), +		      pos(std::move(pos)), +		      ctx(std::move(ctx)){}; +	}; + +protected: +	/** +	 * Minimum severity a log message must have in order not to be discarded. +	 */ +	const Severity minSeverity; + +	/** +	 * Maximum encountered log message severity. 
+	 */ +	Severity maxEncounteredSeverity; + +	/** +	 * Function to be overriden by child classes to actually display or store +	 * the messages. The default implementation just discards all incomming +	 * messages. +	 * +	 * @param msg is an instance of the Message struct containing the data that +	 * should be logged. +	 */ +	virtual void processMessage(Message msg) {} + +	/** +	 * Called whenever a new file is pushed onto the stack. +	 * +	 * @param file is the file that should be pushed onto the stack. +	 */ +	virtual void processPushFile(File file) {} + +	/** +	 * Called whenever a file is popped from the stack. +	 */ +	virtual void processPopFile() {} + +public: +	/** +	 * Constructor of the Logger class. +	 * +	 * @param minSeverity is the minimum severity a log message should have. +	 * Messages below this severity are discarded. +	 */ +	Logger(Severity minSeverity = DEFAULT_MIN_SEVERITY) +	    : minSeverity(minSeverity), maxEncounteredSeverity(Severity::DEBUG) +	{ +	} + +	/** +	 * Virtual destructor. +	 */ +	virtual ~Logger(){}; + +	// No copy +	Logger(const Logger &) = delete; + +	// No assign +	Logger &operator=(const Logger &) = delete; + +	/** +	 * Logs the given message. The file name is set to the topmost file name on +	 * the file name stack. +	 * +	 * @param severity is the severity of the log message. +	 * @param msg is the actual log message. +	 * @param pos is the position the log message refers to. +	 * @param ctx describes the context of the log message. +	 */ +	void log(Severity severity, std::string msg, +	         TextCursor::Position pos = TextCursor::Position{}, +	         TextCursor::Context ctx = TextCursor::Context{}); + +	/** +	 * Logs the given loggable exception. +	 * +	 * @param ex is the exception that should be logged. +	 */ +	void log(const LoggableException &ex) +	{ +		log(Severity::ERROR, ex.msg, ex.getPosition(), ex.getContext()); +	} + +	/** +	 * Logs the given message. 
The file name is set to the topmost file name on +	 * the file name stack. +	 * +	 * @param severity is the severity of the log message. +	 * @param msg is the actual log message. +	 * @param pos is a reference to a variable which provides position and +	 * context information. +	 */ +	template <class PosType> +	void logAt(Severity severity, std::string msg, PosType &pos) +	{ +		log(severity, std::move(msg), pos.getPosition(), pos.getContext()); +	} + +	/** +	 * Logs a debug message. Debug messages will be discarded if the software +	 * is compiled in the release mode (with the NDEBUG flag). +	 * +	 * @param msg is the actual log message. +	 * @param pos describes the position of the debug message. +	 * @param ctx describes the context of the debug message. +	 */ +	void debug(std::string msg, +	           TextCursor::Position pos = TextCursor::Position{}, +	           TextCursor::Context ctx = TextCursor::Context{}) +	{ +#ifndef NDEBUG +		log(Severity::DEBUG, std::move(msg), std::move(pos), std::move(ctx)); +#endif +	} + +	/** +	 * Logs a debug message. Debug messages will be discarded if the software +	 * is compiled in the release mode. +	 * +	 * @param msg is the actual log message. +	 * @param pos is a reference to a variable which provides position and +	 * context information. +	 */ +	template <class PosType> +	void debug(std::string msg, PosType &pos) +	{ +#ifndef NDEBUG +		logAt(Severity::DEBUG, std::move(msg), pos); +#endif +	} + +	/** +	 * Logs a note. +	 * +	 * @param msg is the actual log message. +	 * @param pos describes the position of the note. +	 * @param ctx describes the context of the note. +	 */ +	void note(std::string msg, +	          TextCursor::Position pos = TextCursor::Position{}, +	          TextCursor::Context ctx = TextCursor::Context{}) +	{ +		log(Severity::NOTE, std::move(msg), std::move(pos), std::move(ctx)); +	} + +	/** +	 * Logs a note. +	 * +	 * @param msg is the actual log message. 
+	 * @param pos is a reference to a variable which provides position and +	 * context information. +	 */ +	template <class PosType> +	void note(std::string msg, PosType &pos) +	{ +		logAt(Severity::NOTE, std::move(msg), pos); +	} + +	/** +	 * Logs a warning. +	 * +	 * @param msg is the actual log message. +	 * @param pos describes the position of the warning. +	 * @param ctx describes the context of the warning. +	 */ +	void warning(std::string msg, +	             TextCursor::Position pos = TextCursor::Position{}, +	             TextCursor::Context ctx = TextCursor::Context{}) +	{ +		log(Severity::WARNING, std::move(msg), std::move(pos), std::move(ctx)); +	} + +	/** +	 * Logs a warning. +	 * +	 * @param msg is the actual log message. +	 * @param pos is a reference to a variable which provides position and +	 * context information. +	 */ +	template <class PosType> +	void warning(std::string msg, PosType &pos) +	{ +		logAt(Severity::WARNING, std::move(msg), pos); +	} + +	/** +	 * Logs an error message. +	 * +	 * @param msg is the actual log message. +	 * @param pos is the position at which the error occured. +	 * @param ctx describes the context in which the error occured. +	 */ +	void error(std::string msg, +	           TextCursor::Position pos = TextCursor::Position{}, +	           TextCursor::Context ctx = TextCursor::Context{}) +	{ +		log(Severity::ERROR, std::move(msg), std::move(pos), std::move(ctx)); +	} + +	/** +	 * Logs an error message. +	 * +	 * @param msg is the actual log message. +	 * @param pos is a reference to a variable which provides position and +	 * context information. +	 */ +	template <class PosType> +	void error(std::string msg, PosType &pos) +	{ +		logAt(Severity::ERROR, std::move(msg), pos); +	} + +	/** +	 * Logs a fatal error message. +	 * +	 * @param msg is the actual log message. +	 * @param pos is the position at which the error occured. +	 * @param ctx describes the context in which the error occured. 
+	 */ +	void fatalError(std::string msg, +	                TextCursor::Position pos = TextCursor::Position{}, +	                TextCursor::Context ctx = TextCursor::Context{}) +	{ +		log(Severity::FATAL_ERROR, std::move(msg), std::move(pos), +		    std::move(ctx)); +	} + +	/** +	 * Logs a fatal error message. +	 * +	 * @param msg is the actual log message. +	 * @param pos is a reference to a variable which provides position and +	 * context information. +	 */ +	template <class PosType> +	void fatalError(std::string msg, PosType &pos) +	{ +		logAt(Severity::FATAL_ERROR, std::move(msg), pos); +	} + +	/** +	 * Pushes a new file name onto the internal filename stack. +	 * +	 * @param name is the name of the file to be added to the stack. +	 * @param pos is the position from which the new file is included. +	 * @param ctx is the context in which the new file is included. +	 */ +	void pushFile(std::string name, +	              TextCursor::Position pos = TextCursor::Position{}, +	              TextCursor::Context ctx = TextCursor::Context{}) +	{ +		processPushFile(File(std::move(name), std::move(pos), std::move(ctx))); +	} + +	/** +	 * Pops the filename from the internal filename stack. +	 * +	 * @return the current size of the filename stack. +	 */ +	void popFile() { processPopFile(); } + +	/** +	 * Returns the maximum severity that was encountered by the Logger but at +	 * least Severity::DEBUG. +	 * +	 * @return the severity of the most severe log message but at least +	 * Severity::DEBUG. +	 */ +	Severity getMaxEncounteredSeverity() { return maxEncounteredSeverity; } + +	/** +	 * Returns the minimum severity. Messages with a smaller severity are +	 * discarded. +	 * +	 * @return the minimum severity. +	 */ +	Severity getMinSeverity() { return minSeverity; } + +	/** +	 * Returns a forked logger instance which can be used to collect log +	 * messages for which it is not sure whether they will be used. 
+	 */ +	LoggerFork fork(); +}; + +/** + * Fork of the Logger -- stores all logged messages without actually pushing + * them to the underlying logger instance. + */ +class LoggerFork : public Logger { +private: +	friend Logger; + +	/** +	 * Intanally used to store the incomming function calls. +	 */ +	enum class CallType { MESSAGE, PUSH_FILE, POP_FILE }; + +	/** +	 * Datastructure used to represent a logger function call. +	 */ +	struct Call { +		/** +		 * Type of the function call. +		 */ +		CallType type; + +		/** +		 * Index of the associated data in the type-specific vector. +		 */ +		size_t dataIdx; + +		/** +		 * Constructor of the Call structure. +		 * +		 * @param type is the type of the call. +		 * @param dataIdx is the index of the associated data in the type +		 * specific data vector. +		 */ +		Call(CallType type, size_t dataIdx) : type(type), dataIdx(dataIdx) {} +	}; + +	/** +	 * Vector storing all incomming calls. +	 */ +	std::vector<Call> calls; + +	/** +	 * Vector storing all incomming messages. +	 */ +	std::vector<Message> messages; + +	/** +	 * Vector storing all incomming pushed files. +	 */ +	std::vector<File> files; + +	/** +	 * Parent logger instance. +	 */ +	Logger *parent; + +	/** +	 * Constructor of the LoggerFork class. +	 * +	 * @param minSeverity is the minimum severity a message should have to be +	 * stored. +	 * @param parent is the parent logger instance. +	 */ +	LoggerFork(Logger *parent, Severity minSeverity) +	    : Logger(minSeverity), parent(parent) +	{ +	} + +protected: +	void processMessage(Message msg) override; +	void processPushFile(File file) override; +	void processPopFile() override; + +public: +	/** +	 * Commits all collected messages to the parent Logger instance. +	 */ +	void commit(); + +	/** +	 * Explicitly declared move constructor. 
+	 */ +	LoggerFork(LoggerFork &&l) +	    : Logger(l.getMinSeverity()), +	      calls(std::move(l.calls)), +	      messages(std::move(l.messages)), +	      files(std::move(l.files)), +	      parent(std::move(l.parent)) +	{ +		// The Logger base constructor resets the maximum encountered severity +		// to Severity::DEBUG, so carry the value of the moved-from fork over. +		maxEncounteredSeverity = l.maxEncounteredSeverity; +	} +}; + +/** + * Class extending the Logger class and printing the log messages to the given + * stream. + */ +class TerminalLogger : public Logger { +private: +	/** +	 * Reference to the target output stream. +	 */ +	std::ostream &os; + +	/** +	 * If true, the TerminalLogger will use colors to make the log messages +	 * prettier. +	 */ +	bool useColor; + +	/** +	 * Stack used to keep the file references. +	 */ +	std::stack<File> files; + +	/** +	 * The size of the stack the last time a file backtrace was printed. +	 */ +	size_t lastFilePrinted = 0; + +protected: +	void processMessage(Message msg) override; +	void processPushFile(File file) override; +	void processPopFile() override; + +public: +	/** +	 * Constructor of the TerminalLogger class. +	 * +	 * @param os is the output stream the log messages should be logged to. +	 * Should be set to std::cerr in most cases. +	 * @param useColor if true, the TerminalLogger class will do its best to +	 * use ANSI/VT100 control sequences for colored log messages. +	 * @param minSeverity is the minimum severity below which log messages are +	 * discarded. +	 */ +	TerminalLogger(std::ostream &os, bool useColor = false, +	               Severity minSeverity = DEFAULT_MIN_SEVERITY) +	    : Logger(minSeverity), os(os), useColor(useColor) +	{ +	} + +	/** +	 * Returns the name of the topmost file. 
+	 */ +	std::string currentFilename(); +}; +} + +#endif /* _OUSIA_LOGGER_HPP_ */ + diff --git a/src/core/common/TextCursor.hpp b/src/core/common/TextCursor.hpp new file mode 100644 index 0000000..2633345 --- /dev/null +++ b/src/core/common/TextCursor.hpp @@ -0,0 +1,168 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _OUSIA_TEXT_CURSOR_HPP_ +#define _OUSIA_TEXT_CURSOR_HPP_ + +// This header uses size_t, std::string and std::move, so it includes the +// corresponding standard headers itself instead of relying on its includers. +#include <cstddef> +#include <string> +#include <utility> + +namespace ousia { +namespace TextCursor { + +/** + * Type used for representing line or column positions. + */ +using PosType = unsigned int; + +/** + * Struct representing a position within the text. A position is defined by a + * byte offset (which is always reproducible), a line number and a column + * number. + */ +struct Position { +	/** +	 * Current line, starting with one. +	 */ +	PosType line; + +	/** +	 * Current column, starting with one. +	 */ +	PosType column; + +	/** +	 * Current byte offset. +	 */ +	size_t offs; + +	/** +	 * Default constructor of the Position struct, initializes all members +	 * with zero. +	 */ +	Position() : line(0), column(0), offs(0) {} + +	/** +	 * Creates a new Position struct with only a line and no column. The column +	 * and the byte offset are set to zero. +	 * +	 * @param line is the line number. 
+	 */ +	Position(PosType line) : line(line), column(0), offs(0) {} + +	/** +	 * Creates a new Position struct with a line and column. +	 * +	 * @param line is the line number. +	 * @param column is the column number. +	 */ +	Position(PosType line, PosType column) : line(line), column(column), offs(0) +	{ +	} + +	/** +	 * Creates a new Position struct with a line, column and byte offset. +	 * +	 * @param line is the line number. +	 * @param column is the column number. +	 * @param offs is the byte offset. +	 */ +	Position(PosType line, PosType column, size_t offs) +	    : line(line), column(column), offs(offs) +	{ +	} + +	/** +	 * Returns true, if the line number is valid, false otherwise. +	 * +	 * @return true for valid line numbers. +	 */ +	bool hasLine() const { return line > 0; } + +	/** +	 * Returns true, if the column number is valid, false otherwise. +	 * +	 * @return true for valid column numbers. +	 */ +	bool hasColumn() const { return column > 0; } +}; + +/** + * Represents the current context a CharReader is in. Used for building error + * messages. + */ +struct Context { +	/** +	 * Set to the content of the current line. +	 */ +	std::string text; + +	/** +	 * Relative position (in characters) within that line. May point to +	 * locations beyond the text content. +	 */ +	PosType relPos; + +	/** +	 * Set to true if the beginning of the line has been truncated (because +	 * the reader position is too far away from the actual position of the +	 * line). +	 */ +	bool truncatedStart; + +	/** +	 * Set to true if the end of the line has been truncated (because the +	 * reader position is too far away from the actual end position of the +	 * line. +	 */ +	bool truncatedEnd; + +	/** +	 * Default constructor, initializes all members with zero values. +	 */ +	Context() : text(), relPos(0), truncatedStart(false), truncatedEnd(false) {} + +	/** +	 * Constructor of the Context class. +	 * +	 * @param text is the current line the text cursor is at. 
+	 * @param relPos is the relative position of the text cursor within that +	 * line. +	 * @param truncatedStart specifies whether the text was truncated at the +	 * beginning. +	 * @param truncatedEnd specifies whether the text was truncated at the +	 * end. +	 */ +	Context(std::string text, size_t relPos, bool truncatedStart, +	        bool truncatedEnd) +	    : text(std::move(text)), +	      relPos(relPos), +	      truncatedStart(truncatedStart), +	      truncatedEnd(truncatedEnd) +	{ +	} + +	/** +	 * Returns true the context text is not empty. +	 * +	 * @return true if the context is valid and e.g. should be printed. +	 */ +	bool valid() const { return !text.empty(); } +}; +} +} + +#endif /* _OUSIA_TEXT_CURSOR_HPP_ */ + diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp new file mode 100644 index 0000000..c460ed4 --- /dev/null +++ b/src/core/common/Utils.cpp @@ -0,0 +1,59 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <algorithm> +#include <limits> + +#include "Utils.hpp" + +namespace ousia { + +std::string Utils::trim(const std::string &s) +{ +	size_t firstNonWhitespace = std::numeric_limits<size_t>::max(); +	size_t lastNonWhitespace = 0; +	for (size_t i = 0; i < s.size(); i++) { +		if (!isWhitespace(s[i])) { +			firstNonWhitespace = std::min(i, firstNonWhitespace); +			lastNonWhitespace = std::max(i, lastNonWhitespace); +		} +	} + +	if (firstNonWhitespace < lastNonWhitespace) { +		return s.substr(firstNonWhitespace, +		                lastNonWhitespace - firstNonWhitespace + 1); +	} +	return std::string{}; +} + +bool Utils::isIdentifier(const std::string &name) +{ +	bool first = true; +	for (char c : name) { +		if (first && !(isAlphabetic(c) || c == '_')) { +			return false; +		} +		if (first && !(isAlphanumeric(c) || c == '_' || c == '-')) { +			return false; +		} +		first = false; +	} +	return true; +} +} + diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp new file mode 100644 index 0000000..5332b50 --- /dev/null +++ b/src/core/common/Utils.hpp @@ -0,0 +1,110 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
#ifndef _OUSIA_UTILS_H_
#define _OUSIA_UTILS_H_

#include <sstream>
#include <string>

namespace ousia {

/**
 * Collection of static helper functions for character classification and
 * simple string handling. All classification functions only look at the
 * ASCII ranges spelled out in their implementation.
 */
class Utils {
public:
	/**
	 * Returns true if the given character is in [A-Za-z]
	 */
	static bool isAlphabetic(const char c)
	{
		return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'));
	}

	/**
	 * Returns true if the given character is in [0-9]
	 */
	static bool isNumeric(const char c) { return (c >= '0') && (c <= '9'); }

	/**
	 * Returns true if the given character is in [0-9A-Fa-f]
	 */
	static bool isHexadecimal(const char c)
	{
		return ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F')) ||
		       ((c >= 'a') && (c <= 'f'));
	}

	/**
	 * Returns true if the given character is in [A-Za-z0-9]
	 */
	static bool isAlphanumeric(const char c)
	{
		return isAlphabetic(c) || isNumeric(c);
	}

	/**
	 * Returns true if the given string matches [A-Za-z_][A-Za-z0-9_-]*
	 *
	 * @param name is the string that should be checked.
	 */
	static bool isIdentifier(const std::string &name);

	/**
	 * Returns true if the given character is a whitespace character (space,
	 * tab, line feed or carriage return).
	 */
	static bool isWhitespace(const char c)
	{
		return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
	}

	/**
	 * Removes whitespace at the beginning and the end of the given string.
	 *
	 * @param s is the string that should be trimmed.
	 * @return the trimmed copy of s.
	 */
	static std::string trim(const std::string &s);

	/**
	 * Turns the elements of a collection into a string separated by the
	 * given delimiter.
	 *
	 * The collection is taken by const reference, so it is neither copied nor
	 * required to be copyable.
	 *
	 * @param es is an iterable container of elements that can be appended to an
	 * output stream (the << operator must be implemented).
	 * @param delim is the delimiter that should be used to separate the items.
	 * @param start is a character sequence that should be prepended to the
	 * result.
	 * @param end is a character sequence that should be appended to the result.
	 * @return start, followed by the delimiter-separated elements, followed by
	 * end.
	 */
	template <class T>
	static std::string join(const T &es, const std::string &delim,
	                        const std::string &start = "",
	                        const std::string &end = "")
	{
		std::stringstream res;
		bool first = true;
		res << start;
		for (const auto &e : es) {
			// The delimiter goes between elements, never before the first
			if (!first) {
				res << delim;
			}
			res << e;
			first = false;
		}
		res << end;
		return res.str();
	}
};
}

#endif /* _OUSIA_UTILS_H_ */
+*/ + +#include <sstream> + +#include "Utils.hpp" +#include "Variant.hpp" + +namespace ousia { + +/* Class Variant::TypeException */ + +Variant::TypeException::TypeException(Type actualType, Type requestedType) +    : OusiaException(std::string("Variant: Requested \"") + +                     Variant::getTypeName(requestedType) + +                     std::string("\" but is \"") + +                     Variant::getTypeName(actualType) + std::string("\"")), +      actualType(actualType), +      requestedType(requestedType) +{ +} + +/* Class Variant */ + +const char *Variant::getTypeName(Type type) +{ +	switch (type) { +		case Type::NULLPTR: +			return "null"; +		case Type::BOOL: +			return "boolean"; +		case Type::INT: +			return "integer"; +		case Type::DOUBLE: +			return "double"; +		case Type::STRING: +			return "string"; +		case Type::ARRAY: +			return "array"; +		case Type::MAP: +			return "map"; +	} +	return "unknown"; +} + +Variant::boolType Variant::toBool() const +{ +	switch (getType()) { +		case Type::NULLPTR: +			return false; +		case Type::BOOL: +			return asBool(); +		case Type::INT: +			return asInt() != 0; +		case Type::DOUBLE: +			return asDouble() != 0.0; +		case Type::STRING: +			return true; +		case Type::ARRAY: +			return true; +		case Type::MAP: +			return true; +	} +	return false; +} + +Variant::intType Variant::toInt() const +{ +	switch (getType()) { +		case Type::NULLPTR: +			return 0; +		case Type::BOOL: +			return asBool() ? 1 : 0; +		case Type::INT: +			return asInt(); +		case Type::DOUBLE: +			return asDouble(); +		case Type::STRING: +			return 0; // TODO: Parse string as int +		case Type::ARRAY: { +			const arrayType &a = asArray(); +			return (a.size() == 1) ? a[0].toInt() : 0; +		} +		case Type::MAP: +			return 0; +	} +	return false; +} + +Variant::doubleType Variant::toDouble() const +{ +	switch (getType()) { +		case Type::NULLPTR: +			return 0.0; +		case Type::BOOL: +			return asBool() ? 
1.0 : 0.0; +		case Type::INT: +			return asInt(); +		case Type::DOUBLE: +			return asDouble(); +		case Type::STRING: +			return 0.0; // TODO: Parse string as double +		case Type::ARRAY: { +			const arrayType &a = asArray(); +			return (a.size() == 1) ? a[0].toDouble() : 0; +		} +		case Type::MAP: +			return 0; +	} +	return false; +} + +Variant::stringType Variant::toString(bool escape) const +{ +	switch (getType()) { +		case Type::NULLPTR: +			return "null"; +		case Type::BOOL: +			return asBool() ? "true" : "false"; +		case Type::INT: +			return std::to_string(asInt()); +		case Type::DOUBLE: +			return std::to_string(asDouble()); +		case Type::STRING: { +			// TODO: Use proper serialization function +			std::stringstream ss; +			ss << "\"" << asString() << "\""; +			return ss.str(); +		} +		case Type::ARRAY: +			return Utils::join(asArray(), ", ", "[", "]"); +		case Type::MAP: +			return Utils::join(asMap(), ", ", "{", "}"); +	} +	return ""; +} + +} + diff --git a/src/core/common/Variant.hpp b/src/core/common/Variant.hpp new file mode 100644 index 0000000..d411fd3 --- /dev/null +++ b/src/core/common/Variant.hpp @@ -0,0 +1,761 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/ + +/** + * @file Variant.hpp + * + * The Variant class is used to efficiently represent a variables of varying + * type. Variant instances are used to represent data given by the end user and + * to exchange information between the host application and the script clients. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_VARIANT_HPP_ +#define _OUSIA_VARIANT_HPP_ + +#include <cstdint> +#include <map> +#include <string> +#include <vector> +#include <ostream> + +// TODO: Use +// http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html +// later (will allow to use 8 bytes for a variant) + +#include "Exceptions.hpp" + +namespace ousia { + +/** + * Instances of the Variant class represent any kind of data that is exchanged + * between the host application and the script engine. Variants are immutable. + */ +class Variant { +public: +	/** +	 * Enum containing the possible types a variant may have. +	 */ +	enum class Type : int16_t { +		NULLPTR, +		BOOL, +		INT, +		DOUBLE, +		STRING, +		ARRAY, +		MAP +	}; + +	/** +	 * Exception thrown whenever a variant is accessed via a getter function +	 * that is not supported for the current variant type. +	 */ +	class TypeException : public OusiaException { +	private: +		/** +		 * Internally used string holding the exception message. +		 */ +		const std::string msg; + +	public: +		/** +		 * Contains the actual type of the variant. +		 */ +		const Type actualType; + +		/** +		 * Contains the requested type of the variant. +		 */ +		const Type requestedType; + +		/** +		 * Constructor of the TypeException. +		 * +		 * @param actualType describes the actual type of the variant. +		 * @param requestedType describes the type in which the variant was +		 * requested. 
+		 */ +		TypeException(Type actualType, Type requestedType); +	}; + +	using boolType = bool; +	using intType = int32_t; +	using doubleType = double; +	using stringType = std::string; +	using arrayType = std::vector<Variant>; +	using mapType = std::map<std::string, Variant>; + +private: +	/** +	 * Used to store the actual type of the variant. +	 */ +	Type type = Type::NULLPTR; + +	/** +	 * Anonymous union containing the possible value of the variant. +	 */ +	union { +		/** +		 * The boolean value. Only valid if type is Type::BOOL. +		 */ +		boolType boolVal; +		/** +		 * The integer value. Only valid if type is Type::INT. +		 */ +		intType intVal; +		/** +		 * The number value. Only valid if type is Type::DOUBLE. +		 */ +		doubleType doubleVal; +		/** +		 * Pointer to the more complex data structures on the free store. Only +		 * valid if type is one of Type::STRING, Type::ARRAY, +		 * Type::MAP. +		 */ +		void *ptrVal; +	}; + +	/** +	 * Internally used to convert the current pointer value to a reference of +	 * the specified type. +	 */ +	template <typename T> +	T &asObj(Type requestedType) const +	{ +		const Type actualType = getType(); +		if (actualType == requestedType) { +			return *(static_cast<T *>(ptrVal)); +		} +		throw TypeException{actualType, requestedType}; +	} + +	/** +	 * Used internally to assign the value of another Variant instance to this +	 * instance. +	 * +	 * @param v is the Variant instance that should be copied to this instance. 
+	 */ +	void copy(const Variant &v) +	{ +		destroy(); +		type = v.type; +		switch (type) { +			case Type::NULLPTR: +				break; +			case Type::BOOL: +				boolVal = v.boolVal; +				break; +			case Type::INT: +				intVal = v.intVal; +				break; +			case Type::DOUBLE: +				doubleVal = v.doubleVal; +				break; +			case Type::STRING: +				ptrVal = new stringType(v.asString()); +				break; +			case Type::ARRAY: +				ptrVal = new arrayType(v.asArray()); +				break; +			case Type::MAP: +				ptrVal = new mapType(v.asMap()); +				break; +		} +	} + +	/** +	 * Used internally to move the value of another Variant instance to this +	 * instance. +	 * +	 * @param v is the Variant instance that should be copied to this instance. +	 */ +	void move(Variant &&v) +	{ +		destroy(); +		type = v.type; +		switch (type) { +			case Type::NULLPTR: +				break; +			case Type::BOOL: +				boolVal = v.boolVal; +				break; +			case Type::INT: +				intVal = v.intVal; +				break; +			case Type::DOUBLE: +				doubleVal = v.doubleVal; +				break; +			case Type::STRING: +			case Type::ARRAY: +			case Type::MAP: +				ptrVal = v.ptrVal; +				v.ptrVal = nullptr; +				break; +		} +		v.type = Type::NULLPTR; +	} + +	/** +	 * Used internally to destroy any value that was allocated on the heap. +	 */ +	void destroy() +	{ +		if (ptrVal) { +			switch (type) { +				case Type::STRING: +					delete static_cast<stringType *>(ptrVal); +					break; +				case Type::ARRAY: +					delete static_cast<arrayType *>(ptrVal); +					break; +				case Type::MAP: +					delete static_cast<mapType *>(ptrVal); +					break; +				default: +					break; +			} +		} +	} + +public: +	/** +	 * Copy constructor of the Variant class. +	 * +	 * @param v is the Variant instance that should be cloned. +	 */ +	Variant(const Variant &v) : ptrVal(nullptr) { copy(v); } + +	/** +	 * Move constructor of the Variant class. +	 * +	 * @param v is the reference to the Variant instance that should be moved, +	 * this instance is invalidated afterwards. 
+	 */ +	Variant(Variant &&v) : ptrVal(nullptr) { move(std::move(v)); } + +	/** +	 * Default constructor. Type is set to Type:null. +	 */ +	Variant() : ptrVal(nullptr) { setNull(); } + +	/** +	 * Default destructor, frees any memory that was allocated on the heap. +	 */ +	~Variant() { destroy(); } + +	/** +	 * Constructor for null values. Initializes the variant as null value. +	 */ +	Variant(std::nullptr_t) : ptrVal(nullptr) { setNull(); } + +	/** +	 * Constructor for boolean values. +	 * +	 * @param b boolean value. +	 */ +	Variant(boolType b) : ptrVal(nullptr) { setBool(b); } + +	/** +	 * Constructor for integer values. +	 * +	 * @param i integer value. +	 */ +	Variant(intType i) : ptrVal(nullptr) { setInt(i); } + +	/** +	 * Constructor for double values. +	 * +	 * @param d double value. +	 */ +	Variant(doubleType d) : ptrVal(nullptr) { setDouble(d); } + +	/** +	 * Constructor for string values. The given string is copied and managed by +	 * the new Variant instance. +	 * +	 * @param s is a reference to a C-Style string used as string value. +	 */ +	Variant(const char *s) : ptrVal(nullptr) { setString(s); } + +	/** +	 * Constructor for array values. The given array is copied and managed by +	 * the new Variant instance. +	 * +	 * @param a is a reference to the array +	 */ +	Variant(arrayType a) : ptrVal(nullptr) { setArray(std::move(a)); } + +	/** +	 * Constructor for map values. The given map is copied and managed by the +	 * new Variant instance. +	 * +	 * @param m is a reference to the map. +	 */ +	Variant(mapType m) : ptrVal(nullptr) { setMap(std::move(m)); } + +	/** +	 * Copy assignment operator. +	 */ +	Variant &operator=(const Variant &v) +	{ +		copy(v); +		return *this; +	} + +	/** +	 * Move assignment operator. +	 */ +	Variant &operator=(Variant &&v) +	{ +		move(std::move(v)); +		return *this; +	} + +	/** +	 * Assign nullptr_t operator (allows to write Variant v = nullptr). +	 * +	 * @param p is an instance of std::nullptr_t. 
+	 */ +	Variant &operator=(std::nullptr_t) +	{ +		setNull(); +		return *this; +	} + +	/** +	 * Assign a boolean value. +	 * +	 * @param b is the boolean value to which the variant should be set. +	 */ +	Variant &operator=(boolType b) +	{ +		setBool(b); +		return *this; +	} + +	/** +	 * Assign an integer value. +	 * +	 * @param i is the integer value to which the variant should be set. +	 */ +	Variant &operator=(intType i) +	{ +		setInt(i); +		return *this; +	} + +	/** +	 * Assign a double value. +	 * +	 * @param d is the double value to which the variant should be set. +	 */ +	Variant &operator=(doubleType d) +	{ +		setDouble(d); +		return *this; +	} + +	/** +	 * Assign a zero terminated const char array. +	 * +	 * @param s is the zero terminated const char array to which the variant +	 * should be set. +	 */ +	Variant &operator=(const char *s) +	{ +		setString(s); +		return *this; +	} + +	/** +	 * Checks whether this Variant instance represents the nullptr. +	 * +	 * @return true if the Variant instance represents the nullptr, false +	 * otherwise. +	 */ +	bool isNull() const { return type == Type::NULLPTR; } + +	/** +	 * Checks whether this Variant instance is a boolean. +	 * +	 * @return true if the Variant instance is a boolean, false otherwise. +	 */ +	bool isBool() const { return type == Type::BOOL; } + +	/** +	 * Checks whether this Variant instance is an integer. +	 * +	 * @return true if the Variant instance is an integer, false otherwise. +	 */ +	bool isInt() const { return type == Type::INT; } + +	/** +	 * Checks whether this Variant instance is a double. +	 * +	 * @return true if the Variant instance is a double, false otherwise. +	 */ +	bool isDouble() const { return type == Type::DOUBLE; } + +	/** +	 * Checks whether this Variant instance is a string. +	 * +	 * @return true if the Variant instance is a string, false otherwise. +	 */ +	bool isString() const { return type == Type::STRING; } + +	/** +	 * Checks whether this Variant instance is an array. 
+	 * +	 * @return true if the Variant instance is an array, false otherwise. +	 */ +	bool isArray() const { return type == Type::ARRAY; } + +	/** +	 * Checks whether this Variant instance is a map. +	 * +	 * @return true if the Variant instance is a map, false otherwise. +	 */ +	bool isMap() const { return type == Type::MAP; } + +	/** +	 * Returns the Variant boolean value. Performs no type conversion. Throws an +	 * exception if the underlying type is not a boolean. +	 * +	 * @return the boolean value. +	 */ +	boolType asBool() const +	{ +		if (isBool()) { +			return boolVal; +		} +		throw TypeException{getType(), Type::BOOL}; +	} + +	/** +	 * Returns the Variant integer value. Performs no type conversion. Throws an +	 * exception if the underlying type is not an integer. +	 * +	 * @return the integer value. +	 */ +	intType asInt() const +	{ +		if (isInt()) { +			return intVal; +		} +		throw TypeException{getType(), Type::INT}; +	} + +	/** +	 * Returns the Variant double value. Performs no type conversion. Throws an +	 * exception if the underlying type is not a double. +	 * +	 * @return the double value. +	 */ +	doubleType asDouble() const +	{ +		if (isDouble()) { +			return doubleVal; +		} +		throw TypeException{getType(), Type::DOUBLE}; +	} + +	/** +	 * Returns a const reference to the string value. Performs no type +	 * conversion. Throws an exception if the underlying type is not a string. +	 * +	 * @return the string value as const reference. +	 */ +	const stringType &asString() const +	{ +		return asObj<stringType>(Type::STRING); +	} + +	/** +	 * Returns a const reference to the string value. Performs no type +	 * conversion. Throws an exception if the underlying type is not a string. +	 * +	 * @return the string value as reference. +	 */ +	stringType &asString() { return asObj<stringType>(Type::STRING); } + +	/** +	 * Returns a const reference to the array value. Performs no type +	 * conversion. Throws an exception if the underlying type is not an array. 
+	 * +	 * @return the array value as const reference. +	 */ +	const arrayType &asArray() const { return asObj<arrayType>(Type::ARRAY); } + +	/** +	 * Returns a const reference to the array value. Performs no type +	 * conversion. Throws an exception if the underlying type is not an array. +	 * +	 * @return the array value as reference. +	 */ +	arrayType &asArray() { return asObj<arrayType>(Type::ARRAY); } + +	/** +	 * Returns a const reference to the map value. Performs no type +	 * conversion. Throws an exception if the underlying type is not a map. +	 * +	 * @return the map value as const reference. +	 */ +	const mapType &asMap() const { return asObj<mapType>(Type::MAP); } + +	/** +	 * Returns a reference to the map value. Performs no type conversion. +	 * Throws an exception if the underlying type is not a map. +	 * +	 * @return the map value as reference. +	 */ +	mapType &asMap() { return asObj<mapType>(Type::MAP); } + +	/** +	 * Returns the value of the Variant as boolean, performs type conversion. +	 * +	 * @return the Variant value converted to a boolean value. +	 */ +	boolType toBool() const; + +	/** +	 * Returns the value of the Variant as integer, performs type conversion. +	 * +	 * @return the Variant value converted to an integer value. +	 */ +	intType toInt() const; + +	/** +	 * Returns the value of the Variant as double, performs type conversion. +	 * +	 * @return the Variant value converted to a double value. +	 */ +	doubleType toDouble() const; + +	/** +	 * Returns the value of the Variant as string, performs type conversion. +	 * +	 * @return the value of the variant as string. +	 * @param escape if set to true, adds double quotes to strings and escapes +	 * them properly (resulting in a more or less JSONesque output). +	 */ +	stringType toString(bool escape = false) const; + +	/** +	 * Sets the variant to null. 
+	 */ +	void setNull() +	{ +		destroy(); +		type = Type::NULLPTR; +		ptrVal = nullptr; +	} + +	/** +	 * Sets the variant to the given boolean value. +	 * +	 * @param b is the new boolean value. +	 */ +	void setBool(boolType b) +	{ +		destroy(); +		type = Type::BOOL; +		boolVal = b; +	} + +	/** +	 * Sets the variant to the given integer value. +	 * +	 * @param i is the new integer value. +	 */ +	void setInt(intType i) +	{ +		destroy(); +		type = Type::INT; +		intVal = i; +	} + +	/** +	 * Sets the variant to the given double value. +	 * +	 * @param d is the new double value. +	 */ +	void setDouble(doubleType d) +	{ +		destroy(); +		type = Type::DOUBLE; +		doubleVal = d; +	} + +	/** +	 * Sets the variant to the given string value. +	 * +	 * @param d is the new string value. +	 */ +	void setString(const char *s) +	{ +		if (isString()) { +			asString().assign(s); +		} else { +			destroy(); +			type = Type::STRING; +			ptrVal = new stringType(s); +		} +	} + +	/** +	 * Sets the variant to the given array value. +	 * +	 * @param a is the new array value. +	 */ +	void setArray(arrayType a) +	{ +		if (isArray()) { +			asArray().swap(a); +		} else { +			destroy(); +			type = Type::ARRAY; +			ptrVal = new arrayType(std::move(a)); +		} +	} + +	/** +	 * Sets the variant to the given map value. +	 * +	 * @param a is the new map value. +	 */ +	void setMap(mapType m) +	{ +		if (isMap()) { +			asMap().swap(m); +		} else { +			destroy(); +			type = Type::MAP; +			ptrVal = new mapType(std::move(m)); +		} +	} + +	/** +	 * Returns the current type of the Variant. +	 * +	 * @return the current type of the Variant. +	 */ +	Type getType() const { return type; } + +	/** +	 * Returns the name of the given variant type as C-style string. +	 */ +	static const char *getTypeName(Type type); + +	/** +	 * Returns the name of the type of this variant instance. +	 */ +	const char *getTypeName() { return Variant::getTypeName(getType()); } + +	/** +	 * Prints the Variant to the output stream. 
+	 */ +	friend std::ostream &operator<<(std::ostream &os, const Variant &v) +	{ +		return os << v.toString(true); +	} + +	/** +	 * Prints a key value pair to the output stream. +	 */ +	friend std::ostream &operator<<(std::ostream &os, +	                                const mapType::value_type &v) +	{ +		// TODO: Use proper serialization function +		return os << "\"" << v.first << "\": " << v.second.toString(true); +	} + +	/* +	 * Comprison operators. +	 */ + +	friend bool operator<(const Variant &lhs, const Variant &rhs) +	{ +		// If the types do not match, we can not do a meaningful comparison. +		if (lhs.getType() != rhs.getType()) { +			throw TypeException(lhs.getType(), rhs.getType()); +		} +		switch (lhs.getType()) { +			case Type::NULLPTR: +				return false; +			case Type::BOOL: +				return lhs.boolVal < rhs.boolVal; +			case Type::INT: +				return lhs.intVal < rhs.intVal; +			case Type::DOUBLE: +				return lhs.doubleVal < rhs.doubleVal; +			case Type::STRING: +				return lhs.asString() < rhs.asString(); +			case Type::ARRAY: +				return lhs.asArray() < rhs.asArray(); +			case Type::MAP: +				return lhs.asMap() < rhs.asMap(); +		} +		throw OusiaException("Internal Error! 
Unknown type!"); +	} +	friend bool operator>(const Variant &lhs, const Variant &rhs) +	{ +		return rhs < lhs; +	} +	friend bool operator<=(const Variant &lhs, const Variant &rhs) +	{ +		return !(lhs > rhs); +	} +	friend bool operator>=(const Variant &lhs, const Variant &rhs) +	{ +		return !(lhs < rhs); +	} + +	friend bool operator==(const Variant &lhs, const Variant &rhs) +	{ +		if (lhs.getType() != rhs.getType()) { +			return false; +		} +		switch (lhs.getType()) { +			case Type::NULLPTR: +				return true; +			case Type::BOOL: +				return lhs.boolVal == rhs.boolVal; +			case Type::INT: +				return lhs.intVal == rhs.intVal; +			case Type::DOUBLE: +				return lhs.doubleVal == rhs.doubleVal; +			case Type::STRING: +				return lhs.asString() == rhs.asString(); +			case Type::ARRAY: +				return lhs.asArray() == rhs.asArray(); +			case Type::MAP: +				return lhs.asMap() == rhs.asMap(); +		} +		throw OusiaException("Internal Error! Unknown type!"); +	} +	 +	friend bool operator!=(const Variant &lhs, const Variant &rhs) +	{ +		return !(lhs == rhs); +	} +}; +} + +#endif /* _OUSIA_VARIANT_HPP_ */ + diff --git a/src/core/common/VariantReader.cpp b/src/core/common/VariantReader.cpp new file mode 100644 index 0000000..a31a658 --- /dev/null +++ b/src/core/common/VariantReader.cpp @@ -0,0 +1,625 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. 
+ +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <iostream> + +#include <cmath> +#include <sstream> + +#include "VariantReader.hpp" +#include "Utils.hpp" + +namespace ousia { + +// TODO: Better error messages (like "Expected 'x' but got 'y'") +// TODO: Replace delims with single char delim where possible +// TODO: Use custom return value instead of std::pair +// TODO: Allow buffered char reader to "fork" +// TODO: Rename CharReader to shorter CharReader +// TODO: Implement context in CharReader (to allow error messages to extract the +// current line) + +/* Error Messages */ + +static const char *ERR_UNEXPECTED_CHAR = "Unexpected character"; +static const char *ERR_UNEXPECTED_END = "Unexpected literal end"; +static const char *ERR_UNTERMINATED = "Unterminated literal"; +static const char *ERR_INVALID_ESCAPE = "Invalid escape sequence"; +static const char *ERR_INVALID_INTEGER = "Invalid integer value"; +static const char *ERR_TOO_LARGE = "Value too large to represent"; + +/* Class Number */ + +/** + * Class used internally to represent a number (integer or double). The number + * is represented by its components (base value a, nominator n, denominator d, + * exponent e, sign s and exponent sign sE). + */ +class Number { +private: +	/** +	 * Reprsents the part of the number: Base value a, nominator n, exponent e. +	 */ +	enum class Part { A, N, E }; + +	/** +	 * State used in the parser state machine +	 */ +	enum class State { +		INIT, +		HAS_MINUS, +		LEADING_ZERO, +		LEADING_POINT, +		INT, +		HEX, +		POINT, +		EXP_INIT, +		EXP_HAS_MINUS, +		EXP +	}; + +	/** +	 * Returns the numeric value of the given ASCII character (returns 0 for +	 * '0', 1 for '1', 10 for 'A' and so on). +	 * +	 * @param c is the character for which the numeric value should be returned. +	 * @return the numeric value the character represents. 
+	 */ +	static int charValue(char c) +	{ +		if (c >= '0' && c <= '9') { +			return c & 0x0F; +		} +		if ((c >= 'A' && c <= 'O') || (c >= 'a' && c <= 'o')) { +			return (c & 0x0F) + 9; +		} +		return -1; +	} + +	/** +	 * Appends the value of the character c to the internal number +	 * representation and reports any errors that might occur. +	 */ +	bool appendChar(char c, int base, Part p, CharReader &reader, +	                Logger &logger) +	{ +		// Check whether the given character is valid +		int v = charValue(c); +		if (v < 0 || v >= base) { +			logger.error(ERR_UNEXPECTED_CHAR, reader); +			return false; +		} + +		// Append the number to the specified part +		switch (p) { +			case Part::A: +				a = a * base + v; +				break; +			case Part::N: +				n = n * base + v; +				d = d * base; +				break; +			case Part::E: +				e = e * base + v; +				break; +		} + +		// Check for any overflows +		if (a < 0 || n < 0 || d < 0 || e < 0) { +			logger.error(ERR_TOO_LARGE, reader); +			return false; +		} +		return true; +	} + +public: +	/** +	 * Sign and exponent sign. +	 */ +	int8_t s, sE; + +	/** +	 * Exponent +	 */ +	int16_t e; + +	/** +	 * Base value, nominator, denominator +	 */ +	int64_t a, n, d; + +	/** +	 * Constructor of the number class. +	 */ +	Number() : s(1), sE(1), e(0), a(0), n(0), d(1) {} + +	/** +	 * Returns the represented double value. +	 */ +	double doubleValue() +	{ +		return s * (a + ((double)n / (double)d)) * pow(10.0, (double)(sE * e)); +	} + +	/** +	 * Returns the represented integer value. Only a lossless operation, if the +	 * number is an integer (as can be checked via the isInt method), otherwise +	 * the exponent and the fractional value will be truncated. +	 */ +	int64_t intValue() { return s * a; } + +	/** +	 * Returns true, if the number is an integer (has no fractional or +	 * exponential part). 
+	 */ +	bool isInt() { return (n == 0) && (d == 1) && (e == 0); } + +	/** +	 * Tries to parse the number from the given stream and loggs any errors to +	 * the given logger instance. Numbers are terminated by one of the given +	 * delimiters. +	 */ +	bool parse(CharReader &reader, Logger &logger, +	           const std::unordered_set<char> &delims); +}; + +bool Number::parse(CharReader &reader, Logger &logger, +           const std::unordered_set<char> &delims) +{ +	State state = State::INIT; +	char c; + +	// Consume the first whitespace characters +	reader.consumeWhitespace(); + +	// Iterate over the FSM to extract numbers +	while (reader.peek(c)) { +		// Abort, once a delimiter or whitespace is reached +		if (Utils::isWhitespace(c) || delims.count(c)) { +			reader.resetPeek(); +			break; +		} + +		// The character is not a whitespace character and not a delimiter +		switch (state) { +			case State::INIT: +			case State::HAS_MINUS: +				switch (c) { +					case '-': +						// Do not allow multiple minus signs +						if (state == State::HAS_MINUS) { +							logger.error(ERR_UNEXPECTED_CHAR, reader); +							return false; +						} +						state = State::HAS_MINUS; +						s = -1; +						break; +					case '0': +						// Remember a leading zero for the detection of "0x" +						state = State::LEADING_ZERO; +						break; +					case '.': +						// Remember a leading point as ".eXXX" is invalid +						state = State::LEADING_POINT; +						break; +					default: +						state = State::INT; +						if (!appendChar(c, 10, Part::A, reader, logger)) { +							return false; +						} +						break; +				} +				break; +			case State::LEADING_ZERO: +				if (c == 'x' || c == 'X') { +					state = State::HEX; +					break; +				} +			// fallthrough +			case State::INT: +				switch (c) { +					case '.': +						state = State::POINT; +						break; +					case 'e': +					case 'E': +						state = State::EXP_INIT; +						break; +					default: +						state = State::INT; +						if (!appendChar(c, 10, 
Part::A, reader, logger)) { +							return false; +						} +						break; +				} +				break; +			case State::HEX: +				if (!appendChar(c, 16, Part::A, reader, logger)) { +					return false; +				} +				break; +			case State::LEADING_POINT: +			case State::POINT: +				switch (c) { +					case 'e': +					case 'E': +						if (state == State::LEADING_POINT) { +							logger.error(ERR_UNEXPECTED_CHAR, reader); +							return false; +						} +						state = State::EXP_INIT; +						break; +					default: +						state = State::POINT; +						if (!appendChar(c, 10, Part::N, reader, logger)) { +							return false; +						} +						break; +				} +				break; +			case State::EXP_HAS_MINUS: +			case State::EXP_INIT: +				if (c == '-') { +					if (state == State::EXP_HAS_MINUS) { +						logger.error(ERR_UNEXPECTED_CHAR, reader); +						return false; +					} +					state = State::EXP_HAS_MINUS; +					sE = -1; +				} else { +					state = State::EXP; +					if (!appendChar(c, 10, Part::E, reader, logger)) { +						return false; +					} +				} +				break; +			case State::EXP: +				if (!appendChar(c, 10, Part::E, reader, logger)) { +					return false; +				} +				break; +		} +		reader.consumePeek(); +	} + +	// States in which ending is valid. 
Log an error in other states +	if (state == State::LEADING_ZERO || state == State::HEX || +	    state == State::INT || state == State::POINT || +	    state == State::EXP) { +		return true; +	} +	logger.error(ERR_UNEXPECTED_END, reader); +	return false; +} + + +/* Class Reader */ + +static const int STATE_INIT = 0; +static const int STATE_IN_STRING = 1; +static const int STATE_IN_ARRAY = 2; +static const int STATE_EXPECT_COMMA = 3; +static const int STATE_ESCAPE = 4; +static const int STATE_WHITESPACE = 5; +static const int STATE_RESYNC = 6; + +template <class T> +static std::pair<bool, T> error(CharReader &reader, Logger &logger, +                                const char *err, T res) +{ +	logger.error(err, reader); +	return std::make_pair(false, std::move(res)); +} + +std::pair<bool, std::string> VariantReader::parseString( +    CharReader &reader, Logger &logger, +    const std::unordered_set<char> *delims) +{ +	// Initialize the internal state +	int state = STATE_INIT; +	char quote = 0; +	std::stringstream res; + +	// Consume all whitespace +	reader.consumeWhitespace(); + +	// Statemachine whic iterates over each character in the stream +	// TODO: Combination of peeking and consumePeek is stupid as consumePeek is +	// the default (read and putBack would obviously be better, yet the latter +	// is not trivial to implement in the current CharReader). 
+	char c; +	while (reader.peek(c)) { +		switch (state) { +			case STATE_INIT: +				if (c == '"' || c == '\'') { +					quote = c; +					state = STATE_IN_STRING; +					break; +				} else if (delims && delims->count(c)) { +					return error(reader, logger, ERR_UNEXPECTED_END, res.str()); +				} +				return error(reader, logger, ERR_UNEXPECTED_CHAR, res.str()); +			case STATE_IN_STRING: +				if (c == quote) { +					reader.consumePeek(); +					return std::make_pair(true, res.str()); +				} else if (c == '\\') { +					state = STATE_ESCAPE; +					reader.consumePeek(); +					break; +				} else if (c == '\n') { +					return error(reader, logger, ERR_UNTERMINATED, res.str()); +				} +				res << c; +				reader.consumePeek(); +				break; +			case STATE_ESCAPE: +				// Handle all possible special escape characters +				switch (c) { +					case 'b': +						res << '\b'; +						break; +					case 'f': +						res << '\f'; +						break; +					case 'n': +						res << '\n'; +						break; +					case 'r': +						res << '\r'; +						break; +					case 't': +						res << '\t'; +						break; +					case 'v': +						res << '\v'; +						break; +					case '\'': +						res << '\''; +						break; +					case '"': +						res << '"'; +						break; +					case '\\': +						res << '\\'; +						break; +					case '\n': +						break; +					case 'x': +						// TODO: Parse Latin-1 sequence hex XX +						break; +					case 'u': +						// TODO: Parse 16-Bit unicode character hex XXXX +						break; +					default: +						if (Utils::isNumeric(c)) { +							// TODO: Parse octal 000 sequence +						} else { +							logger.error(ERR_INVALID_ESCAPE, reader); +						} +						break; +				} + +				// Switch back to the "normal" state +				state = STATE_IN_STRING; +				reader.consumePeek(); +				break; +		} +	} +	return error(reader, logger, ERR_UNEXPECTED_END, res.str()); +} + +std::pair<bool, Variant::arrayType> VariantReader::parseArray( +    CharReader &reader, Logger &logger, char delim) +{ +	Variant::arrayType res; +	
bool hadError = false; +	int state = delim ? STATE_IN_ARRAY : STATE_INIT; +	delim = delim ? delim : ']'; +	char c; + +	// Consume all whitespace +	reader.consumeWhitespace(); + +	// Iterate over the characters, use the parseGeneric function to read the +	// pairs +	while (reader.peek(c)) { +		// Generically handle the end of the array +		if (state != STATE_INIT && c == delim) { +			reader.consumePeek(); +			return std::make_pair(!hadError, res); +		} + +		switch (state) { +			case STATE_INIT: +				if (c != '[') { +					return error(reader, logger, ERR_UNEXPECTED_CHAR, res); +				} +				state = STATE_IN_ARRAY; +				reader.consumePeek(); +				break; +			case STATE_IN_ARRAY: { +				// Try to read an element using the parseGeneric function +				reader.resetPeek(); +				auto elem = parseGeneric(reader, logger, {',', delim}); +				res.push_back(elem.second); + +				// If the reader had no error, expect an comma, otherwise skip +				// to the next comma in the stream +				if (elem.first) { +					state = STATE_EXPECT_COMMA; +				} else { +					state = STATE_RESYNC; +					hadError = true; +				} +				break; +			} +			case STATE_EXPECT_COMMA: +				// Skip whitespace +				if (c == ',') { +					state = STATE_IN_ARRAY; +				} else if (!Utils::isWhitespace(c)) { +					hadError = true; +					state = STATE_RESYNC; +					logger.error(ERR_UNEXPECTED_CHAR, reader); +				} +				reader.consumePeek(); +				break; +			case STATE_RESYNC: +				// Just wait for another comma to arrive +				if (c == ',') { +					state = STATE_IN_ARRAY; +				} +				reader.consumePeek(); +				break; +		} +	} +	return error(reader, logger, ERR_UNEXPECTED_END, res); +} + +std::pair<bool, std::string> VariantReader::parseUnescapedString( +    CharReader &reader, Logger &logger, +    const std::unordered_set<char> &delims) +{ +	std::stringstream res; +	std::stringstream buf; +	char c; + +	// Consume all whitespace +	reader.consumeWhitespace(); + +	// Copy all characters, skip whitespace at the end +	int state = 
STATE_IN_STRING; +	while (reader.peek(c)) { +		if (delims.count(c)) { +			reader.resetPeek(); +			return std::make_pair(true, res.str()); +		} else if (Utils::isWhitespace(c)) { +			// Do not add whitespace to the output buffer +			state = STATE_WHITESPACE; +			buf << c; +		} else { +			// If we just hat a sequence of whitespace, append it to the output +			// buffer and continue +			if (state == STATE_WHITESPACE) { +				res << buf.str(); +				buf.str(std::string{}); +				buf.clear(); +				state = STATE_IN_STRING; +			} +			res << c; +		} +		reader.consumePeek(); +	} +	return std::make_pair(true, res.str()); +} + +std::pair<bool, int64_t> VariantReader::parseInteger( +    CharReader &reader, Logger &logger, +    const std::unordered_set<char> &delims) +{ +	Number n; +	if (n.parse(reader, logger, delims)) { +		// Only succeed if the parsed number is an integer, otherwise this is an +		// error +		if (n.isInt()) { +			return std::make_pair(true, n.intValue()); +		} else { +			return error(reader, logger, ERR_INVALID_INTEGER, n.intValue()); +		} +	} +	return std::make_pair(false, n.intValue()); +} + +std::pair<bool, double> VariantReader::parseDouble( +    CharReader &reader, Logger &logger, +    const std::unordered_set<char> &delims) +{ +	Number n; +	bool res = n.parse(reader, logger, delims); +	return std::make_pair(res, n.doubleValue()); +} + +std::pair<bool, Variant> VariantReader::parseGeneric( +    CharReader &reader, Logger &logger, +    const std::unordered_set<char> &delims) +{ +	char c; + +	// Skip all whitespace characters +	reader.consumeWhitespace(); +	while (reader.peek(c)) { +		// Stop if a delimiter is reached +		if (delims.count(c)) { +			return error(reader, logger, ERR_UNEXPECTED_END, nullptr); +		} + +		// Parse a string if a quote is reached +		if (c == '"' || c == '\'') { +			auto res = parseString(reader, logger); +			return std::make_pair(res.first, res.second.c_str()); +		} + +		if (c == '[') { +			// TODO: Parse struct descriptor +		} + +		
// Try to parse everything that looks like a number as number +		if (Utils::isNumeric(c) || c == '-') { +			Number n; + +			// Fork the reader +			CharReaderFork fork = reader.fork(); + +			// TODO: Fork logger + +			// Try to parse the number +			if (n.parse(fork, logger, delims)) { +				// Parsing was successful, advance the reader +				fork.commit(); +				if (n.isInt()) { +					return std::make_pair( +					    true, +					    Variant{static_cast<Variant::intType>(n.intValue())}); +				} else { +					return std::make_pair(true, n.doubleValue()); +				} +			} +		} + +		// Parse an unescaped string in any other case +		auto res = parseUnescapedString(reader, logger, delims); + +		// Handling for special primitive values +		if (res.first) { +			if (res.second == "true") { +				return std::make_pair(true, Variant{true}); +			} +			if (res.second == "false") { +				return std::make_pair(true, Variant{false}); +			} +			if (res.second == "null") { +				return std::make_pair(true, Variant{nullptr}); +			} +		} +		return std::make_pair(res.first, res.second.c_str()); +	} +	return error(reader, logger, ERR_UNEXPECTED_END, nullptr); +} +} + diff --git a/src/core/common/VariantReader.hpp b/src/core/common/VariantReader.hpp new file mode 100644 index 0000000..5e7c5d2 --- /dev/null +++ b/src/core/common/VariantReader.hpp @@ -0,0 +1,166 @@ +/* +    Ousía +    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. 
+ +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file VariantReader.hpp + * + * Provides parsers for various micro formats. These formats include integers, + * doubles, strings, JSON and the Ousía struct notation. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_VARIANT_READER_HPP_ +#define _OUSIA_VARIANT_READER_HPP_ + +#include <cstdint> +#include <unordered_set> +#include <utility> + +#include "CharReader.hpp" +#include "Logger.hpp" +#include "Variant.hpp" + +namespace ousia { + +class VariantReader { +private: +	/** +	 * Parses a string which may either be enclosed by " or ', unescapes +	 * entities in the string as specified for JavaScript. +	 * +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned +	 * after the terminating quote character or at the terminating delimiting +	 * character. +	 * @param logger is the logger instance that should be used to log error +	 * messages and warnings. +	 * @param delims is an optional set of delimiters after which parsing has to +	 * be stopped (the delimiters may occur inside the actual string, but not +	 * outside). If nullptr is given, no delimiter is used and a complete string +	 * is read. +	 */ +	static std::pair<bool, std::string> parseString( +	    CharReader &VariantReader, Logger &logger, +	    const std::unordered_set<char> *delims); + +public: +	/** +	 * Parses a string which may either be enclosed by " or ', unescapes +	 * entities in the string as specified for JavaScript. +	 * +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned +	 * after the terminating quote character or at the terminating delimiting +	 * character. 
+	 * @param logger is the logger instance that should be used to log error +	 * messages and warnings. +	 * @param delims is a set of delimiters after which parsing has to +	 * be stopped (the delimiters may occur inside the actual string, but not +	 * outside). +	 */ +	static std::pair<bool, std::string> parseString( +	    CharReader &VariantReader, Logger &logger, +	    const std::unordered_set<char> &delims) +	{ +		return parseString(VariantReader, logger, &delims); +	} + +	/** +	 * Parses a string which may either be enclosed by " or ', unescapes +	 * entities in the string as specified for JavaScript. +	 * +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned  +	 * after the terminating quote character or at the terminating delimiting +	 * character. +	 * @param logger is the logger instance that should be used to log error +	 * messages and warnings. +	 */ +	static std::pair<bool, std::string> parseString(CharReader &VariantReader, +	                                                Logger &logger) +	{ +		return parseString(VariantReader, logger, nullptr); +	} + +	/** +	 * Extracts an unescaped string from the given buffered char VariantReader +	 * instance. This function just reads text until one of the given delimiter +	 * characters is reached. +	 * +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned +	 * at the terminating delimiting character. +	 * @param delims is a set of characters which will terminate the string. +	 * These characters are not included in the result. May not be nullptr. 
+	 */ +	static std::pair<bool, std::string> parseUnescapedString( +	    CharReader &VariantReader, Logger &logger, +	    const std::unordered_set<char> &delims); + +	/** +	 * Parses an integer from the given buffered char VariantReader instance +	 * until one of the given delimiter characters is reached. +	 * +	 * @param VariantReader is a reference to the CharReader instance from +	 * which the character data should been VariantReader. The VariantReader +	 * will be positioned at the terminating delimiting character or directly +	 * after the integer. +	 */ +	static std::pair<bool, int64_t> parseInteger( +	    CharReader &VariantReader, Logger &logger, +	    const std::unordered_set<char> &delims); + +	/** +	 * Parses an double from the given buffered char VariantReader instance +	 * until one of the given delimiter characters is reached. +	 * +	 * @param VariantReader is a reference to the CharReader instance from +	 * which the character data should been VariantReader. The VariantReader +	 * will be positioned at the terminating delimiting character or directly +	 * after the integer. +	 */ +	static std::pair<bool, double> parseDouble( +	    CharReader &VariantReader, Logger &logger, +	    const std::unordered_set<char> &delims); + +	/** +	 * Parses an array of values. +	 */ +	static std::pair<bool, Variant::arrayType> parseArray( +	    CharReader &VariantReader, Logger &logger, char delim = 0); + +	/** +	 * Tries to parse the most specific item from the given stream until one of +	 * the given delimiters is reached or a meaningful literal has been read. +	 * The resulting variant represents the value that has been read. +	 * +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned +	 * at the terminating delimiting character. +	 * @param delims is a set of characters which will terminate the string. +	 * These characters are not included in the result. 
May not be nullptr. +	 */ +	static std::pair<bool, Variant> parseGeneric( +	    CharReader &VariantReader, Logger &logger, +	    const std::unordered_set<char> &delims); +}; +} + +#endif /* _OUSIA_VARIANT_READER_HPP_ */ +  | 
