4 files changed, 659 insertions, 4 deletions
diff --git a/src/core/utils/BufferedCharReader.cpp b/src/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..0377015
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,216 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014  Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "BufferedCharReader.hpp"
+
+namespace ousia {
+namespace utils {
+
+// Constants used within the linebreak state machine (ReadCursor::lbState).
+static const uint8_t LB_STATE_NONE = 0x00;  // no linebreak character pending
+static const uint8_t LB_STATE_ONE = 0x01;  // not referenced by the code in this file
+static const uint8_t LB_STATE_LF = 0x10;  // type bits: last linebreak char was '\n'
+static const uint8_t LB_STATE_CR = 0x20;  // type bits: last linebreak char was '\r'
+static const uint8_t LB_STATE_MASK_CNT = 0x0F;  // low nibble holds the counter
+static const uint8_t LB_STATE_MASK_TYPE = 0xF0;  // high nibble holds the type
+
+/*******************************************************************************
+ * Struct BufferedCharReader::ReadCursor
+ ******************************************************************************/
+
+BufferedCharReader::ReadCursor::ReadCursor(const bool destructive) :
+		destructive(destructive)
+{
+	reset(); // start at line 1, column 1, first byte of the first buffer element
+}
+
+void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor)
+{
+	this->line = cursor.line; // the const "destructive" flag is not copied
+	this->column = cursor.column;
+	this->bufferElem = cursor.bufferElem;
+	this->bufferPos = cursor.bufferPos;
+	this->lbState = cursor.lbState; // carry over the linebreak state as well
+}
+
+void BufferedCharReader::ReadCursor::reset()
+{
+	this->line = 1; // line and column numbers start at one
+	this->column = 1;
+	this->bufferElem = 0; // first element of the buffer queue
+	this->bufferPos = 0; // first byte within that element
+	this->lbState = LB_STATE_NONE; // no linebreak character seen so far
+}
+
+/*******************************************************************************
+ * Class BufferedCharReader
+ ******************************************************************************/
+
+BufferedCharReader::BufferedCharReader() :
+	readCursor(true), peekCursor(false) // destructive read, non-destructive peek
+{
+	reset(); // empty buffer, both cursors at the start, reader open
+}
+
+void BufferedCharReader::reset()
+{
+	readCursor.reset();
+	peekCursor.reset();
+	buffer.clear(); // drop all data that was fed but not yet consumed
+	closed = false; // allow feed() to accept data again
+}
+
+bool BufferedCharReader::feed(const std::string &data)
+{
+	// Refuse new data once close() has been called
+	if (closed) {
+		return false;
+	}
+
+	// Append a copy of the data as a new element at the end of the queue
+	buffer.push_back(data);
+	return true;
+}
+
+void BufferedCharReader::close()
+{
+	closed = true; // feed() fails from now on and atEnd() may become true
+}
+
+bool BufferedCharReader::substituteLinebreaks(ReadCursor *cursor, char *c)
+{
+	// Handle line breaks: emits a single '\n' for each of the character
+	// combinations \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n
+	if ((*c == '\n') || (*c == '\r')) {
+		// Determine the type of the current linebreak character
+		const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR;
+
+		// Read the last count and the last type from the state
+		const uint8_t lastCount = cursor->lbState & LB_STATE_MASK_CNT;
+		const uint8_t lastType = cursor->lbState & LB_STATE_MASK_TYPE;
+
+		// Store the current type; the counter toggles between zero and one
+		cursor->lbState = ((lastCount + 1) & 1) | type;
+
+		// Emit a linebreak if this character starts a new linebreak or if the
+		// same character is repeated; the second half of a pair is swallowed
+		if (!lastCount || (lastType == type)) {
+			*c = '\n';
+			return true;
+		}
+		return false;
+	}
+
+	// Any other character resets the linebreak state and is passed through
+	cursor->lbState = LB_STATE_NONE;
+	return true;
+}
+
+bool BufferedCharReader::readCharacterAtCursor(ReadCursor *cursor,
+		char *c)
+{
+	bool hasChar = false;
+	while (!hasChar) {
+		// Abort if the current buffer element does not point to a valid entry
+		// in the buffer -- we must wait until another data block has been fed
+		// into the buffer
+		if (cursor->bufferElem >= buffer.size()) {
+			return false;
+		}
+
+		// Fetch the buffer element the given cursor points to
+		const std::string &data = buffer[cursor->bufferElem];
+
+		// The element is exhausted: a destructive cursor drops it from the
+		// queue, a non-destructive cursor just moves on to the next element.
+		if (cursor->bufferPos >= data.length()) {
+			if (cursor->destructive) {
+				buffer.pop_front();
+			} else {
+				cursor->bufferElem++;
+			}
+			cursor->bufferPos = 0;
+			continue;
+		}
+
+		// Read the character, advance the buffer position
+		*c = *(data.data() + cursor->bufferPos);
+		cursor->bufferPos++;
+
+		// Substitute linebreaks with a single LF (0x0A)
+		hasChar = substituteLinebreaks(cursor, c); // false: byte swallowed, loop
+	}
+
+	// Update the line/column counters of the cursor
+	if (*c == '\n') {
+		cursor->line++;
+		cursor->column = 1;
+	} else {
+		// Ignore UTF-8 continuation bytes (bit pattern 10xxxxxx)
+		if (!((*c & 0x80) && !(*c & 0x40))) {
+			cursor->column++;
+		}
+	}
+
+	return true;
+}
+
+bool BufferedCharReader::peek(char *c)
+{
+	return readCharacterAtCursor(&peekCursor, c); // non-destructive: buffer is kept
+}
+
+bool BufferedCharReader::read(char *c)
+{
+	resetPeek(); // NOTE(review): synced before the read, so peekCursor lags after it
+	return readCharacterAtCursor(&readCursor, c);
+}
+
+void BufferedCharReader::consumePeek()
+{
+	// Drop all buffer elements that lie completely before the peek cursor
+	for (unsigned int i = 0; i < peekCursor.bufferElem; i++) {
+		buffer.pop_front();
+	}
+	peekCursor.bufferElem = 0; // its element is now at the front of the queue
+
+	// Copy the peek cursor to the read cursor
+	readCursor.assign(peekCursor);
+}
+
+void BufferedCharReader::resetPeek()
+{
+	// Discard the peek progress: continue peeking at the read cursor position
+	peekCursor.assign(readCursor);
+}
+
+bool BufferedCharReader::atEnd()
+{
+	// Only a closed reader can be at its end. Queued elements that are empty
+	// or already read completely (e.g. after feed("")) hold no pending data.
+	unsigned int pos = readCursor.bufferPos;
+	unsigned int i = readCursor.bufferElem;
+	for (; closed && i < buffer.size(); i++, pos = 0) {
+		if (pos < buffer[i].size()) return false;
+	}
+	return closed;
+}
+
+}
+}
+
diff --git a/src/core/utils/BufferedCharReader.hpp b/src/core/utils/BufferedCharReader.hpp
new file mode 100644
index 0000000..86f43b5
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.hpp
@@ -0,0 +1,240 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014  Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+#define _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+
+#include <deque>
+#include <string>
+#include <cstdint>
+
+namespace ousia {
+namespace utils {
+
+/**
+ * The BufferedCharReader class is used for storing incoming data that
+ * is fed into the pipeline as well as reading/peeking single characters
+ * from that buffer. Additionally it counts the current column/row
+ * (with correct handling for UTF-8) and contains an internal state
+ * machine that handles the detection of linebreaks.
+ *
+ * Additionally the BufferedCharReader performs the following tasks:
+ * 1. Convert the incoming character encoding to UTF-8 (TODO: implement)
+ * 2. Convert arbitrary linebreaks to a single "\n"
+ */
+class BufferedCharReader {
+
+private:
+
+	/**
+	 * The ReadCursor structure is responsible for representing the read
+	 * position within the text and all state machine states belonging to the
+	 * cursor. There are two types of read cursors: destructive and
+	 * non-destructive read cursors.
+	 */
+	struct ReadCursor {
+		/**
+		 * Specifies whether this is a destructive cursor (bytes are discarded
+		 * once they were read from the buffer).
+		 */
+		const bool destructive;
+
+		/**
+		 * The line the cursor currently points to.
+		 */
+		unsigned int line;
+
+		/**
+		 * The column the cursor currently points to.
+		 */
+		unsigned int column;
+
+		/**
+		 * The index of the element in the data buffer we're currently reading
+		 * from.
+		 */
+		unsigned int bufferElem;
+
+		/**
+		 * The byte position within this data buffer.
+		 */
+		unsigned int bufferPos;
+
+		/**
+		 * State variable used in the internal state machine of the
+		 * line feed detection.
+		 */
+		uint8_t lbState;
+
+		/**
+		 * Constructor of the ReadCursor structure.
+		 *
+		 * @param destructive specifies whether the ReadCursor is destructive
+		 * (consumes all read characters, as used in the "read cursor") or
+		 * non-destructive (as used in the "peek cursor").
+		 */
+		ReadCursor(const bool destructive);
+
+		/**
+		 * Copies the data from another ReadCursor without overriding the
+		 * "destructive" flag.
+		 */
+		void assign(const ReadCursor &cursor);
+
+		/**
+		 * Resets the cursor without changing the "destructive" flag.
+		 */
+		void reset();
+	};
+
+	/**
+	 * Queue containing the data that has been fed into the char reader.
+	 */
+	std::deque<std::string> buffer;
+
+	/**
+	 * The read (destructive) and the peek (non-destructive) cursor.
+	 */
+	ReadCursor readCursor, peekCursor;
+
+	/**
+	 * Determines whether the reader has been closed.
+	 */
+	bool closed;
+
+	/**
+	 * Substitute any combination of linebreaks in the incoming code with "\n".
+	 * Returns true if the current character is meant as output, false
+	 * otherwise.
+	 */
+	bool substituteLinebreaks(ReadCursor *cursor, char *c);
+
+	/**
+	 * Reads a character from the input buffer and advances the given read
+	 * cursor.
+	 *
+	 * @param cursor is a pointer to the read cursor that should be used
+	 * for reading.
+	 * @param c is an output parameter, which will be set to the read character.
+	 * Bytes swallowed by the linebreak substitution and buffer element
+	 * boundaries are skipped internally, so the caller does not have to call
+	 * this function again for them.
+	 * @return true if a character was read, false if there was not enough
+	 * data in the buffer (c and the cursor may still have been modified in
+	 * that case).
+	 */
+	bool readCharacterAtCursor(ReadCursor *cursor, char *c);
+
+	/**
+	 * Updates the row and column count for a read character. NOTE(review):
+	 * no definition in BufferedCharReader.cpp -- looks unused, confirm.
+	 */
+	void updatePositionCounters(const char c);
+
+public:
+
+	/**
+	 * Constructor of the buffered char reader class.
+	 */
+	BufferedCharReader();
+
+	/**
+	 * Resets the reader to its initial state.
+	 */
+	void reset();
+
+	/**
+	 * Feeds new data into the internal buffer of the BufferedCharReader
+	 * class.
+	 *
+	 * @param data is a string containing the data that should be
+	 * appended to the internal buffer.
+	 * @return true if the operation was successful, false otherwise (e.g.
+	 * because the reader is closed).
+	 */
+	bool feed(const std::string &data);
+
+	/**
+	 * Marks the end of the input, allowing successors in the pipeline
+	 * to react properly (e.g. creating the end of stream token).
+	 */
+	void close();
+
+	/**
+	 * Peeks a single character. If called multiple times, returns the
+	 * character after the previously peeked character.
+	 *
+	 * @param c is a pointer to the character to which the result should be
+	 * written.
+	 * @return true if the character was successfully read, false if there are
+	 * no more characters to be read in the buffer.
+	 */
+	bool peek(char *c);
+
+	/**
+	 * Reads a character from the input data. If "peek" was called
+	 * beforehand resets the peek pointer.
+	 *
+	 * @param c is a pointer to the character to which the result should be
+	 * written.
+	 * @return true if the character was successfully read, false if there are
+	 * no more characters to be read in the buffer.
+	 */
+	bool read(char *c);
+
+	/**
+	 * Advances the read pointer to the peek pointer -- so if the "peek"
+	 * function was called, "read" will now return the character after
+	 * the last peeked character.
+	 */
+	void consumePeek();
+
+	/**
+	 * Resets the peek pointer to the "read" pointer.
+	 */
+	void resetPeek();
+
+	/**
+	 * Returns true if there are no more characters as the stream was
+	 * closed.
+	 */
+	bool atEnd();
+
+	/**
+	 * Returns the current line of the read cursor (starting with one).
+	 */
+	inline int getLine()
+	{
+		return readCursor.line;
+	}
+
+	/**
+	 * Returns the current column of the read cursor (starting with one).
+	 */
+	inline int getColumn()
+	{
+		return readCursor.column;
+	}
+
+};
+
+}
+}
+
+#endif /* _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ */
+
diff --git a/src/plugins/mozjs/MozJsScriptEngine.cpp b/src/plugins/mozjs/MozJsScriptEngine.cpp
index c67a3b4..f269eb7 100644
--- a/src/plugins/mozjs/MozJsScriptEngine.cpp
+++ b/src/plugins/mozjs/MozJsScriptEngine.cpp
@@ -426,8 +426,10 @@ void MozJsScriptEngineScope::variantToValue(const Variant &var,
 			return;
 		}
 		case VariantType::function: {
-			JS::RootedObject f(cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
-			JS_SetPrivate(f, new MozJsFunctionData(*this, var.getFunctionValue()->clone()));
+			JS::RootedObject f(
+			    cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
+			JS_SetPrivate(f, new MozJsFunctionData(
+			                     *this, var.getFunctionValue()->clone()));
 			JS_FreezeObject(cx, f);
 			val.setObjectOrNull(f.get());
 			return;
@@ -497,8 +499,7 @@ MozJsScriptEngine::~MozJsScriptEngine()
 	JS_ShutDown();
 }
 
-MozJsScriptEngineScope *MozJsScriptEngine::createScope()
-{
+MozJsScriptEngineScope *MozJsScriptEngine::createScope() {
 	return new MozJsScriptEngineScope(rt);
 }
 }
diff --git a/test/core/utils/BufferedCharReader.cpp b/test/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..f8f668c
--- /dev/null
+++ b/test/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,198 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014  Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <string>
+#include <iostream>
+
+#include "gtest/gtest.h"
+
+#include "BufferedCharReader.hpp"
+
+TEST(BufferedCharReaderTest, SimpleReadTest)
+{
+	const std::string testStr("this is a test");
+	char c;
+
+	// Feed the test string into the reader (class lives in ousia::utils)
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+	reader.close();
+
+	// Read the test string character by character until the end is reached
+	std::string res;
+	while (!reader.atEnd()) {
+		ASSERT_TRUE(reader.read(&c));
+		res.append(&c, 1);
+	}
+
+	// The two strings must be equal
+	ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+	// We must now be at line 1, column 15
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+
+	// If we call either read or peek, false is returned
+	ASSERT_FALSE(reader.read(&c));
+	ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, SimplePeekTest)
+{
+	const std::string testStr("this is a test");
+	char c;
+
+	// Feed the test string into the reader (class lives in ousia::utils)
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+	reader.close();
+
+	// Peek the whole test string without consuming it
+	std::string res;
+	while (reader.peek(&c)) {
+		res.append(&c, 1);
+	}
+
+	// Peeking does not trigger the "atEnd" flag
+	ASSERT_FALSE(reader.atEnd());
+
+	// The two strings must be equal
+	ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+	// We must now be at line 1, column 1 and NOT at the end of the stream
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+	ASSERT_FALSE(reader.atEnd());
+
+	// If we consume the peek, we must be at line 1, column 15 and we should be
+	// at the end of the stream
+	reader.consumePeek();
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+	ASSERT_TRUE(reader.atEnd());
+
+	// If we call either read or peek, false is returned
+	ASSERT_FALSE(reader.read(&c));
+	ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, SplittedPeakTest)
+{
+	const std::string testStr("this is a test");
+	char c;
+
+	// Create an empty reader (class lives in ousia::utils)
+	ousia::utils::BufferedCharReader reader;
+
+	// Try to peek the test string, feed char after char into the reader
+	std::string res;
+	for (unsigned int i = 0; i < testStr.length(); i++) {
+		reader.feed(std::string(&testStr[i], 1));
+		while (reader.peek(&c)) {
+			res.append(&c, 1);
+		}
+	}
+	reader.close();
+
+	// Consume the peeked data
+	ASSERT_FALSE(reader.atEnd());
+	reader.consumePeek();
+	ASSERT_TRUE(reader.atEnd());
+
+	// The two strings must be equal
+	ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+	// We must now be at line 1, column 15
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+
+	// If we call either read or peek, false is returned
+	ASSERT_FALSE(reader.read(&c));
+	ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, RowColumnCounterTest)
+{
+	const std::string testStr("1\n\r2\n3\r\n\n4");
+	char c;
+
+	// Feed the test string into the reader (class lives in ousia::utils)
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+	reader.close();
+
+	// We should currently be in line 1, column 1
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+
+	// Read two characters ("1" and the first linebreak)
+	for (int i = 0; i < 2; i++) reader.read(&c);
+	ASSERT_EQ(2, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+
+	// Read two characters ("2" and the second linebreak)
+	for (int i = 0; i < 2; i++) reader.read(&c);
+	ASSERT_EQ(3, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+
+	// Read three characters ("3" and two linebreaks)
+	for (int i = 0; i < 3; i++) reader.read(&c);
+	ASSERT_EQ(5, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+}
+
+TEST(BufferedCharReaderTest, LinebreakSubstitutionTest)
+{
+	const std::string testStr("this\n\ris\n\rjust\na test\r\n\rtest\n\r");
+	const std::string expStr("this\nis\njust\na test\n\ntest\n");
+
+	// Feed the test string into the reader (class lives in ousia::utils)
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+
+	// Read all characters from the test string
+	std::string res;
+	char c;
+	while (reader.read(&c)) {
+		res.append(&c, 1);
+	}
+
+	// Every linebreak combination must have been replaced by a single "\n"
+	ASSERT_STREQ(expStr.c_str(), res.c_str());
+}
+
+TEST(BufferedCharReaderTest, RowColumnCounterUTF8Test)
+{
+	// Create a test string with some umlauts (two bytes each in UTF-8)
+	const std::string testStr("\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f");
+	char c;
+
+	// Feed the test string into the reader (class lives in ousia::utils)
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+	reader.close();
+
+	// Read all bytes
+	while (reader.read(&c));
+
+	// The sequence above equals 5 UTF-8 characters (so after reading all the
+	// cursor is at position 6)
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(6, reader.getColumn());
+}
+