diff options
| -rw-r--r-- | src/core/utils/BufferedCharReader.cpp | 229 | ||||
| -rw-r--r-- | src/core/utils/BufferedCharReader.hpp | 240 | ||||
| -rw-r--r-- | src/plugins/mozjs/MozJsScriptEngine.cpp | 9 | ||||
| -rw-r--r-- | test/core/utils/BufferedCharReader.cpp | 198 | 
4 files changed, 672 insertions, 4 deletions
diff --git a/src/core/utils/BufferedCharReader.cpp b/src/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..0377015
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,229 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014  Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "BufferedCharReader.hpp"
+
+namespace ousia {
+namespace utils {
+
+// Constants used within the linebreak state machine. The upper nibble of the
+// state stores the type of the last linebreak character, the lower nibble
+// stores how many linebreak characters of the current group were seen (mod 2).
+static const uint8_t LB_STATE_NONE = 0x00;
+static const uint8_t LB_STATE_ONE = 0x01;
+static const uint8_t LB_STATE_LF = 0x10;
+static const uint8_t LB_STATE_CR = 0x20;
+static const uint8_t LB_STATE_MASK_CNT = 0x0F;
+static const uint8_t LB_STATE_MASK_TYPE = 0xF0;
+
+/*******************************************************************************
+ * Struct BufferedCharReader::ReadCursor
+ ******************************************************************************/
+
+BufferedCharReader::ReadCursor::ReadCursor(const bool destructive) :
+		destructive(destructive)
+{
+	reset();
+}
+
+void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor)
+{
+	// The "destructive" flag is deliberately not copied
+	this->line = cursor.line;
+	this->column = cursor.column;
+	this->bufferElem = cursor.bufferElem;
+	this->bufferPos = cursor.bufferPos;
+	this->lbState = cursor.lbState;
+}
+
+void BufferedCharReader::ReadCursor::reset()
+{
+	this->line = 1;
+	this->column = 1;
+	this->bufferElem = 0;
+	this->bufferPos = 0;
+	this->lbState = LB_STATE_NONE;
+}
+
+/*******************************************************************************
+ * Class BufferedCharReader
+ ******************************************************************************/
+
+BufferedCharReader::BufferedCharReader() :
+	readCursor(true), peekCursor(false)
+{
+	reset();
+}
+
+void BufferedCharReader::reset()
+{
+	readCursor.reset();
+	peekCursor.reset();
+	buffer.clear();
+	closed = false;
+}
+
+bool BufferedCharReader::feed(const std::string &data)
+{
+	// Abort if the BufferedCharReader was closed
+	if (closed) {
+		return false;
+	}
+
+	// Append the data onto the queue
+	buffer.push_back(data);
+	return true;
+}
+
+void BufferedCharReader::close()
+{
+	closed = true;
+}
+
+bool BufferedCharReader::substituteLinebreaks(ReadCursor *cursor, char *c)
+{
+	// Handle line breaks, inserts breaks after the following character
+	// combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n
+	if ((*c == '\n') || (*c == '\r')) {
+		// Determine the type of the current linebreak character
+		const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR;
+
+		// Read the last count and the last type from the state
+		const uint8_t lastCount = cursor->lbState & LB_STATE_MASK_CNT;
+		const uint8_t lastType = cursor->lbState & LB_STATE_MASK_TYPE;
+
+		// Set the current linebreak type and counter in the state
+		cursor->lbState = ((lastCount + 1) & 1) | type;
+
+		// If either this is the first instance of this character or the same
+		// return character is repeated
+		if (!lastCount || (lastType == type)) {
+			*c = '\n';
+			return true;
+		}
+		return false;
+	}
+
+	// Any other character ends the current linebreak group
+	cursor->lbState = LB_STATE_NONE;
+	return true;
+}
+
+bool BufferedCharReader::readCharacterAtCursor(ReadCursor *cursor,
+		char *c)
+{
+	bool hasChar = false;
+	while (!hasChar) {
+		// Abort if the current buffer element does not point to a valid entry
+		// in the buffer -- we must wait until another data block has been fed
+		// into the buffer
+		if (cursor->bufferElem >= buffer.size()) {
+			return false;
+		}
+
+		// Fetch the current element the cursor points to
+		const std::string &data = buffer[cursor->bufferElem];
+
+		// Handle the "no data" case -- either in a destructive or
+		// non-destructive manner.
+		if (cursor->bufferPos >= data.length()) {
+			if (cursor->destructive) {
+				buffer.pop_front();
+			} else {
+				cursor->bufferElem++;
+			}
+			cursor->bufferPos = 0;
+			continue;
+		}
+
+		// Read the character, advance the buffer position
+		*c = *(data.data() + cursor->bufferPos);
+		cursor->bufferPos++;
+
+		// Substitute linebreaks with a single LF (0x0A)
+		hasChar = substituteLinebreaks(cursor, c);
+	}
+
+	// Update the position counter
+	if (*c == '\n') {
+		cursor->line++;
+		cursor->column = 1;
+	} else {
+		// Ignore UTF-8 continuation bytes
+		if (!((*c & 0x80) && !(*c & 0x40))) {
+			cursor->column++;
+		}
+	}
+
+	return true;
+}
+
+bool BufferedCharReader::peek(char *c)
+{
+	return readCharacterAtCursor(&peekCursor, c);
+}
+
+bool BufferedCharReader::read(char *c)
+{
+	// Read first, then synchronise the peek cursor: the destructive read may
+	// advance the position or drop buffer elements, so a peek cursor copied
+	// beforehand would lag behind or point into the wrong element
+	const bool res = readCharacterAtCursor(&readCursor, c);
+	resetPeek();
+	return res;
+}
+
+void BufferedCharReader::consumePeek()
+{
+	// Remove all no longer needed buffer elements
+	for (unsigned int i = 0; i < peekCursor.bufferElem; i++) {
+		buffer.pop_front();
+	}
+	peekCursor.bufferElem = 0;
+
+	// Copy the peek cursor to the read cursor
+	readCursor.assign(peekCursor);
+}
+
+void BufferedCharReader::resetPeek()
+{
+	// Reset the peek cursor to the read cursor
+	peekCursor.assign(readCursor);
+}
+
+bool BufferedCharReader::atEnd()
+{
+	// The end can only be reached once no more data can be fed
+	if (!closed) {
+		return false;
+	}
+
+	// Only the first element may be partially consumed by the read cursor;
+	// every following element counts as unread if it holds any bytes
+	for (unsigned int i = 0; i < buffer.size(); i++) {
+		const unsigned int consumed = (i == 0) ? readCursor.bufferPos : 0;
+		if (buffer[i].size() > consumed) {
+			return false;
+		}
+	}
+	return true;
+}
+
+}
+}
+
diff --git a/src/core/utils/BufferedCharReader.hpp b/src/core/utils/BufferedCharReader.hpp
new file mode 100644
index 0000000..86f43b5
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.hpp
@@ -0,0 +1,240 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014  Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either 
version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+#define _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+
+#include <deque>
+#include <string>
+#include <cstdint>
+
+namespace ousia {
+namespace utils {
+
+/**
+ * The BufferedCharReader class is used for storing incoming data that
+ * is fed into the pipeline as well as reading/peeking single characters
+ * from that buffer. Additionally it counts the current column/row
+ * (with correct handling for UTF-8) and contains an internal state
+ * machine that handles the detection of linebreaks.
+ *
+ * Additionally the BufferedCharReader performs the following tasks:
+ * 1. Convert the incoming character encoding to UTF-8 (TODO: implement)
+ * 2. Convert arbitrary linebreaks to a single "\n"
+ */
+class BufferedCharReader {
+
+private:
+
+	/**
+	 * The ReadCursor structure is responsible for representing the read
+	 * position within the text and all state machine states belonging to the
+	 * cursor. There are two types of read cursors: destructive and
+	 * non-destructive read cursors.
+	 */
+	struct ReadCursor {
+		/**
+		 * Specifies whether this is a destructive cursor (buffer elements are
+		 * discarded once they were read completely).
+		 */
+		const bool destructive;
+
+		/**
+		 * The line the cursor currently points to.
+		 */
+		unsigned int line;
+
+		/**
+		 * The column the cursor currently points to.
+		 */
+		unsigned int column;
+
+		/**
+		 * The index of the element in the data buffer we're currently reading
+		 * from.
+		 */
+		unsigned int bufferElem;
+
+		/**
+		 * The byte position within this data buffer.
+		 */
+		unsigned int bufferPos;
+
+		/**
+		 * State variable used in the internal state machine of the
+		 * line feed detection.
+		 */
+		uint8_t lbState;
+
+		/**
+		 * Constructor of the ReadCursor structure.
+		 *
+		 * @param destructive specifies whether the ReadCursor is destructive
+		 * (consumes all read characters, as used in the "read cursor") or
+		 * non-destructive (as used in the "peek cursor").
+		 */
+		explicit ReadCursor(const bool destructive);
+
+		/**
+		 * Copies the data from another ReadCursor without overriding the
+		 * "destructive" flag.
+		 */
+		void assign(const ReadCursor &cursor);
+
+		/**
+		 * Resets the cursor without changing the "destructive" flag.
+		 */
+		void reset();
+	};
+
+	/**
+	 * Queue containing the data that has been fed into the char reader.
+	 */
+	std::deque<std::string> buffer;
+
+	/**
+	 * The read and the peek cursor.
+	 */
+	ReadCursor readCursor, peekCursor;
+
+	/**
+	 * Determines whether the reader has been closed.
+	 */
+	bool closed;
+
+	/**
+	 * Substitute any combination of linebreaks in the incoming code with "\n".
+	 * Returns true if the current character is meant as output, false
+	 * otherwise.
+	 */
+	bool substituteLinebreaks(ReadCursor *cursor, char *c);
+
+	/**
+	 * Reads a character from the input buffer and advances the given read
+	 * cursor. Bytes swallowed by the linebreak substitution and exhausted
+	 * buffer elements are skipped internally, so a single call either yields
+	 * the next output character or reports that no more data is buffered.
+	 * A destructive cursor removes exhausted elements from the buffer.
+	 *
+	 * @param cursor is a pointer to the read cursor that should be used
+	 * for reading.
+	 * @param c is an output parameter, which will be set to the read
+	 * character.
+	 * @return true if a character was read, false if the buffered data is
+	 * exhausted.
+	 */
+	bool readCharacterAtCursor(ReadCursor *cursor, char *c);
+
+	/**
+	 * Intended to update the row and column count for a read character.
+	 * NOTE: no definition is part of this patch -- do not call.
+	 */
+	void updatePositionCounters(const char c);
+
+public:
+
+	/**
+	 * Constructor of the buffered char reader class.
+	 */
+	BufferedCharReader();
+
+	/**
+	 * Resets the reader to its initial state.
+	 */
+	void reset();
+
+	/**
+	 * Feeds new data into the internal buffer of the BufferedCharReader
+	 * class.
+	 *
+	 * @param data is a string containing the data that should be
+	 * appended to the internal buffer.
+	 * @return true if the operation was successful, false otherwise (e.g.
+	 * because the reader is closed).
+	 */
+	bool feed(const std::string &data);
+
+	/**
+	 * Marks the end of the input, allowing successors in the pipeline
+	 * to react properly (e.g. creating the end of stream token).
+	 */
+	void close();
+
+	/**
+	 * Peeks a single character. If called multiple times, returns the
+	 * character after the previously peeked character.
+	 *
+	 * @param c is a pointer to the character to which the result should be
+	 * written.
+	 * @return true if the character was successfully read, false if there are
+	 * no more characters to be read in the buffer.
+	 */
+	bool peek(char *c);
+
+	/**
+	 * Reads a character from the input data. If "peek" was called
+	 * beforehand resets the peek pointer.
+	 *
+	 * @param c is a pointer to the character to which the result should be
+	 * written.
+	 * @return true if the character was successfully read, false if there are
+	 * no more characters to be read in the buffer.
+	 */
+	bool read(char *c);
+
+	/**
+	 * Advances the read pointer to the peek pointer -- so if the "peek"
+	 * function was called, "read" will now return the character after
+	 * the last peeked character.
+	 */
+	void consumePeek();
+
+	/**
+	 * Resets the peek pointer to the "read" pointer.
+	 */
+	void resetPeek();
+
+	/**
+	 * Returns true if there are no more characters as the stream was
+	 * closed.
+	 */
+	bool atEnd();
+
+	/**
+	 * Returns the current line of the read cursor (starting with one).
+	 */
+	inline int getLine() const
+	{
+		return readCursor.line;
+	}
+
+	/**
+	 * Returns the current column of the read cursor (starting with one).
+	 */
+	inline int getColumn() const
+	{
+		return readCursor.column;
+	}
+
+};
+
+}
+}
+
+#endif /* _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ */
+
diff --git a/src/plugins/mozjs/MozJsScriptEngine.cpp b/src/plugins/mozjs/MozJsScriptEngine.cpp
index c67a3b4..f269eb7 100644
--- a/src/plugins/mozjs/MozJsScriptEngine.cpp
+++ b/src/plugins/mozjs/MozJsScriptEngine.cpp
@@ -426,8 +426,10 @@ void MozJsScriptEngineScope::variantToValue(const Variant &var,
 			return;
 		}
 		case VariantType::function: {
-			JS::RootedObject f(cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
-			JS_SetPrivate(f, new MozJsFunctionData(*this, var.getFunctionValue()->clone()));
+			JS::RootedObject f(
+			    cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
+			JS_SetPrivate(f, new MozJsFunctionData(
+			                     *this, var.getFunctionValue()->clone()));
 			JS_FreezeObject(cx, f);
 			val.setObjectOrNull(f.get());
 			return;
@@ -497,8 +499,7 @@ MozJsScriptEngine::~MozJsScriptEngine()
 	JS_ShutDown();
 }
 
-MozJsScriptEngineScope *MozJsScriptEngine::createScope()
-{
+MozJsScriptEngineScope *MozJsScriptEngine::createScope() {
 	return new MozJsScriptEngineScope(rt);
 }
 }
diff --git a/test/core/utils/BufferedCharReader.cpp b/test/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..f8f668c
--- /dev/null
+++ b/test/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,198 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014  Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of 
the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <string>
+#include <iostream>
+
+#include "gtest/gtest.h"
+
+#include "BufferedCharReader.hpp"
+
+TEST(BufferedCharReaderTest, SimpleReadTest)
+{
+	const std::string testStr("this is a test");
+	char c;
+
+	// Feed a test string into the reader
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+	reader.close();
+
+	// Try to read the test string
+	std::string res;
+	while (!reader.atEnd()) {
+		ASSERT_TRUE(reader.read(&c));
+		res.append(&c, 1);
+	}
+
+	// The two strings must equal
+	ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+	// We must now be at line 1, column 15
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+
+	// If we call either read or peek, false is returned
+	ASSERT_FALSE(reader.read(&c));
+	ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, SimplePeekTest)
+{
+	const std::string testStr("this is a test");
+	char c;
+
+	// Feed a test string into the reader
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+	reader.close();
+
+	// Try to peek the test string
+	std::string res;
+	while (reader.peek(&c)) {
+		res.append(&c, 1);
+	}
+
+	// Peeking does not trigger the "atEnd" flag
+	ASSERT_FALSE(reader.atEnd());
+
+	// The two strings must equal
+	ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+	// We must now be at line 1, column 1 and NOT at the end of the stream
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+	ASSERT_FALSE(reader.atEnd());
+
+	// If we consume the peek, we must be at line 1, column 15 and we should be
+	// at the end of the stream
+	reader.consumePeek();
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+	ASSERT_TRUE(reader.atEnd());
+
+	// If we call either read or peek, false is returned
+	ASSERT_FALSE(reader.read(&c));
+	ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, SplittedPeakTest)
+{
+	const std::string testStr("this is a test");
+	char c;
+
+	// Create the reader, data is fed in below
+	ousia::utils::BufferedCharReader reader;
+
+	// Try to peek the test string, feed char after char into the reader
+	std::string res;
+	for (unsigned int i = 0; i < testStr.length(); i++) {
+		reader.feed(std::string(&testStr[i], 1));
+		while (reader.peek(&c)) {
+			res.append(&c, 1);
+		}
+	}
+	reader.close();
+
+	// Consume the peeked data
+	ASSERT_FALSE(reader.atEnd());
+	reader.consumePeek();
+	ASSERT_TRUE(reader.atEnd());
+
+	// The two strings must equal
+	ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+	// We must now be at line 1, column 15
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+
+	// If we call either read or peek, false is returned
+	ASSERT_FALSE(reader.read(&c));
+	ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, RowColumnCounterTest)
+{
+	const std::string testStr("1\n\r2\n3\r\n\n4");
+	char c;
+
+	// Feed a test string into the reader
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+	reader.close();
+
+	// We should currently be in line 1, column 1
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+
+	// Read two characters
+	for (int i = 0; i < 2; i++) reader.read(&c);
+	ASSERT_EQ(2, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+
+	// Read two characters
+	for (int i = 0; i < 2; i++) reader.read(&c);
+	ASSERT_EQ(3, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+
+	// Read three characters
+	for (int i = 0; i < 3; i++) reader.read(&c);
+	ASSERT_EQ(5, reader.getLine());
+	ASSERT_EQ(1, reader.getColumn());
+}
+
+TEST(BufferedCharReaderTest, LinebreakSubstitutionTest)
+{
+	const std::string testStr("this\n\ris\n\rjust\na test\r\n\rtest\n\r");
+	const std::string expStr("this\nis\njust\na test\n\ntest\n");
+
+	// Feed a test string into the reader
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+
+	// Read all characters from the test string
+	std::string res;
+	char c;
+	while (reader.read(&c)) {
+		res.append(&c, 1);
+	}
+
+	// Test for equality
+	ASSERT_STREQ(expStr.c_str(), res.c_str());
+}
+
+TEST(BufferedCharReaderTest, RowColumnCounterUTF8Test)
+{
+	// Create a test string with some umlauts
+	const std::string testStr("\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f");
+	char c;
+
+	// Feed a test string into the reader
+	ousia::utils::BufferedCharReader reader;
+	reader.feed(testStr);
+	reader.close();
+
+	// Read all bytes
+	while (reader.read(&c));
+
+	// The sequence above equals 5 UTF-8 characters (so after reading all the
+	// cursor is at position 6)
+	ASSERT_EQ(1, reader.getLine());
+	ASSERT_EQ(6, reader.getColumn());
+}
+
 | 
