summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/core/utils/BufferedCharReader.cpp 216
-rw-r--r-- src/core/utils/BufferedCharReader.hpp 240
-rw-r--r-- src/plugins/mozjs/MozJsScriptEngine.cpp 9
-rw-r--r-- test/core/utils/BufferedCharReader.cpp 198
4 files changed, 659 insertions, 4 deletions
diff --git a/src/core/utils/BufferedCharReader.cpp b/src/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..0377015
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,216 @@
+/*
+ SCAENEA IDL Compiler (scidlc)
+ Copyright (C) 2014 Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "BufferedCharReader.hpp"
+
+namespace ousia {
+namespace utils {
+
+// Bit layout of ReadCursor::lbState as used by the linebreak state machine:
+// the low nibble (LB_STATE_MASK_CNT) holds a counter, the high nibble
+// (LB_STATE_MASK_TYPE) the kind of the last linebreak character seen.
+static constexpr uint8_t LB_STATE_NONE = 0x00;
+static constexpr uint8_t LB_STATE_ONE = 0x01;
+static constexpr uint8_t LB_STATE_LF = 0x10;
+static constexpr uint8_t LB_STATE_CR = 0x20;
+static constexpr uint8_t LB_STATE_MASK_CNT = 0x0F;
+static constexpr uint8_t LB_STATE_MASK_TYPE = 0xF0;
+
+/*******************************************************************************
+ * Struct BufferedCharReader::ReadCursor
+ ******************************************************************************/
+
+/**
+ * Creates a cursor in its initial state (see reset()).
+ *
+ * @param destructive is stored in the constant "destructive" flag of the
+ * cursor.
+ */
+BufferedCharReader::ReadCursor::ReadCursor(const bool destructive)
+    : destructive{destructive}
+{
+    this->reset();
+}
+
+/**
+ * Takes over the position and linebreak state of another cursor. The
+ * "destructive" flag of this cursor is left untouched.
+ *
+ * @param cursor is the cursor the state is copied from.
+ */
+void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor)
+{
+    // Linebreak state machine
+    lbState = cursor.lbState;
+
+    // Position inside the buffer
+    bufferPos = cursor.bufferPos;
+    bufferElem = cursor.bufferElem;
+
+    // Position inside the text
+    column = cursor.column;
+    line = cursor.line;
+}
+
+/**
+ * Moves the cursor back to line 1, column 1 at the very beginning of the
+ * buffer and clears the linebreak state. The "destructive" flag is kept.
+ */
+void BufferedCharReader::ReadCursor::reset()
+{
+    line = column = 1;
+    bufferElem = bufferPos = 0;
+    lbState = LB_STATE_NONE;
+}
+
+/*******************************************************************************
+ * Class BufferedCharReader
+ ******************************************************************************/
+
+/**
+ * Creates an empty, open reader. The read cursor is constructed as the
+ * destructive cursor, the peek cursor as the non-destructive one.
+ */
+BufferedCharReader::BufferedCharReader()
+    : readCursor{true}, peekCursor{false}
+{
+    this->reset();
+}
+
+/**
+ * Returns the reader to its initial state: the reader is open again, all
+ * buffered data is dropped and both cursors are reset.
+ */
+void BufferedCharReader::reset()
+{
+    closed = false;
+    buffer.clear();
+    peekCursor.reset();
+    readCursor.reset();
+}
+
+/**
+ * Appends a chunk of data to the end of the internal queue.
+ *
+ * @param data is the chunk that should be appended.
+ * @return true if the chunk was stored, false if the reader has already
+ * been closed (in which case the data is discarded).
+ */
+bool BufferedCharReader::feed(const std::string &data)
+{
+    // A closed reader does not accept any further input
+    const bool accepted = !closed;
+    if (accepted) {
+        buffer.push_back(data);
+    }
+    return accepted;
+}
+
+/**
+ * Marks the input as complete. Subsequent calls to feed() are rejected.
+ */
+void BufferedCharReader::close()
+{
+    this->closed = true;
+}
+
+/**
+ * Linebreak state machine: collapses the sequences "\n", "\r", "\n\r" and
+ * "\r\n" into a single "\n" each.
+ *
+ * The first character of a linebreak sequence is emitted as "\n" right away.
+ * If the directly following character is the *other* linebreak character, it
+ * is treated as the second half of a two-character linebreak and suppressed.
+ * A repetition of the *same* character starts a new linebreak, which may in
+ * turn be completed by the other character (so "\r\r\n" yields two breaks:
+ * "\r" and "\r\n").
+ *
+ * @param cursor is the cursor whose linebreak state (lbState) is read and
+ * updated.
+ * @param c points to the character that has just been read. It is
+ * overwritten with "\n" if a linebreak is emitted.
+ * @return true if *c should be passed on to the caller, false if the
+ * character was swallowed as the second half of a linebreak pair.
+ */
+bool BufferedCharReader::substituteLinebreaks(ReadCursor *cursor, char *c)
+{
+    // Any non-linebreak character terminates a pending linebreak sequence
+    if ((*c != '\n') && (*c != '\r')) {
+        cursor->lbState = LB_STATE_NONE;
+        return true;
+    }
+
+    // Determine the type of the current linebreak character
+    const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR;
+
+    // Read the pending flag and the last type from the state
+    const bool pending = (cursor->lbState & LB_STATE_MASK_CNT) != 0;
+    const uint8_t lastType = cursor->lbState & LB_STATE_MASK_TYPE;
+
+    // The other linebreak character directly after an emitted one completes
+    // the pair -- swallow it and start over
+    if (pending && (lastType != type)) {
+        cursor->lbState = LB_STATE_NONE;
+        return false;
+    }
+
+    // Either the first linebreak character or a repetition of the same one:
+    // emit a break and remember that it may still be completed to a pair.
+    // (Previously the counter was toggled here, so a repeated character could
+    // never be the first half of a pair.)
+    cursor->lbState = LB_STATE_ONE | type;
+    *c = '\n';
+    return true;
+}
+
+/**
+ * Reads the next output character at the given cursor, advances the cursor
+ * and updates its line/column counters. Characters that are swallowed by the
+ * linebreak substitution are skipped.
+ *
+ * @param cursor is the cursor that should be used for reading. For a
+ * destructive cursor, exhausted buffer elements are removed from the front of
+ * the buffer; otherwise the cursor just moves on to the next element.
+ * @param c is an output parameter to which the read character is written.
+ * @return true if a character was read, false if the buffer currently holds
+ * no further data for this cursor.
+ */
+bool BufferedCharReader::readCharacterAtCursor(ReadCursor *cursor,
+ char *c)
+{
+ bool hasChar = false;
+ while (!hasChar) {
+ // Abort if the current buffer element does not point to a valid entry
+ // in the buffer -- we must wait until another data block has been fed
+ // into the buffer
+ if (cursor->bufferElem >= buffer.size()) {
+ return false;
+ }
+
+ // Fetch the current element the cursor points to
+ const std::string &data = buffer[cursor->bufferElem];
+
+ // Handle the "no data" case -- either in a destructive or
+ // non-destructive manner.
+ if (cursor->bufferPos >= data.length()) {
+ if (cursor->destructive) {
+ // "data" refers to the removed element from here on; it is not
+ // used again because the loop restarts below
+ buffer.pop_front();
+ } else {
+ cursor->bufferElem++;
+ }
+ cursor->bufferPos = 0;
+ continue;
+ }
+
+ // Read the character, advance the buffer position
+ *c = *(data.data() + cursor->bufferPos);
+ cursor->bufferPos++;
+
+ // Substitute linebreaks with a single LF (0x0A); a swallowed character
+ // leaves hasChar at false, so the loop fetches the next one
+ hasChar = substituteLinebreaks(cursor, c);
+ }
+
+ // Update the position counter
+ if (*c == '\n') {
+ cursor->line++;
+ cursor->column = 1;
+ } else {
+ // Ignore UTF-8 continuation bytes (bit pattern 10xxxxxx), so that a
+ // multi-byte sequence advances the column only once
+ if (!((*c & 0x80) && !(*c & 0x40))) {
+ cursor->column++;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Reads the next character at the peek cursor and advances the peek cursor.
+ *
+ * @param c is a pointer to the character the result is written to.
+ * @return true if a character was available, false otherwise.
+ */
+bool BufferedCharReader::peek(char *c)
+{
+    ReadCursor *const cursor = &peekCursor;
+    return readCharacterAtCursor(cursor, c);
+}
+
+/**
+ * Reads the next character at the read cursor and discards any peek progress,
+ * so that the next call to peek() starts directly after the character that
+ * was just read.
+ *
+ * @param c is a pointer to the character the result is written to.
+ * @return true if a character was available, false otherwise.
+ */
+bool BufferedCharReader::read(char *c)
+{
+    const bool res = readCharacterAtCursor(&readCursor, c);
+
+    // Synchronise the peek cursor *after* reading: the read advances the read
+    // cursor and may remove elements from the front of the buffer, which
+    // would otherwise leave the peek cursor pointing at an outdated position
+    // (a following peek() would return the character just read again).
+    resetPeek();
+    return res;
+}
+
+/**
+ * Makes the peeked position the new read position: all buffer elements in
+ * front of the one the peek cursor points to are dropped and the read cursor
+ * takes over the state of the peek cursor.
+ */
+void BufferedCharReader::consumePeek()
+{
+    // Drop every buffer element that lies completely before the peek cursor
+    for (unsigned int remaining = peekCursor.bufferElem; remaining > 0;
+         remaining--) {
+        buffer.pop_front();
+    }
+
+    // The element the peek cursor points to is now the first one
+    peekCursor.bufferElem = 0;
+
+    // Let the read cursor continue where the peek cursor stands
+    readCursor.assign(peekCursor);
+}
+
+/**
+ * Discards all peek progress by moving the peek cursor back to the position
+ * of the read cursor.
+ */
+void BufferedCharReader::resetPeek()
+{
+    const ReadCursor &source = readCursor;
+    peekCursor.assign(source);
+}
+
+/**
+ * Checks whether the read cursor has reached the end of the input.
+ *
+ * @return true if the reader has been closed and no unread bytes are left in
+ * the buffer, false otherwise. Only the read cursor is taken into account,
+ * peeking does not influence the result.
+ */
+bool BufferedCharReader::atEnd()
+{
+    // More data may still be fed into an open reader
+    if (!closed) {
+        return false;
+    }
+
+    // The read cursor always reads from the first buffer element, so only
+    // that element can be partially consumed. Every following element counts
+    // as unread if it holds any data at all. (Previously any buffer with more
+    // than one element was reported as "not at end", even if all remaining
+    // elements were exhausted or empty.)
+    for (std::size_t i = 0; i < buffer.size(); i++) {
+        const std::size_t consumed = (i == 0) ? readCursor.bufferPos : 0;
+        if (buffer[i].size() > consumed) {
+            return false;
+        }
+    }
+    return true;
+}
+
+}
+}
+
diff --git a/src/core/utils/BufferedCharReader.hpp b/src/core/utils/BufferedCharReader.hpp
new file mode 100644
index 0000000..86f43b5
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.hpp
@@ -0,0 +1,240 @@
+/*
+ SCAENEA IDL Compiler (scidlc)
+ Copyright (C) 2014 Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+#define _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+
+#include <deque>
+#include <string>
+#include <cstdint>
+
+namespace ousia {
+namespace utils {
+
+/**
+ * The BufferedCharReader class is used for storing incoming data that
+ * is fed into the pipeline as well as reading/peeking single characters
+ * from that buffer. Additionally it counts the current column/row
+ * (with correct handling for UTF-8) and contains an internal state
+ * machine that handles the detection of linebreaks.
+ *
+ * Additionally the BufferedCharReader performs the following tasks:
+ * 1. Convert the incoming character encoding to UTF-8 (TODO: implement)
+ * 2. Convert arbitrary linebreaks to a single "\n"
+ */
+class BufferedCharReader {
+
+private:
+
+ /**
+ * The ReadCursor structure is responsible for representing the read
+ * position within the text and all state machine states belonging to the
+ * cursor. There are two types of read cursors: destructive and
+ * non-destructive read cursors.
+ */
+ struct ReadCursor {
+ /**
+ * Specifies whether this is a destructive cursor (bytes are discarded
+ * once they were read from the buffer).
+ */
+ const bool destructive;
+
+ /**
+ * The line the cursor currently points to.
+ */
+ unsigned int line;
+
+ /**
+ * The column the cursor currently points to.
+ */
+ unsigned int column;
+
+ /**
+ * The index of the element in the data buffer we're currently reading
+ * from.
+ */
+ unsigned int bufferElem;
+
+ /**
+ * The byte position within this data buffer.
+ */
+ unsigned int bufferPos;
+
+ /**
+ * State variable used in the internal state machine of the
+ * line feed detection.
+ */
+ uint8_t lbState;
+
+ /**
+ * Constructor of the ReadCursor structure.
+ *
+ * @param destructive specifies whether the ReadCursor is destructive
+ * (consumes all read characters, as used in the "read cursor") or
+ * non-destructive (as used in the "peek cursor").
+ */
+ ReadCursor(const bool destructive);
+
+ /**
+ * Copies the data from another ReadCursor without overriding the
+ * "destructive" flag.
+ *
+ * @param cursor is the cursor the data is copied from.
+ */
+ void assign(const ReadCursor &cursor);
+
+ /**
+ * Resets the cursor without changing the "destructive" flag.
+ */
+ void reset();
+ };
+
+ /**
+ * Queue containing the data that has been fed into the char reader.
+ */
+ std::deque<std::string> buffer;
+
+ /**
+ * The read and the peek cursor.
+ */
+ ReadCursor readCursor, peekCursor;
+
+ /**
+ * Determines whether the reader has been closed.
+ */
+ bool closed;
+
+ /**
+ * Substitute any combination of linebreaks in the incoming code with "\n".
+ *
+ * @param cursor is a pointer to the cursor whose linebreak state is used.
+ * @param c is a pointer to the character that has just been read.
+ * @return true if the current character is meant as output, false
+ * otherwise.
+ */
+ bool substituteLinebreaks(ReadCursor *cursor, char *c);
+
+ /**
+ * Reads a character from the input buffer and advances the given read
+ * cursor.
+ *
+ * @param cursor is a pointer to the read cursor that should be used
+ * for reading.
+ * @param c is an output parameter, which will be set to the read character.
+ * @return true if there was enough data in the buffer, false
+ * otherwise.
+ */
+ bool readCharacterAtCursor(ReadCursor *cursor, char *c);
+
+ /**
+ * Function that is called for each read character -- updates the row and
+ * column count.
+ *
+ * NOTE(review): BufferedCharReader.cpp contains no definition of this
+ * function; the counters appear to be updated inside
+ * readCharacterAtCursor instead -- confirm whether this declaration is
+ * still needed.
+ */
+ void updatePositionCounters(const char c);
+
+public:
+
+ /**
+ * Constructor of the buffered char reader class.
+ */
+ BufferedCharReader();
+
+ /**
+ * Resets the reader to its initial state.
+ */
+ void reset();
+
+ /**
+ * Feeds new data into the internal buffer of the BufferedCharReader
+ * class.
+ *
+ * @param data is a string containing the data that should be
+ * appended to the internal buffer.
+ * @return true if the operation was successful, false otherwise (e.g.
+ * because the reader is closed).
+ */
+ bool feed(const std::string &data);
+
+ /**
+ * Marks the end of the input, allowing successors in the pipeline
+ * to react properly (e.g. creating the end of stream token).
+ */
+ void close();
+
+ /**
+ * Peeks a single character. If called multiple times, returns the
+ * character after the previously peeked character.
+ *
+ * @param c is a pointer to the character to which the result should be
+ * written.
+ * @return true if the character was successfully read, false if there are
+ * no more characters to be read in the buffer.
+ */
+ bool peek(char *c);
+
+ /**
+ * Reads a character from the input data. If "peek" was called
+ * beforehand resets the peek pointer.
+ *
+ * @param c is a pointer to the character to which the result should be
+ * written.
+ * @return true if the character was successfully read, false if there are
+ * no more characters to be read in the buffer.
+ */
+ bool read(char *c);
+
+ /**
+ * Advances the read pointer to the peek pointer -- so if the "peek"
+ * function was called, "read" will now return the character after
+ * the last peeked character.
+ */
+ void consumePeek();
+
+ /**
+ * Resets the peek pointer to the "read" pointer.
+ */
+ void resetPeek();
+
+ /**
+ * Returns true if there are no more characters as the stream was
+ * closed.
+ */
+ bool atEnd();
+
+ /**
+ * Returns the current line of the read cursor (starting with one).
+ */
+ inline int getLine()
+ {
+ return readCursor.line;
+ }
+
+ /**
+ * Returns the current column of the read cursor (starting with one).
+ */
+ inline int getColumn()
+ {
+ return readCursor.column;
+ }
+
+};
+
+}
+}
+
+#endif /* _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ */
+
diff --git a/src/plugins/mozjs/MozJsScriptEngine.cpp b/src/plugins/mozjs/MozJsScriptEngine.cpp
index c67a3b4..f269eb7 100644
--- a/src/plugins/mozjs/MozJsScriptEngine.cpp
+++ b/src/plugins/mozjs/MozJsScriptEngine.cpp
@@ -426,8 +426,10 @@ void MozJsScriptEngineScope::variantToValue(const Variant &var,
return;
}
case VariantType::function: {
- JS::RootedObject f(cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
- JS_SetPrivate(f, new MozJsFunctionData(*this, var.getFunctionValue()->clone()));
+ JS::RootedObject f(
+ cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
+ JS_SetPrivate(f, new MozJsFunctionData(
+ *this, var.getFunctionValue()->clone()));
JS_FreezeObject(cx, f);
val.setObjectOrNull(f.get());
return;
@@ -497,8 +499,7 @@ MozJsScriptEngine::~MozJsScriptEngine()
JS_ShutDown();
}
-MozJsScriptEngineScope *MozJsScriptEngine::createScope()
-{
+MozJsScriptEngineScope *MozJsScriptEngine::createScope() {
return new MozJsScriptEngineScope(rt);
}
}
diff --git a/test/core/utils/BufferedCharReader.cpp b/test/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..f8f668c
--- /dev/null
+++ b/test/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,198 @@
+/*
+ SCAENEA IDL Compiler (scidlc)
+ Copyright (C) 2014 Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <string>
+#include <iostream>
+
+#include "gtest/gtest.h"
+
+#include "BufferedCharReader.hpp"
+
+// Reads a closed single-chunk input character by character and checks the
+// content read, the final position and the behaviour at the end of the input.
+TEST(BufferedCharReaderTest, SimpleReadTest)
+{
+    const std::string testStr("this is a test");
+    char c;
+
+    // Feed a test string into the reader (the class lives in ousia::utils,
+    // as declared in BufferedCharReader.hpp)
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+    reader.close();
+
+    // Try to read the test string
+    std::string res;
+    while (!reader.atEnd()) {
+        ASSERT_TRUE(reader.read(&c));
+        res.append(&c, 1);
+    }
+
+    // The two strings must equal
+    ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+    // We must now be at line 1, column 15
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(static_cast<int>(testStr.size() + 1), reader.getColumn());
+
+    // If we call either read or peek, false is returned
+    ASSERT_FALSE(reader.read(&c));
+    ASSERT_FALSE(reader.peek(&c));
+}
+
+// Peeks through a closed single-chunk input and checks that the read position
+// only moves once the peeked data is consumed.
+TEST(BufferedCharReaderTest, SimplePeekTest)
+{
+    const std::string testStr("this is a test");
+    char c;
+
+    // Feed a test string into the reader (the class lives in ousia::utils,
+    // as declared in BufferedCharReader.hpp)
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+    reader.close();
+
+    // Try to peek the test string
+    std::string res;
+    while (reader.peek(&c)) {
+        res.append(&c, 1);
+    }
+
+    // Peeking does not trigger the "atEnd" flag
+    ASSERT_FALSE(reader.atEnd());
+
+    // The two strings must equal
+    ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+    // We must now be at line 1, column 1 and NOT at the end of the stream
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+    ASSERT_FALSE(reader.atEnd());
+
+    // If we consume the peek, we must be at line 1, column 15 and we should be
+    // at the end of the stream
+    reader.consumePeek();
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(static_cast<int>(testStr.size() + 1), reader.getColumn());
+    ASSERT_TRUE(reader.atEnd());
+
+    // If we call either read or peek, false is returned
+    ASSERT_FALSE(reader.read(&c));
+    ASSERT_FALSE(reader.peek(&c));
+}
+
+// Feeds the input one character per chunk while peeking, then consumes the
+// peeked data and checks content, position and end-of-input behaviour.
+TEST(BufferedCharReaderTest, SplittedPeakTest)
+{
+    const std::string testStr("this is a test");
+    char c;
+
+    // Create the reader (the class lives in ousia::utils, as declared in
+    // BufferedCharReader.hpp)
+    ousia::utils::BufferedCharReader reader;
+
+    // Try to peek the test string, feed char after char into the reader
+    std::string res;
+    for (unsigned int i = 0; i < testStr.length(); i++) {
+        reader.feed(std::string(&testStr[i], 1));
+        while (reader.peek(&c)) {
+            res.append(&c, 1);
+        }
+    }
+    reader.close();
+
+    // Consume the peeked data
+    ASSERT_FALSE(reader.atEnd());
+    reader.consumePeek();
+    ASSERT_TRUE(reader.atEnd());
+
+    // The two strings must equal
+    ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+    // We must now be at line 1, column 15
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(static_cast<int>(testStr.size() + 1), reader.getColumn());
+
+    // If we call either read or peek, false is returned
+    ASSERT_FALSE(reader.read(&c));
+    ASSERT_FALSE(reader.peek(&c));
+}
+
+// Checks the line/column counters while reading an input that mixes "\n\r",
+// "\n" and "\r\n" linebreaks.
+TEST(BufferedCharReaderTest, RowColumnCounterTest)
+{
+    const std::string testStr("1\n\r2\n3\r\n\n4");
+    char c;
+
+    // Feed a test string into the reader (the class lives in ousia::utils,
+    // as declared in BufferedCharReader.hpp)
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+    reader.close();
+
+    // We should currently be in line 1, column 1
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+
+    // Read two characters ("1" and the first linebreak)
+    for (int i = 0; i < 2; i++) reader.read(&c);
+    ASSERT_EQ(2, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+
+    // Read two characters ("2" and the second linebreak)
+    for (int i = 0; i < 2; i++) reader.read(&c);
+    ASSERT_EQ(3, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+
+    // Read three characters ("3" and two linebreaks)
+    for (int i = 0; i < 3; i++) reader.read(&c);
+    ASSERT_EQ(5, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+}
+
+// Checks that "\n", "\r", "\n\r" and "\r\n" are each replaced by a single
+// "\n" in the data returned by read().
+TEST(BufferedCharReaderTest, LinebreakSubstitutionTest)
+{
+    const std::string testStr("this\n\ris\n\rjust\na test\r\n\rtest\n\r");
+    const std::string expStr("this\nis\njust\na test\n\ntest\n");
+
+    // Feed a test string into the reader (the class lives in ousia::utils,
+    // as declared in BufferedCharReader.hpp)
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+
+    // Read all characters from the test string
+    std::string res;
+    char c;
+    while (reader.read(&c)) {
+        res.append(&c, 1);
+    }
+
+    // Test for equality
+    ASSERT_STREQ(expStr.c_str(), res.c_str());
+}
+
+// Checks that multi-byte UTF-8 sequences advance the column counter by one
+// per character rather than one per byte.
+TEST(BufferedCharReaderTest, RowColumnCounterUTF8Test)
+{
+    // Create a test string with some umlauts
+    const std::string testStr("\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f");
+    char c;
+
+    // Feed a test string into the reader (the class lives in ousia::utils,
+    // as declared in BufferedCharReader.hpp)
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+    reader.close();
+
+    // Read all bytes
+    while (reader.read(&c)) {
+    }
+
+    // The sequence above equals 5 UTF-8 characters (so after reading all the
+    // cursor is at position 6)
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(6, reader.getColumn());
+}
+