diff options
-rw-r--r-- | src/core/utils/BufferedCharReader.cpp | 216 | ||||
-rw-r--r-- | src/core/utils/BufferedCharReader.hpp | 240 | ||||
-rw-r--r-- | src/plugins/mozjs/MozJsScriptEngine.cpp | 9 | ||||
-rw-r--r-- | test/core/utils/BufferedCharReader.cpp | 198 |
4 files changed, 659 insertions, 4 deletions
diff --git a/src/core/utils/BufferedCharReader.cpp b/src/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..0377015
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,228 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014 Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "BufferedCharReader.hpp"
+
+namespace ousia {
+namespace utils {
+
+// Constants used within the linebreak state machine. The lower nibble of the
+// state is a counter, the upper nibble holds the type of the linebreak
+// character that was read last.
+static const uint8_t LB_STATE_NONE = 0x00;
+static const uint8_t LB_STATE_ONE = 0x01;
+static const uint8_t LB_STATE_LF = 0x10;
+static const uint8_t LB_STATE_CR = 0x20;
+static const uint8_t LB_STATE_MASK_CNT = 0x0F;
+static const uint8_t LB_STATE_MASK_TYPE = 0xF0;
+
+/*******************************************************************************
+ * Struct BufferedCharReader::ReadCursor
+ ******************************************************************************/
+
+BufferedCharReader::ReadCursor::ReadCursor(const bool destructive)
+    : destructive(destructive)
+{
+    reset();
+}
+
+void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor)
+{
+    // Copy everything except the constant "destructive" flag
+    line = cursor.line;
+    column = cursor.column;
+    bufferElem = cursor.bufferElem;
+    bufferPos = cursor.bufferPos;
+    lbState = cursor.lbState;
+}
+
+void BufferedCharReader::ReadCursor::reset()
+{
+    line = 1;
+    column = 1;
+    bufferElem = 0;
+    bufferPos = 0;
+    lbState = LB_STATE_NONE;
+}
+
+/*******************************************************************************
+ * Class BufferedCharReader
+ ******************************************************************************/
+
+BufferedCharReader::BufferedCharReader() : readCursor(true), peekCursor(false)
+{
+    reset();
+}
+
+void BufferedCharReader::reset()
+{
+    readCursor.reset();
+    peekCursor.reset();
+    buffer.clear();
+    closed = false;
+}
+
+bool BufferedCharReader::feed(const std::string &data)
+{
+    // Abort if the BufferedCharReader was closed
+    if (closed) {
+        return false;
+    }
+
+    // Append the data onto the queue
+    buffer.push_back(data);
+    return true;
+}
+
+void BufferedCharReader::close()
+{
+    closed = true;
+}
+
+bool BufferedCharReader::substituteLinebreaks(ReadCursor *cursor, char *c)
+{
+    // Handle line breaks, inserts breaks after the following character
+    // combinations: \n, \r, \n\r, \r\n
+    // TODO: Change behaviour to \n, \n\r, \r\n
+    if ((*c == '\n') || (*c == '\r')) {
+        // Determine the type of the current linebreak character
+        const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR;
+
+        // Read the last count and the last type from the state
+        const uint8_t lastCount = cursor->lbState & LB_STATE_MASK_CNT;
+        const uint8_t lastType = cursor->lbState & LB_STATE_MASK_TYPE;
+
+        // Set the current linebreak type in the state and toggle the counter
+        // between zero and one
+        cursor->lbState = ((lastCount + 1) & LB_STATE_ONE) | type;
+
+        // If either this is the first instance of this character or the same
+        // return character is repeated
+        if (!lastCount || (lastType == type)) {
+            *c = '\n';
+            return true;
+        }
+        return false;
+    }
+
+    // Not a linebreak character, reset the state
+    cursor->lbState = LB_STATE_NONE;
+    return true;
+}
+
+bool BufferedCharReader::readCharacterAtCursor(ReadCursor *cursor, char *c)
+{
+    bool hasChar = false;
+    while (!hasChar) {
+        // Abort if the current buffer element does not point to a valid entry
+        // in the buffer -- we must wait until another data block has been fed
+        // into the buffer
+        if (cursor->bufferElem >= buffer.size()) {
+            return false;
+        }
+
+        // Fetch the current element the cursor points to
+        const std::string &data = buffer[cursor->bufferElem];
+
+        // Handle the "no data" case -- either in a destructive or
+        // non-destructive manner.
+        if (cursor->bufferPos >= data.length()) {
+            if (cursor->destructive) {
+                buffer.pop_front();
+            } else {
+                cursor->bufferElem++;
+            }
+            cursor->bufferPos = 0;
+            continue;
+        }
+
+        // Read the character, advance the buffer position
+        *c = data[cursor->bufferPos];
+        cursor->bufferPos++;
+
+        // Substitute linebreaks with a single LF (0x0A)
+        hasChar = substituteLinebreaks(cursor, c);
+    }
+
+    // Update the position counter
+    if (*c == '\n') {
+        cursor->line++;
+        cursor->column = 1;
+    } else if ((*c & 0xC0) != 0x80) {
+        // Only count bytes that are not UTF-8 continuation bytes (10xxxxxx)
+        cursor->column++;
+    }
+
+    return true;
+}
+
+bool BufferedCharReader::peek(char *c)
+{
+    return readCharacterAtCursor(&peekCursor, c);
+}
+
+bool BufferedCharReader::read(char *c)
+{
+    // Read from the read cursor -- this may discard exhausted buffer elements
+    const bool res = readCharacterAtCursor(&readCursor, c);
+
+    // Synchronise the peek cursor with the read cursor AFTER reading. Resetting
+    // it before the read left the peek cursor one character behind (and
+    // pointing at a stale position whenever a buffer element was discarded).
+    resetPeek();
+    return res;
+}
+
+void BufferedCharReader::consumePeek()
+{
+    // Remove all no longer needed buffer elements
+    for (unsigned int i = 0; i < peekCursor.bufferElem; i++) {
+        buffer.pop_front();
+    }
+    peekCursor.bufferElem = 0;
+
+    // Copy the peek cursor to the read cursor
+    readCursor.assign(peekCursor);
+}
+
+void BufferedCharReader::resetPeek()
+{
+    // Reset the peek cursor to the read cursor
+    peekCursor.assign(readCursor);
+}
+
+bool BufferedCharReader::atEnd()
+{
+    // As long as the reader is not closed, more data may still be fed
+    if (!closed) {
+        return false;
+    }
+
+    // Probe with a non-destructive copy of the read cursor: the reader is at
+    // its end exactly if no further character can be read. This also covers
+    // empty or multiple remaining buffer elements and trailing linebreak
+    // characters that are swallowed by the substitution.
+    ReadCursor probe(false);
+    probe.assign(readCursor);
+    char c;
+    return !readCharacterAtCursor(&probe, &c);
+}
+
+}
+}
+
diff --git a/src/core/utils/BufferedCharReader.hpp b/src/core/utils/BufferedCharReader.hpp
new file mode 100644
index 0000000..86f43b5
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.hpp
@@ -0,0 +1,240 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014 Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later
+    version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+#define _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+
+#include <deque>
+#include <string>
+#include <cstdint>
+
+namespace ousia {
+namespace utils {
+
+/**
+ * The BufferedCharReader class is used for storing incoming data that
+ * is fed into the pipeline as well as reading/peeking single characters
+ * from that buffer. Additionally it counts the current column/row
+ * (with correct handling for UTF-8) and contains an internal state
+ * machine that handles the detection of linebreaks.
+ *
+ * Additionally the BufferedCharReader performs the following tasks:
+ * 1. Convert the incoming character encoding to UTF-8 (TODO: implement)
+ * 2. Convert arbitrary linebreaks to a single "\n"
+ */
+class BufferedCharReader {
+
+private:
+
+    /**
+     * The ReadCursor structure is responsible for representing the read
+     * position within the text and all state machine states belonging to the
+     * cursor. There are two types of read cursors: destructive and
+     * non-destructive read cursors.
+     */
+    struct ReadCursor {
+        /**
+         * Specifies whether this is a destructive cursor (bytes are discarded
+         * once they were read from the buffer).
+         */
+        const bool destructive;
+
+        /**
+         * The line the cursor currently points to.
+         */
+        unsigned int line;
+
+        /**
+         * The column the cursor currently points to.
+         */
+        unsigned int column;
+
+        /**
+         * The index of the element in the data buffer we're currently reading
+         * from.
+         */
+        unsigned int bufferElem;
+
+        /**
+         * The byte position within this data buffer.
+         */
+        unsigned int bufferPos;
+
+        /**
+         * State variable used in the internal state machine of the
+         * line feed detection.
+         */
+        uint8_t lbState;
+
+        /**
+         * Constructor of the ReadCursor structure.
+         *
+         * @param destructive specifies whether the ReadCursor is destructive
+         * (consumes all read characters, as used in the "read cursor") or
+         * non-destructive (as used in the "peek cursor").
+         */
+        explicit ReadCursor(const bool destructive);
+
+        /**
+         * Copies the data from another ReadCursor without overriding the
+         * "destructive" flag.
+         */
+        void assign(const ReadCursor &cursor);
+
+        /**
+         * Resets the cursor without changing the "destructive" flag.
+         */
+        void reset();
+    };
+
+    /**
+     * Queue containing the data that has been fed into the char reader. Each
+     * call to "feed" appends one element to the queue.
+     */
+    std::deque<std::string> buffer;
+
+    /**
+     * The read and the peek cursor. The read cursor is destructive, the peek
+     * cursor is non-destructive.
+     */
+    ReadCursor readCursor, peekCursor;
+
+    /**
+     * Determines whether the reader has been closed.
+     */
+    bool closed;
+
+    /**
+     * Substitute any combination of linebreaks in the incoming code with "\n".
+     * Returns true if the current character is meant as output, false
+     * otherwise.
+     */
+    bool substituteLinebreaks(ReadCursor *cursor, char *c);
+
+    /**
+     * Reads a character from the input buffer and advances the given read
+     * cursor. Linebreaks are substituted and exhausted buffer elements are
+     * skipped internally, so the function does not have to be called again
+     * to obtain the character.
+     *
+     * @param cursor is a pointer to the read cursor that should be used for
+     * reading.
+     * @param c is an output parameter, which will be set to the read
+     * character.
+     * @return true if a character was read and the line and column counters
+     * of the cursor were updated, false if there was not enough data in the
+     * buffer. In the latter case the function may be called again once more
+     * data has been fed into the reader.
+     */
+    bool readCharacterAtCursor(ReadCursor *cursor, char *c);
+
+public:
+
+    /**
+     * Constructor of the buffered char reader class.
+     */
+    BufferedCharReader();
+
+    /**
+     * Resets the reader to its initial state.
+     */
+    void reset();
+
+    /**
+     * Feeds new data into the internal buffer of the BufferedCharReader
+     * class.
+     *
+     * @param data is a string containing the data that should be
+     * appended to the internal buffer.
+     * @return true if the operation was successful, false otherwise (e.g.
+     * because the reader is closed).
+     */
+    bool feed(const std::string &data);
+
+    /**
+     * Marks the end of the input, allowing successors in the pipeline
+     * to react properly (e.g. creating the end of stream token).
+     */
+    void close();
+
+    /**
+     * Peeks a single character. If called multiple times, returns the
+     * character after the previously peeked character.
+     *
+     * @param c is a pointer to the character to which the result should be
+     * written.
+     * @return true if the character was successfully read, false if there are
+     * no more characters to be read in the buffer.
+     */
+    bool peek(char *c);
+
+    /**
+     * Reads a character from the input data. If "peek" was called
+     * beforehand resets the peek pointer.
+     *
+     * @param c is a pointer to the character to which the result should be
+     * written.
+     * @return true if the character was successfully read, false if there are
+     * no more characters to be read in the buffer.
+     */
+    bool read(char *c);
+
+    /**
+     * Advances the read pointer to the peek pointer -- so if the "peek"
+     * function was called, "read" will now return the character after
+     * the last peeked character.
+     */
+    void consumePeek();
+
+    /**
+     * Resets the peek pointer to the "read" pointer.
+     */
+    void resetPeek();
+
+    /**
+     * Returns true if there are no more characters as the stream was
+     * closed.
+     */
+    bool atEnd();
+
+    /**
+     * Returns the current line of the read cursor (starting with one). Peeked
+     * characters are not taken into account until "consumePeek" is called.
+     */
+    int getLine() const
+    {
+        return static_cast<int>(readCursor.line);
+    }
+
+    /**
+     * Returns the current column of the read cursor (starting with one). Peeked
+     * characters are not taken into account until "consumePeek" is called.
+     */
+    int getColumn() const
+    {
+        return static_cast<int>(readCursor.column);
+    }
+
+};
+
+}
+}
+
+#endif /* _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ */
+
diff --git a/src/plugins/mozjs/MozJsScriptEngine.cpp b/src/plugins/mozjs/MozJsScriptEngine.cpp
index c67a3b4..f269eb7 100644
--- a/src/plugins/mozjs/MozJsScriptEngine.cpp
+++ b/src/plugins/mozjs/MozJsScriptEngine.cpp
@@ -426,8 +426,10 @@ void MozJsScriptEngineScope::variantToValue(const Variant &var,
             return;
         }
         case VariantType::function: {
-            JS::RootedObject f(cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
-            JS_SetPrivate(f, new MozJsFunctionData(*this, var.getFunctionValue()->clone()));
+            JS::RootedObject f(
+                cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
+            JS_SetPrivate(f, new MozJsFunctionData(
+                                 *this, var.getFunctionValue()->clone()));
             JS_FreezeObject(cx, f);
             val.setObjectOrNull(f.get());
             return;
@@ -497,8 +499,7 @@ MozJsScriptEngine::~MozJsScriptEngine()
     JS_ShutDown();
 }
 
-MozJsScriptEngineScope *MozJsScriptEngine::createScope()
-{
+MozJsScriptEngineScope *MozJsScriptEngine::createScope() {
     return new MozJsScriptEngineScope(rt);
 }
 }
diff --git a/test/core/utils/BufferedCharReader.cpp b/test/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..f8f668c
--- /dev/null
+++ b/test/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,198 @@
+/*
+    SCAENEA IDL Compiler (scidlc)
+    Copyright (C) 2014 Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <string>
+#include <iostream>
+
+#include "gtest/gtest.h"
+
+#include "BufferedCharReader.hpp"
+
+TEST(BufferedCharReaderTest, SimpleReadTest)
+{
+    const std::string testStr("this is a test");
+    char c;
+
+    // Feed a test string into the reader
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+    reader.close();
+
+    // Try to read the test string
+    std::string res;
+    while (!reader.atEnd()) {
+        ASSERT_TRUE(reader.read(&c));
+        res.append(&c, 1);
+    }
+
+    // The two strings must equal
+    ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+    // We must now be at line 1, column 15
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(static_cast<int>(testStr.size()) + 1, reader.getColumn());
+
+    // If we call either read or peek, false is returned
+    ASSERT_FALSE(reader.read(&c));
+    ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, SimplePeekTest)
+{
+    const std::string testStr("this is a test");
+    char c;
+
+    // Feed a test string into the reader
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+    reader.close();
+
+    // Try to peek the test string
+    std::string res;
+    while (reader.peek(&c)) {
+        res.append(&c, 1);
+    }
+
+    // Peeking does not trigger the "atEnd" flag
+    ASSERT_FALSE(reader.atEnd());
+
+    // The two strings must equal
+    ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+    // We must now be at line 1, column 1 and NOT at the end of the stream
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+    ASSERT_FALSE(reader.atEnd());
+
+    // If we consume the peek, we must be at line 1, column 15 and we should be
+    // at the end of the stream
+    reader.consumePeek();
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(static_cast<int>(testStr.size()) + 1, reader.getColumn());
+    ASSERT_TRUE(reader.atEnd());
+
+    // If we call either read or peek, false is returned
+    ASSERT_FALSE(reader.read(&c));
+    ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, SplittedPeakTest)
+{
+    const std::string testStr("this is a test");
+    char c;
+
+    // Create the reader, the data is fed piece by piece below
+    ousia::utils::BufferedCharReader reader;
+
+    // Try to peek the test string, feed char after char into the reader
+    std::string res;
+    for (unsigned int i = 0; i < testStr.length(); i++) {
+        reader.feed(std::string(&testStr[i], 1));
+        while (reader.peek(&c)) {
+            res.append(&c, 1);
+        }
+    }
+    reader.close();
+
+    // Consume the peeked data
+    ASSERT_FALSE(reader.atEnd());
+    reader.consumePeek();
+    ASSERT_TRUE(reader.atEnd());
+
+    // The two strings must equal
+    ASSERT_STREQ(testStr.c_str(), res.c_str());
+
+    // We must now be at line 1, column 15
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(static_cast<int>(testStr.size()) + 1, reader.getColumn());
+
+    // If we call either read or peek, false is returned
+    ASSERT_FALSE(reader.read(&c));
+    ASSERT_FALSE(reader.peek(&c));
+}
+
+TEST(BufferedCharReaderTest, RowColumnCounterTest)
+{
+    const std::string testStr("1\n\r2\n3\r\n\n4");
+    char c;
+
+    // Feed a test string into the reader
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+    reader.close();
+
+    // We should currently be in line 1, column 1
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+
+    // Read two characters
+    for (int i = 0; i < 2; i++) reader.read(&c);
+    ASSERT_EQ(2, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+
+    // Read two characters
+    for (int i = 0; i < 2; i++) reader.read(&c);
+    ASSERT_EQ(3, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+
+    // Read three characters
+    for (int i = 0; i < 3; i++) reader.read(&c);
+    ASSERT_EQ(5, reader.getLine());
+    ASSERT_EQ(1, reader.getColumn());
+}
+
+TEST(BufferedCharReaderTest, LinebreakSubstitutionTest)
+{
+    const std::string testStr("this\n\ris\n\rjust\na test\r\n\rtest\n\r");
+    const std::string expStr("this\nis\njust\na test\n\ntest\n");
+
+    // Feed a test string into the reader
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+
+    // Read all characters from the test string
+    std::string res;
+    char c;
+    while (reader.read(&c)) {
+        res.append(&c, 1);
+    }
+
+    // Test for equality
+    ASSERT_STREQ(expStr.c_str(), res.c_str());
+}
+
+TEST(BufferedCharReaderTest, RowColumnCounterUTF8Test)
+{
+    // Create a test string with some umlauts
+    const std::string testStr("\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f");
+    char c;
+
+    // Feed a test string into the reader
+    ousia::utils::BufferedCharReader reader;
+    reader.feed(testStr);
+    reader.close();
+
+    // Read all bytes
+    while (reader.read(&c)) {}
+
+    // The sequence above equals 5 UTF-8 characters, so we end up in column 6
+    ASSERT_EQ(1, reader.getLine());
+    ASSERT_EQ(6, reader.getColumn());
+}
+