diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-10-31 13:26:00 +0000 |
---|---|---|
committer | andreas <andreas@daaaf23c-2e50-4459-9457-1e69db5a47bf> | 2014-10-31 13:26:00 +0000 |
commit | 9e233b3f13daebb9ac4c5cae0da073d0c6f782c0 (patch) | |
tree | 3a87cbf654a8d6f3a9e722e07f55318ef6a71fb8 /src | |
parent | c54065160a03f266c1406edf74d97ab74ee75d51 (diff) |
added buffered char reader
git-svn-id: file:///var/local/svn/basicwriter@89 daaaf23c-2e50-4459-9457-1e69db5a47bf
Diffstat (limited to 'src')
-rw-r--r-- | src/core/utils/BufferedCharReader.cpp | 216 | ||||
-rw-r--r-- | src/core/utils/BufferedCharReader.hpp | 240 | ||||
-rw-r--r-- | src/plugins/mozjs/MozJsScriptEngine.cpp | 9 |
3 files changed, 461 insertions, 4 deletions
// ---------------------------------------------------------------------------
// src/core/utils/BufferedCharReader.hpp
// ---------------------------------------------------------------------------
/*
    SCAENEA IDL Compiler (scidlc)
    Copyright (C) 2014 Andreas Stöckel

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef OUSIA_UTILS_BUFFERED_CHAR_READER_HPP_
#define OUSIA_UTILS_BUFFERED_CHAR_READER_HPP_

#include <cstddef>
#include <cstdint>
#include <deque>
#include <string>

namespace ousia {
namespace utils {

/**
 * The BufferedCharReader class stores incoming data that is fed into the
 * pipeline and allows reading/peeking single characters from that buffer.
 * It tracks the current line and column (UTF-8 continuation bytes do not
 * advance the column) and contains a small state machine that converts the
 * line break sequences "\n", "\r", "\n\r" and "\r\n" to a single "\n".
 *
 * TODO: Convert the incoming character encoding to UTF-8.
 */
class BufferedCharReader {
private:
    /**
     * The ReadCursor structure represents a read position within the
     * buffered text together with the line break state belonging to that
     * position. There are two kinds of cursors: destructive ones (buffer
     * elements are discarded once they have been read completely) and
     * non-destructive ones.
     */
    struct ReadCursor {
        /**
         * Specifies whether this is a destructive cursor. A destructive
         * cursor always reads from the first buffer element.
         */
        const bool destructive;

        /**
         * The line the cursor currently points to (starting with one).
         */
        unsigned int line;

        /**
         * The column the cursor currently points to (starting with one).
         */
        unsigned int column;

        /**
         * The index of the buffer element the cursor is reading from.
         */
        unsigned int bufferElem;

        /**
         * The byte position within that buffer element.
         */
        unsigned int bufferPos;

        /**
         * State variable of the line break detection state machine.
         */
        std::uint8_t lbState;

        /**
         * Constructor of the ReadCursor structure.
         *
         * @param destructive specifies whether the ReadCursor is destructive
         * (as used for the "read cursor") or non-destructive (as used for
         * the "peek cursor").
         */
        explicit ReadCursor(const bool destructive);

        /**
         * Copies the position and state of another ReadCursor without
         * touching the "destructive" flag.
         */
        void assign(const ReadCursor &cursor);

        /**
         * Resets the cursor to line 1, column 1 at the start of the buffer
         * without changing the "destructive" flag.
         */
        void reset();
    };

    /**
     * Queue containing the data chunks that have been fed into the reader.
     */
    std::deque<std::string> buffer;

    /**
     * The read and the peek cursor.
     */
    ReadCursor readCursor, peekCursor;

    /**
     * Determines whether the reader has been closed.
     */
    bool closed;

    /**
     * Substitutes any supported line break sequence in the incoming data
     * with "\n".
     *
     * @param cursor is the cursor whose line break state should be used and
     * updated.
     * @param c points at the character that has just been read. It is
     * replaced by '\n' if it starts a line break.
     * @return true if the character is meant as output, false if it is the
     * second half of a two-character line break and has to be skipped.
     */
    bool substituteLinebreaks(ReadCursor *cursor, char *c);

    /**
     * Reads a character from the input buffer and advances the given
     * cursor, including its line and column counters.
     *
     * @param cursor is the cursor that should be used for reading.
     * @param c is an output parameter which is set to the read character.
     * It is left untouched if no character could be read.
     * @return true if a character was read, false if the buffer does not
     * contain any further data for this cursor.
     */
    bool readCharacterAtCursor(ReadCursor *cursor, char *c);

public:
    /**
     * Constructor of the buffered char reader class.
     */
    BufferedCharReader();

    /**
     * Resets the reader to its initial state: the buffer is emptied, both
     * cursors are reset and the reader is reopened.
     */
    void reset();

    /**
     * Feeds new data into the internal buffer of the BufferedCharReader
     * class.
     *
     * @param data is a string containing the data that should be
     * appended to the internal buffer.
     * @return true if the operation was successful, false otherwise (e.g.
     * because the reader is closed).
     */
    bool feed(const std::string &data);

    /**
     * Marks the end of the input, allowing successors in the pipeline
     * to react properly (e.g. creating the end of stream token).
     */
    void close();

    /**
     * Peeks a single character. If called multiple times, returns the
     * character after the previously peeked character.
     *
     * @param c points at the character to which the result should be
     * written.
     * @return true if the character was successfully read, false if there
     * are no more characters to be read in the buffer.
     */
    bool peek(char *c);

    /**
     * Reads a character from the input data and moves the peek pointer to
     * the new read position, so a following "peek" returns the character
     * after the one that has just been read.
     *
     * @param c points at the character to which the result should be
     * written.
     * @return true if the character was successfully read, false if there
     * are no more characters to be read in the buffer.
     */
    bool read(char *c);

    /**
     * Advances the read pointer to the peek pointer -- so if the "peek"
     * function was called, "read" will now return the character after
     * the last peeked character.
     */
    void consumePeek();

    /**
     * Resets the peek pointer to the "read" pointer.
     */
    void resetPeek();

    /**
     * Returns true if the stream was closed and the read pointer has
     * consumed every byte that was fed into the reader.
     */
    bool atEnd() const;

    /**
     * Returns the current line of the read pointer (starting with one).
     */
    int getLine() const
    {
        return static_cast<int>(readCursor.line);
    }

    /**
     * Returns the current column of the read pointer (starting with one).
     */
    int getColumn() const
    {
        return static_cast<int>(readCursor.column);
    }
};

}
}

#endif /* OUSIA_UTILS_BUFFERED_CHAR_READER_HPP_ */

// ---------------------------------------------------------------------------
// src/core/utils/BufferedCharReader.cpp
//
// In the source tree this file begins with
//     #include "BufferedCharReader.hpp"
// The declarations are listed directly above, so the directive is left out
// of this combined listing.
// ---------------------------------------------------------------------------
/*
    SCAENEA IDL Compiler (scidlc)
    Copyright (C) 2014 Andreas Stöckel

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <algorithm>

namespace ousia {
namespace utils {

namespace {

// Constants used within the line break state machine. The low nibble of
// ReadCursor::lbState is non-zero if the previously read character was a
// line break character that may still be completed to a two-character line
// break; the high nibble stores which character that was.
constexpr std::uint8_t LB_STATE_NONE = 0x00;
constexpr std::uint8_t LB_STATE_ONE = 0x01;
constexpr std::uint8_t LB_STATE_LF = 0x10;
constexpr std::uint8_t LB_STATE_CR = 0x20;
constexpr std::uint8_t LB_STATE_MASK_CNT = 0x0F;
constexpr std::uint8_t LB_STATE_MASK_TYPE = 0xF0;

}

/*******************************************************************************
 * Struct BufferedCharReader::ReadCursor
 ******************************************************************************/

BufferedCharReader::ReadCursor::ReadCursor(const bool destructive)
    : destructive(destructive)
{
    reset();
}

void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor)
{
    // "destructive" is const and deliberately not part of the copy
    line = cursor.line;
    column = cursor.column;
    bufferElem = cursor.bufferElem;
    bufferPos = cursor.bufferPos;
    lbState = cursor.lbState;
}

void BufferedCharReader::ReadCursor::reset()
{
    line = 1;
    column = 1;
    bufferElem = 0;
    bufferPos = 0;
    lbState = LB_STATE_NONE;
}

/*******************************************************************************
 * Class BufferedCharReader
 ******************************************************************************/

BufferedCharReader::BufferedCharReader()
    : readCursor(true), peekCursor(false), closed(false)
{
    reset();
}

void BufferedCharReader::reset()
{
    readCursor.reset();
    peekCursor.reset();
    buffer.clear();
    closed = false;
}

bool BufferedCharReader::feed(const std::string &data)
{
    // Abort if the BufferedCharReader was closed
    if (closed) {
        return false;
    }

    // Append the data onto the queue
    buffer.push_back(data);
    return true;
}

void BufferedCharReader::close()
{
    closed = true;
}

bool BufferedCharReader::substituteLinebreaks(ReadCursor *cursor, char *c)
{
    // Any other character is passed through and ends a pending line break
    if (*c != '\n' && *c != '\r') {
        cursor->lbState = LB_STATE_NONE;
        return true;
    }

    // Line breaks are inserted for the following character combinations:
    // \n, \r, \n\r, \r\n  TODO: Change behaviour to \n, \n\r, \r\n
    const std::uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR;
    const bool pending = (cursor->lbState & LB_STATE_MASK_CNT) != 0;
    const std::uint8_t lastType = cursor->lbState & LB_STATE_MASK_TYPE;

    // A different line break character directly after an emitted one is the
    // second half of a two-character line break and must be skipped
    if (pending && lastType != type) {
        cursor->lbState = LB_STATE_NONE;
        return false;
    }

    // Otherwise this character starts a new line break. It stays "pending"
    // even if the same character is repeated, so that e.g. "\r\r\n" is read
    // as "\r" followed by "\r\n" (two line breaks, not three).
    cursor->lbState = static_cast<std::uint8_t>(LB_STATE_ONE | type);
    *c = '\n';
    return true;
}

bool BufferedCharReader::readCharacterAtCursor(ReadCursor *cursor, char *c)
{
    // Read into a local variable so that *c is only modified on success
    char ch = 0;
    bool hasChar = false;
    while (!hasChar) {
        // Abort if the current buffer element does not point to a valid entry
        // in the buffer -- we must wait until another data block has been fed
        // into the buffer
        if (cursor->bufferElem >= buffer.size()) {
            return false;
        }

        // Fetch the buffer element the cursor points to
        const std::string &data = buffer[cursor->bufferElem];

        // Handle the "no data" case -- either in a destructive or
        // non-destructive manner. "data" is not used after pop_front().
        if (cursor->bufferPos >= data.length()) {
            if (cursor->destructive) {
                buffer.pop_front();
            } else {
                cursor->bufferElem++;
            }
            cursor->bufferPos = 0;
            continue;
        }

        // Read the character, advance the buffer position
        ch = data[cursor->bufferPos];
        cursor->bufferPos++;

        // Substitute line breaks with a single LF (0x0A)
        hasChar = substituteLinebreaks(cursor, &ch);
    }

    // Update the position counters
    if (ch == '\n') {
        cursor->line++;
        cursor->column = 1;
    } else if ((static_cast<unsigned char>(ch) & 0xC0) != 0x80) {
        // Bytes of the form 10xxxxxx are UTF-8 continuation bytes and do not
        // start a new character, so they do not advance the column
        cursor->column++;
    }

    *c = ch;
    return true;
}

bool BufferedCharReader::peek(char *c)
{
    return readCharacterAtCursor(&peekCursor, c);
}

bool BufferedCharReader::read(char *c)
{
    const bool success = readCharacterAtCursor(&readCursor, c);

    // The peek cursor has to be synchronised after the read cursor moved
    // (not before): otherwise it would lag one character behind, and it
    // would point into the wrong buffer element whenever the destructive
    // read dropped the front element.
    resetPeek();
    return success;
}

void BufferedCharReader::consumePeek()
{
    // Remove all buffer elements the peek cursor has left behind
    const std::size_t consumed =
        std::min<std::size_t>(peekCursor.bufferElem, buffer.size());
    buffer.erase(
        buffer.begin(),
        buffer.begin() +
            static_cast<std::deque<std::string>::difference_type>(consumed));
    peekCursor.bufferElem = 0;

    // Copy the peek cursor to the read cursor
    readCursor.assign(peekCursor);
}

void BufferedCharReader::resetPeek()
{
    // Reset the peek cursor to the read cursor
    peekCursor.assign(readCursor);
}

bool BufferedCharReader::atEnd() const
{
    if (!closed) {
        return false;
    }

    // The end is reached if no unread byte is left at or behind the read
    // cursor. Exhausted or empty buffer elements that have not been dropped
    // yet must not count as pending data.
    std::size_t pos = readCursor.bufferPos;
    for (std::size_t i = readCursor.bufferElem; i < buffer.size(); ++i) {
        if (buffer[i].size() > pos) {
            return false;
        }
        pos = 0;
    }
    return true;
}

}
}

/*
 * The commit additionally contains the following formatting-only hunks for
 * src/plugins/mozjs/MozJsScriptEngine.cpp. Only fragments of the affected
 * definitions are part of the diff, so they are kept unchanged for reference:
 *
 * diff --git a/src/plugins/mozjs/MozJsScriptEngine.cpp b/src/plugins/mozjs/MozJsScriptEngine.cpp
 * index c67a3b4..f269eb7 100644
 * --- a/src/plugins/mozjs/MozJsScriptEngine.cpp
 * +++ b/src/plugins/mozjs/MozJsScriptEngine.cpp
 * @@ -426,8 +426,10 @@ void MozJsScriptEngineScope::variantToValue(const Variant &var,
 *  return; }
 *  case VariantType::function: {
 * - JS::RootedObject f(cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
 * - JS_SetPrivate(f, new MozJsFunctionData(*this, var.getFunctionValue()->clone()));
 * + JS::RootedObject f(
 * + cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
 * + JS_SetPrivate(f, new MozJsFunctionData(
 * + *this, var.getFunctionValue()->clone()));
 *  JS_FreezeObject(cx, f);
 *  val.setObjectOrNull(f.get());
 *  return;
 * @@ -497,8 +499,7 @@ MozJsScriptEngine::~MozJsScriptEngine()
 *  JS_ShutDown();
 *  }
 * -MozJsScriptEngineScope *MozJsScriptEngine::createScope()
 * -{
 * +MozJsScriptEngineScope *MozJsScriptEngine::createScope() {
 *  return new MozJsScriptEngineScope(rt);
 *  }
 *  }
 */