summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2014-10-31 13:26:00 +0000
committerandreas <andreas@daaaf23c-2e50-4459-9457-1e69db5a47bf>2014-10-31 13:26:00 +0000
commit9e233b3f13daebb9ac4c5cae0da073d0c6f782c0 (patch)
tree3a87cbf654a8d6f3a9e722e07f55318ef6a71fb8 /src
parentc54065160a03f266c1406edf74d97ab74ee75d51 (diff)
added buffered char reader
git-svn-id: file:///var/local/svn/basicwriter@89 daaaf23c-2e50-4459-9457-1e69db5a47bf
Diffstat (limited to 'src')
-rw-r--r--src/core/utils/BufferedCharReader.cpp216
-rw-r--r--src/core/utils/BufferedCharReader.hpp240
-rw-r--r--src/plugins/mozjs/MozJsScriptEngine.cpp9
3 files changed, 461 insertions, 4 deletions
diff --git a/src/core/utils/BufferedCharReader.cpp b/src/core/utils/BufferedCharReader.cpp
new file mode 100644
index 0000000..0377015
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.cpp
@@ -0,0 +1,216 @@
+/*
+ SCAENEA IDL Compiler (scidlc)
+ Copyright (C) 2014 Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "BufferedCharReader.hpp"
+
+namespace ousia {
+namespace utils {
+
+// Constants used within the linebreak state machine. The per-cursor state
+// byte "lbState" is split into two nibbles: the lower nibble holds a counter,
+// the upper nibble holds the type of the last linebreak character seen.
+// Initial state / state after any non-linebreak character.
+static const uint8_t LB_STATE_NONE = 0x00;
+// Counter value "one" (not referenced by the code visible in this file).
+static const uint8_t LB_STATE_ONE = 0x01;
+// Type flag: the last linebreak character was a line feed ('\n').
+static const uint8_t LB_STATE_LF = 0x10;
+// Type flag: the last linebreak character was a carriage return ('\r').
+static const uint8_t LB_STATE_CR = 0x20;
+// Mask selecting the counter nibble of the state byte.
+static const uint8_t LB_STATE_MASK_CNT = 0x0F;
+// Mask selecting the type nibble of the state byte.
+static const uint8_t LB_STATE_MASK_TYPE = 0xF0;
+
+/*******************************************************************************
+ * Struct BufferedCharReader::ReadCursor
+ ******************************************************************************/
+
+// Creates a cursor of the requested kind. The "destructive" flag is fixed at
+// construction time; all other fields are brought to their initial values
+// by reset().
+BufferedCharReader::ReadCursor::ReadCursor(const bool destructive)
+    : destructive{destructive}
+{
+    this->reset();
+}
+
+// Copies the complete cursor state (buffer position, text position and
+// linebreak state) from "cursor". The "destructive" flag of this cursor is
+// left untouched.
+void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor)
+{
+    // Position inside the chunk queue
+    bufferElem = cursor.bufferElem;
+    bufferPos = cursor.bufferPos;
+
+    // Position inside the text
+    line = cursor.line;
+    column = cursor.column;
+
+    // State of the linebreak state machine
+    lbState = cursor.lbState;
+}
+
+// Moves the cursor back to the very beginning: line one, column one, first
+// byte of the first buffer element, no pending linebreak. The "destructive"
+// flag is not changed.
+void BufferedCharReader::ReadCursor::reset()
+{
+    line = column = 1;
+    bufferElem = bufferPos = 0;
+    lbState = LB_STATE_NONE;
+}
+
+/*******************************************************************************
+ * Class BufferedCharReader
+ ******************************************************************************/
+
+// Sets up a destructive read cursor and a non-destructive peek cursor; the
+// remaining state (including the "closed" flag) is initialised by reset().
+BufferedCharReader::BufferedCharReader()
+    : readCursor{true}, peekCursor{false}
+{
+    this->reset();
+}
+
+// Returns the reader to its initial state: the reader is open again, all
+// buffered data is dropped and both cursors point to the start.
+void BufferedCharReader::reset()
+{
+    closed = false;
+    buffer.clear();
+    peekCursor.reset();
+    readCursor.reset();
+}
+
+// Appends "data" as a new element at the back of the buffer queue. Data is
+// only accepted while the reader has not been closed; the return value tells
+// whether the data was accepted.
+bool BufferedCharReader::feed(const std::string &data)
+{
+    const bool accepted = !closed;
+    if (accepted) {
+        buffer.push_back(data);
+    }
+    return accepted;
+}
+
+// Marks the end of the input. Once the flag is set, feed() rejects further
+// data and atEnd() may start to report true.
+void BufferedCharReader::close()
+{
+ closed = true;
+}
+
+// Linebreak state machine. Emits a single '\n' for each of the character
+// combinations \n, \r, \n\r and \r\n (TODO: change behaviour to \n, \n\r,
+// \r\n). Returns true if *c should be passed on to the caller and false if
+// the character is the second half of a two-character linebreak and has to
+// be swallowed.
+bool BufferedCharReader::substituteLinebreaks(ReadCursor *cursor, char *c)
+{
+    const bool isLineFeed = (*c == '\n');
+    const bool isCarriageReturn = (*c == '\r');
+
+    // Ordinary characters reset the state machine and are always emitted
+    if (!isLineFeed && !isCarriageReturn) {
+        cursor->lbState = LB_STATE_NONE;
+        return true;
+    }
+
+    // Type of the current linebreak character
+    const uint8_t curType = isLineFeed ? LB_STATE_LF : LB_STATE_CR;
+
+    // Counter and type stored by the previous invocation
+    const uint8_t prevCount = cursor->lbState & LB_STATE_MASK_CNT;
+    const uint8_t prevType = cursor->lbState & LB_STATE_MASK_TYPE;
+
+    // Store the toggled counter together with the current type
+    cursor->lbState = ((prevCount + 1) & 1) | curType;
+
+    // A linebreak is emitted if this character starts a new sequence or if
+    // the same linebreak character is repeated; a differing character
+    // directly after a counted one completes a pair and is dropped.
+    const bool emit = (prevCount == 0) || (prevType == curType);
+    if (emit) {
+        *c = '\n';
+    }
+    return emit;
+}
+
+// Fetches the next output character at the position of "cursor", advancing
+// the cursor and its line/column counters. Characters swallowed by the
+// linebreak substitution are skipped. Returns false if the buffer ran out of
+// data before a character could be produced.
+bool BufferedCharReader::readCharacterAtCursor(ReadCursor *cursor,
+                                               char *c)
+{
+    for (;;) {
+        // No valid buffer element left -- the caller has to wait until
+        // another data block has been fed into the buffer
+        if (cursor->bufferElem >= buffer.size()) {
+            return false;
+        }
+
+        const std::string &chunk = buffer[cursor->bufferElem];
+        if (cursor->bufferPos < chunk.length()) {
+            // Fetch the byte and step over it
+            *c = chunk[cursor->bufferPos];
+            cursor->bufferPos++;
+
+            // Linebreaks are folded into a single LF (0x0A); a swallowed
+            // character causes another iteration
+            if (substituteLinebreaks(cursor, c)) {
+                break;
+            }
+            continue;
+        }
+
+        // The current element is exhausted: a destructive cursor discards
+        // it, a non-destructive cursor merely steps to the next element
+        if (cursor->destructive) {
+            buffer.pop_front();
+        } else {
+            cursor->bufferElem++;
+        }
+        cursor->bufferPos = 0;
+    }
+
+    // Keep track of the text position
+    if (*c == '\n') {
+        cursor->line++;
+        cursor->column = 1;
+    } else if ((*c & 0xC0) != 0x80) {
+        // UTF-8 continuation bytes (bit pattern 10xxxxxx) do not start a new
+        // character and therefore do not advance the column
+        cursor->column++;
+    }
+
+    return true;
+}
+
+// Reads the next character at the non-destructive peek cursor and writes it
+// to *c. Returns the result of readCharacterAtCursor(), i.e. false if no
+// character could be produced from the buffered data.
+bool BufferedCharReader::peek(char *c)
+{
+ return readCharacterAtCursor(&peekCursor, c);
+}
+
+// Reads the next character at the destructive read cursor and writes it to
+// *c. Any previous peek position is discarded. Returns false if no character
+// could be produced from the buffered data.
+bool BufferedCharReader::read(char *c)
+{
+    // Advance the read cursor first. Reading does not depend on the peek
+    // cursor, so no synchronisation is needed beforehand.
+    const bool res = readCharacterAtCursor(&readCursor, c);
+
+    // Synchronise the peek cursor AFTER reading. The read may have advanced
+    // the position and removed exhausted elements from the front of the
+    // buffer; a peek cursor copied before the read would point at the
+    // character just read or into the wrong buffer element.
+    resetPeek();
+    return res;
+}
+
+// Makes the peek position the new read position: every buffer element the
+// peek cursor has already stepped over is discarded and the read cursor
+// takes over the state of the peek cursor.
+void BufferedCharReader::consumePeek()
+{
+    // Drop the elements in front of the one the peek cursor points to
+    unsigned int obsolete = peekCursor.bufferElem;
+    while (obsolete > 0) {
+        buffer.pop_front();
+        obsolete--;
+    }
+
+    // The element the peek cursor refers to is now the first one
+    peekCursor.bufferElem = 0;
+
+    // Let the read cursor continue where the peek cursor stands
+    readCursor.assign(peekCursor);
+}
+
+// Discards the current peek position: the peek cursor takes over the state
+// of the read cursor (its "destructive" flag is not affected by assign()).
+void BufferedCharReader::resetPeek()
+{
+ // Reset the peek cursor to the read cursor
+ peekCursor.assign(readCursor);
+}
+
+// Returns true if the reader has been closed and no unread byte is left in
+// the buffer. An open reader is never at its end, as more data may be fed.
+bool BufferedCharReader::atEnd()
+{
+    if (!closed) {
+        return false;
+    }
+
+    // Look for an unread byte, starting at the read position. Only the
+    // element the read cursor points to is partially consumed; all following
+    // elements are unread from their first byte. Empty elements (e.g. from
+    // feeding an empty string) must not prevent the end from being reported.
+    std::string::size_type offs = readCursor.bufferPos;
+    for (std::deque<std::string>::size_type i = readCursor.bufferElem;
+         i < buffer.size(); ++i) {
+        if (offs < buffer[i].size()) {
+            return false;
+        }
+        offs = 0;
+    }
+    return true;
+}
+
+}
+}
+
diff --git a/src/core/utils/BufferedCharReader.hpp b/src/core/utils/BufferedCharReader.hpp
new file mode 100644
index 0000000..86f43b5
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.hpp
@@ -0,0 +1,240 @@
+/*
+ SCAENEA IDL Compiler (scidlc)
+ Copyright (C) 2014 Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+#define _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+
+#include <deque>
+#include <string>
+#include <cstdint>
+
+namespace ousia {
+namespace utils {
+
+/**
+ * The BufferedCharReader class is used for storing incoming data that
+ * is fed into the pipeline as well as reading/peeking single characters
+ * from that buffer. Additionally it counts the current column/row
+ * (with correct handling for UTF-8) and contains an internal state
+ * machine that handles the detection of linebreaks.
+ *
+ * Additionally the BufferedCharReader performs the following tasks:
+ * 1. Convert the incoming character encoding to UTF-8 (TODO: implement)
+ * 2. Convert arbitrary linebreaks to a single "\n"
+ */
+class BufferedCharReader {
+
+private:
+
+    /**
+     * The ReadCursor structure is responsible for representing the read
+     * position within the text and all state machine states belonging to the
+     * cursor. There are two types of read cursors: destructive and
+     * non-destructive read cursors.
+     */
+    struct ReadCursor {
+        /**
+         * Specifies whether this is a destructive cursor (buffer elements
+         * are discarded once they were completely read).
+         */
+        const bool destructive;
+
+        /**
+         * The line the cursor currently points to.
+         */
+        unsigned int line;
+
+        /**
+         * The column the cursor currently points to.
+         */
+        unsigned int column;
+
+        /**
+         * The index of the element in the data buffer we're currently
+         * reading from.
+         */
+        unsigned int bufferElem;
+
+        /**
+         * The byte position within this data buffer element.
+         */
+        unsigned int bufferPos;
+
+        /**
+         * State variable used in the internal state machine of the
+         * linebreak detection.
+         */
+        uint8_t lbState;
+
+        /**
+         * Constructor of the ReadCursor structure. Declared explicit to
+         * prevent accidental implicit conversions from bool.
+         *
+         * @param destructive specifies whether the ReadCursor is destructive
+         * (consumes all read characters, as used in the "read cursor") or
+         * non-destructive (as used in the "peek cursor").
+         */
+        explicit ReadCursor(const bool destructive);
+
+        /**
+         * Copies the data from another ReadCursor without overriding the
+         * "destructive" flag.
+         *
+         * @param cursor is the cursor the state should be copied from.
+         */
+        void assign(const ReadCursor &cursor);
+
+        /**
+         * Resets the cursor without changing the "destructive" flag.
+         */
+        void reset();
+    };
+
+    /**
+     * Queue containing the data that has been fed into the char reader.
+     */
+    std::deque<std::string> buffer;
+
+    /**
+     * The read and the peek cursor.
+     */
+    ReadCursor readCursor, peekCursor;
+
+    /**
+     * Determines whether the reader has been closed.
+     */
+    bool closed;
+
+    /**
+     * Substitutes any combination of linebreaks in the incoming data with
+     * "\n".
+     *
+     * @param cursor points to the cursor whose linebreak state should be
+     * used and updated.
+     * @param c points to the current character; it may be rewritten to "\n".
+     * @return true if the current character is meant as output, false
+     * otherwise.
+     */
+    bool substituteLinebreaks(ReadCursor *cursor, char *c);
+
+    /**
+     * Reads a character from the input buffer and advances the given read
+     * cursor. Characters that are removed by the linebreak substitution and
+     * buffer element boundaries are skipped internally.
+     *
+     * @param cursor points to the read cursor that should be used for
+     * reading.
+     * @param c is an output parameter, which will be set to the read
+     * character.
+     * @return true if there was enough data in the buffer, false
+     * otherwise.
+     */
+    bool readCharacterAtCursor(ReadCursor *cursor, char *c);
+
+    /**
+     * Function that is called for each read character -- updates the row and
+     * column count.
+     *
+     * NOTE(review): no definition of this function is visible alongside this
+     * declaration -- confirm whether it is still needed.
+     */
+    void updatePositionCounters(const char c);
+
+public:
+
+    /**
+     * Constructor of the buffered char reader class.
+     */
+    BufferedCharReader();
+
+    /**
+     * Resets the reader to its initial state.
+     */
+    void reset();
+
+    /**
+     * Feeds new data into the internal buffer of the BufferedCharReader
+     * class.
+     *
+     * @param data is a string containing the data that should be
+     * appended to the internal buffer.
+     * @return true if the operation was successful, false otherwise (e.g.
+     * because the reader is closed).
+     */
+    bool feed(const std::string &data);
+
+    /**
+     * Marks the end of the input, allowing successors in the pipeline
+     * to react properly (e.g. creating the end of stream token).
+     */
+    void close();
+
+    /**
+     * Peeks a single character. If called multiple times, returns the
+     * character after the previously peeked character.
+     *
+     * @param c points to the character to which the result should be
+     * written.
+     * @return true if the character was successfully read, false if there are
+     * no more characters to be read in the buffer.
+     */
+    bool peek(char *c);
+
+    /**
+     * Reads a character from the input data. If "peek" was called
+     * beforehand resets the peek pointer.
+     *
+     * @param c points to the character to which the result should be
+     * written.
+     * @return true if the character was successfully read, false if there are
+     * no more characters to be read in the buffer.
+     */
+    bool read(char *c);
+
+    /**
+     * Advances the read pointer to the peek pointer -- so if the "peek"
+     * function was called, "read" will now return the character after
+     * the last peeked character.
+     */
+    void consumePeek();
+
+    /**
+     * Resets the peek pointer to the "read" pointer.
+     */
+    void resetPeek();
+
+    /**
+     * Returns true if there are no more characters as the stream was
+     * closed.
+     */
+    bool atEnd();
+
+    /**
+     * Returns the current line of the read cursor (starting with one).
+     */
+    inline int getLine() const
+    {
+        return readCursor.line;
+    }
+
+    /**
+     * Returns the current column of the read cursor (starting with one).
+     */
+    inline int getColumn() const
+    {
+        return readCursor.column;
+    }
+
+};
+
+}
+}
+
+#endif /* _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ */
+
diff --git a/src/plugins/mozjs/MozJsScriptEngine.cpp b/src/plugins/mozjs/MozJsScriptEngine.cpp
index c67a3b4..f269eb7 100644
--- a/src/plugins/mozjs/MozJsScriptEngine.cpp
+++ b/src/plugins/mozjs/MozJsScriptEngine.cpp
@@ -426,8 +426,10 @@ void MozJsScriptEngineScope::variantToValue(const Variant &var,
return;
}
case VariantType::function: {
- JS::RootedObject f(cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
- JS_SetPrivate(f, new MozJsFunctionData(*this, var.getFunctionValue()->clone()));
+ JS::RootedObject f(
+ cx, JS_NewObject(cx, &functionClass, nullptr, nullptr));
+ JS_SetPrivate(f, new MozJsFunctionData(
+ *this, var.getFunctionValue()->clone()));
JS_FreezeObject(cx, f);
val.setObjectOrNull(f.get());
return;
@@ -497,8 +499,7 @@ MozJsScriptEngine::~MozJsScriptEngine()
JS_ShutDown();
}
-MozJsScriptEngineScope *MozJsScriptEngine::createScope()
-{
+MozJsScriptEngineScope *MozJsScriptEngine::createScope() {
return new MozJsScriptEngineScope(rt);
}
}