summaryrefslogtreecommitdiff
path: root/src/core/utils/BufferedCharReader.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/utils/BufferedCharReader.hpp')
-rw-r--r--src/core/utils/BufferedCharReader.hpp240
1 files changed, 240 insertions, 0 deletions
diff --git a/src/core/utils/BufferedCharReader.hpp b/src/core/utils/BufferedCharReader.hpp
new file mode 100644
index 0000000..86f43b5
--- /dev/null
+++ b/src/core/utils/BufferedCharReader.hpp
@@ -0,0 +1,240 @@
+/*
+ SCAENEA IDL Compiler (scidlc)
+ Copyright (C) 2014 Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+#define _OUSIA_UTILS_BUFFERED_CHAR_READER_H_
+
+#include <deque>
+#include <string>
+#include <cstdint>
+
+namespace ousia {
+namespace utils {
+
+/**
+ * The BufferedCharReader class is used for storing incomming data that
+ * is fed into the pipeline as well as reading/peeking single characters
+ * from that buffer. Additionally it counts the current column/row
+ * (with correct handling for UTF-8) and contains an internal state
+ * machine that handles the detection of linebreaks.
+ *
+ * Additionally the BufferedCharReader performs the following tasks:
+ * 1. Convert the incomming character encoding to UTF-8 (TODO: implement)
+ * 2. Convert arbitrary linebreaks to a single "\n"
+ */
+class BufferedCharReader {
+
+private:
+
+ /**
+ * The ReadCursor structure is responsible for representing the read
+ * position within the text an all state machine states belonging to the
+ * cursor. There are two types of read cursors: destructive and
+ * non-destructive read cursors.
+ */
+ struct ReadCursor {
+ /**
+ * Specifies whether this is a destructive cursor (bytes are discarded
+ * once they were read from the buffer).
+ */
+ const bool destructive;
+
+ /**
+ * The line the cursor currently points to.
+ */
+ unsigned int line;
+
+ /**
+ * The column the cursor currently points to.
+ */
+ unsigned int column;
+
+ /**
+ * The index of the element in the data buffer we're currently reading
+ * from.
+ */
+ unsigned int bufferElem;
+
+ /**
+ * The byte position within this data buffer.
+ */
+ unsigned int bufferPos;
+
+ /**
+ * State variable used in the internal state machine of the
+ * line feed detection.
+ */
+ uint8_t lbState;
+
+ /**
+ * Constructor of the ReadCursor structure.
+ *
+ * @param destructive specifies whether the ReadCursor is destructive
+ * (consumes all read characters, as used in the "read cursor") or
+ * non-destructive (as used in the "peek cursor").
+ */
+ ReadCursor(const bool destructive);
+
+ /**
+ * Copys the data from another ReadCursor without overriding the
+ * "destructive" flag.
+ */
+ void assign(const ReadCursor &cursor);
+
+ /**
+ * Resets the cursor without changing the "destructive" flag.
+ */
+ void reset();
+ };
+
+ /**
+ * Queue containing the data that has been fed into the char reader.
+ */
+ std::deque<std::string> buffer;
+
+ /**
+ * The read and the peek cursor.
+ */
+ ReadCursor readCursor, peekCursor;
+
+ /**
+ * Determines whether the reader has been closed.
+ */
+ bool closed;
+
+ /**
+ * Substitute any combination of linebreaks in the incomming code with "\n".
+ * Returns true if the current character is meant as output, false
+ * otherwise.
+ */
+ bool substituteLinebreaks(ReadCursor *cursor, char *c);
+
+ /**
+ * Reads a character from the input buffer and advances the given read
+ * cursor.
+ *
+ * @param cursor is a reference to the read cursor that should be used
+ * for reading.
+ * @param hasChar is set to true, if a character is available, false if
+ * no character is available (e.g. because line breaks are substituted or
+ * the end of a buffer boundary is reached -- in this case this function
+ * should be called again with the same parameters.)
+ * @param c is a output parameter, which will be set to the read character.
+ * @param returns true if there was enough data in the buffer, false
+ * otherwise.
+ */
+ bool readCharacterAtCursor(ReadCursor *cursor, char *c);
+
+ /**
+ * Function that is called for each read character -- updates the row and
+ * column count.
+ */
+ void updatePositionCounters(const char c);
+
+public:
+
+ /**
+ * Constructor of the buffered char reader class.
+ */
+ BufferedCharReader();
+
+ /**
+ * Resets the reader to its initial state.
+ */
+ void reset();
+
+ /**
+ * Feeds new data into the internal buffer of the BufferedCharReader
+ * class.
+ *
+ * @param data is a string containing the data that should be
+ * appended to the internal buffer.
+ * @return true if the operation was successful, false otherwise (e.g.
+ * because the reader is closed).
+ */
+ bool feed(const std::string &data);
+
+ /**
+ * Marks the end of the input, allowing successors in the pipeline
+ * to react properly (e.g. creating the end of stream token).
+ */
+ void close();
+
+ /**
+ * Peeks a single character. If called multiple times, returns the
+ * character after the previously peeked character.
+ *
+ * @param c is a reference to the character to which the result should be
+ * writtern.
+ * @return true if the character was successfully read, false if there are
+ * no more characters to be read in the buffer.
+ */
+ bool peek(char *c);
+
+ /**
+ * Reads a character from the input data. If "peek" was called
+ * beforehand resets the peek pointer.
+ *
+ * @param c is a reference to the character to which the result should be
+ * writtern.
+ * @return true if the character was successfully read, false if there are
+ * no more characters to be read in the buffer.
+ */
+ bool read(char *c);
+
+ /**
+ * Advances the read pointer to the peek pointer -- so if the "peek"
+ * function was called, "read" will now return the character after
+ * the last peeked character.
+ */
+ void consumePeek();
+
+ /**
+ * Resets the peek pointer to the "read" pointer.
+ */
+ void resetPeek();
+
+ /**
+ * Returns true if there are no more characters as the stream was
+ * closed.
+ */
+ bool atEnd();
+
+ /**
+ * Returns the current line (starting with one).
+ */
+ inline int getLine()
+ {
+ return readCursor.line;
+ }
+
+ /**
+ * Returns the current column (starting with one).
+ */
+ inline int getColumn()
+ {
+ return readCursor.column;
+ }
+
+};
+
+}
+}
+
+#endif /* _OUSISA_UTILS_BUFFERED_CHAR_READER_H_ */
+