summaryrefslogtreecommitdiff
path: root/src/core/BufferedCharReader.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/BufferedCharReader.hpp')
-rw-r--r--src/core/BufferedCharReader.hpp288
1 files changed, 0 insertions, 288 deletions
diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp
deleted file mode 100644
index e7f3186..0000000
--- a/src/core/BufferedCharReader.hpp
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file BufferedCharReader.hpp
- *
- * Contains the BufferedCharReader class which is used for reading/peeking
- * single characters from an input stream or string.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_BUFFERED_CHAR_READER_H_
-#define _OUSIA_BUFFERED_CHAR_READER_H_
-
-#include <deque>
-#include <string>
-#include <istream>
-#include <cstdint>
-
-namespace ousia {
-
-// TODO: Better split this class into multiple classes with base class
-// BufferedCharReader where each sub class represents one method of supplying
-// the input data (feeding, initial string, input stream).
-
-/**
- * The BufferedCharReader class is used for storing incomming data that
- * is fed into the pipeline as well as reading/peeking single characters
- * from that buffer. Additionally it counts the current column/row
- * (with correct handling for UTF-8) and contains an internal state
- * machine that handles the detection of linebreaks and converts these to a
- * single '\n'.
- */
-class BufferedCharReader {
-private:
- /**
- * The ReadCursor structure is responsible for representing the read
- * position within the text an all state machine states belonging to the
- * cursor. There are two types of read cursors: destructive and
- * non-destructive read cursors.
- */
- struct ReadCursor {
- /**
- * The line the cursor currently points to.
- */
- unsigned int line;
-
- /**
- * The column the cursor currently points to.
- */
- unsigned int column;
-
- /**
- * The index of the element in the data buffer we're currently reading
- * from.
- */
- unsigned int bufferElem;
-
- /**
- * The byte position within this data buffer.
- */
- unsigned int bufferPos;
-
- /**
- * Specifies whether this is a destructive cursor (bytes are discarded
- * once they were read from the buffer).
- */
- const bool destructive;
-
- /**
- * State variable used in the internal state machine of the
- * line feed detection.
- */
- uint8_t lbState;
-
- /**
- * Constructor of the ReadCursor structure.
- *
- * @param line is the start line.
- * @param column is the start column.
- * @param destructive specifies whether the ReadCursor is destructive
- * (consumes all read characters, as used in the "read cursor") or
- * non-destructive (as used in the "peek cursor").
- */
- ReadCursor(unsigned int line, unsigned int column, bool destructive);
-
- /**
- * Copys the data from another ReadCursor without overriding the
- * "destructive" flag.
- *
- * @param cursor is the cursor that should be copied.
- */
- void assign(const ReadCursor &cursor);
- };
-
- /**
- * Pointer at an (optional) input stream used for reading a chunk of data
- * whenever the input buffer depletes.
- */
- std::istream *inputStream;
-
- /**
- * The read and the peek cursor.
- */
- ReadCursor readCursor, peekCursor;
-
- /**
- * Set to true if there is no more input data.
- */
- bool depleted;
-
- /**
- * Queue containing the data that has been fed into the char reader.
- */
- std::deque<std::string> buffer;
-
- /**
- * Substitute any combination of linebreaks in the incomming code with "\n".
- * Returns true if the current character is meant as output, false
- * otherwise.
- */
- bool substituteLinebreaks(ReadCursor &cursor, char *c);
-
- /**
- * Reads a character from the input buffer and advances the given read
- * cursor.
- *
- * @param cursor is a reference to the read cursor that should be used
- * for reading.
- * @param hasChar is set to true, if a character is available, false if
- * no character is available (e.g. because line breaks are substituted or
- * the end of a buffer boundary is reached -- in this case this function
- * should be called again with the same parameters.)
- * @param c is a output parameter, which will be set to the read character.
- * @param returns true if there was enough data in the buffer, false
- * otherwise.
- */
- bool readCharacterAtCursor(ReadCursor &cursor, char *c);
-
- /**
- * Function that is called for each read character -- updates the row and
- * column count.
- */
- void updatePositionCounters(const char c);
-
-public:
-
- /**
- * Constructor of the buffered char reader class with empty buffer as input.
- * This operates the BufferedCharReader in a mode where new data has to be
- * fed using the "feed" function and explicitly closed using the "close"
- * function.
- *
- * @param line is the start line.
- * @param column is the start column.
- */
- BufferedCharReader(int line = 1, int column = 1);
-
- /**
- * Constructor of the buffered char reader class with a string as input.
- *
- * @param str is a string containing the input data.
- * @param line is the start line.
- * @param column is the start column.
- */
- BufferedCharReader(const std::string &str, int line, int column);
-
- /**
- * Constructor of the buffered char reader class with a string as input.
- *
- * @param str is a string containing the input data.
- */
- BufferedCharReader(const std::string &str);
-
- /**
- * Constructor of the buffered char reader class with a string as input.
- *
- * @param inputStream is the input stream from which incomming data should
- * be read.
- * @param line is the start line.
- * @param column is the start column.
- */
- BufferedCharReader(std::istream &inputStream, int line = 1, int column = 1);
-
- /**
- * Peeks a single character. If called multiple times, returns the
- * character after the previously peeked character.
- *
- * @param c is a reference to the character to which the result should be
- * writtern.
- * @return true if the character was successfully read, false if there are
- * no more characters to be read in the buffer.
- */
- bool peek(char *c);
-
- /**
- * Reads a character from the input data. If "peek" was called
- * beforehand resets the peek pointer.
- *
- * @param c is a reference to the character to which the result should be
- * writtern.
- * @return true if the character was successfully read, false if there are
- * no more characters to be read in the buffer.
- */
- bool read(char *c);
-
- /**
- * Advances the read pointer to the peek pointer -- so if the "peek"
- * function was called, "read" will now return the character after
- * the last peeked character.
- */
- void consumePeek();
-
- /**
- * Moves the read cursor to the next non-whitespace character. Returns
- * false, if the end of the stream was reached.
- *
- * @return false if the end of the stream was reached, false othrwise.
- */
- bool consumeWhitespace();
-
- /**
- * Resets the peek pointer to the "read" pointer.
- */
- void resetPeek();
-
- /**
- * Feeds new data into the internal buffer of the BufferedCharReader
- * class. Only applicable if the buffered char reader was constructed
- * without an input stream or string.
- *
- * @param data is a string containing the data that should be
- * appended to the internal buffer.
- */
- void feed(const std::string &data);
-
- /**
- * Tells the buffered char reader that no more data will be fed.
- * Only applicable if the buffered char reader was constructed without an
- * input stream or string.
- *
- * @param data is a string containing the data that should be
- * appended to the internal buffer.
- */
- void close();
-
- /**
- * Returns true if there are no more characters as the stream was
- * closed.
- *
- * @return true if there is no more data.
- */
- bool atEnd() const;
-
- /**
- * Returns the current line (starting with one).
- *
- * @return the current line number.
- */
- int getLine() const { return readCursor.line; }
-
- /**
- * Returns the current column (starting with one).
- *
- * @return the current column number.
- */
- int getColumn() const { return readCursor.column; }
-};
-}
-
-#endif /* _OUSIA_BUFFERED_CHAR_READER_H_ */
-