summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/core/utils/CharReader.cpp 218
-rw-r--r-- src/core/utils/CharReader.hpp 295
-rw-r--r-- test/core/utils/CharReaderTest.cpp 222
-rw-r--r-- test/core/variant/ReaderTest.cpp 40
4 files changed, 732 insertions, 43 deletions
diff --git a/src/core/utils/CharReader.cpp b/src/core/utils/CharReader.cpp
index bf25a01..12d0043 100644
--- a/src/core/utils/CharReader.cpp
+++ b/src/core/utils/CharReader.cpp
@@ -19,11 +19,31 @@
#include <algorithm>
#include <limits>
+#include <core/Utils.hpp>
+
#include "CharReader.hpp"
namespace ousia {
namespace utils {
+/* Helper functions */
+
+/**
+ * istreamReadCallback is used internally by the Buffer class to stream data
+ * from an input stream.
+ *
+ * @param buf points at the target memory region.
+ * @param size is the requested number of bytes.
+ * @param userData is a pointer at some user defined data.
+ * @return the actual number of bytes read. If the result is smaller than
+ * the requested size, this tells the Buffer that the end of the input
+ * stream is reached.
+ */
+static size_t istreamReadCallback(char *buf, size_t size, void *userData)
+{
+ return (static_cast<std::istream *>(userData))->read(buf, size).gcount();
+}
+
/* Class Buffer */
Buffer::Buffer(ReadCallback callback, void *userData)
@@ -40,6 +60,8 @@ Buffer::Buffer(ReadCallback callback, void *userData)
startBucket = buckets.begin();
}
+Buffer::Buffer(std::istream &istream) : Buffer(istreamReadCallback, &istream) {}
+
Buffer::Buffer(const std::string &str)
: callback(nullptr),
userData(nullptr),
@@ -331,6 +353,202 @@ bool Buffer::read(Buffer::CursorId cursor, char &c)
advance(cur.bucket);
}
}
+
+/* CharReader::Cursor class */
+
+void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer,
+ CharReader::Cursor &cursor)
+{
+ // Copy the cursor position
+ buffer->copyCursor(cursor.cursor, this->cursor);
+
+ // Copy the state
+ line = cursor.line;
+ column = cursor.column;
+ state = cursor.state;
+ lastLinebreak = cursor.lastLinebreak;
+}
+
+/* CharReader class */
+
+CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line,
+ size_t column)
+ : buffer(buffer),
+ readCursor(buffer->createCursor(), line, column),
+ peekCursor(buffer->createCursor(), line, column),
+ coherent(true)
+{
+}
+
+CharReader::CharReader(const std::string &str, size_t line, size_t column)
+ : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column)
+{
+}
+
+CharReader::CharReader(std::istream &istream, size_t line, size_t column)
+ : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column)
+{
+}
+
+CharReader::~CharReader()
+{
+ buffer->deleteCursor(readCursor.cursor);
+ buffer->deleteCursor(peekCursor.cursor);
+}
+
+bool CharReader::substituteLinebreaks(Cursor &cursor, char &c)
+{
+ if (c == '\n' || c == '\r') {
+ switch (cursor.state) {
+ case LinebreakState::NONE:
+ // We got a first linebreak character -- output a '\n'
+ if (c == '\n') {
+ cursor.state = LinebreakState::HAS_LF;
+ } else {
+ cursor.state = LinebreakState::HAS_CR;
+ }
+ c = '\n';
+ return true;
+ case LinebreakState::HAS_LF:
+ // If a LF is followed by a LF, output a new linefeed
+ if (c == '\n') {
+ cursor.state = LinebreakState::HAS_LF;
+ return true;
+ }
+
+ // Otherwise, don't handle this character (part of "\n\r")
+ cursor.state = LinebreakState::NONE;
+ return false;
+ case LinebreakState::HAS_CR:
+ // If a CR is followed by a CR, output a new linefeed
+ if (c == '\r') {
+ cursor.state = LinebreakState::HAS_CR;
+ c = '\n';
+ return true;
+ }
+
+ // Otherwise, don't handle this character (part of "\r\n")
+ cursor.state = LinebreakState::NONE;
+ return false;
+ }
+ }
+
+ // No linebreak character, reset the linebreak state
+ cursor.state = LinebreakState::NONE;
+ return true;
+}
+
+bool CharReader::readAtCursor(Cursor &cursor, char &c)
+{
+ while (true) {
+ // Return false if we're at the end of the stream
+ if (!buffer->read(cursor.cursor, c)) {
+ return false;
+ }
+
+ // Substitute linebreak characters with a single '\n'
+ if (substituteLinebreaks(cursor, c)) {
+ if (c == '\n') {
+ // A linebreak was reached, go to the next line
+ cursor.line++;
+ cursor.column = 1;
+ cursor.lastLinebreak = buffer->offset(cursor.cursor);
+ } else {
+ // Ignore UTF-8 continuation bytes
+ if (!((c & 0x80) && !(c & 0x40))) {
+ cursor.column++;
+ }
+ }
+
+ return true;
+ }
+ }
+}
+
+bool CharReader::peek(char &c)
+{
+ // If the reader was coherent, update the peek cursor state
+ if (coherent) {
+ peekCursor.assign(buffer, readCursor);
+ coherent = false;
+ }
+
+ // Read a character from the peek cursor
+ return readAtCursor(peekCursor, c);
+}
+
+bool CharReader::read(char &c)
+{
+ // Read a character from the buffer at the current read cursor
+ bool res = readAtCursor(readCursor, c);
+
+ // Set the peek position to the current read position, if reading was not
+ // coherent
+ if (!coherent) {
+ peekCursor.assign(buffer, readCursor);
+ coherent = true;
+ } else {
+ buffer->copyCursor(readCursor.cursor, peekCursor.cursor);
+ }
+
+ // Return the result of the read function
+ return res;
+}
+
+void CharReader::resetPeek()
+{
+ if (!coherent) {
+ peekCursor.assign(buffer, readCursor);
+ coherent = true;
+ }
+}
+
+void CharReader::consumePeek()
+{
+ if (!coherent) {
+ readCursor.assign(buffer, peekCursor);
+ coherent = true;
+ }
+}
+
+bool CharReader::consumeWhitespace()
+{
+ char c;
+ while (peek(c)) {
+ if (!Utils::isWhitespace(c)) {
+ resetPeek();
+ return true;
+ }
+ consumePeek();
+ }
+ return false;
+}
+
+CharReaderFork CharReader::fork()
+{
+ return CharReaderFork(buffer, readCursor, peekCursor, coherent);
+}
+
+/* Class CharReaderFork */
+
+CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer,
+ CharReader::Cursor &parentReadCursor,
+ CharReader::Cursor &parentPeekCursor,
+ bool coherent)
+ : CharReader(buffer, 1, 1),
+ parentReadCursor(parentReadCursor),
+ parentPeekCursor(parentPeekCursor)
+{
+ readCursor.assign(buffer, parentReadCursor);
+ peekCursor.assign(buffer, parentPeekCursor);
+ this->coherent = coherent;
+}
+
+void CharReaderFork::commit()
+{
+ parentReadCursor.assign(buffer, readCursor);
+ parentPeekCursor.assign(buffer, peekCursor);
+}
}
}
diff --git a/src/core/utils/CharReader.hpp b/src/core/utils/CharReader.hpp
index 8d97d39..3d4c894 100644
--- a/src/core/utils/CharReader.hpp
+++ b/src/core/utils/CharReader.hpp
@@ -27,7 +27,9 @@
#ifndef _OUSIA_CHAR_READER_HPP_
#define _OUSIA_CHAR_READER_HPP_
+#include <istream>
#include <list>
+#include <memory>
#include <vector>
namespace ousia {
@@ -45,9 +47,8 @@ public:
* Callback function which is called whenever new data is requested from the
* input stream.
*
- * @param buf is a pointer at the memory region to which the data should be
- * writtern.
- * @param size is the size of the
+ * @param buf points at the target memory region.
+ * @param size is the requested number of bytes.
* @param userData is a pointer at some user defined data given in the
* constructor.
* @return the actual number of bytes read. If the result is smaller than
@@ -219,6 +220,14 @@ public:
Buffer(ReadCallback callback, void *userData);
/**
+ * Initializes the Buffer with a reference to an std::istream from which
+ * data will be read.
+ *
+ * @param istream is the input stream from which the data should be read.
+ */
+ Buffer(std::istream &istream);
+
+ /**
* Initializes the Buffer with the contents of the given string, after
* this operation the Buffer has a fixed size.
*
@@ -266,14 +275,16 @@ public:
/**
* Moves a cursor by offs bytes. Note that moving backwards is theoretically
- * limited by the LOOKBACK_SIZE of the Buffer, practically it will most likely
- * be limited by the REQUEST_SIZE, so you can got at most 64 KiB backwards.
+ * limited by the LOOKBACK_SIZE of the Buffer, practically it will most
+ * likely be limited by the REQUEST_SIZE, so you can go at most 64 KiB
+ * backwards.
*
* @param cursor is the cursor that should be moved.
* @param relativeOffs is a positive or negative integer number specifying
* the number of bytes the cursor should be moved forward (positive numbers)
* or backwards (negative numbers).
- * @return the actual number of bytes the cursor was moved.
+ * @return the actual number of bytes the cursor was moved. This may be less
+ * than the requested relativeOffs if the cursor cannot be moved that far.
*/
ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs);
@@ -311,6 +322,278 @@ public:
bool read(CursorId cursor, char &c);
};
+// Forward declaration
+class CharReaderFork;
+
+/**
+ * Used within parsers for convenient access to single characters in an input
+ * stream or buffer. It allows reading and peeking single characters from a
+ * buffer. Additionally it counts the current column/row (with correct handling
+ * for UTF-8) and contains an internal state machine that handles the detection
+ * of linebreaks and converts these to a single '\n'.
+ */
+class CharReader {
+protected:
+ /**
+ * Enum to represent the current state of the internal state machine that
+ * replaces the linebreaks from multiple platforms to a single '\n'.
+ */
+ enum class LinebreakState { NONE, HAS_LF, HAS_CR };
+
+ /**
+ * Internally used cursor structure for managing the read and the peek
+ * cursor.
+ */
+ struct Cursor {
+ /**
+ * Corresponding cursor in the underlying buffer instance.
+ */
+ const Buffer::CursorId cursor;
+
+ /**
+ * Current line the cursor is in.
+ */
+ size_t line;
+
+ /**
+ * Current column the cursor is in.
+ */
+ size_t column;
+
+ /**
+ * State of the linebreak replacement statemachine.
+ */
+ LinebreakState state;
+
+ /**
+ * Contains the absolute offset in the input stream containing the
+ * position of the last linebreak. This is used for extracting the
+ * context (the line) in which an error occurred.
+ */
+ size_t lastLinebreak;
+
+ /**
+ * Constructor of the Cursor class.
+ *
+ * @param cursor is the underlying cursor in the Buffer instance.
+ */
+ Cursor(Buffer::CursorId cursor, size_t line, size_t column)
+ : cursor(cursor),
+ line(line),
+ column(column),
+ state(LinebreakState::NONE),
+ lastLinebreak(0)
+ {
+ }
+
+ /**
+ * Assigns one cursor to another.
+ *
+ * @param buffer is the underlying buffer instance the internal cursor
+ * belongs to.
+ * @param cursor is the cursor from which the state should be copied.
+ */
+ void assign(std::shared_ptr<Buffer> buffer, Cursor &cursor);
+ };
+
+private:
+ /**
+ * Substitutes "\r", "\n\r", "\r\n" with a single "\n".
+ *
+ * @param cursor is the cursor from which the character should be read.
+ * @param c a reference to the character that should be written.
+ * @return true if c should be emitted, false if it has to be skipped.
+ */
+ bool substituteLinebreaks(Cursor &cursor, char &c);
+
+ /**
+ * Reads a single character from the given cursor.
+ *
+ * @param cursor is the cursor from which the character should be read.
+ * @param c a reference to the character that should be written.
+ * @return true if a character was read, false if the end of the stream has
+ * been reached.
+ */
+ bool readAtCursor(Cursor &cursor, char &c);
+
+protected:
+ /**
+ * Reference pointing at the underlying buffer.
+ */
+ std::shared_ptr<Buffer> buffer;
+
+ /**
+ * Cursor used for reading.
+ */
+ Cursor readCursor;
+
+ /**
+ * Cursor used for peeking.
+ */
+ Cursor peekCursor;
+
+ /**
+ * Set to true as long as the underlying Buffer cursor is at the same
+ * position for the read and the peek cursor.
+ */
+ bool coherent;
+
+ /**
+ * Protected constructor of the CharReader base class. Creates new read
+ * and peek cursors for the given buffer.
+ *
+ * @param buffer is a reference to the underlying Buffer class responsible
+ * for allowing to read from a single input stream from multiple locations.
+ */
+ CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column);
+
+public:
+ /**
+ * Creates a new CharReader instance from a string.
+ *
+ * @param str is a string containing the input data.
+ * @param line is the start line.
+ * @param column is the start column.
+ */
+ CharReader(const std::string &str, size_t line = 1, size_t column = 1);
+
+ /**
+ * Creates a new CharReader instance for an input stream.
+ *
+ * @param istream is the input stream from which incoming data should be
+ * read.
+ * @param line is the start line.
+ * @param column is the start column.
+ */
+ CharReader(std::istream &istream, size_t line = 1, size_t column = 1);
+
+ /**
+ * Deletes the used cursors from the underlying buffer instance.
+ */
+ ~CharReader();
+
+ // No copy. NOTE(review): the deleted signature takes Buffer, not CharReader.
+ CharReader(const Buffer &) = delete;
+
+ // No assign. NOTE(review): the deleted signature takes Buffer, not CharReader.
+ CharReader &operator=(const Buffer &) = delete;
+
+ /**
+ * Peeks a single character. If called multiple times, returns the
+ * character after the previously peeked character.
+ *
+ * @param c is a reference to the character to which the result should be
+ * written.
+ * @return true if the character was successfully read, false if there are
+ * no more characters to be read in the buffer.
+ */
+ bool peek(char &c);
+
+ /**
+ * Reads a character from the input data. If "peek" was called
+ * beforehand resets the peek pointer.
+ *
+ * @param c is a reference to the character to which the result should be
+ * written.
+ * @return true if the character was successfully read, false if there are
+ * no more characters to be read in the buffer.
+ */
+ bool read(char &c);
+
+ /**
+ * Resets the peek pointer to the "read" pointer.
+ */
+ void resetPeek();
+
+ /**
+ * Advances the read pointer to the peek pointer -- so if the "peek"
+ * function was called, "read" will now return the character after
+ * the last peeked character.
+ */
+ void consumePeek();
+
+ /**
+ * Moves the read cursor to the next non-whitespace character. Returns
+ * false, if the end of the stream was reached.
+ *
+ * @return false if the end of the stream was reached, true otherwise.
+ */
+ bool consumeWhitespace();
+
+ /**
+ * Creates a new CharReader located at the same position as this CharReader
+ * instance, yet the new CharReader can be used independently of this
+ * CharReader. Use the "commit" function of the returned CharReader to
+ * copy the state of the forked CharReaderFork to this CharReader.
+ *
+ * @return a CharReaderFork instance positioned at the same location as this
+ * CharReader instance.
+ */
+ CharReaderFork fork();
+
+ /**
+ * Returns true if there are no more characters as the stream was
+ * closed.
+ *
+ * @return true if there is no more data.
+ */
+ bool atEnd() const { return buffer->atEnd(readCursor.cursor); }
+
+ /**
+ * Returns the current line (starting with one).
+ *
+ * @return the current line number.
+ */
+ size_t getLine() const { return readCursor.line; }
+
+ /**
+ * Returns the current column (starting with one).
+ *
+ * @return the current column number.
+ */
+ size_t getColumn() const { return readCursor.column; }
+};
+
+/**
+ * A CharReaderFork is returned whenever the "fork" function of the CharReader
+ * class is used. Its "commit" function can be used to move the underlying
+ * CharReader instance to the location of the CharReaderFork instance. Otherwise
+ * the read location of the underlying CharReader is left unchanged.
+ */
+class CharReaderFork : public CharReader {
+private:
+ friend CharReader;
+
+ /**
+ * The reader cursor of the underlying CharReader instance.
+ */
+ CharReader::Cursor &parentReadCursor;
+
+ /**
+ * The peek cursor of the underlying CharReader instance.
+ */
+ CharReader::Cursor &parentPeekCursor;
+
+ /**
+ * Constructor of the CharReaderFork class.
+ *
+ * @param buffer is a reference at the parent Buffer instance.
+ * @param parentReadCursor is a reference at the parent read cursor.
+ * @param parentPeekCursor is a reference at the parent peek cursor.
+ * @param coherent specifies whether the char reader cursors are initialized
+ * coherently.
+ */
+ CharReaderFork(std::shared_ptr<Buffer> buffer,
+ CharReader::Cursor &parentReadCursor,
+ CharReader::Cursor &parentPeekCursor, bool coherent);
+
+public:
+ /**
+ * Moves the read and peek cursor of the parent CharReader to the location
+ * of the read and peek cursor in the fork.
+ */
+ void commit();
+};
}
}
diff --git a/test/core/utils/CharReaderTest.cpp b/test/core/utils/CharReaderTest.cpp
index 1518928..9b700ad 100644
--- a/test/core/utils/CharReaderTest.cpp
+++ b/test/core/utils/CharReaderTest.cpp
@@ -27,6 +27,8 @@
namespace ousia {
namespace utils {
+/* Buffer Test */
+
TEST(Buffer, simpleRead)
{
std::string testStr{"this is a test"};
@@ -42,7 +44,7 @@ TEST(Buffer, simpleRead)
ASSERT_FALSE(buf.atEnd(cursor));
// The cursor must be at zero
- ASSERT_EQ(0, buf.offset(cursor));
+ ASSERT_EQ(0U, buf.offset(cursor));
// Try to read the test string
std::string res;
@@ -57,7 +59,7 @@ TEST(Buffer, simpleRead)
ASSERT_EQ(testStr.size(), buf.offset(cursor));
// The two strings must equal
- ASSERT_STREQ(testStr.c_str(), res.c_str());
+ ASSERT_EQ(testStr, res);
}
TEST(Buffer, cursorManagement)
@@ -68,13 +70,13 @@ TEST(Buffer, cursorManagement)
Buffer::CursorId c2 = buf.createCursor();
Buffer::CursorId c3 = buf.createCursor();
- ASSERT_EQ(0, c1);
- ASSERT_EQ(1, c2);
- ASSERT_EQ(2, c3);
+ ASSERT_EQ(0U, c1);
+ ASSERT_EQ(1U, c2);
+ ASSERT_EQ(2U, c3);
buf.deleteCursor(c2);
Buffer::CursorId c4 = buf.createCursor();
- ASSERT_EQ(1, c4);
+ ASSERT_EQ(1U, c4);
}
TEST(Buffer, twoCursors)
@@ -235,10 +237,20 @@ static std::vector<char> generateData(size_t len)
uint32_t v = 0xF3A99148;
std::vector<char> res;
for (size_t i = 0; i < len; i++) {
- v = v ^ (v >> B1);
- v = v ^ (v << B2);
- v = v ^ (v >> B3);
- res.push_back(v & 0xFF);
+ while (true) {
+ // Advance the random seed
+ v = v ^ (v >> B1);
+ v = v ^ (v << B2);
+ v = v ^ (v >> B3);
+
+ // Replace \n and \r in order to avoid line break processing by the
+ // CharReader
+ char c = v & 0xFF;
+ if (c != '\n' && c != '\r') {
+ res.push_back(c);
+ break;
+ }
+ }
}
return res;
}
@@ -259,11 +271,6 @@ static size_t readFromVector(char *buf, size_t size, void *userData)
buf++;
}
size_t res = tar - state.offs;
-
-// std::cout << "called readFromVector, read from " << state.offs << " to "
-// << tar << ", total " << res << " byte, requested " << size
-// << " byte" << std::endl;
-
state.offs = tar;
return res;
}
@@ -311,7 +318,7 @@ TEST(Buffer, streamTwoCursors)
ASSERT_TRUE(buf.atEnd(cur1));
ASSERT_FALSE(buf.atEnd(cur2));
ASSERT_EQ(DATA_LENGTH, buf.offset(cur1));
- ASSERT_EQ(0, buf.offset(cur2));
+ ASSERT_EQ(0U, buf.offset(cur2));
std::vector<char> res2;
while (buf.read(cur2, c)) {
@@ -328,7 +335,7 @@ TEST(Buffer, streamTwoCursors)
ASSERT_EQ(DATA, res2);
}
-TEST(Buffer, streamTwoCursorsInterleaved)
+TEST(Buffer, streamTwoCursorsMovingInterleaved)
{
VectorReadState state(DATA);
@@ -360,6 +367,13 @@ TEST(Buffer, streamTwoCursorsInterleaved)
res2.push_back(c);
}
}
+
+ // Move cur1 60 bytes forward and backward
+ buf.moveCursor(cur1, -buf.moveCursor(cur1, 60));
+
+ // Make sure the cursor position is correct
+ ASSERT_EQ(res1.size(), buf.offset(cur1));
+ ASSERT_EQ(res2.size(), buf.offset(cur2));
}
ASSERT_EQ(DATA_LENGTH, buf.offset(cur1));
@@ -380,7 +394,8 @@ TEST(Buffer, streamMoveForward)
Buffer buf{readFromVector, &state};
Buffer::CursorId cursor = buf.createCursor();
- ASSERT_EQ(DATA_LENGTH - 100, buf.moveCursor(cursor, DATA_LENGTH - 100));
+ ASSERT_EQ(ssize_t(DATA_LENGTH) - 100,
+ buf.moveCursor(cursor, DATA_LENGTH - 100));
char c;
std::vector<char> res;
@@ -390,6 +405,177 @@ TEST(Buffer, streamMoveForward)
ASSERT_EQ(partialData, res);
}
+/* CharReader Test */
+
+TEST(CharReaderTest, simpleReadTest)
+{
+ std::string testStr{"this is a test"};
+ char c;
+
+ // Feed a test string into the reader
+ CharReader reader{testStr};
+
+ // Try to read the test string
+ std::string res;
+ while (!reader.atEnd()) {
+ ASSERT_TRUE(reader.read(c));
+ res.append(&c, 1);
+ }
+
+ // The two strings must equal
+ ASSERT_EQ(testStr, res);
+
+ // We must now be at line 1, column 15
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+
+ // If we call either read or peek, false is returned
+ ASSERT_FALSE(reader.read(c));
+ ASSERT_FALSE(reader.peek(c));
+}
+
+TEST(CharReaderTest, simplePeekTest)
+{
+ std::string testStr{"this is a test"};
+ char c;
+
+ // Feed a test string into the reader
+ CharReader reader{testStr};
+
+ // Try to peek the whole test string
+ std::string res;
+ while (reader.peek(c)) {
+ res.append(&c, 1);
+ }
+
+ // Peeking does not trigger the "atEnd" flag
+ ASSERT_FALSE(reader.atEnd());
+
+ // The two strings must equal
+ ASSERT_EQ(testStr, res);
+
+ // We must now be at line 1, column 1 and NOT at the end of the stream
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+ ASSERT_FALSE(reader.atEnd());
+
+ // If we consume the peek, we must be at line 1, column 15 and we should be
+ // at the end of the stream
+ reader.consumePeek();
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+ ASSERT_TRUE(reader.atEnd());
+
+ // If we call either read or peek, false is returned
+ ASSERT_FALSE(reader.read(c));
+ ASSERT_FALSE(reader.peek(c));
+}
+
+TEST(CharReaderTest, rowColumnCounterTest)
+{
+ // Feed a test string into the reader
+ CharReader reader{"1\n\r2\n3\r\n\n4"};
+
+ // We should currently be in line 1, column 1
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read two characters
+ char c;
+ for (int i = 0; i < 2; i++)
+ reader.read(c);
+ ASSERT_EQ(2U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read two characters
+ for (int i = 0; i < 2; i++)
+ reader.read(c);
+ ASSERT_EQ(3U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read three characters
+ for (int i = 0; i < 3; i++)
+ reader.read(c);
+ ASSERT_EQ(5U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+}
+
+TEST(CharReaderTest, rowColumnCounterTestOffs)
+{
+ // Feed a test string into the reader
+ CharReader reader{"1\n\r2\n3\r\n\n4", 4, 10};
+
+ // We should currently be in line 4, column 10
+ ASSERT_EQ(4U, reader.getLine());
+ ASSERT_EQ(10U, reader.getColumn());
+
+ // Read two characters
+ char c;
+ for (int i = 0; i < 2; i++)
+ reader.read(c);
+ ASSERT_EQ(5U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read two characters
+ for (int i = 0; i < 2; i++)
+ reader.read(c);
+ ASSERT_EQ(6U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read three characters
+ for (int i = 0; i < 3; i++)
+ reader.read(c);
+ ASSERT_EQ(8U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+}
+
+TEST(CharReaderTest, linebreakSubstitutionTest)
+{
+ // Feed a test string into the reader and read all characters back
+ CharReader reader{"this\n\ris\n\rjust\na test\r\n\rtest\n\r"};
+ std::string res;
+ char c;
+ while (reader.read(c)) {
+ res.append(&c, 1);
+ }
+
+ // Test for equality
+ ASSERT_EQ("this\nis\njust\na test\n\ntest\n", res);
+}
+
+TEST(CharReaderTest, rowColumnCounterUTF8Test)
+{
+ // Feed a test string with some umlauts into the reader
+ CharReader reader{"\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f"};
+
+ // Read all bytes
+ char c;
+ while (reader.read(c)) {
+ // Do nothing
+ }
+
+ // The sequence above equals 5 UTF-8 characters (so after reading all the
+ // cursor is at position 6)
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(6U, reader.getColumn());
+}
+
+TEST(CharReaderTest, streamTest)
+{
+ // Copy the test data to a string stream
+ std::stringstream ss;
+ std::copy(DATA.begin(), DATA.end(), std::ostream_iterator<char>(ss));
+
+ // Read the data back from the stream
+ std::vector<char> res;
+ char c;
+ CharReader reader{ss};
+ while (reader.read(c)) {
+ res.push_back(c);
+ }
+ ASSERT_EQ(DATA_LENGTH, res.size());
+ ASSERT_EQ(DATA, res);
+}
}
}
diff --git a/test/core/variant/ReaderTest.cpp b/test/core/variant/ReaderTest.cpp
index 3054a14..595bb57 100644
--- a/test/core/variant/ReaderTest.cpp
+++ b/test/core/variant/ReaderTest.cpp
@@ -97,54 +97,56 @@ TEST(Reader, parseUnescapedString)
}
}
+static const std::unordered_set<char> noDelim;
+
TEST(Reader, parseInteger)
{
// Valid integers
{
BufferedCharReader reader("0 ");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(0, res.second);
}
{
BufferedCharReader reader("42 ");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(42, res.second);
}
{
BufferedCharReader reader("-42");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(-42, res.second);
}
{
BufferedCharReader reader(" -0x4A2 ");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(-0x4A2, res.second);
}
{
BufferedCharReader reader(" 0Xaffe");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(0xAFFE, res.second);
}
{
BufferedCharReader reader("0x7FFFFFFFFFFFFFFF");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(0x7FFFFFFFFFFFFFFFL, res.second);
}
{
BufferedCharReader reader("-0x7FFFFFFFFFFFFFFF");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(-0x7FFFFFFFFFFFFFFFL, res.second);
}
@@ -152,25 +154,25 @@ TEST(Reader, parseInteger)
// Invalid integers
{
BufferedCharReader reader("-");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_FALSE(res.first);
}
{
BufferedCharReader reader("0a");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_FALSE(res.first);
}
{
BufferedCharReader reader("-0xag");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_FALSE(res.first);
}
{
BufferedCharReader reader("0x8000000000000000");
- auto res = Reader::parseInteger(reader, logger, {});
+ auto res = Reader::parseInteger(reader, logger, noDelim);
ASSERT_FALSE(res.first);
}
}
@@ -180,42 +182,42 @@ TEST(Reader, parseDouble)
// Valid doubles
{
BufferedCharReader reader("1.25");
- auto res = Reader::parseDouble(reader, logger, {});
+ auto res = Reader::parseDouble(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(1.25, res.second);
}
{
BufferedCharReader reader(".25");
- auto res = Reader::parseDouble(reader, logger, {});
+ auto res = Reader::parseDouble(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(.25, res.second);
}
{
BufferedCharReader reader(".25e1");
- auto res = Reader::parseDouble(reader, logger, {});
+ auto res = Reader::parseDouble(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(2.5, res.second);
}
{
BufferedCharReader reader("-2.5e-1");
- auto res = Reader::parseDouble(reader, logger, {});
+ auto res = Reader::parseDouble(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(-0.25, res.second);
}
{
BufferedCharReader reader("-50e-2");
- auto res = Reader::parseDouble(reader, logger, {});
+ auto res = Reader::parseDouble(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(-0.5, res.second);
}
{
BufferedCharReader reader("-1.");
- auto res = Reader::parseDouble(reader, logger, {});
+ auto res = Reader::parseDouble(reader, logger, noDelim);
ASSERT_TRUE(res.first);
ASSERT_EQ(-1., res.second);
}
@@ -230,13 +232,13 @@ TEST(Reader, parseDouble)
// Invalid doubles
{
BufferedCharReader reader(".e1");
- auto res = Reader::parseDouble(reader, logger, {});
+ auto res = Reader::parseDouble(reader, logger, noDelim);
ASSERT_FALSE(res.first);
}
{
BufferedCharReader reader("0e100000");
- auto res = Reader::parseDouble(reader, logger, {});
+ auto res = Reader::parseDouble(reader, logger, noDelim);
ASSERT_FALSE(res.first);
}
}