summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2014-12-10 02:41:16 +0100
committer Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2014-12-10 02:41:16 +0100
commit d84efdc312a9c5d0b6757c826810809a8e78f1e2 (patch)
tree 70ca1967749663bf74878b9a4b066168c885760f
parent ba7a02e499a2dc1793e902643e1537adafef38ee (diff)
copied old BufferedCharReader tests to CharReaderTest, fixed some bugs and optimized CharReader code
-rw-r--r-- src/core/utils/CharReader.cpp 72
-rw-r--r-- src/core/utils/CharReader.hpp 22
-rw-r--r-- test/core/utils/CharReaderTest.cpp 217
3 files changed, 276 insertions, 35 deletions
diff --git a/src/core/utils/CharReader.cpp b/src/core/utils/CharReader.cpp
index 61bbd64..12d0043 100644
--- a/src/core/utils/CharReader.cpp
+++ b/src/core/utils/CharReader.cpp
@@ -41,7 +41,7 @@ namespace utils {
*/
static size_t istreamReadCallback(char *buf, size_t size, void *userData)
{
- return (static_cast<std::istream*>(userData))->read(buf, size).gcount();
+ return (static_cast<std::istream *>(userData))->read(buf, size).gcount();
}
/* Class Buffer */
@@ -371,24 +371,22 @@ void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer,
/* CharReader class */
-CharReader::CharReader(std::shared_ptr<Buffer> buffer)
+CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line,
+ size_t column)
: buffer(buffer),
- readCursor(buffer->createCursor()),
- peekCursor(buffer->createCursor())
+ readCursor(buffer->createCursor(), line, column),
+ peekCursor(buffer->createCursor(), line, column),
+ coherent(true)
{
}
CharReader::CharReader(const std::string &str, size_t line, size_t column)
- : buffer(new Buffer{str}),
- readCursor(buffer->createCursor(), line, column),
- peekCursor(buffer->createCursor(), line, column)
+ : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column)
{
}
CharReader::CharReader(std::istream &istream, size_t line, size_t column)
- : buffer(new Buffer{istream}),
- readCursor(buffer->createCursor(), line, column),
- peekCursor(buffer->createCursor(), line, column)
+ : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column)
{
}
@@ -467,13 +465,51 @@ bool CharReader::readAtCursor(Cursor &cursor, char &c)
}
}
-bool CharReader::peek(char &c) { return readAtCursor(peekCursor, c); }
+bool CharReader::peek(char &c)
+{
+ // If the reader was coherent, update the peek cursor state
+ if (coherent) {
+ peekCursor.assign(buffer, readCursor);
+ coherent = false;
+ }
-bool CharReader::read(char &c) { return readAtCursor(readCursor, c); }
+ // Read a character from the peek cursor
+ return readAtCursor(peekCursor, c);
+}
-void CharReader::resetPeek() { peekCursor.assign(buffer, readCursor); }
+bool CharReader::read(char &c)
+{
+ // Read a character from the buffer at the current read cursor
+ bool res = readAtCursor(readCursor, c);
+
+ // Set the peek position to the current read position, if reading was not
+ // coherent
+ if (!coherent) {
+ peekCursor.assign(buffer, readCursor);
+ coherent = true;
+ } else {
+ buffer->copyCursor(readCursor.cursor, peekCursor.cursor);
+ }
-void CharReader::consumePeek() { readCursor.assign(buffer, peekCursor); }
+ // Return the result of the read function
+ return res;
+}
+
+void CharReader::resetPeek()
+{
+ if (!coherent) {
+ peekCursor.assign(buffer, readCursor);
+ coherent = true;
+ }
+}
+
+void CharReader::consumePeek()
+{
+ if (!coherent) {
+ readCursor.assign(buffer, peekCursor);
+ coherent = true;
+ }
+}
bool CharReader::consumeWhitespace()
{
@@ -490,20 +526,22 @@ bool CharReader::consumeWhitespace()
CharReaderFork CharReader::fork()
{
- return CharReaderFork(buffer, readCursor, peekCursor);
+ return CharReaderFork(buffer, readCursor, peekCursor, coherent);
}
/* Class CharReaderFork */
CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer,
CharReader::Cursor &parentReadCursor,
- CharReader::Cursor &parentPeekCursor)
- : CharReader(buffer),
+ CharReader::Cursor &parentPeekCursor,
+ bool coherent)
+ : CharReader(buffer, 1, 1),
parentReadCursor(parentReadCursor),
parentPeekCursor(parentPeekCursor)
{
readCursor.assign(buffer, parentReadCursor);
peekCursor.assign(buffer, parentPeekCursor);
+ this->coherent = coherent;
}
void CharReaderFork::commit()
diff --git a/src/core/utils/CharReader.hpp b/src/core/utils/CharReader.hpp
index a48f5ad..3d4c894 100644
--- a/src/core/utils/CharReader.hpp
+++ b/src/core/utils/CharReader.hpp
@@ -377,7 +377,7 @@ protected:
*
* @param cursor is the underlying cursor in the Buffer instance.
*/
- Cursor(Buffer::CursorId cursor, size_t line = 1, size_t column = 1)
+ Cursor(Buffer::CursorId cursor, size_t line, size_t column)
: cursor(cursor),
line(line),
column(column),
@@ -433,13 +433,19 @@ protected:
Cursor peekCursor;
/**
+ * Set to true as long as the underlying Buffer cursor is at the same position
+ * for the read and the peek cursor.
+ */
+ bool coherent;
+
+ /**
* Protected constructor of the CharReader base class. Creates new read
* and peek cursors for the given buffer.
*
* @param buffer is a reference to the underlying Buffer class responsible
* for allowing to read from a single input stream from multiple locations.
*/
- CharReader(std::shared_ptr<Buffer> buffer);
+ CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column);
public:
/**
@@ -538,14 +544,14 @@ public:
*
* @return the current line number.
*/
- int getLine() const { return readCursor.line; }
+ size_t getLine() const { return readCursor.line; }
/**
* Returns the current column (starting with one).
*
* @return the current column number.
*/
- int getColumn() const { return readCursor.column; }
+ size_t getColumn() const { return readCursor.column; }
};
/**
@@ -570,10 +576,16 @@ private:
/**
* Constructor of the CharReaderFork class.
+ *
+ * @param buffer is a reference to the parent Buffer instance.
+ * @param parentReadCursor is a reference to the parent read cursor.
+ * @param parentPeekCursor is a reference to the parent peek cursor.
+ * @param coherent specifies whether the char reader cursors are initialized
+ * coherently.
*/
CharReaderFork(std::shared_ptr<Buffer> buffer,
CharReader::Cursor &parentReadCursor,
- CharReader::Cursor &parentPeekCursor);
+ CharReader::Cursor &parentPeekCursor, bool coherent);
public:
/**
diff --git a/test/core/utils/CharReaderTest.cpp b/test/core/utils/CharReaderTest.cpp
index c9a1f64..9b700ad 100644
--- a/test/core/utils/CharReaderTest.cpp
+++ b/test/core/utils/CharReaderTest.cpp
@@ -27,6 +27,8 @@
namespace ousia {
namespace utils {
+/* Buffer Test */
+
TEST(Buffer, simpleRead)
{
std::string testStr{"this is a test"};
@@ -42,7 +44,7 @@ TEST(Buffer, simpleRead)
ASSERT_FALSE(buf.atEnd(cursor));
// The cursor must be at zero
- ASSERT_EQ(0, buf.offset(cursor));
+ ASSERT_EQ(0U, buf.offset(cursor));
// Try to read the test string
std::string res;
@@ -57,7 +59,7 @@ TEST(Buffer, simpleRead)
ASSERT_EQ(testStr.size(), buf.offset(cursor));
// The two strings must equal
- ASSERT_STREQ(testStr.c_str(), res.c_str());
+ ASSERT_EQ(testStr, res);
}
TEST(Buffer, cursorManagement)
@@ -68,13 +70,13 @@ TEST(Buffer, cursorManagement)
Buffer::CursorId c2 = buf.createCursor();
Buffer::CursorId c3 = buf.createCursor();
- ASSERT_EQ(0, c1);
- ASSERT_EQ(1, c2);
- ASSERT_EQ(2, c3);
+ ASSERT_EQ(0U, c1);
+ ASSERT_EQ(1U, c2);
+ ASSERT_EQ(2U, c3);
buf.deleteCursor(c2);
Buffer::CursorId c4 = buf.createCursor();
- ASSERT_EQ(1, c4);
+ ASSERT_EQ(1U, c4);
}
TEST(Buffer, twoCursors)
@@ -235,10 +237,20 @@ static std::vector<char> generateData(size_t len)
uint32_t v = 0xF3A99148;
std::vector<char> res;
for (size_t i = 0; i < len; i++) {
- v = v ^ (v >> B1);
- v = v ^ (v << B2);
- v = v ^ (v >> B3);
- res.push_back(v & 0xFF);
+ while (true) {
+ // Advance the random seed
+ v = v ^ (v >> B1);
+ v = v ^ (v << B2);
+ v = v ^ (v >> B3);
+
+ // Replace \n and \r in order to avoid line break processing by the
+ // CharReader
+ char c = v & 0xFF;
+ if (c != '\n' && c != '\r') {
+ res.push_back(c);
+ break;
+ }
+ }
}
return res;
}
@@ -306,7 +318,7 @@ TEST(Buffer, streamTwoCursors)
ASSERT_TRUE(buf.atEnd(cur1));
ASSERT_FALSE(buf.atEnd(cur2));
ASSERT_EQ(DATA_LENGTH, buf.offset(cur1));
- ASSERT_EQ(0, buf.offset(cur2));
+ ASSERT_EQ(0U, buf.offset(cur2));
std::vector<char> res2;
while (buf.read(cur2, c)) {
@@ -323,7 +335,7 @@ TEST(Buffer, streamTwoCursors)
ASSERT_EQ(DATA, res2);
}
-TEST(Buffer, streamTwoCursorsInterleaved)
+TEST(Buffer, streamTwoCursorsMovingInterleaved)
{
VectorReadState state(DATA);
@@ -355,6 +367,13 @@ TEST(Buffer, streamTwoCursorsInterleaved)
res2.push_back(c);
}
}
+
+ // Move cur1 60 bytes forward and backward
+ buf.moveCursor(cur1, -buf.moveCursor(cur1, 60));
+
+ // Make sure the cursor position is correct
+ ASSERT_EQ(res1.size(), buf.offset(cur1));
+ ASSERT_EQ(res2.size(), buf.offset(cur2));
}
ASSERT_EQ(DATA_LENGTH, buf.offset(cur1));
@@ -375,7 +394,8 @@ TEST(Buffer, streamMoveForward)
Buffer buf{readFromVector, &state};
Buffer::CursorId cursor = buf.createCursor();
- ASSERT_EQ(DATA_LENGTH - 100, buf.moveCursor(cursor, DATA_LENGTH - 100));
+ ASSERT_EQ(ssize_t(DATA_LENGTH) - 100,
+ buf.moveCursor(cursor, DATA_LENGTH - 100));
char c;
std::vector<char> res;
@@ -385,6 +405,177 @@ TEST(Buffer, streamMoveForward)
ASSERT_EQ(partialData, res);
}
+/* CharReader Test */
+
+TEST(CharReaderTest, simpleReadTest)
+{
+ std::string testStr{"this is a test"};
+ char c;
+
+ // Feed a test string into the reader
+ CharReader reader{testStr};
+
+ // Try to read the test string
+ std::string res;
+ while (!reader.atEnd()) {
+ ASSERT_TRUE(reader.read(c));
+ res.append(&c, 1);
+ }
+
+ // The two strings must be equal
+ ASSERT_EQ(testStr, res);
+
+ // We must now be at line 1, column 15
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+
+ // If we call either read or peek, false is returned
+ ASSERT_FALSE(reader.read(c));
+ ASSERT_FALSE(reader.peek(c));
+}
+
+TEST(CharReaderTest, simplePeekTest)
+{
+ std::string testStr{"this is a test"};
+ char c;
+
+ // Feed a test string into the reader
+ CharReader reader{testStr};
+
+ // Try to peek the whole test string without consuming it
+ std::string res;
+ while (reader.peek(c)) {
+ res.append(&c, 1);
+ }
+
+ // Peeking does not trigger the "atEnd" flag
+ ASSERT_FALSE(reader.atEnd());
+
+ // The two strings must be equal
+ ASSERT_EQ(testStr, res);
+
+ // We must now be at line 1, column 1 and NOT at the end of the stream
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+ ASSERT_FALSE(reader.atEnd());
+
+ // If we consume the peek, we must be at line 1, column 15 and we should be
+ // at the end of the stream
+ reader.consumePeek();
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(testStr.size() + 1, reader.getColumn());
+ ASSERT_TRUE(reader.atEnd());
+
+ // If we call either read or peek, false is returned
+ ASSERT_FALSE(reader.read(c));
+ ASSERT_FALSE(reader.peek(c));
+}
+
+TEST(CharReaderTest, rowColumnCounterTest)
+{
+ // Feed a test string into the reader
+ CharReader reader{"1\n\r2\n3\r\n\n4"};
+
+ // We should currently be in line 1, column 1
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read two characters
+ char c;
+ for (int i = 0; i < 2; i++)
+ reader.read(c);
+ ASSERT_EQ(2U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read two characters
+ for (int i = 0; i < 2; i++)
+ reader.read(c);
+ ASSERT_EQ(3U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read three characters
+ for (int i = 0; i < 3; i++)
+ reader.read(c);
+ ASSERT_EQ(5U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+}
+
+TEST(CharReaderTest, rowColumnCounterTestOffs)
+{
+ // Feed a test string into the reader
+ CharReader reader{"1\n\r2\n3\r\n\n4", 4, 10};
+
+ // We should currently be in line 4, column 10
+ ASSERT_EQ(4U, reader.getLine());
+ ASSERT_EQ(10U, reader.getColumn());
+
+ // Read two characters
+ char c;
+ for (int i = 0; i < 2; i++)
+ reader.read(c);
+ ASSERT_EQ(5U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read two characters
+ for (int i = 0; i < 2; i++)
+ reader.read(c);
+ ASSERT_EQ(6U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+
+ // Read three characters
+ for (int i = 0; i < 3; i++)
+ reader.read(c);
+ ASSERT_EQ(8U, reader.getLine());
+ ASSERT_EQ(1U, reader.getColumn());
+}
+
+TEST(CharReaderTest, linebreakSubstitutionTest)
+{
+ // Feed a test string into the reader and read all characters back
+ CharReader reader{"this\n\ris\n\rjust\na test\r\n\rtest\n\r"};
+ std::string res;
+ char c;
+ while (reader.read(c)) {
+ res.append(&c, 1);
+ }
+
+ // Test for equality
+ ASSERT_EQ("this\nis\njust\na test\n\ntest\n", res);
+}
+
+TEST(CharReaderTest, rowColumnCounterUTF8Test)
+{
+ // Feed a test string with some umlauts into the reader
+ CharReader reader{"\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f"};
+
+ // Read all bytes
+ char c;
+ while (reader.read(c)) {
+ // Do nothing
+ }
+
+ // The sequence above consists of 5 UTF-8 characters (so after reading all
+ // of them, the cursor is at column 6)
+ ASSERT_EQ(1U, reader.getLine());
+ ASSERT_EQ(6U, reader.getColumn());
+}
+
+TEST(CharReaderTest, streamTest)
+{
+ // Copy the test data to a string stream
+ std::stringstream ss;
+ std::copy(DATA.begin(), DATA.end(), std::ostream_iterator<char>(ss));
+
+ // Read the data back from the stream
+ std::vector<char> res;
+ char c;
+ CharReader reader{ss};
+ while (reader.read(c)) {
+ res.push_back(c);
+ }
+ ASSERT_EQ(DATA_LENGTH, res.size());
+ ASSERT_EQ(DATA, res);
+}
}
}