diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-10 02:41:16 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-10 02:41:16 +0100 |
commit | d84efdc312a9c5d0b6757c826810809a8e78f1e2 (patch) | |
tree | 70ca1967749663bf74878b9a4b066168c885760f | |
parent | ba7a02e499a2dc1793e902643e1537adafef38ee (diff) |
copied old BufferedCharReader tests to CharReaderTest, fixed some bugs and optimized CharReader code
-rw-r--r-- | src/core/utils/CharReader.cpp | 72 | ||||
-rw-r--r-- | src/core/utils/CharReader.hpp | 22 | ||||
-rw-r--r-- | test/core/utils/CharReaderTest.cpp | 217 |
3 files changed, 276 insertions, 35 deletions
diff --git a/src/core/utils/CharReader.cpp b/src/core/utils/CharReader.cpp index 61bbd64..12d0043 100644 --- a/src/core/utils/CharReader.cpp +++ b/src/core/utils/CharReader.cpp @@ -41,7 +41,7 @@ namespace utils { */ static size_t istreamReadCallback(char *buf, size_t size, void *userData) { - return (static_cast<std::istream*>(userData))->read(buf, size).gcount(); + return (static_cast<std::istream *>(userData))->read(buf, size).gcount(); } /* Class Buffer */ @@ -371,24 +371,22 @@ void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer, /* CharReader class */ -CharReader::CharReader(std::shared_ptr<Buffer> buffer) +CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line, + size_t column) : buffer(buffer), - readCursor(buffer->createCursor()), - peekCursor(buffer->createCursor()) + readCursor(buffer->createCursor(), line, column), + peekCursor(buffer->createCursor(), line, column), + coherent(true) { } CharReader::CharReader(const std::string &str, size_t line, size_t column) - : buffer(new Buffer{str}), - readCursor(buffer->createCursor(), line, column), - peekCursor(buffer->createCursor(), line, column) + : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column) { } CharReader::CharReader(std::istream &istream, size_t line, size_t column) - : buffer(new Buffer{istream}), - readCursor(buffer->createCursor(), line, column), - peekCursor(buffer->createCursor(), line, column) + : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column) { } @@ -467,13 +465,51 @@ bool CharReader::readAtCursor(Cursor &cursor, char &c) } } -bool CharReader::peek(char &c) { return readAtCursor(peekCursor, c); } +bool CharReader::peek(char &c) +{ + // If the reader was coherent, update the peek cursor state + if (coherent) { + peekCursor.assign(buffer, readCursor); + coherent = false; + } -bool CharReader::read(char &c) { return readAtCursor(readCursor, c); } + // Read a character from the peek cursor + return readAtCursor(peekCursor, c); +} -void CharReader::resetPeek() { peekCursor.assign(buffer, readCursor); } +bool CharReader::read(char &c) +{ + // Read a character from the buffer at the current read cursor + bool res = readAtCursor(readCursor, c); + + // Set the peek position to the current read position, if reading was not + // coherent + if (!coherent) { + peekCursor.assign(buffer, readCursor); + coherent = true; + } else { + buffer->copyCursor(readCursor.cursor, peekCursor.cursor); + } -void CharReader::consumePeek() { readCursor.assign(buffer, peekCursor); } + // Return the result of the read function + return res; +} + +void CharReader::resetPeek() +{ + if (!coherent) { + peekCursor.assign(buffer, readCursor); + coherent = true; + } +} + +void CharReader::consumePeek() +{ + if (!coherent) { + readCursor.assign(buffer, peekCursor); + coherent = true; + } +} bool CharReader::consumeWhitespace() { @@ -490,20 +526,22 @@ bool CharReader::consumeWhitespace() CharReaderFork CharReader::fork() { - return CharReaderFork(buffer, readCursor, peekCursor); + return CharReaderFork(buffer, readCursor, peekCursor, coherent); } /* Class CharReaderFork */ CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer, CharReader::Cursor &parentReadCursor, - CharReader::Cursor &parentPeekCursor) - : CharReader(buffer), + CharReader::Cursor &parentPeekCursor, + bool coherent) + : CharReader(buffer, 1, 1), parentReadCursor(parentReadCursor), parentPeekCursor(parentPeekCursor) { readCursor.assign(buffer, parentReadCursor); peekCursor.assign(buffer, parentPeekCursor); + this->coherent = coherent; } void CharReaderFork::commit() diff --git a/src/core/utils/CharReader.hpp b/src/core/utils/CharReader.hpp index a48f5ad..3d4c894 100644 --- a/src/core/utils/CharReader.hpp +++ b/src/core/utils/CharReader.hpp @@ -377,7 +377,7 @@ protected: * * @param cursor is the underlying cursor in the Buffer instance. */ - Cursor(Buffer::CursorId cursor, size_t line = 1, size_t column = 1) + Cursor(Buffer::CursorId cursor, size_t line, size_t column) : cursor(cursor), line(line), column(column), @@ -433,13 +433,19 @@ protected: Cursor peekCursor; /** + * Set to true as long the underlying Buffer cursor is at the same position + * for the read and the peek cursor. + */ + bool coherent; + + /** * Protected constructor of the CharReader base class. Creates new read * and peek cursors for the given buffer. * * @param buffer is a reference to the underlying Buffer class responsible * for allowing to read from a single input stream from multiple locations. */ - CharReader(std::shared_ptr<Buffer> buffer); + CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column); public: /** @@ -538,14 +544,14 @@ public: * * @return the current line number. */ - int getLine() const { return readCursor.line; } + size_t getLine() const { return readCursor.line; } /** * Returns the current column (starting with one). * * @return the current column number. */ - int getColumn() const { return readCursor.column; } + size_t getColumn() const { return readCursor.column; } }; /** @@ -570,10 +576,16 @@ private: /** * Constructor of the CharReaderFork class. + * + * @param buffer is a reference at the parent Buffer instance. + * @param parentPeekCursor is a reference at the parent read cursor. + * @param parentPeekCursor is a reference at the parent peek cursor. + * @param coherent specifies whether the char reader cursors are initialized + * coherently. */ CharReaderFork(std::shared_ptr<Buffer> buffer, CharReader::Cursor &parentReadCursor, - CharReader::Cursor &parentPeekCursor); + CharReader::Cursor &parentPeekCursor, bool coherent); public: /** diff --git a/test/core/utils/CharReaderTest.cpp b/test/core/utils/CharReaderTest.cpp index c9a1f64..9b700ad 100644 --- a/test/core/utils/CharReaderTest.cpp +++ b/test/core/utils/CharReaderTest.cpp @@ -27,6 +27,8 @@ namespace ousia { namespace utils { +/* Buffer Test */ + TEST(Buffer, simpleRead) { std::string testStr{"this is a test"}; @@ -42,7 +44,7 @@ TEST(Buffer, simpleRead) ASSERT_FALSE(buf.atEnd(cursor)); // The cursor must be at zero - ASSERT_EQ(0, buf.offset(cursor)); + ASSERT_EQ(0U, buf.offset(cursor)); // Try to read the test string std::string res; @@ -57,7 +59,7 @@ TEST(Buffer, simpleRead) ASSERT_EQ(testStr.size(), buf.offset(cursor)); // The two strings must equal - ASSERT_STREQ(testStr.c_str(), res.c_str()); + ASSERT_EQ(testStr, res); } TEST(Buffer, cursorManagement) @@ -68,13 +70,13 @@ TEST(Buffer, cursorManagement) Buffer::CursorId c2 = buf.createCursor(); Buffer::CursorId c3 = buf.createCursor(); - ASSERT_EQ(0, c1); - ASSERT_EQ(1, c2); - ASSERT_EQ(2, c3); + ASSERT_EQ(0U, c1); + ASSERT_EQ(1U, c2); + ASSERT_EQ(2U, c3); buf.deleteCursor(c2); Buffer::CursorId c4 = buf.createCursor(); - ASSERT_EQ(1, c4); + ASSERT_EQ(1U, c4); } TEST(Buffer, twoCursors) @@ -235,10 +237,20 @@ static std::vector<char> generateData(size_t len) uint32_t v = 0xF3A99148; std::vector<char> res; for (size_t i = 0; i < len; i++) { - v = v ^ (v >> B1); - v = v ^ (v << B2); - v = v ^ (v >> B3); - res.push_back(v & 0xFF); + while (true) { + // Advance the random seed + v = v ^ (v >> B1); + v = v ^ (v << B2); + v = v ^ (v >> B3); + + // Replace \n and \r in order to avoid line break processing by the + // CharReader + char c = v & 0xFF; + if (c != '\n' && c != '\r') { + res.push_back(c); + break; + } + } } return res; } @@ -306,7 +318,7 @@ TEST(Buffer, streamTwoCursors) ASSERT_TRUE(buf.atEnd(cur1)); ASSERT_FALSE(buf.atEnd(cur2)); ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); - ASSERT_EQ(0, buf.offset(cur2)); + ASSERT_EQ(0U, buf.offset(cur2)); std::vector<char> res2; while (buf.read(cur2, c)) { @@ -323,7 +335,7 @@ TEST(Buffer, streamTwoCursors) ASSERT_EQ(DATA, res2); } -TEST(Buffer, streamTwoCursorsInterleaved) +TEST(Buffer, streamTwoCursorsMovingInterleaved) { VectorReadState state(DATA); @@ -355,6 +367,13 @@ TEST(Buffer, streamTwoCursorsInterleaved) res2.push_back(c); } } + + // Move cur1 60 bytes forward and backward + buf.moveCursor(cur1, -buf.moveCursor(cur1, 60)); + + // Make sure the cursor position is correct + ASSERT_EQ(res1.size(), buf.offset(cur1)); + ASSERT_EQ(res2.size(), buf.offset(cur2)); } ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); @@ -375,7 +394,8 @@ TEST(Buffer, streamMoveForward) Buffer buf{readFromVector, &state}; Buffer::CursorId cursor = buf.createCursor(); - ASSERT_EQ(DATA_LENGTH - 100, buf.moveCursor(cursor, DATA_LENGTH - 100)); + ASSERT_EQ(ssize_t(DATA_LENGTH) - 100, + buf.moveCursor(cursor, DATA_LENGTH - 100)); char c; std::vector<char> res; @@ -385,6 +405,177 @@ TEST(Buffer, streamMoveForward) ASSERT_EQ(partialData, res); } +/* CharReader Test */ + +TEST(CharReaderTest, simpleReadTest) +{ + std::string testStr{"this is a test"}; + char c; + + // Feed a test string into the reader + CharReader reader{testStr}; + + // Try to read the test string + std::string res; + while (!reader.atEnd()) { + ASSERT_TRUE(reader.read(c)); + res.append(&c, 1); + } + + // The two strings must equal + ASSERT_EQ(testStr, res); + + // We must now be at line 1, column 15 + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(testStr.size() + 1, reader.getColumn()); + + // If we call either read or peek, false is returned + ASSERT_FALSE(reader.read(c)); + ASSERT_FALSE(reader.peek(c)); +} + +TEST(CharReaderTest, simplePeekTest) +{ + std::string testStr{"this is a test"}; + char c; + + // Feed a test string into the reader + CharReader reader{testStr}; + + // Try to read the test string + std::string res; + while (reader.peek(c)) { + res.append(&c, 1); + } + + // Peeking does not trigger the "atEnd" flag + ASSERT_FALSE(reader.atEnd()); + + // The two strings must equal + ASSERT_EQ(testStr, res); + + // We must now be at line 1, column 1 and NOT at the end of the stream + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + ASSERT_FALSE(reader.atEnd()); + + // If we consume the peek, we must be at line 1, column 15 and we should be + // at the end of the stream + reader.consumePeek(); + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(testStr.size() + 1, reader.getColumn()); + ASSERT_TRUE(reader.atEnd()); + + // If we call either read or peek, false is returned + ASSERT_FALSE(reader.read(c)); + ASSERT_FALSE(reader.peek(c)); +} + +TEST(CharReaderTest, rowColumnCounterTest) +{ + // Feed a test string into the reader + CharReader reader{"1\n\r2\n3\r\n\n4"}; + + // We should currently be in line 1, column 1 + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read two characters + char c; + for (int i = 0; i < 2; i++) + reader.read(c); + ASSERT_EQ(2U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read two characters + for (int i = 0; i < 2; i++) + reader.read(c); + ASSERT_EQ(3U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read three characters + for (int i = 0; i < 3; i++) + reader.read(c); + ASSERT_EQ(5U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); +} + +TEST(CharReaderTest, rowColumnCounterTestOffs) +{ + // Feed a test string into the reader + CharReader reader{"1\n\r2\n3\r\n\n4", 4, 10}; + + // We should currently be in line 1, column 1 + ASSERT_EQ(4U, reader.getLine()); + ASSERT_EQ(10U, reader.getColumn()); + + // Read two characters + char c; + for (int i = 0; i < 2; i++) + reader.read(c); + ASSERT_EQ(5U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read two characters + for (int i = 0; i < 2; i++) + reader.read(c); + ASSERT_EQ(6U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); + + // Read three characters + for (int i = 0; i < 3; i++) + reader.read(c); + ASSERT_EQ(8U, reader.getLine()); + ASSERT_EQ(1U, reader.getColumn()); +} + +TEST(CharReaderTest, linebreakSubstitutionTest) +{ + // Feed a test string into the reader and read all characters back + CharReader reader{"this\n\ris\n\rjust\na test\r\n\rtest\n\r"}; + std::string res; + char c; + while (reader.read(c)) { + res.append(&c, 1); + } + + // Test for equality + ASSERT_EQ("this\nis\njust\na test\n\ntest\n", res); +} + +TEST(CharReaderTest, rowColumnCounterUTF8Test) +{ + // Feed a test string with some umlauts into the reader + CharReader reader{"\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f"}; + + // Read all bytes + char c; + while (reader.read(c)) { + // Do nothing + } + + // The sequence above equals 5 UTF-8 characters (so after reading all the + // cursor is at position 6) + ASSERT_EQ(1U, reader.getLine()); + ASSERT_EQ(6U, reader.getColumn()); +} + +TEST(CharReaderTest, streamTest) +{ + // Copy the test data to a string stream + std::stringstream ss; + std::copy(DATA.begin(), DATA.end(), std::ostream_iterator<char>(ss)); + + // Read the data back from the stream + std::vector<char> res; + char c; + CharReader reader{ss}; + while (reader.read(c)) { + res.push_back(c); + } + ASSERT_EQ(DATA_LENGTH, res.size()); + ASSERT_EQ(DATA, res); +} } } |