diff options
| -rw-r--r-- | src/core/utils/CharReader.cpp | 72 | ||||
| -rw-r--r-- | src/core/utils/CharReader.hpp | 22 | ||||
| -rw-r--r-- | test/core/utils/CharReaderTest.cpp | 217 | 
3 files changed, 276 insertions, 35 deletions
diff --git a/src/core/utils/CharReader.cpp b/src/core/utils/CharReader.cpp index 61bbd64..12d0043 100644 --- a/src/core/utils/CharReader.cpp +++ b/src/core/utils/CharReader.cpp @@ -41,7 +41,7 @@ namespace utils {   */  static size_t istreamReadCallback(char *buf, size_t size, void *userData)  { -	return (static_cast<std::istream*>(userData))->read(buf, size).gcount(); +	return (static_cast<std::istream *>(userData))->read(buf, size).gcount();  }  /* Class Buffer */ @@ -371,24 +371,22 @@ void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer,  /* CharReader class */ -CharReader::CharReader(std::shared_ptr<Buffer> buffer) +CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line, +                       size_t column)      : buffer(buffer), -      readCursor(buffer->createCursor()), -      peekCursor(buffer->createCursor()) +      readCursor(buffer->createCursor(), line, column), +      peekCursor(buffer->createCursor(), line, column), +      coherent(true)  {  }  CharReader::CharReader(const std::string &str, size_t line, size_t column) -    : buffer(new Buffer{str}), -      readCursor(buffer->createCursor(), line, column), -      peekCursor(buffer->createCursor(), line, column) +    : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column)  {  }  CharReader::CharReader(std::istream &istream, size_t line, size_t column) -    : buffer(new Buffer{istream}), -      readCursor(buffer->createCursor(), line, column), -      peekCursor(buffer->createCursor(), line, column) +    : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column)  {  } @@ -467,13 +465,51 @@ bool CharReader::readAtCursor(Cursor &cursor, char &c)  	}  } -bool CharReader::peek(char &c) { return readAtCursor(peekCursor, c); } +bool CharReader::peek(char &c) +{ +	// If the reader was coherent, update the peek cursor state +	if (coherent) { +		peekCursor.assign(buffer, readCursor); +		coherent = false; +	} -bool CharReader::read(char &c) { return 
readAtCursor(readCursor, c); } +	// Read a character from the peek cursor +	return readAtCursor(peekCursor, c); +} -void CharReader::resetPeek() { peekCursor.assign(buffer, readCursor); } +bool CharReader::read(char &c) +{ +	// Read a character from the buffer at the current read cursor +	bool res = readAtCursor(readCursor, c); + +	// Set the peek position to the current read position, if reading was not +	// coherent +	if (!coherent) { +		peekCursor.assign(buffer, readCursor); +		coherent = true; +	} else { +		buffer->copyCursor(readCursor.cursor, peekCursor.cursor); +	} -void CharReader::consumePeek() { readCursor.assign(buffer, peekCursor); } +	// Return the result of the read function +	return res; +} + +void CharReader::resetPeek() +{ +	if (!coherent) { +		peekCursor.assign(buffer, readCursor); +		coherent = true; +	} +} + +void CharReader::consumePeek() +{ +	if (!coherent) { +		readCursor.assign(buffer, peekCursor); +		coherent = true; +	} +}  bool CharReader::consumeWhitespace()  { @@ -490,20 +526,22 @@ bool CharReader::consumeWhitespace()  CharReaderFork CharReader::fork()  { -	return CharReaderFork(buffer, readCursor, peekCursor); +	return CharReaderFork(buffer, readCursor, peekCursor, coherent);  }  /* Class CharReaderFork */  CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer,                                 CharReader::Cursor &parentReadCursor, -                               CharReader::Cursor &parentPeekCursor) -    : CharReader(buffer), +                               CharReader::Cursor &parentPeekCursor, +                               bool coherent) +    : CharReader(buffer, 1, 1),        parentReadCursor(parentReadCursor),        parentPeekCursor(parentPeekCursor)  {  	readCursor.assign(buffer, parentReadCursor);  	peekCursor.assign(buffer, parentPeekCursor); +	this->coherent = coherent;  }  void CharReaderFork::commit() diff --git a/src/core/utils/CharReader.hpp b/src/core/utils/CharReader.hpp index a48f5ad..3d4c894 100644 --- 
a/src/core/utils/CharReader.hpp +++ b/src/core/utils/CharReader.hpp @@ -377,7 +377,7 @@ protected:  		 *  		 * @param cursor is the underlying cursor in the Buffer instance.  		 */ -		Cursor(Buffer::CursorId cursor, size_t line = 1, size_t column = 1) +		Cursor(Buffer::CursorId cursor, size_t line, size_t column)  		    : cursor(cursor),  		      line(line),  		      column(column), @@ -433,13 +433,19 @@ protected:  	Cursor peekCursor;  	/** +	 * Set to true as long as the underlying Buffer cursor is at the same +	 * position for the read and the peek cursor. +	 */ +	bool coherent; + +	/**  	 * Protected constructor of the CharReader base class. Creates new read  	 * and peek cursors for the given buffer.  	 *  	 * @param buffer is a reference to the underlying Buffer class responsible  	 * for allowing to read from a single input stream from multiple locations.  	 */ -	CharReader(std::shared_ptr<Buffer> buffer); +	CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column);  public:  	/** @@ -538,14 +544,14 @@ public:  	 *  	 * @return the current line number.  	 */ -	int getLine() const { return readCursor.line; } +	size_t getLine() const { return readCursor.line; }  	/**  	 * Returns the current column (starting with one).  	 *  	 * @return the current column number.  	 */ -	int getColumn() const { return readCursor.column; } +	size_t getColumn() const { return readCursor.column; }  };  /** @@ -570,10 +576,16 @@ private:  	/**  	 * Constructor of the CharReaderFork class. +	 * +	 * @param buffer is a reference to the parent Buffer instance. +	 * @param parentReadCursor is a reference to the parent read cursor. +	 * @param parentPeekCursor is a reference to the parent peek cursor. +	 * @param coherent specifies whether the char reader cursors are initialized +	 * coherently.  	 
*/  	CharReaderFork(std::shared_ptr<Buffer> buffer,  	               CharReader::Cursor &parentReadCursor, -	               CharReader::Cursor &parentPeekCursor); +	               CharReader::Cursor &parentPeekCursor, bool coherent);  public:  	/** diff --git a/test/core/utils/CharReaderTest.cpp b/test/core/utils/CharReaderTest.cpp index c9a1f64..9b700ad 100644 --- a/test/core/utils/CharReaderTest.cpp +++ b/test/core/utils/CharReaderTest.cpp @@ -27,6 +27,8 @@  namespace ousia {  namespace utils { +/* Buffer Test */ +  TEST(Buffer, simpleRead)  {  	std::string testStr{"this is a test"}; @@ -42,7 +44,7 @@ TEST(Buffer, simpleRead)  	ASSERT_FALSE(buf.atEnd(cursor));  	// The cursor must be at zero -	ASSERT_EQ(0, buf.offset(cursor)); +	ASSERT_EQ(0U, buf.offset(cursor));  	// Try to read the test string  	std::string res; @@ -57,7 +59,7 @@ TEST(Buffer, simpleRead)  	ASSERT_EQ(testStr.size(), buf.offset(cursor));  	// The two strings must equal -	ASSERT_STREQ(testStr.c_str(), res.c_str()); +	ASSERT_EQ(testStr, res);  }  TEST(Buffer, cursorManagement) @@ -68,13 +70,13 @@ TEST(Buffer, cursorManagement)  	Buffer::CursorId c2 = buf.createCursor();  	Buffer::CursorId c3 = buf.createCursor(); -	ASSERT_EQ(0, c1); -	ASSERT_EQ(1, c2); -	ASSERT_EQ(2, c3); +	ASSERT_EQ(0U, c1); +	ASSERT_EQ(1U, c2); +	ASSERT_EQ(2U, c3);  	buf.deleteCursor(c2);  	Buffer::CursorId c4 = buf.createCursor(); -	ASSERT_EQ(1, c4); +	ASSERT_EQ(1U, c4);  }  TEST(Buffer, twoCursors) @@ -235,10 +237,20 @@ static std::vector<char> generateData(size_t len)  	uint32_t v = 0xF3A99148;  	std::vector<char> res;  	for (size_t i = 0; i < len; i++) { -		v = v ^ (v >> B1); -		v = v ^ (v << B2); -		v = v ^ (v >> B3); -		res.push_back(v & 0xFF); +		while (true) { +			// Advance the random seed +			v = v ^ (v >> B1); +			v = v ^ (v << B2); +			v = v ^ (v >> B3); + +			// Replace \n and \r in order to avoid line break processing by the +			// CharReader +			char c = v & 0xFF; +			if (c != '\n' && c != '\r') { +				
res.push_back(c); +				break; +			} +		}  	}  	return res;  } @@ -306,7 +318,7 @@ TEST(Buffer, streamTwoCursors)  	ASSERT_TRUE(buf.atEnd(cur1));  	ASSERT_FALSE(buf.atEnd(cur2));  	ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); -	ASSERT_EQ(0, buf.offset(cur2)); +	ASSERT_EQ(0U, buf.offset(cur2));  	std::vector<char> res2;  	while (buf.read(cur2, c)) { @@ -323,7 +335,7 @@ TEST(Buffer, streamTwoCursors)  	ASSERT_EQ(DATA, res2);  } -TEST(Buffer, streamTwoCursorsInterleaved) +TEST(Buffer, streamTwoCursorsMovingInterleaved)  {  	VectorReadState state(DATA); @@ -355,6 +367,13 @@ TEST(Buffer, streamTwoCursorsInterleaved)  				res2.push_back(c);  			}  		} + +		// Move cur1 60 bytes forward and backward +		buf.moveCursor(cur1, -buf.moveCursor(cur1, 60)); + +		// Make sure the cursor position is correct +		ASSERT_EQ(res1.size(), buf.offset(cur1)); +		ASSERT_EQ(res2.size(), buf.offset(cur2));  	}  	ASSERT_EQ(DATA_LENGTH, buf.offset(cur1)); @@ -375,7 +394,8 @@ TEST(Buffer, streamMoveForward)  	Buffer buf{readFromVector, &state};  	Buffer::CursorId cursor = buf.createCursor(); -	ASSERT_EQ(DATA_LENGTH - 100, buf.moveCursor(cursor, DATA_LENGTH - 100)); +	ASSERT_EQ(ssize_t(DATA_LENGTH) - 100, +	          buf.moveCursor(cursor, DATA_LENGTH - 100));  	char c;  	std::vector<char> res; @@ -385,6 +405,177 @@ TEST(Buffer, streamMoveForward)  	ASSERT_EQ(partialData, res);  } +/* CharReader Test */ + +TEST(CharReaderTest, simpleReadTest) +{ +	std::string testStr{"this is a test"}; +	char c; + +	// Feed a test string into the reader +	CharReader reader{testStr}; + +	// Try to read the test string +	std::string res; +	while (!reader.atEnd()) { +		ASSERT_TRUE(reader.read(c)); +		res.append(&c, 1); +	} + +	// The two strings must equal +	ASSERT_EQ(testStr, res); + +	// We must now be at line 1, column 15 +	ASSERT_EQ(1U, reader.getLine()); +	ASSERT_EQ(testStr.size() + 1, reader.getColumn()); + +	// If we call either read or peek, false is returned +	ASSERT_FALSE(reader.read(c)); +	
ASSERT_FALSE(reader.peek(c)); +} + +TEST(CharReaderTest, simplePeekTest) +{ +	std::string testStr{"this is a test"}; +	char c; + +	// Feed a test string into the reader +	CharReader reader{testStr}; + +	// Try to peek the test string +	std::string res; +	while (reader.peek(c)) { +		res.append(&c, 1); +	} + +	// Peeking does not trigger the "atEnd" flag +	ASSERT_FALSE(reader.atEnd()); + +	// The two strings must equal +	ASSERT_EQ(testStr, res); + +	// We must now be at line 1, column 1 and NOT at the end of the stream +	ASSERT_EQ(1U, reader.getLine()); +	ASSERT_EQ(1U, reader.getColumn()); +	ASSERT_FALSE(reader.atEnd()); + +	// If we consume the peek, we must be at line 1, column 15 and we should be +	// at the end of the stream +	reader.consumePeek(); +	ASSERT_EQ(1U, reader.getLine()); +	ASSERT_EQ(testStr.size() + 1, reader.getColumn()); +	ASSERT_TRUE(reader.atEnd()); + +	// If we call either read or peek, false is returned +	ASSERT_FALSE(reader.read(c)); +	ASSERT_FALSE(reader.peek(c)); +} + +TEST(CharReaderTest, rowColumnCounterTest) +{ +	// Feed a test string into the reader +	CharReader reader{"1\n\r2\n3\r\n\n4"}; + +	// We should currently be in line 1, column 1 +	ASSERT_EQ(1U, reader.getLine()); +	ASSERT_EQ(1U, reader.getColumn()); + +	// Read two characters +	char c; +	for (int i = 0; i < 2; i++) +		reader.read(c); +	ASSERT_EQ(2U, reader.getLine()); +	ASSERT_EQ(1U, reader.getColumn()); + +	// Read two characters +	for (int i = 0; i < 2; i++) +		reader.read(c); +	ASSERT_EQ(3U, reader.getLine()); +	ASSERT_EQ(1U, reader.getColumn()); + +	// Read three characters +	for (int i = 0; i < 3; i++) +		reader.read(c); +	ASSERT_EQ(5U, reader.getLine()); +	ASSERT_EQ(1U, reader.getColumn()); +} + +TEST(CharReaderTest, rowColumnCounterTestOffs) +{ +	// Feed a test string into the reader +	CharReader reader{"1\n\r2\n3\r\n\n4", 4, 10}; + +	// We should currently be in line 4, column 10 +	ASSERT_EQ(4U, reader.getLine()); +	ASSERT_EQ(10U, reader.getColumn()); + +	// Read two 
characters +	char c; +	for (int i = 0; i < 2; i++) +		reader.read(c); +	ASSERT_EQ(5U, reader.getLine()); +	ASSERT_EQ(1U, reader.getColumn()); + +	// Read two characters +	for (int i = 0; i < 2; i++) +		reader.read(c); +	ASSERT_EQ(6U, reader.getLine()); +	ASSERT_EQ(1U, reader.getColumn()); + +	// Read three characters +	for (int i = 0; i < 3; i++) +		reader.read(c); +	ASSERT_EQ(8U, reader.getLine()); +	ASSERT_EQ(1U, reader.getColumn()); +} + +TEST(CharReaderTest, linebreakSubstitutionTest) +{ +	// Feed a test string into the reader and read all characters back +	CharReader reader{"this\n\ris\n\rjust\na test\r\n\rtest\n\r"}; +	std::string res; +	char c; +	while (reader.read(c)) { +		res.append(&c, 1); +	} + +	// Test for equality +	ASSERT_EQ("this\nis\njust\na test\n\ntest\n", res); +} + +TEST(CharReaderTest, rowColumnCounterUTF8Test) +{ +	// Feed a test string with some umlauts into the reader +	CharReader reader{"\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f"}; + +	// Read all bytes +	char c; +	while (reader.read(c)) { +		// Do nothing +	} + +	// The sequence above equals 5 UTF-8 characters (so after reading all the +	// cursor is at position 6) +	ASSERT_EQ(1U, reader.getLine()); +	ASSERT_EQ(6U, reader.getColumn()); +} + +TEST(CharReaderTest, streamTest) +{ +	// Copy the test data to a string stream +	std::stringstream ss; +	std::copy(DATA.begin(), DATA.end(), std::ostream_iterator<char>(ss)); + +	// Read the data back from the stream +	std::vector<char> res; +	char c; +	CharReader reader{ss}; +	while (reader.read(c)) { +		res.push_back(c); +	} +	ASSERT_EQ(DATA_LENGTH, res.size()); +	ASSERT_EQ(DATA, res); +}  }  }  | 
