summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to 'src/core')
-rw-r--r--src/core/utils/CharReader.cpp206
-rw-r--r--src/core/utils/CharReader.hpp111
2 files changed, 226 insertions, 91 deletions
diff --git a/src/core/utils/CharReader.cpp b/src/core/utils/CharReader.cpp
index 12d0043..effc587 100644
--- a/src/core/utils/CharReader.cpp
+++ b/src/core/utils/CharReader.cpp
@@ -18,6 +18,7 @@
#include <algorithm>
#include <limits>
+#include <sstream>
#include <core/Utils.hpp>
@@ -326,7 +327,7 @@ bool Buffer::atEnd(Buffer::CursorId cursor) const
(c.bucket == endBucket && c.bucketOffs == endBucket->size());
}
-bool Buffer::read(Buffer::CursorId cursor, char &c)
+bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr)
{
Cursor &cur = cursors[cursor];
while (true) {
@@ -336,7 +337,9 @@ bool Buffer::read(Buffer::CursorId cursor, char &c)
// If there is still data in the current bucket, return this data
if (cur.bucketOffs < bucket.size()) {
c = bucket[cur.bucketOffs];
- cur.bucketOffs++;
+ if (incr) {
+ cur.bucketOffs++;
+ }
return true;
} else if (cur.bucket == endBucket) {
// Return false if the end of the stream has been reached, otherwise
@@ -354,6 +357,16 @@ bool Buffer::read(Buffer::CursorId cursor, char &c)
}
}
+bool Buffer::read(Buffer::CursorId cursor, char &c)
+{
+ return fetchCharacter(cursor, c, true);
+}
+
+bool Buffer::fetch(CursorId cursor, char &c)
+{
+ return fetchCharacter(cursor, c, false);
+}
+
/* CharReader::Cursor class */
void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer,
@@ -365,8 +378,6 @@ void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer,
// Copy the state
line = cursor.line;
column = cursor.column;
- state = cursor.state;
- lastLinebreak = cursor.lastLinebreak;
}
/* CharReader class */
@@ -396,73 +407,40 @@ CharReader::~CharReader()
buffer->deleteCursor(peekCursor.cursor);
}
-bool CharReader::substituteLinebreaks(Cursor &cursor, char &c)
-{
- if (c == '\n' || c == '\r') {
- switch (cursor.state) {
- case LinebreakState::NONE:
- // We got a first linebreak character -- output a '\n'
- if (c == '\n') {
- cursor.state = LinebreakState::HAS_LF;
- } else {
- cursor.state = LinebreakState::HAS_CR;
- }
- c = '\n';
- return true;
- case LinebreakState::HAS_LF:
- // If a LF is followed by a LF, output a new linefeed
- if (c == '\n') {
- cursor.state = LinebreakState::HAS_LF;
- return true;
- }
-
- // Otherwise, don't handle this character (part of "\n\r")
- cursor.state = LinebreakState::NONE;
- return false;
- case LinebreakState::HAS_CR:
- // If a CR is followed by a CR, output a new linefeed
- if (c == '\r') {
- cursor.state = LinebreakState::HAS_CR;
- c = '\n';
- return true;
- }
-
- // Otherwise, don't handle this character (part of "\r\n")
- cursor.state = LinebreakState::NONE;
- return false;
- }
- }
-
- // No linebreak character, reset the linebreak state
- cursor.state = LinebreakState::NONE;
- return true;
-}
-
bool CharReader::readAtCursor(Cursor &cursor, char &c)
{
- while (true) {
- // Return false if we're at the end of the stream
- if (!buffer->read(cursor.cursor, c)) {
- return false;
- }
+ // Return false if we're at the end of the stream
+ if (!buffer->read(cursor.cursor, c)) {
+ return false;
+ }
- // Substitute linebreak characters with a single '\n'
- if (substituteLinebreaks(cursor, c)) {
- if (c == '\n') {
- // A linebreak was reached, go to the next line
- cursor.line++;
- cursor.column = 1;
- cursor.lastLinebreak = buffer->offset(cursor.cursor);
- } else {
- // Ignore UTF-8 continuation bytes
- if (!((c & 0x80) && !(c & 0x40))) {
- cursor.column++;
- }
+ // Substitute linebreak sequences with a single '\n'
+ if (c == '\n' || c == '\r') {
+ // Output a single \n
+ c = '\n';
+
+ // Check whether the next character is a continuation of the
+ // current character
+ char c2;
+ if (buffer->read(cursor.cursor, c2)) {
+ if ((c2 != '\n' && c2 != '\r') || c2 == c) {
+ buffer->moveCursor(cursor.cursor, -1);
}
+ }
+ }
- return true;
+ // Count lines and columns
+ if (c == '\n') {
+ // A linebreak was reached, go to the next line
+ cursor.line++;
+ cursor.column = 1;
+ } else {
+ // Ignore UTF-8 continuation bytes
+ if (!((c & 0x80) && !(c & 0x40))) {
+ cursor.column++;
}
}
+ return true;
}
bool CharReader::peek(char &c)
@@ -529,6 +507,106 @@ CharReaderFork CharReader::fork()
return CharReaderFork(buffer, readCursor, peekCursor, coherent);
}
+CharReader::Context CharReader::getContext(ssize_t maxSize)
+{
+ // Clone the current read cursor
+ Buffer::CursorId cur = buffer->createCursor(readCursor.cursor);
+
+ // Fetch the start position of the search
+ ssize_t offs = buffer->offset(cur);
+ ssize_t start = offs;
+ ssize_t end = offs;
+ char c;
+
+ // Search the beginning of the line with the last non-whitespace character
+ bool hadNonWhitespace = false;
+ bool foundBegin = false;
+ for (ssize_t i = 0; i < maxSize; i++) {
+ // Fetch the character at the current position
+ if (buffer->fetch(cur, c)) {
+ // Abort at linebreaks once a non-whitespace character has been found
+ if (hadNonWhitespace && (c == '\n' || c == '\r')) {
+ buffer->moveCursor(cur, 1);
+ start++;
+ foundBegin = true;
+ break;
+ }
+ }
+ if (buffer->moveCursor(cur, -1) == 0) {
+ foundBegin = true;
+ break;
+ }
+
+ // Update the start position and the hadNonWhitespace flag
+ hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c);
+ start--;
+ }
+
+ // Search the end of the line
+ buffer->moveCursor(cur, offs - start);
+ bool foundEnd = false;
+ for (ssize_t i = 0; i < maxSize; i++) {
+ // Increment the end counter if a character was read, abort if the end
+ // of the stream has been reached
+ if (buffer->read(cur, c)) {
+ end++;
+ } else {
+ foundEnd = true;
+ break;
+ }
+
+ // Abort on linebreak characters
+ if (c == '\n' || c == '\r') {
+ foundEnd = true;
+ break;
+ }
+ }
+
+ // Calculate the truncated start and end position and limit the number of
+ // characters to the maximum number of characters
+ ssize_t tStart = start;
+ ssize_t tEnd = end;
+ if (tEnd - tStart > maxSize) {
+ tStart = std::max(offs - maxSize / 2, tStart);
+ tEnd = tStart + maxSize;
+ }
+
+ // Try to go to the calculated start position and fetch the actual start
+ // position
+ ssize_t aStart = end + buffer->moveCursor(cur, tStart - end);
+ if (aStart > tStart) {
+ tEnd = tEnd + (aStart - tStart);
+ tStart = aStart;
+ }
+
+ // Read one line
+ std::stringstream ss;
+ size_t relPos = 0;
+ for (ssize_t i = tStart; i < tEnd; i++) {
+ if (buffer->read(cur, c)) {
+ // Break once a linebreak is reached
+ if (c == '\n' || c == '\r') {
+ break;
+ }
+
+ // Add the current character to the output
+ ss << c;
+
+ // Increment the string-relative offset as long as the original
+ // offset is not reached in the for loop
+ if (i < offs) {
+ relPos++;
+ }
+ }
+ }
+
+ // Delete the newly created cursor
+ buffer->deleteCursor(cur);
+
+ return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start,
+ !foundEnd || tEnd != end};
+}
+
/* Class CharReaderFork */
CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer,
diff --git a/src/core/utils/CharReader.hpp b/src/core/utils/CharReader.hpp
index 3d4c894..5daa21d 100644
--- a/src/core/utils/CharReader.hpp
+++ b/src/core/utils/CharReader.hpp
@@ -206,6 +206,12 @@ private:
*/
size_t moveBackward(CursorId cursor, size_t relativeOffs);
+ /**
+ * Reads a character from the current cursor position and optionally
+ * advances.
+ */
+ bool fetchCharacter(CursorId cursor, char &c, bool incr);
+
public:
/**
* Initializes the Buffer with a reference to a ReadCallback that is used
@@ -311,7 +317,8 @@ public:
bool atEnd(CursorId cursor) const;
/**
- * Reads a single character from the ring buffer from the given cursor.
+ * Reads a single character from the ring buffer from the given cursor and
+ * moves to the next character.
*
* @param cursor specifies the cursor from which the data should be read.
* The cursor will be advanced by one byte.
@@ -320,6 +327,18 @@ public:
* been reached.
*/
bool read(CursorId cursor, char &c);
+
+ /**
+ * Returns a single character from the ring buffer from the current cursor
+ * position and stays at that position.
+ *
+ * @param cursor specifies the cursor from which the data should be read.
+ * The cursor is not advanced and stays at its current position.
+ * @param c is the character into which the data needs to be read.
+ * @return true if a character could be fetched, false if the end of the
+ * stream has been reached.
+ */
+ bool fetch(CursorId cursor, char &c);
};
// Forward declaration
@@ -333,13 +352,53 @@ class CharReaderFork;
* of linebreaks and converts these to a single '\n'.
*/
class CharReader {
-protected:
+public:
/**
- * Enum to represent the current state of the internal state machine that
- * replaces the linebreaks from multiple platforms to a single '\n'.
+ * The context struct is used to represent the current context the char
+ * reader is in. This context can for example be used when building error
+ * messages.
*/
- enum class LinebreakState { NONE, HAS_LF, HAS_CR };
+ struct Context {
+ /**
+ * Set to the content of the current line.
+ */
+ std::string line;
+
+ /**
+ * Relative position (in characters) within that line.
+ */
+ size_t relPos;
+
+ /**
+ * Set to true if the beginning of the line has been truncated (because
+ * the reader position is too far away from the actual position of the
+ * line).
+ */
+ bool truncatedStart;
+
+ /**
+ * Set to true if the end of the line has been truncated (because the
+ * reader position is too far away from the actual end position of the
+ * line).
+ */
+ bool truncatedEnd;
+
+ Context()
+ : line(), relPos(0), truncatedStart(false), truncatedEnd(false)
+ {
+ }
+ Context(std::string line, size_t relPos, bool truncatedStart,
+ bool truncatedEnd)
+ : line(std::move(line)),
+ relPos(relPos),
+ truncatedStart(truncatedStart),
+ truncatedEnd(truncatedEnd)
+ {
+ }
+ };
+
+protected:
/**
* Internally used cursor structure for managing the read and the peek
* cursor.
@@ -353,24 +412,12 @@ protected:
/**
* Current line the cursor is in.
*/
- size_t line;
+ uint32_t line;
/**
* Current column the cursor is in.
*/
- size_t column;
-
- /**
- * State of the linebreak replacement statemachine.
- */
- LinebreakState state;
-
- /**
- * Contains the absolute offset in the input stream containing the
- * position of the last linebreak. This is used for extracting the
- * context (the line) in which an error occured.
- */
- size_t lastLinebreak;
+ uint32_t column;
/**
* Constructor of the Cursor class.
@@ -378,11 +425,7 @@ protected:
* @param cursor is the underlying cursor in the Buffer instance.
*/
Cursor(Buffer::CursorId cursor, size_t line, size_t column)
- : cursor(cursor),
- line(line),
- column(column),
- state(LinebreakState::NONE),
- lastLinebreak(0)
+ : cursor(cursor), line(line), column(column)
{
}
@@ -434,7 +477,8 @@ protected:
/**
* Set to true as long as the underlying Buffer cursor is at the same position
- * for the read and the peek cursor.
+ * for the read and the peek cursor. This is only used for optimization
+ * purposes and makes consecutive reads a bit faster.
*/
bool coherent;
@@ -544,14 +588,27 @@ public:
*
* @return the current line number.
*/
- size_t getLine() const { return readCursor.line; }
+ uint32_t getLine() const { return readCursor.line; }
/**
* Returns the current column (starting with one).
*
* @return the current column number.
*/
- size_t getColumn() const { return readCursor.column; }
+ uint32_t getColumn() const { return readCursor.column; }
+
+ /**
+ * Returns the current byte offset of the read cursor.
+ *
+ * @return the byte position within the stream.
+ */
+ size_t getOffset() const { return buffer->offset(readCursor.cursor); };
+
+ /**
+ * Returns the line the read cursor currently is in, but at most the
+ * given number of characters in the form of a Context structure.
+ */
+ Context getContext(ssize_t maxSize);
};
/**