summaryrefslogtreecommitdiff
path: root/src/core/common
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-01-23 01:25:55 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-01-23 01:25:55 +0100
commit5eaeae92ccf209194bced39d888c5a0e527a9c1a (patch)
tree4e67e41b322fb13bcbb69d30ecb5a0144c2c35eb /src/core/common
parent1dabb6ca6d5a6a294ea0d6115a16d402cf4799ab (diff)
Started to adapt CharReader
Diffstat (limited to 'src/core/common')
-rw-r--r--src/core/common/CharReader.cpp209
-rw-r--r--src/core/common/CharReader.hpp164
2 files changed, 77 insertions, 296 deletions
diff --git a/src/core/common/CharReader.cpp b/src/core/common/CharReader.cpp
index 6966b97..b0bbade 100644
--- a/src/core/common/CharReader.cpp
+++ b/src/core/common/CharReader.cpp
@@ -376,44 +376,33 @@ bool Buffer::fetch(CursorId cursor, char &c)
return fetchCharacter(cursor, c, false);
}
-/* CharReader::Cursor class */
-
-void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer,
- CharReader::Cursor &cursor)
-{
- // Copy the cursor position
- buffer->copyCursor(cursor.cursor, this->cursor);
-
- // Copy the state
- line = cursor.line;
- column = cursor.column;
-}
-
/* CharReader class */
-CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line,
- size_t column)
+CharReader::CharReader(std::shared_ptr<Buffer> buffer, SourceId sourceId,
+ size_t offs)
: buffer(buffer),
- readCursor(buffer->createCursor(), line, column),
- peekCursor(buffer->createCursor(), line, column),
- coherent(true)
+ readCursor(buffer->createCursor()),
+ peekCursor(buffer->createCursor()),
+ coherent(true),
+ sourceId(sourceId),
+ offs(offs)
{
}
-CharReader::CharReader(const std::string &str, size_t line, size_t column)
- : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column)
+CharReader::CharReader(const std::string &str, SourceId sourceId, size_t offs)
+ : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, sourceId, offs)
{
}
-CharReader::CharReader(std::istream &istream, size_t line, size_t column)
- : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column)
+CharReader::CharReader(std::istream &istream, SourceId sourceId, size_t offs)
+ : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, sourceId, offs)
{
}
CharReader::~CharReader()
{
- buffer->deleteCursor(readCursor.cursor);
- buffer->deleteCursor(peekCursor.cursor);
+ buffer->deleteCursor(readCursor);
+ buffer->deleteCursor(peekCursor);
}
bool CharReader::readAtCursor(Cursor &cursor, char &c)
@@ -437,18 +426,6 @@ bool CharReader::readAtCursor(Cursor &cursor, char &c)
}
}
}
-
- // Count lines and columns
- if (c == '\n') {
- // A linebreak was reached, go to the next line
- cursor.line++;
- cursor.column = 1;
- } else {
- // Ignore UTF-8 continuation bytes
- if (!((c & 0x80) && !(c & 0x40))) {
- cursor.column++;
- }
- }
return true;
}
@@ -456,7 +433,7 @@ bool CharReader::peek(char &c)
{
// If the reader was coherent, update the peek cursor state
if (coherent) {
- peekCursor.assign(buffer, readCursor);
+ buffer->copyCursor(readCursor, peekCursor);
coherent = false;
}
@@ -471,12 +448,8 @@ bool CharReader::read(char &c)
// Set the peek position to the current read position, if reading was not
// coherent
- if (!coherent) {
- peekCursor.assign(buffer, readCursor);
- coherent = true;
- } else {
- buffer->copyCursor(readCursor.cursor, peekCursor.cursor);
- }
+ buffer->copyCursor(readCursor, peekCursor);
+ coherent = true;
// Return the result of the read function
return res;
@@ -485,7 +458,7 @@ bool CharReader::read(char &c)
void CharReader::resetPeek()
{
if (!coherent) {
- peekCursor.assign(buffer, readCursor);
+ buffer->copyCursor(readCursor, peekCursor);
coherent = true;
}
}
@@ -493,7 +466,7 @@ void CharReader::resetPeek()
void CharReader::consumePeek()
{
if (!coherent) {
- readCursor.assign(buffer, peekCursor);
+ buffer->copyCursor(peekCursor, readCursor);
coherent = true;
}
}
@@ -513,7 +486,8 @@ bool CharReader::consumeWhitespace()
CharReaderFork CharReader::fork()
{
- return CharReaderFork(buffer, readCursor, peekCursor, coherent);
+ return CharReaderFork(buffer, readCursor, peekCursor, sourceId, offs,
+ coherent);
}
size_t CharReader::readRaw(char *buf, size_t size)
@@ -528,155 +502,38 @@ size_t CharReader::readRaw(char *buf, size_t size)
return res;
}
-SourceContext CharReader::getContextAt(ssize_t maxSize,
- Buffer::CursorId referenceCursor)
-{
- // Clone the given read cursor
- Buffer::CursorId cur = buffer->createCursor(referenceCursor);
-
- // Fetch the start position of the search
- ssize_t offs = buffer->offset(cur);
- ssize_t start = offs;
- ssize_t end = offs;
- char c;
-
- // Search the beginning of the line with the last non-whitespace character
- bool hadNonWhitespace = false;
- bool foundBegin = false;
- for (ssize_t i = 0; i < maxSize; i++) {
- // Fetch the character at the current position
- if (buffer->fetch(cur, c)) {
- // Abort, at linebreaks if we found a non-linebreak character
- hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c);
- if (hadNonWhitespace && (c == '\n' || c == '\r')) {
- buffer->moveCursor(cur, 1);
- start++;
- foundBegin = true;
- break;
- }
- }
- if (buffer->moveCursor(cur, -1) == 0) {
- foundBegin = true;
- break;
- } else {
- // Update the start position and the hadNonWhitespace flag
- start--;
- }
- }
-
- // Search the end of the line
- buffer->moveCursor(cur, offs - start);
- bool foundEnd = false;
- for (ssize_t i = 0; i < maxSize; i++) {
- // Increment the end counter if a character was read, abort if the end
- // of the stream has been reached
- if (buffer->read(cur, c)) {
- end++;
- } else {
- foundEnd = true;
- break;
- }
-
- // Abort on linebreak characters
- if (c == '\n' || c == '\r') {
- foundEnd = true;
- break;
- }
- }
-
- // Calculate the truncated start and end position and limit the number of
- // characters to the maximum number of characters
- ssize_t tStart = start;
- ssize_t tEnd = end;
- if (tEnd - tStart > maxSize) {
- tStart = std::max(offs - maxSize / 2, tStart);
- tEnd = tStart + maxSize;
- }
-
- // Try to go to the calculated start position and fetch the actual start
- // position
- ssize_t aStart = end + buffer->moveCursor(cur, tStart - end);
- if (aStart > tStart) {
- tEnd = tEnd + (aStart - tStart);
- tStart = aStart;
- }
-
- // Read one line
- std::stringstream ss;
- size_t relPos = 0;
- for (ssize_t i = tStart; i < tEnd; i++) {
- if (buffer->read(cur, c)) {
- // Break once a linebreak is reached
- if (c == '\n' || c == '\r') {
- break;
- }
-
- // Add the current character to the output
- ss << c;
-
- // Increment the string-relative offset as long as the original
- // offset is not reached in the for loop
- if (i < offs) {
- relPos++;
- }
- }
- }
-
- // Delete the newly created cursor
- buffer->deleteCursor(cur);
-
- return SourceContext{ss.str(), relPos, !foundBegin || tStart != start,
- !foundEnd || tEnd != end};
-}
-
-SourceContext CharReader::getContextAtOffs(ssize_t maxSize, size_t offs)
-{
- // Create a new cursor and calculate how far it has to be moved to reach
- // the position specified in the location instance
- Buffer::CursorId cur = buffer->createCursor();
- ssize_t moveOffs = offs - buffer->offset(cur);
-
- // Try to move the cursor to the specified position and read the context
- SourceContext res;
- if (buffer->moveCursor(cur, moveOffs) == moveOffs) {
- res = getContextAt(60, cur);
- }
-
- // Delete the read cursor
- buffer->deleteCursor(cur);
- return res;
-}
+bool CharReader::atEnd() const { return buffer->atEnd(readCursor.cursor); }
-SourceContext CharReader::getContext(ssize_t maxSize)
+size_t CharReader::getOffset() const
{
- return getContextAt(maxSize, readCursor.cursor);
+ return buffer->offset(readCursor.cursor) + offs;
}
-SourceContext CharReader::contextCallback(const SourceLocation &location,
- void *data)
+SourceLocation CharReader::getLocation() const
{
- return static_cast<CharReader *>(data)->getContextAtOffs(60, location.offs);
+ return SourceLocation{sourceId, getOffset()};
}
/* Class CharReaderFork */
CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer,
- CharReader::Cursor &parentReadCursor,
- CharReader::Cursor &parentPeekCursor,
+ Buffer::CursorId parentReadCursor,
+ Buffer::CursorId parentPeekCursor,
+ SourceContextCallback sourceId, size_t offs,
bool coherent)
- : CharReader(buffer, 1, 1),
+ : CharReader(buffer, sourceId, offs),
parentReadCursor(parentReadCursor),
parentPeekCursor(parentPeekCursor)
{
- readCursor.assign(buffer, parentReadCursor);
- peekCursor.assign(buffer, parentPeekCursor);
+ buffer->copyCursor(parentReadCursor, readCursor);
+ buffer->copyCursor(parentPeekCursor, peekCursor);
this->coherent = coherent;
}
void CharReaderFork::commit()
{
- parentReadCursor.assign(buffer, readCursor);
- parentPeekCursor.assign(buffer, peekCursor);
+ buffer->copyCursor(readCursor, parentReadCursor);
+ buffer->copyCursor(peekCursor, parentPeekCursor);
}
}
diff --git a/src/core/common/CharReader.hpp b/src/core/common/CharReader.hpp
index 134d9d9..0957e97 100644
--- a/src/core/common/CharReader.hpp
+++ b/src/core/common/CharReader.hpp
@@ -355,54 +355,10 @@ class CharReaderFork;
/**
* Used within parsers for convenient access to single characters in an input
* stream or buffer. It allows reading and peeking single characters from a
- * buffer. Additionally it counts the current column/row (with correct handling
- * for UTF-8) and contains an internal state machine that handles the detection
- * of linebreaks and converts these to a single '\n'.
+ * buffer. Additionally it contains an internal state machine that handles the
+ * detection of linebreaks and converts these to a single '\n'.
*/
class CharReader {
-protected:
- /**
- * Internally used cursor structure for managing the read and the peek
- * cursor.
- */
- struct Cursor {
- /**
- * Corresponding cursor in the underlying buffer instance.
- */
- const Buffer::CursorId cursor;
-
- /**
- * Current line the cursor is in.
- */
- int line;
-
- /**
- * Current column the cursor is in.
- */
- int column;
-
- /**
- * Constructor of the Cursor class.
- *
- * @param cursor is the underlying cursor in the Buffer instance.
- * @param line is the line at which the cursor is positioned.
- * @param column is the column at which the cursor is positioned.
- */
- Cursor(Buffer::CursorId cursor, int line, int column)
- : cursor(cursor), line(line), column(column)
- {
- }
-
- /**
- * Assigns one cursor to another.
- *
- * @param buffer is the underlying buffer instance the internal cursor
- * belongs to.
- * @param cursor is the cursor from which the state should be copied.
- */
- void assign(std::shared_ptr<Buffer> buffer, Cursor &cursor);
- };
-
private:
/**
* Substitutes "\r", "\n\r", "\r\n" with a single "\n".
@@ -421,29 +377,7 @@ private:
* @return true if a character was read, false if the end of the stream has
* been reached.
*/
- bool readAtCursor(Cursor &cursor, char &c);
-
- /**
- * Returns the line the given cursor currently is in, but at most the
- * given number of characters in the form of a Context structure.
- *
- * @param maxSize is the maximum length of the extracted context
- * @param referenceCursor is a cursor in the internal buffer pointing at the
- * location at which the context should be read.
- */
- SourceContext getContextAt(ssize_t maxSize,
- Buffer::CursorId referenceCursor);
-
- /**
- * Returns the line the at the given byte offset, but at most the
- * given number of characters in the form of a Context structure.
- *
- * @param maxSize is the maximum length of the extracted context
- * @param offs is the byte offset for which the context should be read.
- * @return the context at the specified position or an empty (invalid)
- * context if the context could not be read.
- */
- SourceContext getContextAtOffs(ssize_t maxSize, size_t offs);
+ bool readAtCursor(Buffer::CursorId &cursor, char &c);
protected:
/**
@@ -454,12 +388,12 @@ protected:
/**
* Cursor used for reading.
*/
- Cursor readCursor;
+ Buffer::CursorId readCursor;
/**
* Cursor used for peeking.
*/
- Cursor peekCursor;
+ Buffer::CursorId peekCursor;
/**
* Set to true as long the underlying Buffer cursor is at the same position
@@ -469,33 +403,50 @@ protected:
bool coherent;
/**
+ * Id of the underlying source file.
+ */
+ SourceId sourceId;
+
+ /**
+ * Offset to be added to the underlying buffer byte positions.
+ */
+ size_t offs;
+
+ /**
* Protected constructor of the CharReader base class. Creates new read
* and peek cursors for the given buffer.
*
* @param buffer is a reference to the underlying Buffer class responsible
* for allowing to read from a single input stream from multiple locations.
+ * @param sourceId is the ID of the underlying source file.
+ * @param offs is the byte offset at which the char reader should start
+ * counting.
*/
- CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column);
+ CharReader(std::shared_ptr<Buffer> buffer, SourceId sourceId, size_t offs);
public:
/**
* Creates a new CharReader instance from a string.
*
* @param str is a string containing the input data.
- * @param line is the start line.
- * @param column is the start column.
+ * @param sourceId is the ID of the underlying source file.
+ * @param offs is the byte offset at which the char reader should start
+ * counting.
*/
- CharReader(const std::string &str, size_t line = 1, size_t column = 1);
+ CharReader(const std::string &str, SourceId sourceId = InvalidSourceId,
+ size_t offs = 0);
/**
* Creates a new CharReader instance for an input stream.
*
* @param istream is the input stream from which incoming data should be
* read.
- * @param line is the start line.
- * @param column is the start column.
+ * @param sourceId is the ID of the underlying source file.
+ * @param offs is the byte offset at which the char reader should start
+ * counting.
*/
- CharReader(std::istream &istream, size_t line = 1, size_t column = 1);
+ CharReader(std::istream &istream, SourceId sourceId = InvalidSourceId,
+ size_t offs = 0);
/**
* Deletes the used cursors from the underlying buffer instance.
@@ -572,56 +523,27 @@ public:
size_t readRaw(char *buf, size_t size);
/**
- * Returns true if there are no more characters as the stream was
- * closed.
+ * Returns true if there are no more characters as the stream was closed.
*
* @return true if there is no more data.
*/
- bool atEnd() const { return buffer->atEnd(readCursor.cursor); }
+ bool atEnd() const;
/**
* Returns the offset of the read cursor in bytes.
- */
- size_t getOffset() const { return buffer->offset(readCursor.cursor); }
-
- /**
- * Returns the line number the read cursor currently is at.
- */
- int getLine() const { return readCursor.line; }
-
- /**
- * Returns the column the read cursor currently is at.
- */
- int getColumn() const { return readCursor.column; }
-
- /**
- * Returns the current position of the read cursor (line and column).
- */
- SourceLocation getLocation() const
- {
- return SourceLocation(getLine(), getColumn(), getOffset());
- }
-
- /**
- * Returns the line the read cursor currently is in, but at most the
- * given number of characters in the form of a Context structure.
*
- * @param maxSize is the maximum length of the extracted context
+ * @return the offset of the read cursor in bytes.
*/
- SourceContext getContext(ssize_t maxSize = 60);
+ size_t getOffset() const;
/**
- * Function that can be used to provide the context for a certain source
- * location. A pointer to this function can be supplied to a Logger instance
- * in the pushFile() method. The data should be set to a pointer to the
- * CharReader instance.
+ * Returns a SourceLocation object describing the exact position (including
+ * the source file) of the read cursor.
*
- * @param location is the location for which the context should be returned.
- * Only the "offs" field within the location is used.
- * @param data is a pointer pointing at a CharReader instance.
+ * @return a SourceLocation object at the position of the current read
+ * cursor.
*/
- static SourceContext contextCallback(const SourceLocation &location,
- void *data);
+ SourceLocation getLocation() const;
};
/**
@@ -637,12 +559,12 @@ private:
/**
* The reader cursor of the underlying CharReader instance.
*/
- CharReader::Cursor &parentReadCursor;
+ Buffer::CursorId parentReadCursor;
/**
* The peek cursor of the underlying CharReader instance.
*/
- CharReader::Cursor &parentPeekCursor;
+ Buffer::CursorId parentPeekCursor;
/**
* Constructor of the CharReaderFork class.
@@ -650,12 +572,14 @@ private:
* @param buffer is a reference at the parent Buffer instance.
* @param parentReadCursor is a reference at the parent read cursor.
* @param parentPeekCursor is a reference at the parent peek cursor.
+ * @param sourceId is the ID of the underlying source file.
+ * @param offs is the offset to be added to the buffer byte positions.
* @param coherent specifies whether the char reader cursors are initialized
* coherently.
*/
CharReaderFork(std::shared_ptr<Buffer> buffer,
- CharReader::Cursor &parentReadCursor,
- CharReader::Cursor &parentPeekCursor, bool coherent);
+ Buffer::CursorId &parentReadCursor,
+ Buffer::CursorId &parentPeekCursor,
+ SourceContextCallback sourceId, size_t offs, bool coherent);
public:
/**