diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-01-23 01:25:55 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-01-23 01:25:55 +0100 |
commit | 5eaeae92ccf209194bced39d888c5a0e527a9c1a (patch) | |
tree | 4e67e41b322fb13bcbb69d30ecb5a0144c2c35eb /src/core/common | |
parent | 1dabb6ca6d5a6a294ea0d6115a16d402cf4799ab (diff) |
Started to adapt CharReader
Diffstat (limited to 'src/core/common')
-rw-r--r-- | src/core/common/CharReader.cpp | 209 | ||||
-rw-r--r-- | src/core/common/CharReader.hpp | 164 |
2 files changed, 77 insertions, 296 deletions
diff --git a/src/core/common/CharReader.cpp b/src/core/common/CharReader.cpp index 6966b97..b0bbade 100644 --- a/src/core/common/CharReader.cpp +++ b/src/core/common/CharReader.cpp @@ -376,44 +376,33 @@ bool Buffer::fetch(CursorId cursor, char &c) return fetchCharacter(cursor, c, false); } -/* CharReader::Cursor class */ - -void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer, - CharReader::Cursor &cursor) -{ - // Copy the cursor position - buffer->copyCursor(cursor.cursor, this->cursor); - - // Copy the state - line = cursor.line; - column = cursor.column; -} - /* CharReader class */ -CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line, - size_t column) +CharReader::CharReader(std::shared_ptr<Buffer> buffer, SourceId sourceId, + size_t offs) : buffer(buffer), - readCursor(buffer->createCursor(), line, column), - peekCursor(buffer->createCursor(), line, column), - coherent(true) + readCursor(buffer->createCursor()), + peekCursor(buffer->createCursor()), + coherent(true), + sourceId(sourceId), + offs(offs) { } -CharReader::CharReader(const std::string &str, size_t line, size_t column) - : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column) +CharReader::CharReader(const std::string &str, SourceId sourceId, size_t offs) + : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, sourceId, offs) { } -CharReader::CharReader(std::istream &istream, size_t line, size_t column) - : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column) +CharReader::CharReader(std::istream &istream, SourceId sourceId, size_t offs) + : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, sourceId, offs) { } CharReader::~CharReader() { - buffer->deleteCursor(readCursor.cursor); - buffer->deleteCursor(peekCursor.cursor); + buffer->deleteCursor(readCursor); + buffer->deleteCursor(peekCursor); } bool CharReader::readAtCursor(Cursor &cursor, char &c) @@ -437,18 +426,6 @@ bool CharReader::readAtCursor(Cursor &cursor, char &c) } } } - - // 
Count lines and columns - if (c == '\n') { - // A linebreak was reached, go to the next line - cursor.line++; - cursor.column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((c & 0x80) && !(c & 0x40))) { - cursor.column++; - } - } return true; } @@ -456,7 +433,7 @@ bool CharReader::peek(char &c) { // If the reader was coherent, update the peek cursor state if (coherent) { - peekCursor.assign(buffer, readCursor); + buffer->copyCursor(readCursor, peekCursor); coherent = false; } @@ -471,12 +448,8 @@ bool CharReader::read(char &c) // Set the peek position to the current read position, if reading was not // coherent - if (!coherent) { - peekCursor.assign(buffer, readCursor); - coherent = true; - } else { - buffer->copyCursor(readCursor.cursor, peekCursor.cursor); - } + buffer->copyCursor(readCursor, peekCursor); + coherent = true; // Return the result of the read function return res; @@ -485,7 +458,7 @@ bool CharReader::read(char &c) void CharReader::resetPeek() { if (!coherent) { - peekCursor.assign(buffer, readCursor); + buffer->copyCursor(readCursor, peekCursor); coherent = true; } } @@ -493,7 +466,7 @@ void CharReader::resetPeek() void CharReader::consumePeek() { if (!coherent) { - readCursor.assign(buffer, peekCursor); + buffer->copyCursor(peekCursor, readCursor); coherent = true; } } @@ -513,7 +486,8 @@ bool CharReader::consumeWhitespace() CharReaderFork CharReader::fork() { - return CharReaderFork(buffer, readCursor, peekCursor, coherent); + return CharReaderFork(buffer, readCursor, peekCursor, sourceId, offs, + coherent); } size_t CharReader::readRaw(char *buf, size_t size) @@ -528,155 +502,38 @@ size_t CharReader::readRaw(char *buf, size_t size) return res; } -SourceContext CharReader::getContextAt(ssize_t maxSize, - Buffer::CursorId referenceCursor) -{ - // Clone the given read cursor - Buffer::CursorId cur = buffer->createCursor(referenceCursor); - - // Fetch the start position of the search - ssize_t offs = buffer->offset(cur); - ssize_t start 
= offs; - ssize_t end = offs; - char c; - - // Search the beginning of the line with the last non-whitespace character - bool hadNonWhitespace = false; - bool foundBegin = false; - for (ssize_t i = 0; i < maxSize; i++) { - // Fetch the character at the current position - if (buffer->fetch(cur, c)) { - // Abort, at linebreaks if we found a non-linebreak character - hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c); - if (hadNonWhitespace && (c == '\n' || c == '\r')) { - buffer->moveCursor(cur, 1); - start++; - foundBegin = true; - break; - } - } - if (buffer->moveCursor(cur, -1) == 0) { - foundBegin = true; - break; - } else { - // Update the start position and the hadNonWhitespace flag - start--; - } - } - - // Search the end of the line - buffer->moveCursor(cur, offs - start); - bool foundEnd = false; - for (ssize_t i = 0; i < maxSize; i++) { - // Increment the end counter if a character was read, abort if the end - // of the stream has been reached - if (buffer->read(cur, c)) { - end++; - } else { - foundEnd = true; - break; - } - - // Abort on linebreak characters - if (c == '\n' || c == '\r') { - foundEnd = true; - break; - } - } - - // Calculate the truncated start and end position and limit the number of - // characters to the maximum number of characters - ssize_t tStart = start; - ssize_t tEnd = end; - if (tEnd - tStart > maxSize) { - tStart = std::max(offs - maxSize / 2, tStart); - tEnd = tStart + maxSize; - } - - // Try to go to the calculated start position and fetch the actual start - // position - ssize_t aStart = end + buffer->moveCursor(cur, tStart - end); - if (aStart > tStart) { - tEnd = tEnd + (aStart - tStart); - tStart = aStart; - } - - // Read one line - std::stringstream ss; - size_t relPos = 0; - for (ssize_t i = tStart; i < tEnd; i++) { - if (buffer->read(cur, c)) { - // Break once a linebreak is reached - if (c == '\n' || c == '\r') { - break; - } - - // Add the current character to the output - ss << c; - - // Increment the 
string-relative offset as long as the original - // offset is not reached in the for loop - if (i < offs) { - relPos++; - } - } - } - - // Delete the newly created cursor - buffer->deleteCursor(cur); - - return SourceContext{ss.str(), relPos, !foundBegin || tStart != start, - !foundEnd || tEnd != end}; -} - -SourceContext CharReader::getContextAtOffs(ssize_t maxSize, size_t offs) -{ - // Create a new cursor and calculate how far it has to be moved to reach - // the position specified in the location instance - Buffer::CursorId cur = buffer->createCursor(); - ssize_t moveOffs = offs - buffer->offset(cur); - - // Try to move the cursor to the specified position and read the context - SourceContext res; - if (buffer->moveCursor(cur, moveOffs) == moveOffs) { - res = getContextAt(60, cur); - } - - // Delete the read cursor - buffer->deleteCursor(cur); - return res; -} +bool CharReader::atEnd() const { return buffer->atEnd(readCursor.cursor); } -SourceContext CharReader::getContext(ssize_t maxSize) +size_t CharReader::getOffset() const { - return getContextAt(maxSize, readCursor.cursor); + return buffer->offset(readCursor.cursor) + offs; } -SourceContext CharReader::contextCallback(const SourceLocation &location, - void *data) +SourceLocation CharReader::getLocation() const { - return static_cast<CharReader *>(data)->getContextAtOffs(60, location.offs); + return SourceLocation{sourceId, getOffset()}; } /* Class CharReaderFork */ CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer, - CharReader::Cursor &parentReadCursor, - CharReader::Cursor &parentPeekCursor, + Buffer::CursorId parentReadCursor, + Buffer::CursorId parentPeekCursor, + SourceContextCallback sourceId, size_t offs, bool coherent) - : CharReader(buffer, 1, 1), + : CharReader(buffer, sourceId, offs), parentReadCursor(parentReadCursor), parentPeekCursor(parentPeekCursor) { - readCursor.assign(buffer, parentReadCursor); - peekCursor.assign(buffer, parentPeekCursor); + 
buffer->copyCursor(parentReadCursor, readCursor); + buffer->copyCursor(parentPeekCursor, peekCursor); this->coherent = coherent; } void CharReaderFork::commit() { - parentReadCursor.assign(buffer, readCursor); - parentPeekCursor.assign(buffer, peekCursor); + buffer->copyCursor(readCursor, parentReadCursor); + buffer->copyCursor(peekCursor, parentPeekCursor); } } diff --git a/src/core/common/CharReader.hpp b/src/core/common/CharReader.hpp index 134d9d9..0957e97 100644 --- a/src/core/common/CharReader.hpp +++ b/src/core/common/CharReader.hpp @@ -355,54 +355,10 @@ class CharReaderFork; /** * Used within parsers for convenient access to single characters in an input * stream or buffer. It allows reading and peeking single characters from a - * buffer. Additionally it counts the current column/row (with correct handling - * for UTF-8) and contains an internal state machine that handles the detection - * of linebreaks and converts these to a single '\n'. + * buffer. Additionally it contains an internal state machine that handles the + * detection of linebreaks and converts these to a single '\n'. */ class CharReader { -protected: - /** - * Internally used cursor structure for managing the read and the peek - * cursor. - */ - struct Cursor { - /** - * Corresponding cursor in the underlying buffer instance. - */ - const Buffer::CursorId cursor; - - /** - * Current line the cursor is in. - */ - int line; - - /** - * Current column the cursor is in. - */ - int column; - - /** - * Constructor of the Cursor class. - * - * @param cursor is the underlying cursor in the Buffer instance. - * @param line is the line at which the cursor is positioned. - * @param column is the column at which the cursor is positioned. - */ - Cursor(Buffer::CursorId cursor, int line, int column) - : cursor(cursor), line(line), column(column) - { - } - - /** - * Assigns one cursor to another. - * - * @param buffer is the underlying buffer instance the internal cursor - * belongs to. 
- * @param cursor is the cursor from which the state should be copied. - */ - void assign(std::shared_ptr<Buffer> buffer, Cursor &cursor); - }; - private: /** * Substitutes "\r", "\n\r", "\r\n" with a single "\n". @@ -421,29 +377,7 @@ private: * @return true if a character was read, false if the end of the stream has * been reached. */ - bool readAtCursor(Cursor &cursor, char &c); - - /** - * Returns the line the given cursor currently is in, but at most the - * given number of characters in the form of a Context structure. - * - * @param maxSize is the maximum length of the extracted context - * @param referenceCursor is a cursor in the internal buffer pointing at the - * location at which the context should be read. - */ - SourceContext getContextAt(ssize_t maxSize, - Buffer::CursorId referenceCursor); - - /** - * Returns the line the at the given byte offset, but at most the - * given number of characters in the form of a Context structure. - * - * @param maxSize is the maximum length of the extracted context - * @param offs is the byte offset for which the context should be read. - * @return the context at the specified position or an empty (invalid) - * context if the context could not be read. - */ - SourceContext getContextAtOffs(ssize_t maxSize, size_t offs); + bool readAtCursor(Buffer::CursorId &cursor, char &c); protected: /** @@ -454,12 +388,12 @@ protected: /** * Cursor used for reading. */ - Cursor readCursor; + Buffer::CursorId readCursor; /** * Cursor used for peeking. */ - Cursor peekCursor; + Buffer::CursorId peekCursor; /** * Set to true as long the underlying Buffer cursor is at the same position @@ -469,33 +403,50 @@ protected: bool coherent; /** + * Id of the underlying source file. + */ + SourceId sourceId; + + /** + * Offset to be added to the underlying buffer byte positions. + */ + size_t offs; + + /** * Protected constructor of the CharReader base class. Creates new read * and peek cursors for the given buffer. 
* * @param buffer is a reference to the underlying Buffer class responsible * for allowing to read from a single input stream from multiple locations. + * @param sourceId is the ID of the underlying source file. + * @param offs is the byte offset at which the char reader should start + * counting. */ - CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column); + CharReader(std::shared_ptr<Buffer> buffer, SourceId sourceId, size_t offs); public: /** * Creates a new CharReader instance from a string. * * @param str is a string containing the input data. - * @param line is the start line. - * @param column is the start column. + * @param sourceId is the ID of the underlying source file. + * @param offs is the byte offset at which the char reader should start + * counting. */ - CharReader(const std::string &str, size_t line = 1, size_t column = 1); + CharReader(const std::string &str, SourceId sourceId = InvalidSourceId, + size_t offs = 0); /** * Creates a new CharReader instance for an input stream. * * @param istream is the input stream from which incomming data should be * read. - * @param line is the start line. - * @param column is the start column. + * @param sourceId is the ID of the underlying source file. + * @param offs is the byte offset at which the char reader should start + * counting. */ - CharReader(std::istream &istream, size_t line = 1, size_t column = 1); + CharReader(std::istream &istream, SourceId sourceId = InvalidSourceId, + size_t offs = 0); /** * Deletes the used cursors from the underlying buffer instance. @@ -572,56 +523,27 @@ public: size_t readRaw(char *buf, size_t size); /** - * Returns true if there are no more characters as the stream was - * closed. + * Returns true if there are no more characters as the stream was closed. * * @return true if there is no more data. */ - bool atEnd() const { return buffer->atEnd(readCursor.cursor); } + bool atEnd() const; /** * Returns the offset of the read cursor in bytes. 
- */ - size_t getOffset() const { return buffer->offset(readCursor.cursor); } - - /** - * Returns the line number the read cursor currently is at. - */ - int getLine() const { return readCursor.line; } - - /** - * Returns the column the read cursor currently is at. - */ - int getColumn() const { return readCursor.column; } - - /** - * Returns the current position of the read cursor (line and column). - */ - SourceLocation getLocation() const - { - return SourceLocation(getLine(), getColumn(), getOffset()); - } - - /** - * Returns the line the read cursor currently is in, but at most the - * given number of characters in the form of a Context structure. * - * @param maxSize is the maximum length of the extracted context + * @return the offset of the read cursor in bytes. */ - SourceContext getContext(ssize_t maxSize = 60); + size_t getOffset() const; /** - * Function that can be used to provide the context for a certain source - * location. A pointer to this function can be supplied to a Logger instance - * in the pushFile() method. The data should be set to a pointer to the - * CharReader instance. + * Returns a SourceLocation object describing the exact position (including + * the source file) of the read cursor. * - * @param location is the location for which the context should be returned. - * Only the "offs" field within the location is used. - * @param data is a pointer pointing at a CharReader instance. + * @return a SourceLocation object at the position of the current read + * cursor. */ - static SourceContext contextCallback(const SourceLocation &location, - void *data); + SourceLocation getLocation() const; }; /** @@ -637,12 +559,12 @@ private: /** * The reader cursor of the underlying CharReader instance. */ - CharReader::Cursor &parentReadCursor; + Buffer::CursorId parentReadCursor; /** * The peek cursor of the underlying CharReader instance. 
*/ - CharReader::Cursor &parentPeekCursor; + Buffer::CursorId parentPeekCursor; /** * Constructor of the CharReaderFork class. @@ -650,12 +572,14 @@ private: * @param buffer is a reference at the parent Buffer instance. * @param parentReadCursor is a reference at the parent read cursor. * @param parentPeekCursor is a reference at the parent peek cursor. + * @param location is the current location. * @param coherent specifies whether the char reader cursors are initialized * coherently. */ CharReaderFork(std::shared_ptr<Buffer> buffer, - CharReader::Cursor &parentReadCursor, - CharReader::Cursor &parentPeekCursor, bool coherent); + Buffer::CursorId &parentReadCursor, + Buffer::CursorId &parentPeekCursor, + SourceContextCallback sourceId, size_t offs, bool coherent); public: /** |