summaryrefslogtreecommitdiff
path: root/src/core/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/utils')
-rw-r--r-- src/core/utils/CharReader.cpp 643
-rw-r--r-- src/core/utils/CharReader.hpp 672
2 files changed, 0 insertions, 1315 deletions
diff --git a/src/core/utils/CharReader.cpp b/src/core/utils/CharReader.cpp
deleted file mode 100644
index 61616d7..0000000
--- a/src/core/utils/CharReader.cpp
+++ /dev/null
@@ -1,643 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <algorithm>
-#include <cassert>
-#include <limits>
-#include <sstream>
-
-#include <core/Utils.hpp>
-
-#include "CharReader.hpp"
-
-namespace ousia {
-namespace utils {
-
-/* Helper functions */
-
-/**
- * istreamReadCallback is used internally by the Buffer class to stream data
- * from an input stream.
- *
- * @param buf points at the target memory region.
- * @param size is the requested number of bytes.
- * @param userData is a pointer at some user defined data.
- * @return the actual number of bytes read. If the result is smaller than
- * the requested size, this tells the Buffer that the end of the input
- * stream is reached.
- */
-static size_t istreamReadCallback(char *buf, size_t size, void *userData)
-{
- return (static_cast<std::istream *>(userData))->read(buf, size).gcount();
-}
-
-/* Class Buffer */
-
-Buffer::Buffer(ReadCallback callback, void *userData)
- : callback(callback),
- userData(userData),
- reachedEnd(false),
- startBucket(buckets.end()),
- endBucket(buckets.end()),
- startOffset(0),
- firstDead(0)
-{
- // Load a first block of data from the stream
- stream();
- startBucket = buckets.begin();
-}
-
-Buffer::Buffer(std::istream &istream) : Buffer(istreamReadCallback, &istream) {}
-
-Buffer::Buffer(const std::string &str)
- : callback(nullptr),
- userData(nullptr),
- reachedEnd(true),
- startBucket(buckets.end()),
- endBucket(buckets.end()),
- startOffset(0),
- firstDead(0)
-{
- // Copy the given string into a first buffer and set the start buffer
- // correctly
- Bucket &bucket = nextBucket();
- bucket.resize(str.size());
- std::copy(str.begin(), str.end(), bucket.begin());
- startBucket = buckets.begin();
-}
-
-#ifndef NDEBUG
-Buffer::~Buffer()
-{
- // Make sure all cursors have been deleted
- for (bool cursor_alive: alive) {
- assert(!cursor_alive);
- }
-}
-#endif
-
-void Buffer::advance(BucketIterator &it)
-{
- it++;
- if (it == buckets.end()) {
- it = buckets.begin();
- }
-}
-
-void Buffer::advance(BucketList::const_iterator &it) const
-{
- it++;
- if (it == buckets.cend()) {
- it = buckets.cbegin();
- }
-}
-
-Buffer::Bucket &Buffer::nextBucket()
-{
- constexpr size_t MAXVAL = std::numeric_limits<size_t>::max();
-
- // Fetch the minimum bucket index
- size_t minBucketIdx = MAXVAL;
- for (size_t i = 0; i < cursors.size(); i++) {
- if (alive[i]) {
- // Fetch references to the bucket and the cursor
- const Cursor &cur = cursors[i];
- const Bucket &bucket = *(cur.bucket);
-
- // Increment the bucket index by one, if the cursor is at the end
- // of the bucket (only valid if the LOOKBACK_SIZE is set to zero)
- size_t bIdx = cur.bucketIdx;
- if (LOOKBACK_SIZE == 0 && cur.bucketOffs == bucket.size()) {
- bIdx++;
- }
-
- // Decrement the bucket index by one, if the previous bucket still
- // needs to be reached and cannot be overridden
- if (bIdx > 0 && cur.bucketOffs < LOOKBACK_SIZE) {
- bIdx--;
- }
-
- // Set the bucket index to the minimum
- minBucketIdx = std::min(minBucketIdx, bIdx);
- }
- }
-
- // If there is space between the current start bucket and the read
- // cursor, the start bucket can be safely overridden.
- if (minBucketIdx > 0 && minBucketIdx != MAXVAL) {
- // All cursor bucket indices will be decreased by one
- for (size_t i = 0; i < cursors.size(); i++) {
- cursors[i].bucketIdx--;
- }
-
- // Increment the start offset
- startOffset += startBucket->size();
-
- // The old start bucket is the new end bucket
- endBucket = startBucket;
-
- // Advance the start bucket, wrap around at the end of the list
- advance(startBucket);
- } else {
- // No free bucket, insert a new one before the start bucket
- endBucket = buckets.emplace(startBucket);
- }
- return *endBucket;
-}
-
-Buffer::CursorId Buffer::nextCursor()
-{
- bool hasCursor = false;
- CursorId res = 0;
-
- // Search for the next free cursor starting with firstDead
- for (size_t i = firstDead; i < alive.size(); i++) {
- if (!alive[i]) {
- res = i;
- hasCursor = true;
- break;
- }
- }
-
- // Add a new cursor to the cursor list if no cursor is currently free
- if (!hasCursor) {
- res = cursors.size();
- cursors.resize(res + 1);
- alive.resize(res + 1);
- }
-
- // The next dead cursor is at least the next cursor
- firstDead = res + 1;
-
- // Mark the new cursor as alive
- alive[res] = true;
-
- return res;
-}
-
-void Buffer::stream()
-{
- // Fetch the bucket into which the data should be inserted, make sure it
- // has the correct size
- Bucket &tar = nextBucket();
- tar.resize(REQUEST_SIZE);
-
- // Read data from the stream into the target buffer
- size_t size = callback(tar.data(), REQUEST_SIZE, userData);
-
- // If not enough bytes were returned, we're at the end of the stream
- if (size < REQUEST_SIZE) {
- tar.resize(size);
- reachedEnd = true;
- }
-}
-
-Buffer::CursorId Buffer::createCursor()
-{
- CursorId res = nextCursor();
- cursors[res].bucket = startBucket;
- cursors[res].bucketIdx = 0;
- cursors[res].bucketOffs = 0;
- return res;
-}
-
-Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref)
-{
- CursorId res = nextCursor();
- cursors[res] = cursors[ref];
- return res;
-}
-
-void Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to)
-{
- cursors[to] = cursors[from];
-}
-
-void Buffer::deleteCursor(Buffer::CursorId cursor)
-{
- alive[cursor] = false;
- firstDead = std::min(firstDead, cursor);
-}
-
-size_t Buffer::offset(Buffer::CursorId cursor) const
-{
- const Cursor &cur = cursors[cursor];
- size_t offs = startOffset + cur.bucketOffs;
- BucketList::const_iterator it = startBucket;
- while (it != cur.bucket) {
- offs += it->size();
- advance(it);
- }
- return offs;
-}
-
-size_t Buffer::moveForward(CursorId cursor, size_t relativeOffs)
-{
- size_t offs = relativeOffs;
- Cursor &cur = cursors[cursor];
- while (offs > 0) {
- // Fetch the current bucket of the cursor
- Bucket &bucket = *(cur.bucket);
-
- // If there is enough space in the bucket, simply increment the bucket
- // offset by the given relative offset
- const size_t space = bucket.size() - cur.bucketOffs;
- if (space >= offs) {
- cur.bucketOffs += offs;
- break;
- } else {
- // Go to the end of the current bucket otherwise
- offs -= space;
- cur.bucketOffs = bucket.size();
-
- // Go to the next bucket
- if (cur.bucket != endBucket) {
- // Go to the next bucket
- advance(cur.bucket);
- cur.bucketIdx++;
- cur.bucketOffs = 0;
- } else {
- // Abort, if there is no more data to stream, otherwise just
- // load new data
- if (reachedEnd) {
- return relativeOffs - offs;
- }
- stream();
- }
- }
- }
- return relativeOffs;
-}
-
-size_t Buffer::moveBackward(CursorId cursor, size_t relativeOffs)
-{
- size_t offs = relativeOffs;
- Cursor &cur = cursors[cursor];
- while (offs > 0) {
- // If there is enough space in the bucket, simply decrement the bucket
- // offset by the given relative offset
- if (cur.bucketOffs >= offs) {
- cur.bucketOffs -= offs;
- break;
- } else {
- // Go to the beginning of the current bucket otherwise
- offs -= cur.bucketOffs;
- cur.bucketOffs = 0;
-
- // Abort if there is no more bucket to go back to
- if (cur.bucketIdx == 0) {
- return relativeOffs - offs;
- }
-
- // Go to the previous bucket (wrap around at the beginning of the
- // list)
- if (cur.bucket == buckets.begin()) {
- cur.bucket = buckets.end();
- }
- cur.bucket--;
-
- // Decrement the bucket index, and set the current offset to the
- // end of the new bucket
- cur.bucketIdx--;
- cur.bucketOffs = cur.bucket->size();
- }
- }
- return relativeOffs;
-}
-
-ssize_t Buffer::moveCursor(CursorId cursor, ssize_t relativeOffs)
-{
- if (relativeOffs > 0) {
- return moveForward(cursor, relativeOffs);
- } else if (relativeOffs < 0) {
- return -moveBackward(cursor, -relativeOffs);
- } else {
- return 0;
- }
-}
-
-bool Buffer::atEnd(Buffer::CursorId cursor) const
-{
- const Cursor &c = cursors[cursor];
- return reachedEnd &&
- (c.bucket == endBucket && c.bucketOffs == endBucket->size());
-}
-
-bool Buffer::fetchCharacter(CursorId cursor, char &c, bool incr)
-{
- Cursor &cur = cursors[cursor];
- while (true) {
- // Reference at the current bucket
- Bucket &bucket = *(cur.bucket);
-
- // If there is still data in the current bucket, return this data
- if (cur.bucketOffs < bucket.size()) {
- c = bucket[cur.bucketOffs];
- if (incr) {
- cur.bucketOffs++;
- }
- return true;
- } else if (cur.bucket == endBucket) {
- // Return false if the end of the stream has been reached, otherwise
- // load new data
- if (reachedEnd) {
- return false;
- }
- stream();
- }
-
- // Go to the next bucket
- cur.bucketIdx++;
- cur.bucketOffs = 0;
- advance(cur.bucket);
- }
-}
-
-bool Buffer::read(Buffer::CursorId cursor, char &c)
-{
- return fetchCharacter(cursor, c, true);
-}
-
-bool Buffer::fetch(CursorId cursor, char &c)
-{
- return fetchCharacter(cursor, c, false);
-}
-
-/* CharReader::Cursor class */
-
-void CharReader::Cursor::assign(std::shared_ptr<Buffer> buffer,
- CharReader::Cursor &cursor)
-{
- // Copy the cursor position
- buffer->copyCursor(cursor.cursor, this->cursor);
-
- // Copy the state
- line = cursor.line;
- column = cursor.column;
-}
-
-/* CharReader class */
-
-CharReader::CharReader(std::shared_ptr<Buffer> buffer, size_t line,
- size_t column)
- : buffer(buffer),
- readCursor(buffer->createCursor(), line, column),
- peekCursor(buffer->createCursor(), line, column),
- coherent(true)
-{
-}
-
-CharReader::CharReader(const std::string &str, size_t line, size_t column)
- : CharReader(std::shared_ptr<Buffer>{new Buffer{str}}, line, column)
-{
-}
-
-CharReader::CharReader(std::istream &istream, size_t line, size_t column)
- : CharReader(std::shared_ptr<Buffer>{new Buffer{istream}}, line, column)
-{
-}
-
-CharReader::~CharReader()
-{
- buffer->deleteCursor(readCursor.cursor);
- buffer->deleteCursor(peekCursor.cursor);
-}
-
-bool CharReader::readAtCursor(Cursor &cursor, char &c)
-{
- // Return false if we're at the end of the stream
- if (!buffer->read(cursor.cursor, c)) {
- return false;
- }
-
- // Substitute linebreak sequences with a single '\n'
- if (c == '\n' || c == '\r') {
- // Output a single \n
- c = '\n';
-
- // Check whether the next character is a continuation of the
- // current character
- char c2;
- if (buffer->read(cursor.cursor, c2)) {
- if ((c2 != '\n' && c2 != '\r') || c2 == c) {
- buffer->moveCursor(cursor.cursor, -1);
- }
- }
- }
-
- // Count lines and columns
- if (c == '\n') {
- // A linebreak was reached, go to the next line
- cursor.line++;
- cursor.column = 1;
- } else {
- // Ignore UTF-8 continuation bytes
- if (!((c & 0x80) && !(c & 0x40))) {
- cursor.column++;
- }
- }
- return true;
-}
-
-bool CharReader::peek(char &c)
-{
- // If the reader was coherent, update the peek cursor state
- if (coherent) {
- peekCursor.assign(buffer, readCursor);
- coherent = false;
- }
-
- // Read a character from the peek cursor
- return readAtCursor(peekCursor, c);
-}
-
-bool CharReader::read(char &c)
-{
- // Read a character from the buffer at the current read cursor
- bool res = readAtCursor(readCursor, c);
-
- // Set the peek position to the current read position, if reading was not
- // coherent
- if (!coherent) {
- peekCursor.assign(buffer, readCursor);
- coherent = true;
- } else {
- buffer->copyCursor(readCursor.cursor, peekCursor.cursor);
- }
-
- // Return the result of the read function
- return res;
-}
-
-void CharReader::resetPeek()
-{
- if (!coherent) {
- peekCursor.assign(buffer, readCursor);
- coherent = true;
- }
-}
-
-void CharReader::consumePeek()
-{
- if (!coherent) {
- readCursor.assign(buffer, peekCursor);
- coherent = true;
- }
-}
-
-bool CharReader::consumeWhitespace()
-{
- char c;
- while (peek(c)) {
- if (!Utils::isWhitespace(c)) {
- resetPeek();
- return true;
- }
- consumePeek();
- }
- return false;
-}
-
-CharReaderFork CharReader::fork()
-{
- return CharReaderFork(buffer, readCursor, peekCursor, coherent);
-}
-
-CharReader::Context CharReader::getContext(ssize_t maxSize)
-{
- // Clone the current read cursor
- Buffer::CursorId cur = buffer->createCursor(readCursor.cursor);
-
- // Fetch the start position of the search
- ssize_t offs = buffer->offset(cur);
- ssize_t start = offs;
- ssize_t end = offs;
- char c;
-
- // Search the beginning of the line with the last non-whitespace character
- bool hadNonWhitespace = false;
- bool foundBegin = false;
- for (ssize_t i = 0; i < maxSize; i++) {
- // Fetch the character at the current position
- if (buffer->fetch(cur, c)) {
- // Abort at linebreaks once a non-whitespace character was found
- hadNonWhitespace = hadNonWhitespace || !Utils::isWhitespace(c);
- if (hadNonWhitespace && (c == '\n' || c == '\r')) {
- buffer->moveCursor(cur, 1);
- start++;
- foundBegin = true;
- break;
- }
- }
- if (buffer->moveCursor(cur, -1) == 0) {
- foundBegin = true;
- break;
- } else {
- // Update the start position
- start--;
- }
- }
-
- // Search the end of the line
- buffer->moveCursor(cur, offs - start);
- bool foundEnd = false;
- for (ssize_t i = 0; i < maxSize; i++) {
- // Increment the end counter if a character was read, abort if the end
- // of the stream has been reached
- if (buffer->read(cur, c)) {
- end++;
- } else {
- foundEnd = true;
- break;
- }
-
- // Abort on linebreak characters
- if (c == '\n' || c == '\r') {
- foundEnd = true;
- break;
- }
- }
-
- // Calculate the truncated start and end position and limit the number of
- // characters to the maximum number of characters
- ssize_t tStart = start;
- ssize_t tEnd = end;
- if (tEnd - tStart > maxSize) {
- tStart = std::max(offs - maxSize / 2, tStart);
- tEnd = tStart + maxSize;
- }
-
- // Try to go to the calculated start position and fetch the actual start
- // position
- ssize_t aStart = end + buffer->moveCursor(cur, tStart - end);
- if (aStart > tStart) {
- tEnd = tEnd + (aStart - tStart);
- tStart = aStart;
- }
-
- // Read one line
- std::stringstream ss;
- size_t relPos = 0;
- for (ssize_t i = tStart; i < tEnd; i++) {
- if (buffer->read(cur, c)) {
- // Break once a linebreak is reached
- if (c == '\n' || c == '\r') {
- break;
- }
-
- // Add the current character to the output
- ss << c;
-
- // Increment the string-relative offset as long as the original
- // offset is not reached in the for loop
- if (i < offs) {
- relPos++;
- }
- }
- }
-
- // Delete the newly created cursor
- buffer->deleteCursor(cur);
-
- return CharReader::Context{ss.str(), relPos, !foundBegin || tStart != start,
- !foundEnd || tEnd != end};
-}
-
-/* Class CharReaderFork */
-
-CharReaderFork::CharReaderFork(std::shared_ptr<Buffer> buffer,
- CharReader::Cursor &parentReadCursor,
- CharReader::Cursor &parentPeekCursor,
- bool coherent)
- : CharReader(buffer, 1, 1),
- parentReadCursor(parentReadCursor),
- parentPeekCursor(parentPeekCursor)
-{
- readCursor.assign(buffer, parentReadCursor);
- peekCursor.assign(buffer, parentPeekCursor);
- this->coherent = coherent;
-}
-
-void CharReaderFork::commit()
-{
- parentReadCursor.assign(buffer, readCursor);
- parentPeekCursor.assign(buffer, peekCursor);
-}
-}
-}
-
diff --git a/src/core/utils/CharReader.hpp b/src/core/utils/CharReader.hpp
deleted file mode 100644
index 1306026..0000000
--- a/src/core/utils/CharReader.hpp
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file CharReader.hpp
- *
- * Used within all parsers to read single characters from an underlying stream.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_CHAR_READER_HPP_
-#define _OUSIA_CHAR_READER_HPP_
-
-#include <istream>
-#include <list>
-#include <memory>
-#include <vector>
-
-namespace ousia {
-namespace utils {
-
-/**
- * A chunked ring buffer used in CharReader to provide access to an input stream
- * with multiple read cursors. The Buffer automatically expands to the
- * size spanned by the read cursors while reusing already allocated
- * memory.
- */
-class Buffer {
-public:
- /**
- * Callback function which is called whenever new data is requested from the
- * input stream.
- *
- * @param buf points at the target memory region.
- * @param size is the requested number of bytes.
- * @param userData is a pointer at some user defined data given in the
- * constructor.
- * @return the actual number of bytes read. If the result is smaller than
- * the requested size, this tells the Buffer that the end of the input
- * stream is reached.
- */
- using ReadCallback = size_t (*)(char *buf, size_t size, void *userData);
-
- /**
- * Handle used to identify a cursor.
- */
- using CursorId = size_t;
-
-private:
- /**
- * Number of bytes to request from the input stream. Set to 64 KiB because
- * this seems to be a nice value for I/O operations according to multiple
- * sources.
- */
- static constexpr size_t REQUEST_SIZE = 64 * 1024;
-
- /**
- * Number of bytes the buffer guarantees to be capable of looking back
- * for extracting the current context.
- */
- static constexpr size_t LOOKBACK_SIZE = 128;
-
- /**
- * Type used internally to represent one chunk of memory.
- */
- using Bucket = std::vector<char>;
-
- /**
- * Type used internally to represent a bucket container.
- */
- using BucketList = std::list<Bucket>;
-
- /**
- * Type used internally for representing iterators in the bucket list.
- */
- using BucketIterator = BucketList::iterator;
-
- /**
- * Type used internally to represent a read cursor.
- */
- struct Cursor {
- /**
- * Iterator pointing at the current bucket.
- */
- BucketIterator bucket;
-
- /**
- * Index of the bucket relative to the start bucket.
- */
- size_t bucketIdx;
-
- /**
- * Current offset within that bucket.
- */
- size_t bucketOffs;
- };
-
- /**
- * List of buckets containing the buffered memory.
- */
- BucketList buckets;
-
- /**
- * List of cursors used to access the memory. Note that cursors can be
- * marked as inactive and reused later on (to avoid having to resize the
- * vector).
- */
- std::vector<Cursor> cursors;
-
- /**
- * Bitfield specifying which of the cursors is actually valid.
- */
- std::vector<bool> alive;
-
- /**
- * Function to be called whenever new data is needed. Set to nullptr if the
- * Buffer is not backed by an input stream.
- */
- const ReadCallback callback;
-
- /**
- * User data given in the constructor.
- */
- void *userData;
-
- /**
- * Set to true if the input stream is at its end.
- */
- bool reachedEnd;
-
- /**
- * Iterator pointing at the current start bucket.
- */
- BucketIterator startBucket;
-
- /**
- * Iterator pointing at the last bucket.
- */
- BucketIterator endBucket;
-
- /**
- * Byte offset of the start bucket relative to the beginning of the stream.
- */
- size_t startOffset;
-
- /**
- * Points at the smallest possible available cursor index, yet does not
- * guarantee that this cursor index actually is free.
- */
- CursorId firstDead;
-
- /**
- * Advances the bucket iterator, cares about wrapping around in the ring.
- */
- void advance(BucketIterator &it);
-
- /**
- * Advances the bucket iterator, cares about wrapping around in the ring.
- */
- void advance(BucketList::const_iterator &it) const;
-
- /**
- * Internally used to find the next free cursor in the cursors vector. The
- * cursor is marked as active.
- *
- * @return the next free cursor index.
- */
- CursorId nextCursor();
-
- /**
- * Returns a reference at the next bucket into which data should be
- * inserted.
- *
- * @return a bucket into which the data can be inserted.
- */
- Bucket &nextBucket();
-
- /**
- * Reads data from the input stream and places it in the next free buffer.
- */
- void stream();
-
- /**
- * Moves the given cursor forward.
- */
- size_t moveForward(CursorId cursor, size_t relativeOffs);
-
- /**
- * Moves the given cursor backward.
- */
- size_t moveBackward(CursorId cursor, size_t relativeOffs);
-
- /**
- * Reads a character from the current cursor position and optionally
- * advances.
- */
- bool fetchCharacter(CursorId cursor, char &c, bool incr);
-
-public:
- /**
- * Initializes the Buffer with a reference to a ReadCallback that is used
- * to fetch data from an underlying input stream.
- *
- * @param callback is the function that will be called whenever data is read
- * from the ring buffer and the buffer does not hold enough data to fulfill
- * this read request.
- * @param userData is a pointer to user defined data which will be passed to
- * the callback function.
- */
- Buffer(ReadCallback callback, void *userData);
-
- /**
- * Initializes the Buffer with a reference to an std::istream from which
- * data will be read.
- *
- * @param istream is the input stream from which the data should be read.
- */
- Buffer(std::istream &istream);
-
- /**
- * Initializes the Buffer with the contents of the given string, after
- * this operation the Buffer has a fixed size.
- *
- * @param str is the string containing the data that should be copied into
- * the ring buffer.
- */
- Buffer(const std::string &str);
-
-#ifndef NDEBUG
- /**
- * Destructor of the Buffer class. Makes sure that all cursors have been
- * freed.
- */
- ~Buffer();
-#endif
-
- // No copy
- Buffer(const Buffer &) = delete;
-
- // No assign
- Buffer &operator=(const Buffer &) = delete;
-
- /**
- * Creates a new read cursor positioned at the smallest possible position
- * in the ring buffer.
- */
- CursorId createCursor();
-
- /**
- * Creates a new read cursor positioned at the same position as the given
- * read cursor.
- *
- * @param ref is the read cursor that should be used as reference for the
- * new read cursor.
- */
- CursorId createCursor(CursorId ref);
-
- /**
- * Copies the position of one cursor to another cursor.
- *
- * @param from is the cursor id of which the position should be copied.
- * @param to is the cursor id to which the position should be copied.
- */
- void copyCursor(CursorId from, CursorId to);
-
- /**
- * Deletes the cursor with the given id. The cursor may no longer be used
- * after this function has been called.
- *
- * @param cursor is the id of the cursor that should be freed.
- */
- void deleteCursor(CursorId cursor);
-
- /**
- * Moves a cursor by offs bytes. Note that moving backwards is theoretically
- * limited by the LOOKBACK_SIZE of the Buffer, practically it will most
- * likely be limited by the REQUEST_SIZE, so you can go at most 64 KiB
- * backwards.
- *
- * @param cursor is the cursor that should be moved.
- * @param relativeOffs is a positive or negative integer number specifying
- * the number of bytes the cursor should be moved forward (positive numbers)
- * or backwards (negative numbers).
- * @return the actual number of bytes the cursor was moved. This is smaller in
- * magnitude than relativeOffs if the buffer start or stream end was reached.
- */
- ssize_t moveCursor(CursorId cursor, ssize_t relativeOffs);
-
- /**
- * Returns the current byte offset of the given cursor relative to the
- * beginning of the stream.
- *
- * @param cursor is the cursor for which the byte offset relative to the
- * beginning of the stream should be returned.
- * @return the number of bytes since the beginning of the stream for the
- * given cursor.
- */
- size_t offset(CursorId cursor) const;
-
- /**
- * Returns true if the given cursor currently is at the end of the stream.
- *
- * @param cursor is the cursor for which the atEnd flag should be returned.
- * @return true if there are no more bytes for this cursor. If false
- * is returned, this means that there may be more bytes in the stream,
- * nevertheless the end of the stream may be hit once the next read function
- * is called.
- */
- bool atEnd(CursorId cursor) const;
-
- /**
- * Reads a single character from the ring buffer from the given cursor and
- * moves to the next character.
- *
- * @param cursor specifies the cursor from which the data should be read.
- * The cursor will be advanced by one byte.
- * @param c is the character into which the data needs to be read.
- * @return true if a character was read, false if the end of the stream has
- * been reached.
- */
- bool read(CursorId cursor, char &c);
-
- /**
- * Returns a single character from the ring buffer from the current cursor
- * position and stays at that position.
- *
- * @param cursor specifies the cursor from which the data should be read.
- * The cursor position is left unchanged.
- * @param c is the character into which the data needs to be read.
- * @return true if a character could be fetched, false if the end of the
- * stream has been reached.
- */
- bool fetch(CursorId cursor, char &c);
-};
-
-// Forward declaration
-class CharReaderFork;
-
-/**
- * Used within parsers for convenient access to single characters in an input
- * stream or buffer. It allows reading and peeking single characters from a
- * buffer. Additionally it counts the current column/row (with correct handling
- * for UTF-8) and contains an internal state machine that handles the detection
- * of linebreaks and converts these to a single '\n'.
- */
-class CharReader {
-public:
- /**
- * The context struct is used to represent the current context the char
- * reader is in. This context can for example be used when building error
- * messages.
- */
- struct Context {
- /**
- * Set to the content of the current line.
- */
- std::string line;
-
- /**
- * Relative position (in characters) within that line.
- */
- size_t relPos;
-
- /**
- * Set to true if the beginning of the line has been truncated (because
- * the reader position is too far away from the actual position of the
- * line).
- */
- bool truncatedStart;
-
- /**
- * Set to true if the end of the line has been truncated (because the
- * reader position is too far away from the actual end position of the
- * line).
- */
- bool truncatedEnd;
-
- Context()
- : line(), relPos(0), truncatedStart(false), truncatedEnd(false)
- {
- }
-
- Context(std::string line, size_t relPos, bool truncatedStart,
- bool truncatedEnd)
- : line(std::move(line)),
- relPos(relPos),
- truncatedStart(truncatedStart),
- truncatedEnd(truncatedEnd)
- {
- }
- };
-
-protected:
- /**
- * Internally used cursor structure for managing the read and the peek
- * cursor.
- */
- struct Cursor {
- /**
- * Corresponding cursor in the underlying buffer instance.
- */
- const Buffer::CursorId cursor;
-
- /**
- * Current line the cursor is in.
- */
- uint32_t line;
-
- /**
- * Current column the cursor is in.
- */
- uint32_t column;
-
- /**
- * Constructor of the Cursor class.
- *
- * @param cursor is the underlying cursor in the Buffer instance.
- */
- Cursor(Buffer::CursorId cursor, size_t line, size_t column)
- : cursor(cursor), line(line), column(column)
- {
- }
-
- /**
- * Assigns one cursor to another.
- *
- * @param buffer is the underlying buffer instance the internal cursor
- * belongs to.
- * @param cursor is the cursor from which the state should be copied.
- */
- void assign(std::shared_ptr<Buffer> buffer, Cursor &cursor);
- };
-
-private:
- /**
- * Substitutes "\r", "\n\r", "\r\n" with a single "\n".
- *
- * @param cursor is the cursor from which the character should be read.
- * @param c a reference to the character that should be written.
- * @return true if another character needs to be read.
- */
- bool substituteLinebreaks(Cursor &cursor, char &c);
-
- /**
- * Reads a single character from the given cursor.
- *
- * @param cursor is the cursor from which the character should be read.
- * @param c a reference to the character that should be written.
- * @return true if a character was read, false if the end of the stream has
- * been reached.
- */
- bool readAtCursor(Cursor &cursor, char &c);
-
-protected:
- /**
- * Reference pointing at the underlying buffer.
- */
- std::shared_ptr<Buffer> buffer;
-
- /**
- * Cursor used for reading.
- */
- Cursor readCursor;
-
- /**
- * Cursor used for peeking.
- */
- Cursor peekCursor;
-
- /**
- * Set to true as long as the underlying Buffer cursor is at the same position
- * for the read and the peek cursor. This is only used for optimization
- * purposes and makes consecutive reads a bit faster.
- */
- bool coherent;
-
- /**
- * Protected constructor of the CharReader base class. Creates new read
- * and peek cursors for the given buffer.
- *
- * @param buffer is a reference to the underlying Buffer class responsible
- * for allowing to read from a single input stream from multiple locations.
- */
- CharReader(std::shared_ptr<Buffer> buffer, size_t line, size_t column);
-
-public:
- /**
- * Creates a new CharReader instance from a string.
- *
- * @param str is a string containing the input data.
- * @param line is the start line.
- * @param column is the start column.
- */
- CharReader(const std::string &str, size_t line = 1, size_t column = 1);
-
- /**
- * Creates a new CharReader instance for an input stream.
- *
- * @param istream is the input stream from which incoming data should be
- * read.
- * @param line is the start line.
- * @param column is the start column.
- */
- CharReader(std::istream &istream, size_t line = 1, size_t column = 1);
-
- /**
- * Deletes the used cursors from the underlying buffer instance.
- */
- ~CharReader();
-
- // No copy
- CharReader(const Buffer &) = delete;
-
- // No assign
- CharReader &operator=(const Buffer &) = delete;
-
- /**
- * Peeks a single character. If called multiple times, returns the
- * character after the previously peeked character.
- *
- * @param c is a reference to the character to which the result should be
- * written.
- * @return true if the character was successfully read, false if there are
- * no more characters to be read in the buffer.
- */
- bool peek(char &c);
-
- /**
- * Reads a character from the input data. If "peek" was called
- * beforehand resets the peek pointer.
- *
- * @param c is a reference to the character to which the result should be
- * written.
- * @return true if the character was successfully read, false if there are
- * no more characters to be read in the buffer.
- */
- bool read(char &c);
-
- /**
- * Resets the peek pointer to the "read" pointer.
- */
- void resetPeek();
-
- /**
- * Advances the read pointer to the peek pointer -- so if the "peek"
- * function was called, "read" will now return the character after
- * the last peeked character.
- */
- void consumePeek();
-
- /**
- * Moves the read cursor to the next non-whitespace character. Returns
- * false, if the end of the stream was reached.
- *
- * @return false if the end of the stream was reached, true otherwise.
- */
- bool consumeWhitespace();
-
- /**
- * Creates a new CharReader located at the same position as this CharReader
- * instance, yet the new CharReader can be used independently of this
- * CharReader. Use the "commit" function of the returned CharReader to
- * copy the state of the forked CharReaderFork to this CharReader.
- *
- * @return a CharReaderFork instance positioned at the same location as this
- * CharReader instance.
- */
- CharReaderFork fork();
-
- /**
- * Returns true if there are no more characters as the stream was
- * closed.
- *
- * @return true if there is no more data.
- */
- bool atEnd() const { return buffer->atEnd(readCursor.cursor); }
-
- /**
- * Returns the current line (starting with one).
- *
- * @return the current line number.
- */
- uint32_t getLine() const { return readCursor.line; }
-
- /**
- * Returns the current column (starting with one).
- *
- * @return the current column number.
- */
- uint32_t getColumn() const { return readCursor.column; }
-
- /**
- * Returns the current byte offset of the read cursor.
- *
- * @return the byte position within the stream.
- */
- size_t getOffset() const { return buffer->offset(readCursor.cursor); };
-
- /**
- * Returns the line the read cursor currently is in, but at most the
- * given number of characters in the form of a Context structure.
- */
- Context getContext(ssize_t maxSize);
-};
-
-/**
- * A CharReaderFork is returned whenever the "fork" function of the CharReader
- * class is used. Its "commit" function can be used to move the underlying
- * CharReader instance to the location of the CharReaderFork instance. Otherwise
- * the read location of the underlying CharReader is left unchanged.
- */
-class CharReaderFork : public CharReader {
-private:
- friend CharReader;
-
- /**
- * The reader cursor of the underlying CharReader instance.
- */
- CharReader::Cursor &parentReadCursor;
-
- /**
- * The peek cursor of the underlying CharReader instance.
- */
- CharReader::Cursor &parentPeekCursor;
-
- /**
- * Constructor of the CharReaderFork class.
- *
- * @param buffer is a reference at the parent Buffer instance.
- * @param parentReadCursor is a reference at the parent read cursor.
- * @param parentPeekCursor is a reference at the parent peek cursor.
- * @param coherent specifies whether the char reader cursors are initialized
- * coherently.
- */
- CharReaderFork(std::shared_ptr<Buffer> buffer,
- CharReader::Cursor &parentReadCursor,
- CharReader::Cursor &parentPeekCursor, bool coherent);
-
-public:
- /**
- * Moves the read and peek cursor of the parent CharReader to the location
- * of the read and peek cursor in the fork.
- */
- void commit();
-};
-}
-
-/**
- * Alias of the commonly used CharReader class.
- */
-using CharReader = utils::CharReader;
-
-}
-
-#endif /* _OUSIA_CHAR_READER_HPP_ */
-