diff options
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/core/utils/Buffer.cpp | 231 | ||||
-rw-r--r-- | src/core/utils/Buffer.hpp | 301 |
3 files changed, 533 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 94b2cc7..d1ecc18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,6 +114,7 @@ ADD_LIBRARY(ousia_core # src/core/script/Object # src/core/script/ScriptEngine # src/core/script/Variant + src/core/utils/Buffer src/core/variant/Reader src/core/variant/Variant ) diff --git a/src/core/utils/Buffer.cpp b/src/core/utils/Buffer.cpp new file mode 100644 index 0000000..6407907 --- /dev/null +++ b/src/core/utils/Buffer.cpp @@ -0,0 +1,231 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <algorithm> + +#include "Buffer.hpp" + +namespace ousia { +namespace utils { + +/* Class Buffer */ + +Buffer::Buffer(ReadCallback callback, void *userData) + : callback(callback), + userData(userData), + reachedEnd(false), + startBucket(buckets.end()), + endBucket(buckets.end()), + startOffset(0), + firstDead(0) +{ + // Insert a first empty bucket and set the start buffer correctly + nextBucket(); + startBucket = buckets.begin(); +} + +Buffer::Buffer(const std::string &str) + : callback(nullptr), + userData(nullptr), + reachedEnd(true), + startBucket(buckets.end()), + endBucket(buckets.end()), + startOffset(0), + firstDead(0) +{ + // Copy the given string into a first buffer and set the start buffer + // correctly + Bucket &bucket = nextBucket(); + bucket.resize(str.size()); + std::copy(str.begin(), str.end(), bucket.begin()); + startBucket = buckets.begin(); +} + +void Buffer::advance(BucketIterator &it) +{ + it++; + if (it == buckets.end()) { + it = buckets.begin(); + } +} + +void Buffer::advance(BucketList::const_iterator &it) const +{ + it++; + if (it == buckets.cend()) { + it = buckets.cbegin(); + } +} + + +Buffer::Bucket &Buffer::nextBucket() +{ + // Fetch the minimum bucket index + size_t minBucketIdx = 0; + for (size_t i = 0; i < cursors.size(); i++) { + if (alive[i]) { + minBucketIdx = std::min(minBucketIdx, cursors[i].bucketIdx); + } + } + + // If there is space between the current start bucket and the read + // cursor, the start bucket can be safely overridden. + if (minBucketIdx > 0) { + // All cursor bucket indices will be decreased by one + for (size_t i = 0; i < cursors.size(); i++) { + cursors[i].bucketIdx--; + } + + // Increment the start offset + startOffset += startBucket->size(); + + // The old start bucket is the new end bucket + endBucket = startBucket; + + // Advance the start bucket, wrap around at the end of the list + advance(startBucket); + } else { + // No free bucket, insert a new one before the start bucket + endBucket = buckets.emplace(startBucket); + } + return *endBucket; +} + +Buffer::CursorId Buffer::nextCursor() +{ + bool hasCursor = false; + CursorId res = 0; + + // Search for the next free cursor starting with minNextCursorId + for (size_t i = firstDead; i < alive.size(); i++) { + if (!alive[i]) { + res = i; + hasCursor = true; + break; + } + } + + // Add a new cursor to the cursor list if no cursor is currently free + if (!hasCursor) { + res = cursors.size(); + cursors.resize(res + 1); + alive.resize(res + 1); + } + + // The next dead cursor is at least the next cursor + firstDead = res + 1; + + // Mark the new cursor as alive + alive[res] = true; + + return res; +} + +void Buffer::stream() +{ + // Fetch the bucket into which the data should be inserted, make sure it + // has the correct size + Bucket &tar = nextBucket(); + tar.resize(REQUEST_SIZE); + + // Read data from the stream into the target buffer + size_t size = callback(tar.data(), REQUEST_SIZE, userData); + + // If not enough bytes were returned, we're at the end of the stream + if (size < REQUEST_SIZE) { + tar.resize(size); + reachedEnd = true; + } +} + +Buffer::CursorId Buffer::createCursor() +{ + CursorId res = nextCursor(); + cursors[res].bucket = startBucket; + cursors[res].bucketIdx = 0; + cursors[res].bucketOffs = 0; + return res; +} + +Buffer::CursorId Buffer::createCursor(Buffer::CursorId ref) +{ + CursorId res = nextCursor(); + cursors[res] = cursors[ref]; + return res; +} + +void Buffer::copyCursor(Buffer::CursorId from, Buffer::CursorId to) +{ + cursors[to] = cursors[from]; +} + +void Buffer::deleteCursor(Buffer::CursorId cursor) +{ + alive[cursor] = false; + firstDead = std::min(firstDead, cursor); +} + +size_t Buffer::offset(Buffer::CursorId cursor) const +{ + const Cursor &c = cursors[cursor]; + size_t offs = startOffset + c.bucketOffs; + BucketList::const_iterator it = startBucket; + while (it != c.bucket) { + offs += it->size(); + advance(it); + } + return offs; +} + +bool Buffer::atEnd(Buffer::CursorId cursor) const +{ + const Cursor &c = cursors[cursor]; + return reachedEnd && + (c.bucket == endBucket && c.bucketOffs == endBucket->size()); +} + +bool Buffer::read(Buffer::CursorId cursor, char &c) +{ + Cursor &cur = cursors[cursor]; + while (true) { + // Reference at the current bucket + Bucket &bucket = *(cur.bucket); + + // If there is still data in the current bucket, return this data + if (cur.bucketOffs < bucket.size()) { + c = bucket[cur.bucketOffs]; + cur.bucketOffs++; + return true; + } else if (cur.bucket == endBucket) { + // Return false if the end of the stream has been reached, otherwise + // load new data + if (reachedEnd) { + return false; + } + stream(); + } + + // Go to the next bucket + cur.bucketIdx++; + cur.bucketOffs = 0; + advance(cur.bucket); + } +} +} +} + diff --git a/src/core/utils/Buffer.hpp b/src/core/utils/Buffer.hpp new file mode 100644 index 0000000..0dd0a25 --- /dev/null +++ b/src/core/utils/Buffer.hpp @@ -0,0 +1,301 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Buffer.hpp + * + * Helper class which allows to read from an input source from multiple cursors. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_BUFFER_HPP_ +#define _OUSIA_BUFFER_HPP_ + +#include <list> +#include <vector> + +namespace ousia { +namespace utils { + +/** + * A chunked ring buffer used in CharReader to provide access to an input stream + * with multiple read cursors. The Buffer automatically expands to the + * size of the spanned by the read cursors while reusing already allocated + * memory. + */ +class Buffer { +public: + /** + * Callback function which is called whenever new data is requested from the + * input stream. + * + * @param buf is a pointer at the memory region to which the data should be + * writtern. + * @param size is the size of the + * @param userData is a pointer at some user defined data given in the + * constructor. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. + */ + using ReadCallback = size_t (*)(char *buf, size_t size, void *userData); + + /** + * Handle used to identify a cursor. + */ + using CursorId = size_t; + +private: + /** + * Number of bytes to request from the input stream. + */ + static constexpr size_t REQUEST_SIZE = 16 * 1024; + + /** + * Type used internally to represent one chunk of memory. + */ + using Bucket = std::vector<char>; + + /** + * Type used internally to represent a bucket container. + */ + using BucketList = std::list<Bucket>; + + /** + * Type used internally for representing iterators in the bucket list. + */ + using BucketIterator = BucketList::iterator; + + /** + * Type used internally to represent a read cursor. + */ + struct Cursor { + /** + * Iterator pointing at the current bucket. + */ + BucketIterator bucket; + + /** + * Index of the bucket relative to the start bucket. + */ + size_t bucketIdx; + + /** + * Current offset within that bucket. + */ + size_t bucketOffs; + }; + + /** + * List of buckets containing the buffered memory. + */ + BucketList buckets; + + /** + * List of cursors used to access the memory. Note that cursors can be + * marked as inactive and reused lateron (to avoid having to resize the + * vector). + */ + std::vector<Cursor> cursors; + + /** + * Bitfield specifying which of the cursors is actually valid. + */ + std::vector<bool> alive; + + /** + * Function to be called whenever new data is needed. Set to nullptr if the + * Buffer is not backed by an input stream. + */ + const ReadCallback callback; + + /** + * User data given in the constructor. + */ + void *userData; + + /** + * Set to true if the input stream is at its end. + */ + bool reachedEnd; + + /** + * Iterator pointing at the current start bucket. + */ + BucketIterator startBucket; + + /** + * Iterator pointing at the last bucket. + */ + BucketIterator endBucket; + + /** + * Byte offset of the start bucket relative to the beginning of the stream. + */ + size_t startOffset; + + /** + * Points at the smallest possible available cursor index, yet does not + * guarantee that this cursor index actuall is free. + */ + CursorId firstDead; + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketIterator &it); + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketList::const_iterator &it) const; + + /** + * Internally used to find the next free cursor in the cursors vector. The + * cursor is marked as active. + * + * @return the next free cursor index. + */ + CursorId nextCursor(); + + /** + * Returns a reference at the next bucket into which data should be + * inserted. + * + * @return a bucket into which the data can be inserted. + */ + Bucket &nextBucket(); + + /** + * Reads data from the input stream and places it in the next free buffer. + */ + void stream(); + +public: + /** + * Intializes the Buffer with a reference to a ReadCallback that is used + * to fetch data from an underlying input stream. + * + * @param callback is the function that will be called whenever data is read + * from the ring buffer and the buffer does not hold enough data to fulfill + * this read request. + * @param userData is a pointer to user defined data which will be passed to + * the callback function. + */ + Buffer(ReadCallback callback, void *userData); + + /** + * Initializes the Buffer with the contents of the given string, after + * this operation the Buffer has a fixed size. + * + * @param str is the string containing the data that should be copied into + * the ring buffer. + */ + Buffer(const std::string &str); + + // No copy + Buffer(const Buffer &) = delete; + + // No assign + Buffer &operator=(const Buffer &) = delete; + + /** + * Creates a new read cursor positioned at the smallest possible position + * in the ring buffer. + */ + CursorId createCursor(); + + /** + * Creates a new read cursor positioned at the same position as the given + * read cursor. + * + * @param ref is the read cursor that should be used as reference for the + * new read cursor. + */ + CursorId createCursor(CursorId ref); + + /** + * Copies the position of one cursor to another cursor. + * + * @param from is the cursor id of which the position should be copied. + * @param to is the cursor id to which the position should be copied. + */ + void copyCursor(CursorId from, CursorId to); + + /** + * Deletes the cursor with the given id. The cursor may no longer be used + * after this function has been called. + * + * @param cursor is the id of the cursor that should be freed. + */ + void deleteCursor(CursorId cursor); + + /** + * Returns the current byte offset of the given cursor relative to the + * beginning of the stream. + * + * @param cursor is the cursor for which the byte offset relative to the + * beginning of the stream should be returned. + * @return the number of bytes since the beginning of the stream for the + * given cursor. + */ + size_t offset(CursorId cursor) const; + + /** + * Returns true if the given cursor currently is at the end of the stream. + * + * @param cursor is the cursor for which the atEnd flag should be returned. + * @return true if the there are no more bytes for this cursor. If false + * is returned, this means that there may be more bytes in the stream, + * nevertheless the end of the stream may be hit once the next read function + * is called. + */ + bool atEnd(CursorId cursor) const; + + /** + * Reads a single character from the ring buffer from the given cursor. + * + * @param cursor specifies the cursor from which the data should be read. + * The cursor will be advanced by one byte. + * @param c is the character into which the data needs to be read. + * @return true if a character was read, false if the end of the stream has + * been reached. + */ + bool read(CursorId cursor, char &c); + +// /** +// * Reads string from the ring buffer from the given cursor. +// * +// * @param cursor specifies the cursor from which the data should be read. +// * The cursor will be advanced by the specified number of bytes (or to the +// * end of the stream). +// * @param res is the vector into which the data should be read. Any already +// * present data will be overridden. +// * @param len is number of bytes that should be read from the buffer. +// * @return true if len bytes were read, false if less the len bytes have +// * been read because the end of the stream has been reached. +// */ +// bool read(CursorId cursor, std::vector<char> &res, size_t len); +}; + +} +} + +#endif /* _OUSIA_BUFFER_HPP_ */ + |