From 0f2eb7f1647aaf4d2fdedaee9ccb6f6fa6e179d2 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 8 Dec 2014 02:18:59 +0100 Subject: first (untested) version of the new buffer class --- src/core/utils/Buffer.hpp | 301 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 src/core/utils/Buffer.hpp (limited to 'src/core/utils/Buffer.hpp') diff --git a/src/core/utils/Buffer.hpp b/src/core/utils/Buffer.hpp new file mode 100644 index 0000000..0dd0a25 --- /dev/null +++ b/src/core/utils/Buffer.hpp @@ -0,0 +1,301 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Buffer.hpp + * + * Helper class which allows to read from an input source from multiple cursors. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_BUFFER_HPP_ +#define _OUSIA_BUFFER_HPP_ + +#include +#include + +namespace ousia { +namespace utils { + +/** + * A chunked ring buffer used in CharReader to provide access to an input stream + * with multiple read cursors. The Buffer automatically expands to the + * size of the spanned by the read cursors while reusing already allocated + * memory. + */ +class Buffer { +public: + /** + * Callback function which is called whenever new data is requested from the + * input stream. + * + * @param buf is a pointer at the memory region to which the data should be + * writtern. + * @param size is the size of the + * @param userData is a pointer at some user defined data given in the + * constructor. + * @return the actual number of bytes read. If the result is smaller than + * the requested size, this tells the Buffer that the end of the input + * stream is reached. + */ + using ReadCallback = size_t (*)(char *buf, size_t size, void *userData); + + /** + * Handle used to identify a cursor. + */ + using CursorId = size_t; + +private: + /** + * Number of bytes to request from the input stream. + */ + static constexpr size_t REQUEST_SIZE = 16 * 1024; + + /** + * Type used internally to represent one chunk of memory. + */ + using Bucket = std::vector; + + /** + * Type used internally to represent a bucket container. + */ + using BucketList = std::list; + + /** + * Type used internally for representing iterators in the bucket list. + */ + using BucketIterator = BucketList::iterator; + + /** + * Type used internally to represent a read cursor. + */ + struct Cursor { + /** + * Iterator pointing at the current bucket. + */ + BucketIterator bucket; + + /** + * Index of the bucket relative to the start bucket. + */ + size_t bucketIdx; + + /** + * Current offset within that bucket. + */ + size_t bucketOffs; + }; + + /** + * List of buckets containing the buffered memory. + */ + BucketList buckets; + + /** + * List of cursors used to access the memory. Note that cursors can be + * marked as inactive and reused lateron (to avoid having to resize the + * vector). + */ + std::vector cursors; + + /** + * Bitfield specifying which of the cursors is actually valid. + */ + std::vector alive; + + /** + * Function to be called whenever new data is needed. Set to nullptr if the + * Buffer is not backed by an input stream. + */ + const ReadCallback callback; + + /** + * User data given in the constructor. + */ + void *userData; + + /** + * Set to true if the input stream is at its end. + */ + bool reachedEnd; + + /** + * Iterator pointing at the current start bucket. + */ + BucketIterator startBucket; + + /** + * Iterator pointing at the last bucket. + */ + BucketIterator endBucket; + + /** + * Byte offset of the start bucket relative to the beginning of the stream. + */ + size_t startOffset; + + /** + * Points at the smallest possible available cursor index, yet does not + * guarantee that this cursor index actuall is free. + */ + CursorId firstDead; + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketIterator &it); + + /** + * Advances the bucket iterator, cares about wrapping around in the ring. + */ + void advance(BucketList::const_iterator &it) const; + + /** + * Internally used to find the next free cursor in the cursors vector. The + * cursor is marked as active. + * + * @return the next free cursor index. + */ + CursorId nextCursor(); + + /** + * Returns a reference at the next bucket into which data should be + * inserted. + * + * @return a bucket into which the data can be inserted. + */ + Bucket &nextBucket(); + + /** + * Reads data from the input stream and places it in the next free buffer. + */ + void stream(); + +public: + /** + * Intializes the Buffer with a reference to a ReadCallback that is used + * to fetch data from an underlying input stream. + * + * @param callback is the function that will be called whenever data is read + * from the ring buffer and the buffer does not hold enough data to fulfill + * this read request. + * @param userData is a pointer to user defined data which will be passed to + * the callback function. + */ + Buffer(ReadCallback callback, void *userData); + + /** + * Initializes the Buffer with the contents of the given string, after + * this operation the Buffer has a fixed size. + * + * @param str is the string containing the data that should be copied into + * the ring buffer. + */ + Buffer(const std::string &str); + + // No copy + Buffer(const Buffer &) = delete; + + // No assign + Buffer &operator=(const Buffer &) = delete; + + /** + * Creates a new read cursor positioned at the smallest possible position + * in the ring buffer. + */ + CursorId createCursor(); + + /** + * Creates a new read cursor positioned at the same position as the given + * read cursor. + * + * @param ref is the read cursor that should be used as reference for the + * new read cursor. + */ + CursorId createCursor(CursorId ref); + + /** + * Copies the position of one cursor to another cursor. + * + * @param from is the cursor id of which the position should be copied. + * @param to is the cursor id to which the position should be copied. + */ + void copyCursor(CursorId from, CursorId to); + + /** + * Deletes the cursor with the given id. The cursor may no longer be used + * after this function has been called. + * + * @param cursor is the id of the cursor that should be freed. + */ + void deleteCursor(CursorId cursor); + + /** + * Returns the current byte offset of the given cursor relative to the + * beginning of the stream. + * + * @param cursor is the cursor for which the byte offset relative to the + * beginning of the stream should be returned. + * @return the number of bytes since the beginning of the stream for the + * given cursor. + */ + size_t offset(CursorId cursor) const; + + /** + * Returns true if the given cursor currently is at the end of the stream. + * + * @param cursor is the cursor for which the atEnd flag should be returned. + * @return true if the there are no more bytes for this cursor. If false + * is returned, this means that there may be more bytes in the stream, + * nevertheless the end of the stream may be hit once the next read function + * is called. + */ + bool atEnd(CursorId cursor) const; + + /** + * Reads a single character from the ring buffer from the given cursor. + * + * @param cursor specifies the cursor from which the data should be read. + * The cursor will be advanced by one byte. + * @param c is the character into which the data needs to be read. + * @return true if a character was read, false if the end of the stream has + * been reached. + */ + bool read(CursorId cursor, char &c); + +// /** +// * Reads string from the ring buffer from the given cursor. +// * +// * @param cursor specifies the cursor from which the data should be read. +// * The cursor will be advanced by the specified number of bytes (or to the +// * end of the stream). +// * @param res is the vector into which the data should be read. Any already +// * present data will be overridden. +// * @param len is number of bytes that should be read from the buffer. +// * @return true if len bytes were read, false if less the len bytes have +// * been read because the end of the stream has been reached. +// */ +// bool read(CursorId cursor, std::vector &res, size_t len); +}; + +} +} + +#endif /* _OUSIA_BUFFER_HPP_ */ + -- cgit v1.2.3