summaryrefslogtreecommitdiff
path: root/src/core/utils/Buffer.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/utils/Buffer.hpp')
-rw-r--r--src/core/utils/Buffer.hpp301
1 files changed, 301 insertions, 0 deletions
diff --git a/src/core/utils/Buffer.hpp b/src/core/utils/Buffer.hpp
new file mode 100644
index 0000000..0dd0a25
--- /dev/null
+++ b/src/core/utils/Buffer.hpp
@@ -0,0 +1,301 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Buffer.hpp
+ *
+ * Helper class which allows to read from an input source from multiple cursors.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_BUFFER_HPP_
+#define _OUSIA_BUFFER_HPP_
+
+#include <list>
+#include <vector>
+
+namespace ousia {
+namespace utils {
+
+/**
+ * A chunked ring buffer used in CharReader to provide access to an input stream
+ * with multiple read cursors. The Buffer automatically expands to the
+ * size of the spanned by the read cursors while reusing already allocated
+ * memory.
+ */
+class Buffer {
+public:
+ /**
+ * Callback function which is called whenever new data is requested from the
+ * input stream.
+ *
+ * @param buf is a pointer at the memory region to which the data should be
+ * writtern.
+ * @param size is the size of the
+ * @param userData is a pointer at some user defined data given in the
+ * constructor.
+ * @return the actual number of bytes read. If the result is smaller than
+ * the requested size, this tells the Buffer that the end of the input
+ * stream is reached.
+ */
+ using ReadCallback = size_t (*)(char *buf, size_t size, void *userData);
+
+ /**
+ * Handle used to identify a cursor.
+ */
+ using CursorId = size_t;
+
+private:
+ /**
+ * Number of bytes to request from the input stream.
+ */
+ static constexpr size_t REQUEST_SIZE = 16 * 1024;
+
+ /**
+ * Type used internally to represent one chunk of memory.
+ */
+ using Bucket = std::vector<char>;
+
+ /**
+ * Type used internally to represent a bucket container.
+ */
+ using BucketList = std::list<Bucket>;
+
+ /**
+ * Type used internally for representing iterators in the bucket list.
+ */
+ using BucketIterator = BucketList::iterator;
+
+ /**
+ * Type used internally to represent a read cursor.
+ */
+ struct Cursor {
+ /**
+ * Iterator pointing at the current bucket.
+ */
+ BucketIterator bucket;
+
+ /**
+ * Index of the bucket relative to the start bucket.
+ */
+ size_t bucketIdx;
+
+ /**
+ * Current offset within that bucket.
+ */
+ size_t bucketOffs;
+ };
+
+ /**
+ * List of buckets containing the buffered memory.
+ */
+ BucketList buckets;
+
+ /**
+ * List of cursors used to access the memory. Note that cursors can be
+ * marked as inactive and reused lateron (to avoid having to resize the
+ * vector).
+ */
+ std::vector<Cursor> cursors;
+
+ /**
+ * Bitfield specifying which of the cursors is actually valid.
+ */
+ std::vector<bool> alive;
+
+ /**
+ * Function to be called whenever new data is needed. Set to nullptr if the
+ * Buffer is not backed by an input stream.
+ */
+ const ReadCallback callback;
+
+ /**
+ * User data given in the constructor.
+ */
+ void *userData;
+
+ /**
+ * Set to true if the input stream is at its end.
+ */
+ bool reachedEnd;
+
+ /**
+ * Iterator pointing at the current start bucket.
+ */
+ BucketIterator startBucket;
+
+ /**
+ * Iterator pointing at the last bucket.
+ */
+ BucketIterator endBucket;
+
+ /**
+ * Byte offset of the start bucket relative to the beginning of the stream.
+ */
+ size_t startOffset;
+
+ /**
+ * Points at the smallest possible available cursor index, yet does not
+ * guarantee that this cursor index actuall is free.
+ */
+ CursorId firstDead;
+
+ /**
+ * Advances the bucket iterator, cares about wrapping around in the ring.
+ */
+ void advance(BucketIterator &it);
+
+ /**
+ * Advances the bucket iterator, cares about wrapping around in the ring.
+ */
+ void advance(BucketList::const_iterator &it) const;
+
+ /**
+ * Internally used to find the next free cursor in the cursors vector. The
+ * cursor is marked as active.
+ *
+ * @return the next free cursor index.
+ */
+ CursorId nextCursor();
+
+ /**
+ * Returns a reference at the next bucket into which data should be
+ * inserted.
+ *
+ * @return a bucket into which the data can be inserted.
+ */
+ Bucket &nextBucket();
+
+ /**
+ * Reads data from the input stream and places it in the next free buffer.
+ */
+ void stream();
+
+public:
+ /**
+ * Intializes the Buffer with a reference to a ReadCallback that is used
+ * to fetch data from an underlying input stream.
+ *
+ * @param callback is the function that will be called whenever data is read
+ * from the ring buffer and the buffer does not hold enough data to fulfill
+ * this read request.
+ * @param userData is a pointer to user defined data which will be passed to
+ * the callback function.
+ */
+ Buffer(ReadCallback callback, void *userData);
+
+ /**
+ * Initializes the Buffer with the contents of the given string, after
+ * this operation the Buffer has a fixed size.
+ *
+ * @param str is the string containing the data that should be copied into
+ * the ring buffer.
+ */
+ Buffer(const std::string &str);
+
+ // No copy
+ Buffer(const Buffer &) = delete;
+
+ // No assign
+ Buffer &operator=(const Buffer &) = delete;
+
+ /**
+ * Creates a new read cursor positioned at the smallest possible position
+ * in the ring buffer.
+ */
+ CursorId createCursor();
+
+ /**
+ * Creates a new read cursor positioned at the same position as the given
+ * read cursor.
+ *
+ * @param ref is the read cursor that should be used as reference for the
+ * new read cursor.
+ */
+ CursorId createCursor(CursorId ref);
+
+ /**
+ * Copies the position of one cursor to another cursor.
+ *
+ * @param from is the cursor id of which the position should be copied.
+ * @param to is the cursor id to which the position should be copied.
+ */
+ void copyCursor(CursorId from, CursorId to);
+
+ /**
+ * Deletes the cursor with the given id. The cursor may no longer be used
+ * after this function has been called.
+ *
+ * @param cursor is the id of the cursor that should be freed.
+ */
+ void deleteCursor(CursorId cursor);
+
+ /**
+ * Returns the current byte offset of the given cursor relative to the
+ * beginning of the stream.
+ *
+ * @param cursor is the cursor for which the byte offset relative to the
+ * beginning of the stream should be returned.
+ * @return the number of bytes since the beginning of the stream for the
+ * given cursor.
+ */
+ size_t offset(CursorId cursor) const;
+
+ /**
+ * Returns true if the given cursor currently is at the end of the stream.
+ *
+ * @param cursor is the cursor for which the atEnd flag should be returned.
+ * @return true if the there are no more bytes for this cursor. If false
+ * is returned, this means that there may be more bytes in the stream,
+ * nevertheless the end of the stream may be hit once the next read function
+ * is called.
+ */
+ bool atEnd(CursorId cursor) const;
+
+ /**
+ * Reads a single character from the ring buffer from the given cursor.
+ *
+ * @param cursor specifies the cursor from which the data should be read.
+ * The cursor will be advanced by one byte.
+ * @param c is the character into which the data needs to be read.
+ * @return true if a character was read, false if the end of the stream has
+ * been reached.
+ */
+ bool read(CursorId cursor, char &c);
+
+// /**
+// * Reads string from the ring buffer from the given cursor.
+// *
+// * @param cursor specifies the cursor from which the data should be read.
+// * The cursor will be advanced by the specified number of bytes (or to the
+// * end of the stream).
+// * @param res is the vector into which the data should be read. Any already
+// * present data will be overridden.
+// * @param len is number of bytes that should be read from the buffer.
+// * @return true if len bytes were read, false if less the len bytes have
+// * been read because the end of the stream has been reached.
+// */
+// bool read(CursorId cursor, std::vector<char> &res, size_t len);
+};
+
+}
+}
+
+#endif /* _OUSIA_BUFFER_HPP_ */
+