/*
Ousía
Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <array>
#include <cstdint>
#include <string>

#include "BufferedCharReader.hpp"
namespace ousia {
// State byte layout of the linebreak state machine: the low nibble stores a
// counter, the high nibble stores the type of the last linebreak character.

// Masks used to split the state byte into its two parts.
static constexpr uint8_t LB_STATE_MASK_CNT = 0x0F;
static constexpr uint8_t LB_STATE_MASK_TYPE = 0xF0;

// Counter values (low nibble).
static constexpr uint8_t LB_STATE_NONE = 0x00;
static constexpr uint8_t LB_STATE_ONE = 0x01;

// Linebreak character types (high nibble): line feed and carriage return.
static constexpr uint8_t LB_STATE_LF = 0x10;
static constexpr uint8_t LB_STATE_CR = 0x20;
/* Struct BufferedCharReader::ReadCursor */
/**
 * Creates a cursor that points at the very beginning of the buffer.
 *
 * @param line initial value of the line counter.
 * @param column initial value of the column counter.
 * @param destructive value stored in the "destructive" member. In
 * readCharacterAtCursor a destructive cursor pops exhausted buffer elements,
 * while a non-destructive cursor merely advances to the next element.
 */
BufferedCharReader::ReadCursor::ReadCursor(unsigned int line,
unsigned int column,
bool destructive)
: line(line),
column(column),
// Start at the first character of the first buffer element
bufferElem(0),
bufferPos(0),
destructive(destructive),
// No linebreak character has been seen yet
lbState(LB_STATE_NONE)
{
}
void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor)
{
this->line = cursor.line;
this->column = cursor.column;
this->bufferElem = cursor.bufferElem;
this->bufferPos = cursor.bufferPos;
this->lbState = cursor.lbState;
}
/* Class BufferedCharReader */
/**
 * Creates a reader without any attached input stream. As "depleted" starts out
 * as false, data can be supplied through feed() until close() is called.
 *
 * @param line initial line number handed to both cursors.
 * @param column initial column number handed to both cursors.
 */
BufferedCharReader::BufferedCharReader(int line, int column)
: inputStream(nullptr),
// The read cursor is destructive, the peek cursor is not
readCursor(line, column, true),
peekCursor(line, column, false),
depleted(false)
{
}
/**
 * Creates a reader that serves exactly the given string.
 *
 * The reader is marked as depleted right away, so later calls to feed() are
 * ignored.
 *
 * @param str the complete input data.
 * @param line initial line number handed to both cursors.
 * @param column initial column number handed to both cursors.
 */
BufferedCharReader::BufferedCharReader(const std::string &str, int line,
                                       int column)
    : BufferedCharReader(line, column)
{
    // The delegated constructor set up a stream-less reader; all that is left
    // is to store the data and flag that nothing more will arrive
    depleted = true;
    buffer.push_back(str);
}
/**
 * Creates a reader that pulls its data from the given input stream. Only the
 * address of the stream is stored, so the stream has to stay alive for as long
 * as the reader reads from it.
 *
 * @param inputStream stream the data is read from on demand.
 * @param line initial line number handed to both cursors.
 * @param column initial column number handed to both cursors.
 */
BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line,
int column)
: inputStream(&inputStream),
// The read cursor is destructive, the peek cursor is not
readCursor(line, column, true),
peekCursor(line, column, false),
// Becomes true in readCharacterAtCursor once the stream yields no more data
depleted(false)
{
}
/**
 * Appends a chunk of input data to the internal buffer.
 *
 * The call is silently ignored if the reader is already marked as depleted or
 * if it is backed by an input stream.
 *
 * @param data the chunk that should be appended.
 */
void BufferedCharReader::feed(const std::string &data)
{
    if (depleted || inputStream) {
        return;
    }
    buffer.push_back(data);
}
/**
 * Marks a manually fed reader as depleted, i.e. no more data will be accepted
 * by feed(). Has no effect on readers that are backed by an input stream.
 */
void BufferedCharReader::close()
{
    if (inputStream) {
        return;
    }
    depleted = true;
}
/**
 * Collapses linebreak sequences into a single line feed.
 *
 * The sequences "\n", "\r", "\n\r" and "\r\n" are each reported as one '\n':
 * the first character of a sequence is rewritten to '\n' and passed on, the
 * second character of a two-character sequence is swallowed.
 * TODO: Change behaviour to \n, \n\r, \r\n
 *
 * @param cursor cursor whose linebreak state (lbState) is read and updated.
 * @param c in/out pointer to the character that was just read. It is
 * overwritten with '\n' if the character starts a linebreak.
 * @return true if *c should be handed to the caller, false if the character
 * was swallowed as the second half of a two-character linebreak.
 */
bool BufferedCharReader::substituteLinebreaks(ReadCursor &cursor, char *c)
{
    const bool isLf = (*c == '\n');
    const bool isCr = (*c == '\r');

    // Any other character ends a linebreak sequence: reset the state
    if (!isLf && !isCr) {
        cursor.lbState = LB_STATE_NONE;
        return true;
    }

    // Determine the type of the current linebreak character and fetch the
    // counter and type remembered from the previous character
    const uint8_t type = isLf ? LB_STATE_LF : LB_STATE_CR;
    const uint8_t lastCount = cursor.lbState & LB_STATE_MASK_CNT;
    const uint8_t lastType = cursor.lbState & LB_STATE_MASK_TYPE;

    // A pending linebreak character of the other type means that this
    // character completes a "\n\r" or "\r\n" pair -- swallow it and start over
    if ((lastCount != 0) && (lastType != type)) {
        cursor.lbState = LB_STATE_NONE | type;
        return false;
    }

    // Otherwise this character starts a new linebreak. This includes a
    // repetition of the same character (e.g. the second '\r' in "\r\r\n"),
    // which must stay pending so that a following character of the other type
    // is still merged into it instead of producing an extra linebreak.
    cursor.lbState = LB_STATE_ONE | type;
    *c = '\n';
    return true;
}
/**
 * Reads the next character at the position of the given cursor and advances
 * the cursor.
 *
 * If the cursor has run past the buffered data and an input stream is attached,
 * another block is read from the stream. Linebreak sequences are collapsed to
 * a single '\n' via substituteLinebreaks. The line and column counters of the
 * cursor are updated; UTF-8 continuation bytes do not advance the column.
 *
 * @param cursor the cursor to read from (read or peek cursor).
 * @param c receives the character that was read.
 * @return true if a character was stored in *c, false if no more data is
 * available.
 */
bool BufferedCharReader::readCharacterAtCursor(ReadCursor &cursor, char *c)
{
    bool hasChar = false;
    while (!hasChar) {
        // The cursor does not point at a valid buffer element -- try to pull
        // another block of data into the internal buffer
        if (cursor.bufferElem >= buffer.size()) {
            // Abort if there is no more data or no input stream is set
            if (depleted || !inputStream) {
                return false;
            }

            // Read a block of the specified size
            constexpr std::streamsize BUFFER_SIZE = 1024;
            std::array<char, BUFFER_SIZE> buf;
            const std::streamsize cnt =
                inputStream->read(buf.data(), BUFFER_SIZE).gcount();

            // Nothing could be read: treat the stream as exhausted. Returning
            // here unconditionally guarantees that the buffer is never indexed
            // with an out-of-range element below.
            if (cnt <= 0) {
                depleted = true;
                return false;
            }

            // Append exactly the bytes that were read. The block is not
            // NUL-terminated, so the length has to be passed explicitly.
            buffer.emplace_back(buf.data(),
                                static_cast<std::string::size_type>(cnt));
            continue;
        }

        // Fetch the buffer element the cursor currently points at
        const std::string &data = buffer[cursor.bufferElem];

        // The element is exhausted: a destructive cursor drops it, a
        // non-destructive cursor just moves on to the next element
        if (cursor.bufferPos >= data.length()) {
            if (cursor.destructive) {
                buffer.pop_front();
            } else {
                cursor.bufferElem++;
            }
            cursor.bufferPos = 0;
            continue;
        }

        // Read the character and advance the buffer position
        *c = data[cursor.bufferPos];
        cursor.bufferPos++;

        // Substitute linebreaks with a single LF (0x0A); swallowed characters
        // cause another iteration of the loop
        hasChar = substituteLinebreaks(cursor, c);
    }

    // Update the position counters
    if (*c == '\n') {
        cursor.line++;
        cursor.column = 1;
    } else if ((*c & 0xC0) != 0x80) {
        // Bytes of the form 10xxxxxx are UTF-8 continuation bytes and do not
        // start a new character, so they do not advance the column
        cursor.column++;
    }
    return true;
}
/**
 * Reads the next character at the peek cursor without consuming it from the
 * read cursor. Repeated calls advance the peek cursor character by character.
 *
 * @param c receives the peeked character.
 * @return true if a character was stored in *c, false if no data is available.
 */
bool BufferedCharReader::peek(char *c)
{
    const bool gotChar = readCharacterAtCursor(peekCursor, c);
    return gotChar;
}
/**
 * Reads the next character at the read cursor and discards any pending peek.
 *
 * @param c receives the character that was read.
 * @return true if a character was stored in *c, false if no data is available.
 */
bool BufferedCharReader::read(char *c)
{
    const bool gotChar = readCharacterAtCursor(readCursor, c);

    // Synchronise the peek cursor only after the read cursor has advanced.
    // Resetting it beforehand would leave it one character behind, and -- if
    // the read dropped an exhausted buffer element -- with a buffer position
    // that belongs to an element that no longer exists.
    resetPeek();
    return gotChar;
}
/**
 * Turns everything that was peeked so far into consumed data: the read cursor
 * is moved to the position of the peek cursor.
 */
void BufferedCharReader::consumePeek()
{
    // Drop every buffer element that lies completely before the peek cursor
    auto stale = peekCursor.bufferElem;
    while (stale > 0) {
        buffer.pop_front();
        --stale;
    }

    // The element the peek cursor points at is now the first one
    peekCursor.bufferElem = 0;

    // Move the read cursor to the peek position
    readCursor.assign(peekCursor);
}
void BufferedCharReader::resetPeek()
{
// Reset the peek cursor to the read cursor
peekCursor.assign(readCursor);
}
/**
 * Checks whether the read cursor has reached the end of the available data.
 *
 * @return true if no input stream is attached (or the reader is depleted) and
 * no unread characters are left in the buffer, false otherwise. While an
 * attached stream has not been depleted the result is always false.
 */
bool BufferedCharReader::atEnd() const
{
    // An attached stream that is not yet depleted may still deliver data
    if (inputStream && !depleted) {
        return false;
    }

    // Nothing buffered at all
    if (buffer.empty()) {
        return true;
    }

    // Unread characters left in the element the read cursor points at
    auto it = buffer.begin();
    if (readCursor.bufferPos < it->size()) {
        return false;
    }

    // Any further element only counts if it actually contains data; empty
    // chunks (e.g. from feeding an empty string) must not hide the end
    for (++it; it != buffer.end(); ++it) {
        if (!it->empty()) {
            return false;
        }
    }
    return true;
}
}