/*
Ousía
Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include "Reader.hpp"
namespace ousia {
namespace variant {
// Message strings for the error conditions of the variant reader. They are
// meant to be handed to a logger together with the reader position.
static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character";
static const char *ERR_UNEXPECTED_END = "Unexpected end";
static const char *ERR_UNTERMINATED = "Unterminated literal";
// States of the state machine driving parseString:
// STATE_INIT      -- the opening quote has not been seen yet
// STATE_IN_STRING -- currently inside the quoted literal
// STATE_ESCAPE    -- the previous character inside the literal was a backslash
static const int STATE_INIT = 0;
static const int STATE_IN_STRING = 1;
static const int STATE_ESCAPE = 2;
static std::pair parseString(
BufferedCharReader &reader, const unordered_set *delims = nullptr,
Logger *logger = nullptr)
{
// Initialize the internal state
Err errCode = Err::OK;
int state = STATE_INIT;
char quote = 0;
std::stringstream res;
// Statemachine whic iterates over each character in the stream
// TODO: Combination of peeking and consumePeek is stupid as consumePeek is
// the default (read and putBack would obviously be better, yet the latter
// is not trivial to implement in the current BufferedCharReader).
char c;
while (reader.peek(&c)) {
switch (state) {
case STATE_INIT:
if (c == '"' || c == '\'') {
quote = c;
state = STATE_IN_STRING;
} else if (delims && delims.count(c)) {
Logger.log(ERR_UNTERMINATED, reader);
return std::make_pair(Err::UNEXPECTED_END, res.str());
} else if (Utils::isWhitespace(c)) {
reader.consumePeek();
continue;
}
return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str());
break;
case STATE_IN_STRING:
if (c == q) {
state = STATE_END;
reader.consumePeek();
return std::make_pair(Err::OK, res.str());
} else if (c == '\\') {
state = STATE_ESCAPE;
} else if (c == '\n') {
return std::make_pair(Err::UNTERMINATED, res.str());
}
res << c;
reader.consumePeek();
break;
case STATE_ESCAPE:
switch (c) {
case 'b':
res << '\b';
break;
case 'f':
res << '\f';
break;
case 'n':
res << '\n';
break;
case 'r':
res << '\r';
break;
case 't':
res << '\t';
break;
case 'v':
res << '\v';
break;
case '\'':
res << '\'';
break;
case '"':
res << '"';
break;
case '\\':
res << '\\';
break;
case '\n':
break;
case 'x':
// TODO: Parse Latin-1 sequence hex XX
break;
case 'u':
// TODO: Parse 16-Bit unicode character hex XXXX
break;
default:
if (Utils::isNumeric(c)) {
// TODO: Parse octal 000 sequence
} else {
errCode = Err::ERR_INVALID_ESCAPE;
}
break;
}
state = STATE_IN_STRING;
reader.consumePeek();
break;
}
}
return std::make_pair(Err::UNEXPECTED_END, res.str());
}
static std::pair parseUnescapedString(
BufferedCharReader &reader, const unordered_set *delims)
{
assert(delims);
std::stringstream res;
char c;
while (reader.peek(&c)) {
if (delims->count(c)) {
return std::make_pair(Err::OK, res.str());
}
res << c;
reader.consumePeek();
}
return std::make_pair(Err::UNEXPECTED_END, res.str());
}
static std::pair parseGeneric(BufferedCharReader &reader,
const unordered_set *delims)
{
assert(delims);
char c;
while (reader.peek(&c)) {
// Stop if a delimiter is reached, skipp all whitespace characters
if (delims->count(c)) {
return std::make_pair(Err::OK, res.str());
} else if (Utils::isWhitespace(c)) {
reader.consumePeek();
continue;
}
// Parse a string if a quote is reached
if (c == '"' || c == '\'') {
return parseString(reader, nullptr);
}
if (c == '[') {
// TODO: Parse struct descriptor
}
if (isNumeric(c)) {
// TODO: Parse integer/double
}
// Parse an unescaped string in any other case
return parseUnescapedString(reader, delims);
}
return std::make_pair(Err::UNEXPECTED_END, res.str());
}
}
}