summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to 'src/core')
-rw-r--r--src/core/variant/Reader.cpp177
-rw-r--r--src/core/variant/Reader.hpp130
-rw-r--r--src/core/variant/Variant.hpp6
3 files changed, 312 insertions, 1 deletions
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
new file mode 100644
index 0000000..6142ecf
--- /dev/null
+++ b/src/core/variant/Reader.cpp
@@ -0,0 +1,177 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cassert>
+#include <sstream>
+
+#include <core/Utils.hpp>
+
+#include "Reader.hpp"
+
+namespace ousia {
+namespace variant {
+
+/*
+ * States of the state machine used by Reader::parseString.
+ */
+
+// Waiting for the opening quote character.
+static constexpr int STATE_INIT = 0;
+
+// Inside the quoted string, characters are copied to the result.
+static constexpr int STATE_IN_STRING = 1;
+
+// A backslash has been read, the next character selects the escape sequence.
+static constexpr int STATE_ESCAPE = 2;
+
+/**
+ * Parses a string enclosed by " or ' and resolves the escape sequences that
+ * are currently implemented (\b, \f, \n, \r, \t, \v, \', \", \\ and an escaped
+ * line break, which is dropped).
+ *
+ * @param reader is the character source. On success the closing quote has been
+ * consumed. If a delimiter is found before the opening quote or a raw line
+ * break is found inside the string, the offending character is not consumed.
+ * @param delims is an optional set of characters which abort parsing if they
+ * occur before the opening quote. May be nullptr, in which case everything in
+ * front of the opening quote is skipped.
+ * @return a pair of the error code and the characters read so far:
+ * Err::OK if a complete string was read, Err::ERR_INVALID_ESCAPE if a complete
+ * string containing an unknown escape sequence was read,
+ * Err::ERR_UNTERMINATED if a raw line break was found inside the string and
+ * Err::ERR_UNEXPECTED_END if a delimiter was found in front of the string or
+ * the stream ended before the closing quote.
+ */
+std::pair<Reader::Err, std::string> Reader::parseString(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+    // Initialize the internal state
+    Err errCode = Err::OK;
+    int state = STATE_INIT;
+    char quote = 0;
+    std::string res;
+
+    // State machine which iterates over each character in the stream
+    // TODO: Combination of peeking and consumePeek is stupid as consumePeek is
+    // the default (read and putBack would obviously be better, yet the latter
+    // is not trivial to implement in the current BufferedCharReader).
+    char c;
+    while (reader.peek(&c)) {
+        switch (state) {
+            case STATE_INIT:
+                if (c == '"' || c == '\'') {
+                    // Remember which quote has to terminate the string
+                    quote = c;
+                    state = STATE_IN_STRING;
+                } else if (delims && delims->count(c)) {
+                    // Delimiter in front of the string: abort and leave the
+                    // delimiter in the stream for the caller
+                    return std::make_pair(Err::ERR_UNEXPECTED_END, res);
+                }
+                // Any other character in front of the string is skipped
+                break;
+            case STATE_IN_STRING:
+                if (c == quote) {
+                    // Consume the closing quote. Report an invalid escape
+                    // sequence encountered on the way, if there was one.
+                    reader.consumePeek();
+                    return std::make_pair(errCode, res);
+                } else if (c == '\n') {
+                    // Raw line breaks are not allowed inside the literal, the
+                    // line break is left in the stream
+                    return std::make_pair(Err::ERR_UNTERMINATED, res);
+                } else if (c == '\\') {
+                    // The backslash itself is not part of the result
+                    state = STATE_ESCAPE;
+                } else {
+                    res += c;
+                }
+                break;
+            case STATE_ESCAPE:
+                switch (c) {
+                    case 'b':
+                        res += '\b';
+                        break;
+                    case 'f':
+                        res += '\f';
+                        break;
+                    case 'n':
+                        res += '\n';
+                        break;
+                    case 'r':
+                        res += '\r';
+                        break;
+                    case 't':
+                        res += '\t';
+                        break;
+                    case 'v':
+                        res += '\v';
+                        break;
+                    case '\'':
+                        res += '\'';
+                        break;
+                    case '"':
+                        res += '"';
+                        break;
+                    case '\\':
+                        res += '\\';
+                        break;
+                    case '\n':
+                        // Escaped line break: line continuation, emit nothing
+                        break;
+                    case 'x':
+                        // TODO: Parse Latin-1 sequence hex XX
+                        break;
+                    case 'u':
+                        // TODO: Parse 16-Bit unicode character hex XXXX
+                        break;
+                    default:
+                        if (Utils::isNumeric(c)) {
+                            // TODO: Parse octal 000 sequence
+                        } else {
+                            // Remember the error, but keep on reading up to
+                            // the closing quote
+                            errCode = Err::ERR_INVALID_ESCAPE;
+                        }
+                        break;
+                }
+                state = STATE_IN_STRING;
+                break;
+        }
+
+        // Every path that did not return above consumes the peeked character
+        reader.consumePeek();
+    }
+
+    // The stream ended before the closing quote was found
+    return std::make_pair(Err::ERR_UNEXPECTED_END, res);
+}
+
+/**
+ * Reads characters from the given reader until one of the given delimiters is
+ * reached. No escape sequences are resolved.
+ *
+ * @param reader is the character source. The terminating delimiter is not
+ * consumed.
+ * @param delims is the set of characters which terminate the string. Must not
+ * be nullptr.
+ * @return a pair of the error code and the characters read so far: Err::OK if
+ * a delimiter was reached, Err::ERR_UNEXPECTED_END if the stream ended before
+ * any delimiter was found.
+ */
+std::pair<Reader::Err, std::string> Reader::parseUnescapedString(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+    assert(delims);
+
+    std::string res;
+    char c;
+    while (reader.peek(&c)) {
+        // Stop in front of the delimiter, it is left in the stream
+        if (delims->count(c)) {
+            return std::make_pair(Err::OK, res);
+        }
+        res += c;
+        reader.consumePeek();
+    }
+
+    // The stream ended without reaching a delimiter
+    return std::make_pair(Err::ERR_UNEXPECTED_END, res);
+}
+
+/**
+ * Skips leading whitespace and parses the next item from the stream: a quoted
+ * string if the item starts with " or ', an unescaped string terminated by one
+ * of the delimiters otherwise.
+ *
+ * NOTE(review): the string results are converted with Variant(const char *),
+ * this assumes that Variant provides such a constructor -- confirm against
+ * Variant.hpp.
+ *
+ * @param reader is the character source.
+ * @param delims is the set of characters which terminate the item. Must not be
+ * nullptr.
+ * @return a pair of the error code and the value that has been read. If a
+ * delimiter is reached before any item, Err::OK and an empty string are
+ * returned. If the stream ends before any item, Err::ERR_UNEXPECTED_END and an
+ * empty string are returned.
+ */
+std::pair<Reader::Err, Variant> Reader::parseGeneric(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+    assert(delims);
+
+    char c;
+    while (reader.peek(&c)) {
+        // Stop if a delimiter is reached, the delimiter is not consumed
+        if (delims->count(c)) {
+            return std::pair<Err, Variant>(Err::OK, Variant(""));
+        }
+
+        // Skip all whitespace characters in front of the item
+        if (Utils::isWhitespace(c)) {
+            reader.consumePeek();
+            continue;
+        }
+
+        // Parse a string if a quote is reached
+        if (c == '"' || c == '\'') {
+            const std::pair<Err, std::string> str =
+                parseString(reader, nullptr);
+            return std::pair<Err, Variant>(str.first,
+                                           Variant(str.second.c_str()));
+        }
+
+        if (c == '[') {
+            // TODO: Parse struct descriptor
+        }
+
+        if (Utils::isNumeric(c)) {
+            // TODO: Parse integer/double
+        }
+
+        // Parse an unescaped string in any other case
+        const std::pair<Err, std::string> str =
+            parseUnescapedString(reader, delims);
+        return std::pair<Err, Variant>(str.first,
+                                       Variant(str.second.c_str()));
+    }
+
+    // The stream ended before any item or delimiter was found
+    return std::pair<Err, Variant>(Err::ERR_UNEXPECTED_END, Variant(""));
+}
+
+}
+}
+
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
new file mode 100644
index 0000000..3f945f0
--- /dev/null
+++ b/src/core/variant/Reader.hpp
@@ -0,0 +1,130 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Reader.hpp
+ *
+ * Provides parsers for various micro formats. These formats include integers,
+ * doubles, strings, JSON and the Ousía struct notation.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_VARIANT_READER_HPP_
+#define _OUSIA_VARIANT_READER_HPP_
+
+#include <unordered_set>
+#include <utility>
+
+#include <core/BufferedCharReader.hpp>
+
+#include "Variant.hpp"
+
+namespace ousia {
+namespace variant {
+
+/**
+ * The Reader class bundles static functions which parse micro formats from a
+ * BufferedCharReader. Each function returns a pair of an error code and the
+ * value that has been read.
+ */
+class Reader {
+public:
+    // TODO: Pass logger instance instead of using error codes?
+
+    /**
+     * The Err enum describes possible error codes that may be encountered when
+     * parsing the microtypes.
+     */
+    enum class Err : int {
+        /**
+         * Reached the end of the stream, but expected more data.
+         */
+        ERR_UNEXPECTED_END = -1,
+
+        /**
+         * The stream is malformed.
+         */
+        ERR_MALFORMED = -2,
+
+        /**
+         * Unexpected character.
+         */
+        ERR_UNEXPECTED_CHARACTER = -3,
+
+        /**
+         * Unterminated literal.
+         */
+        ERR_UNTERMINATED = -4,
+
+        /**
+         * Invalid escape character.
+         */
+        ERR_INVALID_ESCAPE = -5,
+
+        /**
+         * A value of the requested type was extracted successfully.
+         */
+        OK = 0
+    };
+
+    /**
+     * Parses a string which may either be enclosed by " or ', unescapes
+     * entities in the string as specified for JavaScript.
+     *
+     * @param reader is a reference to the BufferedCharReader instance which is
+     * the source for the character data. The reader will be positioned after
+     * the terminating quote character or at the terminating delimiting
+     * character.
+     * @param delims is an optional set of delimiters after which parsing has to
+     * be stopped (the delimiters may occur inside the actual string, but not
+     * outside). If nullptr is given, no delimiter is used and a complete string
+     * is read.
+     * @return a pair containing the error code and the string that has been
+     * read.
+     */
+    static std::pair<Err, std::string> parseString(
+        BufferedCharReader &reader,
+        const std::unordered_set<char> *delims = nullptr);
+
+    /**
+     * Extracts an unescaped string from the given buffered char reader
+     * instance. This function just reads text until one of the given delimiter
+     * characters is reached.
+     *
+     * @param reader is a reference to the BufferedCharReader instance which is
+     * the source for the character data. The reader will be positioned at the
+     * terminating delimiting character.
+     * @param delims is a set of characters which will terminate the string.
+     * These characters are not included in the result. May not be nullptr.
+     * @return a pair containing the error code and the string that has been
+     * read.
+     */
+    static std::pair<Err, std::string> parseUnescapedString(
+        BufferedCharReader &reader, const std::unordered_set<char> *delims);
+
+    /**
+     * Tries to parse the most specific item from the given stream until one of
+     * the given delimiters is reached or a meaningful literal has been read.
+     * The resulting variant represents the value that has been read.
+     *
+     * @param reader is a reference to the BufferedCharReader instance which is
+     * the source for the character data. The reader will be positioned at the
+     * terminating delimiting character.
+     * @param delims is a set of characters which will terminate the string.
+     * These characters are not included in the result. May not be nullptr.
+     * @return a pair containing the error code and the variant that has been
+     * read.
+     */
+    static std::pair<Err, Variant> parseGeneric(
+        BufferedCharReader &reader, const std::unordered_set<char> *delims);
+};
+}
+}
+
+#endif /* _OUSIA_VARIANT_READER_HPP_ */
+
diff --git a/src/core/variant/Variant.hpp b/src/core/variant/Variant.hpp
index d65e14a..6476780 100644
--- a/src/core/variant/Variant.hpp
+++ b/src/core/variant/Variant.hpp
@@ -42,6 +42,7 @@
#include <core/Exceptions.hpp>
namespace ousia {
+namespace variant {
/**
* Instances of the Variant class represent any kind of data that is exchanged
@@ -680,13 +681,16 @@ public:
* Prints a key value pair to the output stream.
*/
friend std::ostream &operator<<(std::ostream &os,
- const mapType::value_type &v)
+ const mapType::value_type &v)
{
// TODO: Use proper serialization function
return os << "\"" << v.first << "\": " << v.second.toString(true);
}
};
+}
+// Alias for the (very often used and unambiguous) variant class
+using Variant = variant::Variant;
}
#endif /* _OUSIA_VARIANT_HPP_ */