diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/variant/Reader.cpp | 177 | ||||
| -rw-r--r-- | src/core/variant/Reader.hpp | 130 | ||||
| -rw-r--r-- | src/core/variant/Variant.hpp | 6 | 
3 files changed, 312 insertions, 1 deletions
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
new file mode 100644
index 0000000..6142ecf
--- /dev/null
+++ b/src/core/variant/Reader.cpp
@@ -0,0 +1,191 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cassert>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+#include <core/Utils.hpp>
+
+#include "Reader.hpp"
+
+namespace ousia {
+namespace variant {
+
+// States of the state machine driving Reader::parseString
+static const int STATE_INIT = 0;
+static const int STATE_IN_STRING = 1;
+static const int STATE_ESCAPE = 2;
+
+std::pair<Reader::Err, std::string> Reader::parseString(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+	// Initialize the internal state
+	Err errCode = Err::OK;
+	int state = STATE_INIT;
+	char quote = 0;
+	std::stringstream res;
+
+	// State machine which iterates over each character in the stream
+	// TODO: Combination of peeking and consumePeek is stupid as consumePeek is
+	// the default (read and putBack would obviously be better, yet the latter
+	// is not trivial to implement in the current BufferedCharReader).
+	char c;
+	while (reader.peek(&c)) {
+		switch (state) {
+			case STATE_INIT:
+				if (c == '"' || c == '\'') {
+					quote = c;
+					state = STATE_IN_STRING;
+				} else if (delims && delims->count(c)) {
+					// Delimiter in front of the opening quote: stop and
+					// leave the delimiter in the stream
+					return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+				} else if (!Utils::isWhitespace(c)) {
+					// Only whitespace may precede the opening quote
+					return std::make_pair(Err::ERR_UNEXPECTED_CHARACTER,
+					                      res.str());
+				}
+				reader.consumePeek();
+				break;
+			case STATE_IN_STRING:
+				if (c == quote) {
+					// Closing quote: consume it and report an escape
+					// error recorded on the way (Err::OK otherwise)
+					reader.consumePeek();
+					return std::make_pair(errCode, res.str());
+				} else if (c == '\n') {
+					// A raw line break ends the literal prematurely
+					return std::make_pair(Err::ERR_UNTERMINATED, res.str());
+				} else if (c == '\\') {
+					// The backslash itself is not part of the result
+					state = STATE_ESCAPE;
+				} else {
+					res << c;
+				}
+				reader.consumePeek();
+				break;
+			case STATE_ESCAPE:
+				switch (c) {
+					case 'b':
+						res << '\b';
+						break;
+					case 'f':
+						res << '\f';
+						break;
+					case 'n':
+						res << '\n';
+						break;
+					case 'r':
+						res << '\r';
+						break;
+					case 't':
+						res << '\t';
+						break;
+					case 'v':
+						res << '\v';
+						break;
+					case '\'':
+						res << '\'';
+						break;
+					case '"':
+						res << '"';
+						break;
+					case '\\':
+						res << '\\';
+						break;
+					case '\n':
+						// Escaped line break (line continuation), emits nothing
+						break;
+					case 'x':
+						// TODO: Parse Latin-1 sequence hex XX
+						break;
+					case 'u':
+						// TODO: Parse 16-Bit unicode character hex XXXX
+						break;
+					default:
+						if (Utils::isNumeric(c)) {
+							// TODO: Parse octal 000 sequence
+						} else {
+							errCode = Err::ERR_INVALID_ESCAPE;
+						}
+						break;
+				}
+				state = STATE_IN_STRING;
+				reader.consumePeek();
+				break;
+		}
+	}
+	return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+}
+
+std::pair<Reader::Err, std::string> Reader::parseUnescapedString(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+	assert(delims);
+
+	// Copy characters verbatim until the first delimiter, which is left in
+	// the stream
+	std::stringstream res;
+	char c;
+	while (reader.peek(&c)) {
+		if (delims->count(c)) {
+			return std::make_pair(Err::OK, res.str());
+		}
+		res << c;
+		reader.consumePeek();
+	}
+	return std::make_pair(Err::ERR_UNEXPECTED_END, res.str());
+}
+
+std::pair<Reader::Err, Variant> Reader::parseGeneric(
+    BufferedCharReader &reader, const std::unordered_set<char> *delims)
+{
+	assert(delims);
+
+	// NOTE: The string results below rely on Variant being implicitly
+	// constructible from std::string -- TODO confirm against Variant.hpp
+	char c;
+	while (reader.peek(&c)) {
+		// Stop if a delimiter is reached, skip all whitespace characters
+		if (delims->count(c)) {
+			return std::make_pair(Err::OK, std::string{});
+		} else if (Utils::isWhitespace(c)) {
+			reader.consumePeek();
+			continue;
+		}
+
+		// Parse a string if a quote is reached
+		if (c == '"' || c == '\'') {
+			return parseString(reader, nullptr);
+		}
+
+		// TODO: Parse struct descriptor if c == '['
+		// TODO: Parse integer/double if Utils::isNumeric(c)
+
+		// Parse an unescaped string in any other case
+		return parseUnescapedString(reader, delims);
+	}
+	return std::make_pair(Err::ERR_UNEXPECTED_END, std::string{});
+}
+
+}
+}
+
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
new file mode 100644
index 0000000..3f945f0
--- /dev/null
+++ b/src/core/variant/Reader.hpp
@@ -0,0 +1,144 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Reader.hpp
+ *
+ * Provides parsers for various micro formats. These formats include integers,
+ * doubles, strings, JSON and the Ousía struct notation.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_VARIANT_READER_HPP_
+#define _OUSIA_VARIANT_READER_HPP_
+
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+#include <core/BufferedCharReader.hpp>
+
+#include "Variant.hpp"
+
+namespace ousia {
+namespace variant {
+
+class Reader {
+public:
+	// TODO: Pass logger instance instead of using error codes?
+
+	/**
+	 * The Err enum describes possible error codes that may be encountered when
+	 * parsing the microtypes.
+	 */
+	enum class Err : int {
+		/**
+		 * Reached the end of the stream, but expected more data.
+		 */
+		ERR_UNEXPECTED_END = -1,
+
+		/**
+		 * The stream is malformed.
+		 */
+		ERR_MALFORMED = -2,
+
+		/**
+		 * Unexpected character.
+		 */
+		ERR_UNEXPECTED_CHARACTER = -3,
+
+		/**
+		 * Unterminated literal.
+		 */
+		ERR_UNTERMINATED = -4,
+
+		/**
+		 * Invalid escape character.
+		 */
+		ERR_INVALID_ESCAPE = -5,
+
+		/**
+		 * A value of the requested type was extracted successfully.
+		 */
+		OK = 0
+	};
+
+	/**
+	 * Parses a string which may either be enclosed by " or ', unescapes
+	 * entities in the string as specified for JavaScript. Whitespace in front
+	 * of the opening quote is skipped. Hexadecimal (x), unicode (u) and octal
+	 * escape sequences are not implemented yet.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned after
+	 * the terminating quote character or at the terminating delimiting
+	 * character.
+	 * @param delims is an optional set of delimiters after which parsing has to
+	 * be stopped (the delimiters may occur inside the actual string, but not
+	 * outside). If nullptr is given, no delimiter is used and a complete string
+	 * is read.
+	 * @return a pair of an error code and the string read until parsing
+	 * stopped. The code is Err::OK on success, ERR_UNEXPECTED_END if a
+	 * delimiter precedes the opening quote or the stream ends before the
+	 * closing quote, ERR_UNEXPECTED_CHARACTER if the literal does not start
+	 * with a quote, ERR_UNTERMINATED on a line break inside the literal and
+	 * ERR_INVALID_ESCAPE if an unknown escape sequence was encountered.
+	 */
+	static std::pair<Err, std::string> parseString(
+	    BufferedCharReader &reader,
+	    const std::unordered_set<char> *delims = nullptr);
+
+	/**
+	 * Extracts an unescaped string from the given buffered char reader
+	 * instance. This function just reads text until one of the given delimiter
+	 * characters is reached.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned at the
+	 * terminating delimiting character.
+	 * @param delims is a set of characters which will terminate the string.
+	 * These characters are not included in the result. May not be nullptr.
+	 * @return a pair of an error code and the text that has been read. The
+	 * code is Err::OK if a delimiter was reached and ERR_UNEXPECTED_END if the
+	 * stream ended first.
+	 */
+	static std::pair<Err, std::string> parseUnescapedString(
+	    BufferedCharReader &reader, const std::unordered_set<char> *delims);
+
+	/**
+	 * Tries to parse the most specific item from the given stream until one of
+	 * the given delimiters is reached or a meaningful literal has been read.
+	 * The resulting variant represents the value that has been read.
+	 *
+	 * @param reader is a reference to the BufferedCharReader instance which is
+	 * the source for the character data. The reader will be positioned at the
+	 * terminating delimiting character.
+	 * @param delims is a set of characters which will terminate the string.
+	 * These characters are not included in the result. May not be nullptr.
+	 * @return a pair of an error code and the value that has been read. Only
+	 * quoted and unescaped strings are recognised so far.
+	 */
+	static std::pair<Err, Variant> parseGeneric(
+	    BufferedCharReader &reader, const std::unordered_set<char> *delims);
+};
+}
+}
+
+#endif /* _OUSIA_VARIANT_READER_HPP_ */
+
diff --git a/src/core/variant/Variant.hpp b/src/core/variant/Variant.hpp index d65e14a..6476780 100644 --- a/src/core/variant/Variant.hpp +++ b/src/core/variant/Variant.hpp @@ -42,6 +42,7 @@  #include <core/Exceptions.hpp>  namespace ousia { +namespace variant {  /**   * Instances of the Variant class represent any kind of data that is exchanged @@ -680,13 +681,16 @@ public:  	 * Prints a key value pair to the output stream.  	 
*/  	friend std::ostream &operator<<(std::ostream &os, -		                     const mapType::value_type &v) +	                                const mapType::value_type &v)  	{  		// TODO: Use proper serialization function  		return os << "\"" << v.first << "\": " << v.second.toString(true);  	}  }; +} +// Alias for the (very often used and unambigous) variant class +using Variant = variant::Variant;  }  #endif /* _OUSIA_VARIANT_HPP_ */  | 
