diff options
| author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-11 15:26:50 +0100 | 
|---|---|---|
| committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-11 15:26:50 +0100 | 
| commit | 3f62168ed0b088eec3cb2903f03966f7d501f564 (patch) | |
| tree | 781f5bd9b304d9eb931827a26f463575d772983d /src | |
| parent | b74936760e28a92cadfaec47928ea478fe2d72ee (diff) | |
moved to CharReader everywhere
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/BufferedCharReader.cpp | 263 | ||||
| -rw-r--r-- | src/core/BufferedCharReader.hpp | 288 | ||||
| -rw-r--r-- | src/core/CSS.hpp | 2 | ||||
| -rw-r--r-- | src/core/CodeTokenizer.hpp | 8 | ||||
| -rw-r--r-- | src/core/Registry.cpp | 3 | ||||
| -rw-r--r-- | src/core/Tokenizer.cpp | 12 | ||||
| -rw-r--r-- | src/core/Tokenizer.hpp | 27 | ||||
| -rw-r--r-- | src/core/common/CharReader.cpp (renamed from src/core/utils/CharReader.cpp) | 5 | ||||
| -rw-r--r-- | src/core/common/CharReader.hpp (renamed from src/core/utils/CharReader.hpp) | 7 | ||||
| -rw-r--r-- | src/core/common/Exceptions.cpp (renamed from src/core/Exceptions.cpp) | 0 | ||||
| -rw-r--r-- | src/core/common/Exceptions.hpp (renamed from src/core/Exceptions.hpp) | 0 | ||||
| -rw-r--r-- | src/core/common/Logger.cpp (renamed from src/core/Logger.cpp) | 0 | ||||
| -rw-r--r-- | src/core/common/Logger.hpp (renamed from src/core/Logger.hpp) | 0 | ||||
| -rw-r--r-- | src/core/common/Utils.cpp (renamed from src/core/Utils.cpp) | 0 | ||||
| -rw-r--r-- | src/core/common/Utils.hpp (renamed from src/core/Utils.hpp) | 0 | ||||
| -rw-r--r-- | src/core/common/Variant.cpp (renamed from src/core/variant/Variant.cpp) | 3 | ||||
| -rw-r--r-- | src/core/common/Variant.hpp (renamed from src/core/variant/Variant.hpp) | 7 | ||||
| -rw-r--r-- | src/core/common/VariantReader.cpp (renamed from src/core/variant/Reader.cpp) | 247 | ||||
| -rw-r--r-- | src/core/common/VariantReader.hpp (renamed from src/core/variant/Reader.hpp) | 87 | ||||
| -rw-r--r-- | src/core/parser/Parser.hpp | 4 | ||||
| -rw-r--r-- | src/core/parser/ParserStack.cpp | 4 | ||||
| -rw-r--r-- | src/core/parser/ParserStack.hpp | 2 | ||||
| -rw-r--r-- | src/plugins/css/CSSParser.cpp | 18 | ||||
| -rw-r--r-- | src/plugins/css/CSSParser.hpp | 2 | ||||
| -rw-r--r-- | src/plugins/xml/XmlParser.cpp | 2 | 
25 files changed, 211 insertions, 780 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp deleted file mode 100644 index aeedf12..0000000 --- a/src/core/BufferedCharReader.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <array> - -#include "Utils.hpp" - -#include "BufferedCharReader.hpp" - -namespace ousia { - -// Constants used within the linebreak statemachine. -static const uint8_t LB_STATE_NONE = 0x00; -static const uint8_t LB_STATE_ONE = 0x01; -static const uint8_t LB_STATE_LF = 0x10; -static const uint8_t LB_STATE_CR = 0x20; -static const uint8_t LB_STATE_MASK_CNT = 0x0F; -static const uint8_t LB_STATE_MASK_TYPE = 0xF0; - -/* Struct BufferedCharReader::ReadCursor */ - -BufferedCharReader::ReadCursor::ReadCursor(unsigned int line, -                                           unsigned int column, -                                           bool destructive) -    : line(line), -      column(column), -      bufferElem(0), -      bufferPos(0), -      destructive(destructive), -      lbState(LB_STATE_NONE) -{ -} - -void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor) -{ -	this->line = cursor.line; -	this->column = cursor.column; -	this->bufferElem = cursor.bufferElem; -	this->bufferPos = cursor.bufferPos; -	this->lbState = cursor.lbState; -} - -/* Class BufferedCharReader */ - -BufferedCharReader::BufferedCharReader(int line, int column) -    : inputStream(nullptr), -      readCursor(line, column, true), -      peekCursor(line, column, false), -      depleted(false) -{ -} - -BufferedCharReader::BufferedCharReader(const std::string &str, int line, -                                       int column) -    : inputStream(nullptr), -      readCursor(line, column, true), -      peekCursor(line, column, false), -      depleted(true) -{ -	buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(const std::string &str) -    : inputStream(nullptr), -      readCursor(1, 1, true), -      peekCursor(1, 1, false), -      depleted(true) -{ -	buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line, -                                       int column) -    : inputStream(&inputStream), -      readCursor(line, column, true), -      peekCursor(line, column, false), -      depleted(false) -{ -} - -void BufferedCharReader::feed(const std::string &data) -{ -	if (!depleted && !inputStream) { -		buffer.push_back(data); -	} -} - -void BufferedCharReader::close() -{ -	if (!inputStream) { -		depleted = true; -	} -} - -bool BufferedCharReader::substituteLinebreaks(ReadCursor &cursor, char *c) -{ -	// Handle line breaks, inserts breakes after the following character -	// combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n -	if ((*c == '\n') || (*c == '\r')) { -		// Determine the type of the current linebreak character -		const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR; - -		// Read the last count and the last type from the state -		const uint8_t lastCount = cursor.lbState & LB_STATE_MASK_CNT; -		const uint8_t lastType = cursor.lbState & LB_STATE_MASK_TYPE; - -		// Set the current linebreak type and counter in the state -		cursor.lbState = ((lastCount + 1) & 1) | type; - -		// If either this is the first instance of this character or the same -		// return character is repeated -		if (!lastCount || (lastType == type)) { -			*c = '\n'; -			return true; -		} -		return false; -	} - -	// Find the state -	cursor.lbState = LB_STATE_NONE; -	return true; -} - -bool BufferedCharReader::readCharacterAtCursor(ReadCursor &cursor, char *c) -{ -	bool hasChar = false; -	while (!hasChar) { -		// Abort if the current buffer element does not point to a valid entry -		// in the buffer -- we must try to feed another data block into the -		// internal buffer -		if (cursor.bufferElem >= buffer.size()) { -			// Abort if there is no more data or no input stream is set -			if (depleted || !inputStream) { -				return false; -			} - -			// Read a buffer of the specified size -			constexpr std::streamsize BUFFER_SIZE = 1024; -			std::array<char, BUFFER_SIZE> buf; -			const std::streamsize cnt = -			    (*inputStream).read(buf.data(), BUFFER_SIZE).gcount(); - -			// If data has been read, append it to the input buffer and try -			// again -			if (cnt > 0) { -				buffer.emplace_back(std::string(buf.data(), cnt)); -				continue; -			} - -			// End of file handling -			if (inputStream->fail() || inputStream->eof()) { -				depleted = true; -				return false; -			} -		} - -		// Fetch the current element the peek pointer points to -		const std::string &data = buffer[cursor.bufferElem]; - -		// Handle the "no data" case -- either in a destructive or -		// non-destructive manner. -		if (cursor.bufferPos >= data.length()) { -			if (cursor.destructive) { -				buffer.pop_front(); -			} else { -				cursor.bufferElem++; -			} -			cursor.bufferPos = 0; -			continue; -		} - -		// Read the character, advance the buffer position -		*c = *(data.data() + cursor.bufferPos); -		cursor.bufferPos++; - -		// Substitute linebreaks with a single LF (0x0A) -		hasChar = substituteLinebreaks(cursor, c); -	} - -	// Update the position counter -	if (*c == '\n') { -		cursor.line++; -		cursor.column = 1; -	} else { -		// Ignore UTF-8 continuation bytes -		if (!((*c & 0x80) && !(*c & 0x40))) { -			cursor.column++; -		} -	} - -	return true; -} - -bool BufferedCharReader::peek(char *c) -{ -	return readCharacterAtCursor(peekCursor, c); -} - -bool BufferedCharReader::read(char *c) -{ -	resetPeek(); -	return readCharacterAtCursor(readCursor, c); -} - -void BufferedCharReader::consumePeek() -{ -	// Remove all no longer needed buffer elements -	for (unsigned int i = 0; i < peekCursor.bufferElem; i++) { -		buffer.pop_front(); -	} -	peekCursor.bufferElem = 0; - -	// Copy the peek cursor to the read cursor -	readCursor.assign(peekCursor); -} - -bool BufferedCharReader::consumeWhitespace() -{ -	char c; -	while (peek(&c)) { -		if (!Utils::isWhitespace(c)) { -			resetPeek(); -			return true; -		} -		consumePeek(); -	} -	return false; -} - -void BufferedCharReader::resetPeek() -{ -	// Reset the peek cursor to the read cursor -	peekCursor.assign(readCursor); -} - -bool BufferedCharReader::atEnd() const -{ -	if (depleted || !inputStream) { -		if (buffer.size() <= 0) { -			return true; -		} else if (buffer.size() == 1) { -			return buffer[0].size() == readCursor.bufferPos; -		} -	} -	return false; -} -} - diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp deleted file mode 100644 index e7f3186..0000000 --- a/src/core/BufferedCharReader.hpp +++ /dev/null @@ -1,288 +0,0 @@ -/* -    Ousía -    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel - -    This program is free software: you can redistribute it and/or modify -    it under the terms of the GNU General Public License as published by -    the Free Software Foundation, either version 3 of the License, or -    (at your option) any later version. - -    This program is distributed in the hope that it will be useful, -    but WITHOUT ANY WARRANTY; without even the implied warranty of -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -    GNU General Public License for more details. - -    You should have received a copy of the GNU General Public License -    along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file BufferedCharReader.hpp - * - * Contains the BufferedCharReader class which is used for reading/peeking - * single characters from an input stream or string. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_BUFFERED_CHAR_READER_H_ -#define _OUSIA_BUFFERED_CHAR_READER_H_ - -#include <deque> -#include <string> -#include <istream> -#include <cstdint> - -namespace ousia { - -// TODO: Better split this class into multiple classes with base class -// BufferedCharReader where each sub class represents one method of supplying -// the input data (feeding, initial string, input stream). - -/** - * The BufferedCharReader class is used for storing incomming data that - * is fed into the pipeline as well as reading/peeking single characters - * from that buffer. Additionally it counts the current column/row - * (with correct handling for UTF-8) and contains an internal state - * machine that handles the detection of linebreaks and converts these to a - * single '\n'. - */ -class BufferedCharReader { -private: -	/** -	 * The ReadCursor structure is responsible for representing the read -	 * position within the text an all state machine states belonging to the -	 * cursor. There are two types of read cursors: destructive and -	 * non-destructive read cursors. -	 */ -	struct ReadCursor { -		/** -		 * The line the cursor currently points to. -		 */ -		unsigned int line; - -		/** -		 * The column the cursor currently points to. -		 */ -		unsigned int column; - -		/** -		 * The index of the element in the data buffer we're currently reading -		 * from. -		 */ -		unsigned int bufferElem; - -		/** -		 * The byte position within this data buffer. -		 */ -		unsigned int bufferPos; - -		/** -		 * Specifies whether this is a destructive cursor (bytes are discarded -		 * once they were read from the buffer). -		 */ -		const bool destructive; - -		/** -		 * State variable used in the internal state machine of the -		 * line feed detection. -		 */ -		uint8_t lbState; - -		/** -		 * Constructor of the ReadCursor structure. -		 * -		 * @param line is the start line. -		 * @param column is the start column. -		 * @param destructive specifies whether the ReadCursor is destructive -		 * (consumes all read characters, as used in the "read cursor") or -		 * non-destructive (as used in the "peek cursor"). -		 */ -		ReadCursor(unsigned int line, unsigned int column, bool destructive); - -		/** -		 * Copys the data from another ReadCursor without overriding the -		 * "destructive" flag. -		 * -		 * @param cursor is the cursor that should be copied. -		 */ -		void assign(const ReadCursor &cursor); -	}; - -	/** -	 * Pointer at an (optional) input stream used for reading a chunk of data -	 * whenever the input buffer depletes. -	 */ -	std::istream *inputStream; - -	/** -	 * The read and the peek cursor. -	 */ -	ReadCursor readCursor, peekCursor; - -	/** -	 * Set to true if there is no more input data. -	 */ -	bool depleted; - -	/** -	 * Queue containing the data that has been fed into the char reader. -	 */ -	std::deque<std::string> buffer; - -	/** -	 * Substitute any combination of linebreaks in the incomming code with "\n". -	 * Returns true if the current character is meant as output, false -	 * otherwise. -	 */ -	bool substituteLinebreaks(ReadCursor &cursor, char *c); - -	/** -	 * Reads a character from the input buffer and advances the given read -	 * cursor. -	 * -	 * @param cursor is a reference to the read cursor that should be used -	 * for reading. -	 * @param hasChar is set to true, if a character is available, false if -	 * no character is available (e.g. because line breaks are substituted or -	 * the end of a buffer boundary is reached -- in this case this function -	 * should be called again with the same parameters.) -	 * @param c is a output parameter, which will be set to the read character. -	 * @param returns true if there was enough data in the buffer, false -	 * otherwise. -	 */ -	bool readCharacterAtCursor(ReadCursor &cursor, char *c); - -	/** -	 * Function that is called for each read character -- updates the row and -	 * column count. -	 */ -	void updatePositionCounters(const char c); - -public: - -	/** -	 * Constructor of the buffered char reader class with empty buffer as input. -	 * This operates the BufferedCharReader in a mode where new data has to be -	 * fed using the "feed" function and explicitly closed using the "close" -	 * function. -	 * -	 * @param line is the start line. -	 * @param column is the start column. -	 */ -	BufferedCharReader(int line = 1, int column = 1); - -	/** -	 * Constructor of the buffered char reader class with a string as input. -	 * -	 * @param str is a string containing the input data. -	 * @param line is the start line. -	 * @param column is the start column. -	 */ -	BufferedCharReader(const std::string &str, int line, int column); - -	/** -	 * Constructor of the buffered char reader class with a string as input. -	 * -	 * @param str is a string containing the input data. -	 */ -	BufferedCharReader(const std::string &str); - -	/** -	 * Constructor of the buffered char reader class with a string as input. -	 * -	 * @param inputStream is the input stream from which incomming data should -	 * be read. -	 * @param line is the start line. -	 * @param column is the start column. -	 */ -	BufferedCharReader(std::istream &inputStream, int line = 1, int column = 1); - -	/** -	 * Peeks a single character. If called multiple times, returns the -	 * character after the previously peeked character. -	 * -	 * @param c is a reference to the character to which the result should be -	 * writtern. -	 * @return true if the character was successfully read, false if there are -	 * no more characters to be read in the buffer. -	 */ -	bool peek(char *c); - -	/** -	 * Reads a character from the input data. If "peek" was called -	 * beforehand resets the peek pointer. -	 * -	 * @param c is a reference to the character to which the result should be -	 * writtern. -	 * @return true if the character was successfully read, false if there are -	 * no more characters to be read in the buffer. -	 */ -	bool read(char *c); - -	/** -	 * Advances the read pointer to the peek pointer -- so if the "peek" -	 * function was called, "read" will now return the character after -	 * the last peeked character. -	 */ -	void consumePeek(); - -	/** -	 * Moves the read cursor to the next non-whitespace character. Returns -	 * false, if the end of the stream was reached. -	 * -	 * @return false if the end of the stream was reached, false othrwise. -	 */ -	bool consumeWhitespace(); - -	/** -	 * Resets the peek pointer to the "read" pointer. -	 */ -	void resetPeek(); - -	/** -	 * Feeds new data into the internal buffer of the BufferedCharReader -	 * class. Only applicable if the buffered char reader was constructed -	 * without an input stream or string. -	 * -	 * @param data is a string containing the data that should be -	 * appended to the internal buffer. -	 */ -	void feed(const std::string &data); - -	/** -	 * Tells the buffered char reader that no more data will be fed. -	 * Only applicable if the buffered char reader was constructed without an -	 * input stream or string. -	 * -	 * @param data is a string containing the data that should be -	 * appended to the internal buffer. -	 */ -	void close(); - -	/** -	 * Returns true if there are no more characters as the stream was -	 * closed. -	 * -	 * @return true if there is no more data. -	 */ -	bool atEnd() const; - -	/** -	 * Returns the current line (starting with one). -	 * -	 * @return the current line number. -	 */ -	int getLine() const { return readCursor.line; } - -	/** -	 * Returns the current column (starting with one). -	 * -	 * @return the current column number. -	 */ -	int getColumn() const { return readCursor.column; } -}; -} - -#endif /* _OUSIA_BUFFERED_CHAR_READER_H_ */ - diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 1510f3a..a54d956 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -23,7 +23,7 @@  #include <vector>  #include <tuple> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp>  #include "Managed.hpp"  #include "Node.hpp" diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 43c7abb..4190297 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -22,7 +22,7 @@  #include <map>  #include <sstream> -#include "BufferedCharReader.hpp" +#include <core/common/CharReader.hpp>  #include "Tokenizer.hpp"  namespace ousia { @@ -108,8 +108,8 @@ public:  	/**  	 * -	 * @param input a BufferedCharReader containing the input for this -	 * tokenizer, as with a regular tokenizer. +	 * @param input a CharReader containing the input for this tokenizer, as +	 * with a regular tokenizer.  	 * @param root a TokenTreeNode representing the root of the TokenTree.  	 * Please note that you have to specify all tokenIDs here that you use  	 * in the descriptors map. @@ -120,7 +120,7 @@ public:  	 * and this CodeTokenizer would recognize the token "//" as starting a  	 * line comment.  	 */ -	CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, +	CodeTokenizer(CharReader &input, const TokenTreeNode &root,  	              std::map<int, CodeTokenDescriptor> descriptors)  	    : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL)  	{ diff --git a/src/core/Registry.cpp b/src/core/Registry.cpp index 6ff9594..74d1cf8 100644 --- a/src/core/Registry.cpp +++ b/src/core/Registry.cpp @@ -16,8 +16,7 @@      along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ -#include <core/Logger.hpp> - +#include <core/common/Logger.hpp>  #include <core/parser/Parser.hpp>  namespace ousia { diff --git a/src/core/Tokenizer.cpp b/src/core/Tokenizer.cpp index b99d1ed..0af5f5a 100644 --- a/src/core/Tokenizer.cpp +++ b/src/core/Tokenizer.cpp @@ -72,7 +72,7 @@ TokenTreeNode::TokenTreeNode(const std::map<std::string, int> &inputs)  {  } -Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root) +Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root)      : input(input), root(root)  {  } @@ -81,10 +81,10 @@ bool Tokenizer::prepare()  {  	std::stringstream buffer;  	char c; -	int startColumn = input.getColumn(); -	int startLine = input.getLine(); +	uint32_t startColumn = input.getColumn(); +	uint32_t startLine = input.getLine();  	bool bufEmpty = true; -	while (input.peek(&c)) { +	while (input.peek(c)) {  		if (root.children.find(c) != root.children.end()) {  			// if there might be a special token, keep peeking forward  			// until we find the token (or we don't). @@ -107,7 +107,7 @@ bool Tokenizer::prepare()  						input.consumePeek();  					}  				} -				if (!input.peek(&c)) { +				if (!input.peek(c)) {  					// if we are at the end we break off the search.  					break;  				} @@ -153,7 +153,7 @@ bool Tokenizer::prepare()  				}  			} else{  				//if we found nothing, read at least one character. -				input.peek(&c); +				input.peek(c);  			}  		}  		buffer << c; diff --git a/src/core/Tokenizer.hpp b/src/core/Tokenizer.hpp index 8f80150..33327cc 100644 --- a/src/core/Tokenizer.hpp +++ b/src/core/Tokenizer.hpp @@ -19,11 +19,12 @@  #ifndef _OUSIA_TOKENIZER_HPP_  #define _OUSIA_TOKENIZER_HPP_ +#include <cstdint> +#include <deque>  #include <istream>  #include <map> -#include <deque> -#include "BufferedCharReader.hpp" +#include <core/common/CharReader.hpp>  namespace ousia { @@ -120,13 +121,13 @@ static const int TOKEN_TEXT = -2;  struct Token {  	int tokenId;  	std::string content; -	int startColumn; -	int startLine; -	int endColumn; -	int endLine; +	uint32_t startColumn; +	uint32_t startLine; +	uint32_t endColumn; +	uint32_t endLine; -	Token(int tokenId, std::string content, int startColumn, int startLine, -	      int endColumn, int endLine) +	Token(int tokenId, std::string content, uint32_t startColumn, uint32_t startLine, +	      uint32_t endColumn, uint32_t endLine)  	    : tokenId(tokenId),  	      content(content),  	      startColumn(startColumn), @@ -160,7 +161,7 @@ struct Token {   */  class Tokenizer {  private: -	BufferedCharReader &input; +	CharReader &input;  	const TokenTreeNode &root;  	std::deque<Token> peeked;  	unsigned int peekCursor = 0; @@ -185,14 +186,14 @@ protected:  public:  	/**  	 * @param input The input of a Tokenizer is given in the form of a -	 * BufferedCharReader. Please refer to the respective documentation. +	 * CharReader. Please refer to the respective documentation.  	 * @param root This is meant to be the root of a TokenTree giving the  	 * specification of user-defined tokens this Tokenizer should recognize.  	 * The Tokenizer promises to not change the TokenTree such that you can  	 * re-use the same specification for multiple inputs.  	 * Please refer to the TokenTreeNode documentation for more information.  	 */ -	Tokenizer(BufferedCharReader &input, const TokenTreeNode &root); +	Tokenizer(CharReader &input, const TokenTreeNode &root);  	/**  	 * The next method consumes one Token from the input stream and gives @@ -224,9 +225,9 @@ public:  	 */  	void consumePeek(); -	const BufferedCharReader &getInput() const { return input; } +	const CharReader &getInput() const { return input; } -	BufferedCharReader &getInput() { return input; } +	CharReader &getInput() { return input; }  };  } diff --git a/src/core/utils/CharReader.cpp b/src/core/common/CharReader.cpp index 61616d7..373c0c1 100644 --- a/src/core/utils/CharReader.cpp +++ b/src/core/common/CharReader.cpp @@ -21,12 +21,10 @@  #include <limits>  #include <sstream> -#include <core/Utils.hpp> -  #include "CharReader.hpp" +#include "Utils.hpp"  namespace ousia { -namespace utils {  /* Helper functions */ @@ -639,5 +637,4 @@ void CharReaderFork::commit()  	parentPeekCursor.assign(buffer, peekCursor);  }  } -} diff --git a/src/core/utils/CharReader.hpp b/src/core/common/CharReader.hpp index 1306026..3cbe4b4 100644 --- a/src/core/utils/CharReader.hpp +++ b/src/core/common/CharReader.hpp @@ -33,7 +33,6 @@  #include <vector>  namespace ousia { -namespace utils {  /**   * A chunked ring buffer used in CharReader to provide access to an input stream @@ -659,12 +658,6 @@ public:  	 */  	void commit();  }; -} - -/** - * Alias of the commonly used CharReader class. - */ -using CharReader = utils::CharReader;  } diff --git a/src/core/Exceptions.cpp b/src/core/common/Exceptions.cpp index d064f35..d064f35 100644 --- a/src/core/Exceptions.cpp +++ b/src/core/common/Exceptions.cpp diff --git a/src/core/Exceptions.hpp b/src/core/common/Exceptions.hpp index 00d6106..00d6106 100644 --- a/src/core/Exceptions.hpp +++ b/src/core/common/Exceptions.hpp diff --git a/src/core/Logger.cpp b/src/core/common/Logger.cpp index 17f55a6..17f55a6 100644 --- a/src/core/Logger.cpp +++ b/src/core/common/Logger.cpp diff --git a/src/core/Logger.hpp b/src/core/common/Logger.hpp index e6b97f4..e6b97f4 100644 --- a/src/core/Logger.hpp +++ b/src/core/common/Logger.hpp diff --git a/src/core/Utils.cpp b/src/core/common/Utils.cpp index c460ed4..c460ed4 100644 --- a/src/core/Utils.cpp +++ b/src/core/common/Utils.cpp diff --git a/src/core/Utils.hpp b/src/core/common/Utils.hpp index 5332b50..5332b50 100644 --- a/src/core/Utils.hpp +++ b/src/core/common/Utils.hpp diff --git a/src/core/variant/Variant.cpp b/src/core/common/Variant.cpp index d33cd4f..27fc6e7 100644 --- a/src/core/variant/Variant.cpp +++ b/src/core/common/Variant.cpp @@ -18,8 +18,7 @@  #include <sstream> -#include <core/Utils.hpp> - +#include "Utils.hpp"  #include "Variant.hpp"  namespace ousia { diff --git a/src/core/variant/Variant.hpp b/src/core/common/Variant.hpp index 1e62644..d411fd3 100644 --- a/src/core/variant/Variant.hpp +++ b/src/core/common/Variant.hpp @@ -39,10 +39,9 @@  // http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html  // later (will allow to use 8 bytes for a variant) -#include <core/Exceptions.hpp> +#include "Exceptions.hpp"  namespace ousia { -namespace variant {  /**   * Instances of the Variant class represent any kind of data that is exchanged @@ -758,9 +757,5 @@ public:  };  } -// Alias for the (very often used and unambigous) variant class -using Variant = variant::Variant; -} -  #endif /* _OUSIA_VARIANT_HPP_ */ diff --git a/src/core/variant/Reader.cpp b/src/core/common/VariantReader.cpp index 5c167cd..e611842 100644 --- a/src/core/variant/Reader.cpp +++ b/src/core/common/VariantReader.cpp @@ -21,12 +21,10 @@  #include <cmath>  #include <sstream> -#include <core/Utils.hpp> - -#include "Reader.hpp" +#include "VariantReader.hpp" +#include "Utils.hpp"  namespace ousia { -namespace variant {  // TODO: Better error messages (like "Expected 'x' but got 'y'")  // TODO: Replace delims with single char delim where possible @@ -177,134 +175,138 @@ public:  	 * delimiters.  	 */  	bool parse(CharReader &reader, Logger &logger, -	           const std::unordered_set<char> &delims) -	{ -		State state = State::INIT; -		char c; +	           const std::unordered_set<char> &delims); +}; -		// Consume the first whitespace characters -		reader.consumeWhitespace(); +bool Number::parse(CharReader &reader, Logger &logger, +           const std::unordered_set<char> &delims) +{ +	State state = State::INIT; +	char c; -		// Iterate over the FSM to extract numbers -		while (reader.peek(c)) { -			// Abort, once a delimiter or whitespace is reached -			if (Utils::isWhitespace(c) || delims.count(c)) { -				reader.resetPeek(); -				break; -			} +	// Consume the first whitespace characters +	reader.consumeWhitespace(); -			// The character is not a whitespace character and not a delimiter -			switch (state) { -				case State::INIT: -				case State::HAS_MINUS: -					switch (c) { -						case '-': -							// Do not allow multiple minus signs -							if (state == State::HAS_MINUS) { -								logger.errorAt(ERR_UNEXPECTED_CHAR, reader); -								return false; -							} -							state = State::HAS_MINUS; -							s = -1; -							break; -						case '0': -							// Remember a leading zero for the detection of "0x" -							state = State::LEADING_ZERO; -							break; -						case '.': -							// Remember a leading point as ".eXXX" is invalid -							state = State::LEADING_POINT; -							break; -						default: -							state = State::INT; -							if (!appendChar(c, 10, Part::A, reader, logger)) { -								return false; -							} -							break; -					} -					break; -				case State::LEADING_ZERO: -					if (c == 'x' || c == 'X') { -						state = State::HEX; +	// Iterate over the FSM to extract numbers +	while (reader.peek(c)) { +		// Abort, once a delimiter or whitespace is reached +		if (Utils::isWhitespace(c) || delims.count(c)) { +			reader.resetPeek(); +			break; +		} + +		// The character is not a whitespace character and not a delimiter +		switch (state) { +			case State::INIT: +			case State::HAS_MINUS: +				switch (c) { +					case '-': +						// Do not allow multiple minus signs +						if (state == State::HAS_MINUS) { +							logger.errorAt(ERR_UNEXPECTED_CHAR, reader); +							return false; +						} +						state = State::HAS_MINUS; +						s = -1;  						break; -					} -				// fallthrough -				case State::INT: -					switch (c) { -						case '.': -							state = State::POINT; -							break; -						case 'e': -						case 'E': -							state = State::EXP_INIT; -							break; -						default: -							state = State::INT; -							if (!appendChar(c, 10, Part::A, reader, logger)) { -								return false; -							} -							break; -					} -					break; -				case State::HEX: -					if (!appendChar(c, 16, Part::A, reader, logger)) { -						return false; -					} -					break; -				case State::LEADING_POINT: -				case State::POINT: -					switch (c) { -						case 'e': -						case 'E': -							if (state == State::LEADING_POINT) { -								logger.errorAt(ERR_UNEXPECTED_CHAR, reader); -								return false; -							} -							state = State::EXP_INIT; -							break; -						default: -							state = State::POINT; -							if (!appendChar(c, 10, Part::N, reader, logger)) { -								return false; -							} -							break; -					} +					case '0': +						// Remember a leading zero for the detection of "0x" +						state = State::LEADING_ZERO; +						break; +					case '.': +						// Remember a leading point as ".eXXX" is invalid +						state = State::LEADING_POINT; +						break; +					default: +						state = State::INT; +						if (!appendChar(c, 10, Part::A, reader, logger)) { +							return false; +						} +						break; +				} +				break; +			case State::LEADING_ZERO: +				if (c == 'x' || c == 'X') { +					state = State::HEX;  					break; -				case State::EXP_HAS_MINUS: -				case State::EXP_INIT: -					if (c == '-') { -						if (state == State::EXP_HAS_MINUS) { +				} +			// fallthrough +			case State::INT: +				switch (c) { +					case '.': +						state = State::POINT; +						break; +					case 'e': +					case 'E': +						state = State::EXP_INIT; +						break; +					default: +						state = State::INT; +						if (!appendChar(c, 10, Part::A, reader, logger)) { +							return false; +						} +						break; +				} +				break; +			case State::HEX: +				if (!appendChar(c, 16, Part::A, reader, logger)) { +					return false; +				} +				break; +			case State::LEADING_POINT: +			case State::POINT: +				switch (c) { +					case 'e': +					case 'E': +						if (state == State::LEADING_POINT) {  							logger.errorAt(ERR_UNEXPECTED_CHAR, reader);  							return false;  						} -						state = State::EXP_HAS_MINUS; -						sE = -1; -					} else { -						state = State::EXP; -						if (!appendChar(c, 10, Part::E, reader, logger)) { +						state = State::EXP_INIT; +						break; +					default: +						state = State::POINT; +						if (!appendChar(c, 10, Part::N, reader, logger)) {  							return false;  						} +						break; +				} +				break; +			case State::EXP_HAS_MINUS: +			case State::EXP_INIT: +				if (c == '-') { +					if (state == State::EXP_HAS_MINUS) { +						logger.errorAt(ERR_UNEXPECTED_CHAR, reader); +						return false;  					} -					break; -				case State::EXP: +					state = State::EXP_HAS_MINUS; +					sE = -1; +				} else { +					state = State::EXP;  					if (!appendChar(c, 10, Part::E, reader, logger)) {  						return false;  					} -					break; -			} -			reader.consumePeek(); +				} +				break; +			case State::EXP: +				if (!appendChar(c, 10, Part::E, reader, logger)) { +					return false; +				} +				break;  		} +		reader.consumePeek(); +	} -		// States in which ending is valid. Log an error in other states -		if (state == State::LEADING_ZERO || state == State::HEX || -		    state == State::INT || state == State::POINT || -		    state == State::EXP) { -			return true; -		} -		logger.errorAt(ERR_UNEXPECTED_END, reader); -		return false; +	// States in which ending is valid. Log an error in other states +	if (state == State::LEADING_ZERO || state == State::HEX || +	    state == State::INT || state == State::POINT || +	    state == State::EXP) { +		return true;  	} -}; +	logger.errorAt(ERR_UNEXPECTED_END, reader); +	return false; +} +  /* Class Reader */ @@ -324,7 +326,7 @@ static std::pair<bool, T> error(CharReader &reader, Logger &logger,  	return std::make_pair(false, std::move(res));  } -std::pair<bool, std::string> Reader::parseString( +std::pair<bool, std::string> VariantReader::parseString(      CharReader &reader, Logger &logger,      const std::unordered_set<char> *delims)  { @@ -422,7 +424,7 @@ std::pair<bool, std::string> Reader::parseString(  	return error(reader, logger, ERR_UNEXPECTED_END, res.str());  } -std::pair<bool, Variant::arrayType> Reader::parseArray( +std::pair<bool, Variant::arrayType> VariantReader::parseArray(      CharReader &reader, Logger &logger, char delim)  {  	Variant::arrayType res; @@ -490,7 +492,7 @@ std::pair<bool, Variant::arrayType> Reader::parseArray(  	return error(reader, logger, ERR_UNEXPECTED_END, res);  } -std::pair<bool, std::string> Reader::parseUnescapedString( +std::pair<bool, std::string> VariantReader::parseUnescapedString(      CharReader &reader, Logger &logger,      const std::unordered_set<char> &delims)  { @@ -527,7 +529,7 @@ std::pair<bool, std::string> Reader::parseUnescapedString(  	return std::make_pair(true, res.str());  } -std::pair<bool, int64_t> Reader::parseInteger( +std::pair<bool, int64_t> VariantReader::parseInteger(      CharReader &reader, Logger &logger,      const std::unordered_set<char> &delims)  { @@ -544,7 +546,7 @@ std::pair<bool, int64_t> Reader::parseInteger(  	return std::make_pair(false, n.intValue());  } -std::pair<bool, double> Reader::parseDouble( +std::pair<bool, double> VariantReader::parseDouble(      CharReader &reader, Logger &logger,      const std::unordered_set<char> &delims)  { @@ -553,7 +555,7 @@ std::pair<bool, double> Reader::parseDouble(  	return std::make_pair(res, n.doubleValue());  } -std::pair<bool, Variant> Reader::parseGeneric( +std::pair<bool, Variant> VariantReader::parseGeneric(      CharReader &reader, Logger &logger,      const std::unordered_set<char> &delims)  { @@ -582,7 +584,7 @@ std::pair<bool, Variant> Reader::parseGeneric(  			Number n;  			// Fork the reader -			utils::CharReaderFork fork = reader.fork(); +			CharReaderFork fork = reader.fork();  			// TODO: Fork logger @@ -620,5 +622,4 @@ std::pair<bool, Variant> Reader::parseGeneric(  	return error(reader, logger, ERR_UNEXPECTED_END, nullptr);  }  } -} diff --git a/src/core/variant/Reader.hpp b/src/core/common/VariantReader.hpp index 4114d46..5e7c5d2 100644 --- a/src/core/variant/Reader.hpp +++ b/src/core/common/VariantReader.hpp @@ -17,7 +17,7 @@  */  /** - * @file Reader.hpp + * @file VariantReader.hpp   *   * Provides parsers for various micro formats. These formats include integers,   * doubles, strings, JSON and the Ousía struct notation. @@ -32,23 +32,21 @@  #include <unordered_set>  #include <utility> -#include <core/utils/CharReader.hpp> -#include <core/Logger.hpp> - +#include "CharReader.hpp" +#include "Logger.hpp"  #include "Variant.hpp"  namespace ousia { -namespace variant { -class Reader { +class VariantReader {  private:  	/**  	 * Parses a string which may either be enclosed by " or ', unescapes  	 * entities in the string as specified for JavaScript.  	 * -	 * @param reader is a reference to the CharReader instance which is -	 * the source for the character data. The reader will be positioned after -	 * the terminating quote character or at the terminating delimiting +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned +	 * after the terminating quote character or at the terminating delimiting  	 * character.  	 * @param logger is the logger instance that should be used to log error  	 * messages and warnings. @@ -58,7 +56,7 @@ private:  	 * is read.  	 */  	static std::pair<bool, std::string> parseString( -	    CharReader &reader, Logger &logger, +	    CharReader &VariantReader, Logger &logger,  	    const std::unordered_set<char> *delims);  public: @@ -66,9 +64,9 @@ public:  	 * Parses a string which may either be enclosed by " or ', unescapes  	 * entities in the string as specified for JavaScript.  	 * -	 * @param reader is a reference to the CharReader instance which is -	 * the source for the character data. The reader will be positioned after -	 * the terminating quote character or at the terminating delimiting +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned +	 * after the terminating quote character or at the terminating delimiting  	 * character.  	 * @param logger is the logger instance that should be used to log error  	 * messages and warnings. @@ -77,93 +75,92 @@ public:  	 * outside).  	 */  	static std::pair<bool, std::string> parseString( -	    CharReader &reader, Logger &logger, +	    CharReader &VariantReader, Logger &logger,  	    const std::unordered_set<char> &delims)  	{ -		return parseString(reader, logger, &delims); +		return parseString(VariantReader, logger, &delims);  	}  	/**  	 * Parses a string which may either be enclosed by " or ', unescapes  	 * entities in the string as specified for JavaScript.  	 * -	 * @param reader is a reference to the CharReader instance which is -	 * the source for the character data. The reader will be positioned after -	 * the terminating quote character or at the terminating delimiting +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned  +	 * after the terminating quote character or at the terminating delimiting  	 * character.  	 * @param logger is the logger instance that should be used to log error  	 * messages and warnings.  	 */ -	static std::pair<bool, std::string> parseString(CharReader &reader, +	static std::pair<bool, std::string> parseString(CharReader &VariantReader,  	                                                Logger &logger)  	{ -		return parseString(reader, logger, nullptr); +		return parseString(VariantReader, logger, nullptr);  	}  	/** -	 * Extracts an unescaped string from the given buffered char reader +	 * Extracts an unescaped string from the given buffered char VariantReader  	 * instance. This function just reads text until one of the given delimiter  	 * characters is reached.  	 * -	 * @param reader is a reference to the CharReader instance which is -	 * the source for the character data. The reader will be positioned at the -	 * terminating delimiting character. +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned +	 * at the terminating delimiting character.  	 * @param delims is a set of characters which will terminate the string.  	 * These characters are not included in the result. May not be nullptr.  	 */  	static std::pair<bool, std::string> parseUnescapedString( -	    CharReader &reader, Logger &logger, +	    CharReader &VariantReader, Logger &logger,  	    const std::unordered_set<char> &delims);  	/** -	 * Parses an integer from the given buffered char reader instance until one -	 * of the given delimiter characters is reached. +	 * Parses an integer from the given buffered char VariantReader instance +	 * until one of the given delimiter characters is reached.  	 * -	 * @param reader is a reference to the CharReader instance from -	 * which the character data should been reader. The reader will be -	 * positioned at the terminating delimiting character or directly after the -	 * integer. +	 * @param VariantReader is a reference to the CharReader instance from +	 * which the character data should been VariantReader. The VariantReader +	 * will be positioned at the terminating delimiting character or directly +	 * after the integer.  	 */  	static std::pair<bool, int64_t> parseInteger( -	    CharReader &reader, Logger &logger, +	    CharReader &VariantReader, Logger &logger,  	    const std::unordered_set<char> &delims);  	/** -	 * Parses an double from the given buffered char reader instance until one -	 * of the given delimiter characters is reached. +	 * Parses an double from the given buffered char VariantReader instance +	 * until one of the given delimiter characters is reached.  	 * -	 * @param reader is a reference to the CharReader instance from -	 * which the character data should been reader. The reader will be -	 * positioned at the terminating delimiting character or directly after the -	 * integer. +	 * @param VariantReader is a reference to the CharReader instance from +	 * which the character data should been VariantReader. The VariantReader +	 * will be positioned at the terminating delimiting character or directly +	 * after the integer.  	 */  	static std::pair<bool, double> parseDouble( -	    CharReader &reader, Logger &logger, +	    CharReader &VariantReader, Logger &logger,  	    const std::unordered_set<char> &delims);  	/**  	 * Parses an array of values.  	 */  	static std::pair<bool, Variant::arrayType> parseArray( -	    CharReader &reader, Logger &logger, char delim = 0); +	    CharReader &VariantReader, Logger &logger, char delim = 0);  	/**  	 * Tries to parse the most specific item from the given stream until one of  	 * the given delimiters is reached or a meaningful literal has been read.  	 * The resulting variant represents the value that has been read.  	 * -	 * @param reader is a reference to the CharReader instance which is -	 * the source for the character data. The reader will be positioned at the -	 * terminating delimiting character. +	 * @param VariantReader is a reference to the CharReader instance which is +	 * the source for the character data. The VariantReader will be positioned +	 * at the terminating delimiting character.  	 * @param delims is a set of characters which will terminate the string.  	 * These characters are not included in the result. May not be nullptr.  	 */  	static std::pair<bool, Variant> parseGeneric( -	    CharReader &reader, Logger &logger, +	    CharReader &VariantReader, Logger &logger,  	    const std::unordered_set<char> &delims);  };  } -}  #endif /* _OUSIA_VARIANT_READER_HPP_ */ diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index 5dac956..e155cfd 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -32,10 +32,10 @@  #include <set>  #include <string> -#include <core/Exceptions.hpp>  #include <core/Node.hpp> -#include <core/Logger.hpp>  #include <core/Registry.hpp> +#include <core/common/Exceptions.hpp> +#include <core/common/Logger.hpp>  #include "Scope.hpp" diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp index dca7f35..5e801ee 100644 --- a/src/core/parser/ParserStack.cpp +++ b/src/core/parser/ParserStack.cpp @@ -20,8 +20,8 @@  #include "ParserStack.hpp" -#include <core/Utils.hpp> -#include <core/Exceptions.hpp> +#include <core/common/Utils.hpp> +#include <core/common/Exceptions.hpp>  namespace ousia {  namespace parser { diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp index c5ed4e4..233f4f9 100644 --- a/src/core/parser/ParserStack.hpp +++ b/src/core/parser/ParserStack.hpp @@ -37,7 +37,7 @@  #include <stack>  #include <vector> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp>  #include "Parser.hpp" diff --git a/src/plugins/css/CSSParser.cpp b/src/plugins/css/CSSParser.cpp index 4cbe93f..5985047 100644 --- a/src/plugins/css/CSSParser.cpp +++ b/src/plugins/css/CSSParser.cpp @@ -18,7 +18,7 @@  #include "CSSParser.hpp" -#include <core/variant/Reader.hpp> +#include <core/common/VariantReader.hpp>  namespace ousia {  namespace parser { @@ -77,7 +77,7 @@ static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = {  Rooted<Node> CSSParser::parse(std::istream &is, ParserContext &ctx)  { -	BufferedCharReader input{is}; +	CharReader input{is};  	CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS};  	tokenizer.ignoreComments = true;  	tokenizer.ignoreLinebreaks = true; @@ -228,14 +228,14 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer,  			Variant::arrayType args;  			// we require at least one argument, if parantheses are used  			// XXX -			/*args.push_back(variant::Reader::parseGeneric(tokenizer.getInput(), +			args.push_back(VariantReader::parseGeneric(tokenizer.getInput(),  			                                             ctx.logger, -			                                             {',', ')'}).second);*/ +			                                             {',', ')'}).second);  			while (expect(COMMA, tokenizer, t, false, ctx)) {  				// as long as we find commas we expect new arguments. -				/*args.push_back( -				    variant::Reader::parseGeneric( -				        tokenizer.getInput(), ctx.logger, {',', ')'}).second);*/ +				args.push_back( +				    VariantReader::parseGeneric( +				        tokenizer.getInput(), ctx.logger, {',', ')'}).second);  			}  			expect(PAREN_CLOSE, tokenizer, t, true, ctx);  			// and we return with the finished Selector. @@ -334,8 +334,8 @@ bool CSSParser::parseRule(CodeTokenizer &tokenizer, ParserContext &ctx,  	expect(COLON, tokenizer, t, true, ctx);  	// then the value  	// TODO: Resolve key for appropriate parsing function here. -	/*value = variant::Reader::parseGeneric(tokenizer.getInput(), ctx.logger, -	                                      {';'}).second;*/ +	value = VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, +	                                      {';'}).second;  	// and a ;  	expect(SEMICOLON, tokenizer, t, true, ctx);  	return true; diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp index 82f0cd1..eeb5b2c 100644 --- a/src/plugins/css/CSSParser.hpp +++ b/src/plugins/css/CSSParser.hpp @@ -22,9 +22,9 @@  #include <vector>  #include <utility> -#include <core/BufferedCharReader.hpp>  #include <core/CodeTokenizer.hpp>  #include <core/CSS.hpp> +#include <core/common/CharReader.hpp>  #include <core/parser/Parser.hpp>  namespace ousia { diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp index ce2857e..9a7b4d8 100644 --- a/src/plugins/xml/XmlParser.cpp +++ b/src/plugins/xml/XmlParser.cpp @@ -20,7 +20,7 @@  #include <expat.h> -#include <core/Utils.hpp> +#include <core/common/Utils.hpp>  #include <core/parser/ParserStack.hpp>  #include "XmlParser.hpp"  | 
