diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-11 15:26:50 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-11 15:26:50 +0100 |
commit | 3f62168ed0b088eec3cb2903f03966f7d501f564 (patch) | |
tree | 781f5bd9b304d9eb931827a26f463575d772983d | |
parent | b74936760e28a92cadfaec47928ea478fe2d72ee (diff) |
moved to CharReader everywhere
-rw-r--r-- | CMakeLists.txt | 24 | ||||
-rw-r--r-- | src/core/BufferedCharReader.cpp | 263 | ||||
-rw-r--r-- | src/core/BufferedCharReader.hpp | 288 | ||||
-rw-r--r-- | src/core/CSS.hpp | 2 | ||||
-rw-r--r-- | src/core/CodeTokenizer.hpp | 8 | ||||
-rw-r--r-- | src/core/Registry.cpp | 3 | ||||
-rw-r--r-- | src/core/Tokenizer.cpp | 12 | ||||
-rw-r--r-- | src/core/Tokenizer.hpp | 27 | ||||
-rw-r--r-- | src/core/common/CharReader.cpp (renamed from src/core/utils/CharReader.cpp) | 5 | ||||
-rw-r--r-- | src/core/common/CharReader.hpp (renamed from src/core/utils/CharReader.hpp) | 7 | ||||
-rw-r--r-- | src/core/common/Exceptions.cpp (renamed from src/core/Exceptions.cpp) | 0 | ||||
-rw-r--r-- | src/core/common/Exceptions.hpp (renamed from src/core/Exceptions.hpp) | 0 | ||||
-rw-r--r-- | src/core/common/Logger.cpp (renamed from src/core/Logger.cpp) | 0 | ||||
-rw-r--r-- | src/core/common/Logger.hpp (renamed from src/core/Logger.hpp) | 0 | ||||
-rw-r--r-- | src/core/common/Utils.cpp (renamed from src/core/Utils.cpp) | 0 | ||||
-rw-r--r-- | src/core/common/Utils.hpp (renamed from src/core/Utils.hpp) | 0 | ||||
-rw-r--r-- | src/core/common/Variant.cpp (renamed from src/core/variant/Variant.cpp) | 3 | ||||
-rw-r--r-- | src/core/common/Variant.hpp (renamed from src/core/variant/Variant.hpp) | 7 | ||||
-rw-r--r-- | src/core/common/VariantReader.cpp (renamed from src/core/variant/Reader.cpp) | 247 | ||||
-rw-r--r-- | src/core/common/VariantReader.hpp (renamed from src/core/variant/Reader.hpp) | 87 | ||||
-rw-r--r-- | src/core/parser/Parser.hpp | 4 | ||||
-rw-r--r-- | src/core/parser/ParserStack.cpp | 4 | ||||
-rw-r--r-- | src/core/parser/ParserStack.hpp | 2 | ||||
-rw-r--r-- | src/plugins/css/CSSParser.cpp | 18 | ||||
-rw-r--r-- | src/plugins/css/CSSParser.hpp | 2 | ||||
-rw-r--r-- | src/plugins/xml/XmlParser.cpp | 2 | ||||
-rw-r--r-- | test/core/BufferedCharReaderTest.cpp | 185 | ||||
-rw-r--r-- | test/core/CodeTokenizerTest.cpp | 26 | ||||
-rw-r--r-- | test/core/RegistryTest.cpp | 2 | ||||
-rw-r--r-- | test/core/TokenizerTest.cpp | 14 | ||||
-rw-r--r-- | test/core/common/CharReaderTest.cpp (renamed from test/core/utils/CharReaderTest.cpp) | 4 | ||||
-rw-r--r-- | test/core/common/LoggerTest.cpp (renamed from test/core/LoggerTest.cpp) | 2 | ||||
-rw-r--r-- | test/core/common/UtilsTest.cpp (renamed from test/core/UtilsTest.cpp) | 2 | ||||
-rw-r--r-- | test/core/common/VariantReaderTest.cpp (renamed from test/core/variant/ReaderTest.cpp) | 70 | ||||
-rw-r--r-- | test/core/common/VariantTest.cpp (renamed from test/core/variant/VariantTest.cpp) | 2 | ||||
-rw-r--r-- | test/plugins/css/CSSParserTest.cpp | 16 |
36 files changed, 288 insertions, 1050 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index d824b86..3cfa185 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,18 +97,20 @@ ADD_DEFINITIONS( ) ADD_LIBRARY(ousia_core - src/core/BufferedCharReader src/core/CodeTokenizer src/core/CSS - src/core/Exceptions - src/core/Logger src/core/Managed src/core/Node src/core/Registry src/core/ResourceLocator src/core/Tokenizer # src/core/Typesystem - src/core/Utils + src/core/common/CharReader + src/core/common/Exceptions + src/core/common/Logger + src/core/common/Utils + src/core/common/Variant + src/core/common/VariantReader src/core/parser/Parser src/core/parser/ParserStack src/core/parser/Scope @@ -116,9 +118,6 @@ ADD_LIBRARY(ousia_core # src/core/script/Object # src/core/script/ScriptEngine # src/core/script/Variant - src/core/utils/CharReader - src/core/variant/Reader - src/core/variant/Variant ) ADD_LIBRARY(ousia_css @@ -155,10 +154,8 @@ IF(TEST) ) ADD_EXECUTABLE(ousia_test_core - test/core/BufferedCharReaderTest test/core/CodeTokenizerTest test/core/CSSTest - test/core/LoggerTest test/core/ManagedTest test/core/ManagedContainersTest test/core/NodeTest @@ -166,14 +163,15 @@ IF(TEST) test/core/RegistryTest test/core/ResourceLocatorTest test/core/TokenizerTest - test/core/UtilsTest + test/core/common/CharReaderTest + test/core/common/LoggerTest + test/core/common/VariantReaderTest + test/core/common/VariantTest + test/core/common/UtilsTest test/core/parser/ParserStackTest # test/core/script/FunctionTest # test/core/script/ObjectTest # test/core/script/VariantTest - test/core/utils/CharReaderTest - test/core/variant/ReaderTest - test/core/variant/VariantTest ) TARGET_LINK_LIBRARIES(ousia_test_core diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp deleted file mode 100644 index aeedf12..0000000 --- a/src/core/BufferedCharReader.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <array> - -#include "Utils.hpp" - -#include "BufferedCharReader.hpp" - -namespace ousia { - -// Constants used within the linebreak statemachine. -static const uint8_t LB_STATE_NONE = 0x00; -static const uint8_t LB_STATE_ONE = 0x01; -static const uint8_t LB_STATE_LF = 0x10; -static const uint8_t LB_STATE_CR = 0x20; -static const uint8_t LB_STATE_MASK_CNT = 0x0F; -static const uint8_t LB_STATE_MASK_TYPE = 0xF0; - -/* Struct BufferedCharReader::ReadCursor */ - -BufferedCharReader::ReadCursor::ReadCursor(unsigned int line, - unsigned int column, - bool destructive) - : line(line), - column(column), - bufferElem(0), - bufferPos(0), - destructive(destructive), - lbState(LB_STATE_NONE) -{ -} - -void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor) -{ - this->line = cursor.line; - this->column = cursor.column; - this->bufferElem = cursor.bufferElem; - this->bufferPos = cursor.bufferPos; - this->lbState = cursor.lbState; -} - -/* Class BufferedCharReader */ - -BufferedCharReader::BufferedCharReader(int line, int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -BufferedCharReader::BufferedCharReader(const std::string &str, int line, - int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(const std::string &str) - : inputStream(nullptr), - readCursor(1, 1, true), - peekCursor(1, 1, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line, - int column) - : inputStream(&inputStream), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -void BufferedCharReader::feed(const std::string &data) -{ - if (!depleted && !inputStream) { - buffer.push_back(data); - } -} - -void BufferedCharReader::close() -{ - if (!inputStream) { - depleted = true; - } -} - -bool BufferedCharReader::substituteLinebreaks(ReadCursor &cursor, char *c) -{ - // Handle line breaks, inserts breakes after the following character - // combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n - if ((*c == '\n') || (*c == '\r')) { - // Determine the type of the current linebreak character - const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR; - - // Read the last count and the last type from the state - const uint8_t lastCount = cursor.lbState & LB_STATE_MASK_CNT; - const uint8_t lastType = cursor.lbState & LB_STATE_MASK_TYPE; - - // Set the current linebreak type and counter in the state - cursor.lbState = ((lastCount + 1) & 1) | type; - - // If either this is the first instance of this character or the same - // return character is repeated - if (!lastCount || (lastType == type)) { - *c = '\n'; - return true; - } - return false; - } - - // Find the state - cursor.lbState = LB_STATE_NONE; - return true; -} - -bool BufferedCharReader::readCharacterAtCursor(ReadCursor &cursor, char *c) -{ - bool hasChar = false; - while (!hasChar) { - // Abort if the current buffer element does not point to a valid entry - // in the buffer -- we must try to feed another data block into the - // internal buffer - if (cursor.bufferElem >= buffer.size()) { - // Abort if there is no more data or no input stream is set - if (depleted || !inputStream) { - return false; - } - - // Read a buffer of the specified size - constexpr std::streamsize BUFFER_SIZE = 1024; - std::array<char, BUFFER_SIZE> buf; - const std::streamsize cnt = - (*inputStream).read(buf.data(), BUFFER_SIZE).gcount(); - - // If data has been read, append it to the input buffer and try - // again - if (cnt > 0) { - buffer.emplace_back(std::string(buf.data(), cnt)); - continue; - } - - // End of file handling - if (inputStream->fail() || inputStream->eof()) { - depleted = true; - return false; - } - } - - // Fetch the current element the peek pointer points to - const std::string &data = buffer[cursor.bufferElem]; - - // Handle the "no data" case -- either in a destructive or - // non-destructive manner. - if (cursor.bufferPos >= data.length()) { - if (cursor.destructive) { - buffer.pop_front(); - } else { - cursor.bufferElem++; - } - cursor.bufferPos = 0; - continue; - } - - // Read the character, advance the buffer position - *c = *(data.data() + cursor.bufferPos); - cursor.bufferPos++; - - // Substitute linebreaks with a single LF (0x0A) - hasChar = substituteLinebreaks(cursor, c); - } - - // Update the position counter - if (*c == '\n') { - cursor.line++; - cursor.column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((*c & 0x80) && !(*c & 0x40))) { - cursor.column++; - } - } - - return true; -} - -bool BufferedCharReader::peek(char *c) -{ - return readCharacterAtCursor(peekCursor, c); -} - -bool BufferedCharReader::read(char *c) -{ - resetPeek(); - return readCharacterAtCursor(readCursor, c); -} - -void BufferedCharReader::consumePeek() -{ - // Remove all no longer needed buffer elements - for (unsigned int i = 0; i < peekCursor.bufferElem; i++) { - buffer.pop_front(); - } - peekCursor.bufferElem = 0; - - // Copy the peek cursor to the read cursor - readCursor.assign(peekCursor); -} - -bool BufferedCharReader::consumeWhitespace() -{ - char c; - while (peek(&c)) { - if (!Utils::isWhitespace(c)) { - resetPeek(); - return true; - } - consumePeek(); - } - return false; -} - -void BufferedCharReader::resetPeek() -{ - // Reset the peek cursor to the read cursor - peekCursor.assign(readCursor); -} - -bool BufferedCharReader::atEnd() const -{ - if (depleted || !inputStream) { - if (buffer.size() <= 0) { - return true; - } else if (buffer.size() == 1) { - return buffer[0].size() == readCursor.bufferPos; - } - } - return false; -} -} - diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp deleted file mode 100644 index e7f3186..0000000 --- a/src/core/BufferedCharReader.hpp +++ /dev/null @@ -1,288 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file BufferedCharReader.hpp - * - * Contains the BufferedCharReader class which is used for reading/peeking - * single characters from an input stream or string. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_BUFFERED_CHAR_READER_H_ -#define _OUSIA_BUFFERED_CHAR_READER_H_ - -#include <deque> -#include <string> -#include <istream> -#include <cstdint> - -namespace ousia { - -// TODO: Better split this class into multiple classes with base class -// BufferedCharReader where each sub class represents one method of supplying -// the input data (feeding, initial string, input stream). - -/** - * The BufferedCharReader class is used for storing incomming data that - * is fed into the pipeline as well as reading/peeking single characters - * from that buffer. Additionally it counts the current column/row - * (with correct handling for UTF-8) and contains an internal state - * machine that handles the detection of linebreaks and converts these to a - * single '\n'. - */ -class BufferedCharReader { -private: - /** - * The ReadCursor structure is responsible for representing the read - * position within the text an all state machine states belonging to the - * cursor. There are two types of read cursors: destructive and - * non-destructive read cursors. - */ - struct ReadCursor { - /** - * The line the cursor currently points to. - */ - unsigned int line; - - /** - * The column the cursor currently points to. - */ - unsigned int column; - - /** - * The index of the element in the data buffer we're currently reading - * from. - */ - unsigned int bufferElem; - - /** - * The byte position within this data buffer. - */ - unsigned int bufferPos; - - /** - * Specifies whether this is a destructive cursor (bytes are discarded - * once they were read from the buffer). - */ - const bool destructive; - - /** - * State variable used in the internal state machine of the - * line feed detection. - */ - uint8_t lbState; - - /** - * Constructor of the ReadCursor structure. - * - * @param line is the start line. - * @param column is the start column. - * @param destructive specifies whether the ReadCursor is destructive - * (consumes all read characters, as used in the "read cursor") or - * non-destructive (as used in the "peek cursor"). - */ - ReadCursor(unsigned int line, unsigned int column, bool destructive); - - /** - * Copys the data from another ReadCursor without overriding the - * "destructive" flag. - * - * @param cursor is the cursor that should be copied. - */ - void assign(const ReadCursor &cursor); - }; - - /** - * Pointer at an (optional) input stream used for reading a chunk of data - * whenever the input buffer depletes. - */ - std::istream *inputStream; - - /** - * The read and the peek cursor. - */ - ReadCursor readCursor, peekCursor; - - /** - * Set to true if there is no more input data. - */ - bool depleted; - - /** - * Queue containing the data that has been fed into the char reader. - */ - std::deque<std::string> buffer; - - /** - * Substitute any combination of linebreaks in the incomming code with "\n". - * Returns true if the current character is meant as output, false - * otherwise. - */ - bool substituteLinebreaks(ReadCursor &cursor, char *c); - - /** - * Reads a character from the input buffer and advances the given read - * cursor. - * - * @param cursor is a reference to the read cursor that should be used - * for reading. - * @param hasChar is set to true, if a character is available, false if - * no character is available (e.g. because line breaks are substituted or - * the end of a buffer boundary is reached -- in this case this function - * should be called again with the same parameters.) - * @param c is a output parameter, which will be set to the read character. - * @param returns true if there was enough data in the buffer, false - * otherwise. - */ - bool readCharacterAtCursor(ReadCursor &cursor, char *c); - - /** - * Function that is called for each read character -- updates the row and - * column count. - */ - void updatePositionCounters(const char c); - -public: - - /** - * Constructor of the buffered char reader class with empty buffer as input. - * This operates the BufferedCharReader in a mode where new data has to be - * fed using the "feed" function and explicitly closed using the "close" - * function. - * - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(int line = 1, int column = 1); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(const std::string &str, int line, int column); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - */ - BufferedCharReader(const std::string &str); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param inputStream is the input stream from which incomming data should - * be read. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(std::istream &inputStream, int line = 1, int column = 1); - - /** - * Peeks a single character. If called multiple times, returns the - * character after the previously peeked character. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char *c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char *c); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Moves the read cursor to the next non-whitespace character. Returns - * false, if the end of the stream was reached. - * - * @return false if the end of the stream was reached, false othrwise. - */ - bool consumeWhitespace(); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Feeds new data into the internal buffer of the BufferedCharReader - * class. Only applicable if the buffered char reader was constructed - * without an input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void feed(const std::string &data); - - /** - * Tells the buffered char reader that no more data will be fed. - * Only applicable if the buffered char reader was constructed without an - * input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void close(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - * - * @return true if there is no more data. - */ - bool atEnd() const; - - /** - * Returns the current line (starting with one). - * - * @return the current line number. - */ - int getLine() const { return readCursor.line; } - - /** - * Returns the current column (starting with one). - * - * @return the current column number. - */ - int getColumn() const { return readCursor.column; } -}; -} - -#endif /* _OUSIA_BUFFERED_CHAR_READER_H_ */ - diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 1510f3a..a54d956 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -23,7 +23,7 @@ #include <vector> #include <tuple> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp> #include "Managed.hpp" #include "Node.hpp" diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 43c7abb..4190297 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -22,7 +22,7 @@ #include <map> #include <sstream> -#include "BufferedCharReader.hpp" +#include <core/common/CharReader.hpp> #include "Tokenizer.hpp" namespace ousia { @@ -108,8 +108,8 @@ public: /** * - * @param input a BufferedCharReader containing the input for this - * tokenizer, as with a regular tokenizer. + * @param input a CharReader containing the input for this tokenizer, as + * with a regular tokenizer. * @param root a TokenTreeNode representing the root of the TokenTree. * Please note that you have to specify all tokenIDs here that you use * in the descriptors map. @@ -120,7 +120,7 @@ public: * and this CodeTokenizer would recognize the token "//" as starting a * line comment. */ - CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, + CodeTokenizer(CharReader &input, const TokenTreeNode &root, std::map<int, CodeTokenDescriptor> descriptors) : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) { diff --git a/src/core/Registry.cpp b/src/core/Registry.cpp index 6ff9594..74d1cf8 100644 --- a/src/core/Registry.cpp +++ b/src/core/Registry.cpp @@ -16,8 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <core/Logger.hpp> - +#include <core/common/Logger.hpp> #include <core/parser/Parser.hpp> namespace ousia { diff --git a/src/core/Tokenizer.cpp b/src/core/Tokenizer.cpp index b99d1ed..0af5f5a 100644 --- a/src/core/Tokenizer.cpp +++ b/src/core/Tokenizer.cpp @@ -72,7 +72,7 @@ TokenTreeNode::TokenTreeNode(const std::map<std::string, int> &inputs) { } -Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root) +Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root) : input(input), root(root) { } @@ -81,10 +81,10 @@ bool Tokenizer::prepare() { std::stringstream buffer; char c; - int startColumn = input.getColumn(); - int startLine = input.getLine(); + uint32_t startColumn = input.getColumn(); + uint32_t startLine = input.getLine(); bool bufEmpty = true; - while (input.peek(&c)) { + while (input.peek(c)) { if (root.children.find(c) != root.children.end()) { // if there might be a special token, keep peeking forward // until we find the token (or we don't). @@ -107,7 +107,7 @@ bool Tokenizer::prepare() input.consumePeek(); } } - if (!input.peek(&c)) { + if (!input.peek(c)) { // if we are at the end we break off the search. break; } @@ -153,7 +153,7 @@ bool Tokenizer::prepare() } } else{ //if we found nothing, read at least one character. - input.peek(&c); + input.peek(c); } } buffer << c; diff --git a/src/core/Tokenizer.hpp b/src/core/Tokenizer.hpp index 8f80150..33327cc 100644 --- a/src/core/Tokenizer.hpp +++ b/src/core/Tokenizer.hpp @@ -19,11 +19,12 @@ #ifndef _OUSIA_TOKENIZER_HPP_ #define _OUSIA_TOKENIZER_HPP_ +#include <cstdint> +#include <deque> #include <istream> #include <map> -#include <deque> -#include "BufferedCharReader.hpp" +#include <core/common/CharReader.hpp> namespace ousia { @@ -120,13 +121,13 @@ static const int TOKEN_TEXT = -2; struct Token { int tokenId; std::string content; - int startColumn; - int startLine; - int endColumn; - int endLine; + uint32_t startColumn; + uint32_t startLine; + uint32_t endColumn; + uint32_t endLine; - Token(int tokenId, std::string content, int startColumn, int startLine, - int endColumn, int endLine) + Token(int tokenId, std::string content, uint32_t startColumn, uint32_t startLine, + uint32_t endColumn, uint32_t endLine) : tokenId(tokenId), content(content), startColumn(startColumn), @@ -160,7 +161,7 @@ struct Token { */ class Tokenizer { private: - BufferedCharReader &input; + CharReader &input; const TokenTreeNode &root; std::deque<Token> peeked; unsigned int peekCursor = 0; @@ -185,14 +186,14 @@ protected: public: /** * @param input The input of a Tokenizer is given in the form of a - * BufferedCharReader. Please refer to the respective documentation. + * CharReader. Please refer to the respective documentation. * @param root This is meant to be the root of a TokenTree giving the * specification of user-defined tokens this Tokenizer should recognize. * The Tokenizer promises to not change the TokenTree such that you can * re-use the same specification for multiple inputs. * Please refer to the TokenTreeNode documentation for more information. */ - Tokenizer(BufferedCharReader &input, const TokenTreeNode &root); + Tokenizer(CharReader &input, const TokenTreeNode &root); /** * The next method consumes one Token from the input stream and gives @@ -224,9 +225,9 @@ public: */ void consumePeek(); - const BufferedCharReader &getInput() const { return input; } + const CharReader &getInput() const { return input; } - BufferedCharReader &getInput() { return input; } + CharReader &getInput() { return input; } }; } diff --git a/src/core/utils/CharReader.cpp b/src/core/common/CharReader.cpp index 61616d7..373c0c1 100644 --- a/src/core/utils/CharReader.cpp +++ b/src/core/common/CharReader.cpp @@ -21,12 +21,10 @@ #include <limits> #include <sstream> -#include <core/Utils.hpp> - #include "CharReader.hpp" +#include "Utils.hpp" namespace ousia { -namespace utils { /* Helper functions */ @@ -639,5 +637,4 @@ void CharReaderFork::commit() parentPeekCursor.assign(buffer, peekCursor); } } -} diff --git a/src/core/utils/CharReader.hpp b/src/core/common/CharReader.hpp index 1306026..3cbe4b4 100644 --- a/src/core/utils/CharReader.hpp +++ b/src/core/common/CharReader.hpp @@ -33,7 +33,6 @@ #include <vector> namespace ousia { -namespace utils { /** * A chunked ring buffer used in CharReader to provide access to an input stream @@ -659,12 +658,6 @@ public: */ void commit(); }; -} - -/** - * Alias of the commonly used CharReader class. - */ -using CharReader = utils::CharReader; } diff --git a/src/core/Exceptions.cpp b/src/core/common/Exceptions.cpp index d064f35..d064f35 100644 --- a/src/core/Exceptions.cpp +++ b/src/core/common/Exceptions.cpp diff --git a/src/core/Exceptions.hpp b/src/core/common/Exceptions.hpp index 00d6106..00d6106 100644 --- a/src/core/Exceptions.hpp +++ b/src/core/common/Exceptions.hpp diff --git a/src/core/Logger.cpp b/src/core/common/Logger.cpp index 17f55a6..17f55a6 100644 --- a/src/core/Logger.cpp +++ b/src/core/common/Logger.cpp diff --git a/src/core/Logger.hpp b/src/core/common/Logger.hpp index e6b97f4..e6b97f4 100644 --- a/src/core/Logger.hpp +++ b/src/core/common/Logger.hpp diff --git a/src/core/Utils.cpp b/src/core/common/Utils.cpp index c460ed4..c460ed4 100644 --- a/src/core/Utils.cpp +++ b/src/core/common/Utils.cpp diff --git a/src/core/Utils.hpp b/src/core/common/Utils.hpp index 5332b50..5332b50 100644 --- a/src/core/Utils.hpp +++ b/src/core/common/Utils.hpp diff --git a/src/core/variant/Variant.cpp b/src/core/common/Variant.cpp index d33cd4f..27fc6e7 100644 --- a/src/core/variant/Variant.cpp +++ b/src/core/common/Variant.cpp @@ -18,8 +18,7 @@ #include <sstream> -#include <core/Utils.hpp> - +#include "Utils.hpp" #include "Variant.hpp" namespace ousia { diff --git a/src/core/variant/Variant.hpp b/src/core/common/Variant.hpp index 1e62644..d411fd3 100644 --- a/src/core/variant/Variant.hpp +++ b/src/core/common/Variant.hpp @@ -39,10 +39,9 @@ // http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html // later (will allow to use 8 bytes for a variant) -#include <core/Exceptions.hpp> +#include "Exceptions.hpp" namespace ousia { -namespace variant { /** * Instances of the Variant class represent any kind of data that is exchanged @@ -758,9 +757,5 @@ public: }; } -// Alias for the (very often used and unambigous) variant class -using Variant = variant::Variant; -} - #endif /* _OUSIA_VARIANT_HPP_ */ diff --git a/src/core/variant/Reader.cpp b/src/core/common/VariantReader.cpp index 5c167cd..e611842 100644 --- a/src/core/variant/Reader.cpp +++ b/src/core/common/VariantReader.cpp @@ -21,12 +21,10 @@ #include <cmath> #include <sstream> -#include <core/Utils.hpp> - -#include "Reader.hpp" +#include "VariantReader.hpp" +#include "Utils.hpp" namespace ousia { -namespace variant { // TODO: Better error messages (like "Expected 'x' but got 'y'") // TODO: Replace delims with single char delim where possible @@ -177,134 +175,138 @@ public: * delimiters. */ bool parse(CharReader &reader, Logger &logger, - const std::unordered_set<char> &delims) - { - State state = State::INIT; - char c; + const std::unordered_set<char> &delims); +}; - // Consume the first whitespace characters - reader.consumeWhitespace(); +bool Number::parse(CharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) +{ + State state = State::INIT; + char c; - // Iterate over the FSM to extract numbers - while (reader.peek(c)) { - // Abort, once a delimiter or whitespace is reached - if (Utils::isWhitespace(c) || delims.count(c)) { - reader.resetPeek(); - break; - } + // Consume the first whitespace characters + reader.consumeWhitespace(); - // The character is not a whitespace character and not a delimiter - switch (state) { - case State::INIT: - case State::HAS_MINUS: - switch (c) { - case '-': - // Do not allow multiple minus signs - if (state == State::HAS_MINUS) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::HAS_MINUS; - s = -1; - break; - case '0': - // Remember a leading zero for the detection of "0x" - state = State::LEADING_ZERO; - break; - case '.': - // Remember a leading point as ".eXXX" is invalid - state = State::LEADING_POINT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::LEADING_ZERO: - if (c == 'x' || c == 'X') { - state = State::HEX; + // Iterate over the FSM to extract numbers + while (reader.peek(c)) { + // Abort, once a delimiter or whitespace is reached + if (Utils::isWhitespace(c) || delims.count(c)) { + reader.resetPeek(); + break; + } + + // The character is not a whitespace character and not a delimiter + switch (state) { + case State::INIT: + case State::HAS_MINUS: + switch (c) { + case '-': + // Do not allow multiple minus signs + if (state == State::HAS_MINUS) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + state = State::HAS_MINUS; + s = -1; break; - } - // fallthrough - case State::INT: - switch (c) { - case '.': - state = State::POINT; - break; - case 'e': - case 'E': - state = State::EXP_INIT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::HEX: - if (!appendChar(c, 16, Part::A, reader, logger)) { - return false; - } - break; - case State::LEADING_POINT: - case State::POINT: - switch (c) { - case 'e': - case 'E': - if (state == State::LEADING_POINT) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::EXP_INIT; - break; - default: - state = State::POINT; - if (!appendChar(c, 10, Part::N, reader, logger)) { - return false; - } - break; - } + case '0': + // Remember a leading zero for the detection of "0x" + state = State::LEADING_ZERO; + break; + case '.': + // Remember a leading point as ".eXXX" is invalid + state = State::LEADING_POINT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::LEADING_ZERO: + if (c == 'x' || c == 'X') { + state = State::HEX; break; - case State::EXP_HAS_MINUS: - case State::EXP_INIT: - if (c == '-') { - if (state == State::EXP_HAS_MINUS) { + } + // fallthrough + case State::INT: + switch (c) { + case '.': + state = State::POINT; + break; + case 'e': + case 'E': + state = State::EXP_INIT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::HEX: + if (!appendChar(c, 16, Part::A, reader, logger)) { + return false; + } + break; + case State::LEADING_POINT: + case State::POINT: + switch (c) { + case 'e': + case 'E': + if (state == State::LEADING_POINT) { logger.errorAt(ERR_UNEXPECTED_CHAR, reader); return false; } - state = State::EXP_HAS_MINUS; - sE = -1; - } else { - state = State::EXP; - if (!appendChar(c, 10, Part::E, reader, logger)) { + state = State::EXP_INIT; + break; + default: + state = State::POINT; + if (!appendChar(c, 10, Part::N, reader, logger)) { return false; } + break; + } + break; + case State::EXP_HAS_MINUS: + case State::EXP_INIT: + if (c == '-') { + if (state == State::EXP_HAS_MINUS) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; } - break; - case State::EXP: + state = State::EXP_HAS_MINUS; + sE = -1; + } else { + state = State::EXP; if (!appendChar(c, 10, Part::E, reader, logger)) { return false; } - break; - } - reader.consumePeek(); + } + break; + case State::EXP: + if (!appendChar(c, 10, Part::E, reader, logger)) { + return false; + } + break; } + reader.consumePeek(); + } - // States in which ending is valid. Log an error in other states - if (state == State::LEADING_ZERO || state == State::HEX || - state == State::INT || state == State::POINT || - state == State::EXP) { - return true; - } - logger.errorAt(ERR_UNEXPECTED_END, reader); - return false; + // States in which ending is valid. Log an error in other states + if (state == State::LEADING_ZERO || state == State::HEX || + state == State::INT || state == State::POINT || + state == State::EXP) { + return true; } -}; + logger.errorAt(ERR_UNEXPECTED_END, reader); + return false; +} + /* Class Reader */ @@ -324,7 +326,7 @@ static std::pair<bool, T> error(CharReader &reader, Logger &logger, return std::make_pair(false, std::move(res)); } -std::pair<bool, std::string> Reader::parseString( +std::pair<bool, std::string> VariantReader::parseString( CharReader &reader, Logger &logger, const std::unordered_set<char> *delims) { @@ -422,7 +424,7 @@ std::pair<bool, std::string> Reader::parseString( return error(reader, logger, ERR_UNEXPECTED_END, res.str()); } -std::pair<bool, Variant::arrayType> Reader::parseArray( +std::pair<bool, Variant::arrayType> VariantReader::parseArray( CharReader &reader, Logger &logger, char delim) { Variant::arrayType res; @@ -490,7 +492,7 @@ std::pair<bool, Variant::arrayType> Reader::parseArray( return error(reader, logger, ERR_UNEXPECTED_END, res); } -std::pair<bool, std::string> Reader::parseUnescapedString( +std::pair<bool, std::string> VariantReader::parseUnescapedString( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -527,7 +529,7 @@ std::pair<bool, std::string> Reader::parseUnescapedString( return std::make_pair(true, res.str()); } -std::pair<bool, int64_t> Reader::parseInteger( +std::pair<bool, int64_t> VariantReader::parseInteger( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -544,7 +546,7 @@ std::pair<bool, int64_t> Reader::parseInteger( return std::make_pair(false, n.intValue()); } -std::pair<bool, double> Reader::parseDouble( +std::pair<bool, double> VariantReader::parseDouble( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -553,7 +555,7 @@ std::pair<bool, double> Reader::parseDouble( return std::make_pair(res, n.doubleValue()); } -std::pair<bool, Variant> Reader::parseGeneric( +std::pair<bool, Variant> VariantReader::parseGeneric( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -582,7 +584,7 @@ std::pair<bool, Variant> Reader::parseGeneric( Number n; // Fork the reader - utils::CharReaderFork fork = reader.fork(); + CharReaderFork fork = reader.fork(); // TODO: Fork logger @@ -620,5 +622,4 @@ std::pair<bool, Variant> Reader::parseGeneric( return error(reader, logger, ERR_UNEXPECTED_END, nullptr); } } -} diff --git a/src/core/variant/Reader.hpp b/src/core/common/VariantReader.hpp index 4114d46..5e7c5d2 100644 --- a/src/core/variant/Reader.hpp +++ b/src/core/common/VariantReader.hpp @@ -17,7 +17,7 @@ */ /** - * @file Reader.hpp + * @file VariantReader.hpp * * Provides parsers for various micro formats. These formats include integers, * doubles, strings, JSON and the Ousía struct notation. @@ -32,23 +32,21 @@ #include <unordered_set> #include <utility> -#include <core/utils/CharReader.hpp> -#include <core/Logger.hpp> - +#include "CharReader.hpp" +#include "Logger.hpp" #include "Variant.hpp" namespace ousia { -namespace variant { -class Reader { +class VariantReader { private: /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. @@ -58,7 +56,7 @@ private: * is read. */ static std::pair<bool, std::string> parseString( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> *delims); public: @@ -66,9 +64,9 @@ public: * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. @@ -77,93 +75,92 @@ public: * outside). */ static std::pair<bool, std::string> parseString( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims) { - return parseString(reader, logger, &delims); + return parseString(VariantReader, logger, &delims); } /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. */ - static std::pair<bool, std::string> parseString(CharReader &reader, + static std::pair<bool, std::string> parseString(CharReader &VariantReader, Logger &logger) { - return parseString(reader, logger, nullptr); + return parseString(VariantReader, logger, nullptr); } /** - * Extracts an unescaped string from the given buffered char reader + * Extracts an unescaped string from the given buffered char VariantReader * instance. This function just reads text until one of the given delimiter * characters is reached. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. * @param delims is a set of characters which will terminate the string. * These characters are not included in the result. May not be nullptr. */ static std::pair<bool, std::string> parseUnescapedString( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** - * Parses an integer from the given buffered char reader instance until one - * of the given delimiter characters is reached. + * Parses an integer from the given buffered char VariantReader instance + * until one of the given delimiter characters is reached. * - * @param reader is a reference to the CharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. + * @param VariantReader is a reference to the CharReader instance from + * which the character data should been VariantReader. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. */ static std::pair<bool, int64_t> parseInteger( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** - * Parses an double from the given buffered char reader instance until one - * of the given delimiter characters is reached. + * Parses an double from the given buffered char VariantReader instance + * until one of the given delimiter characters is reached. * - * @param reader is a reference to the CharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. + * @param VariantReader is a reference to the CharReader instance from + * which the character data should been VariantReader. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. */ static std::pair<bool, double> parseDouble( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** * Parses an array of values. */ static std::pair<bool, Variant::arrayType> parseArray( - CharReader &reader, Logger &logger, char delim = 0); + CharReader &VariantReader, Logger &logger, char delim = 0); /** * Tries to parse the most specific item from the given stream until one of * the given delimiters is reached or a meaningful literal has been read. * The resulting variant represents the value that has been read. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. * @param delims is a set of characters which will terminate the string. * These characters are not included in the result. May not be nullptr. */ static std::pair<bool, Variant> parseGeneric( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); }; } -} #endif /* _OUSIA_VARIANT_READER_HPP_ */ diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index 5dac956..e155cfd 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -32,10 +32,10 @@ #include <set> #include <string> -#include <core/Exceptions.hpp> #include <core/Node.hpp> -#include <core/Logger.hpp> #include <core/Registry.hpp> +#include <core/common/Exceptions.hpp> +#include <core/common/Logger.hpp> #include "Scope.hpp" diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp index dca7f35..5e801ee 100644 --- a/src/core/parser/ParserStack.cpp +++ b/src/core/parser/ParserStack.cpp @@ -20,8 +20,8 @@ #include "ParserStack.hpp" -#include <core/Utils.hpp> -#include <core/Exceptions.hpp> +#include <core/common/Utils.hpp> +#include <core/common/Exceptions.hpp> namespace ousia { namespace parser { diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp index c5ed4e4..233f4f9 100644 --- a/src/core/parser/ParserStack.hpp +++ b/src/core/parser/ParserStack.hpp @@ -37,7 +37,7 @@ #include <stack> #include <vector> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp> #include "Parser.hpp" diff --git a/src/plugins/css/CSSParser.cpp b/src/plugins/css/CSSParser.cpp index 4cbe93f..5985047 100644 --- a/src/plugins/css/CSSParser.cpp +++ b/src/plugins/css/CSSParser.cpp @@ -18,7 +18,7 @@ #include "CSSParser.hpp" -#include <core/variant/Reader.hpp> +#include <core/common/VariantReader.hpp> namespace ousia { namespace parser { @@ -77,7 +77,7 @@ static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = { Rooted<Node> CSSParser::parse(std::istream &is, ParserContext &ctx) { - BufferedCharReader input{is}; + CharReader input{is}; CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; tokenizer.ignoreComments = true; tokenizer.ignoreLinebreaks = true; @@ -228,14 +228,14 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer, Variant::arrayType args; // we require at least one argument, if parantheses are used // XXX - /*args.push_back(variant::Reader::parseGeneric(tokenizer.getInput(), + args.push_back(VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, - {',', ')'}).second);*/ + {',', ')'}).second); while (expect(COMMA, tokenizer, t, false, ctx)) { // as long as we find commas we expect new arguments. - /*args.push_back( - variant::Reader::parseGeneric( - tokenizer.getInput(), ctx.logger, {',', ')'}).second);*/ + args.push_back( + VariantReader::parseGeneric( + tokenizer.getInput(), ctx.logger, {',', ')'}).second); } expect(PAREN_CLOSE, tokenizer, t, true, ctx); // and we return with the finished Selector. @@ -334,8 +334,8 @@ bool CSSParser::parseRule(CodeTokenizer &tokenizer, ParserContext &ctx, expect(COLON, tokenizer, t, true, ctx); // then the value // TODO: Resolve key for appropriate parsing function here. - /*value = variant::Reader::parseGeneric(tokenizer.getInput(), ctx.logger, - {';'}).second;*/ + value = VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, + {';'}).second; // and a ; expect(SEMICOLON, tokenizer, t, true, ctx); return true; diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp index 82f0cd1..eeb5b2c 100644 --- a/src/plugins/css/CSSParser.hpp +++ b/src/plugins/css/CSSParser.hpp @@ -22,9 +22,9 @@ #include <vector> #include <utility> -#include <core/BufferedCharReader.hpp> #include <core/CodeTokenizer.hpp> #include <core/CSS.hpp> +#include <core/common/CharReader.hpp> #include <core/parser/Parser.hpp> namespace ousia { diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp index ce2857e..9a7b4d8 100644 --- a/src/plugins/xml/XmlParser.cpp +++ b/src/plugins/xml/XmlParser.cpp @@ -20,7 +20,7 @@ #include <expat.h> -#include <core/Utils.hpp> +#include <core/common/Utils.hpp> #include <core/parser/ParserStack.hpp> #include "XmlParser.hpp" diff --git a/test/core/BufferedCharReaderTest.cpp b/test/core/BufferedCharReaderTest.cpp deleted file mode 100644 index b3498f7..0000000 --- a/test/core/BufferedCharReaderTest.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <string> -#include <iostream> - -#include "gtest/gtest.h" - -#include <core/BufferedCharReader.hpp> - -namespace ousia{ - -TEST(BufferedCharReaderTest, SimpleReadTest) -{ - std::string testStr{"this is a test"}; - char c; - - // Feed a test string into the reader - BufferedCharReader reader{testStr}; - - // Try to read the test string - std::string res; - while (!reader.atEnd()) { - ASSERT_TRUE(reader.read(&c)); - res.append(&c, 1); - } - - // The two strings must equal - ASSERT_STREQ(testStr.c_str(), res.c_str()) ; - - // We must now be at line 1, column 15 - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(testStr.size() + 1, reader.getColumn()); - - // If we call either read or peek, false is returned - ASSERT_FALSE(reader.read(&c)); - ASSERT_FALSE(reader.peek(&c)); -} - -TEST(BufferedCharReaderTest, SimplePeekTest) -{ - std::string testStr{"this is a test"}; - char c; - - // Feed a test string into the reader - BufferedCharReader reader{testStr}; - - // Try to read the test string - std::string res; - while (reader.peek(&c)) { - res.append(&c, 1); - } - - // Peeking does not trigger the "atEnd" flag - ASSERT_FALSE(reader.atEnd()); - - // The two strings must equal - ASSERT_STREQ(testStr.c_str(), res.c_str()); - - // We must now be at line 1, column 1 and NOT at the end of the stream - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); - ASSERT_FALSE(reader.atEnd()); - - // If we consume the peek, we must be at line 1, column 15 and we should be - // at the end of the stream - reader.consumePeek(); - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(testStr.size() + 1, reader.getColumn()); - ASSERT_TRUE(reader.atEnd()); - - // If we call either read or peek, false is returned - ASSERT_FALSE(reader.read(&c)); - ASSERT_FALSE(reader.peek(&c)); -} - -TEST(BufferedCharReaderTest, SplittedPeakTest) -{ - std::string testStr{"this is a test"}; - char c; - - // Feed a test string into the reader - BufferedCharReader reader; - - // Try to peek the test string, feed char after char into the reader - std::string res; - for (unsigned int i = 0; i < testStr.length(); i++) { - reader.feed(std::string(&testStr[i], 1)); - while (reader.peek(&c)) { - res.append(&c, 1); - } - } - reader.close(); - - // Consume the peeked data - ASSERT_FALSE(reader.atEnd()); - reader.consumePeek(); - ASSERT_TRUE(reader.atEnd()); - - // The two strings must equal - ASSERT_STREQ(testStr.c_str(), res.c_str()) ; - - // We must now be at line 1, column 15 - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(testStr.size() + 1, reader.getColumn()); - - // If we call either read or peek, false is returned - ASSERT_FALSE(reader.read(&c)); - ASSERT_FALSE(reader.peek(&c)); -} - -TEST(BufferedCharReaderTest, RowColumnCounterTest) -{ - // Feed a test string into the reader - BufferedCharReader reader{"1\n\r2\n3\r\n\n4"}; - - // We should currently be in line 1, column 1 - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); - - // Read two characters - char c; - for (int i = 0; i < 2; i++) reader.read(&c); - ASSERT_EQ(2, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); - - // Read two characters - for (int i = 0; i < 2; i++) reader.read(&c); - ASSERT_EQ(3, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); - - // Read three characters - for (int i = 0; i < 3; i++) reader.read(&c); - ASSERT_EQ(5, reader.getLine()); - ASSERT_EQ(1, reader.getColumn()); -} - -TEST(BufferedCharReaderTest, LinebreakSubstitutionTest) -{ - // Feed a test string into the reader - BufferedCharReader reader{"this\n\ris\n\rjust\na test\r\n\rtest\n\r"}; - - // Read all characters from the test string - std::string res; - char c; - while (reader.read(&c)) { - res.append(&c, 1); - } - - // Test for equality - ASSERT_STREQ("this\nis\njust\na test\n\ntest\n", res.c_str()); -} - -TEST(BufferedCharReaderTest, RowColumnCounterUTF8Test) -{ - // Feed a test string with some umlauts into the reader - BufferedCharReader reader{"\x61\xc3\x96\xc3\x84\xc3\x9c\xc3\x9f"}; - - // Read all bytes - char c; - while (reader.read(&c)); - - // The sequence above equals 5 UTF-8 characters (so after reading all the - // cursor is at position 6) - ASSERT_EQ(1, reader.getLine()); - ASSERT_EQ(6, reader.getColumn()); -} - -} - diff --git a/test/core/CodeTokenizerTest.cpp b/test/core/CodeTokenizerTest.cpp index 1432564..4d11622 100644 --- a/test/core/CodeTokenizerTest.cpp +++ b/test/core/CodeTokenizerTest.cpp @@ -32,15 +32,15 @@ static const int CURLY_CLOSE = 41; TEST(CodeTokenizer, testTokenizer) { - BufferedCharReader reader; - reader.feed("/**\n"); // 1 - reader.feed(" * Some Block Comment\n"); // 2 - reader.feed(" */\n"); // 3 - reader.feed("var my_string = 'My \\'String\\'';\n"); // 4 - reader.feed("// and a line comment\n"); // 5 - reader.feed("var my_obj = { a = 4;}"); // 6 - // 123456789012345678901234567890123456789 - // 0 1 2 3 + CharReader reader{ + "/**\n" // 1 + " * Some Block Comment\n" // 2 + " */\n" // 3 + "var my_string = 'My \\'String\\'';\n" // 4 + "// and a line comment\n" // 5 + "var my_obj = { a = 4;}"}; // 6 + // 123456789012345678901234567890123456789 + // 0 1 2 3 TokenTreeNode root{{{"/*", 1}, {"*/", 2}, {"//", 3}, @@ -68,10 +68,10 @@ TEST(CodeTokenizer, testTokenizer) {STRING, "My 'String'", 17, 4, 32, 4}, {TOKEN_TEXT, ";", 32, 4, 33, 4}, {LINEBREAK, "\n", 33, 4, 1, 5}, - //this is slightly counter-intuitive but makes sense if you think about - //it: As a line comment is ended by a line break the line break is - //technically still a part of the line comment and thus the ending - //is in the next line. + // this is slightly counter-intuitive but makes sense if you think about + // it: As a line comment is ended by a line break the line break is + // technically still a part of the line comment and thus the ending + // is in the next line. {LINE_COMMENT, " and a line comment", 1, 5, 1, 6}, {TOKEN_TEXT, "var", 1, 6, 4, 6}, {TOKEN_TEXT, "my_obj", 5, 6, 11, 6}, diff --git a/test/core/RegistryTest.cpp b/test/core/RegistryTest.cpp index e06011a..22365f2 100644 --- a/test/core/RegistryTest.cpp +++ b/test/core/RegistryTest.cpp @@ -22,7 +22,7 @@ #include <sstream> -#include <core/Logger.hpp> +#include <core/common/Logger.hpp> namespace ousia { diff --git a/test/core/TokenizerTest.cpp b/test/core/TokenizerTest.cpp index da6b578..2b80662 100644 --- a/test/core/TokenizerTest.cpp +++ b/test/core/TokenizerTest.cpp @@ -18,7 +18,7 @@ #include <gtest/gtest.h> -#include <core/BufferedCharReader.hpp> +#include <core/common/CharReader.hpp> #include <core/Tokenizer.hpp> @@ -65,10 +65,9 @@ TEST(Tokenizer, testTokenization) { TokenTreeNode root{{{"/", 1}, {"/*", 2}, {"*/", 3}}}; - BufferedCharReader reader; - reader.feed("Test/Test /* Block Comment */"); - // 12345678901234567890123456789 - // 0 1 2 + CharReader reader{"Test/Test /* Block Comment */"}; + // 12345678901234567890123456789 + // 0 1 2 std::vector<Token> expected = { {TOKEN_TEXT, "Test", 1, 1, 5, 1}, @@ -97,10 +96,7 @@ TEST(Tokenizer, testIncompleteTokens) { TokenTreeNode root{{{"ab", 1}, {"c", 2}}}; - BufferedCharReader reader; - reader.feed("ac"); - // 1234567890 - // 0 1 + CharReader reader{"ac"}; std::vector<Token> expected = { {TOKEN_TEXT, "a", 1, 1, 2, 1}, diff --git a/test/core/utils/CharReaderTest.cpp b/test/core/common/CharReaderTest.cpp index eb04a8e..06b9d45 100644 --- a/test/core/utils/CharReaderTest.cpp +++ b/test/core/common/CharReaderTest.cpp @@ -22,10 +22,9 @@ #include "gtest/gtest.h" -#include <core/utils/CharReader.hpp> +#include <core/common/CharReader.hpp> namespace ousia { -namespace utils { /* Test data */ @@ -819,5 +818,4 @@ TEST(CharReaderTest, context) } } } -} diff --git a/test/core/LoggerTest.cpp b/test/core/common/LoggerTest.cpp index abb76de..54c67f9 100644 --- a/test/core/LoggerTest.cpp +++ b/test/core/common/LoggerTest.cpp @@ -20,7 +20,7 @@ #include <gtest/gtest.h> -#include <core/Logger.hpp> +#include <core/common/Logger.hpp> namespace ousia { diff --git a/test/core/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 0a7d2a3..2858038 100644 --- a/test/core/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -18,7 +18,7 @@ #include <gtest/gtest.h> -#include <core/Utils.hpp> +#include <core/common/Utils.hpp> namespace ousia { diff --git a/test/core/variant/ReaderTest.cpp b/test/core/common/VariantReaderTest.cpp index 43e85a5..d9bb74e 100644 --- a/test/core/variant/ReaderTest.cpp +++ b/test/core/common/VariantReaderTest.cpp @@ -19,7 +19,7 @@ #include <iostream> #include <gtest/gtest.h> -#include <core/variant/Reader.hpp> +#include <core/common/VariantReader.hpp> namespace ousia { namespace variant { @@ -32,7 +32,7 @@ TEST(Reader, readString) // Simple, double quoted string { CharReader reader("\"hello world\""); - auto res = Reader::parseString(reader, logger); + auto res = VariantReader::parseString(reader, logger); ASSERT_TRUE(res.first); ASSERT_EQ("hello world", res.second); } @@ -40,7 +40,7 @@ TEST(Reader, readString) // Simple, double quoted string with whitespace { CharReader reader(" \"hello world\" "); - auto res = Reader::parseString(reader, logger); + auto res = VariantReader::parseString(reader, logger); ASSERT_TRUE(res.first); ASSERT_EQ("hello world", res.second); } @@ -48,7 +48,7 @@ TEST(Reader, readString) // Simple, single quoted string { CharReader reader("'hello world'"); - auto res = Reader::parseString(reader, logger); + auto res = VariantReader::parseString(reader, logger); ASSERT_TRUE(res.first); ASSERT_EQ("hello world", res.second); } @@ -56,7 +56,7 @@ TEST(Reader, readString) // Escape characters { CharReader reader("'\\'\\\"\\b\\f\\n\\r\\t\\v'"); - auto res = Reader::parseString(reader, logger); + auto res = VariantReader::parseString(reader, logger); ASSERT_TRUE(res.first); ASSERT_EQ("'\"\b\f\n\r\t\v", res.second); } @@ -67,7 +67,7 @@ TEST(Reader, parseUnescapedString) // Simple case { CharReader reader("hello world;"); - auto res = Reader::parseUnescapedString(reader, logger, {';'}); + auto res = VariantReader::parseUnescapedString(reader, logger, {';'}); ASSERT_TRUE(res.first); ASSERT_EQ("hello world", res.second); } @@ -75,7 +75,7 @@ TEST(Reader, parseUnescapedString) // Simple case with whitespace { CharReader reader(" hello world ; "); - auto res = Reader::parseUnescapedString(reader, logger, {';'}); + auto res = VariantReader::parseUnescapedString(reader, logger, {';'}); ASSERT_TRUE(res.first); ASSERT_EQ("hello world", res.second); } @@ -83,7 +83,7 @@ TEST(Reader, parseUnescapedString) // Linebreaks { CharReader reader(" hello\nworld ; "); - auto res = Reader::parseUnescapedString(reader, logger, {';'}); + auto res = VariantReader::parseUnescapedString(reader, logger, {';'}); ASSERT_TRUE(res.first); ASSERT_EQ("hello\nworld", res.second); } @@ -91,7 +91,7 @@ TEST(Reader, parseUnescapedString) // End of stream { CharReader reader(" hello world "); - auto res = Reader::parseUnescapedString(reader, logger, {';'}); + auto res = VariantReader::parseUnescapedString(reader, logger, {';'}); ASSERT_TRUE(res.first); ASSERT_EQ("hello world", res.second); } @@ -104,49 +104,49 @@ TEST(Reader, parseInteger) // Valid integers { CharReader reader("0 "); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(0, res.second); } { CharReader reader("42 "); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(42, res.second); } { CharReader reader("-42"); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(-42, res.second); } { CharReader reader(" -0x4A2 "); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(-0x4A2, res.second); } { CharReader reader(" 0Xaffe"); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(0xAFFE, res.second); } { CharReader reader("0x7FFFFFFFFFFFFFFF"); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(0x7FFFFFFFFFFFFFFFL, res.second); } { CharReader reader("-0x7FFFFFFFFFFFFFFF"); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(-0x7FFFFFFFFFFFFFFFL, res.second); } @@ -154,25 +154,25 @@ TEST(Reader, parseInteger) // Invalid integers { CharReader reader("-"); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_FALSE(res.first); } { CharReader reader("0a"); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_FALSE(res.first); } { CharReader reader("-0xag"); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_FALSE(res.first); } { CharReader reader("0x8000000000000000"); - auto res = Reader::parseInteger(reader, logger, noDelim); + auto res = VariantReader::parseInteger(reader, logger, noDelim); ASSERT_FALSE(res.first); } } @@ -182,49 +182,49 @@ TEST(Reader, parseDouble) // Valid doubles { CharReader reader("1.25"); - auto res = Reader::parseDouble(reader, logger, noDelim); + auto res = VariantReader::parseDouble(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(1.25, res.second); } { CharReader reader(".25"); - auto res = Reader::parseDouble(reader, logger, noDelim); + auto res = VariantReader::parseDouble(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(.25, res.second); } { CharReader reader(".25e1"); - auto res = Reader::parseDouble(reader, logger, noDelim); + auto res = VariantReader::parseDouble(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(2.5, res.second); } { CharReader reader("-2.5e-1"); - auto res = Reader::parseDouble(reader, logger, noDelim); + auto res = VariantReader::parseDouble(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(-0.25, res.second); } { CharReader reader("-50e-2"); - auto res = Reader::parseDouble(reader, logger, noDelim); + auto res = VariantReader::parseDouble(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(-0.5, res.second); } { CharReader reader("-1."); - auto res = Reader::parseDouble(reader, logger, noDelim); + auto res = VariantReader::parseDouble(reader, logger, noDelim); ASSERT_TRUE(res.first); ASSERT_EQ(-1., res.second); } { CharReader reader("-50.e-2"); - auto res = Reader::parseDouble(reader, logger, {'.'}); + auto res = VariantReader::parseDouble(reader, logger, {'.'}); ASSERT_TRUE(res.first); ASSERT_EQ(-50, res.second); } @@ -232,13 +232,13 @@ TEST(Reader, parseDouble) // Invalid doubles { CharReader reader(".e1"); - auto res = Reader::parseDouble(reader, logger, noDelim); + auto res = VariantReader::parseDouble(reader, logger, noDelim); ASSERT_FALSE(res.first); } { CharReader reader("0e100000"); - auto res = Reader::parseDouble(reader, logger, noDelim); + auto res = VariantReader::parseDouble(reader, logger, noDelim); ASSERT_FALSE(res.first); } } @@ -249,7 +249,7 @@ TEST(Reader, parseArray) { CharReader reader("[\"Hello, World\", unescaped\n string ,\n" "1234, 0.56, true, false, null]"); - auto res = Reader::parseArray(reader, logger); + auto res = VariantReader::parseArray(reader, logger); ASSERT_TRUE(res.first); // Make sure array has the correct size @@ -276,7 +276,7 @@ TEST(Reader, parseArray) // Ending with comma { CharReader reader("[ 'test' ,]"); - auto res = Reader::parseArray(reader, logger); + auto res = VariantReader::parseArray(reader, logger); ASSERT_TRUE(res.first); // Make sure the array has the correct size @@ -292,7 +292,7 @@ TEST(Reader, parseArray) // Recovery from invalid values { CharReader reader("[ 0invalidNumber, str, 1invalid]"); - auto res = Reader::parseArray(reader, logger); + auto res = VariantReader::parseArray(reader, logger); ASSERT_TRUE(res.first); // Make sure the array has the correct size @@ -315,7 +315,7 @@ TEST(Reader, parseGeneric) // Simple case, unescaped string { CharReader reader("hello world"); - auto res = Reader::parseGeneric(reader, logger, {';'}); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); ASSERT_TRUE(res.first); ASSERT_TRUE(res.second.isString()); ASSERT_EQ("hello world", res.second.asString()); @@ -324,7 +324,7 @@ TEST(Reader, parseGeneric) // Simple case, double quoted string { CharReader reader(" \"hello world\" "); - auto res = Reader::parseGeneric(reader, logger, {';'}); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); ASSERT_TRUE(res.first); ASSERT_TRUE(res.second.isString()); ASSERT_EQ("hello world", res.second.asString()); @@ -333,7 +333,7 @@ TEST(Reader, parseGeneric) // Simple case, single quoted string { CharReader reader(" 'hello world' "); - auto res = Reader::parseGeneric(reader, logger, {';'}); + auto res = VariantReader::parseGeneric(reader, logger, {';'}); ASSERT_TRUE(res.first); ASSERT_TRUE(res.second.isString()); ASSERT_EQ("hello world", res.second.asString()); diff --git a/test/core/variant/VariantTest.cpp b/test/core/common/VariantTest.cpp index e51cf36..580846e 100644 --- a/test/core/variant/VariantTest.cpp +++ b/test/core/common/VariantTest.cpp @@ -20,7 +20,7 @@ #include <gtest/gtest.h> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp> namespace ousia { diff --git a/test/plugins/css/CSSParserTest.cpp b/test/plugins/css/CSSParserTest.cpp index 6499375..3ea3a19 100644 --- a/test/plugins/css/CSSParserTest.cpp +++ b/test/plugins/css/CSSParserTest.cpp @@ -186,11 +186,11 @@ TEST(CSSParser, testParseCSS) { Rooted<RuleSet> ruleSet = A->getRuleSet(); ASSERT_EQ(2, ruleSet->getRules().size()); - variant::Variant v = ruleSet->getRules()["ident1"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + Variant v = ruleSet->getRules()["ident1"]; + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val4", v.asString()); v = ruleSet->getRules()["ident2"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val2", v.asString()); } /* @@ -211,8 +211,8 @@ TEST(CSSParser, testParseCSS) { Rooted<RuleSet> ruleSet = Aselect->getRuleSet(); ASSERT_EQ(1, ruleSet->getRules().size()); - variant::Variant v = ruleSet->getRules()["ident3"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + Variant v = ruleSet->getRules()["ident3"]; + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val3", v.asString()); } /* @@ -250,11 +250,11 @@ TEST(CSSParser, testParseCSS) { Rooted<RuleSet> ruleSet = BA->getRuleSet(); ASSERT_EQ(2, ruleSet->getRules().size()); - variant::Variant v = ruleSet->getRules()["ident1"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + Variant v = ruleSet->getRules()["ident1"]; + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val1", v.asString()); v = ruleSet->getRules()["ident2"]; - ASSERT_EQ(variant::Variant::Type::STRING, v.getType()); + ASSERT_EQ(Variant::Type::STRING, v.getType()); ASSERT_EQ("val2", v.asString()); } } |