diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-11 15:26:50 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2014-12-11 15:26:50 +0100 |
commit | 3f62168ed0b088eec3cb2903f03966f7d501f564 (patch) | |
tree | 781f5bd9b304d9eb931827a26f463575d772983d /src | |
parent | b74936760e28a92cadfaec47928ea478fe2d72ee (diff) |
moved to CharReader everywhere
Diffstat (limited to 'src')
-rw-r--r-- | src/core/BufferedCharReader.cpp | 263 | ||||
-rw-r--r-- | src/core/BufferedCharReader.hpp | 288 | ||||
-rw-r--r-- | src/core/CSS.hpp | 2 | ||||
-rw-r--r-- | src/core/CodeTokenizer.hpp | 8 | ||||
-rw-r--r-- | src/core/Registry.cpp | 3 | ||||
-rw-r--r-- | src/core/Tokenizer.cpp | 12 | ||||
-rw-r--r-- | src/core/Tokenizer.hpp | 27 | ||||
-rw-r--r-- | src/core/common/CharReader.cpp (renamed from src/core/utils/CharReader.cpp) | 5 | ||||
-rw-r--r-- | src/core/common/CharReader.hpp (renamed from src/core/utils/CharReader.hpp) | 7 | ||||
-rw-r--r-- | src/core/common/Exceptions.cpp (renamed from src/core/Exceptions.cpp) | 0 | ||||
-rw-r--r-- | src/core/common/Exceptions.hpp (renamed from src/core/Exceptions.hpp) | 0 | ||||
-rw-r--r-- | src/core/common/Logger.cpp (renamed from src/core/Logger.cpp) | 0 | ||||
-rw-r--r-- | src/core/common/Logger.hpp (renamed from src/core/Logger.hpp) | 0 | ||||
-rw-r--r-- | src/core/common/Utils.cpp (renamed from src/core/Utils.cpp) | 0 | ||||
-rw-r--r-- | src/core/common/Utils.hpp (renamed from src/core/Utils.hpp) | 0 | ||||
-rw-r--r-- | src/core/common/Variant.cpp (renamed from src/core/variant/Variant.cpp) | 3 | ||||
-rw-r--r-- | src/core/common/Variant.hpp (renamed from src/core/variant/Variant.hpp) | 7 | ||||
-rw-r--r-- | src/core/common/VariantReader.cpp (renamed from src/core/variant/Reader.cpp) | 247 | ||||
-rw-r--r-- | src/core/common/VariantReader.hpp (renamed from src/core/variant/Reader.hpp) | 87 | ||||
-rw-r--r-- | src/core/parser/Parser.hpp | 4 | ||||
-rw-r--r-- | src/core/parser/ParserStack.cpp | 4 | ||||
-rw-r--r-- | src/core/parser/ParserStack.hpp | 2 | ||||
-rw-r--r-- | src/plugins/css/CSSParser.cpp | 18 | ||||
-rw-r--r-- | src/plugins/css/CSSParser.hpp | 2 | ||||
-rw-r--r-- | src/plugins/xml/XmlParser.cpp | 2 |
25 files changed, 211 insertions, 780 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp deleted file mode 100644 index aeedf12..0000000 --- a/src/core/BufferedCharReader.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <array> - -#include "Utils.hpp" - -#include "BufferedCharReader.hpp" - -namespace ousia { - -// Constants used within the linebreak statemachine. -static const uint8_t LB_STATE_NONE = 0x00; -static const uint8_t LB_STATE_ONE = 0x01; -static const uint8_t LB_STATE_LF = 0x10; -static const uint8_t LB_STATE_CR = 0x20; -static const uint8_t LB_STATE_MASK_CNT = 0x0F; -static const uint8_t LB_STATE_MASK_TYPE = 0xF0; - -/* Struct BufferedCharReader::ReadCursor */ - -BufferedCharReader::ReadCursor::ReadCursor(unsigned int line, - unsigned int column, - bool destructive) - : line(line), - column(column), - bufferElem(0), - bufferPos(0), - destructive(destructive), - lbState(LB_STATE_NONE) -{ -} - -void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor) -{ - this->line = cursor.line; - this->column = cursor.column; - this->bufferElem = cursor.bufferElem; - this->bufferPos = cursor.bufferPos; - this->lbState = cursor.lbState; -} - -/* Class BufferedCharReader */ - -BufferedCharReader::BufferedCharReader(int line, int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -BufferedCharReader::BufferedCharReader(const std::string &str, int line, - int column) - : inputStream(nullptr), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(const std::string &str) - : inputStream(nullptr), - readCursor(1, 1, true), - peekCursor(1, 1, false), - depleted(true) -{ - buffer.push_back(str); -} - -BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line, - int column) - : inputStream(&inputStream), - readCursor(line, column, true), - peekCursor(line, column, false), - depleted(false) -{ -} - -void BufferedCharReader::feed(const std::string &data) -{ - if (!depleted && !inputStream) { - buffer.push_back(data); - } -} - -void BufferedCharReader::close() -{ - if (!inputStream) { - depleted = true; - } -} - -bool BufferedCharReader::substituteLinebreaks(ReadCursor &cursor, char *c) -{ - // Handle line breaks, inserts breakes after the following character - // combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n - if ((*c == '\n') || (*c == '\r')) { - // Determine the type of the current linebreak character - const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR; - - // Read the last count and the last type from the state - const uint8_t lastCount = cursor.lbState & LB_STATE_MASK_CNT; - const uint8_t lastType = cursor.lbState & LB_STATE_MASK_TYPE; - - // Set the current linebreak type and counter in the state - cursor.lbState = ((lastCount + 1) & 1) | type; - - // If either this is the first instance of this character or the same - // return character is repeated - if (!lastCount || (lastType == type)) { - *c = '\n'; - return true; - } - return false; - } - - // Find the state - cursor.lbState = LB_STATE_NONE; - return true; -} - -bool BufferedCharReader::readCharacterAtCursor(ReadCursor &cursor, char *c) -{ - bool hasChar = false; - while (!hasChar) { - // Abort if the current buffer element does not point to a valid entry - // in the buffer -- we must try to feed another data block into the - // internal buffer - if (cursor.bufferElem >= buffer.size()) { - // Abort if there is no more data or no input stream is set - if (depleted || !inputStream) { - return false; - } - - // Read a buffer of the specified size - constexpr std::streamsize BUFFER_SIZE = 1024; - std::array<char, BUFFER_SIZE> buf; - const std::streamsize cnt = - (*inputStream).read(buf.data(), BUFFER_SIZE).gcount(); - - // If data has been read, append it to the input buffer and try - // again - if (cnt > 0) { - buffer.emplace_back(std::string(buf.data(), cnt)); - continue; - } - - // End of file handling - if (inputStream->fail() || inputStream->eof()) { - depleted = true; - return false; - } - } - - // Fetch the current element the peek pointer points to - const std::string &data = buffer[cursor.bufferElem]; - - // Handle the "no data" case -- either in a destructive or - // non-destructive manner. - if (cursor.bufferPos >= data.length()) { - if (cursor.destructive) { - buffer.pop_front(); - } else { - cursor.bufferElem++; - } - cursor.bufferPos = 0; - continue; - } - - // Read the character, advance the buffer position - *c = *(data.data() + cursor.bufferPos); - cursor.bufferPos++; - - // Substitute linebreaks with a single LF (0x0A) - hasChar = substituteLinebreaks(cursor, c); - } - - // Update the position counter - if (*c == '\n') { - cursor.line++; - cursor.column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((*c & 0x80) && !(*c & 0x40))) { - cursor.column++; - } - } - - return true; -} - -bool BufferedCharReader::peek(char *c) -{ - return readCharacterAtCursor(peekCursor, c); -} - -bool BufferedCharReader::read(char *c) -{ - resetPeek(); - return readCharacterAtCursor(readCursor, c); -} - -void BufferedCharReader::consumePeek() -{ - // Remove all no longer needed buffer elements - for (unsigned int i = 0; i < peekCursor.bufferElem; i++) { - buffer.pop_front(); - } - peekCursor.bufferElem = 0; - - // Copy the peek cursor to the read cursor - readCursor.assign(peekCursor); -} - -bool BufferedCharReader::consumeWhitespace() -{ - char c; - while (peek(&c)) { - if (!Utils::isWhitespace(c)) { - resetPeek(); - return true; - } - consumePeek(); - } - return false; -} - -void BufferedCharReader::resetPeek() -{ - // Reset the peek cursor to the read cursor - peekCursor.assign(readCursor); -} - -bool BufferedCharReader::atEnd() const -{ - if (depleted || !inputStream) { - if (buffer.size() <= 0) { - return true; - } else if (buffer.size() == 1) { - return buffer[0].size() == readCursor.bufferPos; - } - } - return false; -} -} - diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp deleted file mode 100644 index e7f3186..0000000 --- a/src/core/BufferedCharReader.hpp +++ /dev/null @@ -1,288 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - * @file BufferedCharReader.hpp - * - * Contains the BufferedCharReader class which is used for reading/peeking - * single characters from an input stream or string. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_BUFFERED_CHAR_READER_H_ -#define _OUSIA_BUFFERED_CHAR_READER_H_ - -#include <deque> -#include <string> -#include <istream> -#include <cstdint> - -namespace ousia { - -// TODO: Better split this class into multiple classes with base class -// BufferedCharReader where each sub class represents one method of supplying -// the input data (feeding, initial string, input stream). - -/** - * The BufferedCharReader class is used for storing incomming data that - * is fed into the pipeline as well as reading/peeking single characters - * from that buffer. Additionally it counts the current column/row - * (with correct handling for UTF-8) and contains an internal state - * machine that handles the detection of linebreaks and converts these to a - * single '\n'. - */ -class BufferedCharReader { -private: - /** - * The ReadCursor structure is responsible for representing the read - * position within the text an all state machine states belonging to the - * cursor. There are two types of read cursors: destructive and - * non-destructive read cursors. - */ - struct ReadCursor { - /** - * The line the cursor currently points to. - */ - unsigned int line; - - /** - * The column the cursor currently points to. - */ - unsigned int column; - - /** - * The index of the element in the data buffer we're currently reading - * from. - */ - unsigned int bufferElem; - - /** - * The byte position within this data buffer. - */ - unsigned int bufferPos; - - /** - * Specifies whether this is a destructive cursor (bytes are discarded - * once they were read from the buffer). - */ - const bool destructive; - - /** - * State variable used in the internal state machine of the - * line feed detection. - */ - uint8_t lbState; - - /** - * Constructor of the ReadCursor structure. - * - * @param line is the start line. - * @param column is the start column. - * @param destructive specifies whether the ReadCursor is destructive - * (consumes all read characters, as used in the "read cursor") or - * non-destructive (as used in the "peek cursor"). - */ - ReadCursor(unsigned int line, unsigned int column, bool destructive); - - /** - * Copys the data from another ReadCursor without overriding the - * "destructive" flag. - * - * @param cursor is the cursor that should be copied. - */ - void assign(const ReadCursor &cursor); - }; - - /** - * Pointer at an (optional) input stream used for reading a chunk of data - * whenever the input buffer depletes. - */ - std::istream *inputStream; - - /** - * The read and the peek cursor. - */ - ReadCursor readCursor, peekCursor; - - /** - * Set to true if there is no more input data. - */ - bool depleted; - - /** - * Queue containing the data that has been fed into the char reader. - */ - std::deque<std::string> buffer; - - /** - * Substitute any combination of linebreaks in the incomming code with "\n". - * Returns true if the current character is meant as output, false - * otherwise. - */ - bool substituteLinebreaks(ReadCursor &cursor, char *c); - - /** - * Reads a character from the input buffer and advances the given read - * cursor. - * - * @param cursor is a reference to the read cursor that should be used - * for reading. - * @param hasChar is set to true, if a character is available, false if - * no character is available (e.g. because line breaks are substituted or - * the end of a buffer boundary is reached -- in this case this function - * should be called again with the same parameters.) - * @param c is a output parameter, which will be set to the read character. - * @param returns true if there was enough data in the buffer, false - * otherwise. - */ - bool readCharacterAtCursor(ReadCursor &cursor, char *c); - - /** - * Function that is called for each read character -- updates the row and - * column count. - */ - void updatePositionCounters(const char c); - -public: - - /** - * Constructor of the buffered char reader class with empty buffer as input. - * This operates the BufferedCharReader in a mode where new data has to be - * fed using the "feed" function and explicitly closed using the "close" - * function. - * - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(int line = 1, int column = 1); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(const std::string &str, int line, int column); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param str is a string containing the input data. - */ - BufferedCharReader(const std::string &str); - - /** - * Constructor of the buffered char reader class with a string as input. - * - * @param inputStream is the input stream from which incomming data should - * be read. - * @param line is the start line. - * @param column is the start column. - */ - BufferedCharReader(std::istream &inputStream, int line = 1, int column = 1); - - /** - * Peeks a single character. If called multiple times, returns the - * character after the previously peeked character. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char *c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char *c); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Moves the read cursor to the next non-whitespace character. Returns - * false, if the end of the stream was reached. - * - * @return false if the end of the stream was reached, false othrwise. - */ - bool consumeWhitespace(); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Feeds new data into the internal buffer of the BufferedCharReader - * class. Only applicable if the buffered char reader was constructed - * without an input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void feed(const std::string &data); - - /** - * Tells the buffered char reader that no more data will be fed. - * Only applicable if the buffered char reader was constructed without an - * input stream or string. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - */ - void close(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - * - * @return true if there is no more data. - */ - bool atEnd() const; - - /** - * Returns the current line (starting with one). - * - * @return the current line number. - */ - int getLine() const { return readCursor.line; } - - /** - * Returns the current column (starting with one). - * - * @return the current column number. - */ - int getColumn() const { return readCursor.column; } -}; -} - -#endif /* _OUSIA_BUFFERED_CHAR_READER_H_ */ - diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 1510f3a..a54d956 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -23,7 +23,7 @@ #include <vector> #include <tuple> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp> #include "Managed.hpp" #include "Node.hpp" diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 43c7abb..4190297 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -22,7 +22,7 @@ #include <map> #include <sstream> -#include "BufferedCharReader.hpp" +#include <core/common/CharReader.hpp> #include "Tokenizer.hpp" namespace ousia { @@ -108,8 +108,8 @@ public: /** * - * @param input a BufferedCharReader containing the input for this - * tokenizer, as with a regular tokenizer. + * @param input a CharReader containing the input for this tokenizer, as + * with a regular tokenizer. * @param root a TokenTreeNode representing the root of the TokenTree. * Please note that you have to specify all tokenIDs here that you use * in the descriptors map. @@ -120,7 +120,7 @@ public: * and this CodeTokenizer would recognize the token "//" as starting a * line comment. */ - CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, + CodeTokenizer(CharReader &input, const TokenTreeNode &root, std::map<int, CodeTokenDescriptor> descriptors) : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) { diff --git a/src/core/Registry.cpp b/src/core/Registry.cpp index 6ff9594..74d1cf8 100644 --- a/src/core/Registry.cpp +++ b/src/core/Registry.cpp @@ -16,8 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <core/Logger.hpp> - +#include <core/common/Logger.hpp> #include <core/parser/Parser.hpp> namespace ousia { diff --git a/src/core/Tokenizer.cpp b/src/core/Tokenizer.cpp index b99d1ed..0af5f5a 100644 --- a/src/core/Tokenizer.cpp +++ b/src/core/Tokenizer.cpp @@ -72,7 +72,7 @@ TokenTreeNode::TokenTreeNode(const std::map<std::string, int> &inputs) { } -Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root) +Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root) : input(input), root(root) { } @@ -81,10 +81,10 @@ bool Tokenizer::prepare() { std::stringstream buffer; char c; - int startColumn = input.getColumn(); - int startLine = input.getLine(); + uint32_t startColumn = input.getColumn(); + uint32_t startLine = input.getLine(); bool bufEmpty = true; - while (input.peek(&c)) { + while (input.peek(c)) { if (root.children.find(c) != root.children.end()) { // if there might be a special token, keep peeking forward // until we find the token (or we don't). @@ -107,7 +107,7 @@ bool Tokenizer::prepare() input.consumePeek(); } } - if (!input.peek(&c)) { + if (!input.peek(c)) { // if we are at the end we break off the search. break; } @@ -153,7 +153,7 @@ bool Tokenizer::prepare() } } else{ //if we found nothing, read at least one character. - input.peek(&c); + input.peek(c); } } buffer << c; diff --git a/src/core/Tokenizer.hpp b/src/core/Tokenizer.hpp index 8f80150..33327cc 100644 --- a/src/core/Tokenizer.hpp +++ b/src/core/Tokenizer.hpp @@ -19,11 +19,12 @@ #ifndef _OUSIA_TOKENIZER_HPP_ #define _OUSIA_TOKENIZER_HPP_ +#include <cstdint> +#include <deque> #include <istream> #include <map> -#include <deque> -#include "BufferedCharReader.hpp" +#include <core/common/CharReader.hpp> namespace ousia { @@ -120,13 +121,13 @@ static const int TOKEN_TEXT = -2; struct Token { int tokenId; std::string content; - int startColumn; - int startLine; - int endColumn; - int endLine; + uint32_t startColumn; + uint32_t startLine; + uint32_t endColumn; + uint32_t endLine; - Token(int tokenId, std::string content, int startColumn, int startLine, - int endColumn, int endLine) + Token(int tokenId, std::string content, uint32_t startColumn, uint32_t startLine, + uint32_t endColumn, uint32_t endLine) : tokenId(tokenId), content(content), startColumn(startColumn), @@ -160,7 +161,7 @@ struct Token { */ class Tokenizer { private: - BufferedCharReader &input; + CharReader &input; const TokenTreeNode &root; std::deque<Token> peeked; unsigned int peekCursor = 0; @@ -185,14 +186,14 @@ protected: public: /** * @param input The input of a Tokenizer is given in the form of a - * BufferedCharReader. Please refer to the respective documentation. + * CharReader. Please refer to the respective documentation. * @param root This is meant to be the root of a TokenTree giving the * specification of user-defined tokens this Tokenizer should recognize. * The Tokenizer promises to not change the TokenTree such that you can * re-use the same specification for multiple inputs. * Please refer to the TokenTreeNode documentation for more information. */ - Tokenizer(BufferedCharReader &input, const TokenTreeNode &root); + Tokenizer(CharReader &input, const TokenTreeNode &root); /** * The next method consumes one Token from the input stream and gives @@ -224,9 +225,9 @@ public: */ void consumePeek(); - const BufferedCharReader &getInput() const { return input; } + const CharReader &getInput() const { return input; } - BufferedCharReader &getInput() { return input; } + CharReader &getInput() { return input; } }; } diff --git a/src/core/utils/CharReader.cpp b/src/core/common/CharReader.cpp index 61616d7..373c0c1 100644 --- a/src/core/utils/CharReader.cpp +++ b/src/core/common/CharReader.cpp @@ -21,12 +21,10 @@ #include <limits> #include <sstream> -#include <core/Utils.hpp> - #include "CharReader.hpp" +#include "Utils.hpp" namespace ousia { -namespace utils { /* Helper functions */ @@ -639,5 +637,4 @@ void CharReaderFork::commit() parentPeekCursor.assign(buffer, peekCursor); } } -} diff --git a/src/core/utils/CharReader.hpp b/src/core/common/CharReader.hpp index 1306026..3cbe4b4 100644 --- a/src/core/utils/CharReader.hpp +++ b/src/core/common/CharReader.hpp @@ -33,7 +33,6 @@ #include <vector> namespace ousia { -namespace utils { /** * A chunked ring buffer used in CharReader to provide access to an input stream @@ -659,12 +658,6 @@ public: */ void commit(); }; -} - -/** - * Alias of the commonly used CharReader class. - */ -using CharReader = utils::CharReader; } diff --git a/src/core/Exceptions.cpp b/src/core/common/Exceptions.cpp index d064f35..d064f35 100644 --- a/src/core/Exceptions.cpp +++ b/src/core/common/Exceptions.cpp diff --git a/src/core/Exceptions.hpp b/src/core/common/Exceptions.hpp index 00d6106..00d6106 100644 --- a/src/core/Exceptions.hpp +++ b/src/core/common/Exceptions.hpp diff --git a/src/core/Logger.cpp b/src/core/common/Logger.cpp index 17f55a6..17f55a6 100644 --- a/src/core/Logger.cpp +++ b/src/core/common/Logger.cpp diff --git a/src/core/Logger.hpp b/src/core/common/Logger.hpp index e6b97f4..e6b97f4 100644 --- a/src/core/Logger.hpp +++ b/src/core/common/Logger.hpp diff --git a/src/core/Utils.cpp b/src/core/common/Utils.cpp index c460ed4..c460ed4 100644 --- a/src/core/Utils.cpp +++ b/src/core/common/Utils.cpp diff --git a/src/core/Utils.hpp b/src/core/common/Utils.hpp index 5332b50..5332b50 100644 --- a/src/core/Utils.hpp +++ b/src/core/common/Utils.hpp diff --git a/src/core/variant/Variant.cpp b/src/core/common/Variant.cpp index d33cd4f..27fc6e7 100644 --- a/src/core/variant/Variant.cpp +++ b/src/core/common/Variant.cpp @@ -18,8 +18,7 @@ #include <sstream> -#include <core/Utils.hpp> - +#include "Utils.hpp" #include "Variant.hpp" namespace ousia { diff --git a/src/core/variant/Variant.hpp b/src/core/common/Variant.hpp index 1e62644..d411fd3 100644 --- a/src/core/variant/Variant.hpp +++ b/src/core/common/Variant.hpp @@ -39,10 +39,9 @@ // http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html // later (will allow to use 8 bytes for a variant) -#include <core/Exceptions.hpp> +#include "Exceptions.hpp" namespace ousia { -namespace variant { /** * Instances of the Variant class represent any kind of data that is exchanged @@ -758,9 +757,5 @@ public: }; } -// Alias for the (very often used and unambigous) variant class -using Variant = variant::Variant; -} - #endif /* _OUSIA_VARIANT_HPP_ */ diff --git a/src/core/variant/Reader.cpp b/src/core/common/VariantReader.cpp index 5c167cd..e611842 100644 --- a/src/core/variant/Reader.cpp +++ b/src/core/common/VariantReader.cpp @@ -21,12 +21,10 @@ #include <cmath> #include <sstream> -#include <core/Utils.hpp> - -#include "Reader.hpp" +#include "VariantReader.hpp" +#include "Utils.hpp" namespace ousia { -namespace variant { // TODO: Better error messages (like "Expected 'x' but got 'y'") // TODO: Replace delims with single char delim where possible @@ -177,134 +175,138 @@ public: * delimiters. */ bool parse(CharReader &reader, Logger &logger, - const std::unordered_set<char> &delims) - { - State state = State::INIT; - char c; + const std::unordered_set<char> &delims); +}; - // Consume the first whitespace characters - reader.consumeWhitespace(); +bool Number::parse(CharReader &reader, Logger &logger, + const std::unordered_set<char> &delims) +{ + State state = State::INIT; + char c; - // Iterate over the FSM to extract numbers - while (reader.peek(c)) { - // Abort, once a delimiter or whitespace is reached - if (Utils::isWhitespace(c) || delims.count(c)) { - reader.resetPeek(); - break; - } + // Consume the first whitespace characters + reader.consumeWhitespace(); - // The character is not a whitespace character and not a delimiter - switch (state) { - case State::INIT: - case State::HAS_MINUS: - switch (c) { - case '-': - // Do not allow multiple minus signs - if (state == State::HAS_MINUS) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::HAS_MINUS; - s = -1; - break; - case '0': - // Remember a leading zero for the detection of "0x" - state = State::LEADING_ZERO; - break; - case '.': - // Remember a leading point as ".eXXX" is invalid - state = State::LEADING_POINT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::LEADING_ZERO: - if (c == 'x' || c == 'X') { - state = State::HEX; + // Iterate over the FSM to extract numbers + while (reader.peek(c)) { + // Abort, once a delimiter or whitespace is reached + if (Utils::isWhitespace(c) || delims.count(c)) { + reader.resetPeek(); + break; + } + + // The character is not a whitespace character and not a delimiter + switch (state) { + case State::INIT: + case State::HAS_MINUS: + switch (c) { + case '-': + // Do not allow multiple minus signs + if (state == State::HAS_MINUS) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; + } + state = State::HAS_MINUS; + s = -1; break; - } - // fallthrough - case State::INT: - switch (c) { - case '.': - state = State::POINT; - break; - case 'e': - case 'E': - state = State::EXP_INIT; - break; - default: - state = State::INT; - if (!appendChar(c, 10, Part::A, reader, logger)) { - return false; - } - break; - } - break; - case State::HEX: - if (!appendChar(c, 16, Part::A, reader, logger)) { - return false; - } - break; - case State::LEADING_POINT: - case State::POINT: - switch (c) { - case 'e': - case 'E': - if (state == State::LEADING_POINT) { - logger.errorAt(ERR_UNEXPECTED_CHAR, reader); - return false; - } - state = State::EXP_INIT; - break; - default: - state = State::POINT; - if (!appendChar(c, 10, Part::N, reader, logger)) { - return false; - } - break; - } + case '0': + // Remember a leading zero for the detection of "0x" + state = State::LEADING_ZERO; + break; + case '.': + // Remember a leading point as ".eXXX" is invalid + state = State::LEADING_POINT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::LEADING_ZERO: + if (c == 'x' || c == 'X') { + state = State::HEX; break; - case State::EXP_HAS_MINUS: - case State::EXP_INIT: - if (c == '-') { - if (state == State::EXP_HAS_MINUS) { + } + // fallthrough + case State::INT: + switch (c) { + case '.': + state = State::POINT; + break; + case 'e': + case 'E': + state = State::EXP_INIT; + break; + default: + state = State::INT; + if (!appendChar(c, 10, Part::A, reader, logger)) { + return false; + } + break; + } + break; + case State::HEX: + if (!appendChar(c, 16, Part::A, reader, logger)) { + return false; + } + break; + case State::LEADING_POINT: + case State::POINT: + switch (c) { + case 'e': + case 'E': + if (state == State::LEADING_POINT) { logger.errorAt(ERR_UNEXPECTED_CHAR, reader); return false; } - state = State::EXP_HAS_MINUS; - sE = -1; - } else { - state = State::EXP; - if (!appendChar(c, 10, Part::E, reader, logger)) { + state = State::EXP_INIT; + break; + default: + state = State::POINT; + if (!appendChar(c, 10, Part::N, reader, logger)) { return false; } + break; + } + break; + case State::EXP_HAS_MINUS: + case State::EXP_INIT: + if (c == '-') { + if (state == State::EXP_HAS_MINUS) { + logger.errorAt(ERR_UNEXPECTED_CHAR, reader); + return false; } - break; - case State::EXP: + state = State::EXP_HAS_MINUS; + sE = -1; + } else { + state = State::EXP; if (!appendChar(c, 10, Part::E, reader, logger)) { return false; } - break; - } - reader.consumePeek(); + } + break; + case State::EXP: + if (!appendChar(c, 10, Part::E, reader, logger)) { + return false; + } + break; } + reader.consumePeek(); + } - // States in which ending is valid. Log an error in other states - if (state == State::LEADING_ZERO || state == State::HEX || - state == State::INT || state == State::POINT || - state == State::EXP) { - return true; - } - logger.errorAt(ERR_UNEXPECTED_END, reader); - return false; + // States in which ending is valid. Log an error in other states + if (state == State::LEADING_ZERO || state == State::HEX || + state == State::INT || state == State::POINT || + state == State::EXP) { + return true; } -}; + logger.errorAt(ERR_UNEXPECTED_END, reader); + return false; +} + /* Class Reader */ @@ -324,7 +326,7 @@ static std::pair<bool, T> error(CharReader &reader, Logger &logger, return std::make_pair(false, std::move(res)); } -std::pair<bool, std::string> Reader::parseString( +std::pair<bool, std::string> VariantReader::parseString( CharReader &reader, Logger &logger, const std::unordered_set<char> *delims) { @@ -422,7 +424,7 @@ std::pair<bool, std::string> Reader::parseString( return error(reader, logger, ERR_UNEXPECTED_END, res.str()); } -std::pair<bool, Variant::arrayType> Reader::parseArray( +std::pair<bool, Variant::arrayType> VariantReader::parseArray( CharReader &reader, Logger &logger, char delim) { Variant::arrayType res; @@ -490,7 +492,7 @@ std::pair<bool, Variant::arrayType> Reader::parseArray( return error(reader, logger, ERR_UNEXPECTED_END, res); } -std::pair<bool, std::string> Reader::parseUnescapedString( +std::pair<bool, std::string> VariantReader::parseUnescapedString( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -527,7 +529,7 @@ std::pair<bool, std::string> Reader::parseUnescapedString( return std::make_pair(true, res.str()); } -std::pair<bool, int64_t> Reader::parseInteger( +std::pair<bool, int64_t> VariantReader::parseInteger( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -544,7 +546,7 @@ std::pair<bool, int64_t> Reader::parseInteger( return std::make_pair(false, n.intValue()); } -std::pair<bool, double> Reader::parseDouble( +std::pair<bool, double> VariantReader::parseDouble( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -553,7 +555,7 @@ std::pair<bool, double> Reader::parseDouble( return std::make_pair(res, n.doubleValue()); } -std::pair<bool, Variant> Reader::parseGeneric( +std::pair<bool, Variant> VariantReader::parseGeneric( CharReader &reader, Logger &logger, const std::unordered_set<char> &delims) { @@ -582,7 +584,7 @@ std::pair<bool, Variant> Reader::parseGeneric( Number n; // Fork the reader - utils::CharReaderFork fork = reader.fork(); + CharReaderFork fork = reader.fork(); // TODO: Fork logger @@ -620,5 +622,4 @@ std::pair<bool, Variant> Reader::parseGeneric( return error(reader, logger, ERR_UNEXPECTED_END, nullptr); } } -} diff --git a/src/core/variant/Reader.hpp b/src/core/common/VariantReader.hpp index 4114d46..5e7c5d2 100644 --- a/src/core/variant/Reader.hpp +++ b/src/core/common/VariantReader.hpp @@ -17,7 +17,7 @@ */ /** - * @file Reader.hpp + * @file VariantReader.hpp * * Provides parsers for various micro formats. These formats include integers, * doubles, strings, JSON and the Ousía struct notation. @@ -32,23 +32,21 @@ #include <unordered_set> #include <utility> -#include <core/utils/CharReader.hpp> -#include <core/Logger.hpp> - +#include "CharReader.hpp" +#include "Logger.hpp" #include "Variant.hpp" namespace ousia { -namespace variant { -class Reader { +class VariantReader { private: /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. @@ -58,7 +56,7 @@ private: * is read. */ static std::pair<bool, std::string> parseString( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> *delims); public: @@ -66,9 +64,9 @@ public: * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. @@ -77,93 +75,92 @@ public: * outside). */ static std::pair<bool, std::string> parseString( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims) { - return parseString(reader, logger, &delims); + return parseString(VariantReader, logger, &delims); } /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned after - * the terminating quote character or at the terminating delimiting + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * after the terminating quote character or at the terminating delimiting * character. * @param logger is the logger instance that should be used to log error * messages and warnings. */ - static std::pair<bool, std::string> parseString(CharReader &reader, + static std::pair<bool, std::string> parseString(CharReader &VariantReader, Logger &logger) { - return parseString(reader, logger, nullptr); + return parseString(VariantReader, logger, nullptr); } /** - * Extracts an unescaped string from the given buffered char reader + * Extracts an unescaped string from the given buffered char VariantReader * instance. This function just reads text until one of the given delimiter * characters is reached. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. * @param delims is a set of characters which will terminate the string. * These characters are not included in the result. May not be nullptr. */ static std::pair<bool, std::string> parseUnescapedString( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** - * Parses an integer from the given buffered char reader instance until one - * of the given delimiter characters is reached. + * Parses an integer from the given buffered char VariantReader instance + * until one of the given delimiter characters is reached. * - * @param reader is a reference to the CharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. + * @param VariantReader is a reference to the CharReader instance from + * which the character data should been VariantReader. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. */ static std::pair<bool, int64_t> parseInteger( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** - * Parses an double from the given buffered char reader instance until one - * of the given delimiter characters is reached. + * Parses an double from the given buffered char VariantReader instance + * until one of the given delimiter characters is reached. * - * @param reader is a reference to the CharReader instance from - * which the character data should been reader. The reader will be - * positioned at the terminating delimiting character or directly after the - * integer. + * @param VariantReader is a reference to the CharReader instance from + * which the character data should been VariantReader. The VariantReader + * will be positioned at the terminating delimiting character or directly + * after the integer. */ static std::pair<bool, double> parseDouble( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); /** * Parses an array of values. */ static std::pair<bool, Variant::arrayType> parseArray( - CharReader &reader, Logger &logger, char delim = 0); + CharReader &VariantReader, Logger &logger, char delim = 0); /** * Tries to parse the most specific item from the given stream until one of * the given delimiters is reached or a meaningful literal has been read. * The resulting variant represents the value that has been read. * - * @param reader is a reference to the CharReader instance which is - * the source for the character data. The reader will be positioned at the - * terminating delimiting character. + * @param VariantReader is a reference to the CharReader instance which is + * the source for the character data. The VariantReader will be positioned + * at the terminating delimiting character. * @param delims is a set of characters which will terminate the string. * These characters are not included in the result. May not be nullptr. */ static std::pair<bool, Variant> parseGeneric( - CharReader &reader, Logger &logger, + CharReader &VariantReader, Logger &logger, const std::unordered_set<char> &delims); }; } -} #endif /* _OUSIA_VARIANT_READER_HPP_ */ diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index 5dac956..e155cfd 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -32,10 +32,10 @@ #include <set> #include <string> -#include <core/Exceptions.hpp> #include <core/Node.hpp> -#include <core/Logger.hpp> #include <core/Registry.hpp> +#include <core/common/Exceptions.hpp> +#include <core/common/Logger.hpp> #include "Scope.hpp" diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp index dca7f35..5e801ee 100644 --- a/src/core/parser/ParserStack.cpp +++ b/src/core/parser/ParserStack.cpp @@ -20,8 +20,8 @@ #include "ParserStack.hpp" -#include <core/Utils.hpp> -#include <core/Exceptions.hpp> +#include <core/common/Utils.hpp> +#include <core/common/Exceptions.hpp> namespace ousia { namespace parser { diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp index c5ed4e4..233f4f9 100644 --- a/src/core/parser/ParserStack.hpp +++ b/src/core/parser/ParserStack.hpp @@ -37,7 +37,7 @@ #include <stack> #include <vector> -#include <core/variant/Variant.hpp> +#include <core/common/Variant.hpp> #include "Parser.hpp" diff --git a/src/plugins/css/CSSParser.cpp b/src/plugins/css/CSSParser.cpp index 4cbe93f..5985047 100644 --- a/src/plugins/css/CSSParser.cpp +++ b/src/plugins/css/CSSParser.cpp @@ -18,7 +18,7 @@ #include "CSSParser.hpp" -#include <core/variant/Reader.hpp> +#include <core/common/VariantReader.hpp> namespace ousia { namespace parser { @@ -77,7 +77,7 @@ static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = { Rooted<Node> CSSParser::parse(std::istream &is, ParserContext &ctx) { - BufferedCharReader input{is}; + CharReader input{is}; CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; tokenizer.ignoreComments = true; tokenizer.ignoreLinebreaks = true; @@ -228,14 +228,14 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer, Variant::arrayType args; // we require at least one argument, if parantheses are used // XXX - /*args.push_back(variant::Reader::parseGeneric(tokenizer.getInput(), + args.push_back(VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, - {',', ')'}).second);*/ + {',', ')'}).second); while (expect(COMMA, tokenizer, t, false, ctx)) { // as long as we find commas we expect new arguments. - /*args.push_back( - variant::Reader::parseGeneric( - tokenizer.getInput(), ctx.logger, {',', ')'}).second);*/ + args.push_back( + VariantReader::parseGeneric( + tokenizer.getInput(), ctx.logger, {',', ')'}).second); } expect(PAREN_CLOSE, tokenizer, t, true, ctx); // and we return with the finished Selector. @@ -334,8 +334,8 @@ bool CSSParser::parseRule(CodeTokenizer &tokenizer, ParserContext &ctx, expect(COLON, tokenizer, t, true, ctx); // then the value // TODO: Resolve key for appropriate parsing function here. - /*value = variant::Reader::parseGeneric(tokenizer.getInput(), ctx.logger, - {';'}).second;*/ + value = VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger, + {';'}).second; // and a ; expect(SEMICOLON, tokenizer, t, true, ctx); return true; diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp index 82f0cd1..eeb5b2c 100644 --- a/src/plugins/css/CSSParser.hpp +++ b/src/plugins/css/CSSParser.hpp @@ -22,9 +22,9 @@ #include <vector> #include <utility> -#include <core/BufferedCharReader.hpp> #include <core/CodeTokenizer.hpp> #include <core/CSS.hpp> +#include <core/common/CharReader.hpp> #include <core/parser/Parser.hpp> namespace ousia { diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp index ce2857e..9a7b4d8 100644 --- a/src/plugins/xml/XmlParser.cpp +++ b/src/plugins/xml/XmlParser.cpp @@ -20,7 +20,7 @@ #include <expat.h> -#include <core/Utils.hpp> +#include <core/common/Utils.hpp> #include <core/parser/ParserStack.hpp> #include "XmlParser.hpp" |