summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/BufferedCharReader.cpp263
-rw-r--r--src/core/BufferedCharReader.hpp288
-rw-r--r--src/core/CSS.hpp2
-rw-r--r--src/core/CodeTokenizer.hpp8
-rw-r--r--src/core/Registry.cpp3
-rw-r--r--src/core/Tokenizer.cpp12
-rw-r--r--src/core/Tokenizer.hpp27
-rw-r--r--src/core/common/CharReader.cpp (renamed from src/core/utils/CharReader.cpp)5
-rw-r--r--src/core/common/CharReader.hpp (renamed from src/core/utils/CharReader.hpp)7
-rw-r--r--src/core/common/Exceptions.cpp (renamed from src/core/Exceptions.cpp)0
-rw-r--r--src/core/common/Exceptions.hpp (renamed from src/core/Exceptions.hpp)0
-rw-r--r--src/core/common/Logger.cpp (renamed from src/core/Logger.cpp)0
-rw-r--r--src/core/common/Logger.hpp (renamed from src/core/Logger.hpp)0
-rw-r--r--src/core/common/Utils.cpp (renamed from src/core/Utils.cpp)0
-rw-r--r--src/core/common/Utils.hpp (renamed from src/core/Utils.hpp)0
-rw-r--r--src/core/common/Variant.cpp (renamed from src/core/variant/Variant.cpp)3
-rw-r--r--src/core/common/Variant.hpp (renamed from src/core/variant/Variant.hpp)7
-rw-r--r--src/core/common/VariantReader.cpp (renamed from src/core/variant/Reader.cpp)247
-rw-r--r--src/core/common/VariantReader.hpp (renamed from src/core/variant/Reader.hpp)87
-rw-r--r--src/core/parser/Parser.hpp4
-rw-r--r--src/core/parser/ParserStack.cpp4
-rw-r--r--src/core/parser/ParserStack.hpp2
-rw-r--r--src/plugins/css/CSSParser.cpp18
-rw-r--r--src/plugins/css/CSSParser.hpp2
-rw-r--r--src/plugins/xml/XmlParser.cpp2
25 files changed, 211 insertions, 780 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp
deleted file mode 100644
index aeedf12..0000000
--- a/src/core/BufferedCharReader.cpp
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <array>
-
-#include "Utils.hpp"
-
-#include "BufferedCharReader.hpp"
-
-namespace ousia {
-
-// Constants used within the linebreak statemachine.
-static const uint8_t LB_STATE_NONE = 0x00;
-static const uint8_t LB_STATE_ONE = 0x01;
-static const uint8_t LB_STATE_LF = 0x10;
-static const uint8_t LB_STATE_CR = 0x20;
-static const uint8_t LB_STATE_MASK_CNT = 0x0F;
-static const uint8_t LB_STATE_MASK_TYPE = 0xF0;
-
-/* Struct BufferedCharReader::ReadCursor */
-
-BufferedCharReader::ReadCursor::ReadCursor(unsigned int line,
- unsigned int column,
- bool destructive)
- : line(line),
- column(column),
- bufferElem(0),
- bufferPos(0),
- destructive(destructive),
- lbState(LB_STATE_NONE)
-{
-}
-
-void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor)
-{
- this->line = cursor.line;
- this->column = cursor.column;
- this->bufferElem = cursor.bufferElem;
- this->bufferPos = cursor.bufferPos;
- this->lbState = cursor.lbState;
-}
-
-/* Class BufferedCharReader */
-
-BufferedCharReader::BufferedCharReader(int line, int column)
- : inputStream(nullptr),
- readCursor(line, column, true),
- peekCursor(line, column, false),
- depleted(false)
-{
-}
-
-BufferedCharReader::BufferedCharReader(const std::string &str, int line,
- int column)
- : inputStream(nullptr),
- readCursor(line, column, true),
- peekCursor(line, column, false),
- depleted(true)
-{
- buffer.push_back(str);
-}
-
-BufferedCharReader::BufferedCharReader(const std::string &str)
- : inputStream(nullptr),
- readCursor(1, 1, true),
- peekCursor(1, 1, false),
- depleted(true)
-{
- buffer.push_back(str);
-}
-
-BufferedCharReader::BufferedCharReader(std::istream &inputStream, int line,
- int column)
- : inputStream(&inputStream),
- readCursor(line, column, true),
- peekCursor(line, column, false),
- depleted(false)
-{
-}
-
-void BufferedCharReader::feed(const std::string &data)
-{
- if (!depleted && !inputStream) {
- buffer.push_back(data);
- }
-}
-
-void BufferedCharReader::close()
-{
- if (!inputStream) {
- depleted = true;
- }
-}
-
-bool BufferedCharReader::substituteLinebreaks(ReadCursor &cursor, char *c)
-{
- // Handle line breaks, inserts breakes after the following character
- // combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n
- if ((*c == '\n') || (*c == '\r')) {
- // Determine the type of the current linebreak character
- const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR;
-
- // Read the last count and the last type from the state
- const uint8_t lastCount = cursor.lbState & LB_STATE_MASK_CNT;
- const uint8_t lastType = cursor.lbState & LB_STATE_MASK_TYPE;
-
- // Set the current linebreak type and counter in the state
- cursor.lbState = ((lastCount + 1) & 1) | type;
-
- // If either this is the first instance of this character or the same
- // return character is repeated
- if (!lastCount || (lastType == type)) {
- *c = '\n';
- return true;
- }
- return false;
- }
-
- // Find the state
- cursor.lbState = LB_STATE_NONE;
- return true;
-}
-
-bool BufferedCharReader::readCharacterAtCursor(ReadCursor &cursor, char *c)
-{
- bool hasChar = false;
- while (!hasChar) {
- // Abort if the current buffer element does not point to a valid entry
- // in the buffer -- we must try to feed another data block into the
- // internal buffer
- if (cursor.bufferElem >= buffer.size()) {
- // Abort if there is no more data or no input stream is set
- if (depleted || !inputStream) {
- return false;
- }
-
- // Read a buffer of the specified size
- constexpr std::streamsize BUFFER_SIZE = 1024;
- std::array<char, BUFFER_SIZE> buf;
- const std::streamsize cnt =
- (*inputStream).read(buf.data(), BUFFER_SIZE).gcount();
-
- // If data has been read, append it to the input buffer and try
- // again
- if (cnt > 0) {
- buffer.emplace_back(std::string(buf.data(), cnt));
- continue;
- }
-
- // End of file handling
- if (inputStream->fail() || inputStream->eof()) {
- depleted = true;
- return false;
- }
- }
-
- // Fetch the current element the peek pointer points to
- const std::string &data = buffer[cursor.bufferElem];
-
- // Handle the "no data" case -- either in a destructive or
- // non-destructive manner.
- if (cursor.bufferPos >= data.length()) {
- if (cursor.destructive) {
- buffer.pop_front();
- } else {
- cursor.bufferElem++;
- }
- cursor.bufferPos = 0;
- continue;
- }
-
- // Read the character, advance the buffer position
- *c = *(data.data() + cursor.bufferPos);
- cursor.bufferPos++;
-
- // Substitute linebreaks with a single LF (0x0A)
- hasChar = substituteLinebreaks(cursor, c);
- }
-
- // Update the position counter
- if (*c == '\n') {
- cursor.line++;
- cursor.column = 1;
- } else {
- // Ignore UTF-8 continuation bytes
- if (!((*c & 0x80) && !(*c & 0x40))) {
- cursor.column++;
- }
- }
-
- return true;
-}
-
-bool BufferedCharReader::peek(char *c)
-{
- return readCharacterAtCursor(peekCursor, c);
-}
-
-bool BufferedCharReader::read(char *c)
-{
- resetPeek();
- return readCharacterAtCursor(readCursor, c);
-}
-
-void BufferedCharReader::consumePeek()
-{
- // Remove all no longer needed buffer elements
- for (unsigned int i = 0; i < peekCursor.bufferElem; i++) {
- buffer.pop_front();
- }
- peekCursor.bufferElem = 0;
-
- // Copy the peek cursor to the read cursor
- readCursor.assign(peekCursor);
-}
-
-bool BufferedCharReader::consumeWhitespace()
-{
- char c;
- while (peek(&c)) {
- if (!Utils::isWhitespace(c)) {
- resetPeek();
- return true;
- }
- consumePeek();
- }
- return false;
-}
-
-void BufferedCharReader::resetPeek()
-{
- // Reset the peek cursor to the read cursor
- peekCursor.assign(readCursor);
-}
-
-bool BufferedCharReader::atEnd() const
-{
- if (depleted || !inputStream) {
- if (buffer.size() <= 0) {
- return true;
- } else if (buffer.size() == 1) {
- return buffer[0].size() == readCursor.bufferPos;
- }
- }
- return false;
-}
-}
-
diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp
deleted file mode 100644
index e7f3186..0000000
--- a/src/core/BufferedCharReader.hpp
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * @file BufferedCharReader.hpp
- *
- * Contains the BufferedCharReader class which is used for reading/peeking
- * single characters from an input stream or string.
- *
- * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
- */
-
-#ifndef _OUSIA_BUFFERED_CHAR_READER_H_
-#define _OUSIA_BUFFERED_CHAR_READER_H_
-
-#include <deque>
-#include <string>
-#include <istream>
-#include <cstdint>
-
-namespace ousia {
-
-// TODO: Better split this class into multiple classes with base class
-// BufferedCharReader where each sub class represents one method of supplying
-// the input data (feeding, initial string, input stream).
-
-/**
- * The BufferedCharReader class is used for storing incomming data that
- * is fed into the pipeline as well as reading/peeking single characters
- * from that buffer. Additionally it counts the current column/row
- * (with correct handling for UTF-8) and contains an internal state
- * machine that handles the detection of linebreaks and converts these to a
- * single '\n'.
- */
-class BufferedCharReader {
-private:
- /**
- * The ReadCursor structure is responsible for representing the read
- * position within the text an all state machine states belonging to the
- * cursor. There are two types of read cursors: destructive and
- * non-destructive read cursors.
- */
- struct ReadCursor {
- /**
- * The line the cursor currently points to.
- */
- unsigned int line;
-
- /**
- * The column the cursor currently points to.
- */
- unsigned int column;
-
- /**
- * The index of the element in the data buffer we're currently reading
- * from.
- */
- unsigned int bufferElem;
-
- /**
- * The byte position within this data buffer.
- */
- unsigned int bufferPos;
-
- /**
- * Specifies whether this is a destructive cursor (bytes are discarded
- * once they were read from the buffer).
- */
- const bool destructive;
-
- /**
- * State variable used in the internal state machine of the
- * line feed detection.
- */
- uint8_t lbState;
-
- /**
- * Constructor of the ReadCursor structure.
- *
- * @param line is the start line.
- * @param column is the start column.
- * @param destructive specifies whether the ReadCursor is destructive
- * (consumes all read characters, as used in the "read cursor") or
- * non-destructive (as used in the "peek cursor").
- */
- ReadCursor(unsigned int line, unsigned int column, bool destructive);
-
- /**
- * Copys the data from another ReadCursor without overriding the
- * "destructive" flag.
- *
- * @param cursor is the cursor that should be copied.
- */
- void assign(const ReadCursor &cursor);
- };
-
- /**
- * Pointer at an (optional) input stream used for reading a chunk of data
- * whenever the input buffer depletes.
- */
- std::istream *inputStream;
-
- /**
- * The read and the peek cursor.
- */
- ReadCursor readCursor, peekCursor;
-
- /**
- * Set to true if there is no more input data.
- */
- bool depleted;
-
- /**
- * Queue containing the data that has been fed into the char reader.
- */
- std::deque<std::string> buffer;
-
- /**
- * Substitute any combination of linebreaks in the incomming code with "\n".
- * Returns true if the current character is meant as output, false
- * otherwise.
- */
- bool substituteLinebreaks(ReadCursor &cursor, char *c);
-
- /**
- * Reads a character from the input buffer and advances the given read
- * cursor.
- *
- * @param cursor is a reference to the read cursor that should be used
- * for reading.
- * @param hasChar is set to true, if a character is available, false if
- * no character is available (e.g. because line breaks are substituted or
- * the end of a buffer boundary is reached -- in this case this function
- * should be called again with the same parameters.)
- * @param c is a output parameter, which will be set to the read character.
- * @param returns true if there was enough data in the buffer, false
- * otherwise.
- */
- bool readCharacterAtCursor(ReadCursor &cursor, char *c);
-
- /**
- * Function that is called for each read character -- updates the row and
- * column count.
- */
- void updatePositionCounters(const char c);
-
-public:
-
- /**
- * Constructor of the buffered char reader class with empty buffer as input.
- * This operates the BufferedCharReader in a mode where new data has to be
- * fed using the "feed" function and explicitly closed using the "close"
- * function.
- *
- * @param line is the start line.
- * @param column is the start column.
- */
- BufferedCharReader(int line = 1, int column = 1);
-
- /**
- * Constructor of the buffered char reader class with a string as input.
- *
- * @param str is a string containing the input data.
- * @param line is the start line.
- * @param column is the start column.
- */
- BufferedCharReader(const std::string &str, int line, int column);
-
- /**
- * Constructor of the buffered char reader class with a string as input.
- *
- * @param str is a string containing the input data.
- */
- BufferedCharReader(const std::string &str);
-
- /**
- * Constructor of the buffered char reader class with a string as input.
- *
- * @param inputStream is the input stream from which incomming data should
- * be read.
- * @param line is the start line.
- * @param column is the start column.
- */
- BufferedCharReader(std::istream &inputStream, int line = 1, int column = 1);
-
- /**
- * Peeks a single character. If called multiple times, returns the
- * character after the previously peeked character.
- *
- * @param c is a reference to the character to which the result should be
- * writtern.
- * @return true if the character was successfully read, false if there are
- * no more characters to be read in the buffer.
- */
- bool peek(char *c);
-
- /**
- * Reads a character from the input data. If "peek" was called
- * beforehand resets the peek pointer.
- *
- * @param c is a reference to the character to which the result should be
- * writtern.
- * @return true if the character was successfully read, false if there are
- * no more characters to be read in the buffer.
- */
- bool read(char *c);
-
- /**
- * Advances the read pointer to the peek pointer -- so if the "peek"
- * function was called, "read" will now return the character after
- * the last peeked character.
- */
- void consumePeek();
-
- /**
- * Moves the read cursor to the next non-whitespace character. Returns
- * false, if the end of the stream was reached.
- *
- * @return false if the end of the stream was reached, false othrwise.
- */
- bool consumeWhitespace();
-
- /**
- * Resets the peek pointer to the "read" pointer.
- */
- void resetPeek();
-
- /**
- * Feeds new data into the internal buffer of the BufferedCharReader
- * class. Only applicable if the buffered char reader was constructed
- * without an input stream or string.
- *
- * @param data is a string containing the data that should be
- * appended to the internal buffer.
- */
- void feed(const std::string &data);
-
- /**
- * Tells the buffered char reader that no more data will be fed.
- * Only applicable if the buffered char reader was constructed without an
- * input stream or string.
- *
- * @param data is a string containing the data that should be
- * appended to the internal buffer.
- */
- void close();
-
- /**
- * Returns true if there are no more characters as the stream was
- * closed.
- *
- * @return true if there is no more data.
- */
- bool atEnd() const;
-
- /**
- * Returns the current line (starting with one).
- *
- * @return the current line number.
- */
- int getLine() const { return readCursor.line; }
-
- /**
- * Returns the current column (starting with one).
- *
- * @return the current column number.
- */
- int getColumn() const { return readCursor.column; }
-};
-}
-
-#endif /* _OUSIA_BUFFERED_CHAR_READER_H_ */
-
diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp
index 1510f3a..a54d956 100644
--- a/src/core/CSS.hpp
+++ b/src/core/CSS.hpp
@@ -23,7 +23,7 @@
#include <vector>
#include <tuple>
-#include <core/variant/Variant.hpp>
+#include <core/common/Variant.hpp>
#include "Managed.hpp"
#include "Node.hpp"
diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp
index 43c7abb..4190297 100644
--- a/src/core/CodeTokenizer.hpp
+++ b/src/core/CodeTokenizer.hpp
@@ -22,7 +22,7 @@
#include <map>
#include <sstream>
-#include "BufferedCharReader.hpp"
+#include <core/common/CharReader.hpp>
#include "Tokenizer.hpp"
namespace ousia {
@@ -108,8 +108,8 @@ public:
/**
*
- * @param input a BufferedCharReader containing the input for this
- * tokenizer, as with a regular tokenizer.
+ * @param input a CharReader containing the input for this tokenizer, as
+ * with a regular tokenizer.
* @param root a TokenTreeNode representing the root of the TokenTree.
* Please note that you have to specify all tokenIDs here that you use
* in the descriptors map.
@@ -120,7 +120,7 @@ public:
* and this CodeTokenizer would recognize the token "//" as starting a
* line comment.
*/
- CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root,
+ CodeTokenizer(CharReader &input, const TokenTreeNode &root,
std::map<int, CodeTokenDescriptor> descriptors)
: Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL)
{
diff --git a/src/core/Registry.cpp b/src/core/Registry.cpp
index 6ff9594..74d1cf8 100644
--- a/src/core/Registry.cpp
+++ b/src/core/Registry.cpp
@@ -16,8 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <core/Logger.hpp>
-
+#include <core/common/Logger.hpp>
#include <core/parser/Parser.hpp>
namespace ousia {
diff --git a/src/core/Tokenizer.cpp b/src/core/Tokenizer.cpp
index b99d1ed..0af5f5a 100644
--- a/src/core/Tokenizer.cpp
+++ b/src/core/Tokenizer.cpp
@@ -72,7 +72,7 @@ TokenTreeNode::TokenTreeNode(const std::map<std::string, int> &inputs)
{
}
-Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root)
+Tokenizer::Tokenizer(CharReader &input, const TokenTreeNode &root)
: input(input), root(root)
{
}
@@ -81,10 +81,10 @@ bool Tokenizer::prepare()
{
std::stringstream buffer;
char c;
- int startColumn = input.getColumn();
- int startLine = input.getLine();
+ uint32_t startColumn = input.getColumn();
+ uint32_t startLine = input.getLine();
bool bufEmpty = true;
- while (input.peek(&c)) {
+ while (input.peek(c)) {
if (root.children.find(c) != root.children.end()) {
// if there might be a special token, keep peeking forward
// until we find the token (or we don't).
@@ -107,7 +107,7 @@ bool Tokenizer::prepare()
input.consumePeek();
}
}
- if (!input.peek(&c)) {
+ if (!input.peek(c)) {
// if we are at the end we break off the search.
break;
}
@@ -153,7 +153,7 @@ bool Tokenizer::prepare()
}
} else{
//if we found nothing, read at least one character.
- input.peek(&c);
+ input.peek(c);
}
}
buffer << c;
diff --git a/src/core/Tokenizer.hpp b/src/core/Tokenizer.hpp
index 8f80150..33327cc 100644
--- a/src/core/Tokenizer.hpp
+++ b/src/core/Tokenizer.hpp
@@ -19,11 +19,12 @@
#ifndef _OUSIA_TOKENIZER_HPP_
#define _OUSIA_TOKENIZER_HPP_
+#include <cstdint>
+#include <deque>
#include <istream>
#include <map>
-#include <deque>
-#include "BufferedCharReader.hpp"
+#include <core/common/CharReader.hpp>
namespace ousia {
@@ -120,13 +121,13 @@ static const int TOKEN_TEXT = -2;
struct Token {
int tokenId;
std::string content;
- int startColumn;
- int startLine;
- int endColumn;
- int endLine;
+ uint32_t startColumn;
+ uint32_t startLine;
+ uint32_t endColumn;
+ uint32_t endLine;
- Token(int tokenId, std::string content, int startColumn, int startLine,
- int endColumn, int endLine)
+ Token(int tokenId, std::string content, uint32_t startColumn, uint32_t startLine,
+ uint32_t endColumn, uint32_t endLine)
: tokenId(tokenId),
content(content),
startColumn(startColumn),
@@ -160,7 +161,7 @@ struct Token {
*/
class Tokenizer {
private:
- BufferedCharReader &input;
+ CharReader &input;
const TokenTreeNode &root;
std::deque<Token> peeked;
unsigned int peekCursor = 0;
@@ -185,14 +186,14 @@ protected:
public:
/**
* @param input The input of a Tokenizer is given in the form of a
- * BufferedCharReader. Please refer to the respective documentation.
+ * CharReader. Please refer to the respective documentation.
* @param root This is meant to be the root of a TokenTree giving the
* specification of user-defined tokens this Tokenizer should recognize.
* The Tokenizer promises to not change the TokenTree such that you can
* re-use the same specification for multiple inputs.
* Please refer to the TokenTreeNode documentation for more information.
*/
- Tokenizer(BufferedCharReader &input, const TokenTreeNode &root);
+ Tokenizer(CharReader &input, const TokenTreeNode &root);
/**
* The next method consumes one Token from the input stream and gives
@@ -224,9 +225,9 @@ public:
*/
void consumePeek();
- const BufferedCharReader &getInput() const { return input; }
+ const CharReader &getInput() const { return input; }
- BufferedCharReader &getInput() { return input; }
+ CharReader &getInput() { return input; }
};
}
diff --git a/src/core/utils/CharReader.cpp b/src/core/common/CharReader.cpp
index 61616d7..373c0c1 100644
--- a/src/core/utils/CharReader.cpp
+++ b/src/core/common/CharReader.cpp
@@ -21,12 +21,10 @@
#include <limits>
#include <sstream>
-#include <core/Utils.hpp>
-
#include "CharReader.hpp"
+#include "Utils.hpp"
namespace ousia {
-namespace utils {
/* Helper functions */
@@ -639,5 +637,4 @@ void CharReaderFork::commit()
parentPeekCursor.assign(buffer, peekCursor);
}
}
-}
diff --git a/src/core/utils/CharReader.hpp b/src/core/common/CharReader.hpp
index 1306026..3cbe4b4 100644
--- a/src/core/utils/CharReader.hpp
+++ b/src/core/common/CharReader.hpp
@@ -33,7 +33,6 @@
#include <vector>
namespace ousia {
-namespace utils {
/**
* A chunked ring buffer used in CharReader to provide access to an input stream
@@ -659,12 +658,6 @@ public:
*/
void commit();
};
-}
-
-/**
- * Alias of the commonly used CharReader class.
- */
-using CharReader = utils::CharReader;
}
diff --git a/src/core/Exceptions.cpp b/src/core/common/Exceptions.cpp
index d064f35..d064f35 100644
--- a/src/core/Exceptions.cpp
+++ b/src/core/common/Exceptions.cpp
diff --git a/src/core/Exceptions.hpp b/src/core/common/Exceptions.hpp
index 00d6106..00d6106 100644
--- a/src/core/Exceptions.hpp
+++ b/src/core/common/Exceptions.hpp
diff --git a/src/core/Logger.cpp b/src/core/common/Logger.cpp
index 17f55a6..17f55a6 100644
--- a/src/core/Logger.cpp
+++ b/src/core/common/Logger.cpp
diff --git a/src/core/Logger.hpp b/src/core/common/Logger.hpp
index e6b97f4..e6b97f4 100644
--- a/src/core/Logger.hpp
+++ b/src/core/common/Logger.hpp
diff --git a/src/core/Utils.cpp b/src/core/common/Utils.cpp
index c460ed4..c460ed4 100644
--- a/src/core/Utils.cpp
+++ b/src/core/common/Utils.cpp
diff --git a/src/core/Utils.hpp b/src/core/common/Utils.hpp
index 5332b50..5332b50 100644
--- a/src/core/Utils.hpp
+++ b/src/core/common/Utils.hpp
diff --git a/src/core/variant/Variant.cpp b/src/core/common/Variant.cpp
index d33cd4f..27fc6e7 100644
--- a/src/core/variant/Variant.cpp
+++ b/src/core/common/Variant.cpp
@@ -18,8 +18,7 @@
#include <sstream>
-#include <core/Utils.hpp>
-
+#include "Utils.hpp"
#include "Variant.hpp"
namespace ousia {
diff --git a/src/core/variant/Variant.hpp b/src/core/common/Variant.hpp
index 1e62644..d411fd3 100644
--- a/src/core/variant/Variant.hpp
+++ b/src/core/common/Variant.hpp
@@ -39,10 +39,9 @@
// http://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html
// later (will allow to use 8 bytes for a variant)
-#include <core/Exceptions.hpp>
+#include "Exceptions.hpp"
namespace ousia {
-namespace variant {
/**
* Instances of the Variant class represent any kind of data that is exchanged
@@ -758,9 +757,5 @@ public:
};
}
-// Alias for the (very often used and unambigous) variant class
-using Variant = variant::Variant;
-}
-
#endif /* _OUSIA_VARIANT_HPP_ */
diff --git a/src/core/variant/Reader.cpp b/src/core/common/VariantReader.cpp
index 5c167cd..e611842 100644
--- a/src/core/variant/Reader.cpp
+++ b/src/core/common/VariantReader.cpp
@@ -21,12 +21,10 @@
#include <cmath>
#include <sstream>
-#include <core/Utils.hpp>
-
-#include "Reader.hpp"
+#include "VariantReader.hpp"
+#include "Utils.hpp"
namespace ousia {
-namespace variant {
// TODO: Better error messages (like "Expected 'x' but got 'y'")
// TODO: Replace delims with single char delim where possible
@@ -177,134 +175,138 @@ public:
* delimiters.
*/
bool parse(CharReader &reader, Logger &logger,
- const std::unordered_set<char> &delims)
- {
- State state = State::INIT;
- char c;
+ const std::unordered_set<char> &delims);
+};
- // Consume the first whitespace characters
- reader.consumeWhitespace();
+bool Number::parse(CharReader &reader, Logger &logger,
+ const std::unordered_set<char> &delims)
+{
+ State state = State::INIT;
+ char c;
- // Iterate over the FSM to extract numbers
- while (reader.peek(c)) {
- // Abort, once a delimiter or whitespace is reached
- if (Utils::isWhitespace(c) || delims.count(c)) {
- reader.resetPeek();
- break;
- }
+ // Consume the first whitespace characters
+ reader.consumeWhitespace();
- // The character is not a whitespace character and not a delimiter
- switch (state) {
- case State::INIT:
- case State::HAS_MINUS:
- switch (c) {
- case '-':
- // Do not allow multiple minus signs
- if (state == State::HAS_MINUS) {
- logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
- return false;
- }
- state = State::HAS_MINUS;
- s = -1;
- break;
- case '0':
- // Remember a leading zero for the detection of "0x"
- state = State::LEADING_ZERO;
- break;
- case '.':
- // Remember a leading point as ".eXXX" is invalid
- state = State::LEADING_POINT;
- break;
- default:
- state = State::INT;
- if (!appendChar(c, 10, Part::A, reader, logger)) {
- return false;
- }
- break;
- }
- break;
- case State::LEADING_ZERO:
- if (c == 'x' || c == 'X') {
- state = State::HEX;
+ // Iterate over the FSM to extract numbers
+ while (reader.peek(c)) {
+ // Abort, once a delimiter or whitespace is reached
+ if (Utils::isWhitespace(c) || delims.count(c)) {
+ reader.resetPeek();
+ break;
+ }
+
+ // The character is not a whitespace character and not a delimiter
+ switch (state) {
+ case State::INIT:
+ case State::HAS_MINUS:
+ switch (c) {
+ case '-':
+ // Do not allow multiple minus signs
+ if (state == State::HAS_MINUS) {
+ logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+ return false;
+ }
+ state = State::HAS_MINUS;
+ s = -1;
break;
- }
- // fallthrough
- case State::INT:
- switch (c) {
- case '.':
- state = State::POINT;
- break;
- case 'e':
- case 'E':
- state = State::EXP_INIT;
- break;
- default:
- state = State::INT;
- if (!appendChar(c, 10, Part::A, reader, logger)) {
- return false;
- }
- break;
- }
- break;
- case State::HEX:
- if (!appendChar(c, 16, Part::A, reader, logger)) {
- return false;
- }
- break;
- case State::LEADING_POINT:
- case State::POINT:
- switch (c) {
- case 'e':
- case 'E':
- if (state == State::LEADING_POINT) {
- logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
- return false;
- }
- state = State::EXP_INIT;
- break;
- default:
- state = State::POINT;
- if (!appendChar(c, 10, Part::N, reader, logger)) {
- return false;
- }
- break;
- }
+ case '0':
+ // Remember a leading zero for the detection of "0x"
+ state = State::LEADING_ZERO;
+ break;
+ case '.':
+ // Remember a leading point as ".eXXX" is invalid
+ state = State::LEADING_POINT;
+ break;
+ default:
+ state = State::INT;
+ if (!appendChar(c, 10, Part::A, reader, logger)) {
+ return false;
+ }
+ break;
+ }
+ break;
+ case State::LEADING_ZERO:
+ if (c == 'x' || c == 'X') {
+ state = State::HEX;
break;
- case State::EXP_HAS_MINUS:
- case State::EXP_INIT:
- if (c == '-') {
- if (state == State::EXP_HAS_MINUS) {
+ }
+ // fallthrough
+ case State::INT:
+ switch (c) {
+ case '.':
+ state = State::POINT;
+ break;
+ case 'e':
+ case 'E':
+ state = State::EXP_INIT;
+ break;
+ default:
+ state = State::INT;
+ if (!appendChar(c, 10, Part::A, reader, logger)) {
+ return false;
+ }
+ break;
+ }
+ break;
+ case State::HEX:
+ if (!appendChar(c, 16, Part::A, reader, logger)) {
+ return false;
+ }
+ break;
+ case State::LEADING_POINT:
+ case State::POINT:
+ switch (c) {
+ case 'e':
+ case 'E':
+ if (state == State::LEADING_POINT) {
logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
return false;
}
- state = State::EXP_HAS_MINUS;
- sE = -1;
- } else {
- state = State::EXP;
- if (!appendChar(c, 10, Part::E, reader, logger)) {
+ state = State::EXP_INIT;
+ break;
+ default:
+ state = State::POINT;
+ if (!appendChar(c, 10, Part::N, reader, logger)) {
return false;
}
+ break;
+ }
+ break;
+ case State::EXP_HAS_MINUS:
+ case State::EXP_INIT:
+ if (c == '-') {
+ if (state == State::EXP_HAS_MINUS) {
+ logger.errorAt(ERR_UNEXPECTED_CHAR, reader);
+ return false;
}
- break;
- case State::EXP:
+ state = State::EXP_HAS_MINUS;
+ sE = -1;
+ } else {
+ state = State::EXP;
if (!appendChar(c, 10, Part::E, reader, logger)) {
return false;
}
- break;
- }
- reader.consumePeek();
+ }
+ break;
+ case State::EXP:
+ if (!appendChar(c, 10, Part::E, reader, logger)) {
+ return false;
+ }
+ break;
}
+ reader.consumePeek();
+ }
- // States in which ending is valid. Log an error in other states
- if (state == State::LEADING_ZERO || state == State::HEX ||
- state == State::INT || state == State::POINT ||
- state == State::EXP) {
- return true;
- }
- logger.errorAt(ERR_UNEXPECTED_END, reader);
- return false;
+ // States in which ending is valid. Log an error in other states
+ if (state == State::LEADING_ZERO || state == State::HEX ||
+ state == State::INT || state == State::POINT ||
+ state == State::EXP) {
+ return true;
}
-};
+ logger.errorAt(ERR_UNEXPECTED_END, reader);
+ return false;
+}
+
/* Class Reader */
@@ -324,7 +326,7 @@ static std::pair<bool, T> error(CharReader &reader, Logger &logger,
return std::make_pair(false, std::move(res));
}
-std::pair<bool, std::string> Reader::parseString(
+std::pair<bool, std::string> VariantReader::parseString(
CharReader &reader, Logger &logger,
const std::unordered_set<char> *delims)
{
@@ -422,7 +424,7 @@ std::pair<bool, std::string> Reader::parseString(
return error(reader, logger, ERR_UNEXPECTED_END, res.str());
}
-std::pair<bool, Variant::arrayType> Reader::parseArray(
+std::pair<bool, Variant::arrayType> VariantReader::parseArray(
CharReader &reader, Logger &logger, char delim)
{
Variant::arrayType res;
@@ -490,7 +492,7 @@ std::pair<bool, Variant::arrayType> Reader::parseArray(
return error(reader, logger, ERR_UNEXPECTED_END, res);
}
-std::pair<bool, std::string> Reader::parseUnescapedString(
+std::pair<bool, std::string> VariantReader::parseUnescapedString(
CharReader &reader, Logger &logger,
const std::unordered_set<char> &delims)
{
@@ -527,7 +529,7 @@ std::pair<bool, std::string> Reader::parseUnescapedString(
return std::make_pair(true, res.str());
}
-std::pair<bool, int64_t> Reader::parseInteger(
+std::pair<bool, int64_t> VariantReader::parseInteger(
CharReader &reader, Logger &logger,
const std::unordered_set<char> &delims)
{
@@ -544,7 +546,7 @@ std::pair<bool, int64_t> Reader::parseInteger(
return std::make_pair(false, n.intValue());
}
-std::pair<bool, double> Reader::parseDouble(
+std::pair<bool, double> VariantReader::parseDouble(
CharReader &reader, Logger &logger,
const std::unordered_set<char> &delims)
{
@@ -553,7 +555,7 @@ std::pair<bool, double> Reader::parseDouble(
return std::make_pair(res, n.doubleValue());
}
-std::pair<bool, Variant> Reader::parseGeneric(
+std::pair<bool, Variant> VariantReader::parseGeneric(
CharReader &reader, Logger &logger,
const std::unordered_set<char> &delims)
{
@@ -582,7 +584,7 @@ std::pair<bool, Variant> Reader::parseGeneric(
Number n;
// Fork the reader
- utils::CharReaderFork fork = reader.fork();
+ CharReaderFork fork = reader.fork();
// TODO: Fork logger
@@ -620,5 +622,4 @@ std::pair<bool, Variant> Reader::parseGeneric(
return error(reader, logger, ERR_UNEXPECTED_END, nullptr);
}
}
-}
diff --git a/src/core/variant/Reader.hpp b/src/core/common/VariantReader.hpp
index 4114d46..5e7c5d2 100644
--- a/src/core/variant/Reader.hpp
+++ b/src/core/common/VariantReader.hpp
@@ -17,7 +17,7 @@
*/
/**
- * @file Reader.hpp
+ * @file VariantReader.hpp
*
* Provides parsers for various micro formats. These formats include integers,
* doubles, strings, JSON and the Ousía struct notation.
@@ -32,23 +32,21 @@
#include <unordered_set>
#include <utility>
-#include <core/utils/CharReader.hpp>
-#include <core/Logger.hpp>
-
+#include "CharReader.hpp"
+#include "Logger.hpp"
#include "Variant.hpp"
namespace ousia {
-namespace variant {
-class Reader {
+class VariantReader {
private:
/**
* Parses a string which may either be enclosed by " or ', unescapes
* entities in the string as specified for JavaScript.
*
- * @param reader is a reference to the CharReader instance which is
- * the source for the character data. The reader will be positioned after
- * the terminating quote character or at the terminating delimiting
+ * @param VariantReader is a reference to the CharReader instance which is
+ * the source for the character data. The VariantReader will be positioned
+ * after the terminating quote character or at the terminating delimiting
* character.
* @param logger is the logger instance that should be used to log error
* messages and warnings.
@@ -58,7 +56,7 @@ private:
* is read.
*/
static std::pair<bool, std::string> parseString(
- CharReader &reader, Logger &logger,
+ CharReader &VariantReader, Logger &logger,
const std::unordered_set<char> *delims);
public:
@@ -66,9 +64,9 @@ public:
* Parses a string which may either be enclosed by " or ', unescapes
* entities in the string as specified for JavaScript.
*
- * @param reader is a reference to the CharReader instance which is
- * the source for the character data. The reader will be positioned after
- * the terminating quote character or at the terminating delimiting
+ * @param VariantReader is a reference to the CharReader instance which is
+ * the source for the character data. The VariantReader will be positioned
+ * after the terminating quote character or at the terminating delimiting
* character.
* @param logger is the logger instance that should be used to log error
* messages and warnings.
@@ -77,93 +75,92 @@ public:
* outside).
*/
static std::pair<bool, std::string> parseString(
- CharReader &reader, Logger &logger,
+ CharReader &VariantReader, Logger &logger,
const std::unordered_set<char> &delims)
{
- return parseString(reader, logger, &delims);
+ return parseString(VariantReader, logger, &delims);
}
/**
* Parses a string which may either be enclosed by " or ', unescapes
* entities in the string as specified for JavaScript.
*
- * @param reader is a reference to the CharReader instance which is
- * the source for the character data. The reader will be positioned after
- * the terminating quote character or at the terminating delimiting
+ * @param VariantReader is a reference to the CharReader instance which is
+ * the source for the character data. The VariantReader will be positioned
+ * after the terminating quote character or at the terminating delimiting
* character.
* @param logger is the logger instance that should be used to log error
* messages and warnings.
*/
- static std::pair<bool, std::string> parseString(CharReader &reader,
+ static std::pair<bool, std::string> parseString(CharReader &VariantReader,
Logger &logger)
{
- return parseString(reader, logger, nullptr);
+ return parseString(VariantReader, logger, nullptr);
}
/**
- * Extracts an unescaped string from the given buffered char reader
+ * Extracts an unescaped string from the given CharReader
* instance. This function just reads text until one of the given delimiter
* characters is reached.
*
- * @param reader is a reference to the CharReader instance which is
- * the source for the character data. The reader will be positioned at the
- * terminating delimiting character.
+ * @param VariantReader is a reference to the CharReader instance which is
+ * the source for the character data. The VariantReader will be positioned
+ * at the terminating delimiting character.
* @param delims is a set of characters which will terminate the string.
* These characters are not included in the result. May not be nullptr.
*/
static std::pair<bool, std::string> parseUnescapedString(
- CharReader &reader, Logger &logger,
+ CharReader &VariantReader, Logger &logger,
const std::unordered_set<char> &delims);
/**
- * Parses an integer from the given buffered char reader instance until one
- * of the given delimiter characters is reached.
+ * Parses an integer from the given CharReader instance
+ * until one of the given delimiter characters is reached.
*
- * @param reader is a reference to the CharReader instance from
- * which the character data should been reader. The reader will be
- * positioned at the terminating delimiting character or directly after the
- * integer.
+ * @param VariantReader is a reference to the CharReader instance from
+ * which the character data should be read. The VariantReader
+ * will be positioned at the terminating delimiting character or directly
+ * after the integer.
*/
static std::pair<bool, int64_t> parseInteger(
- CharReader &reader, Logger &logger,
+ CharReader &VariantReader, Logger &logger,
const std::unordered_set<char> &delims);
/**
- * Parses an double from the given buffered char reader instance until one
- * of the given delimiter characters is reached.
+ * Parses a double from the given CharReader instance
+ * until one of the given delimiter characters is reached.
*
- * @param reader is a reference to the CharReader instance from
- * which the character data should been reader. The reader will be
- * positioned at the terminating delimiting character or directly after the
- * integer.
+ * @param VariantReader is a reference to the CharReader instance from
+ * which the character data should be read. The VariantReader
+ * will be positioned at the terminating delimiting character or directly
+ * after the double.
*/
static std::pair<bool, double> parseDouble(
- CharReader &reader, Logger &logger,
+ CharReader &VariantReader, Logger &logger,
const std::unordered_set<char> &delims);
/**
* Parses an array of values.
*/
static std::pair<bool, Variant::arrayType> parseArray(
- CharReader &reader, Logger &logger, char delim = 0);
+ CharReader &VariantReader, Logger &logger, char delim = 0);
/**
* Tries to parse the most specific item from the given stream until one of
* the given delimiters is reached or a meaningful literal has been read.
* The resulting variant represents the value that has been read.
*
- * @param reader is a reference to the CharReader instance which is
- * the source for the character data. The reader will be positioned at the
- * terminating delimiting character.
+ * @param VariantReader is a reference to the CharReader instance which is
+ * the source for the character data. The VariantReader will be positioned
+ * at the terminating delimiting character.
* @param delims is a set of characters which will terminate the string.
* These characters are not included in the result. May not be nullptr.
*/
static std::pair<bool, Variant> parseGeneric(
- CharReader &reader, Logger &logger,
+ CharReader &VariantReader, Logger &logger,
const std::unordered_set<char> &delims);
};
}
-}
#endif /* _OUSIA_VARIANT_READER_HPP_ */
diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp
index 5dac956..e155cfd 100644
--- a/src/core/parser/Parser.hpp
+++ b/src/core/parser/Parser.hpp
@@ -32,10 +32,10 @@
#include <set>
#include <string>
-#include <core/Exceptions.hpp>
#include <core/Node.hpp>
-#include <core/Logger.hpp>
#include <core/Registry.hpp>
+#include <core/common/Exceptions.hpp>
+#include <core/common/Logger.hpp>
#include "Scope.hpp"
diff --git a/src/core/parser/ParserStack.cpp b/src/core/parser/ParserStack.cpp
index dca7f35..5e801ee 100644
--- a/src/core/parser/ParserStack.cpp
+++ b/src/core/parser/ParserStack.cpp
@@ -20,8 +20,8 @@
#include "ParserStack.hpp"
-#include <core/Utils.hpp>
-#include <core/Exceptions.hpp>
+#include <core/common/Utils.hpp>
+#include <core/common/Exceptions.hpp>
namespace ousia {
namespace parser {
diff --git a/src/core/parser/ParserStack.hpp b/src/core/parser/ParserStack.hpp
index c5ed4e4..233f4f9 100644
--- a/src/core/parser/ParserStack.hpp
+++ b/src/core/parser/ParserStack.hpp
@@ -37,7 +37,7 @@
#include <stack>
#include <vector>
-#include <core/variant/Variant.hpp>
+#include <core/common/Variant.hpp>
#include "Parser.hpp"
diff --git a/src/plugins/css/CSSParser.cpp b/src/plugins/css/CSSParser.cpp
index 4cbe93f..5985047 100644
--- a/src/plugins/css/CSSParser.cpp
+++ b/src/plugins/css/CSSParser.cpp
@@ -18,7 +18,7 @@
#include "CSSParser.hpp"
-#include <core/variant/Reader.hpp>
+#include <core/common/VariantReader.hpp>
namespace ousia {
namespace parser {
@@ -77,7 +77,7 @@ static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = {
Rooted<Node> CSSParser::parse(std::istream &is, ParserContext &ctx)
{
- BufferedCharReader input{is};
+ CharReader input{is};
CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS};
tokenizer.ignoreComments = true;
tokenizer.ignoreLinebreaks = true;
@@ -228,14 +228,14 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer,
Variant::arrayType args;
// we require at least one argument, if parantheses are used
// XXX
- /*args.push_back(variant::Reader::parseGeneric(tokenizer.getInput(),
+ args.push_back(VariantReader::parseGeneric(tokenizer.getInput(),
ctx.logger,
- {',', ')'}).second);*/
+ {',', ')'}).second);
while (expect(COMMA, tokenizer, t, false, ctx)) {
// as long as we find commas we expect new arguments.
- /*args.push_back(
- variant::Reader::parseGeneric(
- tokenizer.getInput(), ctx.logger, {',', ')'}).second);*/
+ args.push_back(
+ VariantReader::parseGeneric(
+ tokenizer.getInput(), ctx.logger, {',', ')'}).second);
}
expect(PAREN_CLOSE, tokenizer, t, true, ctx);
// and we return with the finished Selector.
@@ -334,8 +334,8 @@ bool CSSParser::parseRule(CodeTokenizer &tokenizer, ParserContext &ctx,
expect(COLON, tokenizer, t, true, ctx);
// then the value
// TODO: Resolve key for appropriate parsing function here.
- /*value = variant::Reader::parseGeneric(tokenizer.getInput(), ctx.logger,
- {';'}).second;*/
+ value = VariantReader::parseGeneric(tokenizer.getInput(), ctx.logger,
+ {';'}).second;
// and a ;
expect(SEMICOLON, tokenizer, t, true, ctx);
return true;
diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp
index 82f0cd1..eeb5b2c 100644
--- a/src/plugins/css/CSSParser.hpp
+++ b/src/plugins/css/CSSParser.hpp
@@ -22,9 +22,9 @@
#include <vector>
#include <utility>
-#include <core/BufferedCharReader.hpp>
#include <core/CodeTokenizer.hpp>
#include <core/CSS.hpp>
+#include <core/common/CharReader.hpp>
#include <core/parser/Parser.hpp>
namespace ousia {
diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp
index ce2857e..9a7b4d8 100644
--- a/src/plugins/xml/XmlParser.cpp
+++ b/src/plugins/xml/XmlParser.cpp
@@ -20,7 +20,7 @@
#include <expat.h>
-#include <core/Utils.hpp>
+#include <core/common/Utils.hpp>
#include <core/parser/ParserStack.hpp>
#include "XmlParser.hpp"