diff options
Diffstat (limited to 'src/core/parser/utils/TokenizedData.hpp')
-rw-r--r-- | src/core/parser/utils/TokenizedData.hpp | 70 |
1 files changed, 43 insertions, 27 deletions
diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp index 85b80ae..b72ca02 100644 --- a/src/core/parser/utils/TokenizedData.hpp +++ b/src/core/parser/utils/TokenizedData.hpp @@ -36,7 +36,6 @@ #include <unordered_set> #include <core/common/Location.hpp> -#include <core/common/Variant.hpp> #include <core/common/Whitespace.hpp> #include <core/common/Token.hpp> @@ -48,6 +47,28 @@ class TokenizedDataReader; class TokenizedDataReaderFork; /** + * Internally used structure representing a cursor within the TokenizedData + * stream. + */ +struct TokenizedDataCursor { + /** + * Position within the byte buffer. + */ + size_t bufPos; + + /** + * Position within the token mark buffer. + */ + size_t markPos; + + /** + * Default constructor. The resulting cursor points at the beginning of the + * stream. + */ + TokenizedDataCursor() : bufPos(0), markPos(0) {} +}; + +/** * The TokenizedData class stores data extracted from a user defined document. * The data stored in TokenizedData */ @@ -88,10 +109,13 @@ public: * * @param data is the string that should be appended to the buffer. * @param offsStart is the start offset in bytes in the input file. + * @param protect if set to true, the appended characters will not be + * affected by whitespace handling, they will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. */ - size_t append(const std::string &data, SourceOffset offsStart = 0); + size_t append(const std::string &data, SourceOffset offsStart = 0, + bool protect = false); /** * Appends a single character to the internal character buffer. @@ -99,10 +123,13 @@ public: * @param c is the character that should be appended to the buffer. * @param start is the start offset in bytes in the input file. * @param end is the end offset in bytes in the input file. + * @param protect if set to true, the appended character will not be + * affected by whitespace handling, it will be returned as is. * @return the current size of the internal byte buffer. The returned value * is intended to be used for the "mark" function. */ - size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd); + size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd, + bool protect = false); /** * Stores a token ending at the last character of the current buffer. @@ -187,15 +214,16 @@ private: /** * Position from which the last element was read from the internal buffer. */ - size_t readCursor; + TokenizedDataCursor readCursor; /** * Position from which the last element was peeked from the internal buffer. */ - size_t peekCursor; + TokenizedDataCursor peekCursor; +protected: /** - * Private constructor of TokenizedDataReader, taking a reference to the + * Protected constructor of TokenizedDataReader, taking a reference to the * internal TokenizedDataImpl structure storing the data that is accessed by * the reader. * @@ -205,8 +233,9 @@ private: * @param peekCursor is the cursor position from which tokens and text are * peeked. */ - TokenizedDataReader(std::shared_ptr<TokenizedDataImpl> impl, - size_t readCursor, size_t peekCursor); + TokenizedDataReader(std::shared_ptr<const TokenizedDataImpl> impl, + const TokenizedDataCursor &readCursor, + const TokenizedDataCursor &peekCursor); public: /** @@ -237,7 +266,7 @@ public: * false if there are no more tokens. */ bool read(Token &token, const TokenSet &tokens = TokenSet{}, - WhitespaceMode mode = WhitespaceMode::COLLAPSE); + WhitespaceMode mode = WhitespaceMode::TRIM); /** * Stores the next token in the given token reference, returns true if the @@ -253,7 +282,7 @@ public: * false if there are no more tokens. */ bool peek(Token &token, const TokenSet &tokens = TokenSet{}, - WhitespaceMode mode = WhitespaceMode::COLLAPSE); + WhitespaceMode mode = WhitespaceMode::TRIM); /** * Consumes the peeked tokens, the read cursor will now be at the position @@ -265,20 +294,6 @@ public: * Resets the peek cursor to the position of the read cursor. */ void resetPeek() { peekCursor = readCursor; } - - /** - * Stores the next text token in the given token reference, returns true if - * the operation was successful (there was indeed a text token), false if - * the next token is not a text token or there were no more tokens. - * - * @param token is an output parameter into which the read token will be - * stored. The TokenId is set to Tokens::Empty if there are no more tokens. - * @param mode is the whitespace mode that should be used when a text token - * is returned. - * @return a string variant with the data if there is any data or a nullptr - * variant if there is no text. - */ - Variant text(WhitespaceMode mode = WhitespaceMode::COLLAPSE); }; /** @@ -309,8 +324,9 @@ private: * peeked. */ TokenizedDataReaderFork(TokenizedDataReader &parent, - std::shared_ptr<TokenizedDataImpl> impl, - size_t readCursor, size_t peekCursor) + std::shared_ptr<const TokenizedDataImpl> impl, + const TokenizedDataCursor &readCursor, + const TokenizedDataCursor &peekCursor) : TokenizedDataReader(impl, readCursor, peekCursor), parent(parent) { } @@ -320,7 +336,7 @@ public: * Commits the read/peek progress to the underlying parent. */ void commit() { parent = *this; } -} +}; } #endif /* _OUSIA_TOKENIZED_DATA_HPP_ */ |