Diffstat (limited to 'src/core/parser/utils/TokenizedData.hpp')
-rw-r--r--  src/core/parser/utils/TokenizedData.hpp  70
1 file changed, 43 insertions, 27 deletions
diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp
index 85b80ae..b72ca02 100644
--- a/src/core/parser/utils/TokenizedData.hpp
+++ b/src/core/parser/utils/TokenizedData.hpp
@@ -36,7 +36,6 @@
#include <unordered_set>
#include <core/common/Location.hpp>
-#include <core/common/Variant.hpp>
#include <core/common/Whitespace.hpp>
#include <core/common/Token.hpp>
@@ -48,6 +47,28 @@ class TokenizedDataReader;
class TokenizedDataReaderFork;
/**
+ * Internally used structure representing a cursor within the TokenizedData
+ * stream.
+ */
+struct TokenizedDataCursor {
+ /**
+ * Position within the byte buffer.
+ */
+ size_t bufPos;
+
+ /**
+ * Position within the token mark buffer.
+ */
+ size_t markPos;
+
+ /**
+ * Default constructor. The resulting cursor points at the beginning of the
+ * stream.
+ */
+ TokenizedDataCursor() : bufPos(0), markPos(0) {}
+};
+
+/**
* The TokenizedData class stores data extracted from a user defined document.
* The data stored in TokenizedData
*/
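The new cursor bundles the position in the byte buffer with the position in the token mark buffer, so a reader can save and restore both with a single assignment (which is exactly what resetPeek() does further below). A minimal sketch, not part of the diff, with made-up offsets and the ousia namespace assumed from the _OUSIA_ include guard:

    ousia::TokenizedDataCursor read;        // default-constructed: start of the stream
    ousia::TokenizedDataCursor peek = read; // peeking begins at the read position
    peek.bufPos += 4;                       // hypothetical advance in the byte buffer
    peek.markPos += 1;                      // hypothetical advance in the mark buffer
    peek = read;                            // what resetPeek() amounts to: both fields restored at once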
@@ -88,10 +109,13 @@ public:
*
* @param data is the string that should be appended to the buffer.
* @param offsStart is the start offset in bytes in the input file.
+ * @param protect if set to true, the appended characters will not be
+ * affected by whitespace handling but will be returned as is.
* @return the current size of the internal byte buffer. The returned value
* is intended to be used for the "mark" function.
*/
- size_t append(const std::string &data, SourceOffset offsStart = 0);
+ size_t append(const std::string &data, SourceOffset offsStart = 0,
+ bool protect = false);
/**
* Appends a single character to the internal character buffer.
@@ -99,10 +123,13 @@ public:
* @param c is the character that should be appended to the buffer.
* @param offsStart is the start offset in bytes in the input file.
* @param offsEnd is the end offset in bytes in the input file.
+ * @param protect if set to true, the appended character will not be
+ * affected by whitespace handling but will be returned as is.
* @return the current size of the internal byte buffer. The returned value
* is intended to be used for the "mark" function.
*/
- size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd);
+ size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd,
+ bool protect = false);
/**
* Stores a token ending at the last character of the current buffer.
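A minimal usage sketch for the extended append() overloads. The include path, the ousia namespace and the concrete offsets are assumptions made for illustration; only the append() signatures come from this header:

    #include <core/parser/utils/TokenizedData.hpp>  // path assumed from the repository layout

    void fillBuffer(ousia::TokenizedData &data)
    {
        // Regular text, subject to the reader's whitespace handling.
        size_t size = data.append("keyword", 0);
        // A single character with explicit start/end byte offsets.
        size = data.append(' ', 7, 8);
        // protect = true: the appended characters are exempt from whitespace
        // handling and will be returned as is.
        size = data.append("  raw  ", 8, true);
        (void)size;  // the returned buffer size is meant to be passed to the "mark" function
    }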
@@ -187,15 +214,16 @@ private:
/**
* Position from which the last element was read from the internal buffer.
*/
- size_t readCursor;
+ TokenizedDataCursor readCursor;
/**
* Position from which the last element was peeked from the internal buffer.
*/
- size_t peekCursor;
+ TokenizedDataCursor peekCursor;
+protected:
/**
- * Private constructor of TokenizedDataReader, taking a reference to the
+ * Protected constructor of TokenizedDataReader, taking a reference to the
* internal TokenizedDataImpl structure storing the data that is accessed by
* the reader.
*
@@ -205,8 +233,9 @@ private:
* @param peekCursor is the cursor position from which tokens and text are
* peeked.
*/
- TokenizedDataReader(std::shared_ptr<TokenizedDataImpl> impl,
- size_t readCursor, size_t peekCursor);
+ TokenizedDataReader(std::shared_ptr<const TokenizedDataImpl> impl,
+ const TokenizedDataCursor &readCursor,
+ const TokenizedDataCursor &peekCursor);
public:
/**
@@ -237,7 +266,7 @@ public:
* false if there are no more tokens.
*/
bool read(Token &token, const TokenSet &tokens = TokenSet{},
- WhitespaceMode mode = WhitespaceMode::COLLAPSE);
+ WhitespaceMode mode = WhitespaceMode::TRIM);
/**
* Stores the next token in the given token reference, returns true if the
@@ -253,7 +282,7 @@ public:
* false if there are no more tokens.
*/
bool peek(Token &token, const TokenSet &tokens = TokenSet{},
- WhitespaceMode mode = WhitespaceMode::COLLAPSE);
+ WhitespaceMode mode = WhitespaceMode::TRIM);
/**
* Consumes the peeked tokens, the read cursor will now be at the position
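With WhitespaceMode::TRIM now being the default for both read() and peek(), callers only pass a mode when they want different behaviour. A sketch of the call pattern, assuming a reader reference is obtained elsewhere (how TokenizedDataReader instances are created is not part of this diff):

    void consumeAll(ousia::TokenizedDataReader &reader, const ousia::TokenSet &tokens)
    {
        ousia::Token token;
        while (reader.read(token, tokens)) {  // WhitespaceMode::TRIM is the new default
            // ... handle the token ...
        }
        // Look ahead without consuming; collapsing is requested explicitly here.
        if (reader.peek(token, tokens, ousia::WhitespaceMode::COLLAPSE)) {
            reader.resetPeek();  // rewind the peek cursor to the read cursor
        }
    }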
@@ -265,20 +294,6 @@ public:
* Resets the peek cursor to the position of the read cursor.
*/
void resetPeek() { peekCursor = readCursor; }
-
- /**
- * Stores the next text token in the given token reference, returns true if
- * the operation was successful (there was indeed a text token), false if
- * the next token is not a text token or there were no more tokens.
- *
- * @param token is an output parameter into which the read token will be
- * stored. The TokenId is set to Tokens::Empty if there are no more tokens.
- * @param mode is the whitespace mode that should be used when a text token
- * is returned.
- * @return a string variant with the data if there is any data or a nullptr
- * variant if there is no text.
- */
- Variant text(WhitespaceMode mode = WhitespaceMode::COLLAPSE);
};
/**
@@ -309,8 +324,9 @@ private:
* peeked.
*/
TokenizedDataReaderFork(TokenizedDataReader &parent,
- std::shared_ptr<TokenizedDataImpl> impl,
- size_t readCursor, size_t peekCursor)
+ std::shared_ptr<const TokenizedDataImpl> impl,
+ const TokenizedDataCursor &readCursor,
+ const TokenizedDataCursor &peekCursor)
: TokenizedDataReader(impl, readCursor, peekCursor), parent(parent)
{
}
@@ -320,7 +336,7 @@ public:
* Commits the read/peek progress to the underlying parent.
*/
void commit() { parent = *this; }
-}
+};
}
#endif /* _OUSIA_TOKENIZED_DATA_HPP_ */
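Finally, a hedged sketch of the fork/commit pattern that the cursor-based constructors enable. The fork() factory used below does not appear in this diff and is purely an assumption; commit() and its copy-back semantics are taken from the class above:

    void tryRead(ousia::TokenizedDataReader &reader)
    {
        // Fork the reader (assumed factory method, not shown in this diff),
        // read tentatively, and only commit the cursors on success.
        ousia::TokenizedDataReaderFork fork = reader.fork();
        ousia::Token token;
        if (fork.read(token)) {
            fork.commit();  // assigns the fork's state back to the parent reader
        }
        // Without commit(), the parent reader's cursors remain untouched.
    }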