diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-04-10 16:02:55 +0200 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2016-04-25 22:24:13 +0200 |
commit | 5e2dee7ac9a6ecb8f1d9e5d829d82109168b5b42 (patch) | |
tree | 84cfc8c5369d98aa339e2b9aa81f18d40f729006 /src/core/parser | |
parent | 579b0e14d3bc1345512cac00a83159569cfcfa00 (diff) |
Implement hasNonWhitespaceChar, lastCharIsWhitespace and firstCharIsWhitespace methods for TokenizedData class
Diffstat (limited to 'src/core/parser')
-rw-r--r-- | src/core/parser/utils/TokenizedData.cpp | 103 | ||||
-rw-r--r-- | src/core/parser/utils/TokenizedData.hpp | 33 |
2 files changed, 134 insertions, 2 deletions
diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index dcdcc65..276cd54 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -172,9 +172,24 @@ private: uint16_t numLinebreaks; /** + * True if the buffer contains at least one non-whitespace character. + */ + bool hasNonWhitespaceChar : 1; + + /** + * True if the last character is a whitespace character. + */ + bool lastCharIsWhitespace : 1; + + /** + * True if the first character in the buffer is a whitespace character. + */ + bool firstCharIsWhitespace : 1; + + /** * Flag indicating whether the internal "marks" vector is sorted. */ - mutable bool sorted; + mutable bool sorted : 1; public: /** @@ -231,11 +246,18 @@ public: protectedChars.push_back(protect); offsets.storeOffset(offsStart, offsEnd); - // Insert special tokens + // Fetch information about the current character const size_t size = buf.size(); const bool isWhitespace = Utils::isWhitespace(c); const bool isLinebreak = Utils::isLinebreak(c); + // Update the whitespace flags. NOTE(review): size is buf.size(); if c has already been appended to buf before this point, size == 0 can never hold here and firstCharIsWhitespace is never set on this path -- confirm against the lines preceding this hunk. + hasNonWhitespaceChar = hasNonWhitespaceChar || !isWhitespace; + if (size == 0 && isWhitespace) { + firstCharIsWhitespace = true; + } + lastCharIsWhitespace = isWhitespace; + // Handle linebreaks if (isLinebreak) { // Mark linebreaks as linebreak @@ -305,6 +327,14 @@ public: } /** + * Marks the whitespace character at the given buffer position as protected. + * + * @param bufPos is the position of the character for which the "protected" + * flag should be set. + */ + void protect(size_t bufPos) { protectedChars[bufPos] = true; } + + /** * Stores a token at the given position. * * @param id is the token that should be stored. 
@@ -459,6 +489,9 @@ public: currentIndentation = 0; indentationLevels.clear(); numLinebreaks = 1; // Assume the stream starts with a linebreak + hasNonWhitespaceChar = false; + lastCharIsWhitespace = false; + firstCharIsWhitespace = false; sorted = true; } @@ -474,6 +507,21 @@ public: buf.resize(length); protectedChars.resize(length); offsets.trim(length); + + // Recalculate the whitespace flags from the characters that remain + hasNonWhitespaceChar = false; + lastCharIsWhitespace = false; + firstCharIsWhitespace = false; + if (length > 0) { + firstCharIsWhitespace = Utils::isWhitespace(buf[0]); + lastCharIsWhitespace = Utils::isWhitespace(buf[length - 1]); + for (char c: buf) { + if (!Utils::isWhitespace(c)) { + hasNonWhitespaceChar = true; + break; + } + } + } } } @@ -504,6 +552,40 @@ public: return SourceLocation{sourceId, offsets.loadOffset(0).first, offsets.loadOffset(size()).second}; } + + /** + * Returns true if at least one non-whitespace character is stored in the + * TokenizedData structure. + * + * @return true if at least one character in the TokenizedData structure + * is a non-whitespace character. + */ + bool getHasNonWhitespaceChar() const + { + return hasNonWhitespaceChar; + } + + /** + * Returns true if the last character of the TokenizedData structure is a + * whitespace character. + * + * @return true if the last character is a whitespace character. + */ + bool getLastCharIsWhitespace() const + { + return lastCharIsWhitespace; + } + + /** + * Returns true if the first character of the TokenizedData structure is a + * whitespace character. + * + * @return true if the first character is a whitespace character. 
+ */ + bool getFirstCharIsWhitespace() const + { + return firstCharIsWhitespace; + } }; /* Class TokenizedData */ @@ -565,6 +647,23 @@ TokenizedDataReader TokenizedData::reader() const TokenizedDataCursor()); } +void TokenizedData::protect(size_t bufPos) { impl->protect(bufPos); } + +bool TokenizedData::hasNonWhitespaceChar() const +{ + return impl->getHasNonWhitespaceChar(); +} + +bool TokenizedData::lastCharIsWhitespace() const +{ + return impl->getLastCharIsWhitespace(); +} + +bool TokenizedData::firstCharIsWhitespace() const +{ + return impl->getFirstCharIsWhitespace(); +} + /* Class TokenizedDataReader */ TokenizedDataReader::TokenizedDataReader( diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp index bc937f2..95af95e 100644 --- a/src/core/parser/utils/TokenizedData.hpp +++ b/src/core/parser/utils/TokenizedData.hpp @@ -144,6 +144,14 @@ public: bool protect = false); /** + * Marks the whitespace character at the given buffer position as protected. + * + * @param bufPos is the position of the character for which the "protected" + * flag should be set. + */ + void protect(size_t bufPos); + + /** * Stores a token ending at the last character of the current buffer. * * @param id is the id of the token for which the mark should be stored. @@ -208,6 +216,31 @@ public: * the internal buffer. */ TokenizedDataReader reader() const; + + /** + * Returns true if at least one non-whitespace character is stored in the + * TokenizedData structure. + * + * @return true if at least one character in the TokenizedData structure + * is a non-whitespace character. + */ + bool hasNonWhitespaceChar() const; + + /** + * Returns true if the last character of the TokenizedData structure is a + * whitespace character. + * + * @return true if the last character is a whitespace character. + */ + bool lastCharIsWhitespace() const; + + /** + * Returns true if the first character of the TokenizedData structure is a + * whitespace character. 
+ * + * @return true if the first character is a whitespace character. + */ + bool firstCharIsWhitespace() const; }; /** |