summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-04-10 16:02:55 +0200
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2016-04-25 22:24:13 +0200
commit5e2dee7ac9a6ecb8f1d9e5d829d82109168b5b42 (patch)
tree84cfc8c5369d98aa339e2b9aa81f18d40f729006 /src/core
parent579b0e14d3bc1345512cac00a83159569cfcfa00 (diff)
Implement hasNonWhitespaceChar, lastCharIsWhitespace and firstCharIsWhitespace methods for TokenizedData class
Diffstat (limited to 'src/core')
-rw-r--r--src/core/parser/utils/TokenizedData.cpp103
-rw-r--r--src/core/parser/utils/TokenizedData.hpp33
2 files changed, 134 insertions, 2 deletions
diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp
index dcdcc65..276cd54 100644
--- a/src/core/parser/utils/TokenizedData.cpp
+++ b/src/core/parser/utils/TokenizedData.cpp
@@ -172,9 +172,24 @@ private:
uint16_t numLinebreaks;
/**
+ * True if the buffer contains at least one non-whitespace character.
+ */
+ bool hasNonWhitespaceChar : 1;
+
+ /**
+ * True if the last character is a whitespace character.
+ */
+ bool lastCharIsWhitespace : 1;
+
+ /**
+ * True if the first character in the buffer is a whitespace character.
+ */
+ bool firstCharIsWhitespace : 1;
+
+ /**
* Flag indicating whether the internal "marks" vector is sorted.
*/
- mutable bool sorted;
+ mutable bool sorted : 1;
public:
/**
@@ -231,11 +246,18 @@ public:
protectedChars.push_back(protect);
offsets.storeOffset(offsStart, offsEnd);
- // Insert special tokens
+ // Fetch information about the current character
const size_t size = buf.size();
const bool isWhitespace = Utils::isWhitespace(c);
const bool isLinebreak = Utils::isLinebreak(c);
+ // Update the whitespace flags
+ hasNonWhitespaceChar = hasNonWhitespaceChar || !isWhitespace;
+ if (size == 0 && isWhitespace) {  // NOTE(review): size is buf.size(), read after
+ firstCharIsWhitespace = true;  // protectedChars.push_back(); if c was appended to buf
+ }  // before that as well, size is never 0 here -- confirm against the full method.
+ lastCharIsWhitespace = isWhitespace;
+
// Handle linebreaks
if (isLinebreak) {
// Mark linebreaks as linebreak
@@ -305,6 +327,14 @@ public:
}
/**
+ * Marks the character at the given buffer position as protected. Intended
+ * for whitespace characters; the flag is set without inspecting the
+ * character stored at that position.
+ *
+ * NOTE(review): bufPos is used as an index without a visible range check --
+ * presumably callers must pass a position inside the buffer; confirm.
+ *
+ * @param bufPos is the position of the character for which the "protected"
+ * flag should be set.
+ */
+ void protect(size_t bufPos) { protectedChars[bufPos] = true; }
+
+ /**
* Stores a token at the given position.
*
* @param id is the token that should be stored.
@@ -459,6 +489,9 @@ public:
currentIndentation = 0;
indentationLevels.clear();
numLinebreaks = 1; // Assume the stream starts with a linebreak
+ hasNonWhitespaceChar = false;
+ lastCharIsWhitespace = false;
+ firstCharIsWhitespace = false;
sorted = true;
}
@@ -474,6 +507,21 @@ public:
buf.resize(length);
protectedChars.resize(length);
offsets.trim(length);
+
+ // Recalculate the whitespace flags from the characters that remain in
+ // the buffer. All three flags are false for an empty buffer.
+ hasNonWhitespaceChar = false;
+ lastCharIsWhitespace = false;
+ firstCharIsWhitespace = false;
+ if (length > 0) {
+ firstCharIsWhitespace = Utils::isWhitespace(buf[0]);
+ lastCharIsWhitespace = Utils::isWhitespace(buf[length - 1]);
+ // The flag becomes true on the first character that is NOT a
+ // whitespace character (the test was previously inverted).
+ for (char c: buf) {
+ if (!Utils::isWhitespace(c)) {
+ hasNonWhitespaceChar = true;
+ break;
+ }
+ }
+ }
}
}
@@ -504,6 +552,40 @@ public:
return SourceLocation{sourceId, offsets.loadOffset(0).first,
offsets.loadOffset(size()).second};
}
+
+ /**
+ * Returns true if at least one non-whitespace character is stored in the
+ * TokenizedData structure. The value is the stored hasNonWhitespaceChar
+ * flag; the buffer is not inspected by this call.
+ *
+ * @return true if at least one character in the TokenizedData structure
+ * is a non-whitespace character.
+ */
+ bool getHasNonWhitespaceChar() const
+ {
+ return hasNonWhitespaceChar;
+ }
+
+ /**
+ * Returns true if the last character of the TokenizedData structure is a
+ * whitespace character. The value is the stored lastCharIsWhitespace flag;
+ * the buffer is not inspected by this call.
+ *
+ * @return true if the last character is a whitespace character.
+ */
+ bool getLastCharIsWhitespace() const
+ {
+ return lastCharIsWhitespace;
+ }
+
+ /**
+ * Returns true if the first character of the TokenizedData structure is a
+ * whitespace character. The value is the stored firstCharIsWhitespace flag;
+ * the buffer is not inspected by this call.
+ *
+ * @return true if the first character is a whitespace character.
+ */
+ bool getFirstCharIsWhitespace() const
+ {
+ return firstCharIsWhitespace;
+ }
};
/* Class TokenizedData */
@@ -565,6 +647,23 @@ TokenizedDataReader TokenizedData::reader() const
TokenizedDataCursor());
}
+void TokenizedData::protect(size_t bufPos) { impl->protect(bufPos); }  // forwards to the implementation object
+
+bool TokenizedData::hasNonWhitespaceChar() const
+{
+ return impl->getHasNonWhitespaceChar();  // forwards to the implementation object
+}
+
+bool TokenizedData::lastCharIsWhitespace() const
+{
+ return impl->getLastCharIsWhitespace();  // forwards to the implementation object
+}
+
+bool TokenizedData::firstCharIsWhitespace() const
+{
+ return impl->getFirstCharIsWhitespace();  // forwards to the implementation object
+}
+
/* Class TokenizedDataReader */
TokenizedDataReader::TokenizedDataReader(
diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp
index bc937f2..95af95e 100644
--- a/src/core/parser/utils/TokenizedData.hpp
+++ b/src/core/parser/utils/TokenizedData.hpp
@@ -144,6 +144,14 @@ public:
bool protect = false);
/**
+ * Marks the whitespace character at the given buffer position as protected.
+ *
+ * @param bufPos is the position of the character for which the "protected"
+ * flag should be set.
+ */
+ void protect(size_t bufPos);
+
+ /**
* Stores a token ending at the last character of the current buffer.
*
* @param id is the id of the token for which the mark should be stored.
@@ -208,6 +216,31 @@ public:
* the internal buffer.
*/
TokenizedDataReader reader() const;
+
+ /**
+ * Returns true if at least one non-whitespace character is stored in the
+ * TokenizedData structure.
+ *
+ * @return true if at least one character in the TokenizedData structure
+ * is a non-whitespace character.
+ */
+ bool hasNonWhitespaceChar() const;
+
+ /**
+ * Returns true if the last character of the TokenizedData structure is a
+ * whitespace character.
+ *
+ * @return true if the last character is a whitespace character.
+ */
+ bool lastCharIsWhitespace() const;
+
+ /**
+ * Returns true if the first character of the TokenizedData structure is a
+ * whitespace character.
+ *
+ * @return true if the first character is a whitespace character.
+ */
+ bool firstCharIsWhitespace() const;
};
/**