Implement hasNonWhitespaceChar, lastCharIsWhitespace and firstCharIsWhitespace methods for TokenizedData class

author: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-04-10 16:02:55 +0200
committer: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2016-04-25 22:24:13 +0200
commit: 5e2dee7ac9a6ecb8f1d9e5d829d82109168b5b42 (patch)
tree: 84cfc8c5369d98aa339e2b9aa81f18d40f729006 /src/core/parser
parent: 579b0e14d3bc1345512cac00a83159569cfcfa00 (diff)
2 files changed, 134 insertions, 2 deletions
diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp
index dcdcc65..276cd54 100644
--- a/src/core/parser/utils/TokenizedData.cpp
+++ b/src/core/parser/utils/TokenizedData.cpp
@@ -172,9 +172,24 @@ private:
 	uint16_t numLinebreaks;
 
 	/**
+	 * True if the buffer contains at least one non-whitespace character.
+	 */
+	bool hasNonWhitespaceChar : 1;
+
+	/**
+	 * True if the last character in the buffer is a whitespace character.
+	 */
+	bool lastCharIsWhitespace : 1;
+
+	/**
+	 * True if the first character in the buffer is a whitespace character.
+	 */
+	bool firstCharIsWhitespace : 1;
+
+	/**
 	 * Flag indicating whether the internal "marks" vector is sorted.
 	 */
-	mutable bool sorted;
+	mutable bool sorted : 1;
 
 public:
 	/**
@@ -231,11 +246,18 @@ public:
 		protectedChars.push_back(protect);
 		offsets.storeOffset(offsStart, offsEnd);
 
-		// Insert special tokens
+		// Fetch information about the current character
 		const size_t size = buf.size();
 		const bool isWhitespace = Utils::isWhitespace(c);
 		const bool isLinebreak = Utils::isLinebreak(c);
+		// Update the whitespace flags. NOTE(review): if c was already pushed to
+		// buf above (as for protectedChars), "size == 0" never holds -- confirm.
+		hasNonWhitespaceChar = hasNonWhitespaceChar || !isWhitespace;
+		if (size == 0 && isWhitespace) {
+			firstCharIsWhitespace = true;
+		}
+		lastCharIsWhitespace = isWhitespace;
+
 		// Handle linebreaks
 		if (isLinebreak) {
 			// Mark linebreaks as linebreak
@@ -305,6 +327,14 @@ public:
 	}
 
 	/**
+	 * Marks the whitespace character at the given buffer position as protected.
+	 *
+	 * @param bufPos is the position of the character for which the "protected"
+	 * flag should be set. The position is not range-checked by this method.
+	 */
+	void protect(size_t bufPos) { protectedChars[bufPos] = true; }
+
+	/**
 	 * Stores a token at the given position.
 	 *
 	 * @param id is the token that should be stored.
@@ -459,6 +489,9 @@ public:
 		currentIndentation = 0;
 		indentationLevels.clear();
 		numLinebreaks = 1;  // Assume the stream starts with a linebreak
+		hasNonWhitespaceChar = false;
+		lastCharIsWhitespace = false;
+		firstCharIsWhitespace = false;
 		sorted = true;
 	}
 
@@ -474,6 +507,21 @@ public:
 			buf.resize(length);
 			protectedChars.resize(length);
 			offsets.trim(length);
+
+			// Recalculate the whitespace flags for the remaining characters
+			hasNonWhitespaceChar = false;
+			lastCharIsWhitespace = false;
+			firstCharIsWhitespace = false;
+			if (length > 0) {
+				firstCharIsWhitespace = Utils::isWhitespace(buf[0]);
+				lastCharIsWhitespace = Utils::isWhitespace(buf[length - 1]);
+				for (char c: buf) {
+					if (!Utils::isWhitespace(c)) {
+						hasNonWhitespaceChar = true;
+						break;
+					}
+				}
+			}
 		}
 	}
 
@@ -504,6 +552,40 @@ public:
 		return SourceLocation{sourceId, offsets.loadOffset(0).first,
 		                      offsets.loadOffset(size()).second};
 	}
+
+	/**
+	 * Returns true if at least one non-whitespace character is stored in the
+	 * TokenizedData structure.
+	 *
+	 * @return true if at least one character in the TokenizedData structure
+	 * is a non-whitespace character.
+	 */
+	bool getHasNonWhitespaceChar() const
+	{
+		return hasNonWhitespaceChar;
+	}
+
+	/**
+	 * Returns true if the last character of the TokenizedData structure is a
+	 * whitespace character.
+	 *
+	 * @return true if the last character is a whitespace character.
+	 */
+	bool getLastCharIsWhitespace() const
+	{
+		return lastCharIsWhitespace;
+	}
+
+	/**
+	 * Returns true if the first character of the TokenizedData structure is a
+	 * whitespace character.
+	 *
+	 * @return true if the first character is a whitespace character.
+	 */
+	bool getFirstCharIsWhitespace() const
+	{
+		return firstCharIsWhitespace;
+	}
 };
 
 /* Class TokenizedData */
@@ -565,6 +647,23 @@ TokenizedDataReader TokenizedData::reader() const
 	                           TokenizedDataCursor());
 }
 
+void TokenizedData::protect(size_t bufPos) { impl->protect(bufPos); }
+
+bool TokenizedData::hasNonWhitespaceChar() const
+{
+	return impl->getHasNonWhitespaceChar();
+}
+
+bool TokenizedData::lastCharIsWhitespace() const
+{
+	return impl->getLastCharIsWhitespace();
+}
+
+bool TokenizedData::firstCharIsWhitespace() const
+{
+	return impl->getFirstCharIsWhitespace();
+}
+
 /* Class TokenizedDataReader */
 
 TokenizedDataReader::TokenizedDataReader(
diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp
index bc937f2..95af95e 100644
--- a/src/core/parser/utils/TokenizedData.hpp
+++ b/src/core/parser/utils/TokenizedData.hpp
@@ -144,6 +144,14 @@ public:
 	              bool protect = false);
 
 	/**
+	 * Marks the whitespace character at the given buffer position as protected.
+	 *
+	 * @param bufPos is the position of the character for which the "protected"
+	 * flag should be set. The position is not range-checked.
+	 */
+	void protect(size_t bufPos);
+
+	/**
 	 * Stores a token ending at the last character of the current buffer.
 	 *
 	 * @param id is the id of the token for which the mark should be stored.
@@ -208,6 +216,31 @@ public:
 	 * the internal buffer.
 	 */
 	TokenizedDataReader reader() const;
+
+	/**
+	 * Returns true if at least one non-whitespace character is stored in the
+	 * TokenizedData structure.
+	 *
+	 * @return true if at least one character in the TokenizedData structure
+	 * is a non-whitespace character.
+	 */
+	bool hasNonWhitespaceChar() const;
+
+	/**
+	 * Returns true if the last character of the TokenizedData structure is a
+	 * whitespace character.
+	 *
+	 * @return true if the last character is a whitespace character.
+	 */
+	bool lastCharIsWhitespace() const;
+
+	/**
+	 * Returns true if the first character of the TokenizedData structure is a
+	 * whitespace character.
+	 *
+	 * @return true if the first character is a whitespace character.
+	 */
+	bool firstCharIsWhitespace() const;
 };
 
 /**
author	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2015-04-10 16:02:55 +0200
committer	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2016-04-25 22:24:13 +0200
commit	5e2dee7ac9a6ecb8f1d9e5d829d82109168b5b42 (patch)
tree	84cfc8c5369d98aa339e2b9aa81f18d40f729006 /src/core/parser
parent	579b0e14d3bc1345512cac00a83159569cfcfa00 (diff)