diff options
| author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-04-10 16:02:55 +0200 | 
|---|---|---|
| committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2016-04-25 22:24:13 +0200 | 
| commit | 5e2dee7ac9a6ecb8f1d9e5d829d82109168b5b42 (patch) | |
| tree | 84cfc8c5369d98aa339e2b9aa81f18d40f729006 /src | |
| parent | 579b0e14d3bc1345512cac00a83159569cfcfa00 (diff) | |
Implement hasNonWhitespaceChar, lastCharIsWhitespace and firstCharIsWhitespace methods for TokenizedData class
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/parser/utils/TokenizedData.cpp | 103 | ||||
| -rw-r--r-- | src/core/parser/utils/TokenizedData.hpp | 33 | 
2 files changed, 134 insertions, 2 deletions
| diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index dcdcc65..276cd54 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -172,9 +172,24 @@ private:  	uint16_t numLinebreaks;  	/** +	 * True if the reader has a non-whitespace character. +	 */ +	bool hasNonWhitespaceChar : 1; + +	/** +	 * True if the last character is a whitespace character. +	 */ +	bool lastCharIsWhitespace : 1; + +	/** +	 * True if the first character in the buffer is a whitespace character. +	 */ +	bool firstCharIsWhitespace : 1; + +	/**  	 * Flag indicating whether the internal "marks" vector is sorted.  	 */ -	mutable bool sorted; +	mutable bool sorted : 1;  public:  	/** @@ -231,11 +246,18 @@ public:  		protectedChars.push_back(protect);  		offsets.storeOffset(offsStart, offsEnd); -		// Insert special tokens +		// Fetch information about the current character  		const size_t size = buf.size();  		const bool isWhitespace = Utils::isWhitespace(c);  		const bool isLinebreak = Utils::isLinebreak(c); +		// Update the whitespace flags +		hasNonWhitespaceChar = hasNonWhitespaceChar || !isWhitespace; +		if (size == 0 && isWhitespace) { +			firstCharIsWhitespace = true; +		} +		lastCharIsWhitespace = isWhitespace; +  		// Handle linebreaks  		if (isLinebreak) {  			// Mark linebreaks as linebreak @@ -305,6 +327,14 @@ public:  	}  	/** +	 * Marks the whitespace character at the given buffer position as protected. +	 * +	 * @param bufPos is the position of the character for which the "protected" +	 * flag should be set. +	 */ +	void protect(size_t bufPos) { protectedChars[bufPos] = true; } + +	/**  	 * Stores a token at the given position.  	 *  	 * @param id is the token that should be stored. @@ -459,6 +489,9 @@ public:  		currentIndentation = 0;  		indentationLevels.clear();  		numLinebreaks = 1;  // Assume the stream starts with a linebreak +		hasNonWhitespaceChar = false; +		lastCharIsWhitespace = false; +		firstCharIsWhitespace = false;  		sorted = true;  	} @@ -474,6 +507,21 @@ public:  			buf.resize(length);  			protectedChars.resize(length);  			offsets.trim(length); + +			// Recalculate the whitespace flags +			hasNonWhitespaceChar = false; +			lastCharIsWhitespace = false; +			firstCharIsWhitespace = false; +			if (length > 0) { +				firstCharIsWhitespace = Utils::isWhitespace(buf[0]); +				lastCharIsWhitespace = Utils::isWhitespace(buf[length - 1]); +				for (char c: buf) { +					if (Utils::isWhitespace(c)) { +						hasNonWhitespaceChar = true; +						break; +					} +				} +			}  		}  	} @@ -504,6 +552,40 @@ public:  		return SourceLocation{sourceId, offsets.loadOffset(0).first,  		                      offsets.loadOffset(size()).second};  	} + +	/** +	 * Returns true if at least one non-whitespace character is stored in the +	 * TokenizedData structure. +	 * +	 * @return true if the at least one character in the TokenizedData structure +	 * is a non-whitespace character. +	 */ +	bool getHasNonWhitespaceChar() const +	{ +		return hasNonWhitespaceChar; +	} + +	/** +	 * Returns true if the last character of the TokenizedData structure is a +	 * whitespace character. +	 * +	 * @return true if the last character is a whitespace character. +	 */ +	bool getLastCharIsWhitespace() const +	{ +		return lastCharIsWhitespace; +	} + +	/** +	 * Returns true if the first character of the TokenizedData structure is a +	 * whitespace character. +	 * +	 * @return true if the first character is a whitespace character. +	 */ +	bool getFirstCharIsWhitespace() const +	{ +		return firstCharIsWhitespace; +	}  };  /* Class TokenizedData */ @@ -565,6 +647,23 @@ TokenizedDataReader TokenizedData::reader() const  	                           TokenizedDataCursor());  } +void TokenizedData::protect(size_t bufPos) { impl->protect(bufPos); } + +bool TokenizedData::hasNonWhitespaceChar() const +{ +	return impl->getHasNonWhitespaceChar(); +} + +bool TokenizedData::lastCharIsWhitespace() const +{ +	return impl->getLastCharIsWhitespace(); +} + +bool TokenizedData::firstCharIsWhitespace() const +{ +	return impl->getFirstCharIsWhitespace(); +} +  /* Class TokenizedDataReader */  TokenizedDataReader::TokenizedDataReader( diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp index bc937f2..95af95e 100644 --- a/src/core/parser/utils/TokenizedData.hpp +++ b/src/core/parser/utils/TokenizedData.hpp @@ -144,6 +144,14 @@ public:  	              bool protect = false);  	/** +	 * Marks the whitespace character at the given buffer position as protected. +	 * +	 * @param bufPos is the position of the character for which the "protected" +	 * flag should be set. +	 */ +	void protect(size_t bufPos); + +	/**  	 * Stores a token ending at the last character of the current buffer.  	 *  	 * @param id is the id of the token for which the mark should be stored. @@ -208,6 +216,31 @@ public:  	 * the internal buffer.  	 */  	TokenizedDataReader reader() const; + +	/** +	 * Returns true if at least one non-whitespace character is stored in the +	 * TokenizedData structure. +	 * +	 * @return true if the at least one character in the TokenizedData structure +	 * is a non-whitespace character. +	 */ +	bool hasNonWhitespaceChar() const; + +	/** +	 * Returns true if the last character of the TokenizedData structure is a +	 * whitespace character. +	 * +	 * @return true if the last character is a whitespace character. +	 */ +	bool lastCharIsWhitespace() const; + +	/** +	 * Returns true if the first character of the TokenizedData structure is a +	 * whitespace character. +	 * +	 * @return true if the first character is a whitespace character. +	 */ +	bool firstCharIsWhitespace() const;  };  /** | 
