1 files changed, 169 insertions, 0 deletions
diff --git a/src/plugins/css/CodeTokenizer.cpp b/src/plugins/css/CodeTokenizer.cpp
new file mode 100644
index 0000000..d65c514
--- /dev/null
+++ b/src/plugins/css/CodeTokenizer.cpp
@@ -0,0 +1,169 @@
+/*
+    Ousía
+    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cassert>
+
+#include "CodeTokenizer.hpp"
+
+namespace ousia {
+
+Token CodeTokenizer::constructToken(const Token &t)
+{
+	std::string content = buf.str();
+	buf.str(std::string());
+	return Token{
+	    returnTokenId, content,
+	    SourceLocation{t.location.getSourceId(), startToken.location.getStart(),
+	                   t.location.getEnd()}};
+}
+
+void CodeTokenizer::buffer(const Token &t) { buf << t.content; }
+
+bool CodeTokenizer::doPrepare(const Token &t, std::deque<Token> &peeked)
+{
+	auto it = descriptors.find(t.tokenId);
+	CodeTokenMode mode = CodeTokenMode::NONE;
+	if (it != descriptors.end()) {
+		mode = it->second.mode;
+	}
+
+	switch (state) {
+		case CodeTokenizerState::NORMAL:
+			switch (mode) {
+				case CodeTokenMode::STRING_START_END:
+					state = CodeTokenizerState::IN_STRING;
+					break;
+				case CodeTokenMode::BLOCK_COMMENT_START:
+					state = CodeTokenizerState::IN_BLOCK_COMMENT;
+					break;
+				case CodeTokenMode::LINE_COMMENT:
+					state = CodeTokenizerState::IN_LINE_COMMENT;
+					break;
+				case CodeTokenMode::LINEBREAK:
+					if (!ignoreLinebreaks) {
+						peeked.push_back(
+						    {it->second.id, t.content, t.location});
+					}
+					return !ignoreLinebreaks;
+				default:
+					bool empty = true;
+					if (t.tokenId == TOKEN_TEXT) {
+						int begin = -1;
+						for (size_t c = 0; c < t.content.length(); c++) {
+							bool isWhitespace =
+							    t.content[c] == ' ' || t.content[c] == '\t';
+							if (begin < 0) {
+								// if we have not yet set our beginning,
+								// we wait for the first
+								// non-whitespace-character to set it.
+								if (!isWhitespace) {
+									begin = c;
+								}
+							} else {
+								// if we have set our beginning, we wait for the
+								// first whitespace character, which marks the
+								// end of the current word.
+								if (isWhitespace) {
+									peeked.push_back(Token{
+									    TOKEN_TEXT,
+									    t.content.substr(begin, (int)c - begin),
+									    SourceLocation{
+									        t.location.getSourceId(),
+									        t.location.getStart() + begin,
+									        t.location.getStart() + c}});
+									begin = -1;
+									empty = false;
+								}
+							}
+						}
+						if (begin >= 0) {
+							peeked.push_back(Token{
+							    TOKEN_TEXT, t.content.substr(begin),
+							    SourceLocation{t.location.getSourceId(),
+							                   t.location.getStart() + begin,
+							                   t.location.getEnd()}});
+							empty = false;
+						}
+					} else {
+						empty = false;
+						peeked.push_back(t);
+					}
+					return !empty;
+			}
+			startToken = t;
+			returnTokenId = it->second.id;
+			return false;
+		case CodeTokenizerState::IN_LINE_COMMENT:
+			switch (mode) {
+				case CodeTokenMode::LINEBREAK:
+					state = CodeTokenizerState::NORMAL;
+					if (!ignoreComments) {
+						peeked.push_back(constructToken(t));
+					}
+					return !ignoreComments;
+				default:
+					if (!ignoreComments) {
+						buffer(t);
+					}
+					return false;
+			}
+		case CodeTokenizerState::IN_BLOCK_COMMENT:
+			switch (mode) {
+				case CodeTokenMode::BLOCK_COMMENT_END:
+					state = CodeTokenizerState::NORMAL;
+					if (!ignoreComments) {
+						peeked.push_back(constructToken(t));
+					}
+					return !ignoreComments;
+				default:
+					if (!ignoreComments) {
+						buffer(t);
+					}
+					return false;
+			}
+		case CodeTokenizerState::IN_STRING:
+			switch (mode) {
+				case CodeTokenMode::ESCAPE:
+					if (escaped) {
+						buffer(t);
+					}
+					escaped = !escaped;
+					return false;
+				case CodeTokenMode::STRING_START_END:
+					if (escaped) {
+						buffer(t);
+						escaped = false;
+						return false;
+					} else {
+						peeked.push_back(constructToken(t));
+						state = CodeTokenizerState::NORMAL;
+						return true;
+					}
+				default:
+					if (escaped) {
+						// TODO: handle escaped characters?
+						escaped = false;
+					}
+					buffer(t);
+					return false;
+			}
+	}
+	assert(false);
+	return false;
+}
+}