Change the way indent and dedent special tokens are produced by TokenizedData

* Move dedent to the end of the previous line
* Leave indent at the first character of the current line
* Dedent is issued as many times as indent
author: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-04-03 01:05:16 +0200
committer: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2016-04-25 22:19:30 +0200
commit: c2fb096e02aca7dd3054dc2c8260c81847d8fa1f (patch)
tree: 5511cc260ba72d2aa2656f07e9487bfb9bd84a7a
parent: e2119b28fd6b107d07923cb3bcbd667a9bdc28a5 (diff)
2 files changed, 47 insertions, 7 deletions
diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp
index d8a8b37..dcdcc65 100644
--- a/src/core/parser/utils/TokenizedData.cpp
+++ b/src/core/parser/utils/TokenizedData.cpp
@@ -161,9 +161,10 @@ private:
 	uint16_t currentIndentation;
 
 	/**
-	 * Last indentation level.
+	 * List containing the indentation levels at which an "INDENT" token was
+	 * issued. This list is used to issue the right number of "DEDENT" tokens.
 	 */
-	uint16_t lastIndentation;
+	std::vector<uint16_t> indentationLevels;
 
 	/**
 	 * Number of linebreaks without any content between them.
@@ -280,14 +281,19 @@ public:
 		if (!isWhitespace && numLinebreaks > 0) {
 			// Issue a larger indentation than that in the previous line as
 			// "Indent" token
+			size_t lastIndentation =
+			    indentationLevels.empty() ? 0 : indentationLevels.back();
 			if (currentIndentation > lastIndentation) {
+				indentationLevels.push_back(currentIndentation);
 				mark(Tokens::Indent, size - 1, 0, true);
 			}
 
 			// Issue a smaller indentation than that in the previous line as
 			// "Dedent" token
-			if (currentIndentation < lastIndentation) {
-				mark(Tokens::Dedent, size - 1, 0, true);
+			while (!indentationLevels.empty() &&
+			       currentIndentation < indentationLevels.back()) {
+				indentationLevels.pop_back();
+				mark(Tokens::Dedent, firstLinebreak, 0, true);
 			}
 
 			// Reset the internal state machine
@@ -451,7 +457,7 @@ public:
 		marks.clear();
 		firstLinebreak = 0;
 		currentIndentation = 0;
-		lastIndentation = 0;
+		indentationLevels.clear();
 		numLinebreaks = 1;  // Assume the stream starts with a linebreak
 		sorted = true;
 	}
diff --git a/test/core/parser/utils/TokenizedDataTest.cpp b/test/core/parser/utils/TokenizedDataTest.cpp
index 8488459..31346bd 100644
--- a/test/core/parser/utils/TokenizedDataTest.cpp
+++ b/test/core/parser/utils/TokenizedDataTest.cpp
@@ -355,11 +355,43 @@ TEST(TokenizedData, specialTokenIndent)
 	            10, 10);
 	assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37);
 	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
-	            38, 38);
+	            37, 37);
+	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
+	            37, 37);
 	assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43);
 	assertEnd(reader);
 }
 
+TEST(TokenizedData, specialTokenIndent2)
+{
+	TokenizedData data;
+	data.append("a\n\tb\n\t\tc\n\t\t\td\n\te\nf\n");
+	//           0 1 23 4 5 67 8 9 0 12 3 45 67 8
+	//           0                 1
+	const TokenSet tokens{Tokens::Indent, Tokens::Dedent};
+
+	TokenizedDataReader reader = data.reader();
+	assertText(reader, "a", tokens, WhitespaceMode::COLLAPSE, 0, 1);
+	assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE,
+	            3, 3);
+	assertText(reader, "b", tokens, WhitespaceMode::COLLAPSE, 3, 4);
+	assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE,
+	            7, 7);
+	assertText(reader, "c", tokens, WhitespaceMode::COLLAPSE, 7, 8);
+	assertToken(reader, Tokens::Indent, "", tokens, WhitespaceMode::COLLAPSE,
+	            12, 12);
+	assertText(reader, "d", tokens, WhitespaceMode::COLLAPSE, 12, 13);
+	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
+	            13, 13);
+	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
+	            13, 13);
+	assertText(reader, "e", tokens, WhitespaceMode::COLLAPSE, 15, 16);
+	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
+	            16, 16);
+	assertText(reader, "f", tokens, WhitespaceMode::COLLAPSE, 17, 18);
+	assertEnd(reader);
+}
+
 TEST(TokenizedData, specialTokenIndentOverlap)
 {
 	TokenizedData data;
@@ -378,7 +410,9 @@ TEST(TokenizedData, specialTokenIndentOverlap)
 	            10, 10);
 	assertText(reader, "test2 test3 test4", tokens, WhitespaceMode::COLLAPSE, 10, 37);
 	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
-	            38, 38);
+	            37, 37);
+	assertToken(reader, Tokens::Dedent, "", tokens, WhitespaceMode::COLLAPSE,
+	            37, 37);
 	assertText(reader, "test5", tokens, WhitespaceMode::COLLAPSE, 38, 43);
 	assertEnd(reader);
 }
author	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2015-04-03 01:05:16 +0200
committer	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2016-04-25 22:19:30 +0200
commit	c2fb096e02aca7dd3054dc2c8260c81847d8fa1f (patch)
tree	5511cc260ba72d2aa2656f07e9487bfb9bd84a7a
parent	e2119b28fd6b107d07923cb3bcbd667a9bdc28a5 (diff)