From 84c9abc3e9762c4486ddc5ca0352a5d697a51987 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Wed, 25 Feb 2015 23:09:26 +0100 Subject: start of branch, commit log will be rewritten --- src/core/common/SourceContextReader.cpp | 5 +- src/core/common/Token.cpp | 24 +++ src/core/common/Token.hpp | 181 ++++++++++++++++++++ src/core/common/Utils.cpp | 6 - src/core/common/Utils.hpp | 53 +++--- src/core/common/WhitespaceHandler.hpp | 284 -------------------------------- 6 files changed, 240 insertions(+), 313 deletions(-) create mode 100644 src/core/common/Token.cpp create mode 100644 src/core/common/Token.hpp delete mode 100644 src/core/common/WhitespaceHandler.hpp (limited to 'src/core/common') diff --git a/src/core/common/SourceContextReader.cpp b/src/core/common/SourceContextReader.cpp index d5d379c..f7dbdf3 100644 --- a/src/core/common/SourceContextReader.cpp +++ b/src/core/common/SourceContextReader.cpp @@ -149,8 +149,9 @@ SourceContext SourceContextReader::readContext(CharReader &reader, ctx.relLen = end - start; // end >= start (I2) // Remove linebreaks at the beginning and the end - const std::pair b = - Utils::trim(lineBuf, Utils::isLinebreak); + const std::pair b = Utils::trim( + lineBuf, + [&lineBuf](size_t i) { return Utils::isLinebreak(lineBuf[i]); }); ssize_t s = b.first, e = b.second; s = std::min(s, static_cast(ctx.relPos)); diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp new file mode 100644 index 0000000..8bcdbb5 --- /dev/null +++ b/src/core/common/Token.cpp @@ -0,0 +1,24 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "Token.hpp" + +namespace ousia { +// Stub to make sure Tokens.hpp is valid +} + diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp new file mode 100644 index 0000000..0cf56b0 --- /dev/null +++ b/src/core/common/Token.hpp @@ -0,0 +1,181 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Token.hpp + * + * Definition of the TokenId id and constants for some special tokens. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_TOKEN_HPP_ +#define _OUSIA_TOKEN_HPP_ + +#include +#include +#include +#include + +#include + +namespace ousia { + +/** + * The TokenId is used to give each token id a unique id. 
+ */ +using TokenId = uint32_t; + +/** + * Type used for storing token lengths. + */ +using TokenLength = uint16_t; + +/** + * Type used for storing token sets. + */ +using TokenSet = std::unordered_set; + +/** + * Namespace containing constants for TokenId instances with special meaning. + */ +namespace Tokens { +/** + * Token which is not a token. + */ +constexpr TokenId Empty = std::numeric_limits::max(); + +/** + * Token which represents data (represented as TokenizedData). + */ +constexpr TokenId Data = std::numeric_limits::max() - 1; + +/** + * Token which represents a newline token. + */ +constexpr TokenId Newline = std::numeric_limits::max() - 2; + +/** + * Token which represents a paragraph token -- issued if two consecutive + * newlines occur with optionally any amout of whitespace between them. The + * paragraph token is not repeated until more text is reached. + */ +constexpr TokenId Paragraph = std::numeric_limits::max() - 3; + +/** + * Token which represents a section token -- issued if three or more + * consecutive newlines occur with optionally any amout of whitespace between + * them. The section token is not repeated until more text is reached. + */ +constexpr TokenId Section = std::numeric_limits::max() - 4; + +/** + * Token which represents an indentation token -- issued if the indentation of + * this line is larger than the indentation of the previous line. + */ +constexpr TokenId Indent = std::numeric_limits::max() - 5; + +/** + * Token which represents an dedentation -- issued if the indentation of + * this line is smaller than the indentation of the previous line. + */ +constexpr TokenId Dedent = std::numeric_limits::max() - 6; + +/** + * Maximum token id to be used. Tokens allocated for users should not surpass + * this value. + */ +constexpr TokenId MaxTokenId = std::numeric_limits::max() - 255; +} + +/** + * The Token structure describes a token discovered by the Tokenizer or read + * from the TokenizedData struct. + */ +struct Token { + /** + * Id of the id of this token. + */ + TokenId id; + + /** + * String that was matched. + */ + std::string content; + + /** + * Location from which the string was extracted. + */ + SourceLocation location; + + /** + * Default constructor. + */ + Token() : id(Tokens::Empty) {} + + /** + * Constructor of a "data" token with no explicit content. + * + * @param location is the location of the extracted string content in the + * source file. + */ + Token(SourceLocation location) + : id(Tokens::Data), location(location) + { + } + + /** + * Constructor of the Token struct. + * + * @param id represents the token id. + * @param content is the string content that has been extracted. + * @param location is the location of the extracted string content in the + * source file. + */ + Token(TokenId id, const std::string &content, SourceLocation location) + : id(id), content(content), location(location) + { + } + + /** + * Constructor of the Token struct, only initializes the token id + * + * @param id is the id corresponding to the id of the token. + */ + Token(TokenId id) : id(id) {} + + /** + * Returns true if this token is special. + * + * @return true if the TokenId indicates that this token is a "special" + * token. + */ + bool isSpecial() const {return id > Tokens::MaxTokenId;} + + /** + * The getLocation function allows the tokens to be directly passed as + * parameter to Logger or LoggableException instances. 
+ * + * @return a reference at the location field + */ + const SourceLocation &getLocation() const { return location; } +}; +} + +#endif /* _OUSIA_TOKENS_HPP_ */ + diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index a77951e..85d2c28 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -108,12 +108,6 @@ std::string Utils::extractFileExtension(const std::string &filename) return std::string{}; } -std::string Utils::trim(const std::string &s) -{ - std::pair bounds = trim(s, Utils::isWhitespace); - return s.substr(bounds.first, bounds.second - bounds.first); -} - bool Utils::startsWith(const std::string &s, const std::string &prefix) { return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix; diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 7d96562..82a8f8c 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -123,14 +123,6 @@ public: */ static bool hasNonWhitepaceChar(const std::string &s); - /** - * Removes whitespace at the beginning and the end of the given string. - * - * @param s is the string that should be trimmed. - * @return a trimmed copy of s. - */ - static std::string trim(const std::string &s); - /** * Trims the given string or vector of chars by returning the start and end * index. @@ -153,8 +145,8 @@ public: * * @param s is the container that should be trimmed. * @param len is the number of elements in the container. - * @param f is a function that returns true for values that should be - * removed. + * @param f is a function that returns true for values at a certain index + * that should be removed. * @return start and end index. Note that "end" points at the character * beyond the end, thus "end" minus "start" */ @@ -163,7 +155,7 @@ public: { size_t start = 0; for (size_t i = 0; i < len; i++) { - if (!f(s[i])) { + if (!f(i)) { start = i; break; } @@ -171,7 +163,7 @@ public: size_t end = 0; for (ssize_t i = len - 1; i >= static_cast(start); i--) { - if (!f(s[i])) { + if (!f(i)) { end = i + 1; break; } @@ -198,16 +190,32 @@ public: * the collapsed version of the string ends. * @return start and end index. Note that "end" points at the character * beyond the end, thus "end" minus "start" + * @param f is a function that returns true for values at a certain index + * that should be removed. */ - template - static std::string trim(const T &s, size_t len, size_t &start, size_t &end) + template + static std::string trim(const T &s, size_t len, size_t &start, size_t &end, + Filter f) { - auto res = trim(s, len, isWhitespace); + auto res = trim(s, len, f); start = res.first; end = res.second; return std::string(&s[start], end - start); } + /** + * Removes whitespace at the beginning and the end of the given string. + * + * @param s is the string that should be trimmed. + * @return a trimmed copy of s. + */ + static std::string trim(const std::string &s) + { + std::pair bounds = + trim(s, [&s](size_t i) { return isWhitespace(s[i]); }); + return s.substr(bounds.first, bounds.second - bounds.first); + } + /** * Collapses the whitespaces in the given string (trims the string and * replaces all whitespace characters by a single one). 
@@ -219,7 +227,8 @@ public: { size_t start; size_t end; - return collapse(s, s.size(), start, end); + return collapse(s, s.size(), start, end, + [&s](size_t i) { return isWhitespace(s[i]); }); } /** @@ -236,7 +245,8 @@ public: static std::string collapse(const std::string &s, size_t &start, size_t &end) { - return collapse(s, s.size(), start, end); + return collapse(s, s.size(), start, end, + [&s](size_t i) { return isWhitespace(s[i]); }); } /** @@ -244,6 +254,8 @@ public: * replaces all whitespace characters by a single one). * * @tparam T is the string type that should be used. + * @tparam Filter is a filter function used for detecting the character + * indices that might be removed. * @param s is the string in which the whitespace should be collapsed. * @param len is the length of the input string * @param start is an output parameter which is set to the offset at which @@ -252,9 +264,9 @@ public: * the collapsed version of the string ends. * @return a copy of s with collapsed whitespace. */ - template + template static std::string collapse(const T &s, size_t len, size_t &start, - size_t &end) + size_t &end, Filter f) { // Result vector std::vector res; @@ -268,8 +280,7 @@ public: bool hadWhitespace = false; for (size_t i = 0; i < len; i++) { const char c = s[i]; - const bool whitespace = isWhitespace(c); - if (whitespace) { + if (f(i)) { hadWhitespace = !res.empty(); } else { // Adapt the start and end position diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp deleted file mode 100644 index ed52ea3..0000000 --- a/src/core/common/WhitespaceHandler.hpp +++ /dev/null @@ -1,284 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/** - * @file WhitespaceHandler.hpp - * - * Contains the WhitespaceHandler classes which are used in multiple places to - * trim, compact or preserve whitespaces while at the same time maintaining the - * position information associated with the input strings. - * - * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) - */ - -#ifndef _OUSIA_WHITESPACE_HANDLER_HPP_ -#define _OUSIA_WHITESPACE_HANDLER_HPP_ - -#include -#include - -#include "Utils.hpp" - -namespace ousia { - -/** - * WhitespaceHandler is a based class that can be used to collect text on a - * character-by-character basis. Note that this class and its descendants are - * hoped to be inlined by the compiler (and used in conjunction with templates), - * thus they are fully defined inside this header. - */ -class WhitespaceHandler { -public: - /** - * Start position of the extracted text. - */ - size_t textStart; - - /** - * End position of the extracted text. - */ - size_t textEnd; - - /** - * Buffer containing the extracted text. - */ - std::vector textBuf; - - /** - * Constructor of the TextHandlerBase base class. Initializes the start and - * end position with zeros. 
- */ - WhitespaceHandler() : textStart(0), textEnd(0) {} - - /** - * Returns true if this whitespace handler has found any text and a text - * token could be emitted. - * - * @return true if the internal data buffer is non-empty. - */ - bool hasText() { return !textBuf.empty(); } - - /** - * Returns the content of the WhitespaceHandler as string. - */ - std::string toString() const - { - return std::string(textBuf.data(), textBuf.size()); - } -}; - -/** - * The PreservingWhitespaceHandler class preserves all characters unmodified, - * including whitepace characters. - */ -class PreservingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Appends the given character to the internal text buffer, does not - * eliminate whitespace. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd); - } - - /** - * Static version of PreservingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd) - { - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - textBuf.push_back(c); - } -}; - -/** - * The TrimmingTextHandler class trims all whitespace characters at the begin - * and the end of a text section but leaves all other characters unmodified, - * including whitepace characters. - */ -class TrimmingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Buffer used internally to temporarily store all whitespace characters. - * They are only added to the output buffer if another non-whitespace - * character is reached. - */ - std::vector whitespaceBuf; - - /** - * Appends the given character to the internal text buffer, eliminates - * whitespace characters at the begin and end of the text. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); - } - - /** - * Static version of TrimmingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - * @param whitespaceBuf is a reference at the buffer for storing whitespace - * characters. 
- */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd, std::vector &whitespaceBuf) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - whitespaceBuf.push_back(c); - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (!whitespaceBuf.empty()) { - textBuf.insert(textBuf.end(), whitespaceBuf.begin(), - whitespaceBuf.end()); - whitespaceBuf.clear(); - } - textBuf.push_back(c); - } -}; - -/** - * The CollapsingTextHandler trims characters at the beginning and end of the - * text and reduced multiple whitespace characters to a single blank. - */ -class CollapsingWhitespaceHandler : public WhitespaceHandler { -public: - /** - * Flag set to true if a whitespace character was reached. - */ - bool hasWhitespace = false; - - /** - * Appends the given character to the internal text buffer, eliminates - * redundant whitespace characters. - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - */ - void append(char c, size_t start, size_t end) - { - append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); - } - - /** - * Static version of CollapsingWhitespaceHandler append - * - * @param c is the character that should be appended to the internal buffer. - * @param start is the start byte offset of the given character. - * @param end is the end byte offset of the given character. - * @param textBuf is a reference at the text buffer that is to be used. - * @param textStart is a reference at the text start variable that is to be - * used. - * @param textEnd is a reference at the text end variable that is to be - * used. - * @param hasWhitespace is a reference at the "hasWhitespace" flag. - */ - static void append(char c, size_t start, size_t end, - std::vector &textBuf, size_t &textStart, - size_t &textEnd, bool &hasWhitespace) - { - // Handle whitespace characters - if (Utils::isWhitespace(c)) { - if (!textBuf.empty()) { - hasWhitespace = true; - } - return; - } - - // Set the start and end offset correctly - if (textBuf.empty()) { - textStart = start; - } - textEnd = end; - - // Store the character - if (hasWhitespace) { - textBuf.push_back(' '); - hasWhitespace = false; - } - textBuf.push_back(c); - } -}; - -/** - * Function that can be used to append the given buffer (e.g. a string or a - * vector) to the whitespace handler. - * - * @tparam WhitespaceHandler is one of the WhitespaceHandler classes. - * @tparam Buffer is an iterable type. - * @param handler is the handler to which the characters of the Buffer should be - * appended. - * @param buf is the buffer from which the characters should be read. - * @param start is the start byte offset. Each character is counted as one byte. 
- */ -template -inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, - size_t start) -{ - for (auto elem : buf) { - handler.append(elem, start, start + 1); - start++; - } -} -} - -#endif /* _OUSIA_WHITESPACE_HANDLER_HPP_ */ - -- cgit v1.2.3 From 596fdab71b8bd116e20e33647d68f1d7a567696e Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:34:15 +0100 Subject: Wrote isUserDefinedToken function which checks whether a token is a valid user defined token and added unit tests --- src/core/common/Utils.cpp | 24 ++++++++++++++++++++++++ src/core/common/Utils.hpp | 19 +++++++++++++++++++ test/core/common/UtilsTest.cpp | 31 ++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 1 deletion(-) (limited to 'src/core/common') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 85d2c28..219b437 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -118,5 +118,29 @@ bool Utils::endsWith(const std::string &s, const std::string &suffix) return suffix.size() <= s.size() && s.substr(s.size() - suffix.size(), suffix.size()) == suffix; } + +bool Utils::isUserDefinedToken(const std::string &token) +{ + // Make sure the token meets is neither empty, nor starts or ends with an + // alphanumeric character + const size_t len = token.size(); + if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) { + return false; + } + + // Make sure the token is not any special OSML token + if (token == "\\" || token == "%" || token == "%{" || token == "}%" || + token == "{!" || token == "<\\" || token == "\\>") { + return false; + } + + // Make sure the token contains other characters but { and } + for (char c: token) { + if (c != '{' && c != '}') { + return true; + } + } + return false; +} } diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 82a8f8c..25a4de5 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -102,6 +102,25 @@ public: */ static bool isNamespacedIdentifier(const std::string &name); + /** + * Returns true if the given characters form a valid user-defined token. + * This function returns true under the following circumstances: + *
+ *   - The given token is not empty
+ *   - The given token starts and ends with a non-alphanumeric character
+ *   - The token is none of the following character sequences (which are
+ *     special in OSML):
+ *       - '{', '}' or any combined repetition of these characters
+ *       - '\', '{!', '<\', '\>'
+ *       - '%', '%{', '}%'
+ */ + static bool isUserDefinedToken(const std::string &token); + /** * Returns true if the given character is a linebreak character. */ diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 4bf1587..54890ee 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -131,4 +131,33 @@ TEST(Utils, collapse) ASSERT_EQ("long test", Utils::collapse(" long test ")); } -} \ No newline at end of file +TEST(Utils, isUserDefinedToken) +{ + EXPECT_FALSE(Utils::isUserDefinedToken("")); + EXPECT_FALSE(Utils::isUserDefinedToken("a")); + EXPECT_TRUE(Utils::isUserDefinedToken(":")); + EXPECT_TRUE(Utils::isUserDefinedToken("::")); + EXPECT_TRUE(Utils::isUserDefinedToken("!?")); + EXPECT_TRUE(Utils::isUserDefinedToken(".")); + EXPECT_TRUE(Utils::isUserDefinedToken("<<")); + EXPECT_TRUE(Utils::isUserDefinedToken(">>")); + EXPECT_TRUE(Utils::isUserDefinedToken("''")); + EXPECT_TRUE(Utils::isUserDefinedToken("``")); + EXPECT_TRUE(Utils::isUserDefinedToken("´´")); + EXPECT_TRUE(Utils::isUserDefinedToken("´")); + EXPECT_TRUE(Utils::isUserDefinedToken("`")); + EXPECT_TRUE(Utils::isUserDefinedToken("<")); + EXPECT_TRUE(Utils::isUserDefinedToken(">")); + EXPECT_FALSE(Utils::isUserDefinedToken("a:")); + EXPECT_FALSE(Utils::isUserDefinedToken("a:a")); + EXPECT_FALSE(Utils::isUserDefinedToken(":a")); + EXPECT_FALSE(Utils::isUserDefinedToken("{")); + EXPECT_FALSE(Utils::isUserDefinedToken("{{")); + EXPECT_FALSE(Utils::isUserDefinedToken("}}")); + EXPECT_FALSE(Utils::isUserDefinedToken("{{}{}")); + EXPECT_FALSE(Utils::isUserDefinedToken("<\\")); + EXPECT_FALSE(Utils::isUserDefinedToken("\\>")); + EXPECT_FALSE(Utils::isUserDefinedToken("{!")); +} + +} -- cgit v1.2.3 From 88afbcc2a4c4cb9956e4459cf1c5aa08e349835e Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 00:41:35 +0100 Subject: Implemented TokenSyntaxDescriptor structure --- src/core/common/Token.cpp | 16 ++++++++++- src/core/common/Token.hpp | 72 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 6 deletions(-) (limited to 'src/core/common') diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp index 8bcdbb5..e454ae4 100644 --- a/src/core/common/Token.cpp +++ b/src/core/common/Token.cpp @@ -19,6 +19,20 @@ #include "Token.hpp" namespace ousia { -// Stub to make sure Tokens.hpp is valid + +/* Class TokenSyntaxDescriptor */ + +void TokenSyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ + if (start != Tokens::Empty) { + set.insert(start); + } + if (end != Tokens::Empty) { + set.insert(end); + } + if (shortForm != Tokens::Empty) { + set.insert(shortForm); + } +} } diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp index 0cf56b0..f89a0ce 100644 --- a/src/core/common/Token.hpp +++ b/src/core/common/Token.hpp @@ -134,10 +134,7 @@ struct Token { * @param location is the location of the extracted string content in the * source file. */ - Token(SourceLocation location) - : id(Tokens::Data), location(location) - { - } + Token(SourceLocation location) : id(Tokens::Data), location(location) {} /** * Constructor of the Token struct. @@ -165,7 +162,7 @@ struct Token { * @return true if the TokenId indicates that this token is a "special" * token. 
*/ - bool isSpecial() const {return id > Tokens::MaxTokenId;} + bool isSpecial() const { return id > Tokens::MaxTokenId; } /** * The getLocation function allows the tokens to be directly passed as @@ -175,6 +172,71 @@ struct Token { */ const SourceLocation &getLocation() const { return location; } }; + +/** + * Class describing the user defined syntax for a single field or annotation. + */ +struct TokenSyntaxDescriptor { + /** + * Possible start token or Tokens::Empty if no token is set. + */ + TokenId start; + + /** + * Possible end token or Tokens::Empty if no token is set. + */ + TokenId end; + + /** + * Possible representation token or Tokens::Empty if no token is set. + */ + TokenId shortForm; + + /** + * Flag specifying whether this TokenSyntaxDescriptor describes an + * annotation. + */ + bool isAnnotation; + + /** + * Default constructor, sets all token ids to Tokens::Empty and isAnnotation + * to false. + */ + TokenSyntaxDescriptor() + : start(Tokens::Empty), + end(Tokens::Empty), + shortForm(Tokens::Empty), + isAnnotation(false) + { + } + + /** + * Member initializer constructor. + * + * @param start is a possible start token. + * @param end is a possible end token. + * @param shortForm is a possible short form token. + * @param isAnnotation is set to true if this syntax descriptor describes an + * annotation. + */ + TokenSyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + bool isAnnotation) + : start(start), + end(end), + shortForm(shortForm), + isAnnotation(isAnnotation) + { + } + + /** + * Inserts all tokens referenced in this TokenSyntaxDescriptor into the + * given TokenSet. Skips token ids set to Tokens::Empty. + * + * @param set is the TokenSet instance into which the Tokens should be + * inserted. + */ + void insertIntoTokenSet(TokenSet &set) const; +}; } #endif /* _OUSIA_TOKENS_HPP_ */ -- cgit v1.2.3 From 5d6ee07995c7f59e66e0df558c8ebe7d2a8d1f68 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:52:13 +0100 Subject: refactored SyntaxDescriptor to Token.hpp and added TokenDescriptor class. 
--- CMakeLists.txt | 1 + src/core/common/Token.cpp | 14 --- src/core/common/Token.hpp | 67 +----------- src/core/model/Syntax.cpp | 58 +++++++++++ src/core/model/Syntax.hpp | 196 +++++++++++++++++++++++++++++++++++ src/core/parser/stack/Callbacks.hpp | 3 +- src/core/parser/stack/Handler.cpp | 2 +- src/core/parser/stack/Handler.hpp | 3 +- src/core/parser/stack/TokenStack.cpp | 4 +- src/core/parser/stack/TokenStack.hpp | 5 +- 10 files changed, 266 insertions(+), 87 deletions(-) create mode 100644 src/core/model/Syntax.cpp create mode 100644 src/core/model/Syntax.hpp (limited to 'src/core/common') diff --git a/CMakeLists.txt b/CMakeLists.txt index b206458..13de9ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,6 +176,7 @@ ADD_LIBRARY(ousia_core src/core/model/Project src/core/model/RootNode src/core/model/Style + src/core/model/Syntax src/core/model/Typesystem src/core/parser/Parser src/core/parser/ParserContext diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp index e454ae4..17ce03e 100644 --- a/src/core/common/Token.cpp +++ b/src/core/common/Token.cpp @@ -20,19 +20,5 @@ namespace ousia { -/* Class TokenSyntaxDescriptor */ - -void TokenSyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const -{ - if (start != Tokens::Empty) { - set.insert(start); - } - if (end != Tokens::Empty) { - set.insert(end); - } - if (shortForm != Tokens::Empty) { - set.insert(shortForm); - } -} } diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp index f89a0ce..f37151f 100644 --- a/src/core/common/Token.hpp +++ b/src/core/common/Token.hpp @@ -173,71 +173,6 @@ struct Token { const SourceLocation &getLocation() const { return location; } }; -/** - * Class describing the user defined syntax for a single field or annotation. - */ -struct TokenSyntaxDescriptor { - /** - * Possible start token or Tokens::Empty if no token is set. - */ - TokenId start; - - /** - * Possible end token or Tokens::Empty if no token is set. - */ - TokenId end; - - /** - * Possible representation token or Tokens::Empty if no token is set. - */ - TokenId shortForm; - - /** - * Flag specifying whether this TokenSyntaxDescriptor describes an - * annotation. - */ - bool isAnnotation; - - /** - * Default constructor, sets all token ids to Tokens::Empty and isAnnotation - * to false. - */ - TokenSyntaxDescriptor() - : start(Tokens::Empty), - end(Tokens::Empty), - shortForm(Tokens::Empty), - isAnnotation(false) - { - } - - /** - * Member initializer constructor. - * - * @param start is a possible start token. - * @param end is a possible end token. - * @param shortForm is a possible short form token. - * @param isAnnotation is set to true if this syntax descriptor describes an - * annotation. - */ - TokenSyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, - bool isAnnotation) - : start(start), - end(end), - shortForm(shortForm), - isAnnotation(isAnnotation) - { - } - - /** - * Inserts all tokens referenced in this TokenSyntaxDescriptor into the - * given TokenSet. Skips token ids set to Tokens::Empty. - * - * @param set is the TokenSet instance into which the Tokens should be - * inserted. 
- */ - void insertIntoTokenSet(TokenSet &set) const; -}; } -#endif /* _OUSIA_TOKENS_HPP_ */ - +#endif /* _OUSIA_TOKENS_HPP_ */ \ No newline at end of file diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp new file mode 100644 index 0000000..9dbaccc --- /dev/null +++ b/src/core/model/Syntax.cpp @@ -0,0 +1,58 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "Syntax.hpp" + +#include "Domain.hpp" + +namespace ousia { + +/* Class TokenSyntaxDescriptor */ + +bool SyntaxDescriptor::isAnnotation() const +{ + return descriptor->isa(&RttiTypes::AnnotationClass); +} +bool SyntaxDescriptor::isFieldDescriptor() const +{ + return descriptor->isa(&RttiTypes::FieldDescriptor); +} +bool SyntaxDescriptor::isStruct() const +{ + return descriptor->isa(&RttiTypes::StructuredClass); +} + +void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const +{ + if (start != Tokens::Empty) { + set.insert(start); + } + if (end != Tokens::Empty) { + set.insert(end); + } + if (shortForm != Tokens::Empty) { + set.insert(shortForm); + } +} + +bool SyntaxDescriptor::isEmpty() const +{ + return start == Tokens::Empty && end == Tokens::Empty && + shortForm == Tokens::Empty; +} +} \ No newline at end of file diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp new file mode 100644 index 0000000..4da3408 --- /dev/null +++ b/src/core/model/Syntax.hpp @@ -0,0 +1,196 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file Syntax.hpp + * + * This header contains the Descriptor classes for user definable syntax for + * Document entities or fields. These classes are referenced in Ontology.hpp. + */ + +#ifndef _OUSIA_MODEL_SYNTAX_HPP_ +#define _OUSIA_MODEL_SYNTAX_HPP_ + +#include +#include "Node.hpp" + +namespace ousia { + +/** + * Class to describe a single token that shall be used as user-defined syntax. + */ +struct TokenDescriptor { + /** + * The string content of this token, if it is not a special one. + */ + std::string token; + /** + * A flag to be set true if this TokenDescriptor uses a special token. + */ + bool special; + /** + * An id to uniquely identify this token. + */ + TokenId id; + + /** + * Constructor for non-special tokens. The special flag is set to false and + * the id to Tokens::Empty. 
+ * + * @param token The string content of this token, if it is not a special + * one. + */ + TokenDescriptor(std::string token = std::string()) + : token(std::move(token)), special(false), id(Tokens::Empty) + { + } + + /** + * Constructor for special tokens. The token is set to an empty string and + * the special flag to true. + * + * @param id the id of the special token. + */ + TokenDescriptor(TokenId id) : special(true), id(id) {} + + /** + * Returns true if and only if neither a string nor an ID is given. + * + * @return true if and only if neither a string nor an ID is given. + */ + bool isEmpty() const { return token.empty() && id == Tokens::Empty; } +}; + +/** + * Class describing the user defined syntax for a StructuredClass, + * AnnotationClass or FieldDescriptor. + * + * This class is used during parsing of a Document. It is used to describe + * the tokens relevant for one Descriptor that could be created at this point + * during parsing. + */ +struct SyntaxDescriptor { + /** + * Possible start token or Tokens::Empty if no token is set. + */ + TokenId start; + + /** + * Possible end token or Tokens::Empty if no token is set. + */ + TokenId end; + + /** + * Possible representation token or Tokens::Empty if no token is set. + */ + TokenId shortForm; + + /* + * The Descriptor this SyntaxDescriptor belongs to. As this may be + * a FieldDescriptor as well as a class Descriptor (StructuredClass or + * AnnotationClass) we can only use the class Node as inner argument here. + */ + Rooted descriptor; + /* + * Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + ssize_t depth; + + /** + * Default constructor, sets all token ids to Tokens::Empty and the + * descriptor handle to nullptr. + */ + SyntaxDescriptor() + : start(Tokens::Empty), + end(Tokens::Empty), + shortForm(Tokens::Empty), + descriptor(nullptr), + depth(-1) + { + } + + /** + * Member initializer constructor. + * + * @param start is a possible start token. + * @param end is a possible end token. + * @param shortForm is a possible short form token. + * @param descriptor The Descriptor this SyntaxDescriptor belongs to. + * @param depth Given the current leaf in the parsed document the depth of a + * SyntaxDescriptor is defined as the number of transparent elements that + * would be needed to construct an instance of the referenced descriptor. + */ + SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + Handle descriptor, ssize_t depth) + : start(start), + end(end), + shortForm(shortForm), + descriptor(descriptor), + depth(depth) + { + } + + /** + * Inserts all tokens referenced in this SyntaxDescriptor into the + * given TokenSet. Skips token ids set to Tokens::Empty. + * + * @param set is the TokenSet instance into which the Tokens should be + * inserted. + */ + void insertIntoTokenSet(TokenSet &set) const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + * + * @return true if and only if this SyntaxDescriptor belongs to an + * AnnotationClass. + */ + bool isAnnotation() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + * + * @return true if and only if this SyntaxDescriptor belongs to a + * StrcturedClass. + */ + bool isStruct() const; + + /** + * Returns true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. 
+ * + * @return true if and only if this SyntaxDescriptor belongs to a + * FieldDescriptor. + */ + bool isFieldDescriptor() const; + + /** + * Returns true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + * + * @return true if and only if this SyntaxDescriptor has only empty + * entries in start, end and short. + */ + bool isEmpty() const; +}; +} +#endif \ No newline at end of file diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp index d7b2547..e471881 100644 --- a/src/core/parser/stack/Callbacks.hpp +++ b/src/core/parser/stack/Callbacks.hpp @@ -34,6 +34,7 @@ #include #include +#include namespace ousia { @@ -96,7 +97,7 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Removes the previously pushed list of tokens from the stack. diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 734976a..12df0fd 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -74,7 +74,7 @@ Variant Handler::readData() return handlerData.callbacks.readData(); } -void Handler::pushTokens(const std::vector &tokens) +void Handler::pushTokens(const std::vector &tokens) { handlerData.callbacks.pushTokens(tokens); } diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 848d395..19660d0 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace ousia { @@ -200,7 +201,7 @@ protected: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Calls the corresponding function in the HandlerCallbacks instance. diff --git a/src/core/parser/stack/TokenStack.cpp b/src/core/parser/stack/TokenStack.cpp index 6afeaed..ac1d94e 100644 --- a/src/core/parser/stack/TokenStack.cpp +++ b/src/core/parser/stack/TokenStack.cpp @@ -21,7 +21,7 @@ namespace ousia { namespace parser_stack { -void TokenStack::pushTokens(const std::vector &tokens) +void TokenStack::pushTokens(const std::vector &tokens) { stack.push_back(tokens); } @@ -35,7 +35,7 @@ TokenSet TokenStack::tokens() const } TokenSet res; - for (const TokenSyntaxDescriptor &descr : stack.back()) { + for (const SyntaxDescriptor &descr : stack.back()) { descr.insertIntoTokenSet(res); } return res; diff --git a/src/core/parser/stack/TokenStack.hpp b/src/core/parser/stack/TokenStack.hpp index 9669f50..af734bb 100644 --- a/src/core/parser/stack/TokenStack.hpp +++ b/src/core/parser/stack/TokenStack.hpp @@ -32,6 +32,7 @@ #include #include +#include namespace ousia { namespace parser_stack { @@ -52,7 +53,7 @@ private: * Stack containing vectors of TokenSyntaxDescriptor instances as given by * the user. */ - std::vector> stack; + std::vector> stack; /** * Constructor of the TokenStack class. @@ -86,7 +87,7 @@ public: * @param tokens is a list of TokenSyntaxDescriptor instances that should be * stored on the stack. */ - void pushTokens(const std::vector &tokens); + void pushTokens(const std::vector &tokens); /** * Removes the previously pushed list of tokens from the stack. 
-- cgit v1.2.3 From 522580cfdfc9e6dc3448240448c29533e68f240f Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 2 Mar 2015 15:52:34 +0100 Subject: added check for witespace characters in Utils::isUserDefinedToken --- src/core/common/Utils.cpp | 15 +++++++++++---- src/core/common/Utils.hpp | 1 + test/core/common/UtilsTest.cpp | 2 ++ 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'src/core/common') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 219b437..a87ff6d 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -124,7 +124,8 @@ bool Utils::isUserDefinedToken(const std::string &token) // Make sure the token meets is neither empty, nor starts or ends with an // alphanumeric character const size_t len = token.size(); - if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) { + if (len == 0 || isAlphanumeric(token[0]) || + isAlphanumeric(token[len - 1])) { return false; } @@ -134,13 +135,19 @@ bool Utils::isUserDefinedToken(const std::string &token) return false; } + // Make sure the token does not contain any whitespaces. + for (char c : token) { + if (isWhitespace(c)) { + return false; + } + } + // Make sure the token contains other characters but { and } - for (char c: token) { + for (char c : token) { if (c != '{' && c != '}') { return true; } } return false; } -} - +} \ No newline at end of file diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 25a4de5..d9e26da 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -117,6 +117,7 @@ public: *
 *       - '%', '%{', '}%'
 *
+ *   - The token does not contain any whitespaces.
  • * */ static bool isUserDefinedToken(const std::string &token); diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 54890ee..2aaa430 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -148,6 +148,7 @@ TEST(Utils, isUserDefinedToken) EXPECT_TRUE(Utils::isUserDefinedToken("`")); EXPECT_TRUE(Utils::isUserDefinedToken("<")); EXPECT_TRUE(Utils::isUserDefinedToken(">")); + EXPECT_TRUE(Utils::isUserDefinedToken("<+>")); EXPECT_FALSE(Utils::isUserDefinedToken("a:")); EXPECT_FALSE(Utils::isUserDefinedToken("a:a")); EXPECT_FALSE(Utils::isUserDefinedToken(":a")); @@ -158,6 +159,7 @@ TEST(Utils, isUserDefinedToken) EXPECT_FALSE(Utils::isUserDefinedToken("<\\")); EXPECT_FALSE(Utils::isUserDefinedToken("\\>")); EXPECT_FALSE(Utils::isUserDefinedToken("{!")); + EXPECT_FALSE(Utils::isUserDefinedToken("< + >")); } } -- cgit v1.2.3 From e31968c9e073c64cf718fbcaebbc83ee2bee48c8 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Mon, 2 Mar 2015 18:09:34 +0100 Subject: Added additional constructor to Token --- src/core/common/Token.hpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'src/core/common') diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp index f37151f..4b56f1a 100644 --- a/src/core/common/Token.hpp +++ b/src/core/common/Token.hpp @@ -134,7 +134,9 @@ struct Token { * @param location is the location of the extracted string content in the * source file. */ - Token(SourceLocation location) : id(Tokens::Data), location(location) {} + Token(const SourceLocation &location) : id(Tokens::Data), location(location) + { + } /** * Constructor of the Token struct. @@ -144,11 +146,25 @@ struct Token { * @param location is the location of the extracted string content in the * source file. */ - Token(TokenId id, const std::string &content, SourceLocation location) + Token(TokenId id, const std::string &content, + const SourceLocation &location) : id(id), content(content), location(location) { } + /** + * Constructor of the a "data" Token with the given string data and + * location. + * + * @param content is the string content that should be stored in the token. + * @param location is the location of the content within the source file. + */ + Token(const std::string &content, + const SourceLocation &location = SourceLocation{}) + : id(Tokens::Data), content(content), location(location) + { + } + /** * Constructor of the Token struct, only initializes the token id * @@ -172,7 +188,6 @@ struct Token { */ const SourceLocation &getLocation() const { return location; } }; - } -#endif /* _OUSIA_TOKENS_HPP_ */ \ No newline at end of file +#endif /* _OUSIA_TOKENS_HPP_ */ -- cgit v1.2.3
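The first commit in this series reworks Utils::trim and Utils::collapse so that the filter predicate receives a character *index* instead of the character itself, letting callers close over the underlying buffer (see the lambdas such as `[&s](size_t i) { return isWhitespace(s[i]); }` in the patch). The following standalone sketch, which is not part of the patch series, mirrors that index-based pattern; the names `trimToString` and `isWs` are placeholders and do not exist in the code base.

```cpp
#include <cstddef>
#include <iostream>
#include <string>

// Index-based trim in the style of the refactored Utils::trim above:
// the filter is called with an index, so it can inspect the buffer itself.
template <class T, class Filter>
std::string trimToString(const T &s, std::size_t len, Filter f)
{
	// Find the first index that the filter does not flag for removal
	std::size_t start = 0;
	for (std::size_t i = 0; i < len; i++) {
		if (!f(i)) {
			start = i;
			break;
		}
	}
	// Find the index one past the last non-flagged character
	std::size_t end = start;
	for (std::size_t i = len; i > start; i--) {
		if (!f(i - 1)) {
			end = i;
			break;
		}
	}
	return std::string(&s[start], end - start);
}

int main()
{
	const std::string s = "  hello world \n";
	// The predicate closes over "s" and decides per index, just like the
	// lambdas passed to Utils::trim/collapse in the patch.
	auto isWs = [&s](std::size_t i) {
		const char c = s[i];
		return c == ' ' || c == '\t' || c == '\n' || c == '\r';
	};
	std::cout << "'" << trimToString(s, s.size(), isWs) << "'" << std::endl;
	// prints 'hello world'
	return 0;
}
```

The presumable benefit of passing indices rather than characters is that the same trim/collapse core can be driven by any position-dependent criterion over the original buffer, not only by per-character whitespace tests.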
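Similarly, the rules established by the two Utils::isUserDefinedToken commits (non-empty, non-alphanumeric boundaries, none of the special OSML sequences, no whitespace, and at least one character other than '{' or '}') can be summarized in a compact, self-contained approximation. The helpers `looksLikeUserDefinedToken`, `isAlnum` and `isWs` below are illustrative stand-ins for the project's Utils functions, not the actual API.

```cpp
#include <cctype>
#include <iostream>
#include <string>

static bool isAlnum(char c)
{
	return std::isalnum(static_cast<unsigned char>(c)) != 0;
}
static bool isWs(char c)
{
	return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}

// Approximation of the checks performed by Utils::isUserDefinedToken
bool looksLikeUserDefinedToken(const std::string &token)
{
	const std::size_t len = token.size();
	// Must be non-empty and must not start or end with an alphanumeric char
	if (len == 0 || isAlnum(token[0]) || isAlnum(token[len - 1])) {
		return false;
	}
	// Must not collide with the special OSML sequences
	if (token == "\\" || token == "%" || token == "%{" || token == "}%" ||
	    token == "{!" || token == "<\\" || token == "\\>") {
		return false;
	}
	// Must not contain any whitespace
	for (char c : token) {
		if (isWs(c)) {
			return false;
		}
	}
	// Must contain at least one character other than '{' and '}'
	for (char c : token) {
		if (c != '{' && c != '}') {
			return true;
		}
	}
	return false;
}

int main()
{
	std::cout << looksLikeUserDefinedToken("<<") << " "    // 1
	          << looksLikeUserDefinedToken("a:") << " "    // 0
	          << looksLikeUserDefinedToken("{{") << "\n";  // 0
	return 0;
}
```

The expected outputs match the behaviour exercised in test/core/common/UtilsTest.cpp for the same inputs.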