From 84c9abc3e9762c4486ddc5ca0352a5d697a51987 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Wed, 25 Feb 2015 23:09:26 +0100
Subject: start of branch, commit log will be rewritten

---
 src/core/common/Utils.hpp | 53 ++++++++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 21 deletions(-)

(limited to 'src/core/common/Utils.hpp')
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 7d96562..82a8f8c 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -123,14 +123,6 @@ public:
 	 */
 	static bool hasNonWhitepaceChar(const std::string &s);
 
-	/**
-	 * Removes whitespace at the beginning and the end of the given string.
-	 *
-	 * @param s is the string that should be trimmed.
-	 * @return a trimmed copy of s.
-	 */
-	static std::string trim(const std::string &s);
-
 	/**
 	 * Trims the given string or vector of chars by returning the start and end
 	 * index.
@@ -153,8 +145,8 @@ public:
 	 *
 	 * @param s is the container that should be trimmed.
 	 * @param len is the number of elements in the container.
-	 * @param f is a function that returns true for values that should be
-	 * removed.
+	 * @param f is a function that returns true for values at a certain index
+	 * that should be removed.
 	 * @return start and end index. Note that "end" points at the character
 	 * beyond the end, thus "end" minus "start"
 	 */
@@ -163,7 +155,7 @@ public:
 	{
 		size_t start = 0;
 		for (size_t i = 0; i < len; i++) {
-			if (!f(s[i])) {
+			if (!f(i)) {
 				start = i;
 				break;
 			}
@@ -171,7 +163,7 @@ public:
 
 		size_t end = 0;
 		for (ssize_t i = len - 1; i >= static_cast<ssize_t>(start); i--) {
-			if (!f(s[i])) {
+			if (!f(i)) {
 				end = i + 1;
 				break;
 			}
@@ -198,16 +190,32 @@ public:
 	 * the collapsed version of the string ends.
 	 * @return start and end index. Note that "end" points at the character
 	 * beyond the end, thus "end" minus "start"
+	 * @param f is a function that returns true for values at a certain index
+	 * that should be removed.
 	 */
-	template <class T>
-	static std::string trim(const T &s, size_t len, size_t &start, size_t &end)
+	template <class T, class Filter>
+	static std::string trim(const T &s, size_t len, size_t &start, size_t &end,
+	                        Filter f)
 	{
-		auto res = trim(s, len, isWhitespace);
+		auto res = trim(s, len, f);
 		start = res.first;
 		end = res.second;
 		return std::string(&s[start], end - start);
 	}
 
+	/**
+	 * Removes whitespace at the beginning and the end of the given string.
+	 *
+	 * @param s is the string that should be trimmed.
+	 * @return a trimmed copy of s.
+	 */
+	static std::string trim(const std::string &s)
+	{
+		std::pair<size_t, size_t> bounds =
+		    trim(s, [&s](size_t i) { return isWhitespace(s[i]); });
+		return s.substr(bounds.first, bounds.second - bounds.first);
+	}
+
 	/**
 	 * Collapses the whitespaces in the given string (trims the string and
 	 * replaces all whitespace characters by a single one).
@@ -219,7 +227,8 @@ public:
 	{
 		size_t start;
 		size_t end;
-		return collapse(s, s.size(), start, end);
+		return collapse(s, s.size(), start, end,
+		                [&s](size_t i) { return isWhitespace(s[i]); });
 	}
 
 	/**
@@ -236,7 +245,8 @@ public:
 	static std::string collapse(const std::string &s, size_t &start,
 	                            size_t &end)
 	{
-		return collapse(s, s.size(), start, end);
+		return collapse(s, s.size(), start, end,
+		                [&s](size_t i) { return isWhitespace(s[i]); });
 	}
 
 	/**
@@ -244,6 +254,8 @@ public:
 	 * replaces all whitespace characters by a single one).
 	 *
 	 * @tparam T is the string type that should be used.
+	 * @tparam Filter is a filter function used for detecting the character
+	 * indices that might be removed.
 	 * @param s is the string in which the whitespace should be collapsed.
 	 * @param len is the length of the input string
 	 * @param start is an output parameter which is set to the offset at which
@@ -252,9 +264,9 @@ public:
 	 * the collapsed version of the string ends.
 	 * @return a copy of s with collapsed whitespace.
 	 */
-	template <class T>
+	template <class T, class Filter>
 	static std::string collapse(const T &s, size_t len, size_t &start,
-	                            size_t &end)
+	                            size_t &end, Filter f)
 	{
 		// Result vector
 		std::vector<char> res;
@@ -268,8 +280,7 @@ public:
 		bool hadWhitespace = false;
 		for (size_t i = 0; i < len; i++) {
 			const char c = s[i];
-			const bool whitespace = isWhitespace(c);
-			if (whitespace) {
+			if (f(i)) {
 				hadWhitespace = !res.empty();
 			} else {
 				// Adapt the start and end position
-- 
cgit v1.2.3


From 596fdab71b8bd116e20e33647d68f1d7a567696e Mon Sep 17 00:00:00 2001
From: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 00:34:15 +0100
Subject: Wrote isUserDefinedToken function which checks whether a token is a
 valid user defined token and added unit tests

---
 src/core/common/Utils.cpp      | 24 ++++++++++++++++++++++++
 src/core/common/Utils.hpp      | 19 +++++++++++++++++++
 test/core/common/UtilsTest.cpp | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 73 insertions(+), 1 deletion(-)

(limited to 'src/core/common/Utils.hpp')

diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
index 85d2c28..219b437 100644
--- a/src/core/common/Utils.cpp
+++ b/src/core/common/Utils.cpp
@@ -118,5 +118,29 @@ bool Utils::endsWith(const std::string &s, const std::string &suffix)
 	return suffix.size() <= s.size() &&
 	       s.substr(s.size() - suffix.size(), suffix.size()) == suffix;
 }
+
+bool Utils::isUserDefinedToken(const std::string &token)
+{
+	// Make sure the token meets is neither empty, nor starts or ends with an
+	// alphanumeric character
+	const size_t len = token.size();
+	if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) {
+		return false;
+	}
+
+	// Make sure the token is not any special OSML token
+	if (token == "\\" || token == "%" || token == "%{" || token == "}%" ||
+	    token == "{!" || token == "<\\" || token == "\\>") {
+		return false;
+	}
+
+	// Make sure the token contains other characters but { and }
+	for (char c: token) {
+		if (c != '{' && c != '}') {
+			return true;
+		}
+	}
+	return false;
+}
 }
 
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 82a8f8c..25a4de5 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -102,6 +102,25 @@ public:
 	 */
 	static bool isNamespacedIdentifier(const std::string &name);
 
+	/**
+	 * Returns true if the given characters form a valid user-defined token.
+	 * This function returns true under the following circumstances:
+	 * <ul>
+	 *   <li>The given token is not empty</li>
+	 *   <li>The given token starts and ends with a non-alphanumeric character
+	 *       </li>
+	 *   <li>The token is none of the following character sequences (which are
+	 *       special in OSML):
+	 *      <ul>
+	 *        <li>'{', '}' or any combined repetition of these characters</li>
+	 *        <li>'\', '{!', '<\', '\>'</li>
+	 *        <li>'%', '%{', '}%'</li>
+	 *      </ul>
+	 *   </li>
+	 * </ul>
+	 */
+	static bool isUserDefinedToken(const std::string &token);
+
 	/**
 	 * Returns true if the given character is a linebreak character.
 	 */
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp
index 4bf1587..54890ee 100644
--- a/test/core/common/UtilsTest.cpp
+++ b/test/core/common/UtilsTest.cpp
@@ -131,4 +131,33 @@ TEST(Utils, collapse)
 	ASSERT_EQ("long test", Utils::collapse("     long    test   "));
 }
 
-}
\ No newline at end of file
+TEST(Utils, isUserDefinedToken)
+{
+	EXPECT_FALSE(Utils::isUserDefinedToken(""));
+	EXPECT_FALSE(Utils::isUserDefinedToken("a"));
+	EXPECT_TRUE(Utils::isUserDefinedToken(":"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("::"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("!?"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("."));
+	EXPECT_TRUE(Utils::isUserDefinedToken("<<"));
+	EXPECT_TRUE(Utils::isUserDefinedToken(">>"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("''"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("``"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("´´"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("´"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("`"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("<"));
+	EXPECT_TRUE(Utils::isUserDefinedToken(">"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("a:"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("a:a"));
+	EXPECT_FALSE(Utils::isUserDefinedToken(":a"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("{"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("{{"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("}}"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("{{}{}"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("<\\"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("\\>"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("{!"));
+}
+
+}
-- 
cgit v1.2.3


From 522580cfdfc9e6dc3448240448c29533e68f240f Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:52:34 +0100
Subject: added check for whitespace characters in Utils::isUserDefinedToken

---
 src/core/common/Utils.cpp      | 15 +++++++++++----
 src/core/common/Utils.hpp      |  1 +
 test/core/common/UtilsTest.cpp |  2 ++
 3 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'src/core/common/Utils.hpp')

diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
index 219b437..a87ff6d 100644
--- a/src/core/common/Utils.cpp
+++ b/src/core/common/Utils.cpp
@@ -124,7 +124,8 @@ bool Utils::isUserDefinedToken(const std::string &token)
 	// Make sure the token meets is neither empty, nor starts or ends with an
 	// alphanumeric character
 	const size_t len = token.size();
-	if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) {
+	if (len == 0 || isAlphanumeric(token[0]) ||
+	    isAlphanumeric(token[len - 1])) {
 		return false;
 	}
 
@@ -134,13 +135,19 @@ bool Utils::isUserDefinedToken(const std::string &token)
 		return false;
 	}
 
+	// Make sure the token does not contain any whitespaces.
+	for (char c : token) {
+		if (isWhitespace(c)) {
+			return false;
+		}
+	}
+
 	// Make sure the token contains other characters but { and }
-	for (char c: token) {
+	for (char c : token) {
 		if (c != '{' && c != '}') {
 			return true;
 		}
 	}
 	return false;
 }
-}
-
+}
\ No newline at end of file
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 25a4de5..d9e26da 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -117,6 +117,7 @@ public:
 	 *        <li>'%', '%{', '}%'</li>
 	 *      </ul>
 	 *   </li>
+	 *   <li>The token does not contain any whitespaces.</li>
 	 * </ul>
 	 */
 	static bool isUserDefinedToken(const std::string &token);
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp
index 54890ee..2aaa430 100644
--- a/test/core/common/UtilsTest.cpp
+++ b/test/core/common/UtilsTest.cpp
@@ -148,6 +148,7 @@ TEST(Utils, isUserDefinedToken)
 	EXPECT_TRUE(Utils::isUserDefinedToken("`"));
 	EXPECT_TRUE(Utils::isUserDefinedToken("<"));
 	EXPECT_TRUE(Utils::isUserDefinedToken(">"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("<+>"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("a:"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("a:a"));
 	EXPECT_FALSE(Utils::isUserDefinedToken(":a"));
@@ -158,6 +159,7 @@ TEST(Utils, isUserDefinedToken)
 	EXPECT_FALSE(Utils::isUserDefinedToken("<\\"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("\\>"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("{!"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("< + >"));
 }
 
 }
-- 
cgit v1.2.3