From 5d6ee07995c7f59e66e0df558c8ebe7d2a8d1f68 Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:52:13 +0100
Subject: refactored SyntaxDescriptor to Token.hpp and added TokenDescriptor
 class.

---
 src/core/common/Token.cpp | 14 ----------
 src/core/common/Token.hpp | 67 +----------------------------------------------
 2 files changed, 1 insertion(+), 80 deletions(-)

(limited to 'src/core/common')

diff --git a/src/core/common/Token.cpp b/src/core/common/Token.cpp
index e454ae4..17ce03e 100644
--- a/src/core/common/Token.cpp
+++ b/src/core/common/Token.cpp
@@ -20,19 +20,5 @@
 
 namespace ousia {
 
-/* Class TokenSyntaxDescriptor */
-
-void TokenSyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const
-{
-	if (start != Tokens::Empty) {
-		set.insert(start);
-	}
-	if (end != Tokens::Empty) {
-		set.insert(end);
-	}
-	if (shortForm != Tokens::Empty) {
-		set.insert(shortForm);
-	}
-}
 }
 
diff --git a/src/core/common/Token.hpp b/src/core/common/Token.hpp
index f89a0ce..f37151f 100644
--- a/src/core/common/Token.hpp
+++ b/src/core/common/Token.hpp
@@ -173,71 +173,6 @@ struct Token {
 	const SourceLocation &getLocation() const { return location; }
 };
 
-/**
- * Class describing the user defined syntax for a single field or annotation.
- */
-struct TokenSyntaxDescriptor {
-	/**
-	 * Possible start token or Tokens::Empty if no token is set.
-	 */
-	TokenId start;
-
-	/**
-	 * Possible end token or Tokens::Empty if no token is set.
-	 */
-	TokenId end;
-
-	/**
-	 * Possible representation token or Tokens::Empty if no token is set.
-	 */
-	TokenId shortForm;
-
-	/**
-	 * Flag specifying whether this TokenSyntaxDescriptor describes an
-	 * annotation.
-	 */
-	bool isAnnotation;
-
-	/**
-	 * Default constructor, sets all token ids to Tokens::Empty and isAnnotation
-	 * to false.
-	 */
-	TokenSyntaxDescriptor()
-	    : start(Tokens::Empty),
-	      end(Tokens::Empty),
-	      shortForm(Tokens::Empty),
-	      isAnnotation(false)
-	{
-	}
-
-	/**
-	 * Member initializer constructor.
-	 *
-	 * @param start is a possible start token.
-	 * @param end is a possible end token.
-	 * @param shortForm is a possible short form token.
-	 * @param isAnnotation is set to true if this syntax descriptor describes an
-	 * annotation.
-	 */
-	TokenSyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm,
-	                      bool isAnnotation)
-	    : start(start),
-	      end(end),
-	      shortForm(shortForm),
-	      isAnnotation(isAnnotation)
-	{
-	}
-
-	/**
-	 * Inserts all tokens referenced in this TokenSyntaxDescriptor into the
-	 * given TokenSet. Skips token ids set to Tokens::Empty.
-	 *
-	 * @param set is the TokenSet instance into which the Tokens should be
-	 * inserted.
-	 */
-	void insertIntoTokenSet(TokenSet &set) const;
-};
 }
 
-#endif /* _OUSIA_TOKENS_HPP_ */
-
+#endif /* _OUSIA_TOKENS_HPP_ */
\ No newline at end of file
-- 
cgit v1.2.3


From 522580cfdfc9e6dc3448240448c29533e68f240f Mon Sep 17 00:00:00 2001
From: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de>
Date: Mon, 2 Mar 2015 15:52:34 +0100
Subject: added check for whitespace characters in Utils::isUserDefinedToken

---
 src/core/common/Utils.cpp      | 15 +++++++++++----
 src/core/common/Utils.hpp      |  1 +
 test/core/common/UtilsTest.cpp |  2 ++
 3 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'src/core/common')

diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp
index 219b437..a87ff6d 100644
--- a/src/core/common/Utils.cpp
+++ b/src/core/common/Utils.cpp
@@ -124,7 +124,8 @@ bool Utils::isUserDefinedToken(const std::string &token)
 	// Make sure the token meets is neither empty, nor starts or ends with an
 	// alphanumeric character
 	const size_t len = token.size();
-	if (len == 0 || isAlphanumeric(token[0]) || isAlphanumeric(token[len - 1])) {
+	if (len == 0 || isAlphanumeric(token[0]) ||
+	    isAlphanumeric(token[len - 1])) {
 		return false;
 	}
 
@@ -134,13 +135,19 @@ bool Utils::isUserDefinedToken(const std::string &token)
 		return false;
 	}
 
+	// Make sure the token does not contain any whitespace characters.
+	for (char c : token) {
+		if (isWhitespace(c)) {
+			return false;
+		}
+	}
+
 	// Make sure the token contains other characters but { and }
-	for (char c: token) {
+	for (char c : token) {
 		if (c != '{' && c != '}') {
 			return true;
 		}
 	}
 	return false;
 }
-}
-
+}
\ No newline at end of file
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp
index 25a4de5..d9e26da 100644
--- a/src/core/common/Utils.hpp
+++ b/src/core/common/Utils.hpp
@@ -117,6 +117,7 @@ public:
 	 *        <li>'%', '%{', '}%'</li>
 	 *      </ul>
 	 *   </li>
+	 *   <li>The token does not contain any whitespace characters.</li>
 	 * </ul>
 	 */
 	static bool isUserDefinedToken(const std::string &token);
diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp
index 54890ee..2aaa430 100644
--- a/test/core/common/UtilsTest.cpp
+++ b/test/core/common/UtilsTest.cpp
@@ -148,6 +148,7 @@ TEST(Utils, isUserDefinedToken)
 	EXPECT_TRUE(Utils::isUserDefinedToken("`"));
 	EXPECT_TRUE(Utils::isUserDefinedToken("<"));
 	EXPECT_TRUE(Utils::isUserDefinedToken(">"));
+	EXPECT_TRUE(Utils::isUserDefinedToken("<+>"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("a:"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("a:a"));
 	EXPECT_FALSE(Utils::isUserDefinedToken(":a"));
@@ -158,6 +159,7 @@ TEST(Utils, isUserDefinedToken)
 	EXPECT_FALSE(Utils::isUserDefinedToken("<\\"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("\\>"));
 	EXPECT_FALSE(Utils::isUserDefinedToken("{!"));
+	EXPECT_FALSE(Utils::isUserDefinedToken("< + >"));
 }
 
 }
-- 
cgit v1.2.3