From ce4fd84a714d80859aa01bbca32a81302b93c4d7 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:43:32 +0100 Subject: Moved code for handling whitespaces to own header, including the "WhitespaceMode" enum --- src/core/common/Utils.cpp | 7 ------- 1 file changed, 7 deletions(-) (limited to 'src/core/common/Utils.cpp') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 563fe2a..4005143 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -18,19 +18,12 @@ #include #include -#include #include #include "Utils.hpp" namespace ousia { -std::string Utils::trim(const std::string &s) -{ - std::pair bounds = trim(s, Utils::isWhitespace); - return s.substr(bounds.first, bounds.second - bounds.first); -} - bool Utils::isIdentifier(const std::string &name) { bool first = true; -- cgit v1.2.3 From 295783320ea3855a14123f9cea163f8f5f689e07 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 23:50:11 +0100 Subject: Moved some of the whitespace functionality back to Utils --- src/core/common/Utils.cpp | 25 ++++++++++++ src/core/common/Utils.hpp | 72 +++++++++++++++++++++++++++++++++++ src/core/common/Whitespace.hpp | 62 +----------------------------- src/core/common/WhitespaceHandler.hpp | 7 ++-- test/core/common/UtilsTest.cpp | 17 +++++++++ 5 files changed, 119 insertions(+), 64 deletions(-) (limited to 'src/core/common/Utils.cpp') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 4005143..3739c61 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -21,6 +21,7 @@ #include #include "Utils.hpp" +#include "WhitespaceHandler.hpp" namespace ousia { @@ -87,5 +88,29 @@ std::string Utils::extractFileExtension(const std::string &filename) } return std::string{}; } + +std::string Utils::trim(const std::string &s) +{ + std::pair bounds = trim(s, Utils::isWhitespace); + return s.substr(bounds.first, bounds.second - bounds.first); +} + +std::string Utils::collapse(const std::string &s) +{ + CollapsingWhitespaceHandler h; + appendToWhitespaceHandler(h, s, 0); + return h.toString(); +} + +bool Utils::startsWith(const std::string &s, const std::string &prefix) +{ + return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix; +} + +bool Utils::endsWith(const std::string &s, const std::string &suffix) +{ + return suffix.size() <= s.size() && + s.substr(s.size() - suffix.size(), suffix.size()) == suffix; +} } diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index af7a773..16a9136 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -99,6 +99,60 @@ public: */ static bool hasNonWhitepaceChar(const std::string &s); + /** + * Removes whitespace at the beginning and the end of the given string. + * + * @param s is the string that should be trimmed. + * @return a trimmed copy of s. + */ + static std::string trim(const std::string &s); + + /** + * Trims the given string or vector of chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template + static std::pair trim(const T &s, Filter f) + { + size_t start = 0; + for (size_t i = 0; i < s.size(); i++) { + if (!f(s[i])) { + start = i; + break; + } + } + + size_t end = 0; + for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { + if (!f(s[i])) { + end = i + 1; + break; + } + } + + if (end < start) { + start = 0; + end = 0; + } + + return std::pair{start, end}; + } + + /** + * Collapses the whitespaces in the given string (trims the string and + * replaces all whitespace characters by a single one). + * + * @param s is the string in which the whitespace should be collapsed. + * @return a copy of s with collapsed whitespace. + */ + static std::string collapse(const std::string &s); + /** * Turns the elements of a collection into a string separated by the * given delimiter. @@ -159,6 +213,24 @@ public: */ static std::string extractFileExtension(const std::string &filename); + /** + * Checks whether the given string starts with the given prefix. + * + * @param s is the string. + * @param prefix is the string which should be checked for being a prefix of + * s. + */ + static bool startsWith(const std::string &s, const std::string &prefix); + + /** + * Checks whether the given string ends with the given suffix. + * + * @param s is the string. + * @param suffix is the string which should be checked for being a suffix of + * s. + */ + static bool endsWith(const std::string &s, const std::string &suffix); + /** * Hash functional to be used for enum classes. * See http://stackoverflow.com/a/24847480/2188211 diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp index 1e9f36a..72a2291 100644 --- a/src/core/common/Whitespace.hpp +++ b/src/core/common/Whitespace.hpp @@ -19,8 +19,7 @@ /** * @file Whitespace.hpp * - * Contains the WhitespaceMode enum used in various places, as well es functions - * for trimming and collapsing whitespaces. + * Contains the WhitespaceMode enum used in various places. * * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) */ @@ -55,65 +54,6 @@ enum class WhitespaceMode { COLLAPSE }; -/** - * Collection of functions for trimming or collapsing whitespace. - */ -class Whitespace { - /** - * Removes whitespace at the beginning and the end of the given string. - * - * @param s is the string that should be trimmed. - * @return a trimmed copy of s. - */ - static std::string trim(const std::string &s); - - /** - * Trims the given string or vector of chars by returning the start and end - * index. - * - * @param s is the container that should be trimmed. - * @param f is a function that returns true for values that should be - * removed. - * @return start and end index. Note that "end" points at the character - * beyond the end, thus "end" minus "start" - */ - template - static std::pair trim(const T &s, Filter f) - { - size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { - if (!f(s[i])) { - start = i; - break; - } - } - - size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { - if (!f(s[i])) { - end = i + 1; - break; - } - } - - if (end < start) { - start = 0; - end = 0; - } - - return std::pair{start, end}; - } - - /** - * Collapses the whitespaces in the given string (trims the string and - * replaces all whitespace characters by a single one). - * - * @param s is the string in which the whitespace should be collapsed. - * @return a copy of s with collapsed whitespace. - */ - static std::string collapse(const std::string &s); -}; - } #endif /* _OUSIA_WHITESPACE_HPP_ */ diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp index 1935c24..79e0518 100644 --- a/src/core/common/WhitespaceHandler.hpp +++ b/src/core/common/WhitespaceHandler.hpp @@ -32,7 +32,7 @@ #include #include -#include "WhitespaceHandler.hpp" +#include "Utils.hpp" namespace ousia { @@ -76,7 +76,7 @@ public: /** * Returns the content of the WhitespaceHandler as string. */ - std::string toString() + std::string toString() const { return std::string(textBuf.data(), textBuf.size()); } @@ -214,7 +214,8 @@ inline void appendToWhitespaceHandler(WhitespaceHandler &handler, Buffer buf, size_t start) { for (auto elem : buf) { - handler.append(elem, start++); + handler.append(elem, start, start + 1); + start++; } } } diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 6b8a916..a4bf4b2 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -65,5 +65,22 @@ TEST(Utils, extractFileExtension) ASSERT_EQ("ext", Utils::extractFileExtension("foo.bar/test.EXT")); } +TEST(Utils, startsWith) +{ + ASSERT_TRUE(Utils::startsWith("foobar", "foo")); + ASSERT_TRUE(Utils::startsWith("foo", "foo")); + ASSERT_FALSE(Utils::startsWith("foo", "foobar")); + ASSERT_FALSE(Utils::startsWith("foobar", "bar")); + ASSERT_TRUE(Utils::startsWith("foo", "")); +} + +TEST(Utils, endsWith) +{ + ASSERT_FALSE(Utils::endsWith("foobar", "foo")); + ASSERT_TRUE(Utils::endsWith("foo", "foo")); + ASSERT_FALSE(Utils::endsWith("foo", "foobar")); + ASSERT_TRUE(Utils::endsWith("foobar", "bar")); + ASSERT_TRUE(Utils::endsWith("foo", "")); +} } -- cgit v1.2.3 From b04364cdbc2144661a28f78e0aa4e5e337254c50 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:10:16 +0100 Subject: Added isNamespacedIdentifier method to Utils --- src/core/common/Utils.cpp | 15 +++++++++++++++ src/core/common/Utils.hpp | 21 ++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'src/core/common/Utils.cpp') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index 3739c61..fc8ee00 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -40,6 +40,21 @@ bool Utils::isIdentifier(const std::string &name) return true; } +bool Utils::isNamespaceIdentifier(const std::string &name) +{ + bool first = true; + for (char c : name) { + if (first && !isIdentifierStartCharacter(c)) { + return false; + } + if (!first && (!isIdentifierCharacter(c) || c == ':')) { + return false; + } + first = (c == ':'); + } + return true; +} + bool Utils::hasNonWhitepaceChar(const std::string &s) { for (char c : s) { diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 8361973..b5cd178 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -74,10 +74,29 @@ public: } /** - * Returns true if the given character is in [A-Za-z][A-Za-z0-9_-]* + * Returns true if the given string is in + * \code{.txt} + * [A-Za-z][A-Za-z0-9_-]* + * \endCode + * + * @param name is the string that should be tested. + * @return true if the string matches the regular expression given above, + * false otherwise. */ static bool isIdentifier(const std::string &name); + /** + * Returns true if the given string is in + * \code{.txt} + * ([A-Za-z][A-Za-z0-9_-]*)(:[A-Za-z][A-Za-z0-9_-]*)* + * \endCode + * + * @param name is the string that should be tested. + * @return true if the string matches the regular expression given above, + * false otherwise. + */ + static bool isNamespacedIdentifier(const std::string &name); + /** * Returns true if the given character is a linebreak character. */ -- cgit v1.2.3 From 9acab70815a0f62bdaf2c7f01e588066b818d330 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sat, 14 Feb 2015 22:45:19 +0100 Subject: Fixed isIdentifier and isNamespacedIdentifier, added and used isIdentifierOrEmpty for use in Node --- src/core/common/Utils.cpp | 13 +++++++++---- src/core/common/Utils.hpp | 5 +++++ src/core/model/Node.cpp | 2 +- test/core/common/UtilsTest.cpp | 39 +++++++++++++++++++++++++++++++++------ 4 files changed, 48 insertions(+), 11 deletions(-) (limited to 'src/core/common/Utils.cpp') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index fc8ee00..f8b53c6 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -37,22 +37,27 @@ bool Utils::isIdentifier(const std::string &name) } first = false; } - return true; + return !first; } -bool Utils::isNamespaceIdentifier(const std::string &name) +bool Utils::isIdentifierOrEmpty(const std::string &name) +{ + return name.empty() || isIdentifier(name); +} + +bool Utils::isNamespacedIdentifier(const std::string &name) { bool first = true; for (char c : name) { if (first && !isIdentifierStartCharacter(c)) { return false; } - if (!first && (!isIdentifierCharacter(c) || c == ':')) { + if (!first && (!isIdentifierCharacter(c) && c != ':')) { return false; } first = (c == ':'); } - return true; + return !first; } bool Utils::hasNonWhitepaceChar(const std::string &s) diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index b5cd178..b5a54fc 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -85,6 +85,11 @@ public: */ static bool isIdentifier(const std::string &name); + /** + * Returns true if the given string is an identifier or an empty string. + */ + static bool isIdentifierOrEmpty(const std::string &name); + /** * Returns true if the given string is in * \code{.txt} diff --git a/src/core/model/Node.cpp b/src/core/model/Node.cpp index 39ee2e4..ce15cad 100644 --- a/src/core/model/Node.cpp +++ b/src/core/model/Node.cpp @@ -448,7 +448,7 @@ bool Node::doValidate(Logger &logger) const { return true; } bool Node::validateName(Logger &logger) const { - if (!Utils::isIdentifier(name)) { + if (!Utils::isIdentifierOrEmpty(name)) { logger.error(type()->name + std::string(" name \"") + name + std::string("\" is not a valid identifier"), this); diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index a4bf4b2..7801296 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -24,14 +24,40 @@ namespace ousia { TEST(Utils, isIdentifier) { - ASSERT_TRUE(Utils::isIdentifier("test")); - ASSERT_TRUE(Utils::isIdentifier("t0-_est")); - ASSERT_FALSE(Utils::isIdentifier("_t0-_EST")); - ASSERT_FALSE(Utils::isIdentifier("-t0-_EST")); - ASSERT_FALSE(Utils::isIdentifier("0t-_EST")); - ASSERT_FALSE(Utils::isIdentifier("invalid key")); + EXPECT_TRUE(Utils::isIdentifier("test")); + EXPECT_TRUE(Utils::isIdentifier("t0-_est")); + EXPECT_FALSE(Utils::isIdentifier("_t0-_EST")); + EXPECT_FALSE(Utils::isIdentifier("-t0-_EST")); + EXPECT_FALSE(Utils::isIdentifier("0t-_EST")); + EXPECT_FALSE(Utils::isIdentifier("_A")); + EXPECT_FALSE(Utils::isIdentifier("invalid key")); + EXPECT_FALSE(Utils::isIdentifier("")); } + +TEST(Utils, isNamespacedIdentifier) +{ + EXPECT_TRUE(Utils::isNamespacedIdentifier("test")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("_t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("-t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("0t-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("invalid key")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("_A")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("")); + EXPECT_FALSE(Utils::isNamespacedIdentifier(":")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("test:a")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:b")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("test:test")); + EXPECT_TRUE(Utils::isNamespacedIdentifier("t0-_est:t0-_est")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("test:_A")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("test::a")); + EXPECT_FALSE(Utils::isNamespacedIdentifier(":test")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est:_t0-_EST")); + EXPECT_FALSE(Utils::isNamespacedIdentifier("t0-_est: b")); +} + + TEST(Utils, split) { ASSERT_EQ(std::vector({"ab"}), Utils::split("ab", '.')); @@ -82,5 +108,6 @@ TEST(Utils, endsWith) ASSERT_TRUE(Utils::endsWith("foobar", "bar")); ASSERT_TRUE(Utils::endsWith("foo", "")); } + } -- cgit v1.2.3