From 6d59aa6bff5ee69ca8afaaf34227a33f0fb9d145 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 22 Feb 2015 22:59:18 +0100 Subject: Moved implementation of collapse to Utils, providing same interface for collapse and trim --- src/core/common/Utils.cpp | 8 ---- src/core/common/Utils.hpp | 106 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index f8b53c6..a77951e 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -21,7 +21,6 @@ #include #include "Utils.hpp" -#include "WhitespaceHandler.hpp" namespace ousia { @@ -115,13 +114,6 @@ std::string Utils::trim(const std::string &s) return s.substr(bounds.first, bounds.second - bounds.first); } -std::string Utils::collapse(const std::string &s) -{ - CollapsingWhitespaceHandler h; - appendToWhitespaceHandler(h, s, 0); - return h.toString(); -} - bool Utils::startsWith(const std::string &s, const std::string &prefix) { return prefix.size() <= s.size() && s.substr(0, prefix.size()) == prefix; diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index b5a54fc..7d96562 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -80,7 +80,7 @@ public: * \endCode * * @param name is the string that should be tested. - * @return true if the string matches the regular expression given above, + * @return true if the string matches the regular expression given above, * false otherwise. */ static bool isIdentifier(const std::string &name); @@ -97,7 +97,7 @@ public: * \endCode * * @param name is the string that should be tested. - * @return true if the string matches the regular expression given above, + * @return true if the string matches the regular expression given above, * false otherwise. 
*/ static bool isNamespacedIdentifier(const std::string &name); @@ -185,14 +185,113 @@ public: return std::pair{start, end}; } + /** + * Trims the given string and returns both the trimmed string and the start + * and end location. + * + * @tparam T is the string type that should be used. + * @param s is the container that should be trimmed. + * @param len is the number of elements in the container. + * @param start is an output parameter which is set to the offset at which + * the trimmed version of the string starts. + * @param end is an output parameter which is set to the offset one past + * the last character of the trimmed version of the string. + * @return the trimmed string. Note that "end" points at the character + * beyond the end, thus "end" minus "start" is the length of the result. + */ + template + static std::string trim(const T &s, size_t len, size_t &start, size_t &end) + { + auto res = trim(s, len, isWhitespace); + start = res.first; + end = res.second; + return std::string(&s[start], end - start); + } + + /** + * Collapses the whitespaces in the given string (trims the string and + * replaces all whitespace characters by a single one). + * + * @param s is the string in which the whitespace should be collapsed. + * @return a copy of s with collapsed whitespace. + */ + static std::string collapse(const std::string &s) + { + size_t start; + size_t end; + return collapse(s, s.size(), start, end); + } + + /** + * Collapses the whitespaces in the given string (trims the string and + * replaces all whitespace characters by a single one). + * + * @param s is the string in which the whitespace should be collapsed. + * @param start is an output parameter which is set to the offset in s of + * the first non-whitespace character (zero if there is none). + * @param end is an output parameter which is set to the offset in s one + * past the last non-whitespace character (zero if there is none). + * @return a copy of s with collapsed whitespace. 
+ */ + static std::string collapse(const std::string &s, size_t &start, + size_t &end) + { + return collapse(s, s.size(), start, end); + } + /** * Collapses the whitespaces in the given string (trims the string and * replaces all whitespace characters by a single one). * + * @tparam T is the string type that should be used. * @param s is the string in which the whitespace should be collapsed. + * @param len is the length of the input string. + * @param start is an output parameter which is set to the offset in s of + * the first non-whitespace character (zero if there is none). + * @param end is an output parameter which is set to the offset in s one + * past the last non-whitespace character (zero if there is none). * @return a copy of s with collapsed whitespace. */ - static std::string collapse(const std::string &s); + template + static std::string collapse(const T &s, size_t len, size_t &start, + size_t &end) + { + // Result vector + std::vector res; + + // Initialize the output arguments + start = 0; + end = 0; + + // Iterate over the input string and replace all whitespace sequences by + // a single space + bool hadWhitespace = false; + for (size_t i = 0; i < len; i++) { + const char c = s[i]; + const bool whitespace = isWhitespace(c); + if (whitespace) { + hadWhitespace = !res.empty(); + } else { + // Adapt the start and end position + if (res.empty()) { + start = i; + } + end = i + 1; + + // Insert a space character if there was a whitespace + if (hadWhitespace) { + res.push_back(' '); + hadWhitespace = false; + } + + // Insert the character + res.push_back(c); + } + } + + // Return the result vector as string + return std::string(res.data(), res.size()); + } /** * Turns the elements of a collection into a string separated by the @@ -287,4 +386,3 @@ public: } #endif /* _OUSIA_UTILS_H_ */ - -- cgit v1.2.3