/* Ousía Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef _OUSIA_UTILS_H_ #define _OUSIA_UTILS_H_ #include #include #include namespace ousia { class Utils { public: /** * Returns true if the given character is in [A-Za-z] */ static bool isAlphabetic(const char c) { return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); } /** * Returns true if the given character is in [0-9] */ static bool isNumeric(const char c) { return (c >= '0') && (c <= '9'); } /** * Returns true if the given character is in [0-9A-Fa-f] */ static bool isHexadecimal(const char c) { return ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F')) || ((c >= 'a') && (c <= 'f')); } /** * Returns true if the given character is in [A-Za-z0-9] */ static bool isAlphanumeric(const char c) { return isAlphabetic(c) || isNumeric(c); } /** * Returns true if the given character is in [A-Za-z]. */ static bool isIdentifierStartCharacter(const char c) { return isAlphabetic(c); } /** * Returns true if the given character is in [A-Za-z0-9_-]. */ static bool isIdentifierCharacter(const char c) { return isAlphanumeric(c) || (c == '_') || (c == '-'); } /** * Returns true if the given string is in * \code{.txt} * [A-Za-z][A-Za-z0-9_-]* * \endCode * * @param name is the string that should be tested. * @return true if the string matches the regular expression given above, * false otherwise. */ static bool isIdentifier(const std::string &name); /** * Returns true if the given string is an identifier or an empty string. */ static bool isIdentifierOrEmpty(const std::string &name); /** * Returns true if the given string is in * \code{.txt} * ([A-Za-z][A-Za-z0-9_-]*)(:[A-Za-z][A-Za-z0-9_-]*)* * \endCode * * @param name is the string that should be tested. * @return true if the string matches the regular expression given above, * false otherwise. */ static bool isNamespacedIdentifier(const std::string &name); /** * Returns true if the given characters form a valid user-defined token. * This function returns true under the following circumstances: *

The given token is not empty
The given token starts and ends with a non-alphanumeric character *
The token is none of the following character sequences (which are * special in OSML): *
- '{', '}' or any combined repetition of these characters
- '\', '{!', '<\', '\>'
- '%', '%{', '}%'
*

*/ static bool isUserDefinedToken(const std::string &token); /** * Returns true if the given character is a linebreak character. */ static bool isLinebreak(const char c) { return (c == '\n') || (c == '\r'); } /** * Returns true if the given character is a whitespace character. */ static bool isWhitespace(const char c) { return (c == ' ') || (c == '\t') || isLinebreak(c); } /** * Returns true if the given string has a non-whitespace character. * * @param s is the string that should be checked. * @return true if the string contains a non-whitespace character. */ static bool hasNonWhitepaceChar(const std::string &s); /** * Trims the given string or vector of chars by returning the start and end * index. * * @param s is the container that should be trimmed. * @param f is a function that returns true for values that should be * removed. * @return start and end index. Note that "end" points at the character * beyond the end, thus "end" minus "start" */ template static std::pair trim(const T &s, Filter f) { return trim(s, s.size(), f); } /** * Trims the given string or vector of chars by returning the start and end * index. * * @param s is the container that should be trimmed. * @param len is the number of elements in the container. * @param f is a function that returns true for values at a certain index * that should be removed. * @return start and end index. Note that "end" points at the character * beyond the end, thus "end" minus "start" */ template static std::pair trim(const T &s, size_t len, Filter f) { size_t start = 0; for (size_t i = 0; i < len; i++) { if (!f(i)) { start = i; break; } } size_t end = 0; for (ssize_t i = len - 1; i >= static_cast(start); i--) { if (!f(i)) { end = i + 1; break; } } if (end < start) { start = 0; end = 0; } return std::pair{start, end}; } /** * Trims the given string and returns both the trimmed string and the start * and end location. * * @tparam T is the string type that should be used. * @param s is the container that should be trimmed. * @param len is the number of elements in the container. * @param start is an output parameter which is set to the offset at which * the collapsed version of the string starts. * @param end is an output parameter which is set to the offset at which * the collapsed version of the string ends. * @return start and end index. Note that "end" points at the character * beyond the end, thus "end" minus "start" * @param f is a function that returns true for values at a certain index * that should be removed. */ template static std::string trim(const T &s, size_t len, size_t &start, size_t &end, Filter f) { auto res = trim(s, len, f); start = res.first; end = res.second; return std::string(&s[start], end - start); } /** * Removes whitespace at the beginning and the end of the given string. * * @param s is the string that should be trimmed. * @return a trimmed copy of s. */ static std::string trim(const std::string &s) { std::pair bounds = trim(s, [&s](size_t i) { return isWhitespace(s[i]); }); return s.substr(bounds.first, bounds.second - bounds.first); } /** * Collapses the whitespaces in the given string (trims the string and * replaces all whitespace characters by a single one). * * @param s is the string in which the whitespace should be collapsed. * @return a copy of s with collapsed whitespace. */ static std::string collapse(const std::string &s) { size_t start; size_t end; return collapse(s, s.size(), start, end, [&s](size_t i) { return isWhitespace(s[i]); }); } /** * Collapses the whitespaces in the given string (trims the string and * replaces all whitespace characters by a single one). * * @param s is the string in which the whitespace should be collapsed. * @param start is an output parameter which is set to the offset at which * the collapsed version of the string starts. * @param end is an output parameter which is set to the offset at which * the collapsed version of the string ends. * @return a copy of s with collapsed whitespace. */ static std::string collapse(const std::string &s, size_t &start, size_t &end) { return collapse(s, s.size(), start, end, [&s](size_t i) { return isWhitespace(s[i]); }); } /** * Collapses the whitespaces in the given string (trims the string and * replaces all whitespace characters by a single one). * * @tparam T is the string type that should be used. * @tparam Filter is a filter function used for detecting the character * indices that might be removed. * @param s is the string in which the whitespace should be collapsed. * @param len is the length of the input string * @param start is an output parameter which is set to the offset at which * the collapsed version of the string starts. * @param end is an output parameter which is set to the offset at which * the collapsed version of the string ends. * @return a copy of s with collapsed whitespace. */ template static std::string collapse(const T &s, size_t len, size_t &start, size_t &end, Filter f) { // Result vector std::vector res; // Initialize the output arguments start = 0; end = 0; // Iterate over the input string and replace all whitespace sequences by // a single space bool hadWhitespace = false; for (size_t i = 0; i < len; i++) { const char c = s[i]; if (f(i)) { hadWhitespace = !res.empty(); } else { // Adapt the start and end position if (res.empty()) { start = i; } end = i + 1; // Insert a space character if there was a whitespace if (hadWhitespace) { res.push_back(' '); hadWhitespace = false; } // Insert the character res.push_back(c); } } // Return the result vector as string return std::string(res.data(), res.size()); } /** * Turns the elements of a collection into a string separated by the * given delimiter. * * @param es is an iterable container of elements that can be appended to an * output stream (the << operator must be implemented). * @param delim is the delimiter that should be used to separate the items. * @param start is a character sequence that should be prepended to the * result. * @param end is a character sequence that should be appended to the result. */ template static std::string join(T es, const std::string &delim, const std::string &start = "", const std::string &end = "") { std::stringstream res; bool first = true; res << start; for (const auto &e : es) { if (!first) { res << delim; } res << e; first = false; } res << end; return res.str(); } /** * Splits the given string at the delimiter and returns an array of * substrings without the delimiter. * * @param s is the string that should be splitted. * @param delim is the delimiter at which the string should be splitted. * @return a vector of strings containing the splitted sub-strings. */ static std::vector split(const std::string &s, char delim); /** * Converts the given string to lowercase (only works for ANSI characters). * * @param s is the string that should be converted to lowercase. * @return s in lowercase. */ static std::string toLower(std::string s); /** * Reads the file extension of the given filename. * * @param filename is the filename from which the extension should be * extracted. * @return the extension, excluding any leading dot. The extension is * defined as the substring after the last dot in the given string, if the * dot is after a slash or backslash. The extension is converted to * lowercase. */ static std::string extractFileExtension(const std::string &filename); /** * Checks whether the given string starts with the given prefix. * * @param s is the string. * @param prefix is the string which should be checked for being a prefix of * s. */ static bool startsWith(const std::string &s, const std::string &prefix); /** * Checks whether the given string ends with the given suffix. * * @param s is the string. * @param suffix is the string which should be checked for being a suffix of * s. */ static bool endsWith(const std::string &s, const std::string &suffix); /** * Hash functional to be used for enum classes. * See http://stackoverflow.com/a/24847480/2188211 */ struct EnumHash { template std::size_t operator()(T t) const { return static_cast(t); } }; }; } #endif /* _OUSIA_UTILS_H_ */