summaryrefslogtreecommitdiff
path: root/src/core/common/Whitespace.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/common/Whitespace.hpp')
-rw-r--r--src/core/common/Whitespace.hpp120
1 files changed, 120 insertions, 0 deletions
diff --git a/src/core/common/Whitespace.hpp b/src/core/common/Whitespace.hpp
new file mode 100644
index 0000000..1e9f36a
--- /dev/null
+++ b/src/core/common/Whitespace.hpp
@@ -0,0 +1,120 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file Whitespace.hpp
+ *
+ * Contains the WhitespaceMode enum used in various places, as well es functions
+ * for trimming and collapsing whitespaces.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_WHITESPACE_HPP_
+#define _OUSIA_WHITESPACE_HPP_
+
+#include <string>
+#include <utility>
+
+namespace ousia {
+
+/**
+ * Enum specifying the whitespace handling mode of the tokenizer and the
+ * parsers.
+ */
+enum class WhitespaceMode {
+ /**
+ * Preserves all whitespaces as they are found in the source file.
+ */
+ PRESERVE,
+
+ /**
+ * Trims whitespace at the beginning and the end of the found text.
+ */
+ TRIM,
+
+ /**
+ * Whitespaces are trimmed and collapsed, multiple whitespace characters
+ * are replaced by a single space character.
+ */
+ COLLAPSE
+};
+
+/**
+ * Collection of functions for trimming or collapsing whitespace.
+ */
+class Whitespace {
+ /**
+ * Removes whitespace at the beginning and the end of the given string.
+ *
+ * @param s is the string that should be trimmed.
+ * @return a trimmed copy of s.
+ */
+ static std::string trim(const std::string &s);
+
+ /**
+ * Trims the given string or vector of chars by returning the start and end
+ * index.
+ *
+ * @param s is the container that should be trimmed.
+ * @param f is a function that returns true for values that should be
+ * removed.
+ * @return start and end index. Note that "end" points at the character
+ * beyond the end, thus "end" minus "start"
+ */
+ template <class T, class Filter>
+ static std::pair<size_t, size_t> trim(const T &s, Filter f)
+ {
+ size_t start = 0;
+ for (size_t i = 0; i < s.size(); i++) {
+ if (!f(s[i])) {
+ start = i;
+ break;
+ }
+ }
+
+ size_t end = 0;
+ for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) {
+ if (!f(s[i])) {
+ end = i + 1;
+ break;
+ }
+ }
+
+ if (end < start) {
+ start = 0;
+ end = 0;
+ }
+
+ return std::pair<size_t, size_t>{start, end};
+ }
+
+ /**
+ * Collapses the whitespaces in the given string (trims the string and
+ * replaces all whitespace characters by a single one).
+ *
+ * @param s is the string in which the whitespace should be collapsed.
+ * @return a copy of s with collapsed whitespace.
+ */
+ static std::string collapse(const std::string &s);
+};
+
+}
+
+#endif /* _OUSIA_WHITESPACE_HPP_ */
+