summaryrefslogtreecommitdiff
path: root/src/core/parser
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/parser')
-rw-r--r--src/core/parser/utils/SourceOffsetVector.cpp24
-rw-r--r--src/core/parser/utils/SourceOffsetVector.hpp165
2 files changed, 189 insertions, 0 deletions
diff --git a/src/core/parser/utils/SourceOffsetVector.cpp b/src/core/parser/utils/SourceOffsetVector.cpp
new file mode 100644
index 0000000..87ad44a
--- /dev/null
+++ b/src/core/parser/utils/SourceOffsetVector.cpp
@@ -0,0 +1,24 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "SourceOffsetVector.hpp"
+
+namespace ousia {
+// Do nothing here, make sure the header compiles correctly
+}
+
diff --git a/src/core/parser/utils/SourceOffsetVector.hpp b/src/core/parser/utils/SourceOffsetVector.hpp
new file mode 100644
index 0000000..d15055a
--- /dev/null
+++ b/src/core/parser/utils/SourceOffsetVector.hpp
@@ -0,0 +1,165 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file SourceOffsetVector.hpp
+ *
+ * Contains a helper class used for storing the SourceOffset of each character
+ * in a character vector in a compressed manner.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_SOURCE_OFFSET_VECTOR_HPP_
+#define _OUSIA_SOURCE_OFFSET_VECTOR_HPP_
+
+#include <cstdint>
+#include <cassert>
+#include <limits>
+#include <vector>
+#include <utility>
+
+#include <core/common/Location.hpp>
+
+namespace ousia {
+
+/**
+ * Class used for storing the SourceOffset of each character in the buffer using
+ * a delta compression.
+ */
+class SourceOffsetVector {
+private:
+ /**
+ * Type used for representing the length of a character.
+ */
+ using Length = uint8_t;
+
+ /**
+ * Maximum length that can be represented using the Length type.
+ */
+ static constexpr size_t MAX_LEN = std::numeric_limits<Length>::max();
+
+ /**
+ * Interval in which the actual offset is stored, expressed as the binary
+ * logarithm.
+ */
+ static constexpr size_t LOG2_OFFSET_INTERVAL = 6;
+
+ /**
+ * Interval in which the actual offset is stored.
+ */
+ static constexpr size_t OFFSET_INTERVAL = (1 << LOG2_OFFSET_INTERVAL);
+
+ /**
+ * Bitmask for the bits that are set to zero when a new offset needs to be
+ * written.
+ */
+ static constexpr size_t OFFSET_INTERVAL_MASK = OFFSET_INTERVAL - 1;
+
+ /**
+ * Vector containing the delta compressed offset information.
+ */
+ std::vector<Length> lens;
+
+ /**
+ * Offsets containing the absolute offset for all OFFSET_INTERVAL elements.
+ */
+ std::vector<SourceOffset> offsets;
+
+ /**
+ * Last position given as "end" position in the storeOffset() method.
+ * Used to adapt the length of the previous element in case start and end
+ * positions do not match.
+ */
+ SourceOffset lastEnd;
+
+public:
+ /**
+ * Default constructor of the SourceOffsetVector class.
+ */
+ SourceOffsetVector() : lastEnd(0) {}
+
+ /**
+ * Stores the location of a character in this SourceOffsetVector.
+ *
+ * @param start is the start location of the chracter in the source file.
+ * @param end is the end location of the character in the source file.
+ */
+ void storeOffset(SourceOffset start, SourceOffset end)
+ {
+ // Make sure (end - start) is smaller than MAX_LEN
+ assert(end - start < MAX_LEN);
+
+ // Adapt the length of the previous character in case there is a gap
+ if (!lens.empty() && start > lastEnd) {
+ lens.back() += start - lastEnd;
+ }
+ lastEnd = end;
+
+ // Store an absolute offset every OFFSET_INTERVAL elements
+ if ((lens.size() & OFFSET_INTERVAL_MASK) == 0) {
+ offsets.push_back(start);
+ }
+
+ // Store the length
+ lens.push_back(end - start);
+ }
+
+ /**
+ * Loads the location of the character with the given index.
+ *
+ * @param idx is the index of the character for which the location should be
+ * read.
+ * @return a pair containing start and end source offset.
+ */
+ std::pair<SourceOffset, SourceOffset> loadOffset(size_t idx)
+ {
+ // Special treatment for the last character
+ const size_t count = lens.size();
+ if (idx > 0 && idx == count) {
+ auto offs = loadOffset(count - 1);
+ return std::pair<SourceOffset, SourceOffset>(offs.second,
+ offs.second);
+ }
+
+ // Calculate the start index in the lens vector and in the offsets
+ // vector
+ const size_t offsetIdx = idx >> LOG2_OFFSET_INTERVAL;
+ const size_t sumStartIdx = idx & ~OFFSET_INTERVAL_MASK;
+
+ // Make sure the index is valid
+ assert(idx < count);
+ assert(offsetIdx < offsets.size());
+
+ // Sum over the length starting with the start offset
+ SourceOffset start = offsets[offsetIdx];
+ for (size_t i = sumStartIdx; i < idx; i++) {
+ start += lens[i];
+ }
+ return std::pair<SourceOffset, SourceOffset>(start, start + lens[idx]);
+ }
+
+ /**
+ * Returns the number of characters for which offsets are stored.
+ */
+ size_t size() { return lens.size(); }
+};
+}
+
+#endif /* _OUSIA_SOURCE_OFFSET_VECTOR_HPP_ */
+