diff options
Diffstat (limited to 'src/core/parser/utils')
| -rw-r--r-- | src/core/parser/utils/SourceOffsetVector.cpp | 24 | ||||
| -rw-r--r-- | src/core/parser/utils/SourceOffsetVector.hpp | 165 | 
2 files changed, 189 insertions, 0 deletions
diff --git a/src/core/parser/utils/SourceOffsetVector.cpp b/src/core/parser/utils/SourceOffsetVector.cpp new file mode 100644 index 0000000..87ad44a --- /dev/null +++ b/src/core/parser/utils/SourceOffsetVector.cpp @@ -0,0 +1,24 @@ +/* +    Ousía +    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "SourceOffsetVector.hpp" + +namespace ousia { +// Do nothing here, make sure the header compiles correctly +} + diff --git a/src/core/parser/utils/SourceOffsetVector.hpp b/src/core/parser/utils/SourceOffsetVector.hpp new file mode 100644 index 0000000..d15055a --- /dev/null +++ b/src/core/parser/utils/SourceOffsetVector.hpp @@ -0,0 +1,165 @@ +/* +    Ousía +    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel + +    This program is free software: you can redistribute it and/or modify +    it under the terms of the GNU General Public License as published by +    the Free Software Foundation, either version 3 of the License, or +    (at your option) any later version. + +    This program is distributed in the hope that it will be useful, +    but WITHOUT ANY WARRANTY; without even the implied warranty of +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +    GNU General Public License for more details. + +    You should have received a copy of the GNU General Public License +    along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file SourceOffsetVector.hpp + * + * Contains a helper class used for storing the SourceOffset of each character + * in a character vector in a compressed manner. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_SOURCE_OFFSET_VECTOR_HPP_ +#define _OUSIA_SOURCE_OFFSET_VECTOR_HPP_ + +#include <cstdint> +#include <cassert> +#include <limits> +#include <vector> +#include <utility> + +#include <core/common/Location.hpp> + +namespace ousia { + +/** + * Class used for storing the SourceOffset of each character in the buffer using + * a delta compression. + */ +class SourceOffsetVector { +private: +	/** +	 * Type used for representing the length of a character. +	 */ +	using Length = uint8_t; + +	/** +	 * Maximum length that can be represented using the Length type. +	 */ +	static constexpr size_t MAX_LEN = std::numeric_limits<Length>::max(); + +	/** +	 * Interval in which the actual offset is stored, expressed as the binary +	 * logarithm. +	 */ +	static constexpr size_t LOG2_OFFSET_INTERVAL = 6; + +	/** +	 * Interval in which the actual offset is stored. +	 */ +	static constexpr size_t OFFSET_INTERVAL = (1 << LOG2_OFFSET_INTERVAL); + +	/** +	 * Bitmask for the bits that are set to zero when a new offset needs to be +	 * written. +	 */ +	static constexpr size_t OFFSET_INTERVAL_MASK = OFFSET_INTERVAL - 1; + +	/** +	 * Vector containing the delta compressed offset information. +	 */ +	std::vector<Length> lens; + +	/** +	 * Offsets containing the absolute offset for all OFFSET_INTERVAL elements. +	 */ +	std::vector<SourceOffset> offsets; + +	/** +	 * Last position given as "end" position in the storeOffset() method. +	 * Used to adapt the length of the previous element in case start and end +	 * positions do not match. +	 */ +	SourceOffset lastEnd; + +public: +	/** +	 * Default constructor of the SourceOffsetVector class. +	 */ +	SourceOffsetVector() : lastEnd(0) {} + +	/** +	 * Stores the location of a character in this SourceOffsetVector. +	 * +	 * @param start is the start location of the chracter in the source file. +	 * @param end is the end location of the character in the source file. +	 */ +	void storeOffset(SourceOffset start, SourceOffset end) +	{ +		// Make sure (end - start) is smaller than MAX_LEN +		assert(end - start < MAX_LEN); + +		// Adapt the length of the previous character in case there is a gap +		if (!lens.empty() && start > lastEnd) { +			lens.back() += start - lastEnd; +		} +		lastEnd = end; + +		// Store an absolute offset every OFFSET_INTERVAL elements +		if ((lens.size() & OFFSET_INTERVAL_MASK) == 0) { +			offsets.push_back(start); +		} + +		// Store the length +		lens.push_back(end - start); +	} + +	/** +	 * Loads the location of the character with the given index. +	 * +	 * @param idx is the index of the character for which the location should be +	 * read. +	 * @return a pair containing start and end source offset. +	 */ +	std::pair<SourceOffset, SourceOffset> loadOffset(size_t idx) +	{ +		// Special treatment for the last character +		const size_t count = lens.size(); +		if (idx > 0 && idx == count) { +			auto offs = loadOffset(count - 1); +			return std::pair<SourceOffset, SourceOffset>(offs.second, +			                                             offs.second); +		} + +		// Calculate the start index in the lens vector and in the offsets +		// vector +		const size_t offsetIdx = idx >> LOG2_OFFSET_INTERVAL; +		const size_t sumStartIdx = idx & ~OFFSET_INTERVAL_MASK; + +		// Make sure the index is valid +		assert(idx < count); +		assert(offsetIdx < offsets.size()); + +		// Sum over the length starting with the start offset +		SourceOffset start = offsets[offsetIdx]; +		for (size_t i = sumStartIdx; i < idx; i++) { +			start += lens[i]; +		} +		return std::pair<SourceOffset, SourceOffset>(start, start + lens[idx]); +	} + +	/** +	 * Returns the number of characters for which offsets are stored. +	 */ +	size_t size() { return lens.size(); } +}; +} + +#endif /* _OUSIA_SOURCE_OFFSET_VECTOR_HPP_ */ +  | 
