Implemented SourceContextReader, added unit tests, implemented SourceContextReader interface in ResourceManager, added LoggerTest

author: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-01-24 03:08:16 +0100
committer: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-01-24 03:08:16 +0100
commit: 67d36e699a2852ce471c4d1b8dab5992d6c01a98 (patch)
tree: 0ef23befe3fa5af9c5d83b3b8934e444366a8575 /src/core
parent: f819b42057b2baea205569dd808c4fcf2bc4d630 (diff)
4 files changed, 318 insertions, 16 deletions
diff --git a/src/core/common/SourceContextReader.cpp b/src/core/common/SourceContextReader.cpp
new file mode 100644
index 0000000..65a6281
--- /dev/null
+++ b/src/core/common/SourceContextReader.cpp
@@ -0,0 +1,198 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Utils.hpp>
+
+#include "SourceContextReader.hpp"
+
+namespace ousia {
+
+SourceContextReader::SourceContextReader() : cache{0} {}  // one entry: offset 0
+
+SourceContext SourceContextReader::readContext(CharReader &reader,
+                                               const SourceRange &range,
+                                               size_t maxContextLength,
+                                               const std::string &filename)
+{
+	// Abort if the given range is not valid
+	if (!range.isValid()) {  // (I2)
+		return SourceContext{};
+	}
+
+	// Set the filename and the range
+	SourceContext ctx;
+	ctx.startLine = 1;
+	ctx.startColumn = 1;
+	ctx.endLine = 1;
+	ctx.endColumn = 1;
+	ctx.range = range;
+	ctx.filename = filename;
+
+	// Some constants for convenience
+	const SourceOffset start = range.getStart();
+	const SourceOffset end = range.getEnd();
+	const SourceOffset lastCacheOffs = cache.back();
+
+	// Find the entry in the cache that is just below the given start offset
+	// and jump to this location
+	size_t offs = 0;
+	auto it = std::lower_bound(cache.begin(), cache.end(), start);
+	if (it != cache.begin()) {
+		it--;  // Go to the previous entry
+		offs = *it; // Read the corresponding byte offset
+		size_t line = it - cache.begin() + 1;
+		ctx.startLine = line;
+		ctx.endLine = line;
+	}
+
+	// Move the char reader to the specified offset, abort if this did not work
+	// out
+	if (offs != reader.seek(offs)) {
+		return SourceContext{};
+	}
+
+	// TODO: Handle skew introduced by linebreak processing \n\r => \n
+
+	// Read until the requested byte offset is reached, track linebreaks in the
+	// linebreak cache
+	std::vector<char> lineBuf;
+	size_t lineBufStart = offs;
+	size_t lastLineStart = offs;
+	char c;
+	while (reader.read(c)) {
+		// Fetch the offset after this character
+		const size_t nextOffs = reader.getOffset();
+
+		// Check whether the current offset already reached start and/or end
+		const bool reachedStart = offs >= start;
+		const bool reachedEnd = offs >= end;
+
+		// Handle linebreaks and update the linebreak cache
+		if (c == '\n') {
+			// Extend the cache. NOTE(review): '\n' at offs == lastCacheOffs is skipped
+			if (offs > lastCacheOffs) {
+				cache.push_back(nextOffs);
+			}
+			if (!reachedStart) {
+				ctx.startLine++;
+				ctx.startColumn = 1;
+				lineBuf.clear();
+				lineBufStart = nextOffs;
+				lastLineStart = nextOffs;
+			} else {
+				lineBuf.push_back('\n');
+			}
+			if (!reachedEnd) {
+				ctx.endLine++;
+				ctx.endColumn = 1;
+			} else {
+				// This was the last character, abort
+				break;
+			}
+		} else {
+			// Increment the start and the end column if this is not an
+			// UTF8-continuation byte (note that we count unicode codepoints not
+			// actual characters, which may be more than one codepoint)
+			if (!((c & 0x80) && !(c & 0x40))) {
+				if (!reachedStart) {
+					ctx.startColumn++;
+				}
+				if (!reachedEnd) {
+					ctx.endColumn++;
+				}
+			}
+
+			// Record all characters when start is reached or at least when
+			// the distance to start is smaller than the maximum context length
+			// TODO: This is suboptimal as parts of lineBuf are thrown away
+			// later. If the given range is really large, this will waste huge
+			// amounts of RAM.
+			if (reachedStart || (start - offs <= maxContextLength)) {
+				if (lineBuf.empty()) {
+					lineBufStart = offs;
+				}
+				lineBuf.push_back(c);
+			}
+		}
+
+		// Set the new offset
+		offs = nextOffs;
+	}
+
+	// If we did not reach the end or for some reason the lineBufStart is larger
+	// than start (to assure invariant I1 is fulfilled), abort
+	offs = reader.getOffset();
+	if (offs < end || lineBufStart > start) {  // (I1)
+		return SourceContext{};
+	}
+
+	// Calculate a first relative position and length
+	ctx.relPos = start - lineBufStart;  // lineBufStart <= start (I1)
+	ctx.relLen = end - start;           // end >= start (I2)
+
+	// Remove linebreaks at the beginning and the end
+	const std::pair<size_t, size_t> b =
+	    Utils::trim(lineBuf, Utils::isLinebreak);
+	ssize_t s = b.first, e = b.second;
+	s = std::min(s, static_cast<ssize_t>(ctx.relPos));
+
+	// Remember the trimmed positions, only continue if the context text did
+	// not entirely consist of linebreaks
+	const ssize_t ts = s, te = e;  // s >= 0, e >= 0, ts >= 0, te >= 0 (I3)
+	if (te > ts) {
+		// Trim the line further if it is longer than the maxContextLength
+		if (static_cast<size_t>(te - ts) > maxContextLength &&
+		    maxContextLength != MAX_MAX_CONTEXT_LENGTH) {
+			ssize_t c = (ctx.relPos + ctx.relLen / 2);
+			s = c - maxContextLength / 2;
+			e = c + maxContextLength / 2;
+
+			// Account for rounding error
+			if (static_cast<size_t>(e - s) < maxContextLength) {
+				e++;
+			}
+
+			// Redistribute available characters at the beginning or the end
+			if (s < ts) {
+				e = e + (ts - s);
+				s = ts;  // ts >= 0 => s >= 0 (I3)
+			}
+			if (e > te) {
+				s = s - std::min(s - ts, e - te);  // min <= s - ts => s >= ts >= 0
+				e = te;                            // te >= 0 => e >= 0 (I3)
+			}
+		}
+
+		// Update the relative position and length, set the "truncated" flags
+		size_t us = static_cast<size_t>(s), ue = static_cast<size_t>(e);
+		ctx.relPos = start - lineBufStart - us;
+		ctx.relLen = std::min(ctx.relLen, ue - us);
+		ctx.truncatedStart = s > ts || lastLineStart < lineBufStart;
+		ctx.truncatedEnd = e < te;
+
+		// Copy the selected area to the output string
+		ctx.text = std::string{&lineBuf[s], ue - us};
+	}
+
+	return ctx;
+}
+}
+
diff --git a/src/core/common/SourceContextReader.hpp b/src/core/common/SourceContextReader.hpp
new file mode 100644
index 0000000..35e71b3
--- /dev/null
+++ b/src/core/common/SourceContextReader.hpp
@@ -0,0 +1,91 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file SourceContextReader.hpp
+ *
+ * The SourceContextReader class is used to read a SourceContext struct from
+ * a SourceRange instance and a CharReader.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_SOURCE_CONTEXT_READER_HPP_
+#define _OUSIA_SOURCE_CONTEXT_READER_HPP_
+
+#include <string>
+#include <vector>
+#include <limits>
+
+#include "Location.hpp"
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+
+/**
+ * The SourceContextReader can read SourceContext structures given a
+ * SourcePosition or SourceRange and a char reader. It is capable of managing
+ * a line number cache which speeds up repeated context lookups.
+ */
+class SourceContextReader {
+private:
+	/**
+	 * Cache containing the byte offset at which each line starts.
+	 */
+	std::vector<SourceOffset> cache;
+
+public:
+	/**
+	 * Maximum context size. Used to indicate that the context should have an
+	 * unlimited size.
+	 */
+	static constexpr size_t MAX_MAX_CONTEXT_LENGTH =
+	    std::numeric_limits<ssize_t>::max();
+
+	/**
+	 * Default constructor. Initializes the internal line offset cache with a
+	 * single zero entry.
+	 */
+	SourceContextReader();
+
+	/**
+	 * Returns the context for the char reader and the given SourceRange.
+	 * Returns an invalid source context if either the given range is invalid
+	 * or the byte offset described in the SourceRange cannot be reached because
+	 * the CharReader cannot be seeked back to this position.
+	 *
+	 * @param reader is the CharReader instance from which the context should be
+	 * read.
+	 * @param range describes the Range within the source file for which the
+	 * context should be extracted.
+	 * @param maxContextLength is the maximum number of characters that should
+	 * be stored in the returned context.
+	 * @param filename is the filename that should be stored in the returned
+	 * context.
+	 * @return a SourceContext instance describing the given range.
+	 */
+	SourceContext readContext(CharReader &reader, const SourceRange &range,
+	                          size_t maxContextLength = MAX_MAX_CONTEXT_LENGTH,
+	                          const std::string &filename = "");
+};
+}
+
+#endif /* _OUSIA_SOURCE_CONTEXT_READER_HPP_ */
+
diff --git a/src/core/resource/ResourceManager.cpp b/src/core/resource/ResourceManager.cpp
index f154c9c..a5e76b0 100644
--- a/src/core/resource/ResourceManager.cpp
+++ b/src/core/resource/ResourceManager.cpp
@@ -35,7 +35,8 @@ namespace ousia {
 
 /* Static helper functions */
 
-static void logUnsopportedType(Logger &logger, Resource &resource, const RttiSet &supportedTypes)
+static void logUnsopportedType(Logger &logger, Resource &resource,
+                               const RttiSet &supportedTypes)
 {
 	// Build a list containing the expected type names
 	std::vector<std::string> expected;
@@ -81,7 +82,7 @@ void ResourceManager::purgeResource(SourceId sourceId)
 	}
 	resources.erase(sourceId);
 	nodes.erase(sourceId);
-	lineNumberCache.erase(sourceId);
+	contextReaders.erase(sourceId);
 }
 
 Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
@@ -93,7 +94,8 @@ Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
 	if (mime.empty()) {
 		mime = ctx.registry.getMimetypeForFilename(resource.getLocation());
 		if (mime.empty()) {
-			ctx.logger.error(std::string("Filename \"") + resource.getLocation() +
+			ctx.logger.error(std::string("Filename \"") +
+			                 resource.getLocation() +
 			                 std::string(
 			                     "\" has an unknown file extension. Explicitly "
 			                     "specify a mimetype."));
@@ -137,7 +139,8 @@ Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
 		if (node == nullptr) {
 			throw LoggableException{"Internal error: Parser returned null."};
 		}
-	} catch (LoggableException ex) {
+	}
+	catch (LoggableException ex) {
 		// Remove all data associated with the allocated source id
 		purgeResource(sourceId);
 
@@ -262,14 +265,20 @@ Rooted<Node> ResourceManager::link(ParserContext &ctx, const std::string &path,
 	return link(ctx, path, mimetype, rel, supportedTypes, relativeResource);
 }
 
-SourceContext ResourceManager::buildContext(const SourceLocation &location)
+SourceContext ResourceManager::readContext(const SourceLocation &location,
+                                           size_t maxContextLength)
 {
-	SourceContext res;
-
-	// TODO
+	const Resource &resource = getResource(location.getSourceId());
+	if (resource.isValid()) {
+		// Fetch a char reader for the resource
+		std::unique_ptr<std::istream> is = resource.stream();
+		CharReader reader{*is, location.getSourceId()};
 
-	return res;
+		// Return the context
+		return contextReaders[location.getSourceId()].readContext(
+		    reader, location, maxContextLength, resource.getLocation());
+	}
+	return SourceContext{};
 }
-
 }
 
diff --git a/src/core/resource/ResourceManager.hpp b/src/core/resource/ResourceManager.hpp
index 51c00e3..d5381b9 100644
--- a/src/core/resource/ResourceManager.hpp
+++ b/src/core/resource/ResourceManager.hpp
@@ -34,6 +34,7 @@
 
 #include <core/common/Location.hpp>
 #include <core/common/Rtti.hpp>
+#include <core/common/SourceContextReader.hpp>
 #include <core/managed/Managed.hpp>
 
 #include "Resource.hpp"
@@ -74,11 +75,11 @@ private:
 	std::unordered_map<SourceId, ManagedUid> nodes;
 
 	/**
-	 * Cache used for translating byte offsets to line numbers. Maps from a
-	 * SourceId onto a list of (sorted) SourceOffsets. The index in the list
-	 * corresponds to the line number.
+	 * Map containing SourceContextReader instances which are -- as their name
+	 * suggests -- used to produce SourceContext structures describing the
+	 * source code at a given SourceLocation.
 	 */
-	std::unordered_map<SourceId, std::vector<SourceOffset>> lineNumberCache;
+	std::unordered_map<SourceId, SourceContextReader> contextReaders;
 
 	/**
 	 * Allocates a new SourceId for the given resource.
@@ -224,11 +225,14 @@ public:
 	 * @param location is the SourceLocation for which context information
 	 * should be retrieved. This method is used by the Logger class to print
 	 * pretty messages.
+	 * @param maxContextLength is the maximum length in characters of context
+	 * that should be extracted.
 	 * @return a valid SourceContext if a valid SourceLocation was given or an
 	 * invalid SourceContext if the location is invalid.
 	 */
-	SourceContext buildContext(const SourceLocation &location);
-
+	SourceContext readContext(
+	    const SourceLocation &location,
+	    size_t maxContextLength = SourceContextReader::MAX_MAX_CONTEXT_LENGTH);
 };
 }
author	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2015-01-24 03:08:16 +0100
committer	Andreas Stöckel <astoecke@techfak.uni-bielefeld.de>	2015-01-24 03:08:16 +0100
commit	67d36e699a2852ce471c4d1b8dab5992d6c01a98 (patch)
tree	0ef23befe3fa5af9c5d83b3b8934e444366a8575 /src/core
parent	f819b42057b2baea205569dd808c4fcf2bc4d630 (diff)