summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-01-24 03:08:16 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-01-24 03:08:16 +0100
commit67d36e699a2852ce471c4d1b8dab5992d6c01a98 (patch)
tree0ef23befe3fa5af9c5d83b3b8934e444366a8575 /src/core
parentf819b42057b2baea205569dd808c4fcf2bc4d630 (diff)
Implemented SourceContextReader, added unit tests, implemented SourceContextReader interface in ResourceManager, added LoggerTest
Diffstat (limited to 'src/core')
-rw-r--r--src/core/common/SourceContextReader.cpp198
-rw-r--r--src/core/common/SourceContextReader.hpp91
-rw-r--r--src/core/resource/ResourceManager.cpp29
-rw-r--r--src/core/resource/ResourceManager.hpp16
4 files changed, 318 insertions, 16 deletions
diff --git a/src/core/common/SourceContextReader.cpp b/src/core/common/SourceContextReader.cpp
new file mode 100644
index 0000000..65a6281
--- /dev/null
+++ b/src/core/common/SourceContextReader.cpp
@@ -0,0 +1,198 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Utils.hpp>
+
+#include "SourceContextReader.hpp"
+
+namespace ousia {
+
+/**
+ * Creates a reader whose line cache holds a single entry: offset zero, the
+ * position at which the first line starts.
+ */
+SourceContextReader::SourceContextReader()
+    : cache{0}
+{
+}
+
+/**
+ * Reads the context of the given range from the reader: translates the byte
+ * offsets stored in the range into line and column numbers and extracts the
+ * surrounding source text.
+ *
+ * Line starts encountered while scanning are appended to the internal cache,
+ * so later calls can seek to the closest known line start instead of reading
+ * the input from the beginning.
+ *
+ * Returns a default-constructed SourceContext if the range is invalid, if the
+ * reader cannot be moved to the required offset or if the input ends before
+ * the end of the range is reached.
+ */
+SourceContext SourceContextReader::readContext(CharReader &reader,
+                                               const SourceRange &range,
+                                               size_t maxContextLength,
+                                               const std::string &filename)
+{
+    // Abort if the given range is not valid
+    if (!range.isValid()) {  // (I2)
+        return SourceContext{};
+    }
+
+    // Set the filename and the range, line and column numbers are one-based
+    SourceContext ctx;
+    ctx.startLine = 1;
+    ctx.startColumn = 1;
+    ctx.endLine = 1;
+    ctx.endColumn = 1;
+    ctx.range = range;
+    ctx.filename = filename;
+
+    // Some constants for convenience
+    const SourceOffset start = range.getStart();
+    const SourceOffset end = range.getEnd();
+    const SourceOffset lastCacheOffs = cache.back();
+
+    // Find the entry in the cache that is just below the given start offset
+    // and jump to this location
+    size_t offs = 0;
+    auto it = std::lower_bound(cache.begin(), cache.end(), start);
+    if (it != cache.begin()) {
+        --it;        // Go to the previous entry
+        offs = *it;  // Read the corresponding byte offset
+        const size_t line = it - cache.begin() + 1;
+        ctx.startLine = line;
+        ctx.endLine = line;
+    }
+
+    // Move the char reader to the specified offset, abort if this did not work
+    // out
+    if (offs != reader.seek(offs)) {
+        return SourceContext{};
+    }
+
+    // TODO: Handle skew introduced by linebreak processing \n\r => \n
+
+    // Read until the requested byte offset is reached, track linebreaks in the
+    // linebreak cache
+    std::vector<char> lineBuf;
+    size_t lineBufStart = offs;
+    size_t lastLineStart = offs;
+    char c;
+    while (reader.read(c)) {
+        // Fetch the offset after this character
+        const size_t nextOffs = reader.getOffset();
+
+        // Fetch the current offset, check whether start was reached
+        const bool reachedStart = offs >= start;
+        const bool reachedEnd = offs >= end;
+
+        // Handle linebreaks and update the linebreak cache
+        if (c == '\n') {
+            // Remember the start of the next line unless it is already
+            // cached. The comparison has to use nextOffs (the value that is
+            // stored): comparing the offset of the linebreak itself would
+            // skip an empty line located exactly at the last cached line
+            // start and shift the line numbers of all later cache entries.
+            if (nextOffs > lastCacheOffs) {
+                cache.push_back(nextOffs);
+            }
+            if (!reachedStart) {
+                ctx.startLine++;
+                ctx.startColumn = 1;
+                lineBuf.clear();
+                lineBufStart = nextOffs;
+                lastLineStart = nextOffs;
+            } else {
+                lineBuf.push_back('\n');
+            }
+            if (!reachedEnd) {
+                ctx.endLine++;
+                ctx.endColumn = 1;
+            } else {
+                // This was the last character, abort
+                break;
+            }
+        } else {
+            // Increment the start and the end column if this is not an
+            // UTF8-continuation byte (note that we count unicode codepoints not
+            // actual characters, which may be more than one codepoint)
+            if (!((c & 0x80) && !(c & 0x40))) {
+                if (!reachedStart) {
+                    ctx.startColumn++;
+                }
+                if (!reachedEnd) {
+                    ctx.endColumn++;
+                }
+            }
+
+            // Record all characters when start is reached or at least when
+            // the distance to start is smaller than the maximum context length
+            // TODO: This is suboptimal as parts of lineBuf are thrown away
+            // later. If the given range is really large, this will waste huge
+            // amounts of RAM.
+            if (reachedStart || (start - offs <= maxContextLength)) {
+                if (lineBuf.empty()) {
+                    lineBufStart = offs;
+                }
+                lineBuf.push_back(c);
+            }
+        }
+
+        // Set the new offset
+        offs = nextOffs;
+    }
+
+    // If we did not reach the end or for some reason the lineBufStart is larger
+    // than start (to assure invariant I1 is fulfilled), abort
+    offs = reader.getOffset();
+    if (offs < end || lineBufStart > start) {  // (I1)
+        return SourceContext{};
+    }
+
+    // Calculate a first relative position and length
+    ctx.relPos = start - lineBufStart;  // lineBufStart <= start (I1)
+    ctx.relLen = end - start;           // end >= start (I2)
+
+    // Remove linebreaks at the beginning and the end, but never trim away
+    // anything that lies behind the start of the range
+    const std::pair<size_t, size_t> b =
+        Utils::trim(lineBuf, Utils::isLinebreak);
+    ssize_t s = b.first, e = b.second;
+    s = std::min(s, static_cast<ssize_t>(ctx.relPos));
+
+    // Remember the trimmed positions, only continue if the context text did
+    // not entirely consist of linebreaks
+    const ssize_t ts = s, te = e;  // s >= 0, e >= 0, ts >= 0, te >= 0 (I3)
+    if (te > ts) {
+        // Trim the line further if it is longer than the maxContextLength
+        if (static_cast<size_t>(te - ts) > maxContextLength &&
+            maxContextLength != MAX_MAX_CONTEXT_LENGTH) {
+            // Center the window of maxContextLength bytes on the middle of
+            // the range
+            const ssize_t center = (ctx.relPos + ctx.relLen / 2);
+            s = center - maxContextLength / 2;
+            e = center + maxContextLength / 2;
+
+            // Account for rounding error
+            if (static_cast<size_t>(e - s) < maxContextLength) {
+                e++;
+            }
+
+            // Redistribute available characters at the beginning or the end
+            if (s < ts) {
+                e = e + (ts - s);
+                s = ts;  // ts >= 0 => s >= 0 (I3)
+            }
+            if (e > te) {
+                // s is moved back by at most (s - ts) => s >= ts >= 0 (I3)
+                s = s - std::min(s - ts, e - te);
+                e = te;  // te >= 0 => e >= 0 (I3)
+            }
+        }
+
+        // Update the relative position and length, set the "truncated" flags
+        size_t us = static_cast<size_t>(s), ue = static_cast<size_t>(e);
+        ctx.relPos = start - lineBufStart - us;
+        ctx.relLen = std::min(ctx.relLen, ue - us);
+        ctx.truncatedStart = s > ts || lastLineStart < lineBufStart;
+        ctx.truncatedEnd = e < te;
+
+        // Copy the selected area to the output string
+        ctx.text = std::string{&lineBuf[s], ue - us};
+    }
+
+    return ctx;
+}
+}
+
diff --git a/src/core/common/SourceContextReader.hpp b/src/core/common/SourceContextReader.hpp
new file mode 100644
index 0000000..35e71b3
--- /dev/null
+++ b/src/core/common/SourceContextReader.hpp
@@ -0,0 +1,91 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file SourceContextReader.hpp
+ *
+ * The SourceContextReader class is used to read a SourceContext struct from
+ * a SourcePosition instance and an input stream.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_SOURCE_CONTEXT_READER_HPP_
+#define _OUSIA_SOURCE_CONTEXT_READER_HPP_
+
+#include <string>
+#include <vector>
+#include <limits>
+
+#include "Location.hpp"
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+
+/**
+ * The SourceContextReader can read SourceContext structures given a
+ * SourceRange and a char reader. It is capable of managing a line number
+ * cache which speeds up repeated context lookups.
+ *
+ * NOTE(review): the cache is filled from whatever reader is passed to
+ * readContext, so one instance is presumably meant to be used with a single
+ * source only -- confirm against the callers.
+ */
+class SourceContextReader {
+private:
+ /**
+ * Cache containing the byte offsets at which lines start (the offset
+ * directly behind each line break). The entry at index i belongs to line
+ * i + 1. Initialized with the single entry zero for the first line.
+ */
+ std::vector<SourceOffset> cache;
+
+public:
+ /**
+ * Maximum context size. Used to indicate that the context should have an
+ * unlimited size: if this value is passed as maxContextLength, the
+ * extracted context is not shortened.
+ */
+ static constexpr size_t MAX_MAX_CONTEXT_LENGTH =
+ std::numeric_limits<ssize_t>::max();
+
+ /**
+ * Default constructor. Initializes the internal line offset cache with a
+ * single zero entry.
+ */
+ SourceContextReader();
+
+ /**
+ * Returns the context for the char reader and the given SourceRange.
+ * Returns an invalid source context if either the given range is invalid
+ * or the byte offset described in the SourceRange cannot be reached because
+ * the CharReader cannot be seeked back to this position.
+ *
+ * @param reader is the CharReader instance from which the context should be
+ * read.
+ * @param range describes the Range within the source file for which the
+ * context should be extracted.
+ * @param maxContextLength is the maximum number of characters that should
+ * be stored in the returned context.
+ * @param filename is the filename that should be stored in the returned
+ * context.
+ * @return a SourceContext instance describing the given range or an invalid
+ * (default constructed) SourceContext if the context could not be read.
+ */
+ SourceContext readContext(CharReader &reader, const SourceRange &range,
+ size_t maxContextLength = MAX_MAX_CONTEXT_LENGTH,
+ const std::string &filename = "");
+};
+}
+
+#endif /* _OUSIA_SOURCE_CONTEXT_READER_HPP_ */
+
diff --git a/src/core/resource/ResourceManager.cpp b/src/core/resource/ResourceManager.cpp
index f154c9c..a5e76b0 100644
--- a/src/core/resource/ResourceManager.cpp
+++ b/src/core/resource/ResourceManager.cpp
@@ -35,7 +35,8 @@ namespace ousia {
/* Static helper functions */
-static void logUnsopportedType(Logger &logger, Resource &resource, const RttiSet &supportedTypes)
+static void logUnsopportedType(Logger &logger, Resource &resource,
+ const RttiSet &supportedTypes)
{
// Build a list containing the expected type names
std::vector<std::string> expected;
@@ -81,7 +82,7 @@ void ResourceManager::purgeResource(SourceId sourceId)
}
resources.erase(sourceId);
nodes.erase(sourceId);
- lineNumberCache.erase(sourceId);
+ contextReaders.erase(sourceId);
}
Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
@@ -93,7 +94,8 @@ Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
if (mime.empty()) {
mime = ctx.registry.getMimetypeForFilename(resource.getLocation());
if (mime.empty()) {
- ctx.logger.error(std::string("Filename \"") + resource.getLocation() +
+ ctx.logger.error(std::string("Filename \"") +
+ resource.getLocation() +
std::string(
"\" has an unknown file extension. Explicitly "
"specify a mimetype."));
@@ -137,7 +139,8 @@ Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
if (node == nullptr) {
throw LoggableException{"Internal error: Parser returned null."};
}
- } catch (LoggableException ex) {
+ }
+ catch (LoggableException ex) {
// Remove all data associated with the allocated source id
purgeResource(sourceId);
@@ -262,14 +265,20 @@ Rooted<Node> ResourceManager::link(ParserContext &ctx, const std::string &path,
return link(ctx, path, mimetype, rel, supportedTypes, relativeResource);
}
-SourceContext ResourceManager::buildContext(const SourceLocation &location)
+/**
+ * Looks up the resource belonging to the source id of the given location and,
+ * if it is valid, reads the context from a fresh stream of that resource.
+ * Returns a default constructed SourceContext otherwise.
+ */
+SourceContext ResourceManager::readContext(const SourceLocation &location,
+ size_t maxContextLength)
{
- SourceContext res;
-
- // TODO
+ const Resource &resource = getResource(location.getSourceId());
+ if (resource.isValid()) {
+ // Fetch a char reader for the resource
+ // NOTE(review): "is" is dereferenced without a null check -- confirm that
+ // Resource::stream() never returns an empty pointer for a valid resource
+ std::unique_ptr<std::istream> is = resource.stream();
+ CharReader reader{*is, location.getSourceId()};
- return res;
+ // Return the context, using the SourceContextReader stored for this
+ // source id (operator[] default-constructs it on first access)
+ return contextReaders[location.getSourceId()].readContext(
+ reader, location, maxContextLength, resource.getLocation());
+ }
+ return SourceContext{};
}
-
}
diff --git a/src/core/resource/ResourceManager.hpp b/src/core/resource/ResourceManager.hpp
index 51c00e3..d5381b9 100644
--- a/src/core/resource/ResourceManager.hpp
+++ b/src/core/resource/ResourceManager.hpp
@@ -34,6 +34,7 @@
#include <core/common/Location.hpp>
#include <core/common/Rtti.hpp>
+#include <core/common/SourceContextReader.hpp>
#include <core/managed/Managed.hpp>
#include "Resource.hpp"
@@ -74,11 +75,11 @@ private:
std::unordered_map<SourceId, ManagedUid> nodes;
/**
- * Cache used for translating byte offsets to line numbers. Maps from a
- * SourceId onto a list of (sorted) SourceOffsets. The index in the list
- * corresponds to the line number.
+ * Map containing SourceContextReader instances which are -- as their name
+ * suggests -- used to produce SourceContext structures describing the
+ * source code at a given SourceLocation.
*/
- std::unordered_map<SourceId, std::vector<SourceOffset>> lineNumberCache;
+ std::unordered_map<SourceId, SourceContextReader> contextReaders;
/**
* Allocates a new SourceId for the given resource.
@@ -224,11 +225,14 @@ public:
* @param location is the SourceLocation for which context information
* should be retrieved. This method is used by the Logger class to print
* pretty messages.
+ * @param maxContextLength is the maximum length in characters of the
+ * context that should be extracted.
* @return a valid SourceContext if a valid SourceLocation was given or an
* invalid SourceContext if the location is invalid.
*/
- SourceContext buildContext(const SourceLocation &location);
-
+ SourceContext readContext(
+ const SourceLocation &location,
+ size_t maxContextLength = SourceContextReader::MAX_MAX_CONTEXT_LENGTH);
};
}