diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-01-24 03:08:16 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-01-24 03:08:16 +0100 |
commit | 67d36e699a2852ce471c4d1b8dab5992d6c01a98 (patch) | |
tree | 0ef23befe3fa5af9c5d83b3b8934e444366a8575 /src/core | |
parent | f819b42057b2baea205569dd808c4fcf2bc4d630 (diff) |
Implemented SourceContextReader, added unit tests, implemented SourceContextReader interface in ResourceManager, added LoggerTest
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/common/SourceContextReader.cpp | 198 | ||||
-rw-r--r-- | src/core/common/SourceContextReader.hpp | 91 | ||||
-rw-r--r-- | src/core/resource/ResourceManager.cpp | 29 | ||||
-rw-r--r-- | src/core/resource/ResourceManager.hpp | 16 |
4 files changed, 318 insertions, 16 deletions
diff --git a/src/core/common/SourceContextReader.cpp b/src/core/common/SourceContextReader.cpp
new file mode 100644
index 0000000..65a6281
--- /dev/null
+++ b/src/core/common/SourceContextReader.cpp
@@ -0,0 +1,198 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+
+#include <core/common/CharReader.hpp>
+#include <core/common/Utils.hpp>
+
+#include "SourceContextReader.hpp"
+
+namespace ousia {
+
+SourceContextReader::SourceContextReader() : cache{0} {}
+
+SourceContext SourceContextReader::readContext(CharReader &reader,
+                                               const SourceRange &range,
+                                               size_t maxContextLength,
+                                               const std::string &filename)
+{
+    // Abort if the given range is not valid
+    if (!range.isValid()) {  // (I2)
+        return SourceContext{};
+    }
+
+    // Set the filename and the range
+    SourceContext ctx;
+    ctx.startLine = 1;
+    ctx.startColumn = 1;
+    ctx.endLine = 1;
+    ctx.endColumn = 1;
+    ctx.range = range;
+    ctx.filename = filename;
+
+    // Some constants for convenience
+    const SourceOffset start = range.getStart();
+    const SourceOffset end = range.getEnd();
+    const SourceOffset lastCacheOffs = cache.back();
+
+    // Find the entry in the cache that is just below the given start offset
+    // and jump to this location
+    size_t offs = 0;
+    auto it = std::lower_bound(cache.begin(), cache.end(), start);
+    if (it != cache.begin()) {
+        it--;  // Go to the previous entry
+        offs = *it;  // Read the corresponding byte offset
+        size_t line = it - cache.begin() + 1;
+        ctx.startLine = line;
+        ctx.endLine = line;
+    }
+
+    // Move the char reader to the specified offset, abort if this did not work
+    // out
+    if (offs != reader.seek(offs)) {
+        return SourceContext{};
+    }
+
+    // TODO: Handle skew introduced by linebreak processing \n\r => \n
+
+    // Read until the requested byte offset is reached, track linebreaks in the
+    // linebreak cache
+    std::vector<char> lineBuf;
+    size_t lineBufStart = offs;
+    size_t lastLineStart = offs;
+    char c;
+    while (reader.read(c)) {
+        // Fetch the offset after this character
+        const size_t nextOffs = reader.getOffset();
+
+        // Fetch the current offset, check whether start was reached
+        const bool reachedStart = offs >= start;
+        const bool reachedEnd = offs >= end;
+
+        // Handle linebreaks and update the linebreak cache
+        if (c == '\n') {
+            // '\n' at/after the last cached line start opens an uncached line
+            if (offs >= lastCacheOffs) {
+                cache.push_back(nextOffs);
+            }
+            if (!reachedStart) {
+                ctx.startLine++;
+                ctx.startColumn = 1;
+                lineBuf.clear();
+                lineBufStart = nextOffs;
+                lastLineStart = nextOffs;
+            } else {
+                lineBuf.push_back('\n');
+            }
+            if (!reachedEnd) {
+                ctx.endLine++;
+                ctx.endColumn = 1;
+            } else {
+                // This was the last character, abort
+                break;
+            }
+        } else {
+            // Increment the start and the end column if this is not an
+            // UTF8-continuation byte (note that we count unicode codepoints not
+            // actual characters, which may be more than one codepoint)
+            if (!((c & 0x80) && !(c & 0x40))) {
+                if (!reachedStart) {
+                    ctx.startColumn++;
+                }
+                if (!reachedEnd) {
+                    ctx.endColumn++;
+                }
+            }
+
+            // Record all characters when start is reached or at least when
+            // the distance to start is smaller than the maximum context length
+            // TODO: This is suboptimal as parts of lineBuf are thrown away
+            // later. If the given range is really large, this will waste huge
+            // amounts of RAM.
+            if (reachedStart || (start - offs <= maxContextLength)) {
+                if (lineBuf.empty()) {
+                    lineBufStart = offs;
+                }
+                lineBuf.push_back(c);
+            }
+        }
+
+        // Set the new offset
+        offs = nextOffs;
+    }
+
+    // If we did not reach the end or for some reason the lineBufStart is larger
+    // than start (to assure invariant I1 is fulfilled), abort
+    offs = reader.getOffset();
+    if (offs < end || lineBufStart > start) {  // (I1)
+        return SourceContext{};
+    }
+
+    // Calculate a first relative position and length
+    ctx.relPos = start - lineBufStart;  // lineBufStart <= start (I1)
+    ctx.relLen = end - start;           // end >= start (I2)
+
+    // Remove linebreaks at the beginning and the end
+    const std::pair<size_t, size_t> b =
+        Utils::trim(lineBuf, Utils::isLinebreak);
+    ssize_t s = b.first, e = b.second;
+    s = std::min(s, static_cast<ssize_t>(ctx.relPos));
+
+    // Remember the trimmed positions, only continue if the context text did
+    // not entirely consist of linebreaks
+    const ssize_t ts = s, te = e;  // s >= 0, e >= 0, ts >= 0, te >= 0 (I3)
+    if (te > ts) {
+        // Trim the line further if it is longer than the maxContextLength
+        if (static_cast<size_t>(te - ts) > maxContextLength &&
+            maxContextLength != MAX_MAX_CONTEXT_LENGTH) {
+            ssize_t c = (ctx.relPos + ctx.relLen / 2);
+            s = c - maxContextLength / 2;
+            e = c + maxContextLength / 2;
+
+            // Account for rounding error
+            if (static_cast<size_t>(e - s) < maxContextLength) {
+                e++;
+            }
+
+            // Redistribute available characters at the beginning or the end
+            if (s < ts) {
+                e = e + (ts - s);
+                s = ts;  // ts >= 0 => s >= 0 (I3)
+            }
+            if (e > te) {
+                s = s - std::min(s - ts, e - te);  // shift <= s - ts => s >= ts
+                e = te;  // te >= 0 => e >= 0 (I3)
+            }
+        }
+
+        // Update the relative position and length, set the "truncated" flags
+        size_t us = static_cast<size_t>(s), ue = static_cast<size_t>(e);
+        ctx.relPos = start - lineBufStart - us;
+        ctx.relLen = std::min(ctx.relLen, ue - us);
+        ctx.truncatedStart = s > ts || lastLineStart < lineBufStart;
+        ctx.truncatedEnd = e < te;
+
+        // Copy the selected area to the output string
+        ctx.text = std::string{&lineBuf[s], ue - us};
+    }
+
+    return ctx;
+}
+}
+
diff --git a/src/core/common/SourceContextReader.hpp b/src/core/common/SourceContextReader.hpp
new file mode 100644
index 0000000..35e71b3
--- /dev/null
+++ b/src/core/common/SourceContextReader.hpp
@@ -0,0 +1,91 @@
+/*
+    Ousía
+    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file SourceContextReader.hpp
+ *
+ * The SourceContextReader class is used to read a SourceContext struct from
+ * a SourcePosition instance and an input stream.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_SOURCE_CONTEXT_READER_HPP_
+#define _OUSIA_SOURCE_CONTEXT_READER_HPP_
+
+#include <string>
+#include <vector>
+#include <limits>
+
+#include "Location.hpp"
+
+namespace ousia {
+
+// Forward declarations
+class CharReader;
+
+/**
+ * The SourceContextReader can read SourceContext structures given a
+ * SourcePosition or SourceRange and a char reader. It is capable of managing
+ * a line number cache which speeds up repeated context lookups.
+ */
+class SourceContextReader {
+private:
+    /**
+     * Sorted cache containing the byte offset at which each line starts.
+     */
+    std::vector<SourceOffset> cache;
+
+public:
+    /**
+     * Maximum context size. Used to indicate that the context should have an
+     * unlimited size.
+     */
+    static constexpr size_t MAX_MAX_CONTEXT_LENGTH =
+        std::numeric_limits<ssize_t>::max();
+
+    /**
+     * Default constructor. Initializes the internal line start cache with a
+     * single zero entry.
+     */
+    SourceContextReader();
+
+    /**
+     * Returns the context for the char reader and the given SourceRange.
+     * Returns an invalid source context if either the given range is invalid
+     * or the byte offset described in the SourceRange cannot be reached because
+     * the CharReader cannot be seeked back to this position.
+     *
+     * @param reader is the CharReader instance from which the context should be
+     * read.
+     * @param range describes the Range within the source file for which the
+     * context should be extracted.
+     * @param maxContextLength is the maximum number of characters that should
+     * be stored in the returned context.
+     * @param filename is the filename that should be stored in the returned
+     * context.
+     * @return the SourceContext for the range, or an invalid one on failure.
+     */
+    SourceContext readContext(CharReader &reader, const SourceRange &range,
+                              size_t maxContextLength = MAX_MAX_CONTEXT_LENGTH,
+                              const std::string &filename = "");
+};
+}
+
+#endif /* _OUSIA_SOURCE_CONTEXT_READER_HPP_ */
+
diff --git a/src/core/resource/ResourceManager.cpp b/src/core/resource/ResourceManager.cpp
index f154c9c..a5e76b0 100644
--- a/src/core/resource/ResourceManager.cpp
+++ b/src/core/resource/ResourceManager.cpp
@@ -35,7 +35,8 @@ namespace ousia {

 /* Static helper functions */

-static void logUnsopportedType(Logger &logger, Resource &resource, const RttiSet &supportedTypes)
+static void logUnsopportedType(Logger &logger, Resource &resource,
+                               const RttiSet &supportedTypes)
 {
     // Build a list containing the expected type names
     std::vector<std::string> expected;
@@ -81,7 +82,7 @@ void ResourceManager::purgeResource(SourceId sourceId)
     }
     resources.erase(sourceId);
     nodes.erase(sourceId);
-    lineNumberCache.erase(sourceId);
+    contextReaders.erase(sourceId);
 }

 Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
@@ -93,7 +94,8 @@ Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
     if (mime.empty()) {
         mime = ctx.registry.getMimetypeForFilename(resource.getLocation());
         if (mime.empty()) {
-            ctx.logger.error(std::string("Filename \"") + resource.getLocation() +
+            ctx.logger.error(std::string("Filename \"") +
+                             resource.getLocation() +
                              std::string(
                                  "\" has an unknown file extension. Explicitly "
                                  "specify a mimetype."));
@@ -137,7 +139,8 @@ Rooted<Node> ResourceManager::parse(ParserContext &ctx, Resource &resource,
         if (node == nullptr) {
             throw LoggableException{"Internal error: Parser returned null."};
         }
-    } catch (LoggableException ex) {
+    }
+    catch (LoggableException ex) {
         // Remove all data associated with the allocated source id
         purgeResource(sourceId);

@@ -262,14 +265,20 @@ Rooted<Node> ResourceManager::link(ParserContext &ctx, const std::string &path,
     return link(ctx, path, mimetype, rel, supportedTypes, relativeResource);
 }

-SourceContext ResourceManager::buildContext(const SourceLocation &location)
+SourceContext ResourceManager::readContext(const SourceLocation &location,
+                                           size_t maxContextLength)
 {
-    SourceContext res;
-
-    // TODO
+    const Resource &resource = getResource(location.getSourceId());
+    if (resource.isValid()) {
+        // Fetch a char reader for the resource
+        std::unique_ptr<std::istream> is = resource.stream();
+        CharReader reader{*is, location.getSourceId()};

-    return res;
+        // Return the context
+        return contextReaders[location.getSourceId()].readContext(
+            reader, location, maxContextLength, resource.getLocation());
+    }
+    return SourceContext{};
 }
-
 }

diff --git a/src/core/resource/ResourceManager.hpp b/src/core/resource/ResourceManager.hpp
index 51c00e3..d5381b9 100644
--- a/src/core/resource/ResourceManager.hpp
+++ b/src/core/resource/ResourceManager.hpp
@@ -34,6 +34,7 @@

 #include <core/common/Location.hpp>
 #include <core/common/Rtti.hpp>
+#include <core/common/SourceContextReader.hpp>
 #include <core/managed/Managed.hpp>

 #include "Resource.hpp"
@@ -74,11 +75,11 @@ private:
     std::unordered_map<SourceId, ManagedUid> nodes;

     /**
-     * Cache used for translating byte offsets to line numbers. Maps from a
-     * SourceId onto a list of (sorted) SourceOffsets. The index in the list
-     * corresponds to the line number.
+     * Map containing SourceContextReader instances which are -- as their name
+     * suggests -- used to produce SourceContext structures describing the
+     * source code at a given SourceLocation.
      */
-    std::unordered_map<SourceId, std::vector<SourceOffset>> lineNumberCache;
+    std::unordered_map<SourceId, SourceContextReader> contextReaders;

     /**
      * Allocates a new SourceId for the given resource.
@@ -224,11 +225,14 @@ public:
      * @param location is the SourceLocation for which context information
      * should be retrieved. This method is used by the Logger class to print
      * pretty messages.
+     * @param maxContextLength is the maximum length in characters of the
+     * context that should be extracted.
      * @return a valid SourceContext if a valid SourceLocation was given or an
      * invalid SourceContext if the location is invalid.
      */
-    SourceContext buildContext(const SourceLocation &location);
-
+    SourceContext readContext(
+        const SourceLocation &location,
+        size_t maxContextLength = SourceContextReader::MAX_MAX_CONTEXT_LENGTH);
 };
 }
