diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/common/Location.hpp | 375 | ||||
-rw-r--r-- | src/core/parser/ParserContext.hpp | 4 | ||||
-rw-r--r-- | src/core/resource/ResourceManager.cpp | 252 | ||||
-rw-r--r-- | src/core/resource/ResourceManager.hpp | 152 |
4 files changed, 722 insertions, 61 deletions
diff --git a/src/core/common/Location.hpp b/src/core/common/Location.hpp index 39e1011..f3a30b2 100644 --- a/src/core/common/Location.hpp +++ b/src/core/common/Location.hpp @@ -16,91 +16,310 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * @file Location.hpp + * + * Types used for describing positions, ranges and excerpts of source files used + * for describing log messages. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + #ifndef _OUSIA_LOCATION_HPP_ #define _OUSIA_LOCATION_HPP_ +#include <cstdint> +#include <limits> #include <string> namespace ousia { /** - * Struct representing a location within a source file. A position is defined by - * a byte offset (which is always reproducable), a line number and a column - * number (which may differ depending on the encoding used). + * Type used for referencing a source file currently opened in a Project. + */ +using SourceId = uint32_t; + +/** + * Type used for specifying an offset within a source file. + */ +using SourceOffset = uint32_t; + +/** + * Maximum value for a SourceOffset. As SourceOffset is a 32 Bit unsigned + * integer, the maximum value is 2^32-1, which means that 4 GiB are addressable + * by SourceOffset. */ -struct SourceLocation { +constexpr SourceOffset SourceOffsetMax = + std::numeric_limits<SourceOffset>::max(); + +/** + * Function for clamping a size_t to a valid SourceOffset value. + * + * @param pos is the size_t value that should be converted to a SourceOffset + * value. If pos is larger than the maximum value that can be represented by + * SourceOffset, the result is set to this maximum value, which is interpreted + * as "invalid" by functions dealing with the SourceOffset type. + * @return the clamped position value. + */ +inline SourceOffset clampToSourcePosition(size_t pos) +{ + return pos > SourceOffsetMax ? SourceOffsetMax : pos; +} + +/** + * Class specifying a position within an (unspecified) source file. + */ +class SourcePosition { +private: /** - * Current line, starting with one. + * Offset position in bytes relative to the start of the document. */ - int line; + SourceOffset pos; +public: /** - * Current column, starting with one. + * Default constructor of the SourcePosition class. Sets the position to + * SourceOffsetMax and thus marks the SourcePosition as invalid. */ - int column; + SourcePosition() : pos(SourceOffsetMax) {} /** - * Current byte offset. + * Creates a new SourcePosition instance with the given byte offset. */ - size_t offs; + SourcePosition(size_t pos) : pos(clampToSourcePosition(pos)) {} /** - * Default constructor of the SourceLocation struct, initializes all - * memebers with zero. + * Sets the position of the SourcePosition value to the given value. Clamps + * the given size_t to the valid range. + * + * @param pos is the position value that should be set. */ - SourceLocation() : line(0), column(0), offs(0) {} + void setPosition(size_t pos) { this->pos = clampToSourcePosition(pos); } /** - * Creates a new SourceLocation struct with only a line and no column. + * Returns the position value. Only use the value if "valid" returns true. * - * @param line is the line number. - * @param column is the column number. + * @return the current position. */ - SourceLocation(int line) : line(line), column(0), offs(0) {} + SourceOffset getPosition() const { return pos; } /** - * Creates a new SourceLocation struct with a line and column. + * Returns true if the source position is valid, false otherwise. Invalid + * positions are set to the maximum representable number. * - * @param line is the line number. - * @param column is the column number. + * @return true if the SourcePosition instance is value, false otherwise. */ - SourceLocation(int line, int column) : line(line), column(column), offs(0) + bool isValid() { return pos != SourceOffsetMax; } +}; + +/** + * The SourceRange class represents a range within an (unspecified) source file. + */ +class SourceRange { +private: + /** + * Start byte offset. + */ + SourcePosition start; + + /** + * End byte offset. + */ + SourcePosition end; + +public: + /** + * Default constructor. Creates an invalid range. + */ + SourceRange(){}; + + /** + * Constructor for a zero-length range. + * + * @param pos is the byte offset at which the SourceRange instance should be + * located. + */ + SourceRange(SourcePosition pos) : start(pos), end(pos) {} + + /** + * Constructor of a SourceRange instance. + * + * @param start is the byte offset of the first character in the range + * (start is inclusive). + * @param end points at the end of the range (end is non-inclusive). + */ + SourceRange(SourcePosition start, SourcePosition end) + : start(start), end(end) { } /** - * Creates a new SourceLocation struct with a line, column and byte offset. + * Sets the start of the SourceRange value to the given value. This + * operation might render the SourceRange invalid (if the given position is + * larger than the end position). + * + * @param pos is the start position value that should be set. + */ + void setStart(SourcePosition pos) { this->start = pos; } + + /** + * Sets the end of the SourceRange value to the given value. This operation + * might render the SourceRange invalid (if the given position is smaller + * than the start position). * - * @param line is the line number. - * @param column is the column number. - * @param offs is the byte offset. + * @param pos is the end position that should be set. */ - SourceLocation(int line, int column, size_t offs) - : line(line), column(column), offs(offs) + void setEnd(SourcePosition pos) { this->end = pos; } + + /** + * Sets the start and end of the SourceRange value to the given values. + * This operation might render the SourceRange invalid (if the given end + * position is smaller than the start position). + * + * @param start is the start position that should be set. + * @param end is the end position that should be set. + */ + void setRange(SourcePosition start, SourcePosition end) { + this->start = start; + this->end = end; } /** - * Returns true, if the line number is valid, false otherwise. + * Makes the Range represent a zero-length range that is located at the + * given position. The given position should be interpreted as being located + * "between the character just before the start offset and the start + * offset". * - * @return true for valid line numbers. + * @param pos is the position to which start and end should be set. */ - bool hasLine() const { return line > 0; } + void setPosition(SourcePosition pos) + { + this->start = pos; + this->end = pos; + } /** - * Returns true, if the column number is valid, false otherwise. + * Returns the start position of the SourceRange instance. * - * @return true for valid column numbers. + * @return the start offset in bytes. */ - bool hasColumn() const { return column > 0; } + SourceOffset getStart() const { return start.getPosition(); } /** - * Returns true, if the position is valid, false otherwise. This function is - * equivalent to the hasLine() function. + * Returns the end position of the SourceRange instance. * - * @return true if the Position struct is valid. + * @return the end offset in bytes (non-inclusive). */ - bool valid() const { return hasLine(); } + SourceOffset getEnd() const { return end.getPosition(); } + + /** + * Returns a copy of the underlying SourcePosition instance representing the + * start position. + * + * @return a copy of the start SourcePosition instance. + */ + SourcePosition getStartPosition() const { return start; } + + /** + * Returns a copy of the underlying SourcePosition instance representing the + * end position. + * + * @return a copy of the end SourcePosition instance. + */ + SourcePosition getEndPosition() const { return end; } + + /** + * Returns the length of the range. A range may have a zero value length, in + * which case it should be interpreted as "between the character before + * the start offset and the start offset". The returned value is only valid + * if the isValid() method returns true! + * + * @return the length of the range in bytes. + */ + size_t getLength() const { return end.getPosition() - start.getPosition(); } + + /** + * Returns true if this range is actually valid. This is the case if the + * start position is smaller or equal to the end position and start and end + * position themself are valid. + * + * @return true if the Range is valid. + */ + bool isValid() + { + return start.isValid() && end.isValid() && + start.getPosition() <= end.getPosition(); + } +}; + +/** + * The SourceLocation class describes a range within a specific source file. + */ +class SourceLocation : public SourceRange { +private: + /** + * Id of the source file. + */ + SourceId sourceId; + +public: + /** + * Constructor, binds the SourceLocation to the given source file. + * + * @param sourceId is the sourceId to which the SourceLocation instance + * should be bound. The sourceId cannot be overriden after construction. + */ + SourceLocation(SourceId sourceId) : sourceId(sourceId){}; + + /** + * Constructor for a zero-length range. + * + * @param sourceId is the sourceId to which the SourceLocation instance + * should be bound. The sourceId cannot be overriden after construction. + * @param pos is the byte offset at which the SourceRange instance should be + * located. + */ + SourceLocation(SourceId sourceId, SourcePosition pos) + : SourceRange(pos), sourceId(sourceId) + { + } + + /** + * Constructor of a SourceRange instance. + * + * @param sourceId is the sourceId to which the SourceLocation instance + * should be bound. The sourceId cannot be overriden after construction. + * @param start is the byte offset of the first character in the range + * (start is inclusive). + * @param end points at the end of the range (end is non-inclusive). + */ + SourceLocation(SourceId sourceId, SourcePosition start, SourcePosition end) + : SourceRange(start, end), sourceId(sourceId) + { + } + + /** + * Constructor of a SourceRange instance. + * + * @param sourceId is the sourceId to which the SourceLocation instance + * should be bound. The sourceId cannot be overriden after construction. + * @param start is the byte offset of the first character in the range + * (start is inclusive). + * @param end points at the end of the range (end is non-inclusive). + */ + SourceLocation(SourceId sourceId, const SourceRange &range) + : SourceRange(range), sourceId(sourceId) + { + } + + /** + * Returns the id of the source file this SourceLocation instance is bound + * to. + * + * @return the id of the source file this instance is bound to. + */ + SourceId getSourceId() { return sourceId; } }; /** @@ -109,6 +328,37 @@ struct SourceLocation { */ struct SourceContext { /** + * Underlying source range (contains the byte start and end offsets in + * bytes). + */ + SourceRange range; + + /** + * Name of the underlying resource. + */ + std::string filename; + + /** + * Start line, starting with one. + */ + int startLine; + + /** + * Start column, starting with one. + */ + int startColumn; + + /** + * End line, starting with one. + */ + int endLine; + + /** + * End column, starting with one. + */ + int endColumn; + + /** * Set to the content of the current line. */ std::string text; @@ -120,6 +370,12 @@ struct SourceContext { int relPos; /** + * Relative length (in characters) within that line. May end beyond the + * text given in the context. + */ + int relLen; + + /** * Set to true if the beginning of the line has been truncated (because * the reader position is too far away from the actual position of the * line). @@ -134,39 +390,40 @@ struct SourceContext { bool truncatedEnd; /** - * Default constructor, initializes all members with zero values. + * Default constructor, initializes primitive members with zero values. */ SourceContext() - : text(), relPos(0), truncatedStart(false), truncatedEnd(false) + : startLine(0), + startColumn(0), + endLine(0), + endColumn(0), + relPos(0), + relLen(0), + truncatedStart(false), + truncatedEnd(false) { } /** - * Constructor of the SourceContext class. + * Returns true the context text is not empty. * - * @param text is the current line the text cursor is at. - * @param relPos is the relative position of the text cursor within that - * line. - * @param truncatedStart specifies whether the text was truncated at the - * beginning. - * @param truncatedEnd specifies whether the text was truncated at the - * end. - */ - SourceContext(std::string text, size_t relPos, bool truncatedStart, - bool truncatedEnd) - : text(std::move(text)), - relPos(relPos), - truncatedStart(truncatedStart), - truncatedEnd(truncatedEnd) - { - } + * @return true if the context is valid and e.g. should be printed. + */ + bool isValid() const { return range.isValid() && hasLine() && hasColumn(); } /** - * Returns true the context text is not empty. + * Returns true, if the start line number is valid, false otherwise. * - * @return true if the context is valid and e.g. should be printed. + * @return true for valid line numbers. + */ + bool hasLine() const { return startLine > 0; } + + /** + * Returns true, if the start column number is valid, false otherwise. + * + * @return true for valid column numbers. */ - bool valid() const { return !text.empty(); } + bool hasColumn() const { return startColumn > 0; } }; /** diff --git a/src/core/parser/ParserContext.hpp b/src/core/parser/ParserContext.hpp index 88d1f52..bb64600 100644 --- a/src/core/parser/ParserContext.hpp +++ b/src/core/parser/ParserContext.hpp @@ -71,8 +71,8 @@ struct ParserContext { /** * Constructor of the ParserContext class. * - * @param scope is a reference to the ParserScope instance that should be used to - * lookup names. + * @param scope is a reference to the ParserScope instance that should be + * used to lookup names. * @param registry is a reference at the Registry class, which allows to * obtain references at parsers for other formats or script engine * implementations. diff --git a/src/core/resource/ResourceManager.cpp b/src/core/resource/ResourceManager.cpp new file mode 100644 index 0000000..563fc12 --- /dev/null +++ b/src/core/resource/ResourceManager.cpp @@ -0,0 +1,252 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/common/Logger.hpp> +#include <core/common/Rtti.hpp> +#include <core/common/Utils.hpp> +#include <core/model/Node.hpp> +#include <core/parser/ParserContext.hpp> +#include <core/Registry.hpp> + +#include "Resource.hpp" +#include "ResourceManager.hpp" + +namespace ousia { + +/* Deduction of the ResourceType */ + +namespace RttiTypes { +extern const Rtti Document; +extern const Rtti Domain; +extern const Rtti Node; +extern const Rtti Typesystem; +} + +/** + * Map mapping from relations (the "rel" attribute in includes) to the + * corresponding ResourceType. + */ +static const std::unordered_map<std::string, ResourceType> RelResourceTypeMap{ + {"document", ResourceType::DOCUMENT}, + {"domain", ResourceType::DOMAIN_DESC}, + {"typesystem", ResourceType::TYPESYSTEM}}; + +/** + * Map mapping from Rtti pointers to the corresponding ResourceType. + */ +static const std::unordered_map<Rtti *, ResourceType> RttiResourceTypeMap{ + {&RttiTypes::Document, ResourceType::DOCUMENT}, + {&RttiTypes::Domain, ResourceType::DOMAIN_DESC}, + {&RttiTypes::Typesystem, ResourceType::TYPESYSTEM}}; + +static ResourceType relToResourceType(const std::string &rel, Logger &logger) +{ + std::string s = Utils::toLowercase(rel); + if (!s.empty()) { + auto it = RelResourceTypeMap.find(s); + if (it != RelResourceTypeMap.end()) { + return it->second; + } else { + logger.error(std::string("Unknown relation \"") + rel + + std::string("\"")); + } + } + return ResourceType::UNKNOWN; +} + +static ResourceType supportedTypesToResourceType(const RttiSet &supportedTypes) +{ + if (supportedTypes.size() == 1U) { + auto it = RttiResourceTypeMap.find(supportedTypes[0]); + if (it != RelResourceTypeMap.end()) { + return it->second; + } + } + return ResourceType::UNKNOWN; +} + +static ResourceType deduceResourceType(const std::string &rel, + const RttiSet &supportedTypes, + Logger &logger) +{ + ResourceType res; + + // Try to deduce the ResourceType from the "rel" attribute + res = relToResourceType(rel, logger); + + // If this did not work, try to deduce the ResourceType from the + // supportedTypes supplied by the Parser instance. + if (res == ResourceType::UNKNOWN) { + res = supportedTypesToResourceType(supportedTypes); + } + if (res == ResourceType::UNKNOWN) { + logger.note( + "Ambigous resource type, consider specifying the \"rel\" " + "attribute"); + } + return res; +} + +/* Functions for reducing the set of supported types */ + +/** + * Map mapping from relations (the "rel" attribute in includes) to the + * corresponding RttiType + */ +static const std::unordered_map<std::string, Rtti *> RelRttiTypeMap{ + {"document", &RttiTypes::DOCUMENT}, + {"domain", &RttiTypes::DOMAIN}, + {"typesystem", &RttiTypes::TYPESYSTEM}}; + +static Rtti *relToRttiType(const std::string &rel) +{ + std::string s = Utils::toLowercase(rel); + if (!s.empty()) { + auto it = RelRttiTypeMap.find(s); + if (it != RelRttiTypeMap.end()) { + return it->second; + } + } + return &ResourceType::Node; +} + +static RttiType shrinkSupportedTypes(const RttiSet &supportedTypes, + const std::string &rel) +{ + RttiSet types; + RttiType *type = relToRttiType(rel); + for (RttiType *supportedType : supportedTypes) { + if (supportedType->isa(type)) { + types.insert(supportedType); + } + } + return types; +} + +/* Class ResourceManager */ + +Rooted<Node> ResourceManager::link(ParserContext &ctx, Resource &resource, + const std::string &mimetype, + const RttiSet &supportedTypes) +{ + +} + +Rooted<Node> ResourceManager::link(ParserContext &ctx, const std::string &path, + const std::string &mimetype, + const std::string &rel, + const RttiSet &supportedTypes, + const Resource &relativeTo) +{ + // Try to deduce the ResourceType + ResourceType resourceType = + deduceResourceType(rel, supportedTypes, ctx.logger); + + // Lookup the resource for given path and resource type + Resource resource; + if (!ctx.registry.locateResource(resource, path, resourceType, + relativeTo)) { + ctx.logger.error("File \"" + path + "\" not found."); + return nullptr; + } + + // Try to shrink the set of supportedTypes + RttiSet types = shrinkSupportedTypes(supportedTypes, rel); + + // Check whether the resource has already been parsed + Rooted<Node> node = nullptr; + auto it = locations.find(res.getLocation()); + if (it != locations.end()) { + node = + } + = link(ctx, resource, mimetype, types); + + // Try to deduce the mimetype + std::string mime = mimetype; + if (mime.empty()) { + // Fetch the file extension + std::string ext = Utils::extractFileExtension(path); + if (ext.empty()) { + ctx.logger.error( + std::string("Specified filename \"") + path + + std::string( + "\" has no extension and no mimetype (\"type\" " + "attribute) was given instead.")); + return nullptr; + } + + // Fetch the mimetype for the extension + mime = ctx.registry.getMimetypeForExtension(ext); + if (mime.empty()) { + ctx.logger.error(std::string("Unknown file extension \"") + ext + + std::string("\"")); + return nullptr; + } + } + + // Fetch a parser for the mimetype + const std::pair<Parser *, RttiSet> parser = + ctx.registry.getParserForMimetype(mime); + + // Make sure a parser was found + if (!parser->first) { + ctx.logger.error(std::string("Cannot parse files of type \"") + mime + + std::string("\"")); + } + + // Make sure the parser returns one of the supported types +} + +Rooted<Node> ResourceManager::link(ParserContext &ctx, const std::string &path, + const std::string &mimetype, + const std::string &rel, + const RttiSet &supportedTypes, + SourceId relativeTo) +{ + // Fetch the resource corresponding to the source id, make sure it is valid + const Resource &relativeResource = getResource(relativeTo); + if (!relativeResource.isValid()) { + ctx.logger.fatalError("Internal error: Invalid SourceId supplied."); + return nullptr; + } + + // Continue with the usual include routine + return include(ctx, path, mimetype, rel, supportedTypes, relativeResource); +} + +const Resource &getResource(SourceId sourceId) const +{ + if (sourceId < resources.size()) { + return resources[sourceId]; + } + return NullResource; +} + +SourceContext ResourceManager::buildContext(const SourceLocation &location) +{ + SourceContext res; + + // TODO + + return res; +} +}; +} + +#endif /* _OUSIA_RESOURCE_MANAGER_HPP_ */ + diff --git a/src/core/resource/ResourceManager.hpp b/src/core/resource/ResourceManager.hpp new file mode 100644 index 0000000..809fd55 --- /dev/null +++ b/src/core/resource/ResourceManager.hpp @@ -0,0 +1,152 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file ResourceManager.hpp + * + * Defines the ResourceManager class which is responsible for keeping track of + * already included resources and to retrieve CharReader instance for not-yet + * parsed resources. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_RESOURCE_MANAGER_HPP_ +#define _OUSIA_RESOURCE_MANAGER_HPP_ + +#include <string> +#include <unordered_map> + +#include <core/common/Location.hpp> +#include <core/managed/Managed.hpp> + +namespace ousia { + +// Forward declarations +class Node; +class ParserContext; +class Resource; +class RttiSet; +extern const Resource NullResource; + +/** + * The ResourceManager class is responsible for keepking track of all included + * resources. It retrieves CharReader instances for not-yet parsed resources + * and returns references for those resources that already have been parsed. + */ +class ResourceManager { +private: + /** + * Vector used for mapping SourceId instances to the underlying resource. + */ + std::vector<Resource> resources; + + /** + * Index pointing at the next free entry in the resources list. + */ + SourceId nextFreeSourceId = 0; + + /** + * Map between Resource locations and the corresponding SourceId. + */ + std::unordered_map<std::string, SourceId> locations; + + /** + * Map between a SourceId and the corresponding (if available) parsed node + * uid (this resembles weak references to the Node instance). + */ + std::unordered_map<SourceId, ManagedUid> nodes; + + /** + * Cache used for translating byte offsets to line numbers. Maps from a + * SourceId onto a list of (sorted) SourceOffsets. The index in the list + * corresponds to the line number. + */ + std::unordered_map<SourceId, std::vector<SourceOffset>> lineNumberCache; + + + Rooted<Node> getCachedNode(SourceId id); + + Rooted<Node> getCachedNode(const std::string &location); + + SourceId getSourceId(const std::string &location); + + /** + * Used internally to either parse a resource or retrieve it from the + * internal cache of already parsed resources. + */ + Rooted<Node> link(ParserContext &ctx, Resource &resource, const std::string &mimetype, + const RttiSet &supportedTypes); + + +public: + /** + * Resolves the reference to the file specified by the given path and -- if + * this has not already happened -- parses the file. Logs any problem in + * the logger instance of the given ParserContext. + * + * @param ctx is the context from the Registry and the Logger instance will + * be looked up. + * @param path is the path to the file that should be included. + * @param mimetype is the mimetype the file was included with. If no + * mimetype is given, the path must have an extension that is known by + */ + Rooted<Node> link(ParserContext &ctx, const std::string &path, + const std::string &mimetype = "", + const std::string &rel = "", + const RttiSet &supportedTypes = RttiSet{}, + const Resource &relativeTo = NullResource); + + /** + * Resolves the reference to the file specified by the given path and -- if + * this has not already happened -- parses the file. Logs any problem in + * the logger instance of the given ParserContext. + */ + Rooted<Node> link(ParserContext &ctx, const std::string &path, + const std::string &mimetype, const std::string &rel, + const RttiSet &supportedTypes, SourceId relativeTo); + + /** + * Returns a Resource instance for the given SourceId. + * + * @param sourceId is the id of the Resource instance that should be + * returned. + * @return the Resource instance corresponding to the given sourceId. If the + * sourceId is invalid, the returned Resource will be invalid (a reference + * at NullResource). + */ + const Resource &getResource(SourceId sourceId) const; + + /** + * Creates and returns a SourceContext structure containing information + * about the given SourceLocation (such as line and column number). Throws + * a LoggableException if an irrecoverable error occurs while looking up the + * context (such as a no longer existing resource). + * + * @param location is the SourceLocation for which context information + * should be retrieved. This method is used by the Logger class to print + * pretty messages. + * @return a valid SourceContext if a valid SourceLocation was given or an + * invalid SourceContext if the location is invalid. + */ + SourceContext buildContext(const SourceLocation &location); +}; +} + +#endif /* _OUSIA_RESOURCE_MANAGER_HPP_ */ + |