diff options
Diffstat (limited to 'src/transformations')
-rw-r--r-- | src/transformations/uniqueid/UniqueIdTransformation.cpp | 176 | ||||
-rw-r--r-- | src/transformations/uniqueid/UniqueIdTransformation.hpp | 55 |
2 files changed, 231 insertions, 0 deletions
diff --git a/src/transformations/uniqueid/UniqueIdTransformation.cpp b/src/transformations/uniqueid/UniqueIdTransformation.cpp new file mode 100644 index 0000000..028c1ef --- /dev/null +++ b/src/transformations/uniqueid/UniqueIdTransformation.cpp @@ -0,0 +1,176 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <queue> +#include <set> +#include <string> + +#include <core/common/Variant.hpp> + +#include "UniqueIdTransformation.hpp" + +namespace ousia { + +namespace { +/** + * Internally used implementation class used for a single transformation pass. + */ +class UniqueIdTransformationImpl { +private: + /** + * Set containing all ids that are already present in the document. + */ + std::unordered_set<std::string> ids; + + /** + * Vector containing all elements that still need an id. + */ + ManagedVector<Node> nodesWithoutId; + + /** + * Traverse the document tree -- find all elements with primitive content. + */ + std::queue<Rooted<StructuredEntity>> queue; + + /** + * Method used to iterate over all fields of a DocumentEntity and to place + * the corresponding elements on a queue. + */ + void processFields(const DocumentEntity *entity); + + /** + * Searches the variant for any object references. + */ + void processVariant(const Variant &data); + +public: + /** + * Applys the transformation to the given document. + * + * @param doc is the document for which unique IDs should be generated. + */ + void transform(Handle<Document> doc); +}; + +void UniqueIdTransformationImpl::processVariant(const Variant &var) +{ + if (var.isArray()) { + for (const auto &elem : var.asArray()) { + processVariant(elem); + } + } else if (var.isMap()) { + for (const auto &elem : var.asMap()) { + processVariant(elem.second); + } + } else if (var.isObject()) { + Rooted<Managed> obj = var.asObject(); + if (!obj->hasDataKey("id") && obj->isa(&RttiTypes::Node)) { + nodesWithoutId.push_back(obj.cast<Node>()); + } + } +} + +void UniqueIdTransformationImpl::processFields(const DocumentEntity *entity) +{ + for (const NodeVector<StructureNode> &nodes : entity->getFields()) { + for (Rooted<StructureNode> node : nodes) { + // Check whether the node has the "id"-data field attached to it -- + // if yes, store the id in the ids list + Rooted<ManagedVariant> id = node->readData<ManagedVariant>("id"); + if (id != nullptr && id->v.isString()) { + ids.insert(id->v.asString()); + } + + // If the node is a structured entity just push it onto the stack + if (node->isa(&RttiTypes::StructuredEntity)) { + queue.push(node.cast<StructuredEntity>()); + } else if (node->isa(&RttiTypes::DocumentPrimitive)) { + // This is a primitive node -- check whether it references any + // other, if yes, check whether the primitive field is an object + // that references another entry + processVariant(node.cast<DocumentPrimitive>()->getContent()); + } + } + } +} + +void UniqueIdTransformationImpl::transform(Handle<Document> doc) +{ + // Push the document root element onto the queue + queue.push(doc->getRoot()); + + // Push the fields of all annotations onto the queue + for (Rooted<AnnotationEntity> annotation : doc->getAnnotations()) { + processFields(annotation.get()); + } + + // Iterate over all queue elements and process the fields of those elements + while (!queue.empty()) { + processFields(queue.front().get()); + queue.pop(); + } + + // Generate ids for all referenced elements that do not yet have ids + std::map<std::string, size_t> seqNos; + for (Rooted<Node> node : nodesWithoutId) { + // Generate a first id -- use the node name if it is available, + // otherwise use the internal type name and append the internal unique + // id. + std::string id = + node->getName().empty() + ? node->type()->name + "_" + std::to_string(node->getUid()) + : node->getName(); + + // If the id name is not unique, append a sequence number + if (ids.count(id) != 0) { + std::string prefix = id; + size_t seqNo = 0; + + // Find the last sequence number for this prefix + auto it = seqNos.find(prefix); + if (it != seqNos.end()) { + seqNo = it->second; + } + + // Increment the sequence number and make sure the resulting name + // is unique + do { + seqNo++; + id = prefix + "_" + std::to_string(seqNo); + } while (ids.count(id) > 0); + + // Store the new sequence number in the seqNos map + seqNos.emplace(prefix, seqNo); + } + + // Remember the generated id + ids.insert(id); + + // Store the resulting string as "id" + node->storeData("id", + Variant::fromString(id).toManaged(node->getManager())); + } +} +} + +void UniqueIdTransformation::transform(Handle<Document> doc) +{ + UniqueIdTransformationImpl().transform(doc); +} +} + diff --git a/src/transformations/uniqueid/UniqueIdTransformation.hpp b/src/transformations/uniqueid/UniqueIdTransformation.hpp new file mode 100644 index 0000000..ea342d6 --- /dev/null +++ b/src/transformations/uniqueid/UniqueIdTransformation.hpp @@ -0,0 +1,55 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file UniqueIdTransformation.hpp + * + * Contains a transformation capable of generating unique ids for referenced + * document nodes. + * + * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_ +#define _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_ + +#include <core/model/Document.hpp> + +namespace ousia { + +/** + * The UniqueIdTransformation class implements a transformation that attaches + * unique ids to elements that are being referenced in the document. These + * unique ids can for example be used in XML or HTML output. + * + * TODO: Write an actual base class for transformations and derive from it + */ +class UniqueIdTransformation { +public: + /** + * Applys the transformation to the given document. + * + * @param doc is the document for which unique IDs should be generated. + */ + static void transform(Handle<Document> doc); +}; + +} + +#endif /* _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_ */ + |