From cd0e0eaee10d6587a4547af4d86f261d34a54ee0 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Thu, 16 Apr 2015 01:11:05 +0200 Subject: Added transformation for generating unique IDs for referenced document nodes --- .../uniqueid/UniqueIdTransformation.cpp | 176 +++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 src/transformations/uniqueid/UniqueIdTransformation.cpp (limited to 'src/transformations/uniqueid/UniqueIdTransformation.cpp') diff --git a/src/transformations/uniqueid/UniqueIdTransformation.cpp b/src/transformations/uniqueid/UniqueIdTransformation.cpp new file mode 100644 index 0000000..028c1ef --- /dev/null +++ b/src/transformations/uniqueid/UniqueIdTransformation.cpp @@ -0,0 +1,176 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include + +#include + +#include "UniqueIdTransformation.hpp" + +namespace ousia { + +namespace { +/** + * Internally used implementation class used for a single transformation pass. + */ +class UniqueIdTransformationImpl { +private: + /** + * Set containing all ids that are already present in the document. + */ + std::unordered_set ids; + + /** + * Vector containing all elements that still need an id. + */ + ManagedVector nodesWithoutId; + + /** + * Traverse the document tree -- find all elements with primitive content. + */ + std::queue> queue; + + /** + * Method used to iterate over all fields of a DocumentEntity and to place + * the corresponding elements on a queue. + */ + void processFields(const DocumentEntity *entity); + + /** + * Searches the variant for any object references. + */ + void processVariant(const Variant &data); + +public: + /** + * Applys the transformation to the given document. + * + * @param doc is the document for which unique IDs should be generated. + */ + void transform(Handle doc); +}; + +void UniqueIdTransformationImpl::processVariant(const Variant &var) +{ + if (var.isArray()) { + for (const auto &elem : var.asArray()) { + processVariant(elem); + } + } else if (var.isMap()) { + for (const auto &elem : var.asMap()) { + processVariant(elem.second); + } + } else if (var.isObject()) { + Rooted obj = var.asObject(); + if (!obj->hasDataKey("id") && obj->isa(&RttiTypes::Node)) { + nodesWithoutId.push_back(obj.cast()); + } + } +} + +void UniqueIdTransformationImpl::processFields(const DocumentEntity *entity) +{ + for (const NodeVector &nodes : entity->getFields()) { + for (Rooted node : nodes) { + // Check whether the node has the "id"-data field attached to it -- + // if yes, store the id in the ids list + Rooted id = node->readData("id"); + if (id != nullptr && id->v.isString()) { + ids.insert(id->v.asString()); + } + + // If the node is a structured entity just push it onto the stack + if (node->isa(&RttiTypes::StructuredEntity)) { + queue.push(node.cast()); + } else if (node->isa(&RttiTypes::DocumentPrimitive)) { + // This is a primitive node -- check whether it references any + // other, if yes, check whether the primitive field is an object + // that references another entry + processVariant(node.cast()->getContent()); + } + } + } +} + +void UniqueIdTransformationImpl::transform(Handle doc) +{ + // Push the document root element onto the queue + queue.push(doc->getRoot()); + + // Push the fields of all annotations onto the queue + for (Rooted annotation : doc->getAnnotations()) { + processFields(annotation.get()); + } + + // Iterate over all queue elements and process the fields of those elements + while (!queue.empty()) { + processFields(queue.front().get()); + queue.pop(); + } + + // Generate ids for all referenced elements that do not yet have ids + std::map seqNos; + for (Rooted node : nodesWithoutId) { + // Generate a first id -- use the node name if it is available, + // otherwise use the internal type name and append the internal unique + // id. + std::string id = + node->getName().empty() + ? node->type()->name + "_" + std::to_string(node->getUid()) + : node->getName(); + + // If the id name is not unique, append a sequence number + if (ids.count(id) != 0) { + std::string prefix = id; + size_t seqNo = 0; + + // Find the last sequence number for this prefix + auto it = seqNos.find(prefix); + if (it != seqNos.end()) { + seqNo = it->second; + } + + // Increment the sequence number and make sure the resulting name + // is unique + do { + seqNo++; + id = prefix + "_" + std::to_string(seqNo); + } while (ids.count(id) > 0); + + // Store the new sequence number in the seqNos map + seqNos.emplace(prefix, seqNo); + } + + // Remember the generated id + ids.insert(id); + + // Store the resulting string as "id" + node->storeData("id", + Variant::fromString(id).toManaged(node->getManager())); + } +} +} + +void UniqueIdTransformation::transform(Handle doc) +{ + UniqueIdTransformationImpl().transform(doc); +} +} + -- cgit v1.2.3