From cd0e0eaee10d6587a4547af4d86f261d34a54ee0 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel
Date: Thu, 16 Apr 2015 01:11:05 +0200
Subject: Added transformation for generating unique IDs for referenced document nodes

---
 CMakeLists.txt | 3 +
 .../uniqueid/UniqueIdTransformation.cpp | 182 +++++++++++++++++++++
 .../uniqueid/UniqueIdTransformation.hpp | 55 +++++++
 3 files changed, 240 insertions(+)
 create mode 100644 src/transformations/uniqueid/UniqueIdTransformation.cpp
 create mode 100644 src/transformations/uniqueid/UniqueIdTransformation.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d2f69c6..80ad83d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -269,6 +269,9 @@ TARGET_LINK_LIBRARIES(ousia_html
 
 ADD_LIBRARY(ousia_xml
 	src/plugins/xml/XmlOutput
+	# TODO: This dependency is temporary -- remove once we have a proper
+	# transformation pipeline
+	src/transformations/uniqueid/UniqueIdTransformation
 )
 
 TARGET_LINK_LIBRARIES(ousia_xml
diff --git a/src/transformations/uniqueid/UniqueIdTransformation.cpp b/src/transformations/uniqueid/UniqueIdTransformation.cpp
new file mode 100644
index 0000000..028c1ef
--- /dev/null
+++ b/src/transformations/uniqueid/UniqueIdTransformation.cpp
@@ -0,0 +1,182 @@
+/*
+    Ousía
+    Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see .
+*/
+
+#include
+#include
+#include
+
+#include
+
+#include "UniqueIdTransformation.hpp"
+
+namespace ousia {
+
+namespace {
+/**
+ * Internally used implementation class used for a single transformation pass.
+ */
+class UniqueIdTransformationImpl {
+private:
+	/**
+	 * Set containing all ids that are already present in the document.
+	 */
+	std::unordered_set ids;
+
+	/**
+	 * Vector containing all elements that still need an id.
+	 */
+	ManagedVector nodesWithoutId;
+
+	/**
+	 * Queue of structured entities whose fields still have to be processed.
+	 */
+	std::queue> queue;
+
+	/**
+	 * Method used to iterate over all fields of a DocumentEntity and to place
+	 * the corresponding elements on a queue.
+	 */
+	void processFields(const DocumentEntity *entity);
+
+	/**
+	 * Collects the nodes referenced by the variant that still lack an id.
+	 */
+	void processVariant(const Variant &data);
+
+public:
+	/**
+	 * Applies the transformation to the given document.
+	 *
+	 * @param doc is the document for which unique IDs should be generated.
+	 */
+	void transform(Handle doc);
+};
+
+void UniqueIdTransformationImpl::processVariant(const Variant &var)
+{
+	if (var.isArray()) {
+		for (const auto &elem : var.asArray()) {
+			processVariant(elem);
+		}
+	} else if (var.isMap()) {
+		for (const auto &elem : var.asMap()) {
+			processVariant(elem.second);
+		}
+	} else if (var.isObject()) {
+		Rooted obj = var.asObject();
+		if (!obj->hasDataKey("id") && obj->isa(&RttiTypes::Node)) {
+			nodesWithoutId.push_back(obj.cast());
+		}
+	}
+}
+
+void UniqueIdTransformationImpl::processFields(const DocumentEntity *entity)
+{
+	for (const NodeVector &nodes : entity->getFields()) {
+		for (Rooted node : nodes) {
+			// Check whether the node has the "id"-data field attached to it --
+			// if yes, store the id in the ids list
+			Rooted id = node->readData("id");
+			if (id != nullptr && id->v.isString()) {
+				ids.insert(id->v.asString());
+			}
+
+			// If the node is a structured entity just push it onto the queue
+			if (node->isa(&RttiTypes::StructuredEntity)) {
+				queue.push(node.cast());
+			} else if (node->isa(&RttiTypes::DocumentPrimitive)) {
+				// This is a primitive node -- scan its content for references
+				// to other nodes; those without an id are remembered so that
+				// an id can be generated for them later
+				processVariant(node.cast()->getContent());
+			}
+		}
+	}
+}
+
+void UniqueIdTransformationImpl::transform(Handle doc)
+{
+	// Push the document root element onto the queue
+	queue.push(doc->getRoot());
+
+	// Push the fields of all annotations onto the queue
+	for (Rooted annotation : doc->getAnnotations()) {
+		processFields(annotation.get());
+	}
+
+	// Iterate over all queue elements and process the fields of those elements
+	while (!queue.empty()) {
+		processFields(queue.front().get());
+		queue.pop();
+	}
+
+	// Generate ids for all referenced elements that do not yet have ids
+	std::map seqNos;
+	for (Rooted node : nodesWithoutId) {
+		// A node referenced several times is listed several times -- keep the
+		// id assigned on its first visit instead of generating a second one
+		if (node->hasDataKey("id")) {
+			continue;
+		}
+
+		// Generate a first id -- use the node name if it is available,
+		// otherwise use the internal type name and append the internal unique
+		// id.
+		std::string id =
+		    node->getName().empty()
+		        ? node->type()->name + "_" + std::to_string(node->getUid())
+		        : node->getName();
+
+		// If the id name is not unique, append a sequence number
+		if (ids.count(id) != 0) {
+			std::string prefix = id;
+			size_t seqNo = 0;
+
+			// Find the last sequence number for this prefix
+			auto it = seqNos.find(prefix);
+			if (it != seqNos.end()) {
+				seqNo = it->second;
+			}
+
+			// Increment the sequence number and make sure the resulting name
+			// is unique
+			do {
+				seqNo++;
+				id = prefix + "_" + std::to_string(seqNo);
+			} while (ids.count(id) > 0);
+
+			// Store the new sequence number (emplace would not overwrite)
+			seqNos[prefix] = seqNo;
+		}
+
+		// Remember the generated id
+		ids.insert(id);
+
+		// Store the resulting string as "id"
+		node->storeData("id",
+		                Variant::fromString(id).toManaged(node->getManager()));
+	}
+}
+}
+
+void UniqueIdTransformation::transform(Handle doc)
+{
+	UniqueIdTransformationImpl().transform(doc);
+}
+}
+
diff --git a/src/transformations/uniqueid/UniqueIdTransformation.hpp b/src/transformations/uniqueid/UniqueIdTransformation.hpp
new file mode 100644
index 0000000..ea342d6
--- /dev/null
+++ b/src/transformations/uniqueid/UniqueIdTransformation.hpp
@@ -0,0 +1,55 @@
+/*
+    Ousía
+    Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see .
+*/
+
+/**
+ * @file UniqueIdTransformation.hpp
+ *
+ * Contains a transformation capable of generating unique ids for referenced
+ * document nodes.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_
+#define _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_
+
+#include
+
+namespace ousia {
+
+/**
+ * The UniqueIdTransformation class implements a transformation that attaches
+ * unique ids to elements that are being referenced in the document. These
+ * unique ids can for example be used in XML or HTML output.
+ *
+ * TODO: Write an actual base class for transformations and derive from it
+ */
+class UniqueIdTransformation {
+public:
+	/**
+	 * Applies the transformation to the given document.
+	 *
+	 * @param doc is the document for which unique IDs should be generated.
+	 */
+	static void transform(Handle doc);
+};
+
+}
+
+#endif /* _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_ */
+
-- 
cgit v1.2.3