summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt3
-rw-r--r--src/transformations/uniqueid/UniqueIdTransformation.cpp176
-rw-r--r--src/transformations/uniqueid/UniqueIdTransformation.hpp55
3 files changed, 234 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d2f69c6..80ad83d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -269,6 +269,9 @@ TARGET_LINK_LIBRARIES(ousia_html
ADD_LIBRARY(ousia_xml
src/plugins/xml/XmlOutput
+ # TODO: This dependency is temporary -- remove once we have a proper
+ # transformation pipeline
+ src/transformations/uniqueid/UniqueIdTransformation
)
TARGET_LINK_LIBRARIES(ousia_xml
diff --git a/src/transformations/uniqueid/UniqueIdTransformation.cpp b/src/transformations/uniqueid/UniqueIdTransformation.cpp
new file mode 100644
index 0000000..028c1ef
--- /dev/null
+++ b/src/transformations/uniqueid/UniqueIdTransformation.cpp
@@ -0,0 +1,176 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <queue>
+#include <set>
+#include <string>
+
+#include <core/common/Variant.hpp>
+
+#include "UniqueIdTransformation.hpp"
+
+namespace ousia {
+
+namespace {
+/**
+ * Internally used implementation class holding the state of a single
+ * transformation pass: the ids found so far, the nodes that still need an id
+ * and the queue of entities whose fields have not been visited yet.
+ */
+class UniqueIdTransformationImpl {
+private:
+ /**
+ * Set containing all ids that are already present in the document. It is
+ * filled by processFields() and extended by transform() with every id that
+ * is newly generated.
+ */
+ std::unordered_set<std::string> ids;
+
+ /**
+ * Vector containing all nodes that still need an id. processVariant()
+ * appends every referenced object that is a Node and has no "id" data key.
+ */
+ ManagedVector<Node> nodesWithoutId;
+
+ /**
+ * Queue of structured entities whose fields still have to be visited.
+ * transform() seeds it with the document root, processFields() appends the
+ * structured entities it encounters.
+ */
+ std::queue<Rooted<StructuredEntity>> queue;
+
+ /**
+ * Iterates over all fields of the given entity. A string-valued "id" data
+ * entry of a contained node is recorded in the ids set, structured
+ * entities are placed on the queue and the content of primitive nodes is
+ * passed to processVariant().
+ *
+ * @param entity is the entity whose fields should be visited.
+ */
+ void processFields(const DocumentEntity *entity);
+
+ /**
+ * Searches the variant for any object references, descending recursively
+ * into arrays and maps. Referenced nodes without an "id" data key are
+ * appended to nodesWithoutId.
+ *
+ * @param data is the variant that should be searched.
+ */
+ void processVariant(const Variant &data);
+
+public:
+ /**
+ * Applies the transformation to the given document.
+ *
+ * @param doc is the document for which unique IDs should be generated.
+ */
+ void transform(Handle<Document> doc);
+};
+
+/**
+ * Walks the given variant and collects referenced nodes that lack an id.
+ *
+ * Arrays and maps are descended into recursively. An object reference is
+ * appended to nodesWithoutId if it has no "id" data key and is a Node. All
+ * other variant types are ignored.
+ *
+ * @param var is the variant that should be searched.
+ */
+void UniqueIdTransformationImpl::processVariant(const Variant &var)
+{
+    // Containers: recurse into every contained variant
+    if (var.isArray()) {
+        for (const auto &item : var.asArray()) {
+            processVariant(item);
+        }
+        return;
+    }
+    if (var.isMap()) {
+        for (const auto &entry : var.asMap()) {
+            processVariant(entry.second);
+        }
+        return;
+    }
+
+    // Anything that is not an object reference is of no interest
+    if (!var.isObject()) {
+        return;
+    }
+
+    // Only nodes that do not carry an "id" yet need one generated
+    Rooted<Managed> target = var.asObject();
+    if (target->hasDataKey("id")) {
+        return;
+    }
+    if (target->isa(&RttiTypes::Node)) {
+        nodesWithoutId.push_back(target.cast<Node>());
+    }
+}
+
+/**
+ * Visits every node in every field of the given entity.
+ *
+ * For each node a string-valued "id" data entry is remembered in the ids set.
+ * Structured entities are appended to the queue for later traversal; the
+ * content of document primitives is scanned for object references.
+ *
+ * @param entity is the entity whose fields should be visited.
+ */
+void UniqueIdTransformationImpl::processFields(const DocumentEntity *entity)
+{
+    for (const NodeVector<StructureNode> &field : entity->getFields()) {
+        for (Rooted<StructureNode> child : field) {
+            // Remember an already assigned id so it is never handed out again
+            Rooted<ManagedVariant> idData =
+                child->readData<ManagedVariant>("id");
+            const bool hasStringId = idData != nullptr && idData->v.isString();
+            if (hasStringId) {
+                ids.insert(idData->v.asString());
+            }
+
+            // Structured entities are traversed later via the queue
+            if (child->isa(&RttiTypes::StructuredEntity)) {
+                queue.push(child.cast<StructuredEntity>());
+                continue;
+            }
+
+            // Primitive content may contain references to other nodes
+            if (child->isa(&RttiTypes::DocumentPrimitive)) {
+                processVariant(child.cast<DocumentPrimitive>()->getContent());
+            }
+        }
+    }
+}
+
+/**
+ * Applies the transformation to the given document.
+ *
+ * First all fields reachable from the annotations and from the document root
+ * are visited in order to collect the ids that are already in use and the
+ * referenced nodes that have no id. Afterwards a unique id is generated for
+ * each of those nodes and stored as its "id" data entry.
+ *
+ * @param doc is the document for which unique IDs should be generated.
+ */
+void UniqueIdTransformationImpl::transform(Handle<Document> doc)
+{
+    // Push the document root element onto the queue. A missing root is
+    // skipped, otherwise processFields would dereference a null pointer.
+    Rooted<StructuredEntity> root = doc->getRoot();
+    if (root != nullptr) {
+        queue.push(root);
+    }
+
+    // Process the fields of all annotations, this places the structured
+    // entities contained in them on the queue
+    for (Rooted<AnnotationEntity> annotation : doc->getAnnotations()) {
+        processFields(annotation.get());
+    }
+
+    // Iterate over all queue elements and process the fields of those elements
+    while (!queue.empty()) {
+        processFields(queue.front().get());
+        queue.pop();
+    }
+
+    // Generate ids for all referenced elements that do not yet have ids
+    std::map<std::string, size_t> seqNos;
+    for (Rooted<Node> node : nodesWithoutId) {
+        // A node that is referenced more than once is listed more than once.
+        // Skip it if an id was already assigned in an earlier iteration,
+        // otherwise that id would be overwritten by a second one.
+        if (node->hasDataKey("id")) {
+            continue;
+        }
+
+        // Generate a first id -- use the node name if it is available,
+        // otherwise use the internal type name and append the internal unique
+        // id.
+        std::string id =
+            node->getName().empty()
+                ? node->type()->name + "_" + std::to_string(node->getUid())
+                : node->getName();
+
+        // If the id name is not unique, append a sequence number
+        if (ids.count(id) != 0) {
+            const std::string prefix = id;
+
+            // Reference to the last sequence number used for this prefix;
+            // operator[] creates the entry with value zero on first use.
+            // Writing through the reference keeps the entry up to date
+            // (emplace would silently keep the stale value).
+            size_t &seqNo = seqNos[prefix];
+
+            // Increment the sequence number and make sure the resulting name
+            // is unique
+            do {
+                ++seqNo;
+                id = prefix + "_" + std::to_string(seqNo);
+            } while (ids.count(id) > 0);
+        }
+
+        // Remember the generated id
+        ids.insert(id);
+
+        // Store the resulting string as "id"
+        node->storeData("id",
+                        Variant::fromString(id).toManaged(node->getManager()));
+    }
+}
+}
+
+/**
+ * Runs one unique-id pass over the given document. All state of the pass lives
+ * in a UniqueIdTransformationImpl instance that is discarded afterwards.
+ *
+ * @param doc is the document for which unique IDs should be generated.
+ */
+void UniqueIdTransformation::transform(Handle<Document> doc)
+{
+    UniqueIdTransformationImpl pass;
+    pass.transform(doc);
+}
+}
+
diff --git a/src/transformations/uniqueid/UniqueIdTransformation.hpp b/src/transformations/uniqueid/UniqueIdTransformation.hpp
new file mode 100644
index 0000000..ea342d6
--- /dev/null
+++ b/src/transformations/uniqueid/UniqueIdTransformation.hpp
@@ -0,0 +1,55 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file UniqueIdTransformation.hpp
+ *
+ * Contains a transformation capable of generating unique ids for referenced
+ * document nodes.
+ *
+ * @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
+ */
+
+#ifndef _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_
+#define _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_
+
+#include <core/model/Document.hpp>
+
+namespace ousia {
+
+/**
+ * The UniqueIdTransformation class implements a transformation that attaches
+ * unique ids to elements that are being referenced in the document. These
+ * unique ids can for example be used in XML or HTML output.
+ *
+ * TODO: Write an actual base class for transformations and derive from it
+ */
+class UniqueIdTransformation {
+public:
+ /**
+ * Applies the transformation to the given document. The generated ids are
+ * stored on the referenced nodes themselves, nothing is returned.
+ *
+ * @param doc is the document for which unique IDs should be generated.
+ */
+ static void transform(Handle<Document> doc);
+};
+
+}
+
+#endif /* _OUSIA_UNIQUE_ID_TRANSFORMATION_HPP_ */
+