diff options
author | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2014-12-16 14:26:38 +0100 |
---|---|---|
committer | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2014-12-16 14:26:38 +0100 |
commit | 8230347c1b4797ea214fb16d8c23e30f7cda9042 (patch) | |
tree | 08291dde0a0dc18997608a1cbdb45c8305c83158 /src/core/model | |
parent | bd9581fc44fb0a653105c76e7ad7b2f4dba8075b (diff) |
first draft for DocumentEntity.
Diffstat (limited to 'src/core/model')
-rw-r--r-- | src/core/model/Document.hpp | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/src/core/model/Document.hpp b/src/core/model/Document.hpp new file mode 100644 index 0000000..925570b --- /dev/null +++ b/src/core/model/Document.hpp @@ -0,0 +1,193 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Document.hpp + * + * This header contains the class hierarchy of actual document classes. A graph + * of connected instances of these nodes is a "Document". How the different + * DocumentEntity instances may be connected within the graph is subject to the + * specification in the respective Domain(s) (see also the Domain.hpp). + * + * A Document, from top to bottom, consists of "Document" instance, + * which "owns" the structural root node of the in-document graph. This might + * for example be a "book" node, if the respective document implements the + * "book" domain. That root node in turn has structure nodes as children as well + * as annotations that refer to the content of that structure node. + * + * Consider this simplified XML representation of a document (TODO: Use + * non-simplified XML as soon as possible): + * + * <Document implements="book"> + * <StructureEntity class="book"> + * <StructureEntity class="section"> + * <DocumentPrimitive> + * This is some text with some <Anchor id="1"/>emphasized and + * <Anchor id="2"/>strong<Anchor id="3"/> text. + * </DocumentPrimitive> + * <AnnotationEntity class="emphasized" start="1", end="3"/> + * <AnnotationEntity class="strong" start="2", end="3"/> + * </StructureEntity> + * </StructureEntity> + * </Document> + * + * As can be seen the StructureEntities inherently follow a tree structure that + * is restricted by the implicit context free grammar of the "book" Domain + * definition (e.g. it is not allowed to have a "book" node inside a "section"; + * refer to te Domain.hpp for more information). + * + * Another interesting fact is the special place of AnnotationEntities: They are + * Defined by start and end Anchors in the text. Note that this allows for + * overlapping annotations and provides a more intuitive (and semantically + * sound) handling of such span-like concepts. + * Note that the place of an AnnotationEntity within the XML above is not + * strictly defined. It might as well be placed as a child of the "book" node. + * In general it is recommended to use the lowest possible place in the + * StructureTree to include the AnnotationEntity for better readability. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_MODEL_DOCUMENT_HPP_ +#define _OUSIA_MODEL_DOCUMENT_HPP_ + +#include <core/managed/ManagedContainer.hpp> +#include <core/Node.hpp> +#include <core/common/Variant.hpp> + +#include "Domain.hpp" +#include "Typesystem.hpp" + +namespace ousia { +namespace model { + +class StructuredEntity; + +/** + * A DocumentEntity is the common superclass for StructuredEntities and + * AnnotationEntities. Similarly to DescriptorEntity in the Domain.hpp it + * defines that each node in the Document graph may have attributes (in form + * of a struct Variant), and fields. + * The fields here are a vector of vectors. The first vector implements all + * fields while the inner vector contains all children in this field. + * We provide, however, convenience functions for better access via the field + * name. + * + */ +class DocumentEntity : public Node { +private: + Owned<Descriptor> descriptor; + const Variant attributes; + std::vector<ManagedVector<StructuredEntity>> fields; + + Rooted<FieldDescriptor> getFieldDescriptor(const std::string &fieldName); + +public: + DocumentEntity(Manager &mgr, std::string name = "", Handle<Node> parent, + Handle<Descriptor> descriptor, Variant attributes) + : Node(mgr, std::move(name), parent), + descriptor(acquire(descriptor)), + attributes(std::move(attributes)) + { + // TODO: Validation at construction time? + // insert empty vectors for each field. + for (int f = 0; f < descriptor->getFieldDescriptors.size(); f++) { + fields.push_back(ManagedVector(this)); + } + } + + Rooted<Descriptor> getDescriptor const() { return descriptor; } + + const Variant &getAttributes() const { return attributes; } + + Variant getAttributes const { return attributes; } + + /** + * This allows a direct manipulation of the internal data structure of a + * DocumentEntity and is not recommended. TODO: Delete this? + */ + std::vector<ManagedVector<StructuredEntity>> &getFields() { return fields; } + + /** + * This returns true if there is a FieldDescriptor in the Descriptor for + * this DocumentEntity which has the given name. If an empty name is + * given it is assumed that the 'default' FieldDescriptor is referenced, + * where 'default' means either: + * 1.) The only TREE typed FieldDescriptor (if present) or + * 2.) the only FieldDescriptor (if only one is specified). + * + * @param fieldName is the name of a field as specified in the + * FieldDescriptor in the Domain description. + * @return true if this FieldDescriptor exists. + */ + bool hasField(const std::string &fieldName = "") + { + return getFieldDescriptor(fieldName) != nullptr; + } + + /** + * This returns the vector of entities containing all members of the field + * for which the FieldDescriptor has the specified name. If an empty name is + * given it is assumed that the 'default' FieldDescriptor is referenced, + * where 'default' means either: + * 1.) The only TREE typed FieldDescriptor (if present) or + * 2.) the only FieldDescriptor (if only one is specified). + * + * Note that the output of this method might well be ambigous: If no + * FieldDescriptor matches the given name an empty ManagedVector is + * returned. This is also the case, however, if there are no members for an + * existing field. Therefore it is recommended to additionally check the + * output of "hasField" or use the overloaded version of this method with + * a FieldDescriptor as input. + * + * @param fieldName is the name of the field as specified in the + * FieldDescriptor in the Domain description. + * @return a ManagedVector of all StructuredEntities in that field. If the + * field is unknown or if no members exist in that field yet, the + * ManagedVector will be empty. Note that the ManagedVector is + * returned as a reference, so it is possible to manipulate this + * DocumentEntities content using this function. + */ + ManagedVector<StructuredEntity> &getField(const std::string &fieldName = "") + { + Rooted<FieldDescriptor> fd = getFieldDescriptor(fieldName); + if (fd == nullptr) { + return ManagedVector<StructuredEntity>(this); + } + return getField(fd); + } + + /** + * This returns the vector of entities containing all members of the field + * with the given FieldDescriptor. + * + * If the FieldDescriptor does not belong to the Descriptor of this node + * an exception is thrown. + * + * @param fieldDescriptor is a FieldDescriptor defined in the Descriptor for + * this DocumentEntity. + * @return a ManagedVector of all StructuredEntities in that field. + */ + ManagedVector<StructuredEntity> &getField( + Rooted<FieldDescriptor> fieldDescriptor); +}; +} +} + +#endif /* _OUSIA_MODEL_DOCUMENT_HPP_ */ + |