diff options
Diffstat (limited to 'src/core/model/Ontology.hpp')
-rw-r--r-- | src/core/model/Ontology.hpp | 1203 |
1 files changed, 1203 insertions, 0 deletions
diff --git a/src/core/model/Ontology.hpp b/src/core/model/Ontology.hpp new file mode 100644 index 0000000..948caa5 --- /dev/null +++ b/src/core/model/Ontology.hpp @@ -0,0 +1,1203 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file Ontology.hpp + * + * This header contains the class hierarchy of descriptor classes for ontologies. + * Properly connected instances of these classes with a Ontology node as root + * describe a semantic Ontology in a formal way. It specifies the allowed (tree) + * structure of a document by means of StructuredClasses as well as the allowed + * Annotations by means of AnnotationClasses. + * + * The Structure Description contained in the hierarchy of StructuredClasses is + * equivalent to a context free grammar of a special form. We introduce the + * terms "StructuredClass" and "FieldDescriptor". + * On the top level you would start with a StructuredClass, say "book", which + * in turn might contain two FieldDescriptors, one for the meta data of ones + * book and one for the actual structure. 
Consider the following XML:
 *
 * \code{.xml}
 * <ontology name="book">
 *     <struct name="book" cardinality="1" isRoot="true">
 *         <field>
 *             <childRef ref="book.chapter"/>
 *             <childRef ref="book.paragraph"/>
 *         </field>
 *     </struct>
 *     <struct name="chapter">
 *         <field>
 *             <childRef ref="book.section"/>
 *             <childRef ref="book.paragraph"/>
 *         </field>
 *     </struct>
 *     <struct name="section">
 *         <field>
 *             <childRef ref="book.subsection"/>
 *             <childRef ref="book.paragraph"/>
 *         </field>
 *     </struct>
 *     <struct name="subsection">
 *         <field>
 *             <childRef ref="book.paragraph"/>
 *         </field>
 *     </struct>
 *     <struct name="paragraph" transparent="true">
 *         <field>
 *             <childRef ref="book.text"/>
 *         </field>
 *     </struct>
 *     <struct name="text" transparent="true">
 *         <primitive type="string"/>
 *     </struct>
 * </ontology>
 * \endcode
 *
 * Note that we define one field as the TREE (meaning the main or default
 * document structure) and one merely as SUBTREE, relating to supporting
 * information. You are not allowed to define more than one field of type
 * "TREE".
 *
 * The translation to a context free grammar is as follows:
 *
 * \code{.txt}
 * BOOK              := <book> BOOK_TREE </book>
 * BOOK_TREE         := CHAPTER BOOK_TREE | PARAGRAPH BOOK_TREE | epsilon
 * CHAPTER           := <chapter> CHAPTER_TREE </chapter>
 * CHAPTER_TREE      := SECTION CHAPTER_TREE | PARAGRAPH CHAPTER_TREE | epsilon
 * SECTION           := <section> SECTION_TREE </section>
 * SECTION_TREE      := SUBSECTION SECTION_TREE | PARAGRAPH SECTION_TREE |
 *                      epsilon
 * SUBSECTION        := <subsection> SUBSECTION_TREE </subsection>
 * SUBSECTION_TREE   := PARAGRAPH SUBSECTION_TREE | epsilon
 * PARAGRAPH         := <paragraph> PARAGRAPH_CONTENT </paragraph>
 * PARAGRAPH_CONTENT := string
 * \endcode
 *
 * Note that this translation recurs to further nonterminals like SECTION but
 * necessarily produces one "book" terminal.
Also note that, in principle, + * this grammar translation allows for arbitrarily many children instances of + * the proper StructuredClass. This can be regulated by the "cardinality" + * property of a StructuredClass. + * + * It is possible to add further fields, like we would in the "headings" ontology + * to add titles to our structure. + * + * \code{.xml} + * <ontology name="headings"> + * <import rel="ontology" src="./book_ontology.osxml"/> + * <struct name="heading" cardinality="1" transparent="true"> + * <parentRef ref="book.book"> + * <field name="heading" isSubtree="true" optional="true"/> + * </parentRef> + * ... + * <fieldRef name="book.paragraph."> + * </struct> + * </ontology> + * \endcode + * + * This would change the context free grammar as follows: + * + * \code{.txt} + * BOOK := <book> HEADING BOOK_TREE </book> + * HEADING := <heading> PARAGRAPH </heading> + * \endcode + * + * AnnotationClasses on the other hand do not specify a context free grammar. + * They merely specify what kinds of Annotations are allowed within this ontology + * and which fields or attributes they have. Note that Annotations are allowed + * to define structured children that manifest e.g. meta information of that + * Annotation. 
An example for that would be the "comment" ontology: + * + * \code{.xml} + * <ontology name="comments"> + * <import rel="ontology" src="./book_ontology.osxml"/> + * + * <annotation name="comment"> + * <field name="content" isSubtree="true"> + * <childRef ref="book.paragraph"/> + * </field> + * <field name="replies" isSubtree="true"> + * <childRef ref="reply"/> + * </field> + * </annotation> + * + * <struct name="comment"> + * <field name="content"> + * <childRef ref="book.paragraph"/> + * </field> + * <field name="replies" isSubtree="true"> + * <childRef ref="reply"/> + * </field> + * <parentRef ref="book.paragraph"> + * <fieldRef ref="$default"/> + * </parentRef> + * </struct> + * <struct name="reply"> + * <field name="content" isSubtree="true"> + * <childRef ref="book.paragraph"/> + * </field> + * <field name="replies" isSubtree="true"> + * <childRef ref="reply"/> + * </field> + * </struct> + * </ontology> + * \endcode + * + * Here we have comment annotations, which have a reply tree as sub structure. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ + +#ifndef _OUSIA_MODEL_DOMAIN_HPP_ +#define _OUSIA_MODEL_DOMAIN_HPP_ + +#include <core/managed/ManagedContainer.hpp> +#include <core/RangeSet.hpp> + +#include "Node.hpp" +#include "RootNode.hpp" +#include "Typesystem.hpp" + +namespace ousia { + +// Forward declarations +class Rtti; +class Descriptor; +class StructuredClass; +class Ontology; + +/** + * Magic field name used to identify the default field. The default field is + * either the tree field or the only subtree field. + */ +static const std::string DEFAULT_FIELD_NAME = "$default"; + +/** + * As mentioned in the description above a FieldDescriptor specifies the + * StructuredClasses that are allowed as children of a StructuredClass or + * AnnotationClass. 
A field may also be primitive, which means that a proper + * instance of the respective StructuredClass or AnnotationClass must provide + * accordingly typed content without further descending in the Structure + * Hierarchy. + * + * As an example consider the "text" StructuredClass, which might allow + * the actual text content. Here is the according XML: + * + * \code{.xml} + * <struct name="text" transparent="true"> + * <primitive type="string"/> + * </struct> + * \endcode + * + */ +class FieldDescriptor : public Node { + friend Descriptor; + +public: + /** + * This enum class contains all possible FieldTypes, meaning either the + * main structure beneath this Descriptor (TREE) or supporting structure + * (SUBTREE) + * + * Note that there may be only one TREE field in a descriptor. + */ + enum class FieldType { TREE, SUBTREE }; + +private: + NodeVector<StructuredClass> children; + FieldType fieldType; + Owned<Type> primitiveType; + bool optional; + bool primitive; + +protected: + bool doValidate(Logger &logger) const override; + +public: + /** + * This is the constructor for primitive fields. + * + * @param mgr is the global Manager instance. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param primitiveType is a handle to some Type in some Typesystem of which + * one instance is allowed to fill this field. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + */ + FieldDescriptor(Manager &mgr, Handle<Type> primitiveType, + Handle<Descriptor> parent, + FieldType fieldType = FieldType::TREE, + std::string name = "", bool optional = false); + + /** + * This is the constructor for non-primitive fields. You have to provide + * children here later on. + * + * @param mgr is the global Manager instance. 
+ * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param fieldType is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + */ + FieldDescriptor(Manager &mgr, Handle<Descriptor> parent = nullptr, + FieldType fieldType = FieldType::TREE, + std::string name = "", bool optional = false); + + /** + * Returns a const reference to the NodeVector of StructuredClasses whose + * instances are allowed as children in the StructureTree of instances of + * this field. + * + * @return a const reference to the NodeVector of StructuredClasses whose + * instances are allowed as children in the StructureTree of instances of + * this field. + */ + const NodeVector<StructuredClass> &getChildren() const { return children; } + + /** + * Returns all StructuredClasses whose instances are allowed as children in + * the Structure Tree of instances of this field including subclasses of + * children, which are allowed directly. + * + * @return all StructuredClasses whose instances are allowed as children in + * the Structure Tree of instances of this field including subclasses of + * children, which are allowed directly. + */ + NodeVector<StructuredClass> getChildrenWithSubclasses() const; + + /** + * Adds a StructuredClass whose instances shall be allowed as children in + * the StructureTree of instances of this field. + */ + void addChild(Handle<StructuredClass> c) + { + invalidate(); + children.push_back(c); + } + + /** + * Adds multiple StructuredClasses whose instances shall be allowed as + * children in the StructureTree of instances of this field. 
+ */ + void addChildren(const std::vector<Handle<StructuredClass>> &cs) + { + invalidate(); + children.insert(children.end(), cs.begin(), cs.end()); + } + + /** + * Removes the given StructuredClass from the list of children of this + * FieldDescriptor. + * + * @param c some StructuredClass that shan't be a child of this + * FieldDescriptor anymore. + * @return true if the FieldDescriptor contained this child and false if it + * did not. + */ + bool removeChild(Handle<StructuredClass> c); + + /** + * Returns the type of this field (not to be confused with the primitive + * type of this field). + * + * @return the type of this field. + */ + FieldType getFieldType() const { return fieldType; } + /** + * Sets the type of this field (not to be confused with the primitive type + * of this field). + * + * @param ft is the new type of this field. + */ + void setFieldType(const FieldType &ft) + { + invalidate(); + fieldType = ft; + } + + /** + * Returns if this field is primitive. + * + * @return true if and only if this field is primitive. + */ + bool isPrimitive() const { return primitive; } + + /** + * Returns the primitive type of this field, which is only allowed to be + * set if the type of this field is PRIMITIVE. + * + * @return the primitive type of this field. + */ + Rooted<Type> getPrimitiveType() const { return primitiveType; } + + /** + * Sets the primitive type of this field, which is only allowed to be + * set if the type of this field is PRIMITIVE. + * + * @param t is the new primitive type of this field- + */ + void setPrimitiveType(Handle<Type> t) + { + invalidate(); + primitiveType = acquire(t); + } + + /** + * Returns true if and only if this field is optional. + * + * @return true if and only if this field is optional. + */ + bool isOptional() const { return optional; } + + /** + * Specifies whether this field shall be optional. + * + * @param o should be true if and only if this field should be optional. 
+ */ + void setOptional(bool o) + { + invalidate(); + optional = std::move(o); + } + + /** + * This tries to construct the shortest possible path of this Descriptor + * to the given child Descriptor. Note that this method has the problem that + * an empty return path does NOT strictly imply that no such path could + * be constructed: We also return an empty vector if the given + * Descriptor is a direct child. Therefore we also return a bool value + * indicating that the path is valid or not. + * + * Implicitly this does a breadth-first search on the graph of + * StructuredClasses that are transparent. It also takes care of cycles. + * + * @param childDescriptor is a supposedly valid child Descriptor of this + * Descriptor. + * @return a tuple containing a path of FieldDescriptors and + * StructuredClasses between this Descriptor and the + * input Descriptor and a bool value indicating if + * the construction was successful. + * + */ + std::pair<NodeVector<Node>, bool> pathTo( + Handle<StructuredClass> childDescriptor, Logger &logger) const; + /** + * This tries to construct the shortest possible path of this Descriptor + * to the given FieldDescriptor. Note that this method has the problem that + * an empty return path does NOT strictly imply that no such path could + * be constructed: We also return an empty vector if the given + * FieldDescriptor is a direct child. Therefore we also return a bool value + * indicating that the path is valid or not. + * + * + * Implicitly this does a breadth-first search on the graph of + * StructuredClasses that are transparent. It also takes care of cycles. + * + * @param field is a FieldDescriptor that may be allowed as child of this + * Descriptor. + * @return a path of FieldDescriptors and StructuredClasses between + * this Descriptor and the input FieldDescriptor or an empty + * vector if no such path could be constructed. 
+ */ + NodeVector<Node> pathTo(Handle<FieldDescriptor> field, + Logger &logger) const; + + /** + * Returns a vector of all TREE fields that are allowed as structure tree + * children of an instance of this Descriptor. This also makes use of + * transparency. + * The list is sorted by the number of transparent elements that have to be + * constructed to arrive at the respective FieldDescriptor. + * + * @return a vector of all TREE fields that are allowed as structure tree + * children of an instance of this Descriptor. + */ + NodeVector<FieldDescriptor> getDefaultFields() const; +}; + +/** + * This is a super class for StructuredClasses and AnnotationClasses and is, + * in itself, not supposed to be instantiated. It defines that both, Annotations + * and StructuredEntities, may have attributes and fields. For more information + * on fields please have a look at the header documentation as well as the + * documentation of the FieldDescriptor class. + * + * Attributes are primitive content stored in a key-value fashion. Therefore + * the attribute specification of a descriptor is done by referencing an + * appropriate StructType that contains all permitted keys and value types. + * + * In XML terms the difference between primitive fields and attributes can be + * explained as the difference between node attributes and node children. + * Consider the XML + * + * \code{.xml} + * <A key="value"> + * <key>value</key> + * </A> + * \endcode + * + * key="value" inside the A-node would be an attribute, while <key>value</key> + * would be a primitive field. While equivalent in XML the semantics are + * different: An attribute describes indeed attributes, features of one single + * node whereas a primitive field describes the _content_ of a node. 
+ * + */ +class Descriptor : public Node { + friend FieldDescriptor; + +private: + Owned<StructType> attributesDescriptor; + NodeVector<FieldDescriptor> fieldDescriptors; + + bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); + +protected: + void doResolve(ResolutionState &state) override; + + bool doValidate(Logger &logger) const override; + +public: + Descriptor(Manager &mgr, std::string name, Handle<Ontology> ontology) + : Node(mgr, std::move(name), ontology), + attributesDescriptor(acquire(new StructType(mgr, "", nullptr))), + fieldDescriptors(this) + { + } + + /** + * Returns a reference to the StructType that specifies the attribute keys + * as well as value ontologies for this Descriptor. + * + * @return a reference to the StructType that specifies the attribute keys + * as well as value ontologies for this Descriptor. + */ + Rooted<StructType> getAttributesDescriptor() const + { + return attributesDescriptor; + } + + /** + * Returns the NodeVector of all FieldDescriptors of this Descriptor. + * + * @return the NodeVector of all FieldDescriptors of this Descriptor. + */ + virtual NodeVector<FieldDescriptor> getFieldDescriptors() const + { + return fieldDescriptors; + } + + /** + * Returns the index of the FieldDescriptor with the given name or -1 if no + * such FieldDescriptor was found. + * + * @param name the name of a FieldDescriptor. + + * @return the index of the FieldDescriptor with the given name or -1 if + * no such FieldDescriptor was found. + */ + ssize_t getFieldDescriptorIndex( + const std::string &name = DEFAULT_FIELD_NAME) const; + /** + * Returns the index of the given FieldDescriptor or -1 of the given + * FieldDescriptor is not registered at this Descriptor. + * + * @param fd a FieldDescriptor. + + * @return the index of the given FieldDescriptor or -1 of the given + * FieldDescriptor is not registered at this Descriptor. 
+ */ + ssize_t getFieldDescriptorIndex(Handle<FieldDescriptor> fd) const; + /** + * Returns the FieldDescriptor with the given name. + * + * @param name the name of a FieldDescriptor. + + * @return the FieldDescriptor with the given name or a nullptr if no + * such FieldDescriptor was found. + */ + Rooted<FieldDescriptor> getFieldDescriptor( + const std::string &name = DEFAULT_FIELD_NAME) const; + + /** + * This returns true if this Descriptor has a FieldDescriptor with the + * given name. + * + * @param name the name of a FieldDescriptor. + + * @return true if this Descriptor has a FieldDescriptor with the given + * name + */ + bool hasField(const std::string &fieldName = DEFAULT_FIELD_NAME) const + { + return getFieldDescriptorIndex(fieldName) != -1; + } + + /** + * Adds the given FieldDescriptor to this Descriptor. This also sets the + * parent of the given FieldDescriptor if it is not set yet. + * + * @param fd is a FieldDescriptor. + * @return returns true if the given FieldDescriptor was not added at the + * end one place before because a TREE field already existed and + * the TREE field has to be at the end. + */ + bool addFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); + + /** + * Adds the given FieldDescriptor to this Descriptor. This also sets the + * parent of the given FieldDescriptor if it is not set to this Descriptor + * already and removes it from the old parent Descriptor. + * + * @param fd is a FieldDescriptor. + * @return returns true if the given FieldDescriptor was not added at the + * end one place before because a TREE field already existed and + * the TREE field has to be at the end. + */ + bool moveFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); + + /** + * Copies a FieldDescriptor that belongs to another Descriptor to this + * Descriptor. + * + * @param fd some FieldDescriptor belonging to another Descriptor. 
+ * @return returns true if the given FieldDescriptor was not added at the + * end one place before because a TREE field already existed and + * the TREE field has to be at the end. + */ + bool copyFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); + + /** + * Removes the given FieldDescriptor from this Descriptor. This also sets + * the parent of the given FieldDescriptor to null. + * + * @param fd is a FieldDescriptor. + * @return true if the FieldDescriptor was removed and false if this + * Descriptor did not have the given FieldDescriptor as child. + */ + bool removeFieldDescriptor(Handle<FieldDescriptor> fd); + + /** + * This creates a new primitive FieldDescriptor and adds it to this + * Descriptor. + * + * @param primitiveType is a handle to some Type in some Typesystem of which + * one instance is allowed to fill this field. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + * + * @return the newly created FieldDescriptor and a bool + * indicating whether the order of FieldDescriptors had + * to be changed for the TREE field to be in the last + * spot. + */ + std::pair<Rooted<FieldDescriptor>, bool> createPrimitiveFieldDescriptor( + Handle<Type> primitiveType, Logger &logger, + FieldDescriptor::FieldType fieldType = FieldDescriptor::FieldType::TREE, + std::string name = "", bool optional = false); + + /** + * This creates a new primitive FieldDescriptor and adds it to this + * Descriptor. + * + * @param fieldType is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param name is the name of this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. 
+ * + * @return the newly created FieldDescriptor and a bool + * indicating whether the order of FieldDescriptors had + * to be changed for the TREE field to be in the last + * spot. + */ + std::pair<Rooted<FieldDescriptor>, bool> createFieldDescriptor( + Logger &logger, + FieldDescriptor::FieldType fieldType = FieldDescriptor::FieldType::TREE, + std::string name = "", bool optional = false); + + /** + * This tries to construct the shortest possible path of this Descriptor + * to the given child Descriptor. As an example consider the book ontology + * from above. + * + * First consider the call book->pathTo(chapter). This is an easy example: + * Our path just contains a reference to the default field of book, because + * a chapter may be directly added to the main field of book. + * + * Second consider the call book->pathTo(text). This is somewhat more + * complicated, but it is still a valid request, because we can construct + * the path: {book_main_field, paragraph, paragraph_main_field}. + * This is only valid because paragraph is transparent. + * + * What about the call book->pathTo(section)? This will lead to an empty + * return path (= invalid). We could, of course, in principle construct + * a path between book and section (via chapter), but chapter is not + * transparent. Therefore that path is not allowed. + * + * Implicitly this does a breadth-first search on the graph of + * StructuredClasses that are transparent. It also takes care of cycles. + * + * @param childDescriptor is a supposedly valid child Descriptor of this + * Descriptor. + * @return either a path of FieldDescriptors and + * StructuredClasses between this Descriptor and + * the input StructuredClass or an empty vector if + * no such path can be constructed. + * + */ + NodeVector<Node> pathTo(Handle<StructuredClass> childDescriptor, + Logger &logger) const; + /** + * This tries to construct the shortest possible path of this Descriptor + * to the given FieldDescriptor. 
Note that this method has the problem that + * an empty return path does NOT strictly imply that no such path could + * be constructed: We also return an empty vector if the given + * FieldDescriptor is a direct child. Therefore we also return a bool value + * indicating that the path is valid or not. + * + * + * Implicitly this does a breadth-first search on the graph of + * StructuredClasses that are transparent. It also takes care of cycles. + * + * @param field is a FieldDescriptor that may be allowed as child of this + * Descriptor. + * @return returns a tuple containing a path of FieldDescriptors and + * StructuredClasses between this Descriptor and the input + * FieldDescriptor and a bool value indicating if the + * construction was successful. + */ + std::pair<NodeVector<Node>, bool> pathTo(Handle<FieldDescriptor> field, + Logger &logger) const; + + /** + * Returns a vector of all TREE fields that are allowed as structure tree + * children of an instance of this Descriptor. This also makes use of + * transparency. + * The list is sorted by the number of transparent elements that have to be + * constructed to arrive at the respective FieldDescriptor. + * + * @return a vector of all TREE fields that are allowed as structure tree + * children of an instance of this Descriptor. + */ + NodeVector<FieldDescriptor> getDefaultFields() const; + + /** + * Returns a vector of all StructuredClasses that are allowed as children + * of an instance of this Descriptor in the structure tree. This also makes + * use of transparency. + * The list is sorted by the number of transparent elements that have to be + * constructed to arrive at the respective FieldDescriptor. + * + * @return a vector of all StructuredClasses that are allowed as children + * of an instance of this Descriptor in the structure tree. + */ + NodeVector<StructuredClass> getPermittedChildren() const; +}; +/* + * TODO: We should discuss Cardinalities one more time. 
Is it smart to define
 * cardinalities independent of context? Should we not at least have the
 * possibility to define it context-dependently?
 */

/**
 * A StructuredClass specifies nodes in the StructureTree of a document that
 * implements this ontology. For more information on the StructureTree please
 * consult the Header documentation above.
 *
 * Note that a StructuredClass may "invade" an existing Ontology description by
 * defining itself as a viable child in one existing field. Consider the
 * example of the "heading" ontology from the header documentation again:
 *
 * \code{.xml}
 * <ontology name="headings">
 *     <head>
 *         <import rel="ontology" src="book.oxm"/>
 *     </head>
 *     <structs>
 *         <struct name="heading" cardinality="0-1" transparent="true">
 *             <parents>
 *                 <parent name="book.book">
 *                     <field name="heading" type="SUBTREE"/>
 *                 </parent>
 *                 ...
 *             </parents>
 *             <fields>
 *                 <fieldRef name="book.paragraph.">
 *             </fields>
 *     </structs>
 * </ontology>
 * \endcode
 *
 * The "parent" construct allows to "invade" another ontology.
 *
 * This does indeed interfere with an existing ontology and one must carefully
 * craft such parent references to not create undesired side effects. However
 * they provide the most convenient mechanism to extend existing ontologies
 * without having to rewrite them.
 *
 * Another important factor is the 'transparent' flag. Transparent
 * StructureClasses may be implicitly constructed in the document graph.
 * If we go back to our example a user would (without transparency) have to
 * explicitly declare:
 *
 * \code{.xml}
 * <book>
 *     <section>
 *         <paragraph>Text.</paragraph>
 *     </section>
 * </book>
 * \endcode
 *
 * But in our mind the document
 *
 * \code{.xml}
 * <book>
 *     <section>
 *         Text.
 *     </section>
 * </book>
 * \endcode
 *
 * is already sufficiently specific. We can infer that a paragraph should be
 * wrapped around "Text.".
Therefore we set the 'transparent' flag of the + * "paragraph" StructuredClass to true. Please note that such inferences + * become increasingly complicated when children of transparent + * StructuredClasses are allowed to be transparent as well. So use with care. + * + * Finally we allow StructuredClasses to inherit attributes of other + * StructuredClasses. Inheritance also implies that instance of the inheriting + * class can be used wherever an instance of the inherited class is allowed. + * Inheritance therefore also goes for fields. + */ +class StructuredClass : public Descriptor { + friend Ontology; + +private: + const Variant cardinality; + Owned<StructuredClass> superclass; + NodeVector<StructuredClass> subclasses; + bool transparent; + bool root; + + /** + * Helper method for getFieldDescriptors. + */ + Rooted<FieldDescriptor> gatherFieldDescriptors( + NodeVector<FieldDescriptor> ¤t, + std::unordered_set<const StructuredClass *> &visited, + std::set<std::string> &overriddenFields, bool hasTREE) const; + +protected: + bool doValidate(Logger &logger) const override; + +public: + /** + * The constructor for a StructuredClass. + * + * @param mgr is the current Manager. + * @param name is the name of the StructuredClass. + * @param ontology is the Ontology this StructuredClass belongs + * to. + * @param cardinality specifies how often an element of this type + * may occur at a specific point in the + * StructureTree. For example: A document should + * have at least one author. This is set to * + * per default, meaning that any number of + * of instances is valid, including zero. + * @param superclass references a parent StructuredClass. Please + * look for more information on inheritance in + * the class documentation above. The default is + * a null reference, meaning no super class. + * The constructor automatically registers this + * class as a subclass at the super class. + * @param transparent specifies whether this StructuredClass is + * transparent. 
For more information on + * transparency please refer to the class + * documentation above. The default is false. + * @param root specifies whether this StructuredClass is + * allowed to be at the root of a Document. + */ + StructuredClass(Manager &mgr, std::string name, + Handle<Ontology> ontology = nullptr, + Variant cardinality = Cardinality::any(), + Handle<StructuredClass> superclass = nullptr, + bool transparent = false, bool root = false); + + /** + * Returns the Cardinality of this StructuredClass (as a RangeSet). + * + * @return the Cardinality of this StructuredClass (as a RangeSet). + */ + const Variant &getCardinality() const { return cardinality; } + + /** + * Returns the superclass of this StructuredClass. This is not the same as + * the parents in the Structure Tree! + * + * @return the superclass of this StructuredClass. + */ + Rooted<StructuredClass> getSuperclass() const { return superclass; } + + /** + * Sets the superclass of this StructuredClass. This is not the same as + * the parents in the Structure Tree! + * + * This will also register this class as a subclass at the given superclass + * and unregister it at the previous superclass. + * + * It will also set the parent for this Descriptors AttributesDescriptor. + * + * @param sup some StructuredClass that shall be the new superclass of + * this StructuredClass. + * @param logger is some logger. Errors during setting the parent for this + * Descriptors AttributesDescriptor will be written into this + * logger. + */ + void setSuperclass(Handle<StructuredClass> sup, Logger &logger); + + /** + * Returns true if this class is a subclass of the given class. It does not + * return true if the other class is equal to the given class. + * + * @param c is another class that might or might not be a superclass of this + * one + * @return true if this class is a subclass of the given class. 
+ * + */ + bool isSubclassOf(Handle<StructuredClass> c) const; + + /** + * Returns the StructuredClasses that are subclasses of this class. This + * is the inverted version of isa, meaning: each class c that has a isa + * relationship to this class is part of the returned vector. + * + * Note that the order of subclasses is not strictly defined. + * + * You are not allowed to add subclasses directly to the vector. When you + * construct a new StructuredClass with a non-empty isa-handle it will + * automatically register as subclass at the super class. + * + * @return the StructuredClasses that are subclasses of this class. + */ + const NodeVector<StructuredClass> &getSubclasses() const + { + return subclasses; + } + + /** + * Adds a subclass to this StructuredClass. This also calls setSuperclass + * on the given subclass. + * + * @param sc is some StructuredClass. + * @param logger is some logger. Errors during setting the parent for the + * new subclasses AttributesDescriptor will be written into + * this logger. + */ + void addSubclass(Handle<StructuredClass> sc, Logger &logger); + + /** + * Removes a subclass from this StructuredClass. This also calls + * setSuperclass(nullptr) on the given subclass. + * + * @param sc is some StructuredClass. + * @param logger is some logger. Errors during setting the parent for the + * removed subclasses AttributesDescriptor will be written + * into this logger. + */ + void removeSubclass(Handle<StructuredClass> sc, Logger &logger); + + /** + * Returns a NodeVector of all FieldDescriptors of + * this StructuredClass. This also merges the FieldDescriptors directly + * belonging to this StructuredClass with all FieldDescritptors of its + * Superclass (and so on recurvively). The order of field descriptors is + * as follows: + * 1.) non-overridden SUBTREE FieldDescriptors of super classes. + * 2.) SUBTREE FieldDescriptors of this class. + * 3.) 
TREE FieldDescriptor (either inherited from super class or direct) + * + * @return a NodeVector of all FieldDescriptors of this StructuredClass. + */ + NodeVector<FieldDescriptor> getFieldDescriptors() const override; + + bool isTransparent() const { return transparent; } + + void setTransparent(bool t) + { + invalidate(); + transparent = std::move(t); + } + + bool hasRootPermission() const { return root; } + + void setRootPermission(bool r) + { + invalidate(); + root = std::move(r); + } +}; + +/** + * An AnnotationClass defines allowed Annotations. For more information on + * Annotations please refer to the Document.hpp. + * + * This class has no special properties and is in essence just a Descriptor. + */ +class AnnotationClass : public Descriptor { + friend Ontology; + +public: + /** + * The constructor for a new AnnotationClass. Note that you have to add + * the FieldDescriptors to it later on. + * + * @param mgr is the Manager instance. + * @param name is a name for this AnnotationClass that will + * be used for later references to this + * AnnotationClass. + * @param ontology is the Ontology this AnnotationClass belongs + * to. + */ + AnnotationClass(Manager &mgr, std::string name, Handle<Ontology> ontology); +}; + +/** + * A Ontology node specifies which StructuredClasses and which AnnotationClasses + * are part of this ontology. TODO: Do we want to be able to restrict Annotations + * to certain Structures? + */ +class Ontology : public RootNode { + friend StructuredClass; + friend AnnotationClass; + +private: + NodeVector<StructuredClass> structuredClasses; + NodeVector<AnnotationClass> annotationClasses; + NodeVector<Typesystem> typesystems; + NodeVector<Ontology> ontologies; + +protected: + void doResolve(ResolutionState &state) override; + bool doValidate(Logger &logger) const override; + void doReference(Handle<Node> node) override; + RttiSet doGetReferenceTypes() const override; + +public: + /** + * The constructor for a new ontology. 
Note that this is an empty Ontology and
+     * still has to be filled with StructuredClasses and AnnotationClasses.
+     *
+     * @param mgr is the Manager instance.
+     * @param name is a name for this ontology which will be used for later
+     *             references to this Ontology.
+     */
+    Ontology(Manager &mgr, std::string name = "")
+        : RootNode(mgr, std::move(name), nullptr),
+          structuredClasses(this),
+          annotationClasses(this),
+          typesystems(this),
+          ontologies(this)
+    {
+    }
+
+    /**
+     * Constructs a new Ontology that references the given SystemTypesystem.
+     * This delegates to the (mgr, name) constructor and then passes sys to
+     * referenceTypesystem(). Note that the Ontology is otherwise empty and
+     * still has to be filled with StructuredClasses and AnnotationClasses.
+     *
+     * @param mgr  is the Manager instance.
+     * @param sys  is the SystemTypesystem instance.
+     * @param name is a name for this ontology which will be used for later
+     *             references to this Ontology.
+     */
+    Ontology(Manager &mgr, Handle<SystemTypesystem> sys, std::string name = "")
+        : Ontology(mgr, std::move(name))
+    {
+        referenceTypesystem(sys);
+    }
+
+    /**
+     * Creates a new Ontology and returns it. The instance is allocated with
+     * new through the (mgr, name) constructor, so no Typesystem reference is
+     * added by this function.
+     *
+     * @param mgr  is the Manager instance.
+     * @param name is a name for this ontology which will be used for later
+     *             references to this Ontology.
+     * @return a Rooted handle wrapping the newly allocated Ontology.
+     */
+    static Rooted<Ontology> createEmptyOntology(Manager &mgr, std::string name)
+    {
+        return Rooted<Ontology>{new Ontology(mgr, std::move(name))};
+    }
+
+    /**
+     * Returns a const reference to the NodeVector of StructuredClasses that
+     * are part of this Ontology. Note the spelling of the method name:
+     * getStructureClasses, not getStructuredClasses.
+     *
+     * @return a const reference to the NodeVector of StructuredClasses that
+     *         are part of this Ontology.
+     */
+    const NodeVector<StructuredClass> &getStructureClasses() const
+    {
+        return structuredClasses;
+    }
+    /**
+     * Adds a StructuredClass to this ontology. This also sets the parent of the
+     * given StructuredClass if it is not set to this Ontology already and removes
+     * it from the old Ontology.
+     *
+     * @param s is some StructuredClass.
+     */
+    void addStructuredClass(Handle<StructuredClass> s);
+
+    /**
+     * Removes a StructuredClass from this ontology. This also sets the parent
+     * of the given StructuredClass to null.
+     *
+     * @param s is some StructuredClass.
+     * @return true if the given StructuredClass was removed and false if this
+     *         Ontology did not have the given StructuredClass as child.
+     */
+    bool removeStructuredClass(Handle<StructuredClass> s);
+
+    /**
+     * This creates a new StructuredClass and appends it to this Ontology.
+     *
+     * @param name        is the name of the StructuredClass.
+     * @param cardinality specifies how often an element of this type
+     *                    may occur at a specific point in the
+     *                    StructureTree. For example: A document should
+     *                    have at least one author. The default is
+     *                    Cardinality::any(), meaning that any number of
+     *                    instances is valid, including zero.
+     * @param superclass  references a parent StructuredClass. Please
+     *                    look for more information on inheritance in
+     *                    the StructuredClass class documentation. The
+     *                    default is a null reference, meaning no super
+     *                    class. The constructor automatically registers
+     *                    this class as a subclass at the super class.
+     * @param transparent specifies whether this StructuredClass is
+     *                    transparent. For more information on
+     *                    transparency please refer to the StructuredClass
+     *                    class documentation. The default is false.
+     * @param root        specifies whether this StructuredClass is
+     *                    allowed to be at the root of a Document. The
+     *                    default is false.
+     *
+     * @return the newly created StructuredClass.
+     */
+    Rooted<StructuredClass> createStructuredClass(
+        std::string name, Variant cardinality = Cardinality::any(),
+        Handle<StructuredClass> superclass = nullptr, bool transparent = false,
+        bool root = false);
+
+    /**
+     * Returns a const reference to the NodeVector of AnnotationClasses that are
+     * part of this Ontology.
+     *
+     * @return a const reference to the NodeVector of AnnotationClasses that are
+     * part of this Ontology.
+     */
+    const NodeVector<AnnotationClass> &getAnnotationClasses() const
+    {
+        return annotationClasses;
+    }
+    /**
+     * Adds an AnnotationClass to this ontology. This also sets the parent of
+     * the given AnnotationClass if it is not set to this Ontology already and
+     * removes it from the old Ontology.
+     *
+     * @param a is some AnnotationClass.
+     */
+    void addAnnotationClass(Handle<AnnotationClass> a);
+
+    /**
+     * Removes an AnnotationClass from this ontology. This also sets the parent
+     * of the given AnnotationClass to null.
+     *
+     * @param a is some AnnotationClass.
+     * @return true if the given AnnotationClass was removed and false if this
+     *         Ontology did not have the given AnnotationClass as child.
+     */
+    bool removeAnnotationClass(Handle<AnnotationClass> a);
+
+    /**
+     * This creates a new AnnotationClass and appends it to this Ontology.
+     *
+     * @param name is a name for this AnnotationClass that will be used for
+     *             later references to this AnnotationClass.
+     * @return a Rooted handle to the AnnotationClass.
+     */
+    Rooted<AnnotationClass> createAnnotationClass(std::string name);
+
+    /**
+     * Returns a const reference to the NodeVector of Typesystems that are
+     * referenced in this Ontology.
+     *
+     * @return a const reference to the NodeVector of Typesystems that are
+     *         referenced in this Ontology.
+     */
+    const NodeVector<Typesystem> &getTypesystems() const { return typesystems; }
+
+    /**
+     * Adds a Typesystem reference to this Ontology. The handle is appended
+     * with push_back; this method itself performs no null or duplicate check.
+     *
+     * @param t is the Typesystem that should be referenced.
+     */
+    void referenceTypesystem(Handle<Typesystem> t) { typesystems.push_back(t); }
+
+    /**
+     * Adds multiple Typesystem references to this Ontology. The range
+     * [ts.begin(), ts.end()) is inserted at the end of the stored references.
+     *
+     * @param ts is the vector of Typesystems that should be referenced.
+     */
+    void referenceTypesystems(const std::vector<Handle<Typesystem>> &ts)
+    {
+        typesystems.insert(typesystems.end(), ts.begin(), ts.end());
+    }
+
+    /**
+     * Adds an Ontology reference to this Ontology. The handle is appended
+     * with push_back; this method itself performs no null or duplicate check.
+     *
+     * @param d is the Ontology that should be referenced.
+     */
+    void referenceOntology(Handle<Ontology> d) { ontologies.push_back(d); }
+
+    /**
+     * Adds multiple Ontology references to this Ontology.
+     */
+    void referenceOntologys(const std::vector<Handle<Ontology>> &ds)
+    { /* inserts the range [ds.begin(), ds.end()) at the end of ontologies */
+        ontologies.insert(ontologies.end(), ds.begin(), ds.end());
+    }
+};
+
+namespace RttiTypes {
+/*
+ * Rtti descriptors for the node types declared in this header. They are
+ * declared extern here, so their definitions are provided elsewhere.
+ */
+extern const Rtti FieldDescriptor;
+extern const Rtti Descriptor;
+extern const Rtti StructuredClass;
+extern const Rtti AnnotationClass;
+extern const Rtti Ontology;
+} /* namespace RttiTypes */
+} /* closes the enclosing namespace opened earlier in the file (not visible in this section) */
+
+#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ |