From 0194b141fe507fd557c584b759a593aea5103f04 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 15 Dec 2014 12:00:56 +0100 Subject: Implementation of FieldDescriptor class. --- src/core/model/Domain.hpp | 178 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 166 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 565313f..004ef3c 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -19,7 +19,62 @@ /** * @file Domain.hpp * - * TODO: Docu + * This header contains the class hierarchy of descriptor classes for domains. + * Properly connected instances of these classes with a Domain node as root + * describe a semantic Domain in a formal way. It specifies the allowed (tree) + * structure of a document by means of StructuredClasses as well as the allowed + * Annotations by means of AnnotationClasses. + * + * The Structure Description contained in the hierarchy of StructuredClasses is + * equivalent to a context free grammar of a special form. We introduce the + * terms "StructuredClass" and "FieldDescriptor". + * On the top level you would start with a StructuredClass, say "book", which + * in turn might contain two FieldDescriptors, one for the meta data of ones + * book and one for the actual structure. Consider the following (simplified) + * XML notation (TODO: Use a non-simplified notation as soon as the format is + * clear.) + * + * + * + * + * Here we would reference the possible child classes, e.g. section, + * paragraph, etc. + * + * + * + * + * Here we would reference the possible child classes for meta, + * information, e.g. authors, date, version, etc. + * + * + * + * + * Note that we define one field as the TREE (meaning the main or default + * document structure) and one mearly as SUBTREE, relating to supporting + * information. You are not allowed to define more than one field of type + * "TREE". Accordingly for each StructuredClass in the main TREE there must be + * at least one possible primitive child or one TREE field. Otherwise the + * grammar would be nonterminal. For SUBTREE fields no children may define a + * TREE field and at least one permitted child must exist, either primitive or + * as another StructuredClass. + * + * The translation to context free grammars is roughly as follows: + * + * BOOK := book BOOK_STRUCTURE BOOK_META + * BOOK_STRUCTURE := SECTION BOOK_STRUCTURE | PARAGRAPH BOOK_STRUCTURE | epsilon + * BOOK_META := AUTHOR BOOK_META | DATE BOOK_META + * + * Note that this translation recurs to further nonterminals like SECTION but + * necessarily produces one "book" terminal. Also note that, in principle, + * this grammar translation allows for arbitrarily many children instances of + * the proper StructuredClass. This can be regulated by the "cardinality" + * property of a StructuredClass. + * + * AnnotationClasses on the other hand do not specify a context free grammar. + * They merely specify what kinds of Annotations are allowed within this domain + * and which fields or attributes they have. Note that Annotations are allowed + * to define structured children that manifest e.g. meta information of that + * Annotation. * * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ @@ -37,20 +92,46 @@ class StructuredClass; class Descriptor; /** - * TODO: DOC + * As mentioned in the description above a FieldDescriptor specifies the + * StructuredClasses that are allowed as children of a StructuredClass or + * AnnotationClass. A field may also be primitive, which means that a proper + * instance of the respective StructuredClass or AnnotationClass must provide + * accordingly typed content without further descending in the Structure + * Hierarchy. + * + * As an example consider the "paragraph" StructuredClass, which might allow + * the actual text content. Here is the according simplified XML (TODO: replace + * with a non-simplified version as soon as the XML syntax is clear.) + * + * + * + * + * + * Accordingly the primitiveType field of a FieldDescriptor may only be + * defined if the type is set to "PRIMITIVE". If the type is something else + * at least one child must be defined and the primitiveType remains in an + * undefined state. */ class FieldDescriptor : public Node { public: /** - * TODO: DOC + * This enum class contains all possible FieldTypes, meaning either the + * main structure beneath this Descritor (TREE), supporting structure + * (SUBTREE) or a primitive terminal (PRIMITIVE). + * + * Note the following rules (which are also mentioned above): + * 1.) There may be only one TREE field in a Descriptor. + * 2.) Each TREE field must allow for at least one child, which in turn has + * either a TREE field or a PRIMITIVE field. + * 3.) SUBTREE fields may not allow for children with TREE fields. + * 4.) SUBTREE fields must allow for at least one child with another SUBTREE + * or PRIMITIVE field. */ - enum class FieldType { - TREE, - SUBTREE, - PRIMITIVE - } + enum class FieldType { TREE, SUBTREE, PRIMITIVE }; - private : ManagedVector children; +private: + ManagedVector children; FieldType fieldType; Owned primitiveType; @@ -58,15 +139,88 @@ public: const bool optional; // TODO: What about the name of default fields? - Type(Manager &mgr, std::string name, Handle parent, - FieldType fieldType, Handle primitiveType, bool optional) + /** + * This is the constructor for primitive fields. The type is automatically + * set to "PRIMITIVE". + * + * @param mgr is the global Manager instance. + * @param name is the name of this field. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param primitiveType is a handle to some Type in some Typesystem of which + * one instance is allowed to fill this field. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + */ + FieldDescriptor(Manager &mgr, std::string name, Handle parent, + Handle primitiveType, bool optional) : Node(mgr, std::move(name), parent), - fieldType(fieldType), + fieldType(FieldType::PRIMITIVE), primitiveType(acquire(primitiveType)), optional(optional) { } + /** + * This is the constructor for non-primitive fields. You have to provide + * children here. + * + * @param mgr is the global Manager instance. + * @param name is the name of this field. + * @param parent is a handle of the Descriptor node that has this + * FieldDescriptor. + * @param type is the FieldType of this FieldDescriptor, either + * TREE for the main or default structure or SUBTREE + * for supporting structures. + * @param optional should be set to 'false' is this field needs to be + * filled in order for an instance of the parent + * Descriptor to be valid. + */ + FieldDescriptor(Manager &mgr, std::string name, Handle parent, + FieldType type, ManagedVector children, + bool optional) + : Node(mgr, std::move(name), parent), + fieldType(type), + children(children), + optional(optional) + // TODO: What would be a wise initialization of the primitiveType? + { + } + + ManagedVector &getChildren() { return children; } + + FieldType getFieldType() { return type; } + + bool isPrimitive() { return type == FieldType::PRIMITIVE; } + + Rooted getPrimitiveType() { return primitiveType; } +}; + +/** + * + * + * Furthermore StructuredClasses may specify a StructType of a type system, + * which in turn specifies which key-value pairs may be added as attributes + * to an instance of this StructuredClass. + */ + +class Descriptor : public Node { +private: + Owned attributes; + ManagedVector fields; + +public: + Descriptor(Manager &mgr, std::string name, Handle parent, + // TODO: What would be a wise default value for attributes? + Handle attributes, + ManagedVector fields) + : Node(mgr, std::move(name), parent), + attributes(attributes), + fields(fields) + // TODO: What would be a wise initialization of the primitiveType? + { + } }; } } -- cgit v1.2.3 From 280f886a42d74c6dcf81efbd1604ad972e8bae3a Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 15 Dec 2014 12:59:11 +0100 Subject: first draft of Descriptor. --- src/core/model/Domain.hpp | 52 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 004ef3c..13c65e5 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -188,6 +188,7 @@ public: { } + // TODO: Is returning a ManagedVector alright? ManagedVector &getChildren() { return children; } FieldType getFieldType() { return type; } @@ -198,28 +199,57 @@ public: }; /** + * This is a super class for StructuredClasses and AnnotationClasses and is, + * in itself, not supposed to be instantiated. It defines that both, Annotations + * and StructuredEntities, may have attributes and fields. For more information + * on fields please have a look at the header documentation as well as the + * documentation of the FieldDescriptor class. * + * Attributes are primitive content stored in a key-value fashion. Therefore + * the attribute specification of a descriptor is done by referencing an + * appropriate StructType that contains all permitted keys and value types. + * + * TODO: What aout optional attributes? + * + * In XML terms the difference between primitive fields and attributes can be + * explained as the difference between node attributes and node children. + * Consider the XML + * + * + * value + * + * + * key="value" inside the A-node would be an attribute, while value + * would be a primitive field. While equivalent in XML the semantics are + * different: An attribute describes indeed attributes, features of one single + * node whereas a primitive field describes the _content_ of a node. * - * Furthermore StructuredClasses may specify a StructType of a type system, - * which in turn specifies which key-value pairs may be added as attributes - * to an instance of this StructuredClass. */ - class Descriptor : public Node { private: - Owned attributes; - ManagedVector fields; + Owned attributesDescriptor; + ManagedVector fieldDescriptors; public: Descriptor(Manager &mgr, std::string name, Handle parent, // TODO: What would be a wise default value for attributes? - Handle attributes, - ManagedVector fields) + Handle attributesDescriptor, + ManagedVector fieldDescriptors) : Node(mgr, std::move(name), parent), - attributes(attributes), - fields(fields) - // TODO: What would be a wise initialization of the primitiveType? + attributesDescriptor(attributesDescriptor), + fieldDescriptors(fieldDescriptors) + { + } + + Rooted getAttributesDescriptor() + { + return attributesDescriptor; + } + + // TODO: Is returning a ManagedVector alright? + ManagedVector getFieldDescriptors() { + return fieldDescriptors; } }; } -- cgit v1.2.3 From 5712ae821cee989943625629e8fe59ea7bb8eb1c Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 15 Dec 2014 13:40:36 +0100 Subject: first draft for StructuredClass --- src/core/model/Domain.hpp | 120 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 13c65e5..9bd2982 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -191,11 +191,11 @@ public: // TODO: Is returning a ManagedVector alright? ManagedVector &getChildren() { return children; } - FieldType getFieldType() { return type; } + FieldType getFieldType() const { return type; } - bool isPrimitive() { return type == FieldType::PRIMITIVE; } + bool isPrimitive() const { return type == FieldType::PRIMITIVE; } - Rooted getPrimitiveType() { return primitiveType; } + Rooted getPrimitiveType() const { return primitiveType; } }; /** @@ -236,22 +236,130 @@ public: Handle attributesDescriptor, ManagedVector fieldDescriptors) : Node(mgr, std::move(name), parent), - attributesDescriptor(attributesDescriptor), + attributesDescriptor(acquire(attributesDescriptor)), fieldDescriptors(fieldDescriptors) { } - Rooted getAttributesDescriptor() + Rooted getAttributesDescriptor() const { return attributesDescriptor; } // TODO: Is returning a ManagedVector alright? - ManagedVector getFieldDescriptors() + ManagedVector getFieldDescriptors() const { return fieldDescriptors; } }; + +// TODO: Implement +class Cardinality { +} + +/** + * A StructuredClass specifies nodes in the StructureTree of a document that + * implements this domain. For more information on the StructureTree please + * consult the Header documentation above. + * + * Note that a StructuredClass may "invade" an existing Domain description by + * defining itself as a viable child in one existing field. Consider a "section" + * StructuredClass (continuing the example in the header documentation): + * + * + * + * + * paragraph + * + * + * + * + * Of course in most cases we do not only want to allow paragraphs inside + * sections, but also (for example) lists. How would one add that + * without manipulating the existing domain or having to define an entirely + * new domain in which section allows for lists? + * + * Our solution to this problem is the parent mechanism. The simplified XML + * (TODO: Use non-simplified version as soon as possible) for the "list" + * StructuredClass would look like this: + * + * + * + * + * item + * + * + * + * section.structure + * + * + * + * This does indeed interfere with an existing domain and one must carefully + * craft such parent references to not create undesired side effects. However + * they provide the most convenient mechanism to extend existing domains + * without having to rewrite them. + * + * Another important factor is the 'transparent' flag. Transparent + * StructureClasses may be implicitly constructed in the document graph. + * If we go back to our example a user would (without transparency) have to + * explicitly declare: + * + * + *
+ * Text. + *
+ *
+ * + * But in our mind the document + + * + *
+ * Text. + *
+ *
+ * + * Is already sufficiently specific. We can infer that a paragraph should be + * wrapped around "Text.". Therefore we set the 'transparent' flag of the + * "paragraph" StructuredClass to true. Please note that such inferences + * become increasingly complicated when children of transparent + * StructuredClasses are allowed to be transparent as well. So use with care. + * + * Finally we allow StructuredClasses to inherit attributes of other + * StructuredClasses. Inheritance also implies that instance of the inheriting + * class can be used wherever an instance of the inherited class is allowed. + * Inheritance therefore also goes for fields. TODO: What is the specification + * for field inheritance? Is the child allowed to specify children at all? + * Is that interpreted as overriding the parent fields or constructing a union? + * What about the cardinality? + */ +class StructuredClass : public Descriptor { +private: + const Cardinality cardinality; + Owned isa; + ManagedVector parents; + +public: + const bool transparent; + + StructuredClass(Manager &mgr, std::string name, Handle parent, + const Cardinality cardinality &, + // TODO: Wha would be a wise default value for isa? + Handle isa, + ManagedVector parents) + : Node(mgr, std::move(name), parent), + cardinality(cardinality), + isa(acquire(isa)), + parents(parents) + { + } + + const Cardinality &getCardinality() const { return cardinality; } + + Rooted getIsA() const {return isa}; + + // TODO: Is returning a ManagedVector alright? + ManagedVector getParents() { return parents; } +}; } } -- cgit v1.2.3 From a98797188a48148f5d8b268bf623b15d283d6334 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Mon, 15 Dec 2014 14:00:15 +0100 Subject: finished first version of Domain.hpp, which compiles now. --- CMakeLists.txt | 2 +- src/core/model/Domain.hpp | 79 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/CMakeLists.txt b/CMakeLists.txt index cac6061..817bf90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ ADD_LIBRARY(ousia_core src/core/common/Utils src/core/common/Variant src/core/common/VariantReader -# src/core/model/Domain + src/core/model/Domain src/core/model/Typesystem src/core/parser/Parser src/core/parser/ParserStack diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 9bd2982..c163254 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -85,6 +85,8 @@ #include #include +#include "Typesystem.hpp" + namespace ousia { namespace model { @@ -156,6 +158,7 @@ public: FieldDescriptor(Manager &mgr, std::string name, Handle parent, Handle primitiveType, bool optional) : Node(mgr, std::move(name), parent), + children(this), fieldType(FieldType::PRIMITIVE), primitiveType(acquire(primitiveType)), optional(optional) @@ -170,7 +173,7 @@ public: * @param name is the name of this field. * @param parent is a handle of the Descriptor node that has this * FieldDescriptor. - * @param type is the FieldType of this FieldDescriptor, either + * @param fieldType is the FieldType of this FieldDescriptor, either * TREE for the main or default structure or SUBTREE * for supporting structures. * @param optional should be set to 'false' is this field needs to be @@ -178,22 +181,22 @@ public: * Descriptor to be valid. */ FieldDescriptor(Manager &mgr, std::string name, Handle parent, - FieldType type, ManagedVector children, - bool optional) + FieldType fieldType, + ManagedVector children, bool optional) : Node(mgr, std::move(name), parent), - fieldType(type), children(children), + fieldType(fieldType), + // TODO: What would be a wise initialization of the primitiveType? optional(optional) - // TODO: What would be a wise initialization of the primitiveType? { } // TODO: Is returning a ManagedVector alright? ManagedVector &getChildren() { return children; } - FieldType getFieldType() const { return type; } + FieldType getFieldType() const { return fieldType; } - bool isPrimitive() const { return type == FieldType::PRIMITIVE; } + bool isPrimitive() const { return fieldType == FieldType::PRIMITIVE; } Rooted getPrimitiveType() const { return primitiveType; } }; @@ -255,7 +258,7 @@ public: // TODO: Implement class Cardinality { -} +}; /** * A StructuredClass specifies nodes in the StructureTree of a document that @@ -342,24 +345,72 @@ public: const bool transparent; StructuredClass(Manager &mgr, std::string name, Handle parent, - const Cardinality cardinality &, - // TODO: Wha would be a wise default value for isa? + Handle attributesDescriptor, + ManagedVector fieldDescriptors, + const Cardinality &cardinality, + // TODO: What would be a wise default value for isa? Handle isa, - ManagedVector parents) - : Node(mgr, std::move(name), parent), + ManagedVector parents, + bool transparent) + : Descriptor(mgr, std::move(name), parent, attributesDescriptor, + fieldDescriptors), cardinality(cardinality), isa(acquire(isa)), - parents(parents) + parents(parents), + transparent(transparent) { } const Cardinality &getCardinality() const { return cardinality; } - Rooted getIsA() const {return isa}; + Rooted getIsA() const {return isa;} // TODO: Is returning a ManagedVector alright? ManagedVector getParents() { return parents; } }; + +/** + * An AnnotationClass defines allowed Annotations. For more information on + * Annotations please refer to the Document.hpp. + * + * This class has no special properties and is in essence just a Descriptor. + */ +class AnnotationClass : public Descriptor { +}; + +/** + * A Domain node specifies which StructuredClasses are allowed at the root + * level (or which Nonterminals are axioms of the grammar) and which Annotations + * are allowed globally. TODO: Do we want to be able to restrict Annotations to + * certain Structures? + */ +class Domain : public Node { +private: + ManagedVector rootStructures; + ManagedVector annotationClasses; + +public: + Domain(Manager &mgr, std::string name, + ManagedVector rootStructures, + ManagedVector annotationClasses) + // TODO: Can a domain have a parent? + : Node(mgr, std::move(name), nullptr), + rootStructures(rootStructures), + annotationClasses(annotationClasses) + { + } + + // TODO: Is returning a ManagedVector alright? + ManagedVector getRootStructures() + { + return rootStructures; + } + + ManagedVector getAnnotationClasses() + { + return annotationClasses; + } +}; } } -- cgit v1.2.3