diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/model/Domain.hpp | 178 | 
1 files changed, 166 insertions, 12 deletions
diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 565313f..004ef3c 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -19,7 +19,62 @@  /**   * @file Domain.hpp   * - * TODO: Docu + * This header contains the class hierarchy of descriptor classes for domains. + * Properly connected instances of these classes with a Domain node as root + * describe a semantic Domain in a formal way. It specifies the allowed (tree) + * structure of a document by means of StructuredClasses as well as the allowed + * Annotations by means of AnnotationClasses. + * + * The Structure Description contained in the hierarchy of StructuredClasses is + * equivalent to a context free grammar of a special form. We introduce the + * terms "StructuredClass" and "FieldDescriptor". + * On the top level you would start with a StructuredClass, say "book", which + * in turn might contain two FieldDescriptors, one for the meta data of ones + * book and one for the actual structure. Consider the following (simplified) + * XML notation (TODO: Use a non-simplified notation as soon as the format is + * clear.) + * + * <StructuredClass name="book"> + *   <FieldDescriptor name="structure", type="TREE", optional="false"> + *     <children> + *       Here we would reference the possible child classes, e.g. section, + *       paragraph, etc. + *     </children> + *   </FieldDescriptor> + *   <FieldDescriptor name="meta", type="SUBTREE", optional="true"> + *     <children> + *       Here we would reference the possible child classes for meta, + *       information, e.g. authors, date, version, etc. + *     </children> + *   </FieldDescriptor> + * </StructuredClass> + * + * Note that we define one field as the TREE (meaning the main or default + * document structure) and one mearly as SUBTREE, relating to supporting + * information. You are not allowed to define more than one field of type + * "TREE". Accordingly for each StructuredClass in the main TREE there must be + * at least one possible primitive child or one TREE field. Otherwise the + * grammar would be nonterminal. For SUBTREE fields no children may define a + * TREE field and at least one permitted child must exist, either primitive or + * as another StructuredClass. + * + * The translation to context free grammars is roughly as follows: + * + * BOOK           := book BOOK_STRUCTURE BOOK_META + * BOOK_STRUCTURE := SECTION BOOK_STRUCTURE | PARAGRAPH BOOK_STRUCTURE | epsilon + * BOOK_META      := AUTHOR BOOK_META | DATE BOOK_META + * + * Note that this translation recurs to further nonterminals like SECTION but + * necessarily produces one "book" terminal. Also note that, in principle, + * this grammar translation allows for arbitrarily many children instances of + * the proper StructuredClass. This can be regulated by the "cardinality" + * property of a StructuredClass. + * + * AnnotationClasses on the other hand do not specify a context free grammar. + * They merely specify what kinds of Annotations are allowed within this domain + * and which fields or attributes they have. Note that Annotations are allowed + * to define structured children that manifest e.g. meta information of that + * Annotation.   *   * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)   */ @@ -37,20 +92,46 @@ class StructuredClass;  class Descriptor;  /** - * TODO: DOC + * As mentioned in the description above a FieldDescriptor specifies the + * StructuredClasses that are allowed as children of a StructuredClass or + * AnnotationClass. A field may also be primitive, which means that a proper + * instance of the respective StructuredClass or AnnotationClass must provide + * accordingly typed content without further descending in the Structure + * Hierarchy. + * + * As an example consider the "paragraph" StructuredClass, which might allow + * the actual text content. Here is the according simplified XML (TODO: replace + * with a non-simplified version as soon as the XML syntax is clear.) + * + * <StructuredClass name="paragraph"> + *   <FieldDescriptor name="text", type="PRIMITIVE", optional="false", + *                    primitiveType="string"/> + * </StructuredClass> + * + * Accordingly the primitiveType field of a FieldDescriptor may only be + * defined if the type is set to "PRIMITIVE". If the type is something else + * at least one child must be defined and the primitiveType remains in an + * undefined state.   */  class FieldDescriptor : public Node {  public:  	/** -	 * TODO: DOC +	 * This enum class contains all possible FieldTypes, meaning either the +	 * main structure beneath this Descritor (TREE), supporting structure +	 * (SUBTREE) or a primitive terminal (PRIMITIVE). +	 * +	 * Note the following rules (which are also mentioned above): +	 * 1.) There may be only one TREE field in a Descriptor. +	 * 2.) Each TREE field must allow for at least one child, which in turn has +	 *     either a TREE field or a PRIMITIVE field. +	 * 3.) SUBTREE fields may not allow for children with TREE fields. +	 * 4.) SUBTREE fields must allow for at least one child with another SUBTREE +	 *     or PRIMITIVE field.  	 */ -	enum class FieldType { -		TREE, -		SUBTREE, -		PRIMITIVE -	} +	enum class FieldType { TREE, SUBTREE, PRIMITIVE }; -	private : ManagedVector<StructuredClass> children; +private: +	ManagedVector<StructuredClass> children;  	FieldType fieldType;  	Owned<Type> primitiveType; @@ -58,15 +139,88 @@ public:  	const bool optional;  	// TODO: What about the name of default fields? -	Type(Manager &mgr, std::string name, Handle<Descriptor> parent, -	     FieldType fieldType, Handle<Type> primitiveType, bool optional) +	/** +	 * This is the constructor for primitive fields. The type is automatically +	 * set to "PRIMITIVE". +	 * +	 * @param mgr           is the global Manager instance. +	 * @param name          is the name of this field. +	 * @param parent        is a handle of the Descriptor node that has this +	 *                      FieldDescriptor. +	 * @param primitiveType is a handle to some Type in some Typesystem of which +	 *                      one instance is allowed to fill this field. +	 * @param optional      should be set to 'false' is this field needs to be +	 *                      filled in order for an instance of the parent +	 *                      Descriptor to be valid. +	 */ +	FieldDescriptor(Manager &mgr, std::string name, Handle<Descriptor> parent, +	                Handle<Type> primitiveType, bool optional)  	    : Node(mgr, std::move(name), parent), -	      fieldType(fieldType), +	      fieldType(FieldType::PRIMITIVE),  	      primitiveType(acquire(primitiveType)),  	      optional(optional)  	{  	} +	/** +	 * This is the constructor for non-primitive fields. You have to provide +	 * children here. +	 * +	 * @param mgr           is the global Manager instance. +	 * @param name          is the name of this field. +	 * @param parent        is a handle of the Descriptor node that has this +	 *                      FieldDescriptor. +	 * @param type          is the FieldType of this FieldDescriptor, either +	 *                      TREE for the main or default structure or SUBTREE +	 *                      for supporting structures. +	 * @param optional      should be set to 'false' is this field needs to be +	 *                      filled in order for an instance of the parent +	 *                      Descriptor to be valid. +	 */ +	FieldDescriptor(Manager &mgr, std::string name, Handle<Descriptor> parent, +	                FieldType type, ManagedVector<StructuredClass> children, +	                bool optional) +	    : Node(mgr, std::move(name), parent), +	      fieldType(type), +	      children(children), +	      optional(optional) +	// TODO: What would be a wise initialization of the primitiveType? +	{ +	} + +	ManagedVector<StructuredClass> &getChildren() { return children; } + +	FieldType getFieldType() { return type; } + +	bool isPrimitive() { return type == FieldType::PRIMITIVE; } + +	Rooted<Type> getPrimitiveType() { return primitiveType; } +}; + +/** + * + * + * Furthermore StructuredClasses may specify a StructType of a type system, + * which in turn specifies which key-value pairs may be added as attributes + * to an instance of this StructuredClass. + */ + +class Descriptor : public Node { +private: +	Owned<StructType> attributes; +	ManagedVector<FieldDescriptor> fields; + +public: +	Descriptor(Manager &mgr, std::string name, Handle<Node> parent, +	           // TODO: What would be a wise default value for attributes? +	           Handle<StructType> attributes, +	           ManagedVector<FieldDescriptor> fields) +	    : Node(mgr, std::move(name), parent), +	      attributes(attributes), +	      fields(fields) +	// TODO: What would be a wise initialization of the primitiveType? +	{ +	}  };  }  }  | 
