1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
|
/*
Ousía
Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file Domain.hpp
*
* This header contains the class hierarchy of descriptor classes for domains.
* Properly connected instances of these classes with a Domain node as root
* describe a semantic Domain in a formal way. It specifies the allowed (tree)
* structure of a document by means of StructuredClasses as well as the allowed
* Annotations by means of AnnotationClasses.
*
* The Structure Description contained in the hierarchy of StructuredClasses is
* equivalent to a context free grammar of a special form. We introduce the
* terms "StructuredClass" and "FieldDescriptor".
* On the top level you would start with a StructuredClass, say "book", which
* in turn might contain two FieldDescriptors, one for the meta data of one's
* book and one for the actual structure. Consider the following (simplified)
* XML notation (TODO: Use a non-simplified notation as soon as the format is
* clear.)
*
* <StructuredClass name="book">
* <FieldDescriptor name="structure", type="TREE", optional="false">
* <children>
* Here we would reference the possible child classes, e.g. section,
* paragraph, etc.
* </children>
* </FieldDescriptor>
* <FieldDescriptor name="meta", type="SUBTREE", optional="true">
* <children>
* Here we would reference the possible child classes for meta,
* information, e.g. authors, date, version, etc.
* </children>
* </FieldDescriptor>
* </StructuredClass>
*
* Note that we define one field as the TREE (meaning the main or default
* document structure) and one merely as SUBTREE, relating to supporting
* information. You are not allowed to define more than one field of type
* "TREE". Accordingly for each StructuredClass in the main TREE there must be
* at least one possible primitive child or one TREE field. Otherwise the
* grammar would be non-terminating. For SUBTREE fields no children may define a
* TREE field and at least one permitted child must exist, either primitive or
* as another StructuredClass.
*
* The translation to context free grammars is roughly as follows:
*
* BOOK := book BOOK_STRUCTURE BOOK_META
* BOOK_STRUCTURE := SECTION BOOK_STRUCTURE | PARAGRAPH BOOK_STRUCTURE | epsilon
* BOOK_META := AUTHOR BOOK_META | DATE BOOK_META | epsilon
*
* Note that this translation recurs to further nonterminals like SECTION but
* necessarily produces one "book" terminal. Also note that, in principle,
* this grammar translation allows for arbitrarily many children instances of
* the proper StructuredClass. This can be regulated by the "cardinality"
* property of a StructuredClass.
*
* AnnotationClasses on the other hand do not specify a context free grammar.
* They merely specify what kinds of Annotations are allowed within this domain
* and which fields or attributes they have. Note that Annotations are allowed
* to define structured children that manifest e.g. meta information of that
* Annotation.
*
* @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
*/
#ifndef _OUSIA_MODEL_DOMAIN_HPP_
#define _OUSIA_MODEL_DOMAIN_HPP_
#include <core/ManagedContainers.hpp>
#include <core/Node.hpp>
namespace ousia {
namespace model {
class StructuredClass;
class Descriptor;
/**
* As mentioned in the description above a FieldDescriptor specifies the
* StructuredClasses that are allowed as children of a StructuredClass or
* AnnotationClass. A field may also be primitive, which means that a proper
* instance of the respective StructuredClass or AnnotationClass must provide
* accordingly typed content without further descending in the Structure
* Hierarchy.
*
* As an example consider the "paragraph" StructuredClass, which might allow
* the actual text content. Here is the according simplified XML (TODO: replace
* with a non-simplified version as soon as the XML syntax is clear.)
*
* <StructuredClass name="paragraph">
* <FieldDescriptor name="text", type="PRIMITIVE", optional="false",
* primitiveType="string"/>
* </StructuredClass>
*
* Accordingly the primitiveType field of a FieldDescriptor may only be
* defined if the type is set to "PRIMITIVE". If the type is something else
* at least one child must be defined and the primitiveType remains in an
* undefined state.
*/
class FieldDescriptor : public Node {
public:
    /**
     * This enum class contains all possible FieldTypes, meaning either the
     * main structure beneath this Descriptor (TREE), supporting structure
     * (SUBTREE) or a primitive terminal (PRIMITIVE).
     *
     * Note the following rules (which are also mentioned above):
     * 1.) There may be only one TREE field in a Descriptor.
     * 2.) Each TREE field must allow for at least one child, which in turn has
     *     either a TREE field or a PRIMITIVE field.
     * 3.) SUBTREE fields may not allow for children with TREE fields.
     * 4.) SUBTREE fields must allow for at least one child with another
     *     SUBTREE or PRIMITIVE field.
     *
     * None of these rules is enforced by this class itself.
     */
    enum class FieldType { TREE, SUBTREE, PRIMITIVE };

private:
    // NOTE: members are initialized in exactly this order, regardless of the
    // order used in a constructor's initializer list.

    /** StructuredClasses permitted as children (non-primitive fields). */
    ManagedVector<StructuredClass> children;
    /** Kind of this field: TREE, SUBTREE or PRIMITIVE. */
    FieldType fieldType;
    /**
     * Type of the content of a PRIMITIVE field.
     * NOTE(review): `Type` is not declared in the visible part of this
     * header; presumably it comes from the typesystem headers -- confirm.
     */
    Owned<Type> primitiveType;

public:
    /** Whether instances of the parent Descriptor may leave this field empty. */
    const bool optional;

    // TODO: What about the name of default fields?

    /**
     * This is the constructor for primitive fields. The type is automatically
     * set to "PRIMITIVE" and the list of children is default-constructed.
     *
     * @param mgr           is the global Manager instance.
     * @param name          is the name of this field.
     * @param parent        is a handle of the Descriptor node that has this
     *                      FieldDescriptor.
     * @param primitiveType is a handle to some Type in some Typesystem of
     *                      which one instance is allowed to fill this field.
     * @param optional      should be set to 'false' if this field needs to be
     *                      filled in order for an instance of the parent
     *                      Descriptor to be valid.
     */
    FieldDescriptor(Manager &mgr, std::string name, Handle<Descriptor> parent,
                    Handle<Type> primitiveType, bool optional)
        : Node(mgr, std::move(name), parent),
          fieldType(FieldType::PRIMITIVE),
          primitiveType(acquire(primitiveType)),
          optional(optional)
    {
    }

    /**
     * This is the constructor for non-primitive fields. You have to provide
     * children here.
     *
     * @param mgr      is the global Manager instance.
     * @param name     is the name of this field.
     * @param parent   is a handle of the Descriptor node that has this
     *                 FieldDescriptor.
     * @param type     is the FieldType of this FieldDescriptor, either
     *                 TREE for the main or default structure or SUBTREE
     *                 for supporting structures.
     * @param children is the list of StructuredClasses that are allowed as
     *                 children of this field.
     * @param optional should be set to 'false' if this field needs to be
     *                 filled in order for an instance of the parent
     *                 Descriptor to be valid.
     */
    FieldDescriptor(Manager &mgr, std::string name, Handle<Descriptor> parent,
                    FieldType type, ManagedVector<StructuredClass> children,
                    bool optional)
        : Node(mgr, std::move(name), parent),
          // Listed in declaration order (children before fieldType), which is
          // the order in which the members are actually initialized.
          children(children),
          fieldType(type),
          optional(optional)
    // TODO: What would be a wise initialization of the primitiveType?
    {
    }

    /**
     * Returns a mutable reference to the list of StructuredClasses that are
     * allowed as children of this field.
     */
    ManagedVector<StructuredClass> &getChildren() { return children; }

    /**
     * Returns the FieldType this FieldDescriptor was constructed with.
     */
    FieldType getFieldType() const { return fieldType; }

    /**
     * Returns true if and only if this field is of type PRIMITIVE.
     */
    bool isPrimitive() const { return fieldType == FieldType::PRIMITIVE; }

    /**
     * Returns the primitive Type of this field. Only the primitive
     * constructor sets it; for TREE and SUBTREE fields the member is left
     * default-initialized, so callers should check isPrimitive() first.
     */
    Rooted<Type> getPrimitiveType() { return primitiveType; }
};
/**
 * A Descriptor is a Node that bundles a StructType describing attributes and
 * a list of FieldDescriptors. FieldDescriptors refer back to their Descriptor
 * as their parent node.
 *
 * NOTE(review): presumably this is meant as the common base class of
 * StructuredClass and AnnotationClass, both of which are described in the
 * file header as owning FieldDescriptors -- confirm once those classes exist.
 *
 * Furthermore StructuredClasses may specify a StructType of a type system,
 * which in turn specifies which key-value pairs may be added as attributes
 * to an instance of this StructuredClass.
 */
class Descriptor : public Node {
private:
    /** StructType specifying the permitted attributes. */
    Owned<StructType> attributes;
    /** FieldDescriptors belonging to this Descriptor. */
    ManagedVector<FieldDescriptor> fields;

public:
    /**
     * Creates a new Descriptor.
     *
     * @param mgr        is the global Manager instance.
     * @param name       is the name of this Descriptor.
     * @param parent     is a handle of the parent Node of this Descriptor.
     * @param attributes is a handle to the StructType that specifies the
     *                   attributes of instances of this Descriptor.
     * @param fields     is the list of FieldDescriptors of this Descriptor.
     */
    Descriptor(Manager &mgr, std::string name, Handle<Node> parent,
               // TODO: What would be a wise default value for attributes?
               Handle<StructType> attributes,
               ManagedVector<FieldDescriptor> fields)
        : Node(mgr, std::move(name), parent),
          // Take ownership through acquire(), exactly as FieldDescriptor does
          // for its Owned<Type> member.
          attributes(acquire(attributes)),
          fields(fields)
    {
    }
};
}
}
#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
|