summaryrefslogtreecommitdiff
path: root/src/core/model/Syntax.hpp
blob: 4adb32993ce75093e7aa28985230ba15eb865e29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/*
    Ousía
    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * @file Syntax.hpp
 *
 * This header contains the Descriptor classes for user definable syntax for
 * Document entities or fields. These classes are referenced in Ontology.hpp.
 */

#ifndef _OUSIA_MODEL_SYNTAX_HPP_
#define _OUSIA_MODEL_SYNTAX_HPP_

#include <core/common/Token.hpp>
#include "Node.hpp"

namespace ousia {

/**
 * Describes one token of user-defined syntax. A descriptor either carries the
 * literal text of the token or, when flagged as special, refers to the token
 * through its id.
 */
struct TokenDescriptor {
	/**
	 * Literal text of the token. The special-token constructor leaves this
	 * string empty.
	 */
	std::string token;

	/**
	 * True if this TokenDescriptor stands for a special token, false if it
	 * was built from a token string.
	 */
	bool special;

	/**
	 * Id that uniquely identifies this token. The string constructor
	 * initialises it to Tokens::Empty.
	 */
	TokenId id;

	/**
	 * Builds a descriptor for a regular (non-special) token. The special flag
	 * is cleared and the id is initialised to Tokens::Empty. Because the
	 * argument is defaulted, this also acts as the default constructor, which
	 * yields a descriptor with an empty token string.
	 *
	 * @param token is the literal text of the token.
	 */
	TokenDescriptor(std::string token = std::string())
	    : token(std::move(token)),
	      special(false),
	      id(Tokens::Empty)
	{
	}

	/**
	 * Builds a descriptor for a special token. The token string stays empty
	 * and the special flag is raised.
	 *
	 * @param id is the id of the special token.
	 */
	TokenDescriptor(TokenId id)
	    : token(),
	      special(true),
	      id(id)
	{
	}

	/**
	 * Checks whether this descriptor carries neither a token string nor an id.
	 *
	 * @return true if and only if the token string is empty and the id equals
	 * Tokens::Empty.
	 */
	bool isEmpty() const
	{
		// A non-empty token string already rules out emptiness.
		if (!token.empty()) {
			return false;
		}
		return id == Tokens::Empty;
	}

	/**
	 * Checks whether this descriptor is valid. The documented contract is that
	 * a descriptor is valid if it is marked as a special token, or is empty,
	 * or has a valid token string set. The definition is not part of this
	 * header.
	 *
	 * @return true if the token descriptor is valid, false otherwise.
	 */
	bool isValid() const;
};

/**
 * Bundles the user-defined syntax of a single StructuredClass,
 * AnnotationClass or FieldDescriptor.
 *
 * Instances are used while a Document is being parsed: each one lists the
 * tokens that are relevant for a Descriptor which could be instantiated at
 * the current position in the document.
 */
struct SyntaxDescriptor {
	/**
	 * Token that opens the described element, or Tokens::Empty if none is set.
	 */
	TokenId open;

	/**
	 * Token that closes the described element, or Tokens::Empty if none is
	 * set.
	 */
	TokenId close;

	/**
	 * Short form token of the described element, or Tokens::Empty if none is
	 * set.
	 */
	TokenId shortForm;

	/**
	 * The Descriptor this SyntaxDescriptor belongs to. Since this can be a
	 * FieldDescriptor as well as a class Descriptor (StructuredClass or
	 * AnnotationClass), the common Node type is the only possible inner type
	 * of the handle.
	 */
	Rooted<Node> descriptor;

	/**
	 * Depth of this SyntaxDescriptor: relative to the current leaf of the
	 * parsed document, the number of transparent elements that would have to
	 * be created in order to construct an instance of the referenced
	 * descriptor. The default constructor sets this value to -1.
	 */
	ssize_t depth;

	/**
	 * Default constructor. Initialises all three token ids with Tokens::Empty,
	 * the descriptor handle with nullptr and the depth with -1.
	 */
	SyntaxDescriptor()
	    : open(Tokens::Empty), close(Tokens::Empty), shortForm(Tokens::Empty),
	      descriptor(nullptr), depth(-1)
	{
	}

	/**
	 * Constructor that initialises every member from the given arguments.
	 *
	 * @param open is a possible open token.
	 * @param close is a possible close token.
	 * @param shortForm is a possible short form token.
	 * @param descriptor is the Descriptor this SyntaxDescriptor belongs to.
	 * @param depth is the number of transparent elements that would be needed,
	 * starting at the current leaf of the parsed document, to construct an
	 * instance of the referenced descriptor.
	 */
	SyntaxDescriptor(TokenId open, TokenId close, TokenId shortForm,
	                 Handle<Node> descriptor, ssize_t depth)
	    : open(open), close(close), shortForm(shortForm),
	      descriptor(descriptor), depth(depth)
	{
	}

	/**
	 * Adds every token referenced by this SyntaxDescriptor to the given
	 * TokenSet. Token ids that are set to Tokens::Empty are skipped.
	 *
	 * @param set is the TokenSet instance that receives the tokens.
	 */
	void insertIntoTokenSet(TokenSet &set) const;

	/**
	 * Tells whether this SyntaxDescriptor belongs to an AnnotationClass.
	 *
	 * @return true if and only if the referenced descriptor is an
	 * AnnotationClass.
	 */
	bool isAnnotation() const;

	/**
	 * Tells whether this SyntaxDescriptor belongs to a StructuredClass.
	 *
	 * @return true if and only if the referenced descriptor is a
	 * StructuredClass.
	 */
	bool isStruct() const;

	/**
	 * Tells whether this SyntaxDescriptor belongs to a FieldDescriptor.
	 *
	 * @return true if and only if the referenced descriptor is a
	 * FieldDescriptor.
	 */
	bool isFieldDescriptor() const;

	/**
	 * Tells whether this SyntaxDescriptor contains no tokens at all, i.e. has
	 * only empty entries for the open, close and short form tokens.
	 *
	 * @return true if and only if the open, close and short form entries are
	 * all empty.
	 */
	bool isEmpty() const;
};
}
#endif