1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
|
/*
Ousía
Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file Handler.hpp
*
* Contains the definition of the Handler class, used for representing Handlers
* for certain syntactic elements.
*
* @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
*/
#ifndef _OUSIA_PARSER_STACK_HANDLER_HPP_
#define _OUSIA_PARSER_STACK_HANDLER_HPP_
#include <string>
#include <core/common/Location.hpp>
#include <core/common/Variant.hpp>
#include <core/common/Whitespace.hpp>
#include <core/common/Token.hpp>
#include <core/model/Node.hpp>
#include <core/model/Syntax.hpp>
namespace ousia {
// Forward declarations
class ParserScope;
class ParserContext;
class Logger;
class TokenizedData;
class Variant;
namespace parser_stack {
// More forward declarations
class HandlerCallbacks;
class State;
/**
 * Distinguishes the purposes a Handler instance can be created for: handling
 * a plain command, handling the start or the end of an annotation, or
 * handling a token.
 */
enum class HandlerType {
    /** The handler was created for a command. */
    COMMAND,
    /** The handler was created for the start of an annotation. */
    ANNOTATION_START,
    /** The handler was created for the end of an annotation. */
    ANNOTATION_END,
    /** The handler was created for a token. */
    TOKEN
};
/**
 * Bundles all the data that is passed to a Handler instance when it is
 * created. The parser context, the callbacks and the state are held as
 * references, so the referenced objects must outlive the HandlerData
 * instance; the token and the handler type are stored by value.
 */
class HandlerData {
public:
/**
 * Reference to the ParserContext instance that should be used to resolve
 * references to nodes in the Graph.
 */
ParserContext &ctx;
/**
 * Reference to an object implementing the HandlerCallbacks interface, used
 * for modifying the behaviour of the parser (like registering tokens,
 * setting the data type or changing the whitespace handling mode).
 */
HandlerCallbacks &callbacks;
/**
 * Reference to the current state of the state machine.
 */
const State &state;
/**
 * Token containing the name of the command that is being handled, the
 * location of the element in the source code or the token id of the token
 * that is being handled. Held by value (not as a reference).
 */
Token token;
/**
 * Type describing for which purpose the HandlerData instance was created.
 */
HandlerType type;
/**
 * Constructor of the HandlerData class.
 *
 * @param ctx is the parser context the handler should be executed in.
 * @param callbacks is an instance of HandlerCallbacks used to notify the
 * parser about certain state changes.
 * @param state is the state this handler was called for.
 * @param token contains name, token id and location of the command that is
 * being handled.
 * @param type describes the purpose of the Handler instance at hand.
 */
HandlerData(ParserContext &ctx, HandlerCallbacks &callbacks,
const State &state, const Token &token, HandlerType type);
};
/**
 * The Handler class provides a context for handling a generic stack element.
 * It has to be overridden and registered in the StateStack class to form
 * handlers for concrete XML tags.
 *
 * The class is abstract: all "start", "end", "field" and "data" callbacks are
 * pure virtual and must be implemented by a derived class.
 */
class Handler {
private:
    /**
     * Structure containing the internal handler data.
     */
    const HandlerData handlerData;

    /**
     * Pointer to the current logger. If not nullptr, this will override the
     * logger from the ParserContext specified in the handlerData.
     */
    Logger *internalLogger;

protected:
    /**
     * Constructor of the Handler class.
     *
     * Declared explicit so that a HandlerData instance is never silently
     * converted into a handler; derived classes inherit this constructor via
     * using-declarations and thereby inherit the explicitness.
     *
     * @param handlerData is a structure containing all data being passed to
     * the handler.
     */
    explicit Handler(const HandlerData &handlerData);

    /**
     * Calls the corresponding function in the HandlerCallbacks instance. This
     * method registers the given token as a token that is generally
     * available; tokens must be explicitly enabled using the "pushTokens" and
     * "popTokens" methods. Tokens that have not been registered are not
     * guaranteed to be reported (except for special tokens, these do not have
     * to be registered).
     *
     * @param token is the token string that should be made available.
     * @return the TokenId that will be used to refer to the token.
     */
    TokenId registerToken(const std::string &token);

    /**
     * Calls the corresponding function in the HandlerCallbacks instance. This
     * method unregisters the given token. Note that for a token to be no
     * longer reported, this function has to be called as many times as
     * registerToken() for the corresponding token.
     *
     * @param id is the id of the Token that should be unregistered.
     */
    void unregisterToken(TokenId id);

    /**
     * Pushes a list of SyntaxDescriptor instances onto the internal stack.
     * The tokens described in the token list are the tokens that are
     * currently enabled.
     *
     * @param tokens is a list of SyntaxDescriptor instances that should be
     * stored on the stack.
     */
    void pushTokens(const std::vector<SyntaxDescriptor> &tokens);

    /**
     * Calls the corresponding function in the HandlerCallbacks instance.
     * Removes the previously pushed list of tokens from the stack.
     */
    void popTokens();

    /**
     * Calls the corresponding method in the HandlerCallbacks instance. Reads
     * a string variant from the current input stream. This function must be
     * called from the data() method.
     *
     * @return a string variant containing the current text data. The return
     * value depends on the currently set whitespace mode and the tokens that
     * are currently enabled (see pushTokens()).
     */
    Variant readData();

    /**
     * Calls the corresponding function in the Callbacks instance. Sets the
     * whitespace mode that specifies how string data should be processed. The
     * calls to this function are placed on a stack by the underlying Stack
     * class. This function should be called from the "fieldStart" callback
     * and the "start" callback. If no whitespace mode is pushed in the
     * "start" method the whitespace mode "TRIM" is implicitly assumed.
     *
     * Currently disabled: the declaration below is commented out.
     *
     * @param whitespaceMode specifies one of the three WhitespaceMode
     * constants PRESERVE, TRIM or COLLAPSE.
     */
    // void pushWhitespaceMode(WhitespaceMode whitespaceMode);

    /**
     * Pops a previously pushed whitespace mode. Calls to this function should
     * occur in the "end" callback and the "fieldEnd" callback. This function
     * can only undo pushes that were performed by the pushWhitespaceMode()
     * method of the same handler.
     *
     * Currently disabled: the declaration below is commented out.
     */
    // void popWhitespaceMode();

public:
    /**
     * Enum type representing the possible outcomes of the endToken() method.
     *
     * NOTE(review): the enumerators presumably correspond, in order, to the
     * three outcomes listed in the documentation of endToken() -- confirm
     * against the implementation.
     */
    enum class EndTokenResult { ENDED_THIS, ENDED_HIDDEN, ENDED_NONE };

    /**
     * Virtual destructor.
     */
    virtual ~Handler();

    /**
     * Returns a reference to the ParserContext.
     *
     * @return a reference to the ParserContext.
     */
    ParserContext &context();

    /**
     * Returns a reference to the ParserScope instance.
     *
     * @return a reference to the ParserScope instance.
     */
    ParserScope &scope();

    /**
     * Returns a reference to the Manager instance which manages all nodes.
     *
     * @return a reference to the Manager instance.
     */
    Manager &manager();

    /**
     * Returns a reference to the Logger instance used for logging error
     * messages.
     *
     * @return a reference to the Logger instance.
     */
    Logger &logger();

    /**
     * Returns the name of the command or annotation the handler is currently
     * handling. In case the handler is currently handling a token, the name
     * corresponds to the token string sequence.
     *
     * @return the name of the command or the string sequence of the token
     * that is being handled by this handler.
     */
    const std::string &name() const;

    /**
     * Returns the token id of the token that is currently being handled by
     * the handler. In case the handler currently handles a command or
     * annotation, the token id is set to Tokens::Data.
     *
     * @return the current token id or Tokens::Data if no token is being
     * handled.
     */
    TokenId tokenId() const;

    /**
     * Returns a reference to the Token instance, containing either the token
     * that is currently being handled or the name of the command and
     * annotation and their location.
     *
     * @return a const reference to the internal token instance.
     */
    const Token &token() const;

    /**
     * Returns the location of the element in the source file, for which this
     * Handler was created.
     *
     * @return the location of the Handler in the source file.
     */
    const SourceLocation &location() const;

    /**
     * Returns the type describing the purpose for which the handler instance
     * was created.
     *
     * @return the HandlerType of this handler.
     */
    HandlerType type() const;

    /**
     * Returns a reference to the State descriptor for which this Handler was
     * created.
     *
     * @return a const reference to the constructing State descriptor.
     */
    const State &state() const;

    /**
     * Sets the internal logger to the given logger instance.
     *
     * @param logger is the Logger instance to which the logger should be set.
     */
    void setLogger(Logger &logger);

    /**
     * Resets the logger instance to the logger instance provided in the
     * ParserContext.
     */
    void resetLogger();

    /**
     * Returns the location of the element in the source file, for which this
     * Handler was created.
     *
     * NOTE(review): documented identically to location(); presumably an
     * older alias that is kept for existing callers -- confirm and consider
     * consolidating.
     *
     * @return the location of the Handler in the source file.
     */
    const SourceLocation &getLocation() const;

    /**
     * Called whenever the handler should handle the start of a command. This
     * method (or any other of the "start" methods) is called exactly once,
     * after the constructor. The name of the command that is started here can
     * be accessed using the name() method.
     *
     * @param args is a map from strings to variants (argument name and
     * value).
     * @return true if the handler was successful in starting the element the
     * given name represents, false otherwise.
     */
    virtual bool startCommand(Variant::mapType &args) = 0;

    /**
     * Called whenever the handler should handle the start of an annotation.
     * This method (or any other of the "start" methods) is called exactly
     * once, after the constructor. This method is only called if the
     * "supportsAnnotations" flag of the State instance referencing this
     * Handler is set to true. The name of the annotation that is started here
     * can be accessed using the name() method.
     *
     * @param args is a map from strings to variants (argument name and
     * value).
     * @return presumably true on success and false otherwise, mirroring
     * startCommand() -- TODO confirm, the return value was not documented.
     */
    virtual bool startAnnotation(Variant::mapType &args) = 0;

    /**
     * Called whenever the handler should handle the start of a token. This
     * method (or any other of the "start" methods) is called exactly once,
     * after the constructor. This method is only called if the
     * "supportsTokens" flag of the State instance referencing this Handler is
     * set to true. The token id of the token that should be handled can be
     * accessed using the tokenId() method.
     *
     * @param node is the node for which this token was registered.
     * @return presumably true on success and false otherwise, mirroring
     * startCommand() -- TODO confirm, the return value was not documented.
     */
    virtual bool startToken(Handle<Node> node) = 0;

    /**
     * Called whenever a token is marked as "end" token and this handler
     * happens to be the currently active handler. This operation may have
     * three outcomes:
     * <ol>
     *   <li>The token marks the end of the complete handler and the calling
     *       code should call the "end" method.</li>
     *   <li>The token marks the end of some element that is unknown to the
     *       calling code. So the operation itself was a success, but the
     *       calling code should not call the "end" method.</li>
     *   <li>The token did not end anything in this context. Basically this
     *       should never happen, but who knows.</li>
     * </ol>
     *
     * @param token is the Token which was marked as "end" token.
     * @param node is the node for which this token was registered.
     * @return an EndTokenResult value describing which of the outcomes
     * occurred.
     */
    virtual EndTokenResult endToken(const Token &token, Handle<Node> node) = 0;

    /**
     * Called before the command for which this handler is defined ends (is
     * forever removed from the stack).
     */
    virtual void end() = 0;

    /**
     * Called when a new field starts, while the handler is active. This
     * function should return true if the field is supported, false otherwise.
     * No error should be logged if the field cannot be started, the caller
     * will take care of that (since it is always valid to start a default
     * field, even though the corresponding structure does not have a field,
     * as long as no data is fed into the field).
     *
     * @param isDefault is set to true if the field that is being started is
     * the default/tree field. The handler should set the value of this
     * variable to true if the referenced field is indeed the default field.
     * @param fieldIdx is the numerical index of the field.
     * @return true if the field is supported, false otherwise.
     */
    virtual bool fieldStart(bool &isDefault, size_t fieldIdx) = 0;

    /**
     * Called when a previously opened field ends, while the handler is
     * active. Note that "fieldStart" and "fieldEnd" are always called
     * alternately.
     */
    virtual void fieldEnd() = 0;

    /**
     * Called whenever raw data (in the form of a string) is available for the
     * Handler instance. Should return true if the data could be handled,
     * false otherwise. The actual data variant must be retrieved using the
     * readData() method.
     *
     * @return true if the data could be handled, false otherwise.
     */
    virtual bool data() = 0;
};
/**
 * HandlerConstructor is a function pointer type used to create concrete
 * instances of the Handler class. Static factory functions with the signature
 * "Handler *f(const HandlerData &)" -- such as EmptyHandler::create -- match
 * this type.
 *
 * NOTE(review): the result is a raw pointer; presumably the caller takes
 * ownership of the created instance -- confirm against the code invoking the
 * constructor function.
 *
 * @param handlerData is the data that should be passed to the new handler
 * instance.
 * @return a newly created handler instance.
 */
using HandlerConstructor = Handler *(*)(const HandlerData &handlerData);
/**
 * The EmptyHandler class is used in case no element handler is specified in
 * the State descriptor. It just accepts all data and does nothing.
 */
class EmptyHandler : public Handler {
protected:
/**
 * Inherits the constructors of the Handler base class, so an EmptyHandler is
 * constructed from a HandlerData instance.
 */
using Handler::Handler;
public:
/*
 * Overrides of all pure virtual callbacks declared in Handler. The bodies are
 * defined out of line and are not visible in this header.
 */
bool startCommand(Variant::mapType &args) override;
bool startAnnotation(Variant::mapType &args) override;
bool startToken(Handle<Node> node) override;
EndTokenResult endToken(const Token &token, Handle<Node> node) override;
void end() override;
bool fieldStart(bool &isDefault, size_t fieldIdx) override;
void fieldEnd() override;
bool data() override;
/**
 * Creates an instance of the EmptyHandler class. The signature matches the
 * HandlerConstructor function pointer type.
 *
 * NOTE(review): returns a raw pointer; presumably the caller takes ownership
 * of the new instance -- confirm against the calling code.
 *
 * @param handlerData is the data that should be passed to the new handler
 * instance.
 * @return a pointer to the newly created EmptyHandler instance.
 */
static Handler *create(const HandlerData &handlerData);
};
/**
 * The StaticHandler class is used to handle predefined commands which do
 * neither support annotations, nor multiple fields. Child classes can decide
 * whether a single data field should be used.
 */
class StaticHandler : public Handler {
protected:
/**
 * Inherits the constructors of the Handler base class, so a StaticHandler is
 * constructed from a HandlerData instance.
 */
using Handler::Handler;
public:
/*
 * Overrides of all pure virtual callbacks declared in Handler. The bodies are
 * defined out of line and are not visible in this header.
 */
bool startCommand(Variant::mapType &args) override;
bool startAnnotation(Variant::mapType &args) override;
bool startToken(Handle<Node> node) override;
EndTokenResult endToken(const Token &token, Handle<Node> node) override;
void end() override;
bool fieldStart(bool &isDefault, size_t fieldIdx) override;
void fieldEnd() override;
bool data() override;
};
/**
 * The StaticFieldHandler class is used to handle predefined commands which do
 * neither support annotations, nor multiple fields. Additionally, it captures
 * a data entry from a single default field.
 *
 * The class is abstract: derived classes have to implement doHandle().
 */
class StaticFieldHandler : public StaticHandler {
private:
/**
 * Set to the name of the data argument that should be used instead of the
 * data field, if no data field is given.
 */
std::string argName;
/**
 * Set to true, once the "doHandle" function has been called.
 */
bool handled;
/**
 * Map containing the arguments given in the start function.
 */
Variant::mapType args;
protected:
/**
 * Constructor of the StaticFieldHandler class.
 *
 * @param handlerData is a structure containing the internal data that
 * should be stored inside the handler.
 * @param argName is the name of the data argument that -- if present --
 * should be used instead of the data field. If empty, data is not captured
 * from the arguments. If both, data in the data field and the argument, are
 * given, this results in an error.
 */
StaticFieldHandler(const HandlerData &handlerData,
const std::string &argName);
/**
 * Function that should be overridden in order to handle the field data and
 * the other arguments. This function is not called if no data was given.
 *
 * @param fieldData is the captured field data.
 * @param args are the arguments that were given in the "start" function.
 */
virtual void doHandle(const Variant &fieldData, Variant::mapType &args) = 0;
public:
/*
 * Callbacks re-implemented on top of StaticHandler. The bodies are defined
 * out of line and are not visible in this header.
 */
bool startCommand(Variant::mapType &args) override;
bool data() override;
void end() override;
};
}
}
#endif /* _OUSIA_PARSER_STACK_HANDLER_HPP_ */
|