diff options
Diffstat (limited to 'src/core/parser/stack')
-rw-r--r-- | src/core/parser/stack/DocumentHandler.cpp | 24 | ||||
-rw-r--r-- | src/core/parser/stack/DocumentHandler.hpp | 4 | ||||
-rw-r--r-- | src/core/parser/stack/Handler.cpp | 25 | ||||
-rw-r--r-- | src/core/parser/stack/Handler.hpp | 74 | ||||
-rw-r--r-- | src/core/parser/stack/Stack.cpp | 55 | ||||
-rw-r--r-- | src/core/parser/stack/Stack.hpp | 18 |
6 files changed, 144 insertions, 56 deletions
diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index bb04bd3..d44176a 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -25,6 +25,7 @@ #include <core/model/Domain.hpp> #include <core/model/Project.hpp> #include <core/model/Typesystem.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <core/parser/ParserScope.hpp> #include <core/parser/ParserContext.hpp> @@ -372,8 +373,15 @@ bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field, return valid && scope().resolveValue(data, type, logger); } -bool DocumentChildHandler::data(Variant &data) +bool DocumentChildHandler::data(TokenizedData &data) { + // TODO: Handle this correctly + Variant text = data.text(WhitespaceMode::TRIM); + if (text == nullptr) { + // For now, accept "no data" as success + return true; + } + // We're past the region in which explicit fields can be defined in the // parent structure element scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true); @@ -393,11 +401,11 @@ bool DocumentChildHandler::data(Variant &data) // If it is a primitive field directly, try to parse the content. if (field->isPrimitive()) { // Add it as primitive content. - if (!convertData(field, data, logger())) { + if (!convertData(field, text, logger())) { return false; } - parent->createChildDocumentPrimitive(data, fieldIdx); + parent->createChildDocumentPrimitive(text, fieldIdx); return true; } @@ -411,7 +419,7 @@ bool DocumentChildHandler::data(Variant &data) for (auto primitiveField : defaultFields) { // Then try to parse the content using the type specification. 
forks.emplace_back(logger().fork()); - if (!convertData(primitiveField, data, forks.back())) { + if (!convertData(primitiveField, text, forks.back())) { continue; } @@ -424,7 +432,7 @@ bool DocumentChildHandler::data(Variant &data) createPath(fieldIdx, path, parent); // Then create the primitive element - parent->createChildDocumentPrimitive(data); + parent->createChildDocumentPrimitive(text); return true; } @@ -434,10 +442,10 @@ bool DocumentChildHandler::data(Variant &data) if (defaultFields.empty()) { logger().error("Got data, but structure \"" + name() + "\" does not have any primitive field", - data); + text); } else { logger().error("Could not read data with any of the possible fields:", - data); + text); size_t f = 0; for (auto field : defaultFields) { logger().note(std::string("Field ") + @@ -471,4 +479,4 @@ namespace RttiTypes { const Rtti DocumentField = RttiBuilder<ousia::parser_stack::DocumentField>( "DocumentField").parent(&Node); } -}
\ No newline at end of file +} diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 862081c..dda7d8b 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -167,7 +167,7 @@ public: bool start(Variant::mapType &args) override; void end() override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; @@ -213,4 +213,4 @@ extern const Rtti DocumentField; } } -#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */
\ No newline at end of file +#endif /* _OUSIA_PARSER_STACK_DOCUMENT_HANDLER_HPP_ */ diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index bf5d4ea..3d413e8 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -18,6 +18,7 @@ #include <core/common/Exceptions.hpp> #include <core/common/Logger.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <core/parser/ParserContext.hpp> #include "Callbacks.hpp" @@ -130,7 +131,7 @@ bool EmptyHandler::annotationEnd(const Variant &className, return true; } -bool EmptyHandler::data(Variant &data) +bool EmptyHandler::data(TokenizedData &data) { // Support any data return true; @@ -184,10 +185,13 @@ bool StaticHandler::annotationEnd(const Variant &className, return false; } -bool StaticHandler::data(Variant &data) +bool StaticHandler::data(TokenizedData &data) { - logger().error("Did not expect any data here", data); - return false; + if (data.text(WhitespaceMode::TRIM) != nullptr) { + logger().error("Did not expect any data here", data); + return false; + } + return true; } /* Class StaticFieldHandler */ @@ -227,12 +231,19 @@ void StaticFieldHandler::end() } } -bool StaticFieldHandler::data(Variant &data) +bool StaticFieldHandler::data(TokenizedData &data) { + Variant text = data.text(WhitespaceMode::TRIM); + if (text == nullptr) { + // Providing no data here is ok as long as the "doHandle" callback + // function has already been called + return handled; + } + // Call the doHandle function if this has not been done before if (!handled) { handled = true; - doHandle(data, args); + doHandle(text, args); return true; } @@ -240,7 +251,7 @@ bool StaticFieldHandler::data(Variant &data) logger().error( std::string("Found data, but the corresponding argument \"") + argName + std::string("\" was already specified"), - data); + text); // Print the location at which the attribute was originally specified auto it = args.find(argName); diff --git 
a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 7cda7a4..929466d 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -31,6 +31,7 @@ namespace ousia { class ParserScope; class ParserContext; class Logger; +class TokenizedData; namespace parser_stack { @@ -158,40 +159,63 @@ protected: */ const std::string &name() const; -public: - /** - * Virtual destructor. - */ - virtual ~Handler(); - /** * Calls the corresponding function in the Callbacks instance. Sets the * whitespace mode that specifies how string data should be processed. The * calls to this function are placed on a stack by the underlying Stack - * class. + * class. This function should be called from the "fieldStart" callback and + * the "start" callback. If no whitespace mode is pushed in the "start" + * method the whitespace mode "TRIM" is implicitly assumed. * * @param whitespaceMode specifies one of the three WhitespaceMode constants * PRESERVE, TRIM or COLLAPSE. */ - void setWhitespaceMode(WhitespaceMode whitespaceMode); + void pushWhitespaceMode(WhitespaceMode whitespaceMode); /** - * Calls the corresponding function in the Callbacks instance. - * Registers the given token as token that should be reported to the handler - * using the "token" function. - * - * @param token is the token string that should be reported. + * Pops a previously pushed whitespace mode. Calls to this function should + * occur in the "end" callback and the "fieldEnd" callback. This function + * can only undo pushs that were performed by the pushWhitespaceMode() + * method of the same handler. */ - void registerToken(const std::string &token); + void popWhitespaceMode(); /** - * Calls the corresponding function in the Callbacks instance. - * Unregisters the given token, it will no longer be reported to the handler - * using the "token" function. + * Calls the corresponding function in the Callbacks instance. 
Sets the + * whitespace mode that specifies how string data should be processed. The + * calls to this function are placed on a stack by the underlying Stack + * class. This function should be called from the "fieldStart" callback and + * the "start" callback. If no whitespace mode is pushed in the "start" + * method the whitespace mode "TRIM" is implicitly assumed. * - * @param token is the token string that should be unregistered. + * @param tokens is a list of tokens that should be reported to this handler + * instance via the "token" method. */ - void unregisterToken(const std::string &token); + void pushTokens(const std::vector<std::string> &tokens); + + /** + * Pops a previously pushed whitespace mode. Calls to this function should + * occur in the "end" callback and the "fieldEnd" callback. This function + * can only undo pushs that were performed by the pushWhitespaceMode() + * method of the same handler. + */ + void popWhitespaceMode(); + + + /** + * Calls the corresponding function in the Callbacks instance. This method + * registers the given tokens as tokens that are generally available, tokens + * must be explicitly enabled using the "pushTokens" and "popTokens" method. + * Tokens that have not been registered are not guaranteed to be reported, + * even though they are + */ + void registerTokens(const std::vector<std::string> &tokens); + +public: + /** + * Virtual destructor. + */ + virtual ~Handler(); /** * Returns the command name for which the handler was created. @@ -299,11 +323,11 @@ public: * Handler instance. Should return true if the data could be handled, false * otherwise. * - * @param data is a string variant containing the character data and its - * location. + * @param data is an instance of TokenizedData containing the segmented + * character data and its location. * @return true if the data could be handled, false otherwise. 
*/ - virtual bool data(Variant &data) = 0; + virtual bool data(TokenizedData &data) = 0; }; /** @@ -333,7 +357,7 @@ public: Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; /** * Creates an instance of the EmptyHandler class. @@ -359,7 +383,7 @@ public: Variant::mapType &args) override; bool annotationEnd(const Variant &className, const Variant &elementName) override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; }; /** @@ -412,7 +436,7 @@ protected: public: bool start(Variant::mapType &args) override; void end() override; - bool data(Variant &data) override; + bool data(TokenizedData &data) override; }; } } diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 5b67248..309c9a0 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -19,6 +19,7 @@ #include <core/common/Logger.hpp> #include <core/common/Utils.hpp> #include <core/common/Exceptions.hpp> +#include <core/parser/utils/TokenizedData.hpp> #include <core/parser/ParserScope.hpp> #include <core/parser/ParserContext.hpp> @@ -413,16 +414,24 @@ void Stack::command(const Variant &name, const Variant::mapType &args) } } -void Stack::data(const Variant &data) +void Stack::data(TokenizedData data) { - // End handlers that already had a default field and are currently not - // active. - endOverdueHandlers(); + // TODO: Rewrite this function for token handling + // TODO: This loop needs to be refactored out + while (!data.atEnd()) { + // End handlers that already had a default field and are currently not + // active. 
+ endOverdueHandlers(); - while (true) { - // Check whether there is any command the data can be sent to + const bool hasNonWhitespaceText = data.hasNonWhitespaceText(); + + // Check whether there is any command the data can be sent to -- if not, + // make sure the data actually is data if (stack.empty()) { - throw LoggableException("No command here to receive data.", data); + if (hasNonWhitespaceText) { + throw LoggableException("No command here to receive data.", data); + } + return; } // Fetch the current command handler information @@ -440,7 +449,10 @@ void Stack::data(const Variant &data) // If the "hadDefaultField" flag is set, we already issued an error // message if (!info.hadDefaultField) { - logger().error("Did not expect any data here", data); + if (hasNonWhitespaceText) { + logger().error("Did not expect any data here", data); + } + return; } } @@ -454,8 +466,16 @@ void Stack::data(const Variant &data) // Pass the data to the current Handler instance bool valid = false; try { - Variant dataCopy = data; - valid = info.handler->data(dataCopy); + // Create a fork of the TokenizedData and let the handler work + // on it + TokenizedData dataFork = data; + valid = info.handler->data(dataFork); + + // If the data was validly handled by the handler, commit the + // change + if (valid) { + data = dataFork; + } } catch (LoggableException ex) { loggerFork.log(ex); @@ -482,6 +502,19 @@ void Stack::data(const Variant &data) } } +void Stack::data(const Variant &stringData) +{ + // Fetch the SourceLocation of the given stringData variant + SourceLocation loc = stringData.getLocation(); + + // Create a TokenizedData instance and feed the given string data into it + TokenizedData tokenizedData(loc.getSourceId()); + tokenizedData.append(stringData.asString(), loc.getStart()); + + // Call the actual "data" method + data(tokenizedData); +} + void Stack::fieldStart(bool isDefault) { // Make sure the current handler stack is not empty @@ -584,4 +617,4 @@ void 
Stack::token(Variant token) // TODO } } -}
\ No newline at end of file +} diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp index b67ce82..cd29b28 100644 --- a/src/core/parser/stack/Stack.hpp +++ b/src/core/parser/stack/Stack.hpp @@ -44,6 +44,7 @@ namespace ousia { // Forward declarations class ParserContext; class Logger; +class TokenizedData; namespace parser_stack { @@ -292,13 +293,24 @@ public: void command(const Variant &name, const Variant::mapType &args); /** - * Function that shuold be called whenever character data is found in the + * Function that should be called whenever character data is found in the * input stream. May only be called if the currently is a command on the * stack. * - * @param data is a string variant containing the data that has been found. + * @param data is a TokenizedData instance containing the pre-segmented data + * that should be read. + */ + void data(TokenizedData data); + + /** + * Function that should be called whenever character data is found in the + * input stream. The given string variant is converted into a TokenizedData + * instance internally. + * + * @param stringData is a string variant containing the data that has been + * found. */ - void data(const Variant &data); + void data(const Variant &stringData); /** * Function that should be called whenever a new field starts. Fields of the |