diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-04-11 16:51:38 +0200 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2016-04-25 22:24:14 +0200 |
commit | d369ff33faa4bf5654db3f1eb105141fccf2270d (patch) | |
tree | dd09b8f9a628d055b36b96b3fca9b4b407c1413d /src/core/parser/stack | |
parent | 994615f76b86a65f11829863be96c63135eef977 (diff) |
Reimplement closeToken handling
Idea: Only start unrolling anything on the parser stack
if an element that matches the given close token is found.
This requires the endToken method in DocumentChildHandler
to search for the given descriptor that might be ended.
While performing this search, only a specified number of
"explicit" structures/fields opened by the Stack class
(those with implicit default fields) may be skipped.
Added an integration test ("python_code") which requires
this new (hopefully sane) behaviour.
Diffstat (limited to 'src/core/parser/stack')
-rw-r--r-- | src/core/parser/stack/DocumentHandler.cpp | 74 | ||||
-rw-r--r-- | src/core/parser/stack/DocumentHandler.hpp | 2 | ||||
-rw-r--r-- | src/core/parser/stack/Handler.cpp | 17 | ||||
-rw-r--r-- | src/core/parser/stack/Handler.hpp | 71 | ||||
-rw-r--r-- | src/core/parser/stack/Stack.cpp | 184 |
5 files changed, 222 insertions, 126 deletions
diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index 34d4d17..ce5d8a2 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -583,60 +583,70 @@ bool DocumentChildHandler::startToken(Handle<Node> node) } } -DocumentChildHandler::EndTokenResult DocumentChildHandler::endToken( - const Token &token, Handle<Node> node) +EndTokenResult DocumentChildHandler::endToken(Handle<Node> node, size_t maxStackDepth) { - // Iterate over the transparent elements in the scope stack + // Fetch the current scope stack const NodeVector<Node> &stack = scope().getStack(); - ssize_t depth = -1; - for (auto sit = stack.crbegin(); sit != stack.crend(); sit++, depth++) { + + bool found = false; // true once the given node has been found + bool repeat = false; + size_t scopeStackDepth = 0; // # of elems on the scope stack + size_t currentStackDepth = 0; // # of "explicit" elems on the parser stack + + // Iterate over the elements in the scope stack + for (auto sit = stack.crbegin(); sit != stack.crend(); + sit++, scopeStackDepth++) { Rooted<Node> leaf = *sit; + bool isExplicit = false; if (leaf->isa(&RttiTypes::DocumentField)) { Rooted<DocumentField> field = leaf.cast<DocumentField>(); if (field->getDescriptor() == node) { // If the field is transparent, end it by incrementing the depth // counter -- both the field itself and the consecutive element // need to be removed + found = true; if (field->transparent) { - depth += 2; - break; + repeat = true; + scopeStackDepth++; } - return EndTokenResult::ENDED_THIS; - } - - // Abort if the field is explicit - if (!field->transparent) { - return EndTokenResult::ENDED_NONE; } + isExplicit = field->explicitField; + } else if (leaf->isa(&RttiTypes::StructuredEntity)) { + Rooted<StructuredEntity> entity = leaf.cast<StructuredEntity>(); + found = entity->getDescriptor() == node; + repeat = found && entity->isTransparent(); + isExplicit = 
!entity->isTransparent(); } - if (leaf->isa(&RttiTypes::StructuredEntity)) { - Rooted<StructuredEntity> entity = leaf.cast<StructuredEntity>(); - if (entity->getDescriptor() == node) { - // If the entity is transparent, end it by incrementing the - // depth counter and aborting - if (entity->isTransparent()) { - depth++; - break; - } - return EndTokenResult::ENDED_THIS; - } + // TODO: End annotations! - // Abort if this entity is explicit - if (!entity->isTransparent()) { - return EndTokenResult::ENDED_NONE; - } + // If the given structure is a explicit sturcture (represents a handler) + // increment the stack depth and abort once the maximum stack depth has + // been surpassed. + if (isExplicit) { + currentStackDepth++; + } + if (found || currentStackDepth > maxStackDepth) { + break; } + } - // TODO: End annotations! + // Abort with a value smaller than zero if the element has not been found + if (!found || currentStackDepth > maxStackDepth) { + return EndTokenResult(); + } + + // If the element has been found, return the number of handlers that have to + // be popped from the parser stack + if (currentStackDepth > 0) { + return EndTokenResult(currentStackDepth, true, repeat); } // End all elements that were marked for being closed - for (ssize_t i = 0; i <= depth; i++) { + for (size_t i = 0; i < scopeStackDepth + 1; i++) { scope().pop(logger()); } - return (depth >= 0) ? 
EndTokenResult::ENDED_HIDDEN - : EndTokenResult::ENDED_NONE; + return EndTokenResult(0, true, false); } void DocumentChildHandler::end() diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 75e32fd..3ef5f08 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -222,7 +222,7 @@ public: bool startCommand(Variant::mapType &args) override; bool startAnnotation(Variant::mapType &args) override; bool startToken(Handle<Node> node) override; - EndTokenResult endToken(const Token &token, Handle<Node> node) override; + EndTokenResult endToken(Handle<Node> node, size_t maxStackDepth) override; void end() override; bool data() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 1399fef..69bfc76 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -47,7 +47,8 @@ Handler::Handler(const HandlerData &handlerData) { } -Handler::~Handler() { +Handler::~Handler() +{ while (tokenStackDepth > 0) { popTokens(); } @@ -90,7 +91,8 @@ void Handler::pushTokens(const std::vector<SyntaxDescriptor> &tokens) handlerData.callbacks.pushTokens(tokens); } -void Handler::popTokens() { +void Handler::popTokens() +{ assert(tokenStackDepth > 0 && "popTokens called too often"); tokenStackDepth--; handlerData.callbacks.popTokens(); @@ -133,11 +135,10 @@ bool EmptyHandler::startToken(Handle<Node> node) return false; } -Handler::EndTokenResult EmptyHandler::endToken(const Token &token, - Handle<Node> node) +EndTokenResult EmptyHandler::endToken(Handle<Node> node, size_t maxStackDepth) { // There are no tokens to end here. 
- return EndTokenResult::ENDED_NONE; + return EndTokenResult(); } void EmptyHandler::end() @@ -179,10 +180,10 @@ bool StaticHandler::startAnnotation(Variant::mapType &args) { return false; } bool StaticHandler::startToken(Handle<Node> node) { return false; } -Handler::EndTokenResult StaticHandler::endToken(const Token &token, - Handle<Node> node) +EndTokenResult StaticHandler::endToken(Handle<Node> node, size_t maxStackDepth) { - return EndTokenResult::ENDED_NONE; + // There are no tokens to end here. + return EndTokenResult(); } void StaticHandler::end() diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 9355e09..bd6ea72 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -60,6 +60,47 @@ class State; enum class HandlerType { COMMAND, ANNOTATION_START, ANNOTATION_END, TOKEN }; /** + * Structure returned by the endToken method. + */ +struct EndTokenResult { + /** + * Contains the number of explicit elements that need to be unrolled. + */ + size_t depth; + + /** + * Set to true if the given descriptor was found. + */ + bool found; + + /** + * Set to true if the endToken function must be called again after + * unrolling. + */ + bool repeat; + + /** + * Default constructor, initializes all members to sane default values, + * the descriptor has not been found. + */ + EndTokenResult() : depth(0), found(false), repeat(false) {} + + /** + * Constructor, initializes all member variables with the given values. + * + * @param depth is the number of explicit elements that need to be unrolled. + * @param found if true, an element corresponding to the descriptor has been + * found. + * @param repeat if true, the endToken method needs to be called again once + * the elements have been unrolled. + */ + EndTokenResult(size_t depth, bool found, bool repeat) + : depth(depth), found(found), repeat(repeat) + { + } +}; + +/** * Class collecting all the data that is being passed to a Handler * instance. 
*/ @@ -214,11 +255,6 @@ protected: public: /** - * Enum type representing the possible outcomes of the endToken() method. - */ - enum class EndTokenResult { ENDED_THIS, ENDED_HIDDEN, ENDED_NONE }; - - /** * Virtual destructor. */ virtual ~Handler(); @@ -362,22 +398,15 @@ public: /** * Called whenever a token is marked as "end" token and this handler happens - * to be the currently active handler. This operation may have three - * outcomes: - * <ol> - * <li>The token marks the end of the complete handler and the calling - * code should call the "end" method.</li> - * <li>The token marks the end of some element that is unknown the calling - * code. So the operation itself was a success, but the calling code - * should not call the "end" method. - * <li>The token did not match anything in this context. Basically this - * should never happen, but who knows.</li> - * </ol> + * to be the currently active handler. * - * @param id is the Token for which the handler should be started. - * @param node is the node for which this token was registered. + * @param node is the node for for which a close token was found. + * @param maxStackDepth contains the number of handlers on the stack that + * can end. + * @return an EndTokenResult instance specifiying how to procede. 
*/ - virtual EndTokenResult endToken(const Token &token, Handle<Node> node) = 0; + virtual EndTokenResult endToken(Handle<Node> node, + size_t maxStackDepth) = 0; /** * Called before the command for which this handler is defined ends (is @@ -439,7 +468,7 @@ public: bool startCommand(Variant::mapType &args) override; bool startAnnotation(Variant::mapType &args) override; bool startToken(Handle<Node> node) override; - EndTokenResult endToken(const Token &token, Handle<Node> node) override; + EndTokenResult endToken(Handle<Node> node, size_t maxStackDepth) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; void fieldEnd() override; @@ -464,7 +493,7 @@ public: bool startCommand(Variant::mapType &args) override; bool startAnnotation(Variant::mapType &args) override; bool startToken(Handle<Node> node) override; - EndTokenResult endToken(const Token &token, Handle<Node> node) override; + EndTokenResult endToken(Handle<Node> node, size_t maxStackDepth) override; void end() override; bool fieldStart(bool &isDefault, size_t fieldIdx) override; void fieldEnd() override; diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 697e663..3c17de7 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -386,13 +386,25 @@ private: HandlerInfo &lastInfo(); /** - * Returns a list containing the currently pending close tokens. + * Returns the maximum stack depth that can be unrolled. * - * @param token is a TokenId for which the result list should be filtered. - * If set to Tokens::Empty, all tokens are returned. + * @return the number of elements that can currently be removed from the + * stack. + */ + size_t maxUnrollStackDepth() const; + + /** + * Returns the index of the next reachable handler on the stack having the + * given token as close token -- or in case Tokens::Empty is given, the + * index of the next reachable handler with a set close token. 
+ * + * @param token is the TokenId of the close token that is being searched. If + * Tokens::Empty is passed, the first handler with a non-empty close token + * will be returned. + * @return the index of the corresponding handler on the stack or a negative + * value if no such handler could be found. */ - std::vector<SyntaxDescriptor> pendingCloseTokens( - TokenId token = Tokens::Empty) const; + ssize_t pendingCloseTokenHandlerIdx(TokenId token = Tokens::Empty) const; /** * Returns a set containing the tokens that should currently be processed @@ -650,33 +662,40 @@ std::string StackImpl::currentCommandName() const return stack.empty() ? std::string{} : stack.back().name(); } -std::vector<SyntaxDescriptor> StackImpl::pendingCloseTokens(TokenId token) const +size_t StackImpl::maxUnrollStackDepth() const { - // TODO: Are there cases in which the returned vector will contain more - // than one element? - std::vector<SyntaxDescriptor> res; - for (auto it = stack.crbegin(); it != stack.crend(); it++) { - if (it->closeToken != Tokens::Empty && - (token == Tokens::Empty || token == it->closeToken)) { - res.push_back(SyntaxDescriptor(Tokens::Empty, it->closeToken, - Tokens::Empty, it->tokenDesciptor, - -1)); - } - if (it->range || it->inField) { + size_t res = 0; + for (ssize_t i = stack.size() - 1; i >= 0; i--) { + const HandlerInfo &info = stack[i]; + if (info.range || (info.inField && !info.inImplicitDefaultField)) { break; } + res++; } return res; } +ssize_t StackImpl::pendingCloseTokenHandlerIdx(TokenId token) const +{ + const ssize_t minIdx = std::max<ssize_t>(0, stack.size() - maxUnrollStackDepth() - 1); + for (ssize_t i = stack.size() - 1; i >= minIdx; i--) { + const HandlerInfo &info = stack[i]; + if (info.closeToken != Tokens::Empty && + (token == Tokens::Empty || token == info.closeToken)) { + return i; + } + } + return -1; +} + TokenSet StackImpl::currentTokens() const { TokenSet res; if (currentInfo().state().supportsTokens) { res = tokenStack.tokens(); - 
std::vector<SyntaxDescriptor> pending = pendingCloseTokens(); - for (const auto &descr : pending) { - res.insert(descr.close); + ssize_t idx = pendingCloseTokenHandlerIdx(); + if (idx >= 0) { + res.insert(stack[idx].closeToken); } } return res; @@ -903,24 +922,52 @@ static void strayTokenError(const Token &token, TokenDescriptor &descr, bool StackImpl::handleCloseTokens(const Token &token, const std::vector<SyntaxDescriptor> &descrs) { - // Fetch the current information - HandlerInfo &info = currentInfo(); + // Abort if the stack is empty -- nothing can be ended in that case + if (stack.empty()) { + return false; + } - // Iterate over all possible SyntaxDescriptors and try to end the token - for (const SyntaxDescriptor &descr : descrs) { - Handler::EndTokenResult res = - info.handler->endToken(token.id, descr.descriptor); - switch (res) { - case Handler::EndTokenResult::ENDED_THIS: - endCurrentHandler(); - return true; - case Handler::EndTokenResult::ENDED_HIDDEN: - return true; - case Handler::EndTokenResult::ENDED_NONE: - break; + // Check whether any of the given token descriptors can be ended -- select + // the one that needs the fewest unrolling + const size_t maxStackDepth = maxUnrollStackDepth(); + const HandlerInfo &info = currentInfo(); + size_t idx = 0; + EndTokenResult bestRes = EndTokenResult(); + for (size_t i = 0; i < descrs.size(); i++) { + // Try to end the handler + const EndTokenResult res = + info.handler->endToken(descrs[i].descriptor, maxStackDepth); + + // Abort if the "endToken" function ended a transparent field -- in this + // case this method has already been successful + if (res.depth == 0 && res.found) { + return true; + } + + // Otherwise check whether the result is positive and smaller than any + // previous result + if (res.found && (!bestRes.found || res.depth < bestRes.depth)) { + idx = i; + bestRes = res; } } - return false; + + // Abort if no descriptor can be ended + if (!bestRes.found) { + return false; + } + + // End as many 
handlers as indicated by the "depth" counter, repeat the + // process if needed + for (size_t i = 0; i < bestRes.depth; i++) { + endCurrentHandler(); + } + if (!stack.empty() && bestRes.repeat) { + currentInfo().handler->endToken( + descrs[idx].descriptor, + 0); + } + return true; } bool StackImpl::handleOpenTokens(Logger &logger, const Token &token, @@ -978,32 +1025,46 @@ bool StackImpl::handleOpenTokens(Logger &logger, const Token &token, void StackImpl::handleToken(const Token &token) { - // Fetch the TokenDescriptor and the "pendingClose" list + // If the token matches one from the "pendingCloseTokens" list, then just + // end the corresponding handler + const ssize_t pendingCloseIndex = pendingCloseTokenHandlerIdx(token.id); + if (pendingCloseIndex >= 0) { + for (ssize_t i = stack.size() - 1; i >= pendingCloseIndex; i--) { + endCurrentHandler(); + } + return; + } + + // Fetch the TokenDescriptor TokenDescriptor descr = tokenStack.lookup(token.id); - std::vector<SyntaxDescriptor> pendingClose = pendingCloseTokens(token.id); - // Iterate until the stack can no longer be unwound + // First try to close pending open tokens, issue an error if this does not + // work and no shortForm or open tokens are declared and the token is not + // a special whitespace token + if (handleCloseTokens(token, descr.close)) { + return; + } else if (descr.shortForm.empty() && descr.open.empty()) { + if (!Token::isSpecial(token.id)) { + strayTokenError(token, descr, logger()); + } + return; + } + + // Now try to handle open or short form tokens. Iterate until the stack can + // no longer be unwound. 
while (!stack.empty()) { LoggerFork loggerFork = logger().fork(); + // TODO: Instead of using hadError flag here implement a "hasError" + // method for LoggerFork bool hadError = false; try { - // Try to close the current handlers - if (handleCloseTokens(token, pendingClose) || - (pendingClose.empty() && - handleCloseTokens(token, descr.close))) { - return; - } - // Try to open an implicit default field and try to start the token // as short form or as start token - prepareCurrentHandler(pendingClose.empty()); - if (pendingClose.empty()) { - if (handleOpenTokens(loggerFork, token, true, - descr.shortForm) || - handleOpenTokens(loggerFork, token, false, descr.open)) { - return; - } + prepareCurrentHandler(true); + if (handleOpenTokens(loggerFork, token, true, descr.shortForm) || + handleOpenTokens(loggerFork, token, false, descr.open)) { + return; } } catch (LoggableException ex) { @@ -1015,19 +1076,14 @@ void StackImpl::handleToken(const Token &token) HandlerInfo &info = currentInfo(); if (info.inImplicitDefaultField && !stack.empty()) { endCurrentHandler(); - } else if (info.inDefaultField && info.closeToken == token.id) { - endCurrentField(); } else { - // Ignore close-only special (whitespace) rules, in all other cases - // issue an error - if (!Token::isSpecial(token.id) || descr.close.empty() || - !descr.open.empty() || !descr.shortForm.empty()) { - loggerFork.commit(); - - // If there was no other error, issue a "stray token" error - if (!hadError) { - strayTokenError(token, descr, logger()); - } + // Commit all encountered errors + loggerFork.commit(); + + // If there was no other error message already, issue a "stray + // token" error + if (!hadError) { + strayTokenError(token, descr, logger()); } return; } |