diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-04-12 17:40:32 +0200 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2016-04-25 22:24:16 +0200 |
commit | bed013e617130f7afd1f90ba57afc160b43c71df (patch) | |
tree | bb5d335132a522e24351e5a3239355f7a9ca2070 | |
parent | 84ab3caa172fc3f4ec7085135964173c8eed5f84 (diff) |
Implement non-greedy behaviour for short tokens
8 files changed, 108 insertions, 22 deletions
diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index aa9a28f..7564fad 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -145,6 +145,11 @@ void DocumentChildHandler::pushScopeTokens() // List containing the unfiltered syntax descriptors std::vector<SyntaxDescriptor> descrs; + // Skip the DocumentField and the corresponding StructuredEntity + // if we're currently in the implicit default field of a non-greedy + // structure. + size_t explicitSkipCount = (!isGreedy && inImplicitDefaultField) ? 2 : 0; + // Fetch the current scope stack and search the first non-transparent field // or structure const ManagedVector<Node> &stack = scope().getStack(); @@ -157,6 +162,10 @@ void DocumentChildHandler::pushScopeTokens() if (nd->isa(&RttiTypes::DocumentField)) { Rooted<DocumentField> field = nd.cast<DocumentField>(); if (!field->transparent) { + if (explicitSkipCount > 0) { + explicitSkipCount--; + continue; + } descrs = field->getDescriptor()->getPermittedTokens(); break; } @@ -166,6 +175,10 @@ void DocumentChildHandler::pushScopeTokens() if (nd->isa(&RttiTypes::StructuredEntity)) { Rooted<StructuredEntity> entity = nd.cast<StructuredEntity>(); if (!entity->isTransparent()) { + if (explicitSkipCount > 0) { + explicitSkipCount--; + continue; + } descrs = entity->getDescriptor()->getPermittedTokens(); break; } @@ -591,12 +604,13 @@ bool DocumentChildHandler::startToken(Handle<Node> node, bool greedy) } } -EndTokenResult DocumentChildHandler::endToken(Handle<Node> node, size_t maxStackDepth) +EndTokenResult DocumentChildHandler::endToken(Handle<Node> node, + size_t maxStackDepth) { // Fetch the current scope stack const ManagedVector<Node> &stack = scope().getStack(); - bool found = false; // true once the given node has been found + bool found = false; // true once the given node has been found bool repeat = false; size_t scopeStackDepth = 0; // # of elems on the scope 
stack size_t currentStackDepth = 0; // # of "explicit" elems on the parser stack diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index ef503f7..cc1eb23 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -464,10 +464,14 @@ private: * startImplicitDefaultField is set to false. If false, prevents this method * from ending a handler if it potentially can have a default field, but did * not have one yet. + * @param endNonGreedyHandlers if set to true, non-greedy handlers that are + * not in a field are ended. If false, being non-greedy alone does not + * cause a handler to be ended. * @return true if the current command is in a valid field. */ bool prepareCurrentHandler(bool startImplicitDefaultField = true, - bool endHandlersWithoutDefaultField = true); + bool endHandlersWithoutDefaultField = true, + bool endNonGreedyHandlers = true); /** * Returns true if all handlers on the stack are currently valid, or false @@ -779,7 +783,8 @@ bool StackImpl::endCurrentHandler() } bool StackImpl::prepareCurrentHandler(bool startImplicitDefaultField, - bool endHandlersWithoutDefaultField) + bool endHandlersWithoutDefaultField, + bool endNonGreedyHandlers) { // Repeat until a valid handler is found on the stack while (!stack.empty()) { @@ -787,9 +792,15 @@ bool StackImpl::prepareCurrentHandler(bool startImplicitDefaultField, HandlerInfo &info = currentInfo(); // If the current Handler is in a field, there is nothing to be done, - // abort + // abort. Exception: If the handler is not greedy and currently is in + // its default field, then continue. 
if (info.inField) { - return true; + if (!info.greedy && info.hadData && info.inImplicitDefaultField) { + endCurrentField(); + continue; + } else { + return true; + } } // If the current field already had a default field or is not valid, @@ -798,7 +809,7 @@ bool StackImpl::prepareCurrentHandler(bool startImplicitDefaultField, info.type() == HandlerType::COMMAND || info.type() == HandlerType::TOKEN || (info.type() == HandlerType::ANNOTATION_START && info.range); - if (info.hadDefaultField || + if (info.hadDefaultField || (!info.greedy && endNonGreedyHandlers) || (!startImplicitDefaultField && endHandlersWithoutDefaultField) || !info.valid || !canHaveImplicitDefaultField) { // We cannot end the command if it is marked as "range" command @@ -845,7 +856,7 @@ bool StackImpl::handleData() while (true) { // Prepare the stack -- make sure all overdue handlers are ended and // we currently are in an open field - if (stack.empty() || !prepareCurrentHandler()) { + if (stack.empty() || !prepareCurrentHandler(true, true, false)) { throw LoggableException("Did not expect any data here"); } @@ -948,8 +959,8 @@ static void strayTokenError(const Token &token, TokenDescriptor &descr, } static void checkTokensAreUnambiguous(const Token &token, - const TokenDescriptor &descr, - Logger &logger) + const TokenDescriptor &descr, + Logger &logger) { // Some helper functions and constants constexpr ssize_t MAX_DEPTH = std::numeric_limits<ssize_t>::max(); @@ -1158,7 +1169,7 @@ void StackImpl::handleToken(const Token &token) try { // Try to open an implicit default field and try to start the token // as short form or as start token - prepareCurrentHandler(true); + prepareCurrentHandler(); if (handleOpenTokens(loggerFork, token, true, descr.shortForm) || handleOpenTokens(loggerFork, token, false, descr.open)) { return; @@ -1417,7 +1428,7 @@ void StackImpl::data(const TokenizedData &data) // Close all handlers that did already had or cannot have a default field // and are not currently inside a 
field (repeat this after each chunk of // data/text) - prepareCurrentHandler(false, false); + prepareCurrentHandler(false, false, false); // Peek a token from the reader, repeat until all tokens have been read Token token; @@ -1434,7 +1445,7 @@ void StackImpl::data(const TokenizedData &data) handleToken(token); reader.consumePeek(); } - prepareCurrentHandler(false, false); + prepareCurrentHandler(false, false, false); } } @@ -1511,11 +1522,7 @@ void StackImpl::pushTokens(const std::vector<SyntaxDescriptor> &tokens) tokenStack.pushTokens(tokens); } -void StackImpl::popTokens() -{ - // Pop the last set of tokens from the token stack. - tokenStack.popTokens(); -} +void StackImpl::popTokens() { tokenStack.popTokens(); } bool StackImpl::readToken(Token &token) { diff --git a/testdata/integration/user_defined_syntax/math_non_greedy_limits.in.osml b/testdata/integration/user_defined_syntax/math_non_greedy_limits.in.osml new file mode 100644 index 0000000..d9481d0 --- /dev/null +++ b/testdata/integration/user_defined_syntax/math_non_greedy_limits.in.osml @@ -0,0 +1,5 @@ +\document +\import[ontology]{./ontologies/math} +\begin{math} + \sum_{i=3}^{5} i_M^2 +\end{math} diff --git a/testdata/integration/user_defined_syntax/math_non_greedy_limits.out.osxml b/testdata/integration/user_defined_syntax/math_non_greedy_limits.out.osxml new file mode 100644 index 0000000..86f38b1 --- /dev/null +++ b/testdata/integration/user_defined_syntax/math_non_greedy_limits.out.osxml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<document xmlns:math="math"> + <math:math> + <math:equation> + <math:field> + <math:sum> + <math:limits> + <math:lowerLimit> + <math:text>i</math:text> + <math:equals/> + <math:number>3</math:number> + </math:lowerLimit> + <math:upperLimit> + <math:number>5</math:number> + </math:upperLimit> + </math:limits> + </math:sum> + <math:text>i</math:text> + <math:index> + <math:text>M</math:text> + </math:index> + <math:power> + 
<math:number>2</math:number> + </math:power> + </math:field> + </math:equation> + </math:math> +</document> diff --git a/testdata/integration/user_defined_syntax/non_greedy_shortform.in.osml b/testdata/integration/user_defined_syntax/non_greedy_shortform.in.osml new file mode 100644 index 0000000..68866c6 --- /dev/null +++ b/testdata/integration/user_defined_syntax/non_greedy_shortform.in.osml @@ -0,0 +1,2 @@ +\import[ontology]{./ontologies/non_greedy_shortform} +\begin{test}?a ?b?c ??d + ?+{?} \end{test} diff --git a/testdata/integration/user_defined_syntax/non_greedy_shortform.out.osxml b/testdata/integration/user_defined_syntax/non_greedy_shortform.out.osxml new file mode 100644 index 0000000..c2431eb --- /dev/null +++ b/testdata/integration/user_defined_syntax/non_greedy_shortform.out.osxml @@ -0,0 +1,15 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<document xmlns:test="test"> + <test:test> + <test:a>a</test:a> + <test:a>b</test:a> + <test:a>c</test:a> + <test:a/> + <test:a>d</test:a> + <test:b/> + <test:a/> + <test:b> + <test:a/> + </test:b> + </test:test> +</document> diff --git a/testdata/integration/user_defined_syntax/ontologies/math.osml b/testdata/integration/user_defined_syntax/ontologies/math.osml index c864d2b..f10b6a2 100644 --- a/testdata/integration/user_defined_syntax/ontologies/math.osml +++ b/testdata/integration/user_defined_syntax/ontologies/math.osml @@ -76,12 +76,12 @@ % TODO: Differentiate between lower and upper index vs. power operator? 
\struct#power[isa=primitive] \syntax - \short{^} + \short[greedy=false]{^} \field \childRef[ref=primitive] \struct#index[isa=primitive] \syntax - \short{_} + \short[greedy=false]{_} \field \childRef[ref=primitive] @@ -211,12 +211,12 @@ \childRef[ref=upperLimit] \struct#lowerLimit[cardinality={0,1}] \syntax - \short{_} + \short[greedy=false]{_} \field \childRef[ref=primitive] \struct#upperLimit[cardinality={0,1}] \syntax - \short{^} + \short[greedy=false]{^} \field \childRef[ref=primitive] diff --git a/testdata/integration/user_defined_syntax/ontologies/non_greedy_shortform.osml b/testdata/integration/user_defined_syntax/ontologies/non_greedy_shortform.osml new file mode 100644 index 0000000..7a370ac --- /dev/null +++ b/testdata/integration/user_defined_syntax/ontologies/non_greedy_shortform.osml @@ -0,0 +1,15 @@ +\ontology#test + \struct#test[root=true] + \field + \childRef[a] + \childRef[b] + \struct#a + \syntax + \short[false]{?} + \primitive[optional=true,type=string] + \struct#b + \syntax + \short[false]{+} + \field + \childRef[a] + |