summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-04-12 17:40:32 +0200
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2016-04-25 22:24:16 +0200
commitbed013e617130f7afd1f90ba57afc160b43c71df (patch)
treebb5d335132a522e24351e5a3239355f7a9ca2070
parent84ab3caa172fc3f4ec7085135964173c8eed5f84 (diff)
Implement non-greedy behaviour for short tokens
-rw-r--r--src/core/parser/stack/DocumentHandler.cpp18
-rw-r--r--src/core/parser/stack/Stack.cpp39
-rw-r--r--testdata/integration/user_defined_syntax/math_non_greedy_limits.in.osml5
-rw-r--r--testdata/integration/user_defined_syntax/math_non_greedy_limits.out.osxml28
-rw-r--r--testdata/integration/user_defined_syntax/non_greedy_shortform.in.osml2
-rw-r--r--testdata/integration/user_defined_syntax/non_greedy_shortform.out.osxml15
-rw-r--r--testdata/integration/user_defined_syntax/ontologies/math.osml8
-rw-r--r--testdata/integration/user_defined_syntax/ontologies/non_greedy_shortform.osml15
8 files changed, 108 insertions, 22 deletions
diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp
index aa9a28f..7564fad 100644
--- a/src/core/parser/stack/DocumentHandler.cpp
+++ b/src/core/parser/stack/DocumentHandler.cpp
@@ -145,6 +145,11 @@ void DocumentChildHandler::pushScopeTokens()
// List containing the unfiltered syntax descriptors
std::vector<SyntaxDescriptor> descrs;
+ // Skip the DocumentField and the corresponding StructuredEntity
+ // if we're currently in the implicit default field of a non-greedy
+ // structure.
+ size_t explicitSkipCount = (!isGreedy && inImplicitDefaultField) ? 2 : 0;
+
// Fetch the current scope stack and search the first non-transparent field
// or structure
const ManagedVector<Node> &stack = scope().getStack();
@@ -157,6 +162,10 @@ void DocumentChildHandler::pushScopeTokens()
if (nd->isa(&RttiTypes::DocumentField)) {
Rooted<DocumentField> field = nd.cast<DocumentField>();
if (!field->transparent) {
+ if (explicitSkipCount > 0) {
+ explicitSkipCount--;
+ continue;
+ }
descrs = field->getDescriptor()->getPermittedTokens();
break;
}
@@ -166,6 +175,10 @@ void DocumentChildHandler::pushScopeTokens()
if (nd->isa(&RttiTypes::StructuredEntity)) {
Rooted<StructuredEntity> entity = nd.cast<StructuredEntity>();
if (!entity->isTransparent()) {
+ if (explicitSkipCount > 0) {
+ explicitSkipCount--;
+ continue;
+ }
descrs = entity->getDescriptor()->getPermittedTokens();
break;
}
@@ -591,12 +604,13 @@ bool DocumentChildHandler::startToken(Handle<Node> node, bool greedy)
}
}
-EndTokenResult DocumentChildHandler::endToken(Handle<Node> node, size_t maxStackDepth)
+EndTokenResult DocumentChildHandler::endToken(Handle<Node> node,
+ size_t maxStackDepth)
{
// Fetch the current scope stack
const ManagedVector<Node> &stack = scope().getStack();
- bool found = false; // true once the given node has been found
+ bool found = false; // true once the given node has been found
bool repeat = false;
size_t scopeStackDepth = 0; // # of elems on the scope stack
size_t currentStackDepth = 0; // # of "explicit" elems on the parser stack
diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp
index ef503f7..cc1eb23 100644
--- a/src/core/parser/stack/Stack.cpp
+++ b/src/core/parser/stack/Stack.cpp
@@ -464,10 +464,14 @@ private:
* startImplicitDefaultField is set to false. If false, prevents this method
* from ending a handler if it potentially can have a default field, but did
* not have one yet.
+ * @param endNonGreedyHandlers if set to true, non-greedy handlers that are
+ * not currently in a field are ended. If false, they may be given an
+ * implicit default field like greedy handlers.
* @return true if the current command is in a valid field.
*/
bool prepareCurrentHandler(bool startImplicitDefaultField = true,
- bool endHandlersWithoutDefaultField = true);
+ bool endHandlersWithoutDefaultField = true,
+ bool endNonGreedyHandlers = true);
/**
* Returns true if all handlers on the stack are currently valid, or false
@@ -779,7 +783,8 @@ bool StackImpl::endCurrentHandler()
}
bool StackImpl::prepareCurrentHandler(bool startImplicitDefaultField,
- bool endHandlersWithoutDefaultField)
+ bool endHandlersWithoutDefaultField,
+ bool endNonGreedyHandlers)
{
// Repeat until a valid handler is found on the stack
while (!stack.empty()) {
@@ -787,9 +792,15 @@ bool StackImpl::prepareCurrentHandler(bool startImplicitDefaultField,
HandlerInfo &info = currentInfo();
// If the current Handler is in a field, there is nothing to be done,
- // abort
+ // abort. Exception: If the handler is not greedy and currently is in
+ // its default field, then continue.
if (info.inField) {
- return true;
+ if (!info.greedy && info.hadData && info.inImplicitDefaultField) {
+ endCurrentField();
+ continue;
+ } else {
+ return true;
+ }
}
// If the current field already had a default field or is not valid,
@@ -798,7 +809,7 @@ bool StackImpl::prepareCurrentHandler(bool startImplicitDefaultField,
info.type() == HandlerType::COMMAND ||
info.type() == HandlerType::TOKEN ||
(info.type() == HandlerType::ANNOTATION_START && info.range);
- if (info.hadDefaultField ||
+ if (info.hadDefaultField || (!info.greedy && endNonGreedyHandlers) ||
(!startImplicitDefaultField && endHandlersWithoutDefaultField) ||
!info.valid || !canHaveImplicitDefaultField) {
// We cannot end the command if it is marked as "range" command
@@ -845,7 +856,7 @@ bool StackImpl::handleData()
while (true) {
// Prepare the stack -- make sure all overdue handlers are ended and
// we currently are in an open field
- if (stack.empty() || !prepareCurrentHandler()) {
+ if (stack.empty() || !prepareCurrentHandler(true, true, false)) {
throw LoggableException("Did not expect any data here");
}
@@ -948,8 +959,8 @@ static void strayTokenError(const Token &token, TokenDescriptor &descr,
}
static void checkTokensAreUnambiguous(const Token &token,
- const TokenDescriptor &descr,
- Logger &logger)
+ const TokenDescriptor &descr,
+ Logger &logger)
{
// Some helper functions and constants
constexpr ssize_t MAX_DEPTH = std::numeric_limits<ssize_t>::max();
@@ -1158,7 +1169,7 @@ void StackImpl::handleToken(const Token &token)
try {
// Try to open an implicit default field and try to start the token
// as short form or as start token
- prepareCurrentHandler(true);
+ prepareCurrentHandler();
if (handleOpenTokens(loggerFork, token, true, descr.shortForm) ||
handleOpenTokens(loggerFork, token, false, descr.open)) {
return;
@@ -1417,7 +1428,7 @@ void StackImpl::data(const TokenizedData &data)
// Close all handlers that did already had or cannot have a default field
// and are not currently inside a field (repeat this after each chunk of
// data/text)
- prepareCurrentHandler(false, false);
+ prepareCurrentHandler(false, false, false);
// Peek a token from the reader, repeat until all tokens have been read
Token token;
@@ -1434,7 +1445,7 @@ void StackImpl::data(const TokenizedData &data)
handleToken(token);
reader.consumePeek();
}
- prepareCurrentHandler(false, false);
+ prepareCurrentHandler(false, false, false);
}
}
@@ -1511,11 +1522,7 @@ void StackImpl::pushTokens(const std::vector<SyntaxDescriptor> &tokens)
tokenStack.pushTokens(tokens);
}
-void StackImpl::popTokens()
-{
- // Pop the last set of tokens from the token stack.
- tokenStack.popTokens();
-}
+void StackImpl::popTokens() { tokenStack.popTokens(); }
bool StackImpl::readToken(Token &token)
{
diff --git a/testdata/integration/user_defined_syntax/math_non_greedy_limits.in.osml b/testdata/integration/user_defined_syntax/math_non_greedy_limits.in.osml
new file mode 100644
index 0000000..d9481d0
--- /dev/null
+++ b/testdata/integration/user_defined_syntax/math_non_greedy_limits.in.osml
@@ -0,0 +1,5 @@
+\document
+\import[ontology]{./ontologies/math}
+\begin{math}
+ \sum_{i=3}^{5} i_M^2
+\end{math}
diff --git a/testdata/integration/user_defined_syntax/math_non_greedy_limits.out.osxml b/testdata/integration/user_defined_syntax/math_non_greedy_limits.out.osxml
new file mode 100644
index 0000000..86f38b1
--- /dev/null
+++ b/testdata/integration/user_defined_syntax/math_non_greedy_limits.out.osxml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<document xmlns:math="math">
+ <math:math>
+ <math:equation>
+ <math:field>
+ <math:sum>
+ <math:limits>
+ <math:lowerLimit>
+ <math:text>i</math:text>
+ <math:equals/>
+ <math:number>3</math:number>
+ </math:lowerLimit>
+ <math:upperLimit>
+ <math:number>5</math:number>
+ </math:upperLimit>
+ </math:limits>
+ </math:sum>
+ <math:text>i</math:text>
+ <math:index>
+ <math:text>M</math:text>
+ </math:index>
+ <math:power>
+ <math:number>2</math:number>
+ </math:power>
+ </math:field>
+ </math:equation>
+ </math:math>
+</document>
diff --git a/testdata/integration/user_defined_syntax/non_greedy_shortform.in.osml b/testdata/integration/user_defined_syntax/non_greedy_shortform.in.osml
new file mode 100644
index 0000000..68866c6
--- /dev/null
+++ b/testdata/integration/user_defined_syntax/non_greedy_shortform.in.osml
@@ -0,0 +1,2 @@
+\import[ontology]{./ontologies/non_greedy_shortform}
+\begin{test}?a ?b?c ??d + ?+{?} \end{test}
diff --git a/testdata/integration/user_defined_syntax/non_greedy_shortform.out.osxml b/testdata/integration/user_defined_syntax/non_greedy_shortform.out.osxml
new file mode 100644
index 0000000..c2431eb
--- /dev/null
+++ b/testdata/integration/user_defined_syntax/non_greedy_shortform.out.osxml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<document xmlns:test="test">
+ <test:test>
+ <test:a>a</test:a>
+ <test:a>b</test:a>
+ <test:a>c</test:a>
+ <test:a/>
+ <test:a>d</test:a>
+ <test:b/>
+ <test:a/>
+ <test:b>
+ <test:a/>
+ </test:b>
+ </test:test>
+</document>
diff --git a/testdata/integration/user_defined_syntax/ontologies/math.osml b/testdata/integration/user_defined_syntax/ontologies/math.osml
index c864d2b..f10b6a2 100644
--- a/testdata/integration/user_defined_syntax/ontologies/math.osml
+++ b/testdata/integration/user_defined_syntax/ontologies/math.osml
@@ -76,12 +76,12 @@
% TODO: Differentiate between lower and upper index vs. power operator?
\struct#power[isa=primitive]
\syntax
- \short{^}
+ \short[greedy=false]{^}
\field
\childRef[ref=primitive]
\struct#index[isa=primitive]
\syntax
- \short{_}
+ \short[greedy=false]{_}
\field
\childRef[ref=primitive]
@@ -211,12 +211,12 @@
\childRef[ref=upperLimit]
\struct#lowerLimit[cardinality={0,1}]
\syntax
- \short{_}
+ \short[greedy=false]{_}
\field
\childRef[ref=primitive]
\struct#upperLimit[cardinality={0,1}]
\syntax
- \short{^}
+ \short[greedy=false]{^}
\field
\childRef[ref=primitive]
diff --git a/testdata/integration/user_defined_syntax/ontologies/non_greedy_shortform.osml b/testdata/integration/user_defined_syntax/ontologies/non_greedy_shortform.osml
new file mode 100644
index 0000000..7a370ac
--- /dev/null
+++ b/testdata/integration/user_defined_syntax/ontologies/non_greedy_shortform.osml
@@ -0,0 +1,15 @@
+\ontology#test
+ \struct#test[root=true]
+ \field
+ \childRef[a]
+ \childRef[b]
+ \struct#a
+ \syntax
+ \short[false]{?}
+ \primitive[optional=true,type=string]
+ \struct#b
+ \syntax
+ \short[false]{+}
+ \field
+ \childRef[a]
+