 src/core/model/Ontology.hpp               | 21
 src/core/model/Syntax.hpp                 | 41
 src/core/parser/stack/OntologyHandler.cpp |  5
 src/core/parser/stack/Stack.cpp           | 40
 test/core/parser/stack/TokenStackTest.cpp | 14
 5 files changed, 77 insertions(+), 44 deletions(-)
diff --git a/src/core/model/Ontology.hpp b/src/core/model/Ontology.hpp
index 2533b9d..014f912 100644
--- a/src/core/model/Ontology.hpp
+++ b/src/core/model/Ontology.hpp
@@ -555,9 +555,9 @@ public:
 	 */
 	SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1)
 	{
-		SyntaxDescriptor stx{openToken.id, closeToken.id, Tokens::Empty,
-		                     const_cast<FieldDescriptor *>(this), depth};
-		return stx;
+		return {openToken.id,  closeToken.id,
+		        Tokens::Empty, const_cast<FieldDescriptor *>(this),
+		        depth,         true};
 	}
 
 	/**
@@ -645,7 +645,8 @@ public:
 	 */
 	virtual ManagedVector<FieldDescriptor> getFieldDescriptors() const
 	{
-		return ManagedVector<FieldDescriptor>(const_cast<Descriptor*>(this), fieldDescriptors.begin(),
+		return ManagedVector<FieldDescriptor>(const_cast<Descriptor *>(this),
+		                                      fieldDescriptors.begin(),
 		                                      fieldDescriptors.end());
 	}
 
@@ -934,9 +935,8 @@ public:
 	 */
 	virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1)
 	{
-		SyntaxDescriptor stx{openToken.id, closeToken.id, Tokens::Empty,
-		                     const_cast<Descriptor *>(this), depth};
-		return stx;
+		return {openToken.id, closeToken.id, Tokens::Empty,
+		        const_cast<Descriptor *>(this), depth, true};
 	}
 
 	/**
@@ -1227,10 +1227,9 @@ public:
 	 */
 	SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override
 	{
-		SyntaxDescriptor stx{getOpenToken().id, getCloseToken().id,
-		                     shortToken.id, const_cast<StructuredClass *>(this),
-		                     depth};
-		return stx;
+		return {getOpenToken().id, getCloseToken().id,
+		        shortToken.id,     const_cast<StructuredClass *>(this),
+		        depth,             shortToken.greedy};
 	}
 };
diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp
index e525224..5f360bc 100644
--- a/src/core/model/Syntax.hpp
+++ b/src/core/model/Syntax.hpp
@@ -41,14 +41,22 @@ struct TokenDescriptor {
 	std::string token;
 
 	/**
+	 * An id to uniquely identify this token.
+	 */
+	TokenId id;
+
+	/**
 	 * A flag to be set true if this TokenDescriptor uses a special token.
 	 */
 	bool special;
 
 	/**
-	 * An id to uniquely identify this token.
+	 * A flag indicating whether the token is greedy or not. Currently only used
+	 * for "shortForm" tokens. Default value is true. If false, only one data
+	 * command is passed to the corresponding handler if the handler was opened
+	 * for the implicit default field.
 	 */
-	TokenId id;
+	bool greedy;
 
 	/**
 	 * Constructor for non-special tokens. The special flag is set to false and
@@ -58,7 +66,10 @@ struct TokenDescriptor {
 	 * one.
 	 */
 	TokenDescriptor(std::string token = std::string())
-	    : token(std::move(token)), special(false), id(Tokens::Empty)
+	    : token(std::move(token)),
+	      id(Tokens::Empty),
+	      special(false),
+	      greedy(true)
 	{
 	}
 
@@ -68,7 +79,10 @@ struct TokenDescriptor {
 	 *
 	 * @param id the id of the special token.
 	 */
-	TokenDescriptor(TokenId id) : special(true), id(id) {}
+	TokenDescriptor(TokenId id, bool greedy = true)
+	    : id(id), special(true), greedy(greedy)
+	{
+	}
 
 	/**
 	 * Returns true if and only if neither a string nor an ID is given.
@@ -128,6 +142,13 @@ struct SyntaxDescriptor {
 	ssize_t depth;
 
 	/**
+	 * Set to true if the shortForm is greedy (default), to false if the
+	 * corresponding handler should receive at most one piece of data if it was
+	 * started implicitly.
+	 */
+	bool greedyShortForm;
+
+	/**
 	 * Default constructor, sets all token ids to Tokens::Empty and the
 	 * descriptor handle to nullptr.
 	 */
@@ -136,7 +157,8 @@ SyntaxDescriptor()
 	    : open(Tokens::Empty),
 	      close(Tokens::Empty),
 	      shortForm(Tokens::Empty),
 	      descriptor(nullptr),
-	      depth(-1)
+	      depth(-1),
+	      greedyShortForm(true)
 	{
 	}
@@ -150,14 +172,19 @@ struct SyntaxDescriptor {
 	 * @param depth Given the current leaf in the parsed document the depth of a
 	 * SyntaxDescriptor is defined as the number of transparent elements that
 	 * would be needed to construct an instance of the referenced descriptor.
+	 * @param greedyShortForm set to false if the shortForm token should be
+	 * treated in a non-greedy way, meaning that it should be given at most
+	 * one piece of data if it was started implicitly.
 	 */
 	SyntaxDescriptor(TokenId open, TokenId close, TokenId shortForm,
-	                 Handle<Node> descriptor, ssize_t depth)
+	                 Handle<Node> descriptor, ssize_t depth,
+	                 bool greedyShortForm)
 	    : open(open),
 	      close(close),
 	      shortForm(shortForm),
 	      descriptor(descriptor),
-	      depth(depth)
+	      depth(depth),
+	      greedyShortForm(greedyShortForm)
 	{
 	}
 
diff --git a/src/core/parser/stack/OntologyHandler.cpp b/src/core/parser/stack/OntologyHandler.cpp
index c153316..f6bfb9a 100644
--- a/src/core/parser/stack/OntologyHandler.cpp
+++ b/src/core/parser/stack/OntologyHandler.cpp
@@ -502,11 +502,16 @@ bool OntologyOpenCloseShortHandler::data()
 
 void OntologyOpenCloseShortHandler::end()
 {
+	// Make sure data was given
 	if (descr->isEmpty()) {
 		logger().error(std::string("Expected valid token for ") + name() +
 		                   std::string(" syntax descriptor."),
 		               location());
 	}
+
+	// Update the greedy flag
+	descr->greedy = greedy;
+
 	scope().pop(logger());
 }
 
diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp
index 3e719e6..696a070 100644
--- a/src/core/parser/stack/Stack.cpp
+++ b/src/core/parser/stack/Stack.cpp
@@ -928,37 +928,39 @@ static void strayTokenError(const Token &token, TokenDescriptor &descr,
 	return;
 }
 
-static void checkTokensAreUnambigous(const Token &token,
+static void checkTokensAreUnambiguous(const Token &token,
                                      const TokenDescriptor &descr,
                                      Logger &logger)
 {
-	const ssize_t maxDepth = std::numeric_limits<ssize_t>::max();
-	const SyntaxDescriptor none(Tokens::Empty, Tokens::Empty, Tokens::Empty,
-	                            nullptr, maxDepth);
+	// Some helper functions and constants
+	constexpr ssize_t MAX_DEPTH = std::numeric_limits<ssize_t>::max();
+	static const SyntaxDescriptor EMPTY_DESCR(
+	    Tokens::Empty, Tokens::Empty, Tokens::Empty, nullptr, MAX_DEPTH, true);
+	static auto get = [](size_t i, const std::vector<SyntaxDescriptor> &descrs)
+	    -> const SyntaxDescriptor &
+	{
+		return (i < descrs.size()) ? descrs[i] : EMPTY_DESCR;
+	};
 
 	// Check whether there is any ambiguity -- e.g. there are two tokens with
 	// the same depth (the effort they need to be created). The shortForm and
 	// open lists are assumed to be sorted by depth.
-	ssize_t errorDepth = maxDepth;
+	ssize_t errorDepth = MAX_DEPTH;
 	size_t i = 0;
 	size_t j = 0;
-	while (errorDepth == maxDepth &&
+	while (errorDepth == MAX_DEPTH &&
 	       (i < descr.open.size() || j < descr.shortForm.size())) {
-		const SyntaxDescriptor &di1 =
-		    i < descr.open.size() ? descr.open[i] : none;
-		const SyntaxDescriptor &di2 =
-		    (i + 1 < descr.open.size()) ? descr.open[i + 1] : none;
-		const SyntaxDescriptor &dj1 =
-		    j < descr.shortForm.size() ? descr.shortForm[j] : none;
-		const SyntaxDescriptor &dj2 =
-		    (j + 1 < descr.shortForm.size()) ? descr.shortForm[j + 1] : none;
-
-		if (di1.depth != maxDepth &&
+		const SyntaxDescriptor &di1 = get(i, descr.open);
+		const SyntaxDescriptor &di2 = get(i + 1, descr.open);
+		const SyntaxDescriptor &dj1 = get(j, descr.shortForm);
+		const SyntaxDescriptor &dj2 = get(j + 1, descr.shortForm);
+
+		if (di1.depth != MAX_DEPTH &&
 		    (di1.depth == di2.depth || di1.depth == dj1.depth ||
 		     di1.depth == dj2.depth)) {
 			errorDepth = di1.depth;
 		}
-		if (dj1.depth != maxDepth &&
+		if (dj1.depth != MAX_DEPTH &&
 		    (dj1.depth == dj2.depth || di2.depth == dj1.depth)) {
 			errorDepth = dj1.depth;
 		}
@@ -968,7 +970,7 @@ static void checkTokensAreUnambigous(const Token &token,
 	}
 
 	// Issue an error message if an ambiguity exists
-	if (errorDepth != maxDepth) {
+	if (errorDepth != MAX_DEPTH) {
 		logger.error("Token \"" + token.name() + "\" is ambiguous!");
 		logger.note(
 		    "The token could be ambiguously used in one of the following "
@@ -1123,7 +1125,7 @@ void StackImpl::handleToken(const Token &token)
 	}
 
 	// Make sure the given open token descriptors are unambiguous
-	checkTokensAreUnambigous(token, descr, logger());
+	checkTokensAreUnambiguous(token, descr, logger());
 
 	// Now try to handle open or short form tokens. Iterate until the stack can
 	// no longer be unwound.
diff --git a/test/core/parser/stack/TokenStackTest.cpp b/test/core/parser/stack/TokenStackTest.cpp
index bca16ec..7393edc 100644
--- a/test/core/parser/stack/TokenStackTest.cpp
+++ b/test/core/parser/stack/TokenStackTest.cpp
@@ -30,18 +30,18 @@ static Rooted<Node> nd2{new Node(mgr)};
 static Rooted<Node> nd3{new Node(mgr)};
 
 static const std::vector<SyntaxDescriptor> ListA{
-    SyntaxDescriptor(Tokens::Empty, 1, Tokens::Empty, nd1, 0),
-    SyntaxDescriptor(2, Tokens::Empty, Tokens::Empty, nd2, 2),
-    SyntaxDescriptor(3, Tokens::Empty, Tokens::Empty, nd3, 1)};
+    SyntaxDescriptor(Tokens::Empty, 1, Tokens::Empty, nd1, 0, true),
+    SyntaxDescriptor(2, Tokens::Empty, Tokens::Empty, nd2, 2, true),
+    SyntaxDescriptor(3, Tokens::Empty, Tokens::Empty, nd3, 1, true)};
 
 static const std::vector<SyntaxDescriptor> ListB{
-    SyntaxDescriptor(Tokens::Empty, 1, Tokens::Empty, nd1, -1),
-    SyntaxDescriptor(2, Tokens::Empty, 3, nd3, 3),
+    SyntaxDescriptor(Tokens::Empty, 1, Tokens::Empty, nd1, -1, true),
+    SyntaxDescriptor(2, Tokens::Empty, 3, nd3, 3, true),
 };
 
 static const std::vector<SyntaxDescriptor> ListC{
-    SyntaxDescriptor(Tokens::Empty, Tokens::Empty, 4, nd2, 5),
-    SyntaxDescriptor(Tokens::Empty, Tokens::Empty, 3, nd3, 6),
+    SyntaxDescriptor(Tokens::Empty, Tokens::Empty, 4, nd2, 5, true),
+    SyntaxDescriptor(Tokens::Empty, Tokens::Empty, 3, nd3, 6, true),
 };
 
 TEST(TokenStack, tokens)
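
The behaviour controlled by the new greedy flag is easiest to see in isolation. The following standalone sketch is illustrative only: SyntaxDescriptorSketch and feedImplicitHandler are hypothetical stand-ins, not the ousia classes changed above. It models what greedyShortForm is meant to control, namely that a handler opened implicitly for a non-greedy shortForm token receives at most one piece of data, while a greedy one consumes everything it is given.

// greedy_sketch.cpp -- illustrative stand-ins only, not the ousia API.
#include <iostream>
#include <string>
#include <vector>

struct SyntaxDescriptorSketch {
	bool greedyShortForm;  // models the flag added to SyntaxDescriptor above
};

// Feed data to a handler that was opened implicitly for a shortForm token.
// A non-greedy short form accepts at most one data command.
static void feedImplicitHandler(const SyntaxDescriptorSketch &descr,
                                const std::vector<std::string> &data)
{
	int consumed = 0;
	for (const std::string &piece : data) {
		if (!descr.greedyShortForm && consumed >= 1) {
			break;  // non-greedy: stop after the first data command
		}
		std::cout << "handler received: " << piece << "\n";
		++consumed;
	}
}

int main()
{
	feedImplicitHandler({true}, {"a", "b", "c"});   // greedy: prints a, b, c
	feedImplicitHandler({false}, {"a", "b", "c"});  // non-greedy: prints only a
	return 0;
}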