diff options
author | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2015-03-04 14:48:44 +0100 |
---|---|---|
committer | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2015-03-04 14:48:44 +0100 |
commit | 11c6272abc2b34d861620b906bdee595674dca0f (patch) | |
tree | 3c5568c0653ade15d6f56078eb216418f0dade29 | |
parent | d6d08ae2dfc31c583f172c74ef9e19b203776107 (diff) | |
parent | 2714f95ab3c8632686d24ff8d2abfc1f35dc5dd7 (diff) |
Merge branch 'master' of somweyr.de:ousia
-rw-r--r-- | src/core/model/Ontology.cpp | 45 | ||||
-rw-r--r-- | src/core/model/Ontology.hpp | 87 | ||||
-rw-r--r-- | src/core/model/Syntax.cpp | 22 | ||||
-rw-r--r-- | src/core/model/Syntax.hpp | 35 | ||||
-rw-r--r-- | src/core/parser/stack/GenericParserStates.cpp | 9 | ||||
-rw-r--r-- | src/core/parser/stack/OntologyHandler.cpp | 423 | ||||
-rw-r--r-- | src/core/parser/stack/OntologyHandler.hpp | 133 | ||||
-rw-r--r-- | test/core/model/OntologyTest.cpp | 44 |
8 files changed, 641 insertions, 157 deletions
diff --git a/src/core/model/Ontology.cpp b/src/core/model/Ontology.cpp index e56d628..bc7b1a7 100644 --- a/src/core/model/Ontology.cpp +++ b/src/core/model/Ontology.cpp @@ -315,24 +315,22 @@ bool FieldDescriptor::doValidate(Logger &logger) const } else { valid = valid & validateName(logger); } - // check start and end token. - if (!startToken.special && !startToken.token.empty() && - !Utils::isUserDefinedToken(startToken.token)) { + // check open and close token. + if (!openToken.isValid()) { // TODO: Correct error message. logger.error(std::string("Field \"") + getNameOrDefaultName() + "\" of descriptor \"" + parentName + - "\" has an invalid custom start token: " + - startToken.token, + "\" has an invalid custom open token: " + + openToken.token, *this); valid = false; } - if (!endToken.special && !endToken.token.empty() && - !Utils::isUserDefinedToken(endToken.token)) { + if (!closeToken.isValid()) { // TODO: Correct error message. logger.error(std::string("Field \"") + getNameOrDefaultName() + "\" of descriptor \"" + parentName + - "\" has an invalid custom end token: " + - endToken.token, + "\" has an invalid custom close token: " + + closeToken.token, *this); valid = false; } @@ -524,19 +522,17 @@ bool Descriptor::doValidate(Logger &logger) const } // check start and end token. - if (!startToken.special && !startToken.token.empty() && - !Utils::isUserDefinedToken(startToken.token)) { + if (!openToken.isValid()) { logger.error(std::string("Descriptor \"") + getName() + "\" has an invalid custom start token: " + - startToken.token, + openToken.token, *this); valid = false; } - if (!endToken.special && !endToken.token.empty() && - !Utils::isUserDefinedToken(endToken.token)) { + if (!closeToken.isValid()) { logger.error(std::string("Descriptor \"") + getName() + "\" has an invalid custom end token: " + - endToken.token, + closeToken.token, *this); valid = false; } @@ -818,8 +814,7 @@ bool StructuredClass::doValidate(Logger &logger) const } // check short token. 
- if (!shortToken.special && !shortToken.token.empty() && - !Utils::isUserDefinedToken(shortToken.token)) { + if (!shortToken.isValid()) { logger.error(std::string("Descriptor \"") + getName() + "\" has an invalid custom short form token: " + shortToken.token, @@ -1092,22 +1087,22 @@ static void gatherTokenDescriptors( std::unordered_set<FieldDescriptor *> &visited) { // add the TokenDescriptors for the Descriptor itself. - if (!desc->getStartToken().isEmpty()) { - res.push_back(desc->getStartTokenPointer()); + if (!desc->getOpenToken().isEmpty()) { + res.push_back(desc->getOpenTokenPointer()); } - if (!desc->getEndToken().isEmpty()) { - res.push_back(desc->getEndTokenPointer()); + if (!desc->getCloseToken().isEmpty()) { + res.push_back(desc->getCloseTokenPointer()); } // add the TokenDescriptors for its FieldDescriptors. for (auto fd : desc->getFieldDescriptors()) { if (!visited.insert(fd.get()).second) { continue; } - if (!fd->getStartToken().isEmpty()) { - res.push_back(fd->getStartTokenPointer()); + if (!fd->getOpenToken().isEmpty()) { + res.push_back(fd->getOpenTokenPointer()); } - if (!fd->getEndToken().isEmpty()) { - res.push_back(fd->getEndTokenPointer()); + if (!fd->getCloseToken().isEmpty()) { + res.push_back(fd->getCloseTokenPointer()); } } } diff --git a/src/core/model/Ontology.hpp b/src/core/model/Ontology.hpp index c90093c..079640c 100644 --- a/src/core/model/Ontology.hpp +++ b/src/core/model/Ontology.hpp @@ -228,8 +228,8 @@ private: Owned<Type> primitiveType; bool optional; bool primitive; - TokenDescriptor startToken; - TokenDescriptor endToken; + TokenDescriptor openToken; + TokenDescriptor closeToken; WhitespaceMode whitespaceMode; protected: @@ -469,7 +469,7 @@ public: } /** - * Returns a pointer to the start TokenDescriptor. This Token is used as a + * Returns a pointer to the open TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor starts. 
* * Note that this does not invalidate the FieldDescriptor. So use with @@ -477,54 +477,54 @@ public: * * @return a pointer to the start TokenDescriptor. */ - TokenDescriptor *getStartTokenPointer() { return &startToken; } + TokenDescriptor *getOpenTokenPointer() { return &openToken; } /** - * Returns a copy of the start TokenDescriptor. This Token is used as a + * Returns a copy of the open TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor starts. * * @return a copy of the start TokenDescriptor. */ - TokenDescriptor getStartToken() const { return startToken; } + TokenDescriptor getOpenToken() const { return openToken; } /** - * Sets the start TokenDescriptor. This Token is used as a - * signifier during parsing that an instance of this FieldDescriptor starts. + * Sets the open TokenDescriptor. This Token is used as a signifier during + * parsing that an instance of this FieldDescriptor starts. * - * @param st the new start TokenDescriptor. + * @param t the new open TokenDescriptor. */ - void setStartToken(TokenDescriptor st) + void setOpenToken(TokenDescriptor t) { invalidate(); - startToken = st; + openToken = t; } /** - * Returns a pointer to the end TokenDescriptor. This Token is used as a + * Returns a pointer to the close TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor ends. * - * @return a pointer to the end TokenDescriptor. + * @return a pointer to the close TokenDescriptor. */ - TokenDescriptor *getEndTokenPointer() { return &endToken; } + TokenDescriptor *getCloseTokenPointer() { return &closeToken; } /** - * Returns a copy of the end TokenDescriptor. This Token is used as a + * Returns a copy of the close TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor ends. * - * @return a copy of the end TokenDescriptor. + * @return a copy of the close TokenDescriptor. 
*/ - TokenDescriptor getEndToken() const { return endToken; } + TokenDescriptor getCloseToken() const { return closeToken; } /** - * Sets the end TokenDescriptor. This Token is used as a + * Sets the close TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor ends. * - * @param e the new end TokenDescriptor. + * @param t the new close TokenDescriptor. */ - void setEndToken(TokenDescriptor e) + void setCloseToken(TokenDescriptor t) { invalidate(); - endToken = e; + closeToken = t; } /** @@ -555,7 +555,7 @@ public: */ SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) { - SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + SyntaxDescriptor stx{openToken.id, closeToken.id, Tokens::Empty, const_cast<FieldDescriptor *>(this), depth}; return stx; } @@ -608,8 +608,8 @@ class Descriptor : public Node { private: Owned<StructType> attributesDescriptor; NodeVector<FieldDescriptor> fieldDescriptors; - TokenDescriptor startToken; - TokenDescriptor endToken; + TokenDescriptor openToken; + TokenDescriptor closeToken; bool addAndSortFieldDescriptor(Handle<FieldDescriptor> fd, Logger &logger); @@ -860,31 +860,31 @@ public: NodeVector<StructuredClass> getPermittedChildren() const; /** - * Returns a pointer to the start TokenDescriptor. This Token is used as a + * Returns a pointer to the open TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor starts. * - * @return a pointer to the start TokenDescriptor. + * @return a pointer to the open TokenDescriptor. */ - TokenDescriptor *getStartTokenPointer() { return &startToken; } + TokenDescriptor *getOpenTokenPointer() { return &openToken; } /** - * Returns a copy of the start TokenDescriptor. This Token is used as a + * Returns a copy of the open TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor starts. 
* - * @return a copy of the start TokenDescriptor. + * @return a copy of the open TokenDescriptor. */ - TokenDescriptor getStartToken() const { return startToken; } + TokenDescriptor getOpenToken() const { return openToken; } /** - * Sets the start TokenDescriptor. This Token is used as a + * Sets the open TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor starts. * - * @param st the new start TokenDescriptor. + * @param t the new start TokenDescriptor. */ - void setStartToken(TokenDescriptor st) + void setOpenToken(TokenDescriptor t) { invalidate(); - startToken = st; + openToken = t; } /** @@ -893,7 +893,7 @@ public: * * @return a pointer to the end TokenDescriptor. */ - TokenDescriptor *getEndTokenPointer() { return &endToken; } + TokenDescriptor *getCloseTokenPointer() { return &closeToken; } /** * Returns a copy of the end TokenDescriptor. This Token is used as a @@ -901,18 +901,18 @@ public: * * @return a copy of the end TokenDescriptor. */ - TokenDescriptor getEndToken() const { return endToken; } + TokenDescriptor getCloseToken() const { return closeToken; } /** * Sets the end TokenDescriptor. This Token is used as a * signifier during parsing that an instance of this FieldDescriptor ends. * - * @param e the new end TokenDescriptor. + * @param t the new end TokenDescriptor. */ - void setEndToken(TokenDescriptor e) + void setCloseToken(TokenDescriptor t) { invalidate(); - endToken = e; + closeToken = t; } /** @@ -922,7 +922,7 @@ public: */ virtual SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) { - SyntaxDescriptor stx{startToken.id, endToken.id, Tokens::Empty, + SyntaxDescriptor stx{openToken.id, closeToken.id, Tokens::Empty, const_cast<Descriptor *>(this), depth}; return stx; } @@ -938,11 +938,6 @@ public: */ std::vector<SyntaxDescriptor> getPermittedTokens() const; }; -/* - * TODO: We should discuss Cardinalities one more time. 
Is it smart to define - * cardinalities independent of context? Should we not have at least have the - * possibility to define it context-dependently? - */ /** * A StructuredClass specifies nodes in the StructureTree of a document that @@ -1220,7 +1215,7 @@ public: */ SyntaxDescriptor getSyntaxDescriptor(ssize_t depth = -1) override { - SyntaxDescriptor stx{getStartToken().id, getEndToken().id, + SyntaxDescriptor stx{getOpenToken().id, getCloseToken().id, shortToken.id, const_cast<StructuredClass *>(this), depth}; return stx; diff --git a/src/core/model/Syntax.cpp b/src/core/model/Syntax.cpp index bd17bff..a97acf7 100644 --- a/src/core/model/Syntax.cpp +++ b/src/core/model/Syntax.cpp @@ -16,13 +16,21 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "Syntax.hpp" +#include <core/common/Utils.hpp> #include "Ontology.hpp" +#include "Syntax.hpp" namespace ousia { -/* Class TokenSyntaxDescriptor */ +/* Class TokenDescriptor */ + +bool TokenDescriptor::isValid() const +{ + return special || isEmpty() || Utils::isUserDefinedToken(token); +} + +/* Class SyntaxDescriptor */ bool SyntaxDescriptor::isAnnotation() const { @@ -39,11 +47,11 @@ bool SyntaxDescriptor::isStruct() const void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const { - if (start != Tokens::Empty) { - set.insert(start); + if (open != Tokens::Empty) { + set.insert(open); } - if (end != Tokens::Empty) { - set.insert(end); + if (close != Tokens::Empty) { + set.insert(close); } if (shortForm != Tokens::Empty) { set.insert(shortForm); @@ -52,7 +60,7 @@ void SyntaxDescriptor::insertIntoTokenSet(TokenSet &set) const bool SyntaxDescriptor::isEmpty() const { - return start == Tokens::Empty && end == Tokens::Empty && + return open == Tokens::Empty && close == Tokens::Empty && shortForm == Tokens::Empty; } } diff --git a/src/core/model/Syntax.hpp b/src/core/model/Syntax.hpp index 4da3408..4adb329 100644 --- a/src/core/model/Syntax.hpp +++ b/src/core/model/Syntax.hpp @@ -74,6 
+74,15 @@ struct TokenDescriptor { * @return true if and only if neither a string nor an ID is given. */ bool isEmpty() const { return token.empty() && id == Tokens::Empty; } + + /** + * Returns true if the token is valid, which is the case if this class is + * either marked as special token or is empty or does have a valid token + * string set. + * + * @return true if the token descriptor is valid, false otherwise. + */ + bool isValid() const; }; /** @@ -86,17 +95,17 @@ struct TokenDescriptor { */ struct SyntaxDescriptor { /** - * Possible start token or Tokens::Empty if no token is set. + * Possible open token or Tokens::Empty if no token is set. */ - TokenId start; + TokenId open; /** - * Possible end token or Tokens::Empty if no token is set. + * Possible close token or Tokens::Empty if no token is set. */ - TokenId end; + TokenId close; /** - * Possible representation token or Tokens::Empty if no token is set. + * Possible short form token or Tokens::Empty if no token is set. */ TokenId shortForm; @@ -118,8 +127,8 @@ struct SyntaxDescriptor { * descriptor handle to nullptr. */ SyntaxDescriptor() - : start(Tokens::Empty), - end(Tokens::Empty), + : open(Tokens::Empty), + close(Tokens::Empty), shortForm(Tokens::Empty), descriptor(nullptr), depth(-1) @@ -129,18 +138,18 @@ struct SyntaxDescriptor { /** * Member initializer constructor. * - * @param start is a possible start token. - * @param end is a possible end token. + * @param open is a possible open token. + * @param close is a possible close token. * @param shortForm is a possible short form token. * @param descriptor The Descriptor this SyntaxDescriptor belongs to. * @param depth Given the current leaf in the parsed document the depth of a * SyntaxDescriptor is defined as the number of transparent elements that * would be needed to construct an instance of the referenced descriptor. 
*/ - SyntaxDescriptor(TokenId start, TokenId end, TokenId shortForm, + SyntaxDescriptor(TokenId open, TokenId close, TokenId shortForm, Handle<Node> descriptor, ssize_t depth) - : start(start), - end(end), + : open(open), + close(close), shortForm(shortForm), descriptor(descriptor), depth(depth) @@ -193,4 +202,4 @@ struct SyntaxDescriptor { bool isEmpty() const; }; } -#endif
\ No newline at end of file +#endif diff --git a/src/core/parser/stack/GenericParserStates.cpp b/src/core/parser/stack/GenericParserStates.cpp index 7287524..c355365 100644 --- a/src/core/parser/stack/GenericParserStates.cpp +++ b/src/core/parser/stack/GenericParserStates.cpp @@ -40,6 +40,12 @@ const std::multimap<std::string, const State *> GenericParserStates{ {"parentRef", &States::OntologyStructParent}, {"field", &States::OntologyStructParentField}, {"fieldRef", &States::OntologyStructParentFieldRef}, + {"syntax", &States::OntologySyntax}, + {"open", &States::OntologySyntaxOpen}, + {"close", &States::OntologySyntaxClose}, + {"short", &States::OntologySyntaxShort}, + {"whitespace", &States::OntologySyntaxWhitespace}, + {"*", &States::OntologySyntaxToken}, {"typesystem", &States::Typesystem}, {"enum", &States::TypesystemEnum}, {"entry", &States::TypesystemEnumEntry}, @@ -47,7 +53,8 @@ const std::multimap<std::string, const State *> GenericParserStates{ {"field", &States::TypesystemStructField}, {"constant", &States::TypesystemConstant}, {"import", &States::Import}, - {"include", &States::Include}}; + {"include", &States::Include} +}; } } diff --git a/src/core/parser/stack/OntologyHandler.cpp b/src/core/parser/stack/OntologyHandler.cpp index 4474589..24a1c31 100644 --- a/src/core/parser/stack/OntologyHandler.cpp +++ b/src/core/parser/stack/OntologyHandler.cpp @@ -207,8 +207,8 @@ bool OntologyPrimitiveHandler::startCommand(Variant::mapType &args) const std::string &type = args["type"].asString(); scope().resolveType(type, res.first, logger(), - [](Handle<Node> type, Handle<Node> field, - Logger &logger) { + [](Handle<Node> type, Handle<Node> field, + Logger &logger) { if (type != nullptr) { field.cast<FieldDescriptor>()->setPrimitiveType(type.cast<Type>()); } @@ -244,8 +244,8 @@ bool OntologyParentHandler::startCommand(Variant::mapType &args) { Rooted<StructuredClass> strct = scope().selectOrThrow<StructuredClass>(); - Rooted<OntologyParent> parent{ - new 
OntologyParent(strct->getManager(), args["ref"].asString(), strct)}; + Rooted<ParserOntologyParentNode> parent{new ParserOntologyParentNode( + strct->getManager(), args["ref"].asString(), strct)}; parent->setLocation(location()); scope().push(parent); return true; @@ -257,7 +257,8 @@ void OntologyParentHandler::end() { scope().pop(logger()); } bool OntologyParentFieldHandler::startCommand(Variant::mapType &args) { - Rooted<OntologyParent> parentNameNode = scope().selectOrThrow<OntologyParent>(); + Rooted<ParserOntologyParentNode> parentNameNode = + scope().selectOrThrow<ParserOntologyParentNode>(); FieldDescriptor::FieldType type; if (args["subtree"].asBool()) { type = FieldDescriptor::FieldType::SUBTREE; @@ -275,7 +276,7 @@ bool OntologyParentFieldHandler::startCommand(Variant::mapType &args) scope().resolve<Descriptor>( parentNameNode->getName(), strct, logger(), [type, name, optional](Handle<Node> parent, Handle<Node> strct, - Logger &logger) { + Logger &logger) { if (parent != nullptr) { Rooted<FieldDescriptor> field = (parent.cast<Descriptor>()->createFieldDescriptor( @@ -290,7 +291,8 @@ bool OntologyParentFieldHandler::startCommand(Variant::mapType &args) bool OntologyParentFieldRefHandler::startCommand(Variant::mapType &args) { - Rooted<OntologyParent> parentNameNode = scope().selectOrThrow<OntologyParent>(); + Rooted<ParserOntologyParentNode> parentNameNode = + scope().selectOrThrow<ParserOntologyParentNode>(); const std::string &name = args["ref"].asString(); Rooted<StructuredClass> strct = @@ -299,29 +301,318 @@ bool OntologyParentFieldRefHandler::startCommand(Variant::mapType &args) // resolve the parent, get the referenced field and add the declared // StructuredClass as child to it. 
- scope().resolve<Descriptor>(parentNameNode->getName(), strct, logger(), - [name, loc](Handle<Node> parent, - Handle<Node> strct, Logger &logger) { - if (parent != nullptr) { - Rooted<FieldDescriptor> field = - parent.cast<Descriptor>()->getFieldDescriptor(name); - if (field == nullptr) { - logger.error( - std::string("Could not find referenced field ") + name, loc); - return; - } - field->addChild(strct.cast<StructuredClass>()); + scope().resolve<Descriptor>( + parentNameNode->getName(), strct, logger(), + [name, loc](Handle<Node> parent, Handle<Node> strct, Logger &logger) { + if (parent != nullptr) { + Rooted<FieldDescriptor> field = + parent.cast<Descriptor>()->getFieldDescriptor(name); + if (field == nullptr) { + logger.error( + std::string("Could not find referenced field ") + name, + loc); + return; + } + field->addChild(strct.cast<StructuredClass>()); + } + }); + return true; +} + +/* Class OntologySyntaxHandler */ + +bool OntologySyntaxHandler::startCommand(Variant::mapType &args) +{ + scope().push(new ParserSyntaxNode(manager())); + return true; +} + +void OntologySyntaxHandler::end() { scope().pop(logger()); } + +/* Class OntologyOpenCloseShortHandler */ + +namespace { +enum class TokenType { OPEN, CLOSE, SHORT }; +} + +OntologyOpenCloseShortHandler::OntologyOpenCloseShortHandler( + const HandlerData &handlerData) + : StaticHandler(handlerData), descr(nullptr) +{ +} + +bool OntologyOpenCloseShortHandler::startCommand(Variant::mapType &args) +{ + // Select the upper field, annotation and struct descriptor + Rooted<StructuredClass> strct = scope().select<StructuredClass>(); + Rooted<AnnotationClass> anno = scope().select<AnnotationClass>(); + Rooted<FieldDescriptor> field = scope().select<FieldDescriptor>(); + + // Fetch the token type this handler was created for + TokenType type; + if (name() == "open") { + type = TokenType::OPEN; + } else if (name() == "close") { + type = TokenType::CLOSE; + } else if (name() == "short") { + type = TokenType::SHORT; + 
} else { + logger().error(std::string("Invalid syntax element \"") + name() + + std::string("\"")); + return false; + } + + // We cannot define the short form inside a field + if (field != nullptr && type == TokenType::SHORT) { + logger().error( + std::string("Cannot define short syntax within a field."), + location()); + return false; + } + + // Open, close and short syntax may not be defined within the field of an + // annotation, only for the annotation itself + if (anno != nullptr && field != nullptr) { + logger().error(std::string("Cannot define ") + name() + + std::string(" syntax within annotation field."), + location()); + return false; + } + + // We cannot define a short form for an annotation + if (anno != nullptr && type == TokenType::SHORT) { + logger().error( + std::string("Cannot define short syntax for annotations"), + location()); + return false; + } + + // Fetch the pointer for either the open, close or short token + descr = nullptr; + if (field != nullptr) { + switch (type) { + case TokenType::OPEN: + descr = field->getOpenTokenPointer(); + break; + case TokenType::CLOSE: + descr = field->getCloseTokenPointer(); + break; + default: + break; } - }); + } else if (anno != nullptr) { + switch (type) { + case TokenType::OPEN: + descr = anno->getOpenTokenPointer(); + break; + case TokenType::CLOSE: + descr = anno->getCloseTokenPointer(); + break; + default: + break; + } + } else if (strct != nullptr) { + switch (type) { + case TokenType::OPEN: + descr = strct->getOpenTokenPointer(); + break; + case TokenType::CLOSE: + descr = strct->getCloseTokenPointer(); + break; + case TokenType::SHORT: + descr = strct->getShortTokenPointer(); + break; + } + } + + // Make sure a descriptor was set (the checks above should already prevent + // this case from happening). 
+ if (descr == nullptr) { + logger().error( + "Internal error: Could not find corresponding token descriptor", + location()); + return false; + } + + // Make sure the descriptor does not already have any content + if (!descr->isEmpty()) { + if (field != nullptr) { + logger().error(name() + std::string(" syntax for field \"") + + field->getName() + + std::string("\" was already defined")); + } else if (anno != nullptr) { + logger().error(name() + std::string(" syntax for annotation \"") + + anno->getName() + + std::string("\" was already defined")); + } else if (strct != nullptr) { + logger().error(name() + std::string(" syntax for structure \"") + + strct->getName() + + std::string("\" was already defined")); + } + return false; + } + + // Push the corresponding nodes onto the stack + switch (type) { + case TokenType::OPEN: + scope().push(new ParserSyntaxOpenNode(manager(), descr)); + break; + case TokenType::CLOSE: + scope().push(new ParserSyntaxCloseNode(manager(), descr)); + break; + case TokenType::SHORT: + scope().push(new ParserSyntaxShortNode(manager(), descr)); + break; + } return true; } +bool OntologyOpenCloseShortHandler::data() +{ + Variant str = readData(); + if (descr && descr->isEmpty()) { + // Read the token descriptor + *descr = TokenDescriptor(str.asString()); + + // Make sure the token descriptor is actually valid, if not, reset it + // (do not, however return false as the data per se was at the right + // place) + if (!descr->isValid()) { + logger().error( + std::string("Given token \"") + str.asString() + + std::string( + "\" is not a valid user defined token (no whitespaces, " + "must start and end with a non-alphabetic character, must " + "not override OSML tokens).")); + *descr = TokenDescriptor(); + } + return true; + } + logger().error("Did not expect any data here", str); + return false; +} + +void OntologyOpenCloseShortHandler::end() +{ + if (descr->isEmpty()) { + logger().error(std::string("Expected valid token for ") + name() + + 
std::string(" syntax descriptor."), + location()); + } + scope().pop(logger()); +} + +/* Class OntologySyntaxTokenHandler */ + +bool OntologySyntaxTokenHandler::startCommand(Variant::mapType &args) +{ + // Select the ParserSyntaxTokenNode containing the reference at the + // TokenDescriptor + Rooted<ParserSyntaxTokenNode> tokenNode = + scope().selectOrThrow<ParserSyntaxTokenNode>(); + + if (!tokenNode->descr->isEmpty()) { + logger().error( + "Token was already set, did not expect another command here.", + location()); + return false; + } + + // Select the correct special token + TokenId id = Tokens::Empty; + if (name() == "newline") { + id = Tokens::Newline; + } else if (name() == "paragraph") { + id = Tokens::Paragraph; + } else if (name() == "section") { + id = Tokens::Section; + } else if (name() == "indent") { + id = Tokens::Indent; + } else if (name() == "dedent") { + id = Tokens::Dedent; + } else { + logger().error( + "Expected one of \"newline\", \"paragraph\", \"section\", " + "\"indent\", \"dedent\", but got \"" + + name() + "\"", + location()); + return false; + } + + // Set the token descriptor + *tokenNode->descr = TokenDescriptor(id); + return true; +} + +/* Class OntologySyntaxWhitespaceHandler */ + +OntologySyntaxWhitespaceHandler::OntologySyntaxWhitespaceHandler( + const HandlerData &handlerData) + : StaticHandler(handlerData), whitespaceModeStr("") +{ +} + +bool OntologySyntaxWhitespaceHandler::startCommand(Variant::mapType &args) +{ + // Fetch the field descriptor, log an error if "whitespace" was not + // specified inside a field descriptor + Rooted<FieldDescriptor> field = scope().select<FieldDescriptor>(); + if (field == nullptr) { + logger().error( + "Whitespace mode definition is only allowed inside fields.", + location()); + return false; + } + return true; +} + +bool OntologySyntaxWhitespaceHandler::data() +{ + if (whitespaceModeStr != nullptr) { + logger().error( + "Did not expect any more data, whitespace mode has already been " + "set.", 
+ location()); + return false; + } + whitespaceModeStr = readData(); + return true; +} + +void OntologySyntaxWhitespaceHandler::end() +{ + // Make sure the given whitespace mode is valid + const std::string &mode = whitespaceModeStr.asString(); + Rooted<FieldDescriptor> field = scope().selectOrThrow<FieldDescriptor>(); + if (mode == "trim") { + field->setWhitespaceMode(WhitespaceMode::TRIM); + } else if (mode == "collapse") { + field->setWhitespaceMode(WhitespaceMode::COLLAPSE); + } else if (mode == "preserve") { + field->setWhitespaceMode(WhitespaceMode::PRESERVE); + } else { + logger().error( + "Expected \"trim\", \"collapse\" or \"preserve\" as whitespace " + "mode.", + whitespaceModeStr); + return; + } +} + +/* Class ParserSyntaxTokenNode */ + +ParserSyntaxTokenNode::ParserSyntaxTokenNode(Manager &mgr, + TokenDescriptor *descr) + : Node(mgr), descr(descr) +{ +} + namespace States { const State Ontology = StateBuilder() - .parents({&None, &Document}) - .createdNodeType(&RttiTypes::Ontology) - .elementHandler(OntologyHandler::create) - .arguments({Argument::String("name")}); + .parents({&None, &Document}) + .createdNodeType(&RttiTypes::Ontology) + .elementHandler(OntologyHandler::create) + .arguments({Argument::String("name")}); const State OntologyStruct = StateBuilder() @@ -356,12 +647,12 @@ const State OntologyAttribute = Argument::Any("default", Variant::fromObject(nullptr))}); const State OntologyField = StateBuilder() - .parents({&OntologyStruct, &OntologyAnnotation}) - .createdNodeType(&RttiTypes::FieldDescriptor) - .elementHandler(OntologyFieldHandler::create) - .arguments({Argument::String("name", ""), - Argument::Bool("subtree", false), - Argument::Bool("optional", false)}); + .parents({&OntologyStruct, &OntologyAnnotation}) + .createdNodeType(&RttiTypes::FieldDescriptor) + .elementHandler(OntologyFieldHandler::create) + .arguments({Argument::String("name", ""), + Argument::Bool("subtree", false), + Argument::Bool("optional", false)}); const State 
OntologyFieldRef = StateBuilder() @@ -379,15 +670,16 @@ const State OntologyStructPrimitive = {Argument::String("name", ""), Argument::Bool("subtree", false), Argument::Bool("optional", false), Argument::String("type")}); -const State OntologyStructChild = StateBuilder() - .parent(&OntologyField) - .elementHandler(OntologyChildHandler::create) - .arguments({Argument::String("ref")}); +const State OntologyStructChild = + StateBuilder() + .parent(&OntologyField) + .elementHandler(OntologyChildHandler::create) + .arguments({Argument::String("ref")}); const State OntologyStructParent = StateBuilder() .parent(&OntologyStruct) - .createdNodeType(&RttiTypes::OntologyParent) + .createdNodeType(&RttiTypes::ParserOntologyParentNode) .elementHandler(OntologyParentHandler::create) .arguments({Argument::String("ref")}); @@ -406,11 +698,68 @@ const State OntologyStructParentFieldRef = .createdNodeType(&RttiTypes::FieldDescriptor) .elementHandler(OntologyParentFieldRefHandler::create) .arguments({Argument::String("ref", DEFAULT_FIELD_NAME)}); + +const State OntologySyntax = + StateBuilder() + .parents({&OntologyStruct, &OntologyField, &OntologyAnnotation}) + .createdNodeType(&RttiTypes::ParserSyntaxNode) + .elementHandler(OntologySyntaxHandler::create) + .arguments(Arguments{}); + +const State OntologySyntaxToken = + StateBuilder() + .parents({&OntologySyntaxOpen, &OntologySyntaxClose, &OntologySyntax}) + .createdNodeType(&RttiTypes::ParserSyntaxTokenNode) + .elementHandler(OntologySyntaxTokenHandler::create) + .arguments(Arguments{}); + +const State OntologySyntaxOpen = + StateBuilder() + .parent(&OntologySyntax) + .createdNodeType(&RttiTypes::ParserSyntaxOpenNode) + .elementHandler(OntologyOpenCloseShortHandler::create) + .arguments(Arguments{}); + +const State OntologySyntaxClose = + StateBuilder() + .parent(&OntologySyntax) + .createdNodeType(&RttiTypes::ParserSyntaxCloseNode) + .elementHandler(OntologyOpenCloseShortHandler::create) + .arguments(Arguments{}); + +const State 
OntologySyntaxShort = + StateBuilder() + .parent(&OntologySyntax) + .createdNodeType(&RttiTypes::ParserSyntaxShortNode) + .elementHandler(OntologyOpenCloseShortHandler::create) + .arguments(Arguments{}); + +const State OntologySyntaxWhitespace = + StateBuilder() + .parent(&OntologySyntax) + .elementHandler(OntologySyntaxWhitespaceHandler::create) + .arguments(Arguments{}); } } namespace RttiTypes { -const Rtti OntologyParent = RttiBuilder<ousia::parser_stack::OntologyParent>( - "OntologyParent").parent(&Node); +const Rtti ParserOntologyParentNode = + RttiBuilder<ousia::parser_stack::ParserOntologyParentNode>( + "ParserOntologyParentNode").parent(&Node); +const Rtti ParserSyntaxNode = + RttiBuilder<ousia::parser_stack::ParserSyntaxNode>("ParserSyntaxNode") + .parent(&Node); +const Rtti ParserSyntaxTokenNode = + RttiBuilder<ousia::parser_stack::ParserSyntaxTokenNode>( + "ParserSyntaxTokenNode").parent(&Node); +const Rtti ParserSyntaxOpenNode = + RttiBuilder<ousia::parser_stack::ParserSyntaxOpenNode>( + "ParserSyntaxOpenNode").parent(&ParserSyntaxTokenNode); +const Rtti ParserSyntaxCloseNode = + RttiBuilder<ousia::parser_stack::ParserSyntaxCloseNode>( + "ParserSyntaxCloseNode").parent(&ParserSyntaxTokenNode); +const Rtti ParserSyntaxShortNode = + RttiBuilder<ousia::parser_stack::ParserSyntaxShortNode>( + "ParserSyntaxShortNode").parent(&ParserSyntaxTokenNode); } } diff --git a/src/core/parser/stack/OntologyHandler.hpp b/src/core/parser/stack/OntologyHandler.hpp index fd62f78..0203805 100644 --- a/src/core/parser/stack/OntologyHandler.hpp +++ b/src/core/parser/stack/OntologyHandler.hpp @@ -145,11 +145,6 @@ public: } }; -class OntologyParent : public Node { -public: - using Node::Node; -}; - class OntologyParentHandler : public StaticHandler { public: using StaticHandler::StaticHandler; @@ -187,6 +182,97 @@ public: } }; +class OntologySyntaxHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool startCommand(Variant::mapType &args) override; + 
void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new OntologySyntaxHandler{handlerData}; + } +}; + +class OntologyOpenCloseShortHandler : public StaticHandler { +public: + TokenDescriptor *descr; + + OntologyOpenCloseShortHandler(const HandlerData &handlerData); + + bool startCommand(Variant::mapType &args) override; + bool data() override; + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new OntologyOpenCloseShortHandler{handlerData}; + } +}; + +class OntologySyntaxTokenHandler : public StaticHandler { +public: + using StaticHandler::StaticHandler; + + bool startCommand(Variant::mapType &args) override; + + static Handler *create(const HandlerData &handlerData) + { + return new OntologySyntaxTokenHandler{handlerData}; + } +}; + +class OntologySyntaxWhitespaceHandler : public StaticHandler { +public: + OntologySyntaxWhitespaceHandler(const HandlerData &handlerData); + + Variant whitespaceModeStr; + + bool startCommand(Variant::mapType &args) override; + bool data() override; + void end() override; + + static Handler *create(const HandlerData &handlerData) + { + return new OntologySyntaxWhitespaceHandler{handlerData}; + } +}; + +/* Internally used dummy node classes */ + +class ParserOntologyParentNode : public Node { +public: + using Node::Node; +}; + +class ParserSyntaxNode: public Node { +public: + using Node::Node; +}; + +class ParserSyntaxTokenNode: public Node { +public: + TokenDescriptor *descr; + + ParserSyntaxTokenNode(Manager &mgr, TokenDescriptor *descr); +}; + +class ParserSyntaxOpenNode: public ParserSyntaxTokenNode { +public: + using ParserSyntaxTokenNode::ParserSyntaxTokenNode; +}; + +class ParserSyntaxCloseNode: public ParserSyntaxTokenNode { +public: + using ParserSyntaxTokenNode::ParserSyntaxTokenNode; +}; + +class ParserSyntaxShortNode: public ParserSyntaxTokenNode { +public: + using ParserSyntaxTokenNode::ParserSyntaxTokenNode; +}; + namespace States { /** * 
State representing a "ontology" struct. @@ -247,11 +333,46 @@ extern const State OntologyStructParentField; * State representing a "fieldRef" tag within a "parent" tag. */ extern const State OntologyStructParentFieldRef; + +/** + * State representing a "syntax" tag within a structure, annotation or field. + */ +extern const State OntologySyntax; + +/** + * State representing a "open" tag within a "syntax" tag. + */ +extern const State OntologySyntaxOpen; + +/** + * State representing an "close" tag within a "syntax" tag. + */ +extern const State OntologySyntaxClose; + +/** + * State representing a "short" tag within a "syntax" tag. + */ +extern const State OntologySyntaxShort; + +/** + * State representing a "whitespace" tag within a "syntax" tag. + */ +extern const State OntologySyntaxWhitespace; + +/** + * State representing a token within a "start", "end" or "short" tag. + */ +extern const State OntologySyntaxToken; } } namespace RttiTypes { -extern const Rtti OntologyParent; +extern const Rtti ParserOntologyParentNode; +extern const Rtti ParserSyntaxNode; +extern const Rtti ParserSyntaxTokenNode; +extern const Rtti ParserSyntaxOpenNode; +extern const Rtti ParserSyntaxCloseNode; +extern const Rtti ParserSyntaxShortNode; } } #endif /* _OUSIA_ONTOLOGY_HANDLER_HPP_ */ diff --git a/test/core/model/OntologyTest.cpp b/test/core/model/OntologyTest.cpp index c6e0596..21893a1 100644 --- a/test/core/model/OntologyTest.cpp +++ b/test/core/model/OntologyTest.cpp @@ -530,8 +530,8 @@ TEST(Descriptor, getSyntaxDescriptor) Rooted<Ontology> ontology{new Ontology(mgr, sys, "ontology")}; Rooted<StructuredClass> A{new StructuredClass( mgr, "A", ontology, Cardinality::any(), {nullptr}, false, false)}; - A->setStartToken(TokenDescriptor(Tokens::Indent)); - A->setEndToken(TokenDescriptor(Tokens::Dedent)); + A->setOpenToken(TokenDescriptor(Tokens::Indent)); + A->setCloseToken(TokenDescriptor(Tokens::Dedent)); { TokenDescriptor sh{"<+>"}; sh.id = 1; @@ -539,8 +539,8 @@ TEST(Descriptor, 
getSyntaxDescriptor) } // check the SyntaxDescriptor SyntaxDescriptor stx = A->getSyntaxDescriptor(); - ASSERT_EQ(Tokens::Indent, stx.start); - ASSERT_EQ(Tokens::Dedent, stx.end); + ASSERT_EQ(Tokens::Indent, stx.open); + ASSERT_EQ(Tokens::Dedent, stx.close); ASSERT_EQ(1, stx.shortForm); ASSERT_EQ(A, stx.descriptor); ASSERT_TRUE(stx.isStruct()); @@ -559,8 +559,8 @@ TEST(Descriptor, getPermittedTokens) // add one StructuredClass with all tokens set. Rooted<StructuredClass> A{new StructuredClass( mgr, "A", ontology, Cardinality::any(), {nullptr}, false, false)}; - A->setStartToken(TokenDescriptor(Tokens::Indent)); - A->setEndToken(TokenDescriptor(Tokens::Dedent)); + A->setOpenToken(TokenDescriptor(Tokens::Indent)); + A->setCloseToken(TokenDescriptor(Tokens::Dedent)); { TokenDescriptor sh{"<+>"}; sh.id = 1; @@ -568,19 +568,19 @@ TEST(Descriptor, getPermittedTokens) } // add a field with one token set. Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first; - A_field->setEndToken(TokenDescriptor(Tokens::Newline)); + A_field->setCloseToken(TokenDescriptor(Tokens::Newline)); A_field->addChild(A); // add an annotation with start and end set. Rooted<AnnotationClass> A_anno = ontology->createAnnotationClass("A"); { TokenDescriptor start{"<"}; start.id = 7; - A_anno->setStartToken(start); + A_anno->setOpenToken(start); } { TokenDescriptor end{">"}; end.id = 8; - A_anno->setEndToken(end); + A_anno->setCloseToken(end); } // add a trivial annotation, which should not be returned. Rooted<AnnotationClass> B_anno = ontology->createAnnotationClass("B"); @@ -592,16 +592,16 @@ TEST(Descriptor, getPermittedTokens) // the field should be first, because A itself should not be collected // directly. 
ASSERT_EQ(A_field, stxs[0].descriptor); - ASSERT_EQ(Tokens::Empty, stxs[0].start); - ASSERT_EQ(Tokens::Newline, stxs[0].end); + ASSERT_EQ(Tokens::Empty, stxs[0].open); + ASSERT_EQ(Tokens::Newline, stxs[0].close); ASSERT_EQ(Tokens::Empty, stxs[0].shortForm); ASSERT_EQ(A, stxs[1].descriptor); - ASSERT_EQ(Tokens::Indent, stxs[1].start); - ASSERT_EQ(Tokens::Dedent, stxs[1].end); + ASSERT_EQ(Tokens::Indent, stxs[1].open); + ASSERT_EQ(Tokens::Dedent, stxs[1].close); ASSERT_EQ(1, stxs[1].shortForm); ASSERT_EQ(A_anno, stxs[2].descriptor); - ASSERT_EQ(7, stxs[2].start); - ASSERT_EQ(8, stxs[2].end); + ASSERT_EQ(7, stxs[2].open); + ASSERT_EQ(8, stxs[2].close); ASSERT_EQ(Tokens::Empty, stxs[2].shortForm); } @@ -720,11 +720,11 @@ TEST(Ontology, validate) ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); ASSERT_TRUE(ontology->validate(logger)); // add an invalid start token. - base_field->setStartToken(TokenDescriptor("< + >")); + base_field->setOpenToken(TokenDescriptor("< + >")); ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); ASSERT_FALSE(ontology->validate(logger)); // make it valid. - base_field->setStartToken(TokenDescriptor("<")); + base_field->setOpenToken(TokenDescriptor("<")); ASSERT_EQ(ValidationState::UNKNOWN, ontology->getValidationState()); ASSERT_TRUE(ontology->validate(logger)); // add a subclass for our base class. @@ -796,8 +796,8 @@ TEST(Ontology, getAllTokenDescriptors) // add one StructuredClass with all tokens set. Rooted<StructuredClass> A{new StructuredClass( mgr, "A", ontology, Cardinality::any(), {nullptr}, false, false)}; - A->setStartToken(TokenDescriptor(Tokens::Indent)); - A->setEndToken(TokenDescriptor(Tokens::Dedent)); + A->setOpenToken(TokenDescriptor(Tokens::Indent)); + A->setCloseToken(TokenDescriptor(Tokens::Dedent)); { TokenDescriptor sh{"<+>"}; sh.id = 1; @@ -805,19 +805,19 @@ TEST(Ontology, getAllTokenDescriptors) } // add a field with one token set. 
Rooted<FieldDescriptor> A_field = A->createFieldDescriptor(logger).first; - A_field->setEndToken(TokenDescriptor(Tokens::Newline)); + A_field->setCloseToken(TokenDescriptor(Tokens::Newline)); A_field->addChild(A); // add an annotation with start and end set. Rooted<AnnotationClass> A_anno = ontology->createAnnotationClass("A"); { TokenDescriptor start{"<"}; start.id = 7; - A_anno->setStartToken(start); + A_anno->setOpenToken(start); } { TokenDescriptor end{">"}; end.id = 8; - A_anno->setEndToken(end); + A_anno->setCloseToken(end); } // add a trivial annotation, which should not be returned. Rooted<AnnotationClass> B_anno = ontology->createAnnotationClass("B"); |