/*
    Ousía
    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE(review): in this copy of the file the targets of the following
// #include directives and all template argument lists (e.g. on Rooted, Handle,
// std::vector, NodeVector, ManagedVector, cast(), static_cast, resolve(),
// selectOrThrow(), RttiBuilder) appear to have been stripped. They must be
// restored from the upstream source before this file can be compiled.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "DocumentHandler.hpp"
#include "State.hpp"

namespace ousia {
namespace parser_stack {

/* DocumentHandler */

/**
 * Creates a new document through the project of the current context, using the
 * "name" entry of the arguments as document name, and pushes it onto the
 * scope. The POST_HEAD and POST_USER_DEFINED_TOKEN_REGISTRATION parser flags
 * are reset to false for the new document.
 *
 * @param args is the argument map; its "name" entry is read as string.
 * @return always true.
 */
bool DocumentHandler::startCommand(Variant::mapType &args)
{
    Rooted document =
        context().getProject()->createDocument(args["name"].asString());
    document->setLocation(location());

    scope().push(document);
    scope().setFlag(ParserFlag::POST_HEAD, false);
    scope().setFlag(ParserFlag::POST_USER_DEFINED_TOKEN_REGISTRATION, false);

    return true;
}

/**
 * Pops the topmost element from the scope.
 */
void DocumentHandler::end() { scope().pop(logger()); }

/* DocumentField */

/**
 * Constructs a DocumentField node which stores the given field index and the
 * "transparent" and "explicitField" flags.
 *
 * @param mgr is forwarded to the Node constructor.
 * @param parent is forwarded to the Node constructor as parent node.
 * @param fieldIdx is the index of the field in the descriptor of the parent.
 * @param transparent is stored in the "transparent" member.
 * @param explicitField is stored in the "explicitField" member.
 */
DocumentField::DocumentField(Manager &mgr, Handle parent, size_t fieldIdx,
                             bool transparent, bool explicitField)
    : Node(mgr, parent),
      fieldIdx(fieldIdx),
      transparent(transparent),
      explicitField(explicitField)
{
}

/**
 * Returns the field descriptor with index "fieldIdx" from the descriptor of
 * the parent node.
 *
 * @return the field descriptor, or nullptr if the parent is neither a
 * StructuredEntity nor an AnnotationEntity (this case additionally triggers an
 * assertion).
 */
Rooted DocumentField::getDescriptor()
{
    // Fetch the FieldDescriptor from the parent node. The parent node should
    // either be a structured entity or an annotation entity
    Rooted parent = getParent();
    if (parent->isa(&RttiTypes::StructuredEntity)) {
        return parent.cast()
            ->getDescriptor()
            ->getFieldDescriptor(fieldIdx);
    } else if (parent->isa(&RttiTypes::AnnotationEntity)) {
        return parent.cast()
            ->getDescriptor()
            ->getFieldDescriptor(fieldIdx);
    }

    // Well, we never should get here
    // TODO: Introduce macro for unreachable code?
    assert(!"This should never be reached");
    return nullptr;
}

/* DocumentChildHandler */

/**
 * Constructs the handler with isExplicitField = false, isGreedy = true and
 * inImplicitDefaultField = false, and registers the user defined tokens
 * unless the POST_USER_DEFINED_TOKEN_REGISTRATION flag is already set.
 *
 * @param handlerData is forwarded to the Handler base class.
 */
DocumentChildHandler::DocumentChildHandler(const HandlerData &handlerData)
    : Handler(handlerData),
      isExplicitField(false),
      isGreedy(true),
      inImplicitDefaultField(false)
{
    // Register all user defined tokens if this has not yet been done
    if (!scope().getFlag(ParserFlag::POST_USER_DEFINED_TOKEN_REGISTRATION)) {
        registerUserDefinedTokens();
    }
}

/**
 * Registers every non-special token descriptor of all ontologies of the
 * current document and stores the id returned by registerToken() in the
 * descriptor. Sets the POST_USER_DEFINED_TOKEN_REGISTRATION flag first.
 */
void DocumentChildHandler::registerUserDefinedTokens()
{
    // Set the POST_USER_DEFINED_TOKEN_REGISTRATION flag, to prevent this method
    // from being called again
    scope().setFlag(ParserFlag::POST_USER_DEFINED_TOKEN_REGISTRATION, true);

    // Fetch the underlying document and all ontologies registered in the
    // document and register all user defined tokens in the parser
    Rooted doc = scope().selectOrThrow();
    for (Rooted ontology : doc->getOntologies()) {
        std::vector tokens = ontology->getAllTokenDescriptors();
        for (TokenDescriptor *token : tokens) {
            if (!token->special) {
                token->id = registerToken(token->token);
            }
        }
    }
}

/**
 * Resolves the entity a new child has to be attached to.
 *
 * @param parentNode is the node to start from. If it is a DocumentField it is
 * replaced by the StructuredEntity or AnnotationEntity selected from the
 * scope.
 * @param fieldIdx is set to the index stored in the DocumentField. It is left
 * untouched if parentNode is not a DocumentField.
 * @param parent is set to the DocumentEntity behind parentNode, or to nullptr
 * if parentNode is neither a StructuredEntity nor an AnnotationEntity.
 */
void DocumentChildHandler::preamble(Rooted &parentNode, size_t &fieldIdx,
                                    DocumentEntity *&parent)
{
    // Check if the parent in the structure tree was an explicit field
    // reference.
    if (parentNode->isa(&RttiTypes::DocumentField)) {
        fieldIdx = parentNode.cast()->fieldIdx;
        parentNode = scope().selectOrThrow(
            {&RttiTypes::StructuredEntity, &RttiTypes::AnnotationEntity});
    }

    // Reference the parent entity explicitly.
    parent = nullptr;
    if (parentNode->isa(&RttiTypes::StructuredEntity)) {
        parent = static_cast(
            parentNode.cast().get());
    } else if (parentNode->isa(&RttiTypes::AnnotationEntity)) {
        parent = static_cast(
            parentNode.cast().get());
    }
}

/**
 * Searches the scope stack from top to bottom for the first non-transparent
 * DocumentField or StructuredEntity and pushes its permitted tokens via
 * pushTokens(). If no such element is found, an empty list is pushed.
 */
void DocumentChildHandler::pushScopeTokens()
{
    // List containing the unfiltered syntax descriptors
    std::vector descrs;

    // Skip the DocumentField and the corresponding StructuredEntity
    // if we're currently in the implicit default field of a non-greedy
    // structure.
    size_t explicitSkipCount = (!isGreedy && inImplicitDefaultField) ? 2 : 0;

    // Fetch the current scope stack and search the first non-transparent field
    // or structure
    const ManagedVector &stack = scope().getStack();
    for (auto sit = stack.crbegin(); sit != stack.crend(); sit++) {
        Rooted nd = *sit;

        // TODO: Why can't this functionality be in a common base class?

        // Check whether the field is transparent, if not, fetch the tokens
        if (nd->isa(&RttiTypes::DocumentField)) {
            Rooted field = nd.cast();
            if (!field->transparent) {
                // Non-transparent elements that are to be skipped only
                // decrement the counter
                if (explicitSkipCount > 0) {
                    explicitSkipCount--;
                    continue;
                }
                descrs = field->getDescriptor()->getPermittedTokens();
                break;
            }
        }

        // Check whether the structure is transparent, if not, fetch the tokens
        if (nd->isa(&RttiTypes::StructuredEntity)) {
            Rooted entity = nd.cast();
            if (!entity->isTransparent()) {
                if (explicitSkipCount > 0) {
                    explicitSkipCount--;
                    continue;
                }
                descrs = entity->getDescriptor()->getPermittedTokens();
                break;
            }
        }
    }

    // Push the filtered tokens onto the stack
    pushTokens(descrs);
}

/**
 * Creates a new DocumentField node, assigns the current location to it and
 * pushes it onto the scope.
 *
 * @param parent is the parent node of the new DocumentField.
 * @param fieldDescr is not used in the body of this function.
 * @param fieldIdx is the field index stored in the DocumentField.
 * @param transparent is the "transparent" flag stored in the DocumentField.
 * @param explicitField is the "explicitField" flag stored in the
 * DocumentField.
 */
void DocumentChildHandler::pushDocumentField(Handle parent,
                                             Handle fieldDescr,
                                             size_t fieldIdx, bool transparent,
                                             bool explicitField)
{
    // Push the field onto the scope
    Rooted field =
        new DocumentField(manager(), parent, fieldIdx, transparent,
                          explicitField);
    field->setLocation(location());
    scope().push(field);
}

/**
 * Pops the topmost element from the scope. The element is asserted to be a
 * DocumentField.
 */
void DocumentChildHandler::popDocumentField()
{
    // Pop the field from the scope, make sure it actually is a DocumentField
    assert(scope().getLeaf()->isa(&RttiTypes::DocumentField));
    scope().pop(logger());
}

/**
 * Creates the transparent elements described by the given path, starting at
 * path index p0 and advancing in steps of two. For each visited index a
 * transparent DocumentField and a transparent StructuredEntity are pushed onto
 * the scope; a final transparent DocumentField is pushed after the loop.
 *
 * @param path is the path to construct. The entry at index p is cast and used
 * as class of the new entity, the name of the entry at index p - 1 is passed
 * as well, so p0 must be at least one.
 * NOTE(review): presumably the path alternates between structured classes and
 * field descriptors -- confirm against FieldDescriptor::pathTo.
 * @param parent is the entity the first element is added to; on return it
 * points at the last entity that was created (unchanged if none was created).
 * @param p0 is the first path index that is processed.
 */
void DocumentChildHandler::createPath(const NodeVector &path,
                                      DocumentEntity *&parent, size_t p0)
{
    size_t S = path.size();
    for (size_t p = p0; p < S; p = p + 2) {
        // add the field.
        const ssize_t fieldIdx =
            parent->getDescriptor()->getFieldDescriptorIndex();
        const Rooted fieldDescr =
            parent->getDescriptor()->getFieldDescriptor(fieldIdx);
        pushDocumentField(scope().getLeaf(), fieldDescr, fieldIdx, true, false);

        // add the transparent/implicit structure element.
        Rooted transparent =
            parent->createChildStructuredEntity(path[p].cast(),
                                                Variant::mapType{},
                                                path[p - 1]->getName(), "");
        transparent->setLocation(location());
        transparent->setTransparent(true);
        scope().push(transparent);
        parent = static_cast(transparent.get());
    }
    // add the field.
    const ssize_t fieldIdx = parent->getDescriptor()->getFieldDescriptorIndex();
    const Rooted fieldDescr =
        parent->getDescriptor()->getFieldDescriptor(fieldIdx);
    pushDocumentField(scope().getLeaf(), fieldDescr, fieldIdx, true, false);

    // Generally allow explicit fields in the new field
    scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, false);
}

/**
 * Creates the transparent elements described by the given path, where the
 * first element is added to the field with index firstFieldIdx of the parent.
 * The remaining path is constructed by the overload above, starting at path
 * index two.
 *
 * @param firstFieldIdx is the field of the parent the first element is added
 * to.
 * @param path is the path to construct; path[0] is accessed unconditionally,
 * so the path must not be empty.
 * @param parent is the entity the first element is added to; on return it
 * points at the last entity that was created.
 */
void DocumentChildHandler::createPath(const size_t &firstFieldIdx,
                                      const NodeVector &path,
                                      DocumentEntity *&parent)
{
    // Add the first element
    Rooted transparent = parent->createChildStructuredEntity(
        path[0].cast(), firstFieldIdx);
    transparent->setLocation(location());
    transparent->setTransparent(true);
    scope().push(transparent);
    parent = static_cast(transparent.get());

    createPath(path, parent, 2);

    // Generally allow explicit fields in the new field
    // (the overload called above has already reset this flag as well)
    scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, false);
}

/**
 * Pops the topmost DocumentField from the scope and afterwards all pairs of
 * transparent StructuredEntity and DocumentField that follow on the scope
 * stack.
 */
void DocumentChildHandler::rollbackPath()
{
    // Remove the topmost field
    popDocumentField();

    // Pop all remaining transparent elements.
    while (scope().getLeaf()->isa(&RttiTypes::StructuredEntity) &&
           scope().getLeaf().cast()->isTransparent()) {
        // Pop the transparent element.
        scope().pop(logger());

        // Pop the transparent field.
        popDocumentField();
    }
}

/**
 * Removes the "name" entry from the given argument map and returns its value
 * as string.
 *
 * @param args is the argument map; the "name" entry is erased if present.
 * @return the value of the "name" entry or an empty string if there is none.
 */
static std::string extractNameAttribute(Variant::mapType &args)
{
    // Extract the special "name" attribute from the input arguments.
    // The remaining attributes will be forwarded to the newly constructed
    // element.
    std::string res;
    auto it = args.find("name");
    if (it != args.end()) {
        res = it->second.asString();
        args.erase(it);
    }
    return res;
}

/**
 * Handles a command inside a document. Depending on the current scope leaf
 * the command either creates the root entity of the document, opens an
 * explicit field of the parent entity, or creates a new child entity
 * (including transparent elements in between, if a path is required).
 *
 * @param args are the command arguments; the "name" entry is removed and used
 * as name of the created entity.
 * @return true if a field or entity was pushed onto the scope, false if the
 * document already has a root node.
 * @throws LoggableException if the command name cannot be resolved or the
 * resolved class is not allowed at the current position.
 */
bool DocumentChildHandler::startCommand(Variant::mapType &args)
{
    std::string nameAttr = extractNameAttribute(args);

    scope().setFlag(ParserFlag::POST_HEAD, true);
    // The loop is re-entered (via "continue") each time a transparent
    // field/element pair has been unwound from the scope
    while (true) {
        Rooted parentNode = scope().getLeaf();

        Rooted entity;
        // handle the root node specifically.
        if (parentNode->isa(&RttiTypes::Document)) {
            // if we already have a root node, stop.
            if (parentNode.cast()->getRoot() != nullptr) {
                logger().warning(
                    "This document already has a root node. The additional "
                    "node is ignored.",
                    location());
                return false;
            }
            Rooted strct = scope().resolve(
                Utils::split(name(), ':'), logger());
            if (strct == nullptr) {
                // if we could not resolve the name, throw an exception.
                throw LoggableException(
                    std::string("\"") + name() + "\" could not be resolved.",
                    location());
            }
            entity = parentNode.cast()->createRootStructuredEntity(
                strct, args, nameAttr);
        } else {
            assert(parentNode->isa(&RttiTypes::DocumentField));

            size_t fieldIdx;
            DocumentEntity *parent;

            preamble(parentNode, fieldIdx, parent);

            /*
             * Try to find a FieldDescriptor for the given tag if we are not in
             * a field already. This does _not_ try to construct transparent
             * paths in between.
             */
            {
                ssize_t newFieldIdx =
                    parent->getDescriptor()->getFieldDescriptorIndex(name());
                if (newFieldIdx != -1) {
                    // Check whether explicit fields are allowed here; if not,
                    // only issue a note and treat the command as structure
                    if (scope().getFlag(ParserFlag::POST_EXPLICIT_FIELDS)) {
                        logger().note(
                            std::string(
                                "Data or structure commands have already been "
                                "given, command \"") +
                                name() +
                                std::string(
                                    "\" is not interpreted as explicit "
                                    "field. Move explicit field "
                                    "references to the beginning."),
                            location());
                    } else {
                        pushDocumentField(
                            parentNode,
                            parent->getDescriptor()->getFieldDescriptor(
                                newFieldIdx),
                            newFieldIdx, false, true);
                        pushScopeTokens();

                        isExplicitField = true;
                        return true;
                    }
                }
            }

            // Otherwise create a new StructuredEntity
            Rooted strct = scope().resolve(
                Utils::split(name(), ':'), logger());
            if (strct == nullptr) {
                // if we could not resolve the name, throw an exception.
                throw LoggableException(
                    std::string("\"") + name() + "\" could not be resolved.",
                    location());
            }

            // calculate a path if transparent entities are needed in between.
            Rooted field =
                parent->getDescriptor()->getFieldDescriptor(fieldIdx);
            size_t lastFieldIdx = fieldIdx;
            auto pathRes = field->pathTo(strct, logger());
            if (!pathRes.second) {
                if (scope().getLeaf().cast()->transparent) {
                    // if we have transparent elements above us in the structure
                    // tree we try to unwind them before we give up.
                    // pop the implicit field.
                    popDocumentField();
                    // pop the implicit element.
                    scope().pop(logger());
                    continue;
                }
                throw LoggableException(
                    std::string("An instance of \"") + strct->getName() +
                        "\" is not allowed as child of field \"" +
                        field->getNameOrDefaultName() + "\" of descriptor \"" +
                        parent->getDescriptor()->getName() + "\"",
                    location());
            }
            if (!pathRes.first.empty()) {
                createPath(lastFieldIdx, pathRes.first, parent);
                lastFieldIdx =
                    parent->getDescriptor()->getFieldDescriptorIndex();
            }
            // create the entity for the new element at last.
            entity = parent->createChildStructuredEntity(strct, lastFieldIdx,
                                                         args, nameAttr);
        }

        // We're past the region in which explicit fields can be defined in the
        // parent structure element
        scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true);

        // Push the entity onto the stack
        entity->setLocation(location());
        scope().push(entity);
        pushScopeTokens();

        return true;
    }
}

/**
 * Handles the start or end of an annotation. Transparent elements are unwound
 * from the scope until the current field is not primitive, then an anchor is
 * created in that field. For ANNOTATION_START a new annotation entity starting
 * at the anchor is created and pushed onto the scope; for ANNOTATION_END the
 * anchor is set as end of the matching start anchor.
 *
 * @param args are the annotation arguments; the "name" entry is removed and
 * used as annotation name.
 * @return true on success, false if the annotation is used at the document
 * level, inside a primitive field, or if no matching start anchor is found.
 * @throws LoggableException if the annotation class of an annotation start
 * cannot be resolved.
 * @throws OusiaException if the handler type is neither ANNOTATION_START nor
 * ANNOTATION_END.
 */
bool DocumentChildHandler::startAnnotation(Variant::mapType &args)
{
    std::string nameAttr = extractNameAttribute(args);

    scope().setFlag(ParserFlag::POST_HEAD, true);

    size_t fieldIdx;
    DocumentEntity *parent;
    while (true) {
        Rooted parentNode = scope().getLeaf();

        // Make sure the parent node is a DocumentField
        if (parentNode->isa(&RttiTypes::Document)) {
            logger().error(
                "Cannot start or end annotation at the document level.",
                location());
            return false;
        }
        assert(parentNode->isa(&RttiTypes::DocumentField));

        preamble(parentNode, fieldIdx, parent);

        // A non-primitive field can hold the anchor, stop unwinding
        if (!parent->getDescriptor()
                 ->getFieldDescriptor(fieldIdx)
                 ->isPrimitive()) {
            break;
        }

        // If we are inside a primitive field and have transparent elements on
        // the stack we unwind the stack until we are inside
        // a non-primitive field.
        if (scope().getLeaf().cast()->transparent) {
            // if we have transparent elements above us in the structure
            // tree we try to unwind them before we give up.
            // pop the implicit field.
            popDocumentField();
            // pop the implicit element.
            scope().pop(logger());
            continue;
        } else {
            logger().error(
                "Cannot start or end annotation within the primitive field \"" +
                    parent->getDescriptor()
                        ->getFieldDescriptor(fieldIdx)
                        ->getNameOrDefaultName() +
                    "\" of descriptor \"" + parent->getDescriptor()->getName() +
                    "\".",
                location());
            return false;
        }
    }

    // Create the anchor
    // NOTE(review): the anchor is created before the annotation class is
    // resolved; if the ANNOTATION_START branch below throws, the anchor is not
    // removed from the field again -- confirm whether this is intended.
    Rooted anchor = parent->createChildAnchor(fieldIdx);
    anchor->setLocation(location());

    // Resolve the AnnotationClass (stays null if no name was given)
    Rooted annoClass;
    if (!name().empty()) {
        annoClass = scope().resolve(Utils::split(name(), ':'),
                                    logger());
    }

    switch (type()) {
        case HandlerType::ANNOTATION_START: {
            // Create the AnnotationEntity itself.
            if (annoClass == nullptr) {
                // if we could not resolve the name, throw an exception.
                throw LoggableException(
                    std::string("\"") + name() + "\" could not be resolved.",
                    location());
            }
            Rooted doc = scope().selectOrThrow();
            Rooted anno = doc->createChildAnnotation(
                annoClass, anchor, nullptr, args, nameAttr);

            // Push the entity onto the stack
            anno->setLocation(location());
            scope().push(anno);
            break;
        }
        case HandlerType::ANNOTATION_END: {
            // if we want to end an annotation, look for the matching start
            // Anchor ...
            Rooted start =
                parent->searchStartAnchor(fieldIdx, annoClass, nameAttr);
            if (start == nullptr) {
                logger().error(
                    "Did not find matching annotation start for annotation "
                    "end.",
                    *anchor);
                // Remove the anchor created above from the field again
                parent->removeStructureNodeFromField(anchor, fieldIdx);
                return false;
            }
            // ... and set the end Anchor.
            start->getAnnotation()->setEnd(anchor);
            break;
        }
        default:
            throw OusiaException(
                "Internal Error: Invalid handler type in startAnnotation");
    }
    // We're past the region in which explicit fields can be defined in the
    // parent structure element
    scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true);
    return true;
}

/**
 * Handles a token that starts an instance of the given node. Only
 * StructuredClass nodes are supported at the moment. The new entity is created
 * as child of the current field, transparent elements are unwound or created
 * as required.
 *
 * @param node is the node the token refers to.
 * @param greedy is copied to the isGreedy member; inImplicitDefaultField is
 * set to its negation.
 * @return true if an entity was created and pushed onto the scope, false if
 * the node is not a StructuredClass or the scope leaf is the document.
 * @throws LoggableException if the class is not allowed at the current
 * position.
 */
bool DocumentChildHandler::startToken(Handle node, bool greedy)
{
    // Copy the "greedy" flag. If not greedy, set the inImplicitDefaultField
    // flag to true, in order to push the tokens of the previous command.
    isGreedy = greedy;
    inImplicitDefaultField = !greedy;

    bool isStruct = node->isa(&RttiTypes::StructuredClass);
    // bool isField = node->isa(&RttiTypes::FieldDescriptor);
    // bool isAnnotation = node->isa(&RttiTypes::AnnotationClass);

    if (!isStruct) {
        // TODO: Implement
        return false;
    }

    Rooted strct = node.cast();

    scope().setFlag(ParserFlag::POST_HEAD, true);
    // The loop is re-entered (via "continue") each time a transparent
    // field/element pair has been unwound from the scope
    while (true) {
        // Make sure the parent node is not the document
        Rooted parentNode = scope().getLeaf();
        if (parentNode->isa(&RttiTypes::Document)) {
            // NOTE(review): unlike the other logger calls in this file, no
            // location is passed here -- confirm whether this is intended.
            logger().error(
                "Tokens are not allowed on the root document level.");
            return false;
        }
        assert(parentNode->isa(&RttiTypes::DocumentField));

        // TODO: Move this to more generic method
        // Fetch the parent document entity and the parent field index
        size_t fieldIdx;
        DocumentEntity *parent;
        preamble(parentNode, fieldIdx, parent);

        // Calculate a path if transparent entities are needed in between.
        Rooted field =
            parent->getDescriptor()->getFieldDescriptor(fieldIdx);
        size_t lastFieldIdx = fieldIdx;
        auto pathRes = field->pathTo(strct, logger());
        if (!pathRes.second) {
            // If we have transparent elements above us in the structure tree,
            // try to unwind them before we give up.
            if (scope().getLeaf().cast()->transparent) {
                // Pop the implicit field.
                popDocumentField();

                // Pop the implicit element.
                scope().pop(logger());
                continue;
            }
            throw LoggableException(
                std::string("An instance of \"") + strct->getName() +
                    "\" is not allowed as child of field \"" +
                    field->getNameOrDefaultName() + "\" of descriptor \"" +
                    parent->getDescriptor()->getName() + "\"",
                location());
        }

        // Create the path (if one is available)
        if (!pathRes.first.empty()) {
            createPath(lastFieldIdx, pathRes.first, parent);
            lastFieldIdx = parent->getDescriptor()->getFieldDescriptorIndex();
        }

        // Create the entity for the new element at last.
        Rooted entity = parent->createChildStructuredEntity(
            strct, lastFieldIdx, Variant::mapType{}, "");

        // We're past the region in which explicit fields can be defined in the
        // parent structure element
        scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true);

        // Push the entity onto the stack
        entity->setLocation(location());
        scope().push(entity);
        pushScopeTokens();

        return true;
    }
}

/**
 * Handles a token that ends the field or structure described by the given
 * node. The scope stack is searched from top to bottom for a DocumentField or
 * StructuredEntity whose descriptor equals the node.
 *
 * @param node is the descriptor that should be ended.
 * @param maxStackDepth is the maximum number of explicit elements that may be
 * passed during the search.
 * @return a default-constructed EndTokenResult if the node was not found
 * within maxStackDepth explicit elements; EndTokenResult(depth, true, repeat)
 * if explicit elements were passed (nothing is popped from the scope in this
 * case); EndTokenResult(0, true, false) after the elements have been popped
 * from the scope directly.
 */
EndTokenResult DocumentChildHandler::endToken(Handle node,
                                              size_t maxStackDepth)
{
    // Fetch the current scope stack
    const ManagedVector &stack = scope().getStack();

    bool found = false;  // true once the given node has been found
    bool repeat = false; // set if the matching element is transparent
    size_t scopeStackDepth = 0;    // # of elems on the scope stack
    size_t currentStackDepth = 0;  // # of "explicit" elems on the parser stack

    // Iterate over the elements in the scope stack
    for (auto sit = stack.crbegin(); sit != stack.crend();
         sit++, scopeStackDepth++) {
        Rooted leaf = *sit;
        bool isExplicit = false;
        if (leaf->isa(&RttiTypes::DocumentField)) {
            Rooted field = leaf.cast();
            if (field->getDescriptor() == node) {
                // If the field is transparent, end it by incrementing the depth
                // counter -- both the field itself and the consecutive element
                // need to be removed
                found = true;
                if (field->transparent) {
                    repeat = true;
                    scopeStackDepth++;
                }
            }
            isExplicit = field->explicitField;
        } else if (leaf->isa(&RttiTypes::StructuredEntity)) {
            Rooted entity = leaf.cast();
            found = entity->getDescriptor() == node;
            repeat = found && entity->isTransparent();
            isExplicit = !entity->isTransparent();
        }

        // TODO: End annotations!

        // If the given structure is an explicit structure (represents a
        // handler) increment the stack depth and abort once the maximum stack
        // depth has been surpassed.
        if (isExplicit) {
            currentStackDepth++;
        }
        // Note: the "break" skips the loop increment of scopeStackDepth, which
        // is why one more element is popped below
        if (found || currentStackDepth > maxStackDepth) {
            break;
        }
    }

    // Abort with a default-constructed result if the element has not been
    // found within the permitted stack depth
    if (!found || currentStackDepth > maxStackDepth) {
        return EndTokenResult();
    }

    // If the element has been found, return the number of handlers that have to
    // be popped from the parser stack
    if (currentStackDepth > 0) {
        return EndTokenResult(currentStackDepth, true, repeat);
    }

    // End all elements that were marked for being closed
    for (size_t i = 0; i < scopeStackDepth + 1; i++) {
        scope().pop(logger());
    }
    return EndTokenResult(0, true, false);
}

/**
 * Ends the handler. For commands, annotation starts and tokens the element
 * pushed by the corresponding start function is removed from the scope again;
 * for explicit fields the field and all transparent elements are rolled back.
 * Nothing is popped for annotation ends.
 */
void DocumentChildHandler::end()
{
    // Distinguish the handler type
    switch (type()) {
        case HandlerType::COMMAND:
        case HandlerType::ANNOTATION_START:
        case HandlerType::TOKEN:
            if (!isExplicitField) {
                // pop the "main" element.
                scope().pop(logger());
            } else {
                // in case of explicit fields, roll back.
                rollbackPath();
            }
            break;
        case HandlerType::ANNOTATION_END:
            // We have nothing to pop from the stack
            break;
    }
}

/**
 * Starts a field of the entity on top of the scope and pushes a corresponding
 * DocumentField onto the scope.
 *
 * @param isDefault on input requests the default field (the last field of the
 * descriptor); on output states whether the selected field is the last field.
 * For explicit fields it is always set to true.
 * @param isImplicit is copied to the inImplicitDefaultField member (not for
 * explicit fields).
 * @param fieldIdx is the index of the requested field; ignored if isDefault is
 * set on input.
 * @return true if the field was started. For explicit fields no new field is
 * created and the result is true only for fieldIdx zero. False is returned if
 * the descriptor has no fields or fieldIdx is out of range.
 */
bool DocumentChildHandler::fieldStart(bool &isDefault, bool isImplicit,
                                      size_t fieldIdx)
{
    if (isExplicitField) {
        // In case of explicit fields we do not want to create another field.
        isDefault = true;
        return fieldIdx == 0;
    }

    inImplicitDefaultField = isImplicit;

    Rooted parentNode = scope().getLeaf();
    assert(parentNode->isa(&RttiTypes::StructuredEntity) ||
           parentNode->isa(&RttiTypes::AnnotationEntity));
    // The field index output of preamble() is not needed here; it is not
    // written anyway, since parentNode is not a DocumentField
    size_t dummy;
    DocumentEntity *parent;

    preamble(parentNode, dummy, parent);

    ManagedVector fields =
        parent->getDescriptor()->getFieldDescriptors();

    if (isDefault) {
        if (fields.empty()) {
            return false;
        }
        // The default field is the last field of the descriptor
        fieldIdx = fields.size() - 1;
    } else {
        if (fieldIdx >= fields.size()) {
            return false;
        }
        isDefault = fieldIdx == fields.size() - 1;
    }

    // push the field on the stack.
    pushDocumentField(parentNode, fields[fieldIdx], fieldIdx, false, false);
    pushScopeTokens();

    // Generally allow explicit fields in the new field
    scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, false);

    return true;
}

/**
 * Ends the current field. Unless the handler represents an explicit field,
 * the tokens are popped and the field, including all transparent elements
 * above it, is removed from the scope. Resets inImplicitDefaultField.
 */
void DocumentChildHandler::fieldEnd()
{
    if (!isExplicitField) {
        popTokens();
        rollbackPath();
    }
    inImplicitDefaultField = false;
}

/**
 * Converts the given string data to the primitive type of the given field.
 *
 * @param field is the field whose primitive type is the conversion target.
 * @param data is the data to convert; it is modified in place.
 * @param logger is the logger passed to the parsing and resolution functions.
 * @return the result of ParserScope::resolveValue for the converted data.
 */
bool DocumentChildHandler::convertData(Handle field, Variant &data,
                                       Logger &logger)
{
    // NOTE(review): "valid" is never modified, the result below solely depends
    // on resolveValue().
    bool valid = true;
    Rooted type = field->getPrimitiveType();

    // If the content is supposed to be of type string, we only need to check
    // for "magic" values -- otherwise just call the "parseGenericString"
    // function on the string data
    if (type->isa(&RttiTypes::StringType)) {
        const std::string &str = data.asString();
        // TODO: Referencing constants with "." separator should also work
        if (Utils::isIdentifier(str)) {
            data.markAsMagic();
        }
    } else {
        // Parse the string as generic string, assign the result
        auto res = VariantReader::parseGenericString(
            data.asString(), logger, data.getLocation().getSourceId(),
            data.getLocation().getStart());
        data = res.second;
    }

    // Now try to resolve the value for the primitive type
    return valid && scope().resolveValue(data, type, logger);
}

/**
 * Creates a document primitive holding the given data as child of the given
 * parent entity. If the primitive type of the field is a ReferenceType and the
 * data is a string, a resolution of the referenced node is requested and the
 * resolved object is stored in the content of the primitive by the callback.
 *
 * @param scope is the scope used to resolve references.
 * @param logger is the logger passed to the resolution.
 * @param field is the field whose primitive type is inspected.
 * @param parent is the entity the primitive is added to.
 * @param data is the content of the primitive.
 * @param fieldIdx is the index of the field the primitive is added to. If
 * negative, the createChildDocumentPrimitive overload without field index is
 * used.
 */
static void createPrimitive(ParserScope &scope, Logger &logger,
                            Handle field, DocumentEntity *parent,
                            const Variant &data, ssize_t fieldIdx = -1)
{
    // Create the primitive field
    Rooted primitive;
    if (fieldIdx < 0) {
        primitive = parent->createChildDocumentPrimitive(data);
    } else {
        primitive = parent->createChildDocumentPrimitive(data, fieldIdx);
    }

    // Fetch the primitive type, check whether it is a ReferenceType -- if yes,
    // resolve the referenced object and set it once the resolution has finished
    Rooted type = field->getPrimitiveType();
    if (type->isa(&RttiTypes::ReferenceType)) {
        if (data.isString()) {
            scope.resolve(
                &RttiTypes::DocumentNode, Utils::split(data.asString(), ':'),
                primitive, logger,
                [](Handle resolved, Handle owner, Logger &logger) {
                    owner.cast()->getContent().setObject(
                        resolved, owner.get());
                });
        }
    }
}

/**
 * Handles character data. If the current field is primitive, the data is
 * converted and added to it directly. Otherwise the default fields reachable
 * from the current field are tried in order; for the first one that accepts
 * the data the necessary transparent path and the primitive are created.
 *
 * @return true if the data was stored in a primitive, false if the conversion
 * failed or no field accepted the data (an error is logged in the latter
 * case).
 */
bool DocumentChildHandler::data()
{
    // We're past the region in which explicit fields can be defined in the
    // parent structure element
    scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true);

    Rooted parentField = scope().getLeaf();
    assert(parentField->isa(&RttiTypes::DocumentField));

    size_t fieldIdx;
    DocumentEntity *parent;

    preamble(parentField, fieldIdx, parent);

    Rooted desc = parent->getDescriptor();

    // Retrieve the actual FieldDescriptor
    Rooted field = desc->getFieldDescriptor(fieldIdx);

    // If it is a primitive field directly, try to parse the content.
    if (field->isPrimitive()) {
        // Add it as primitive content.
        Variant text = readData();  // TODO: Eliminate readData method
        if (!convertData(field, text, logger())) {
            return false;
        }
        createPrimitive(scope(), logger(), field, parent, text, fieldIdx);
        return true;
    }

    // Search through all permitted default fields of the parent class that
    // allow primitive content at this point and could be constructed via
    // transparent intermediate entities.
    ManagedVector defaultFields = field->getDefaultFields();

    // Try to parse the data using the type specified by the respective field.
    // If that does not work we proceed to the next possible field.
    // One logger fork is kept per tried field, so the messages of the failed
    // attempts can be committed below if no field accepts the data.
    std::vector forks;
    for (auto primitiveField : defaultFields) {
        // Then try to parse the content using the type specification.
        forks.emplace_back(logger().fork());

        // Try to parse the data
        Variant text = readData();  // TODO: Eliminate readData method
        if (!convertData(primitiveField, text, forks.back())) {
            continue;
        }

        // The conversion worked, commit any possible warnings
        forks.back().commit();

        // Construct the necessary path
        // NOTE(review): createPath() accesses path[0] without a check;
        // presumably the path to a default field is never empty -- confirm.
        NodeVector path = field->pathTo(primitiveField, logger());
        createPath(fieldIdx, path, parent);

        // Then create the primitive element
        createPrimitive(scope(), logger(), primitiveField, parent, text);
        return true;
    }

    // No field was found that might take the data -- dump the error messages
    // from the loggers -- or, if there were no primitive fields, clearly state
    // this fact
    Variant text = readData();
    if (defaultFields.empty()) {
        logger().error("Got data, but field \"" +
                           field->getNameOrDefaultName() +
                           "\" of structure \"" + name() +
                           "\" does not have any primitive field",
                       text);
    } else {
        logger().error("Could not read data with any of the possible fields:",
                       text);
        // Every default field was tried above, so forks[f] belongs to the
        // f-th default field
        size_t f = 0;
        for (auto field : defaultFields) {
            logger().note(std::string("Field ") +
                              Utils::join(field->path(), ".") +
                              std::string(":"),
                          SourceLocation{}, MessageMode::NO_CONTEXT);
            forks[f].commit();
            f++;
        }
    }
    return false;
}

namespace States {
// State of the document command: child of the None state, creates Document
// nodes and accepts an optional "name" string argument (default "").
const State Document = StateBuilder()
                           .parent(&None)
                           .createdNodeType(&RttiTypes::Document)
                           .elementHandler(DocumentHandler::create)
                           .arguments({Argument::String("name", "")});

// State of everything below the document: may be nested in the Document state
// or in itself, supports annotations and tokens.
const State DocumentChild = StateBuilder()
                                .parents({&Document, &DocumentChild})
                                .createdNodeTypes({&RttiTypes::StructureNode,
                                                   &RttiTypes::AnnotationEntity,
                                                   &RttiTypes::DocumentField})
                                .elementHandler(DocumentChildHandler::create)
                                .supportsAnnotations(true)
                                .supportsTokens(true);
}
}

namespace RttiTypes {
// Runtime type information of the DocumentField class, derived from Node.
const Rtti DocumentField = RttiBuilder(
                               "DocumentField").parent(&Node);
}
}