diff options
| -rw-r--r-- | CMakeLists.txt | 6 | ||||
| -rw-r--r-- | data/ontology/lists.osxml | 8 | ||||
| -rw-r--r-- | data/ontology/meta.osxml | 1 | ||||
| -rw-r--r-- | src/cli/Main.cpp | 2 | ||||
| -rw-r--r-- | src/core/model/Document.cpp | 30 | ||||
| -rw-r--r-- | src/core/parser/stack/Stack.cpp | 4 | ||||
| -rw-r--r-- | src/plugins/html/DemoOutput.cpp | 455 | ||||
| -rw-r--r-- | src/plugins/html/DemoOutput.hpp | 25 | ||||
| -rw-r--r-- | test/core/model/DocumentTest.cpp | 8 | ||||
| -rw-r--r-- | test/core/model/TestAdvanced.hpp | 25 | ||||
| -rw-r--r-- | test/plugins/html/DemoOutputTest.cpp | 52 | ||||
| -rw-r--r-- | testdata/osxmlparser/affiliation_typesystem.osxml | 10 | ||||
| -rw-r--r-- | testdata/osxmlparser/bibliography_ontology.osxml | 42 | ||||
| -rw-r--r-- | testdata/osxmlparser/complex_book.osxml | 36 | ||||
| -rw-r--r-- | testdata/osxmlparser/email_typesystem.osxml | 8 | ||||
| -rw-r--r-- | testdata/osxmlparser/emphasis_ontology.osxml | 5 | ||||
| -rw-r--r-- | testdata/osxmlparser/lists_ontology.osxml | 24 | ||||
| -rw-r--r-- | testdata/osxmlparser/meta_ontology.osxml | 49 | ||||
| -rw-r--r-- | testdata/osxmlparser/simple_annotation.osxml | 1 | ||||
| -rw-r--r-- | testdata/osxmlparser/simple_book.osxml | 2 | ||||
| -rw-r--r-- | testdata/osxmlparser/version_typesystem.osxml | 8 | 
21 files changed, 533 insertions, 268 deletions
| diff --git a/CMakeLists.txt b/CMakeLists.txt index b7df07c..c0bb15e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -374,7 +374,9 @@ IF(TEST)  	TARGET_LINK_LIBRARIES(ousia_test_html  		${GTEST_LIBRARIES}  		ousia_core +		ousia_filesystem  		ousia_html +		ousia_osxml  	)  #	ADD_EXECUTABLE(ousia_test_mozjs @@ -395,8 +397,8 @@ IF(TEST)  	TARGET_LINK_LIBRARIES(ousia_test_osml  		${GTEST_LIBRARIES}  		ousia_core -		ousia_osml  		ousia_filesystem +		ousia_osml  	)  	ADD_EXECUTABLE(ousia_test_osxml @@ -407,8 +409,8 @@ IF(TEST)  	TARGET_LINK_LIBRARIES(ousia_test_osxml  		${GTEST_LIBRARIES}  		ousia_core -		ousia_osxml  		ousia_filesystem +		ousia_osxml  	)  	ADD_EXECUTABLE(ousia_test_xml diff --git a/data/ontology/lists.osxml b/data/ontology/lists.osxml index a177cd4..2cc5e54 100644 --- a/data/ontology/lists.osxml +++ b/data/ontology/lists.osxml @@ -7,7 +7,7 @@  			 mechanism, because a list may occur whereever a paragraph  			 may occur. However we do want to override the default field. -->  		<field> -			<childRef name="item"/> +			<childRef ref="item"/>  		</field>  	</struct>  	<struct name="ol" isa="book.paragraph"> @@ -15,12 +15,10 @@  		     mechanism, because a list may occur whereever a paragraph  		     may occur. However we do want to override the default field. -->  		<field> -			<childRef name="item"/> +			<childRef ref="item"/>  		</field>  	</struct>  	<struct name="item"> -		<field> -			<childRef name="book.paragaph"/> -		</field> +		<fieldRef ref="book.paragraph.$default"/>  	</struct>  </ontology> diff --git a/data/ontology/meta.osxml b/data/ontology/meta.osxml index c03541a..4b1e422 100644 --- a/data/ontology/meta.osxml +++ b/data/ontology/meta.osxml @@ -5,7 +5,6 @@  	<import rel="typesystem" src="email"/>  	<import rel="typesystem" src="version"/>  	<import rel="ontology" src="book"/> -	<import rel="ontology" src="headings"/>  	<struct name="meta" cardinality="{1}" transparent="true">  		<field> diff --git a/src/cli/Main.cpp b/src/cli/Main.cpp index 2fe0585..e7fa614 100644 --- a/src/cli/Main.cpp +++ b/src/cli/Main.cpp @@ -86,7 +86,7 @@ static void createOutput(Handle<Document> doc, std::ostream &out,  {  	if (format == "html") {  		html::DemoHTMLTransformer transform; -		transform.writeHTML(doc, out, true); +		transform.writeHTML(doc, out, logger, true);  	} else if (format == "xml") {  		xml::XmlTransformer transform;  		transform.writeXml(doc, out, logger, resMgr, true); diff --git a/src/core/model/Document.cpp b/src/core/model/Document.cpp index 0bf50e5..894330b 100644 --- a/src/core/model/Document.cpp +++ b/src/core/model/Document.cpp @@ -158,14 +158,15 @@ bool DocumentEntity::doValidate(Logger &logger) const  				const size_t min =  				    childClass->getCardinality().asCardinality().min();  				if (min > 0) { -					logger.error( -					    std::string("Field \"") + fieldDescs[f]->getName() + -					        "\" was empty but needs at least " + -					        std::to_string(min) + " elements of class \"" + -					        childClass->getName() + -					        "\" according to the definition of \"" + -					        descriptor->getName() + "\"", -					    *subInst); +					logger.error(std::string("Field \"") + +					                 fieldDescs[f]->getNameOrDefaultName() + +					                 "\" was empty but needs at least " + +					                 std::to_string(min) + +					                 " elements of class \"" + +					                 childClass->getName() + +					                 "\" according to the definition of \"" + +					                 descriptor->getName() + "\"", +					             *subInst);  					valid = false;  				}  			} @@ -191,7 +192,7 @@ bool DocumentEntity::doValidate(Logger &logger) const  			}  			if (child->isa(&RttiTypes::DocumentPrimitive)) {  				logger.error(std::string("Non-primitive Field \"") + -				                 fieldDescs[f]->getName() + +				                 fieldDescs[f]->getNameOrDefaultName() +  				                 "\" had primitive content!",  				             *child);  				valid = false; @@ -238,8 +239,9 @@ bool DocumentEntity::doValidate(Logger &logger) const  			}  			if (!childClass->getCardinality().asCardinality().contains(num)) {  				logger.error(std::string("Field \"") + -				                 fieldDescs[f]->getName() + "\" had " + -				                 std::to_string(num) + " elements of class \"" + +				                 fieldDescs[f]->getNameOrDefaultName() + +				                 "\" had " + std::to_string(num) + +				                 " elements of class \"" +  				                 childClass->getName() +  				                 "\", which is invalid according to the "  				                 "definition of \"" + @@ -327,9 +329,9 @@ void DocumentEntity::addStructureNode(Handle<StructureNode> s, size_t i)  		if (par != nullptr) {  			if (par->isa(&RttiTypes::StructuredEntity)) {  				par.cast<StructuredEntity>()->removeStructureNode(s); -			} else if(par->isa(&RttiTypes::AnnotationEntity)){ +			} else if (par->isa(&RttiTypes::AnnotationEntity)) {  				par.cast<AnnotationEntity>()->removeStructureNode(s); -			} else if(par->isa(&RttiTypes::Document)){ +			} else if (par->isa(&RttiTypes::Document)) {  				par.cast<Document>()->setRoot(nullptr);  			}  		} @@ -997,4 +999,4 @@ const Rtti AnnotationEntity =          .parent(&Node)          .composedOf({&StructuredEntity, &DocumentPrimitive, &Anchor});  } -} +}
\ No newline at end of file diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 23c857a..bd16b43 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -802,8 +802,8 @@ void StackImpl::handleFieldEnd(bool endRange)  		if (info.range && endRange) {  			if (!info.hadDefaultField) {  				bool isDefault = true; -				info.handler->fieldStart(isDefault, true); -				info.fieldStart(true, true, true); +				bool valid = info.handler->fieldStart(isDefault, true); +				info.fieldStart(true, true, valid);  			}  			endCurrentHandler();  			return; diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp index 3c54763..8cf4e13 100644 --- a/src/plugins/html/DemoOutput.cpp +++ b/src/plugins/html/DemoOutput.cpp @@ -27,46 +27,212 @@  namespace ousia {  namespace html { -void DemoHTMLTransformer::writeHTML(Handle<Document> doc, std::ostream &out, -                                    bool pretty) +typedef std::stack<Rooted<AnnotationEntity>> AnnoStack; + +static bool canHandleAnchor(Handle<Anchor> a)  { -	Manager &mgr = doc->getManager(); -	// Create an XML object tree for the document first. -	Rooted<xml::Element> html{new xml::Element{mgr, {nullptr}, "html"}}; -	// add the head Element -	Rooted<xml::Element> head{new xml::Element{mgr, html, "head"}}; -	html->addChild(head); -	// add the meta element. -	Rooted<xml::Element> meta{ -	    new xml::Element{mgr, -	                     head, -	                     "meta", -	                     {{"http-equiv", "Content-Type"}, -	                      {"content", "text/html; charset=utf-8"}}}}; -	head->addChild(meta); -	// add the title Element with Text -	Rooted<xml::Element> title{new xml::Element{mgr, head, "title"}}; -	head->addChild(title); -	title->addChild( -	    new xml::Text(mgr, title, "Test HTML Output for " + doc->getName())); -	// add the body Element -	Rooted<xml::Element> body{new xml::Element{mgr, html, "body"}}; -	html->addChild(body); +	std::string annoClassName = a->getAnnotation()->getDescriptor()->getName(); +	return annoClassName == "emphasized" || annoClassName == "strong"; +} -	// So far was the "preamble". No we have to get to the document content. +static Rooted<xml::Element> openAnnotation(Manager &mgr, AnnoStack &opened, +                                           Handle<AnnotationEntity> entity, +                                           Handle<xml::Element> current, +                                           bool stackOnly) +{ +	// we push the newly opened entity on top of the stack. +	opened.push(entity); +	if (stackOnly) { +		return nullptr; +	} +	// get the elment name +	std::string elemName = entity->getDescriptor()->getName(); +	// emphasized has to be shortened +	if (elemName == "emphasized") { +		elemName = "em"; +	} +	// create the new XML element representing the annotation +	Rooted<xml::Element> tmp{new xml::Element{mgr, current, elemName}}; +	current->addChild(tmp); +	// and return it. +	return tmp; +} -	// extract the book root node. -	Rooted<StructuredEntity> root = doc->getRoot(); -	if (root->getDescriptor()->getName() != "book") { -		throw OusiaException("The given documents root is no book node!"); +static Rooted<xml::Element> transformAnchor(Manager &mgr, Handle<Anchor> a, +                                            Handle<xml::Element> current, +                                            Logger &logger, AnnoStack &opened, +                                            bool stackOnly) +{ +	// check if this is a start Anchor. +	if (a->isStart()) { +		// if we have a start anchor, we open an annotation element. +		current = +		    openAnnotation(mgr, opened, a->getAnnotation(), current, stackOnly); +		// check if this is an end Anchor. +	} else if (a->isEnd()) { +		/* +		 * Now it gets somewhat interesting: We have to close all +		 * tags that started after the one that is closed now and +		 * re-open them afterwards. So we create a lokal stack to +		 * temporarily store all AnnotationEntities that need to +		 * be re-opened. +		 */ +		AnnoStack tmp; +		if (opened.empty()) { +			// if we have no opened entities left, that is a +			// malformed document. +			logger.error("An unopened entity was closed!", *a); +			return current; +		} +		Rooted<AnnotationEntity> closed = opened.top(); +		current = current->getParent(); +		opened.pop(); +		while (closed != a->getAnnotation()) { +			/* +			 * We implicitly close tags by climbing up the XML tree +			 * until we are at the right element. +			 */ +			current = current->getParent(); +			tmp.push(closed); +			if (opened.empty()) { +				// if we have no opened entities left, that is a +				// malformed document. +				logger.error("An unopened entity was closed!", *a); +				return current; +			} +			closed = opened.top(); +			opened.pop(); +		} +		// At this point we have closed all necessary entities. Now we +		// need to re-open some of them. +		while (!tmp.empty()) { +			closed = tmp.top(); +			tmp.pop(); +			current = openAnnotation(mgr, opened, closed, current, stackOnly); +		}  	} -	// transform the book node. -	Rooted<xml::Element> book = transformSection(body, root); -	// add it as child to the body node. -	body->addChild(book); +	// otherwise it is a disconnected Anchor and we can ignore it. +	return current; +} -	// After the content has been transformed, we serialize it. -	html->serialize(out, "<!DOCTYPE html>", pretty); +/** + * Reopens all Annotations in the given AnnoStack but does not manipulate the + * original stack. The input argument is a copy. + * @return the innermost opened element. + */ +static Rooted<xml::Element> reOpenAnnotations(Manager &mgr, AnnoStack opened, +                                              Handle<xml::Element> parent) +{ +	AnnoStack tmp; +	while (!opened.empty()) { +		tmp.push(opened.top()); +		opened.pop(); +	} +	Rooted<xml::Element> current = parent; +	while (!tmp.empty()) { +		Rooted<AnnotationEntity> closed = tmp.top(); +		tmp.pop(); +		current = openAnnotation(mgr, opened, closed, current, false); +	} +	return current; +} + +static Rooted<xml::Element> transformParagraph(Manager &mgr, +                                               Handle<xml::Element> parent, +                                               Handle<StructuredEntity> par, +                                               Logger &logger, +                                               AnnoStack &opened) +{ +	// create the p Element +	Rooted<xml::Element> p{new xml::Element{mgr, parent, "p"}}; + +	// check if we have a heading. +	if (par->getDescriptor()->hasField("heading") && +	    par->getField("heading").size() > 0) { +		Handle<StructuredEntity> heading = +		    par->getField("heading")[0].cast<StructuredEntity>(); +		// put the heading in a strong xml::Element. +		Rooted<xml::Element> strong{new xml::Element{mgr, p, "strong"}}; +		p->addChild(strong); +		// extract the heading text, enveloped in a paragraph Element. +		// in this case we use an empy annotation stack because annotations do +		// not extend on subtree fields. +		AnnoStack emptyStack; +		Rooted<xml::Element> h_content = +		    transformParagraph(mgr, strong, heading, logger, emptyStack); +		// We omit the paragraph Element and add the children directly to the +		// heading Element +		for (auto &n : h_content->getChildren()) { +			strong->addChild(n); +		} +	} +	// reopen all annotations. +	Rooted<xml::Element> current = reOpenAnnotations(mgr, opened, p); +	// transform paragraph children to XML as well +	for (auto &n : par->getField()) { +		if (n->isa(&RttiTypes::Anchor)) { +			Rooted<Anchor> a = n.cast<Anchor>(); +			if (canHandleAnchor(a)) { +				current = +				    transformAnchor(mgr, a, current, logger, opened, false); +			} +			continue; +		} +		// if this is not an anchor, we can only handle text. +		if (!n->isa(&RttiTypes::StructuredEntity)) { +			continue; +		} +		Handle<StructuredEntity> t = n.cast<StructuredEntity>(); + +		std::string childDescriptorName = t->getDescriptor()->getName(); +		if (childDescriptorName == "text") { +			Handle<DocumentPrimitive> primitive = +			    t->getField()[0].cast<DocumentPrimitive>(); +			std::string text_content = primitive->getContent().asString(); +			current->addChild(new xml::Text(mgr, current, text_content)); +		} +	} +	// at this point we implicitly close all annotations that are left opened. +	// they will be reopened in the next paragraph. +	return p; +} + +static Rooted<xml::Element> transformList(Manager &mgr, +                                          Handle<xml::Element> parent, +                                          Handle<StructuredEntity> list, +                                          Logger &logger, AnnoStack &opened) +{ +	// create the list Element, which is either ul or ol (depends on descriptor) +	std::string listclass = list->getDescriptor()->getName(); +	Rooted<xml::Element> l{new xml::Element{mgr, parent, listclass}}; +	// iterate through list items. +	for (auto &it : list->getField()) { +		if (it->isa(&RttiTypes::Anchor)) { +			Rooted<Anchor> a = it.cast<Anchor>(); +			if (canHandleAnchor(a)) { +				// just put the entity on the AnnoStack, but do not open it +				// explicitly. That will be done inside the next paragraph. +				transformAnchor(mgr, a, l, logger, opened, true); +			} +			continue; +		} +		Handle<StructuredEntity> item = it.cast<StructuredEntity>(); +		std::string itDescrName = item->getDescriptor()->getName(); +		if (itDescrName == "item") { +			// create the list item. +			Rooted<xml::Element> li{new xml::Element{mgr, l, "li"}}; +			l->addChild(li); +			// extract the item text, enveloped in a paragraph Element. +			Rooted<xml::Element> li_content = +			    transformParagraph(mgr, li, item, logger, opened); +			// We omit the paragraph Element and add the children directly to +			// the list item +			for (auto &n : li_content->getChildren()) { +				li->addChild(n); +			} +		} +	} +	return l;  }  /** @@ -74,7 +240,7 @@ void DemoHTMLTransformer::writeHTML(Handle<Document> doc, std::ostream &out,   */  enum class SectionType { BOOK, CHAPTER, SECTION, SUBSECTION, NONE }; -SectionType getSectionType(const std::string &name) +static SectionType getSectionType(const std::string &name)  {  	if (name == "book") {  		return SectionType::BOOK; @@ -89,10 +255,11 @@ SectionType getSectionType(const std::string &name)  	}  } -Rooted<xml::Element> DemoHTMLTransformer::transformSection( -    Handle<xml::Element> parent, Handle<StructuredEntity> section) +static Rooted<xml::Element> transformSection(Manager &mgr, +                                             Handle<xml::Element> parent, +                                             Handle<StructuredEntity> section, +                                             Logger &logger, AnnoStack &opened)  { -	Manager &mgr = section->getManager();  	// check the section type.  	const std::string secclass = section->getDescriptor()->getName();  	SectionType type = getSectionType(secclass); @@ -128,9 +295,13 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(  		}  		Rooted<xml::Element> h{new xml::Element{mgr, sec, headingclass}};  		sec->addChild(h); -		// extract the heading text, enveloped in a paragraph Element. -		Rooted<xml::Element> h_content = transformParagraph(h, heading); -		// We omit the paragraph Element and add the children directly to the +		// extract the heading text, wrapped in a paragraph Element. +		// in this case we use an empy annotation stack because annotations do +		// not extend on subtree fields. +		AnnoStack emptyStack; +		Rooted<xml::Element> h_content = +		    transformParagraph(mgr, h, heading, logger, emptyStack); +		// We omit the paragraph element and add the children directly to the  		// heading Element  		for (auto &n : h_content->getChildren()) {  			h->addChild(n); @@ -139,6 +310,15 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(  	// Then we get all the children.  	for (auto &n : section->getField()) { +		if (n->isa(&RttiTypes::Anchor)) { +			Rooted<Anchor> a = n.cast<Anchor>(); +			if (canHandleAnchor(a)) { +				// just put the entity on the AnnoStack, but do not open it +				// explicitly. That will be done inside the next paragraph. +				transformAnchor(mgr, a, sec, logger, opened, true); +			} +			continue; +		}  		if (!n->isa(&RttiTypes::StructuredEntity)) {  			continue;  		} @@ -153,11 +333,11 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(  		const std::string childDescriptorName = s->getDescriptor()->getName();  		Rooted<xml::Element> child;  		if (childDescriptorName == "paragraph") { -			child = transformParagraph(sec, s); +			child = transformParagraph(mgr, sec, s, logger, opened);  		} else if (childDescriptorName == "ul" || childDescriptorName == "ol") { -			child = transformList(sec, s); +			child = transformList(mgr, sec, s, logger, opened);  		} else { -			child = transformSection(sec, s); +			child = transformSection(mgr, sec, s, logger, opened);  		}  		if (!child.isNull()) {  			sec->addChild(child); @@ -166,155 +346,54 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(  	return sec;  } -Rooted<xml::Element> DemoHTMLTransformer::transformList( -    Handle<xml::Element> parent, Handle<StructuredEntity> list) +void DemoHTMLTransformer::writeHTML(Handle<Document> doc, std::ostream &out, +                                    Logger &logger, bool pretty)  { -	Manager &mgr = list->getManager(); -	// create the list Element, which is either ul or ol (depends on descriptor) -	std::string listclass = list->getDescriptor()->getName(); -	Rooted<xml::Element> l{new xml::Element{mgr, parent, listclass}}; -	// iterate through list items. -	for (auto &it : list->getField()) { -		Handle<StructuredEntity> item = it.cast<StructuredEntity>(); -		std::string itDescrName = item->getDescriptor()->getName(); -		if (itDescrName == "item") { -			// create the list item. -			Rooted<xml::Element> li{new xml::Element{mgr, l, "li"}}; -			l->addChild(li); -			// extract the item text, enveloped in a paragraph Element. -			Rooted<xml::Element> li_content = transformParagraph(li, item); -			// We omit the paragraph Element and add the children directly to -			// the list item -			for (auto &n : li_content->getChildren()) { -				li->addChild(n); -			} -		} +	// validate the document. +	if (!doc->validate(logger)) { +		return;  	} -	return l; -} - -typedef std::stack<Rooted<AnnotationEntity>> AnnoStack; -static Rooted<xml::Element> openAnnotation(Manager &mgr, AnnoStack &opened, -                                           Handle<AnnotationEntity> entity, -                                           Handle<xml::Element> current) -{ -	// we push the newly opened entity on top of the stack. -	opened.push(entity); -	// get the elment name -	std::string elemName = entity->getDescriptor()->getName(); -	// emphasized has to be shortened -	if (elemName == "emphasized") { -		elemName = "em"; -	} -	// create the new XML element representing the annotation -	Rooted<xml::Element> tmp{new xml::Element{mgr, current, elemName}}; -	current->addChild(tmp); -	// and return it. -	return tmp; -} +	Manager &mgr = doc->getManager(); +	// initialize an empty annotation Stack. +	AnnoStack opened; +	// Create an XML object tree for the document first. +	Rooted<xml::Element> html{new xml::Element{mgr, {nullptr}, "html"}}; +	// add the head Element +	Rooted<xml::Element> head{new xml::Element{mgr, html, "head"}}; +	html->addChild(head); +	// add the meta element. +	Rooted<xml::Element> meta{ +	    new xml::Element{mgr, +	                     head, +	                     "meta", +	                     {{"http-equiv", "Content-Type"}, +	                      {"content", "text/html; charset=utf-8"}}}}; +	head->addChild(meta); +	// add the title Element with Text +	Rooted<xml::Element> title{new xml::Element{mgr, head, "title"}}; +	head->addChild(title); +	title->addChild( +	    new xml::Text(mgr, title, "Test HTML Output for " + doc->getName())); +	// add the body Element +	Rooted<xml::Element> body{new xml::Element{mgr, html, "body"}}; +	html->addChild(body); -Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( -    Handle<xml::Element> parent, Handle<StructuredEntity> par) -{ -	Manager &mgr = par->getManager(); -	// create the p Element -	Rooted<xml::Element> p{new xml::Element{mgr, parent, "p"}}; +	// So far was the "preamble". No we have to get to the document content. -	// check if we have a heading. -	if (par->getDescriptor()->hasField("heading") && -	    par->getField("heading").size() > 0) { -		Handle<StructuredEntity> heading = -		    par->getField("heading")[0].cast<StructuredEntity>(); -		// put the heading in a strong xml::Element. -		Rooted<xml::Element> strong{new xml::Element{mgr, p, "strong"}}; -		p->addChild(strong); -		// extract the heading text, enveloped in a paragraph Element. -		Rooted<xml::Element> h_content = transformParagraph(strong, heading); -		// We omit the paragraph Element and add the children directly to the -		// heading Element -		for (auto &n : h_content->getChildren()) { -			strong->addChild(n); -		} +	// extract the book root node. +	Rooted<StructuredEntity> root = doc->getRoot(); +	if (root->getDescriptor()->getName() != "book") { +		throw OusiaException("The given documents root is no book node!");  	} +	// transform the book node. +	Rooted<xml::Element> book = +	    transformSection(mgr, body, root, logger, opened); +	// add it as child to the body node. +	body->addChild(book); -	// transform paragraph children to XML as well -	/* -	 * We need a stack of AnnotationEntities that are currently open. -	 * In principle we wouldn't, because the nested structure of XML elements -	 * provides a stack-like structure anyways, but we need to have a mapping of -	 * XML tags to AnnotationEntities, which is implicitly provided by this -	 * stack. -	 */ -	AnnoStack opened; -	// this is a handle for our current XML element for annotation handling. -	Rooted<xml::Element> current = p; -	for (auto &n : par->getField()) { -		if (n->isa(&RttiTypes::Anchor)) { -			Rooted<Anchor> a = n.cast<Anchor>(); -			// check if this is a start Anchor. -			if (a->isStart()) { -				// if we have a start anchor, we open an annotation element. -				current = -				    openAnnotation(mgr, opened, a->getAnnotation(), current); -				continue; -				// check if this is an end Anchor. -			} else if (a->isEnd()) { -				/* -				 * Now it gets somewhat interesting: We have to close all -				 * tags that started after the one that is closed now and -				 * re-open them afterwards. So we create a lokal stack to -				 * temporarily store all AnnotationEntities that need to -				 * be re-opened. -				 */ -				AnnoStack tmp; -				Rooted<AnnotationEntity> closed = opened.top(); -				current = current->getParent(); -				opened.pop(); -				while (closed != a->getAnnotation()) { -					/* -					 * We implicitly do close tags by climbing up the XML tree -					 * until we are at the right element. -					 */ -					current = current->getParent(); -					tmp.push(closed); -					if (opened.empty()) { -						// if we have no opened entities left, that is a -						// malformed document. -						throw OusiaException("An unopened entity was closed!"); -					} -					closed = opened.top(); -					opened.pop(); -				} -				// At this point we have closed all necessary entities. Now we -				// need to re-open some of them. -				while (!tmp.empty()) { -					closed = tmp.top(); -					tmp.pop(); -					current = openAnnotation(mgr, opened, closed, current); -				} -			} -			// otherwise it is a disconnected Anchor and we can ignore it. -			continue; -		} -		// if this is not an anchor, we can only handle text. -		if (!n->isa(&RttiTypes::StructuredEntity)) { -			continue; -		} -		Handle<StructuredEntity> t = n.cast<StructuredEntity>(); - -		std::string childDescriptorName = t->getDescriptor()->getName(); -		if (childDescriptorName == "text") { -			Handle<DocumentPrimitive> primitive = -			    t->getField()[0].cast<DocumentPrimitive>(); -			if (primitive == nullptr) { -				throw OusiaException("Text field is not primitive!"); -			} -			current->addChild(new xml::Text( -			    mgr, current, primitive->getContent().asString())); -		} -	} -	return p; -} +	// After the content has been transformed, we serialize it. +	html->serialize(out, "<!DOCTYPE html>", pretty);  }  } +}
\ No newline at end of file diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp index b038a96..a1c0938 100644 --- a/src/plugins/html/DemoOutput.hpp +++ b/src/plugins/html/DemoOutput.hpp @@ -39,28 +39,6 @@ namespace ousia {  namespace html {  class DemoHTMLTransformer { -private: -	/** -	 * This transforms a section-like entity, namely book, section -	 * and subsection, to an XHTML element, including its header. For the -	 * children of the default field the respective transform function is -	 * called recursively. -	 */ -	Rooted<xml::Element> transformSection(Handle<xml::Element> parent, -	                                      Handle<StructuredEntity> sec); -	/** -	 * This transforms a list entity, namely ul and ol to an XHTML element. -	 * For each item, the transformParagraph function is called. -	 */ -	Rooted<xml::Element> transformList(Handle<xml::Element> parent, -	                                   Handle<StructuredEntity> list); -	/** -	 * This transforms a paragraph-like entity, namely heading, item and -	 * paragraph, to an XHTML element including the text and the anchors -	 * contained. -	 */ -	Rooted<xml::Element> transformParagraph(Handle<xml::Element> parent, -	                                        Handle<StructuredEntity> par);  public:  	/** @@ -80,10 +58,11 @@ public:  	 * @param doc    is a Document using concepts of the book, headings,  	 *               emphasis and lists ontologies but no other.  	 * @param out    is the output stream the data shall be written to. +	 * @param logger is a logger instances for errors.  	 * @param pretty is a flag that manipulates whether newlines and tabs are  	 *               used.  	 */ -	void writeHTML(Handle<Document> doc, std::ostream &out, bool pretty = true); +	void writeHTML(Handle<Document> doc, std::ostream &out, Logger& logger, bool pretty = true);  };  }  } diff --git a/test/core/model/DocumentTest.cpp b/test/core/model/DocumentTest.cpp index 8ae9475..135ba19 100644 --- a/test/core/model/DocumentTest.cpp +++ b/test/core/model/DocumentTest.cpp @@ -249,7 +249,8 @@ TEST(Document, construct)  TEST(Document, validate)  {  	// Let's start with a trivial ontology and a trivial document. -	TerminalLogger logger{std::cerr, true}; +// 	TerminalLogger logger{std::cerr, true}; +	Logger logger;  	Manager mgr{1};  	Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)};  	Rooted<Ontology> ontology{new Ontology(mgr, sys, "trivial")}; @@ -403,7 +404,7 @@ TEST(Document, validate)  	    new AnnotationClass(mgr, "anno", ontology)};  	{  		/* -		 * Create a valid document in itself. +		 * Create a document with anchors.  		 */  		Rooted<Document> doc{new Document(mgr, "myDoc.oxd")};  		doc->referenceOntology(ontology); @@ -416,7 +417,8 @@ TEST(Document, validate)  		    new DocumentPrimitive(mgr, child, {2}, "int")};  		Rooted<Anchor> end{new Anchor(mgr, root)};  		ASSERT_EQ(ValidationState::UNKNOWN, doc->getValidationState()); -		ASSERT_TRUE(doc->validate(logger)); +		// This should be invalid due to disconnected Anchors +		ASSERT_FALSE(doc->validate(logger));  		// then add an AnnotationEntity without Anchors.  		Rooted<AnnotationEntity> anno =  		    buildAnnotationEntity(doc, logger, {"anno"}, nullptr, nullptr); diff --git a/test/core/model/TestAdvanced.hpp b/test/core/model/TestAdvanced.hpp index c92effa..58eb965 100644 --- a/test/core/model/TestAdvanced.hpp +++ b/test/core/model/TestAdvanced.hpp @@ -45,9 +45,9 @@ static Rooted<StructuredClass> resolveDescriptor(Handle<Ontology> ontology,   * This constructs the "heading" ontology given the book ontology.   */  static Rooted<Ontology> constructHeadingOntology(Manager &mgr, -                                             Handle<SystemTypesystem> sys, -                                             Handle<Ontology> bookOntology, -                                             Logger &logger) +                                                 Handle<SystemTypesystem> sys, +                                                 Handle<Ontology> bookOntology, +                                                 Logger &logger)  {  	// set up ontology node.  	Rooted<Ontology> ontology{new Ontology(mgr, sys, "headings")}; @@ -78,9 +78,9 @@ static Rooted<Ontology> constructHeadingOntology(Manager &mgr,   * This constructs the "list" ontology given the book ontology.   */  static Rooted<Ontology> constructListOntology(Manager &mgr, -                                          Handle<SystemTypesystem> sys, -                                          Handle<Ontology> bookOntology, -                                          Logger &logger) +                                              Handle<SystemTypesystem> sys, +                                              Handle<Ontology> bookOntology, +                                              Logger &logger)  {  	// set up ontology node.  	Rooted<Ontology> ontology{new Ontology(mgr, sys, "list")}; @@ -97,7 +97,8 @@ static Rooted<Ontology> constructListOntology(Manager &mgr,  	for (auto &listType : listTypes) {  		Rooted<StructuredClass> list{new StructuredClass(  		    mgr, listType, ontology, Cardinality::any(), p, false)}; -		Rooted<FieldDescriptor> list_field{new FieldDescriptor(mgr, list)}; +		Rooted<FieldDescriptor> list_field = +		    list->createFieldDescriptor(logger).first;  		list_field->addChild(item);  	}  	return ontology; @@ -107,15 +108,17 @@ static Rooted<Ontology> constructListOntology(Manager &mgr,   * This constructs the "emphasis" ontology.   */  static Rooted<Ontology> constructEmphasisOntology(Manager &mgr, -                                              Handle<SystemTypesystem> sys, -                                              Logger &logger) +                                                  Handle<SystemTypesystem> sys, +                                                  Logger &logger)  {  	// set up ontology node.  	Rooted<Ontology> ontology{new Ontology(mgr, sys, "emphasis")};  	// create AnnotationClasses -	Rooted<AnnotationClass> em{new AnnotationClass(mgr, "emphasized", ontology)}; +	Rooted<AnnotationClass> em{ +	    new AnnotationClass(mgr, "emphasized", ontology)}; -	Rooted<AnnotationClass> strong{new AnnotationClass(mgr, "strong", ontology)}; +	Rooted<AnnotationClass> strong{ +	    new AnnotationClass(mgr, "strong", ontology)};  	return ontology;  } diff --git a/test/plugins/html/DemoOutputTest.cpp b/test/plugins/html/DemoOutputTest.cpp index debb667..1c54a14 100644 --- a/test/plugins/html/DemoOutputTest.cpp +++ b/test/plugins/html/DemoOutputTest.cpp @@ -24,10 +24,13 @@  #include <plugins/html/DemoOutput.hpp>  #include <core/common/Rtti.hpp> +#include <plugins/filesystem/FileLocator.hpp>  #include <core/frontend/TerminalLogger.hpp>  #include <core/model/Document.hpp>  #include <core/model/Ontology.hpp> +#include <formats/osxml/OsxmlParser.hpp> +#include <core/StandaloneEnvironment.hpp>  #include <core/model/TestAdvanced.hpp>  #include <core/model/TestOntology.hpp> @@ -59,7 +62,7 @@ TEST(DemoHTMLTransformer, writeHTML)  	// we can only do a rough check here.  	DemoHTMLTransformer transformer;  	std::stringstream out; -	transformer.writeHTML(doc, out); +	transformer.writeHTML(doc, out, logger);  	const std::string res = out.str();  	ASSERT_FALSE(res == "");  	ASSERT_TRUE(res.find("Was ist Aufklärung?") != std::string::npos); @@ -106,12 +109,57 @@ TEST(DemoHTMLTransformer, AnnotationProcessing)  	// Check serialization.  	DemoHTMLTransformer transformer;  	std::stringstream out; -	transformer.writeHTML(doc, out, false); +	transformer.writeHTML(doc, out, logger, false);  	const std::string res = out.str();  	// In HTML the overlapping structure must be serialized as follows:  	ASSERT_TRUE(  	    res.find("<em>bla<strong>blub</strong></em><strong>bla</strong>") !=  	    std::string::npos);  } + +struct XmlStandaloneEnvironment : public StandaloneEnvironment { +	OsxmlParser parser; +	FileLocator fileLocator; + +	XmlStandaloneEnvironment(ConcreteLogger &logger) +	    : StandaloneEnvironment(logger) +	{ +		fileLocator.addDefaultSearchPaths(); +		fileLocator.addUnittestSearchPath("osxmlparser"); + +		registry.registerDefaultExtensions(); +		registry.registerParser({"text/vnd.ousia.osml+xml"}, +		                        {&RttiTypes::Node}, &parser); +		registry.registerResourceLocator(&fileLocator); +	} +}; + +TEST(DemoHTMLTransformer, pipelineTest) +{ +	// Construct Manager +	TerminalLogger logger{std::cerr, true}; +	XmlStandaloneEnvironment env(logger); +	Rooted<Node> book_document_node = +	    env.parse("complex_book.osxml", "", "", RttiSet{&RttiTypes::Document}); +	ASSERT_FALSE(logger.hasError()); +	ASSERT_FALSE(book_document_node == nullptr); +	ASSERT_TRUE(book_document_node->isa(&RttiTypes::Document)); +	Rooted<Document> doc = book_document_node.cast<Document>(); +	ASSERT_TRUE(doc->validate(logger)); +	ASSERT_FALSE(logger.hasError()); + +	// we can only do a rough check here. +	DemoHTMLTransformer transformer; +	std::stringstream out; +	transformer.writeHTML(doc, out, logger); +	const std::string res = out.str(); +	ASSERT_FALSE(res == ""); +	ASSERT_TRUE(res.find("Was ist Aufklärung?") != std::string::npos); +	ASSERT_TRUE(res.find( +	                "Aufklärung ist der Ausgang des Menschen aus seiner " +	                "selbstverschuldeten Unmündigkeit") != std::string::npos); +	ASSERT_TRUE(res.find("Sapere aude!") != std::string::npos); +} +  }  } diff --git a/testdata/osxmlparser/affiliation_typesystem.osxml b/testdata/osxmlparser/affiliation_typesystem.osxml new file mode 100644 index 0000000..d84dc30 --- /dev/null +++ b/testdata/osxmlparser/affiliation_typesystem.osxml @@ -0,0 +1,10 @@ +<?xml version="1.0" standalone="yes"?> +<typesystem name="affiliation"> +	<struct name="affiliation"> +		<field name="workgroup" type="string"/> +		<field name="departement" type="string"/> +		<field name="institution" type="string"/> +	</struct> + +	<constant name="citec.sc" type="affiliation" value="[workgroup=Semantic Computing Group,departement=Center of Excellence Cognitive Interaction Technology (CITEC), institution=Bielefeld University]"/> +</typesystem> diff --git a/testdata/osxmlparser/bibliography_ontology.osxml b/testdata/osxmlparser/bibliography_ontology.osxml new file mode 100644 index 0000000..0333133 --- /dev/null +++ b/testdata/osxmlparser/bibliography_ontology.osxml @@ -0,0 +1,42 @@ +<?xml version="1.0"?> +<ontology name="bibliography"> + +	<import rel="ontology" src="./book_ontology"/> +	<import rel="ontology" src="./meta_ontology"/> + +	<struct name="bibliography" transparent="true"> +		<field> +			<childRef ref="bibEntry"/> +		</field> +		<parentRef ref="book"> +			<field name="bibliography" subtree="true"/> +		</parentRef> +	</struct> +	<struct name="bibEntry"> +		<field> +			<childRef ref="meta.authors"/> +			<childRef ref="title"/> +			<childRef ref="year"/> +			<childRef ref="journal"/> +			<childRef ref="pages"/> +			<childRef ref="location"/> +		</field> +	</struct> +	<struct name="title" cardinality="{1}"> +		<primitive type="string"/> +	</struct> +	<struct name="year" cardinality="{1}"> +		<primitive type="int"/> +	</struct> +	<struct name="journal" cardinality="{0-1}"> +		<!-- here some kind of database reference would be better --> +		<primitive type="string"/> +	</struct> +	<struct name="pages" cardinality="{0-1}"> +		<primitive type="cardinality"/> +	</struct> +	<struct name="location" cardinality="{0-1}"> +		<!-- here some kind of database reference would be better --> +		<primitive type="string"/> +	</struct> +</ontology> diff --git a/testdata/osxmlparser/complex_book.osxml b/testdata/osxmlparser/complex_book.osxml index 222b146..8eb5f26 100644 --- a/testdata/osxmlparser/complex_book.osxml +++ b/testdata/osxmlparser/complex_book.osxml @@ -1,13 +1,14 @@  <?xml version="1.0"?>  <document> -	<import rel="ontology" src="book"/> -	<import rel="ontology" src="headings"/> -	<import rel="ontology" src="meta"/> -	<import rel="ontology" src="bibliography"/> -	<!--<import rel="ontology" src="emphasis.oxm"/> -	<import rel="ontology" src="comments.oxm"/> -	<alias tag="paragraph" aka="p"/> +	<import rel="ontology" src="./book_ontology"/> +	<import rel="ontology" src="./headings_ontology"/> +	<import rel="ontology" src="./meta_ontology"/> +	<import rel="ontology" src="./bibliography_ontology"/> +	<import rel="ontology" src="./lists_ontology"/> +	<import rel="ontology" src="./emphasis_ontology"/> +	<import rel="ontology" src="./comments_ontology"/> +	<!--<alias tag="paragraph" aka="p"/>  	<alias tag="emphasized" aka="em"/>-->  	<book> @@ -87,15 +88,20 @@  		<chapter name="content">  			<heading>Was ist Aufklärung?</heading> -			Aufklärung ist der Ausgang des Menschen aus seiner -			selbstverschuldeten Unmündigkeit. Unmündigkeit ist +			<a:start:strong/>Aufklärung ist der Ausgang des Menschen aus seiner +			selbstverschuldeten Unmündigkeit<a:end:strong/>. +			<ul> +				<item><a:start:emphasized/>Unmündigkeit<a:end:emphasized/> ist  			das Unvermögen, sich seines Verstandes ohne Leitung eines anderen zu -			bedienen. Selbstverschuldet ist diese Unmündigkeit, wenn -			die Ursache derselben nicht am Mangel des Verstandes, sondern der -			Entschließung und des Mutes liegt, sich seiner ohne Leitung eines -			andern zu bedienen. -			Sapere aude! Habe Mut, dich deines eigenen Verstandes zu -			bedienen! ist also der Wahlspruch der Aufklärung. +			bedienen.</item> +				<item><a:start:emphasized/>Selbstverschuldet<a:end:emphasized/> +			ist diese Unmündigkeit, wenn die Ursache derselben nicht am Mangel +			des Verstandes, sondern der Entschließung und des Mutes liegt, sich +			seiner ohne Leitung eines andern zu bedienen.</item> +			</ul> +			<a:start:strong/>Sapere aude!<a:end:strong/> Habe Mut, dich deines +			eigenen Verstandes zu bedienen! ist also der Wahlspruch der +			Aufklärung.  		</chapter>  	</book>  </document> diff --git a/testdata/osxmlparser/email_typesystem.osxml b/testdata/osxmlparser/email_typesystem.osxml new file mode 100644 index 0000000..325f89a --- /dev/null +++ b/testdata/osxmlparser/email_typesystem.osxml @@ -0,0 +1,8 @@ +<?xml version="1.0" standalone="yes"?> +<typesystem name="email"> +	<struct name="email"> +		<field name="local" type="string"/> +		<field name="domainName" type="string"/> +		<field name="domainSuffix" type="string"/> +	</struct> +</typesystem> diff --git a/testdata/osxmlparser/emphasis_ontology.osxml b/testdata/osxmlparser/emphasis_ontology.osxml new file mode 100644 index 0000000..0fdd63a --- /dev/null +++ b/testdata/osxmlparser/emphasis_ontology.osxml @@ -0,0 +1,5 @@ +<?xml version="1.0" standalone="yes"?> +<ontology name="emphasis"> +	<annotation name="emphasized"/> +	<annotation name="strong"/> +</ontology> diff --git a/testdata/osxmlparser/lists_ontology.osxml b/testdata/osxmlparser/lists_ontology.osxml new file mode 100644 index 0000000..c18494a --- /dev/null +++ b/testdata/osxmlparser/lists_ontology.osxml @@ -0,0 +1,24 @@ +<?xml version="1.0"?> +<ontology name="lists"> +	<import rel="ontology" src="./book_ontology"/> + +	<struct name="ul" isa="book.paragraph"> +		<!-- Here we solve the problem of parents using the isa +			 mechanism, because a list may occur whereever a paragraph +			 may occur. However we do want to override the default field. --> +		<field> +			<childRef ref="item"/> +		</field> +	</struct> +	<struct name="ol" isa="book.paragraph"> +		<!-- Here we solve the problem of parents using the isa +		     mechanism, because a list may occur whereever a paragraph +		     may occur. However we do want to override the default field. --> +		<field> +			<childRef ref="item"/> +		</field> +	</struct> +	<struct name="item"> +		<fieldRef ref="book.paragraph.$default"/> +	</struct> +</ontology> diff --git a/testdata/osxmlparser/meta_ontology.osxml b/testdata/osxmlparser/meta_ontology.osxml new file mode 100644 index 0000000..eb392ce --- /dev/null +++ b/testdata/osxmlparser/meta_ontology.osxml @@ -0,0 +1,49 @@ +<?xml version="1.0"?> +<ontology name="meta"> + +	<import rel="typesystem" src="./affiliation_typesystem"/> +	<import rel="typesystem" src="./email_typesystem"/> +	<import rel="typesystem" src="./version_typesystem"/> +	<import rel="ontology" src="./book_ontology"/> + +	<struct name="meta" cardinality="{1}" transparent="true"> +		<field> +			<childRef ref="authors"/> +			<childRef ref="version"/> +		</field> +		<parentRef ref="book"> +			<field name="meta" subtree="true" optional="true"/> +		</parentRef> +		<parentRef ref="chapter"> +			<field name="meta" subtree="true" optional="true"/> +		</parentRef> +		<!-- One could also include "article" and other things here --> +	</struct> + +	<struct name="person"> +		<primitive subtree="true" name="firstName" type="string"/> +		<primitive subtree="true" name="secondNames" type="string[]" optional="true"/> +		<primitive subtree="true" name="lastName" type="string"/> +		<primitive subtree="true" name="email" type="email" optional="true"/> +		<primitive subtree="true" name="affiliation" type="affiliation" optional="true"/> +	</struct> + +	<!-- wrapper author tag to allow specifying no authors whatsoever. But if +	     an author is specified it has to be at least one primary author. --> +	<struct name="authors" transparent="true" cardinality="{0-1}"> +		<field> +			<childRef ref="author"/> +		</field> +	</struct> + +	<!-- no explicit cardinality, because we might have multiple authors --> +	<struct name="author" isa="person"/> + +	<!-- but we need at least one primary author --> +	<struct name="primaryAuthor" cardinality="{>0}" isa="author"/> + +	<!-- version intermediate struct --> +	<struct name="version" cardinality="{0-1}"> +		<primitive type="version"/> +	</struct> +</ontology> diff --git a/testdata/osxmlparser/simple_annotation.osxml b/testdata/osxmlparser/simple_annotation.osxml index 7eb1713..c0f426a 100644 --- a/testdata/osxmlparser/simple_annotation.osxml +++ b/testdata/osxmlparser/simple_annotation.osxml @@ -1,3 +1,4 @@ +<?xml version="1.0" standalone="yes"?>  <document>  	<import rel="ontology" src="./book_ontology.osxml"/>  	<import rel="ontology" src="./comments_ontology.osxml"/> diff --git a/testdata/osxmlparser/simple_book.osxml b/testdata/osxmlparser/simple_book.osxml index ec1e45a..ac90927 100644 --- a/testdata/osxmlparser/simple_book.osxml +++ b/testdata/osxmlparser/simple_book.osxml @@ -1,6 +1,6 @@  <?xml version="1.0"?>  <document> -	<import rel="ontology" src="book_ontology.osxml"/> +	<import rel="ontology" src="./book_ontology.osxml"/>  	<book>  		This might be some introductory text or a dedication.  		<!-- Note that a better version of the book ontology might specify diff --git a/testdata/osxmlparser/version_typesystem.osxml b/testdata/osxmlparser/version_typesystem.osxml new file mode 100644 index 0000000..0d52736 --- /dev/null +++ b/testdata/osxmlparser/version_typesystem.osxml @@ -0,0 +1,8 @@ +<?xml version="1.0" standalone="yes"?> +<typesystem name="version"> +	<struct name="version"> +		<field name="major" type="int"/> +		<field name="minor" type="int"/> +		<field name="patch" type="int"/> +	</struct> +</typesystem> | 
