summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt6
-rw-r--r--data/ontology/lists.osxml8
-rw-r--r--data/ontology/meta.osxml1
-rw-r--r--src/cli/Main.cpp2
-rw-r--r--src/core/model/Document.cpp30
-rw-r--r--src/core/parser/stack/Stack.cpp4
-rw-r--r--src/plugins/html/DemoOutput.cpp455
-rw-r--r--src/plugins/html/DemoOutput.hpp25
-rw-r--r--test/core/model/DocumentTest.cpp8
-rw-r--r--test/core/model/TestAdvanced.hpp25
-rw-r--r--test/plugins/html/DemoOutputTest.cpp52
-rw-r--r--testdata/osxmlparser/affiliation_typesystem.osxml10
-rw-r--r--testdata/osxmlparser/bibliography_ontology.osxml42
-rw-r--r--testdata/osxmlparser/complex_book.osxml36
-rw-r--r--testdata/osxmlparser/email_typesystem.osxml8
-rw-r--r--testdata/osxmlparser/emphasis_ontology.osxml5
-rw-r--r--testdata/osxmlparser/lists_ontology.osxml24
-rw-r--r--testdata/osxmlparser/meta_ontology.osxml49
-rw-r--r--testdata/osxmlparser/simple_annotation.osxml1
-rw-r--r--testdata/osxmlparser/simple_book.osxml2
-rw-r--r--testdata/osxmlparser/version_typesystem.osxml8
21 files changed, 533 insertions, 268 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b7df07c..c0bb15e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -374,7 +374,9 @@ IF(TEST)
TARGET_LINK_LIBRARIES(ousia_test_html
${GTEST_LIBRARIES}
ousia_core
+ ousia_filesystem
ousia_html
+ ousia_osxml
)
# ADD_EXECUTABLE(ousia_test_mozjs
@@ -395,8 +397,8 @@ IF(TEST)
TARGET_LINK_LIBRARIES(ousia_test_osml
${GTEST_LIBRARIES}
ousia_core
- ousia_osml
ousia_filesystem
+ ousia_osml
)
ADD_EXECUTABLE(ousia_test_osxml
@@ -407,8 +409,8 @@ IF(TEST)
TARGET_LINK_LIBRARIES(ousia_test_osxml
${GTEST_LIBRARIES}
ousia_core
- ousia_osxml
ousia_filesystem
+ ousia_osxml
)
ADD_EXECUTABLE(ousia_test_xml
diff --git a/data/ontology/lists.osxml b/data/ontology/lists.osxml
index a177cd4..2cc5e54 100644
--- a/data/ontology/lists.osxml
+++ b/data/ontology/lists.osxml
@@ -7,7 +7,7 @@
mechanism, because a list may occur whereever a paragraph
may occur. However we do want to override the default field. -->
<field>
- <childRef name="item"/>
+ <childRef ref="item"/>
</field>
</struct>
<struct name="ol" isa="book.paragraph">
@@ -15,12 +15,10 @@
mechanism, because a list may occur whereever a paragraph
may occur. However we do want to override the default field. -->
<field>
- <childRef name="item"/>
+ <childRef ref="item"/>
</field>
</struct>
<struct name="item">
- <field>
- <childRef name="book.paragaph"/>
- </field>
+ <fieldRef ref="book.paragraph.$default"/>
</struct>
</ontology>
diff --git a/data/ontology/meta.osxml b/data/ontology/meta.osxml
index c03541a..4b1e422 100644
--- a/data/ontology/meta.osxml
+++ b/data/ontology/meta.osxml
@@ -5,7 +5,6 @@
<import rel="typesystem" src="email"/>
<import rel="typesystem" src="version"/>
<import rel="ontology" src="book"/>
- <import rel="ontology" src="headings"/>
<struct name="meta" cardinality="{1}" transparent="true">
<field>
diff --git a/src/cli/Main.cpp b/src/cli/Main.cpp
index 2fe0585..e7fa614 100644
--- a/src/cli/Main.cpp
+++ b/src/cli/Main.cpp
@@ -86,7 +86,7 @@ static void createOutput(Handle<Document> doc, std::ostream &out,
{
if (format == "html") {
html::DemoHTMLTransformer transform;
- transform.writeHTML(doc, out, true);
+ transform.writeHTML(doc, out, logger, true);
} else if (format == "xml") {
xml::XmlTransformer transform;
transform.writeXml(doc, out, logger, resMgr, true);
diff --git a/src/core/model/Document.cpp b/src/core/model/Document.cpp
index 0bf50e5..894330b 100644
--- a/src/core/model/Document.cpp
+++ b/src/core/model/Document.cpp
@@ -158,14 +158,15 @@ bool DocumentEntity::doValidate(Logger &logger) const
const size_t min =
childClass->getCardinality().asCardinality().min();
if (min > 0) {
- logger.error(
- std::string("Field \"") + fieldDescs[f]->getName() +
- "\" was empty but needs at least " +
- std::to_string(min) + " elements of class \"" +
- childClass->getName() +
- "\" according to the definition of \"" +
- descriptor->getName() + "\"",
- *subInst);
+ logger.error(std::string("Field \"") +
+ fieldDescs[f]->getNameOrDefaultName() +
+ "\" was empty but needs at least " +
+ std::to_string(min) +
+ " elements of class \"" +
+ childClass->getName() +
+ "\" according to the definition of \"" +
+ descriptor->getName() + "\"",
+ *subInst);
valid = false;
}
}
@@ -191,7 +192,7 @@ bool DocumentEntity::doValidate(Logger &logger) const
}
if (child->isa(&RttiTypes::DocumentPrimitive)) {
logger.error(std::string("Non-primitive Field \"") +
- fieldDescs[f]->getName() +
+ fieldDescs[f]->getNameOrDefaultName() +
"\" had primitive content!",
*child);
valid = false;
@@ -238,8 +239,9 @@ bool DocumentEntity::doValidate(Logger &logger) const
}
if (!childClass->getCardinality().asCardinality().contains(num)) {
logger.error(std::string("Field \"") +
- fieldDescs[f]->getName() + "\" had " +
- std::to_string(num) + " elements of class \"" +
+ fieldDescs[f]->getNameOrDefaultName() +
+ "\" had " + std::to_string(num) +
+ " elements of class \"" +
childClass->getName() +
"\", which is invalid according to the "
"definition of \"" +
@@ -327,9 +329,9 @@ void DocumentEntity::addStructureNode(Handle<StructureNode> s, size_t i)
if (par != nullptr) {
if (par->isa(&RttiTypes::StructuredEntity)) {
par.cast<StructuredEntity>()->removeStructureNode(s);
- } else if(par->isa(&RttiTypes::AnnotationEntity)){
+ } else if (par->isa(&RttiTypes::AnnotationEntity)) {
par.cast<AnnotationEntity>()->removeStructureNode(s);
- } else if(par->isa(&RttiTypes::Document)){
+ } else if (par->isa(&RttiTypes::Document)) {
par.cast<Document>()->setRoot(nullptr);
}
}
@@ -997,4 +999,4 @@ const Rtti AnnotationEntity =
.parent(&Node)
.composedOf({&StructuredEntity, &DocumentPrimitive, &Anchor});
}
-}
+} \ No newline at end of file
diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp
index 23c857a..bd16b43 100644
--- a/src/core/parser/stack/Stack.cpp
+++ b/src/core/parser/stack/Stack.cpp
@@ -802,8 +802,8 @@ void StackImpl::handleFieldEnd(bool endRange)
if (info.range && endRange) {
if (!info.hadDefaultField) {
bool isDefault = true;
- info.handler->fieldStart(isDefault, true);
- info.fieldStart(true, true, true);
+ bool valid = info.handler->fieldStart(isDefault, true);
+ info.fieldStart(true, true, valid);
}
endCurrentHandler();
return;
diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp
index 3c54763..8cf4e13 100644
--- a/src/plugins/html/DemoOutput.cpp
+++ b/src/plugins/html/DemoOutput.cpp
@@ -27,46 +27,212 @@
namespace ousia {
namespace html {
-void DemoHTMLTransformer::writeHTML(Handle<Document> doc, std::ostream &out,
- bool pretty)
+typedef std::stack<Rooted<AnnotationEntity>> AnnoStack;
+
+static bool canHandleAnchor(Handle<Anchor> a)
{
- Manager &mgr = doc->getManager();
- // Create an XML object tree for the document first.
- Rooted<xml::Element> html{new xml::Element{mgr, {nullptr}, "html"}};
- // add the head Element
- Rooted<xml::Element> head{new xml::Element{mgr, html, "head"}};
- html->addChild(head);
- // add the meta element.
- Rooted<xml::Element> meta{
- new xml::Element{mgr,
- head,
- "meta",
- {{"http-equiv", "Content-Type"},
- {"content", "text/html; charset=utf-8"}}}};
- head->addChild(meta);
- // add the title Element with Text
- Rooted<xml::Element> title{new xml::Element{mgr, head, "title"}};
- head->addChild(title);
- title->addChild(
- new xml::Text(mgr, title, "Test HTML Output for " + doc->getName()));
- // add the body Element
- Rooted<xml::Element> body{new xml::Element{mgr, html, "body"}};
- html->addChild(body);
+ std::string annoClassName = a->getAnnotation()->getDescriptor()->getName();
+ return annoClassName == "emphasized" || annoClassName == "strong";
+}
- // So far was the "preamble". No we have to get to the document content.
+static Rooted<xml::Element> openAnnotation(Manager &mgr, AnnoStack &opened,
+ Handle<AnnotationEntity> entity,
+ Handle<xml::Element> current,
+ bool stackOnly)
+{
+ // we push the newly opened entity on top of the stack.
+ opened.push(entity);
+ if (stackOnly) {
+ return nullptr;
+ }
+ // get the element name
+ std::string elemName = entity->getDescriptor()->getName();
+ // emphasized has to be shortened
+ if (elemName == "emphasized") {
+ elemName = "em";
+ }
+ // create the new XML element representing the annotation
+ Rooted<xml::Element> tmp{new xml::Element{mgr, current, elemName}};
+ current->addChild(tmp);
+ // and return it.
+ return tmp;
+}
- // extract the book root node.
- Rooted<StructuredEntity> root = doc->getRoot();
- if (root->getDescriptor()->getName() != "book") {
- throw OusiaException("The given documents root is no book node!");
+static Rooted<xml::Element> transformAnchor(Manager &mgr, Handle<Anchor> a,
+ Handle<xml::Element> current,
+ Logger &logger, AnnoStack &opened,
+ bool stackOnly)
+{
+ // check if this is a start Anchor.
+ if (a->isStart()) {
+ // if we have a start anchor, we open an annotation element.
+ current =
+ openAnnotation(mgr, opened, a->getAnnotation(), current, stackOnly);
+ // check if this is an end Anchor.
+ } else if (a->isEnd()) {
+ /*
+ * Now it gets somewhat interesting: We have to close all
+ * tags that started after the one that is closed now and
+ * re-open them afterwards. So we create a local stack to
+ * temporarily store all AnnotationEntities that need to
+ * be re-opened.
+ */
+ AnnoStack tmp;
+ if (opened.empty()) {
+ // if we have no opened entities left, that is a
+ // malformed document.
+ logger.error("An unopened entity was closed!", *a);
+ return current;
+ }
+ Rooted<AnnotationEntity> closed = opened.top();
+ current = current->getParent();
+ opened.pop();
+ while (closed != a->getAnnotation()) {
+ /*
+ * We implicitly close tags by climbing up the XML tree
+ * until we are at the right element.
+ */
+ current = current->getParent();
+ tmp.push(closed);
+ if (opened.empty()) {
+ // if we have no opened entities left, that is a
+ // malformed document.
+ logger.error("An unopened entity was closed!", *a);
+ return current;
+ }
+ closed = opened.top();
+ opened.pop();
+ }
+ // At this point we have closed all necessary entities. Now we
+ // need to re-open some of them.
+ while (!tmp.empty()) {
+ closed = tmp.top();
+ tmp.pop();
+ current = openAnnotation(mgr, opened, closed, current, stackOnly);
+ }
}
- // transform the book node.
- Rooted<xml::Element> book = transformSection(body, root);
- // add it as child to the body node.
- body->addChild(book);
+ // otherwise it is a disconnected Anchor and we can ignore it.
+ return current;
+}
- // After the content has been transformed, we serialize it.
- html->serialize(out, "<!DOCTYPE html>", pretty);
+/**
+ * Reopens all Annotations in the given AnnoStack but does not manipulate the
+ * original stack. The input argument is a copy.
+ * @return the innermost opened element.
+ */
+static Rooted<xml::Element> reOpenAnnotations(Manager &mgr, AnnoStack opened,
+ Handle<xml::Element> parent)
+{
+ AnnoStack tmp;
+ while (!opened.empty()) {
+ tmp.push(opened.top());
+ opened.pop();
+ }
+ Rooted<xml::Element> current = parent;
+ while (!tmp.empty()) {
+ Rooted<AnnotationEntity> closed = tmp.top();
+ tmp.pop();
+ current = openAnnotation(mgr, opened, closed, current, false);
+ }
+ return current;
+}
+
+static Rooted<xml::Element> transformParagraph(Manager &mgr,
+ Handle<xml::Element> parent,
+ Handle<StructuredEntity> par,
+ Logger &logger,
+ AnnoStack &opened)
+{
+ // create the p Element
+ Rooted<xml::Element> p{new xml::Element{mgr, parent, "p"}};
+
+ // check if we have a heading.
+ if (par->getDescriptor()->hasField("heading") &&
+ par->getField("heading").size() > 0) {
+ Handle<StructuredEntity> heading =
+ par->getField("heading")[0].cast<StructuredEntity>();
+ // put the heading in a strong xml::Element.
+ Rooted<xml::Element> strong{new xml::Element{mgr, p, "strong"}};
+ p->addChild(strong);
+ // extract the heading text, enveloped in a paragraph Element.
+ // in this case we use an empty annotation stack because annotations do
+ // not extend on subtree fields.
+ AnnoStack emptyStack;
+ Rooted<xml::Element> h_content =
+ transformParagraph(mgr, strong, heading, logger, emptyStack);
+ // We omit the paragraph Element and add the children directly to the
+ // heading Element
+ for (auto &n : h_content->getChildren()) {
+ strong->addChild(n);
+ }
+ }
+ // reopen all annotations.
+ Rooted<xml::Element> current = reOpenAnnotations(mgr, opened, p);
+ // transform paragraph children to XML as well
+ for (auto &n : par->getField()) {
+ if (n->isa(&RttiTypes::Anchor)) {
+ Rooted<Anchor> a = n.cast<Anchor>();
+ if (canHandleAnchor(a)) {
+ current =
+ transformAnchor(mgr, a, current, logger, opened, false);
+ }
+ continue;
+ }
+ // if this is not an anchor, we can only handle text.
+ if (!n->isa(&RttiTypes::StructuredEntity)) {
+ continue;
+ }
+ Handle<StructuredEntity> t = n.cast<StructuredEntity>();
+
+ std::string childDescriptorName = t->getDescriptor()->getName();
+ if (childDescriptorName == "text") {
+ Handle<DocumentPrimitive> primitive =
+ t->getField()[0].cast<DocumentPrimitive>();
+ std::string text_content = primitive->getContent().asString();
+ current->addChild(new xml::Text(mgr, current, text_content));
+ }
+ }
+ // at this point we implicitly close all annotations that are left opened.
+ // they will be reopened in the next paragraph.
+ return p;
+}
+
+static Rooted<xml::Element> transformList(Manager &mgr,
+ Handle<xml::Element> parent,
+ Handle<StructuredEntity> list,
+ Logger &logger, AnnoStack &opened)
+{
+ // create the list Element, which is either ul or ol (depends on descriptor)
+ std::string listclass = list->getDescriptor()->getName();
+ Rooted<xml::Element> l{new xml::Element{mgr, parent, listclass}};
+ // iterate through list items.
+ for (auto &it : list->getField()) {
+ if (it->isa(&RttiTypes::Anchor)) {
+ Rooted<Anchor> a = it.cast<Anchor>();
+ if (canHandleAnchor(a)) {
+ // just put the entity on the AnnoStack, but do not open it
+ // explicitly. That will be done inside the next paragraph.
+ transformAnchor(mgr, a, l, logger, opened, true);
+ }
+ continue;
+ }
+ Handle<StructuredEntity> item = it.cast<StructuredEntity>();
+ std::string itDescrName = item->getDescriptor()->getName();
+ if (itDescrName == "item") {
+ // create the list item.
+ Rooted<xml::Element> li{new xml::Element{mgr, l, "li"}};
+ l->addChild(li);
+ // extract the item text, enveloped in a paragraph Element.
+ Rooted<xml::Element> li_content =
+ transformParagraph(mgr, li, item, logger, opened);
+ // We omit the paragraph Element and add the children directly to
+ // the list item
+ for (auto &n : li_content->getChildren()) {
+ li->addChild(n);
+ }
+ }
+ }
+ return l;
}
/**
@@ -74,7 +240,7 @@ void DemoHTMLTransformer::writeHTML(Handle<Document> doc, std::ostream &out,
*/
enum class SectionType { BOOK, CHAPTER, SECTION, SUBSECTION, NONE };
-SectionType getSectionType(const std::string &name)
+static SectionType getSectionType(const std::string &name)
{
if (name == "book") {
return SectionType::BOOK;
@@ -89,10 +255,11 @@ SectionType getSectionType(const std::string &name)
}
}
-Rooted<xml::Element> DemoHTMLTransformer::transformSection(
- Handle<xml::Element> parent, Handle<StructuredEntity> section)
+static Rooted<xml::Element> transformSection(Manager &mgr,
+ Handle<xml::Element> parent,
+ Handle<StructuredEntity> section,
+ Logger &logger, AnnoStack &opened)
{
- Manager &mgr = section->getManager();
// check the section type.
const std::string secclass = section->getDescriptor()->getName();
SectionType type = getSectionType(secclass);
@@ -128,9 +295,13 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(
}
Rooted<xml::Element> h{new xml::Element{mgr, sec, headingclass}};
sec->addChild(h);
- // extract the heading text, enveloped in a paragraph Element.
- Rooted<xml::Element> h_content = transformParagraph(h, heading);
- // We omit the paragraph Element and add the children directly to the
+ // extract the heading text, wrapped in a paragraph Element.
+ // in this case we use an empty annotation stack because annotations do
+ // not extend on subtree fields.
+ AnnoStack emptyStack;
+ Rooted<xml::Element> h_content =
+ transformParagraph(mgr, h, heading, logger, emptyStack);
+ // We omit the paragraph element and add the children directly to the
// heading Element
for (auto &n : h_content->getChildren()) {
h->addChild(n);
@@ -139,6 +310,15 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(
// Then we get all the children.
for (auto &n : section->getField()) {
+ if (n->isa(&RttiTypes::Anchor)) {
+ Rooted<Anchor> a = n.cast<Anchor>();
+ if (canHandleAnchor(a)) {
+ // just put the entity on the AnnoStack, but do not open it
+ // explicitly. That will be done inside the next paragraph.
+ transformAnchor(mgr, a, sec, logger, opened, true);
+ }
+ continue;
+ }
if (!n->isa(&RttiTypes::StructuredEntity)) {
continue;
}
@@ -153,11 +333,11 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(
const std::string childDescriptorName = s->getDescriptor()->getName();
Rooted<xml::Element> child;
if (childDescriptorName == "paragraph") {
- child = transformParagraph(sec, s);
+ child = transformParagraph(mgr, sec, s, logger, opened);
} else if (childDescriptorName == "ul" || childDescriptorName == "ol") {
- child = transformList(sec, s);
+ child = transformList(mgr, sec, s, logger, opened);
} else {
- child = transformSection(sec, s);
+ child = transformSection(mgr, sec, s, logger, opened);
}
if (!child.isNull()) {
sec->addChild(child);
@@ -166,155 +346,54 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(
return sec;
}
-Rooted<xml::Element> DemoHTMLTransformer::transformList(
- Handle<xml::Element> parent, Handle<StructuredEntity> list)
+void DemoHTMLTransformer::writeHTML(Handle<Document> doc, std::ostream &out,
+ Logger &logger, bool pretty)
{
- Manager &mgr = list->getManager();
- // create the list Element, which is either ul or ol (depends on descriptor)
- std::string listclass = list->getDescriptor()->getName();
- Rooted<xml::Element> l{new xml::Element{mgr, parent, listclass}};
- // iterate through list items.
- for (auto &it : list->getField()) {
- Handle<StructuredEntity> item = it.cast<StructuredEntity>();
- std::string itDescrName = item->getDescriptor()->getName();
- if (itDescrName == "item") {
- // create the list item.
- Rooted<xml::Element> li{new xml::Element{mgr, l, "li"}};
- l->addChild(li);
- // extract the item text, enveloped in a paragraph Element.
- Rooted<xml::Element> li_content = transformParagraph(li, item);
- // We omit the paragraph Element and add the children directly to
- // the list item
- for (auto &n : li_content->getChildren()) {
- li->addChild(n);
- }
- }
+ // validate the document.
+ if (!doc->validate(logger)) {
+ return;
}
- return l;
-}
-
-typedef std::stack<Rooted<AnnotationEntity>> AnnoStack;
-static Rooted<xml::Element> openAnnotation(Manager &mgr, AnnoStack &opened,
- Handle<AnnotationEntity> entity,
- Handle<xml::Element> current)
-{
- // we push the newly opened entity on top of the stack.
- opened.push(entity);
- // get the elment name
- std::string elemName = entity->getDescriptor()->getName();
- // emphasized has to be shortened
- if (elemName == "emphasized") {
- elemName = "em";
- }
- // create the new XML element representing the annotation
- Rooted<xml::Element> tmp{new xml::Element{mgr, current, elemName}};
- current->addChild(tmp);
- // and return it.
- return tmp;
-}
+ Manager &mgr = doc->getManager();
+ // initialize an empty annotation Stack.
+ AnnoStack opened;
+ // Create an XML object tree for the document first.
+ Rooted<xml::Element> html{new xml::Element{mgr, {nullptr}, "html"}};
+ // add the head Element
+ Rooted<xml::Element> head{new xml::Element{mgr, html, "head"}};
+ html->addChild(head);
+ // add the meta element.
+ Rooted<xml::Element> meta{
+ new xml::Element{mgr,
+ head,
+ "meta",
+ {{"http-equiv", "Content-Type"},
+ {"content", "text/html; charset=utf-8"}}}};
+ head->addChild(meta);
+ // add the title Element with Text
+ Rooted<xml::Element> title{new xml::Element{mgr, head, "title"}};
+ head->addChild(title);
+ title->addChild(
+ new xml::Text(mgr, title, "Test HTML Output for " + doc->getName()));
+ // add the body Element
+ Rooted<xml::Element> body{new xml::Element{mgr, html, "body"}};
+ html->addChild(body);
-Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(
- Handle<xml::Element> parent, Handle<StructuredEntity> par)
-{
- Manager &mgr = par->getManager();
- // create the p Element
- Rooted<xml::Element> p{new xml::Element{mgr, parent, "p"}};
+ // So far was the "preamble". Now we have to get to the document content.
- // check if we have a heading.
- if (par->getDescriptor()->hasField("heading") &&
- par->getField("heading").size() > 0) {
- Handle<StructuredEntity> heading =
- par->getField("heading")[0].cast<StructuredEntity>();
- // put the heading in a strong xml::Element.
- Rooted<xml::Element> strong{new xml::Element{mgr, p, "strong"}};
- p->addChild(strong);
- // extract the heading text, enveloped in a paragraph Element.
- Rooted<xml::Element> h_content = transformParagraph(strong, heading);
- // We omit the paragraph Element and add the children directly to the
- // heading Element
- for (auto &n : h_content->getChildren()) {
- strong->addChild(n);
- }
+ // extract the book root node.
+ Rooted<StructuredEntity> root = doc->getRoot();
+ if (root->getDescriptor()->getName() != "book") {
+ throw OusiaException("The given documents root is no book node!");
}
+ // transform the book node.
+ Rooted<xml::Element> book =
+ transformSection(mgr, body, root, logger, opened);
+ // add it as child to the body node.
+ body->addChild(book);
- // transform paragraph children to XML as well
- /*
- * We need a stack of AnnotationEntities that are currently open.
- * In principle we wouldn't, because the nested structure of XML elements
- * provides a stack-like structure anyways, but we need to have a mapping of
- * XML tags to AnnotationEntities, which is implicitly provided by this
- * stack.
- */
- AnnoStack opened;
- // this is a handle for our current XML element for annotation handling.
- Rooted<xml::Element> current = p;
- for (auto &n : par->getField()) {
- if (n->isa(&RttiTypes::Anchor)) {
- Rooted<Anchor> a = n.cast<Anchor>();
- // check if this is a start Anchor.
- if (a->isStart()) {
- // if we have a start anchor, we open an annotation element.
- current =
- openAnnotation(mgr, opened, a->getAnnotation(), current);
- continue;
- // check if this is an end Anchor.
- } else if (a->isEnd()) {
- /*
- * Now it gets somewhat interesting: We have to close all
- * tags that started after the one that is closed now and
- * re-open them afterwards. So we create a lokal stack to
- * temporarily store all AnnotationEntities that need to
- * be re-opened.
- */
- AnnoStack tmp;
- Rooted<AnnotationEntity> closed = opened.top();
- current = current->getParent();
- opened.pop();
- while (closed != a->getAnnotation()) {
- /*
- * We implicitly do close tags by climbing up the XML tree
- * until we are at the right element.
- */
- current = current->getParent();
- tmp.push(closed);
- if (opened.empty()) {
- // if we have no opened entities left, that is a
- // malformed document.
- throw OusiaException("An unopened entity was closed!");
- }
- closed = opened.top();
- opened.pop();
- }
- // At this point we have closed all necessary entities. Now we
- // need to re-open some of them.
- while (!tmp.empty()) {
- closed = tmp.top();
- tmp.pop();
- current = openAnnotation(mgr, opened, closed, current);
- }
- }
- // otherwise it is a disconnected Anchor and we can ignore it.
- continue;
- }
- // if this is not an anchor, we can only handle text.
- if (!n->isa(&RttiTypes::StructuredEntity)) {
- continue;
- }
- Handle<StructuredEntity> t = n.cast<StructuredEntity>();
-
- std::string childDescriptorName = t->getDescriptor()->getName();
- if (childDescriptorName == "text") {
- Handle<DocumentPrimitive> primitive =
- t->getField()[0].cast<DocumentPrimitive>();
- if (primitive == nullptr) {
- throw OusiaException("Text field is not primitive!");
- }
- current->addChild(new xml::Text(
- mgr, current, primitive->getContent().asString()));
- }
- }
- return p;
-}
+ // After the content has been transformed, we serialize it.
+ html->serialize(out, "<!DOCTYPE html>", pretty);
}
}
+} \ No newline at end of file
diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp
index b038a96..a1c0938 100644
--- a/src/plugins/html/DemoOutput.hpp
+++ b/src/plugins/html/DemoOutput.hpp
@@ -39,28 +39,6 @@ namespace ousia {
namespace html {
class DemoHTMLTransformer {
-private:
- /**
- * This transforms a section-like entity, namely book, section
- * and subsection, to an XHTML element, including its header. For the
- * children of the default field the respective transform function is
- * called recursively.
- */
- Rooted<xml::Element> transformSection(Handle<xml::Element> parent,
- Handle<StructuredEntity> sec);
- /**
- * This transforms a list entity, namely ul and ol to an XHTML element.
- * For each item, the transformParagraph function is called.
- */
- Rooted<xml::Element> transformList(Handle<xml::Element> parent,
- Handle<StructuredEntity> list);
- /**
- * This transforms a paragraph-like entity, namely heading, item and
- * paragraph, to an XHTML element including the text and the anchors
- * contained.
- */
- Rooted<xml::Element> transformParagraph(Handle<xml::Element> parent,
- Handle<StructuredEntity> par);
public:
/**
@@ -80,10 +58,11 @@ public:
* @param doc is a Document using concepts of the book, headings,
* emphasis and lists ontologies but no other.
* @param out is the output stream the data shall be written to.
+ * @param logger is a logger instance for errors.
* @param pretty is a flag that manipulates whether newlines and tabs are
* used.
*/
- void writeHTML(Handle<Document> doc, std::ostream &out, bool pretty = true);
+ void writeHTML(Handle<Document> doc, std::ostream &out, Logger& logger, bool pretty = true);
};
}
}
diff --git a/test/core/model/DocumentTest.cpp b/test/core/model/DocumentTest.cpp
index 8ae9475..135ba19 100644
--- a/test/core/model/DocumentTest.cpp
+++ b/test/core/model/DocumentTest.cpp
@@ -249,7 +249,8 @@ TEST(Document, construct)
TEST(Document, validate)
{
// Let's start with a trivial ontology and a trivial document.
- TerminalLogger logger{std::cerr, true};
+// TerminalLogger logger{std::cerr, true};
+ Logger logger;
Manager mgr{1};
Rooted<SystemTypesystem> sys{new SystemTypesystem(mgr)};
Rooted<Ontology> ontology{new Ontology(mgr, sys, "trivial")};
@@ -403,7 +404,7 @@ TEST(Document, validate)
new AnnotationClass(mgr, "anno", ontology)};
{
/*
- * Create a valid document in itself.
+ * Create a document with anchors.
*/
Rooted<Document> doc{new Document(mgr, "myDoc.oxd")};
doc->referenceOntology(ontology);
@@ -416,7 +417,8 @@ TEST(Document, validate)
new DocumentPrimitive(mgr, child, {2}, "int")};
Rooted<Anchor> end{new Anchor(mgr, root)};
ASSERT_EQ(ValidationState::UNKNOWN, doc->getValidationState());
- ASSERT_TRUE(doc->validate(logger));
+ // This should be invalid due to disconnected Anchors
+ ASSERT_FALSE(doc->validate(logger));
// then add an AnnotationEntity without Anchors.
Rooted<AnnotationEntity> anno =
buildAnnotationEntity(doc, logger, {"anno"}, nullptr, nullptr);
diff --git a/test/core/model/TestAdvanced.hpp b/test/core/model/TestAdvanced.hpp
index c92effa..58eb965 100644
--- a/test/core/model/TestAdvanced.hpp
+++ b/test/core/model/TestAdvanced.hpp
@@ -45,9 +45,9 @@ static Rooted<StructuredClass> resolveDescriptor(Handle<Ontology> ontology,
* This constructs the "heading" ontology given the book ontology.
*/
static Rooted<Ontology> constructHeadingOntology(Manager &mgr,
- Handle<SystemTypesystem> sys,
- Handle<Ontology> bookOntology,
- Logger &logger)
+ Handle<SystemTypesystem> sys,
+ Handle<Ontology> bookOntology,
+ Logger &logger)
{
// set up ontology node.
Rooted<Ontology> ontology{new Ontology(mgr, sys, "headings")};
@@ -78,9 +78,9 @@ static Rooted<Ontology> constructHeadingOntology(Manager &mgr,
* This constructs the "list" ontology given the book ontology.
*/
static Rooted<Ontology> constructListOntology(Manager &mgr,
- Handle<SystemTypesystem> sys,
- Handle<Ontology> bookOntology,
- Logger &logger)
+ Handle<SystemTypesystem> sys,
+ Handle<Ontology> bookOntology,
+ Logger &logger)
{
// set up ontology node.
Rooted<Ontology> ontology{new Ontology(mgr, sys, "list")};
@@ -97,7 +97,8 @@ static Rooted<Ontology> constructListOntology(Manager &mgr,
for (auto &listType : listTypes) {
Rooted<StructuredClass> list{new StructuredClass(
mgr, listType, ontology, Cardinality::any(), p, false)};
- Rooted<FieldDescriptor> list_field{new FieldDescriptor(mgr, list)};
+ Rooted<FieldDescriptor> list_field =
+ list->createFieldDescriptor(logger).first;
list_field->addChild(item);
}
return ontology;
@@ -107,15 +108,17 @@ static Rooted<Ontology> constructListOntology(Manager &mgr,
* This constructs the "emphasis" ontology.
*/
static Rooted<Ontology> constructEmphasisOntology(Manager &mgr,
- Handle<SystemTypesystem> sys,
- Logger &logger)
+ Handle<SystemTypesystem> sys,
+ Logger &logger)
{
// set up ontology node.
Rooted<Ontology> ontology{new Ontology(mgr, sys, "emphasis")};
// create AnnotationClasses
- Rooted<AnnotationClass> em{new AnnotationClass(mgr, "emphasized", ontology)};
+ Rooted<AnnotationClass> em{
+ new AnnotationClass(mgr, "emphasized", ontology)};
- Rooted<AnnotationClass> strong{new AnnotationClass(mgr, "strong", ontology)};
+ Rooted<AnnotationClass> strong{
+ new AnnotationClass(mgr, "strong", ontology)};
return ontology;
}
diff --git a/test/plugins/html/DemoOutputTest.cpp b/test/plugins/html/DemoOutputTest.cpp
index debb667..1c54a14 100644
--- a/test/plugins/html/DemoOutputTest.cpp
+++ b/test/plugins/html/DemoOutputTest.cpp
@@ -24,10 +24,13 @@
#include <plugins/html/DemoOutput.hpp>
#include <core/common/Rtti.hpp>
+#include <plugins/filesystem/FileLocator.hpp>
#include <core/frontend/TerminalLogger.hpp>
#include <core/model/Document.hpp>
#include <core/model/Ontology.hpp>
+#include <formats/osxml/OsxmlParser.hpp>
+#include <core/StandaloneEnvironment.hpp>
#include <core/model/TestAdvanced.hpp>
#include <core/model/TestOntology.hpp>
@@ -59,7 +62,7 @@ TEST(DemoHTMLTransformer, writeHTML)
// we can only do a rough check here.
DemoHTMLTransformer transformer;
std::stringstream out;
- transformer.writeHTML(doc, out);
+ transformer.writeHTML(doc, out, logger);
const std::string res = out.str();
ASSERT_FALSE(res == "");
ASSERT_TRUE(res.find("Was ist Aufklärung?") != std::string::npos);
@@ -106,12 +109,57 @@ TEST(DemoHTMLTransformer, AnnotationProcessing)
// Check serialization.
DemoHTMLTransformer transformer;
std::stringstream out;
- transformer.writeHTML(doc, out, false);
+ transformer.writeHTML(doc, out, logger, false);
const std::string res = out.str();
// In HTML the overlapping structure must be serialized as follows:
ASSERT_TRUE(
res.find("<em>bla<strong>blub</strong></em><strong>bla</strong>") !=
std::string::npos);
}
+
+struct XmlStandaloneEnvironment : public StandaloneEnvironment {
+ OsxmlParser parser;
+ FileLocator fileLocator;
+
+ XmlStandaloneEnvironment(ConcreteLogger &logger)
+ : StandaloneEnvironment(logger)
+ {
+ fileLocator.addDefaultSearchPaths();
+ fileLocator.addUnittestSearchPath("osxmlparser");
+
+ registry.registerDefaultExtensions();
+ registry.registerParser({"text/vnd.ousia.osml+xml"},
+ {&RttiTypes::Node}, &parser);
+ registry.registerResourceLocator(&fileLocator);
+ }
+};
+
+TEST(DemoHTMLTransformer, pipelineTest)
+{
+ // Construct Manager
+ TerminalLogger logger{std::cerr, true};
+ XmlStandaloneEnvironment env(logger);
+ Rooted<Node> book_document_node =
+ env.parse("complex_book.osxml", "", "", RttiSet{&RttiTypes::Document});
+ ASSERT_FALSE(logger.hasError());
+ ASSERT_FALSE(book_document_node == nullptr);
+ ASSERT_TRUE(book_document_node->isa(&RttiTypes::Document));
+ Rooted<Document> doc = book_document_node.cast<Document>();
+ ASSERT_TRUE(doc->validate(logger));
+ ASSERT_FALSE(logger.hasError());
+
+ // we can only do a rough check here.
+ DemoHTMLTransformer transformer;
+ std::stringstream out;
+ transformer.writeHTML(doc, out, logger);
+ const std::string res = out.str();
+ ASSERT_FALSE(res == "");
+ ASSERT_TRUE(res.find("Was ist Aufklärung?") != std::string::npos);
+ ASSERT_TRUE(res.find(
+ "Aufklärung ist der Ausgang des Menschen aus seiner "
+ "selbstverschuldeten Unmündigkeit") != std::string::npos);
+ ASSERT_TRUE(res.find("Sapere aude!") != std::string::npos);
+}
+
}
}
diff --git a/testdata/osxmlparser/affiliation_typesystem.osxml b/testdata/osxmlparser/affiliation_typesystem.osxml
new file mode 100644
index 0000000..d84dc30
--- /dev/null
+++ b/testdata/osxmlparser/affiliation_typesystem.osxml
@@ -0,0 +1,10 @@
+<?xml version="1.0" standalone="yes"?>
+<typesystem name="affiliation">
+ <struct name="affiliation">
+ <field name="workgroup" type="string"/>
+ <field name="departement" type="string"/>
+ <field name="institution" type="string"/>
+ </struct>
+
+ <constant name="citec.sc" type="affiliation" value="[workgroup=Semantic Computing Group,departement=Center of Excellence Cognitive Interaction Technology (CITEC), institution=Bielefeld University]"/>
+</typesystem>
diff --git a/testdata/osxmlparser/bibliography_ontology.osxml b/testdata/osxmlparser/bibliography_ontology.osxml
new file mode 100644
index 0000000..0333133
--- /dev/null
+++ b/testdata/osxmlparser/bibliography_ontology.osxml
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<ontology name="bibliography">
+
+ <import rel="ontology" src="./book_ontology"/>
+ <import rel="ontology" src="./meta_ontology"/>
+
+ <struct name="bibliography" transparent="true">
+ <field>
+ <childRef ref="bibEntry"/>
+ </field>
+ <parentRef ref="book">
+ <field name="bibliography" subtree="true"/>
+ </parentRef>
+ </struct>
+ <struct name="bibEntry">
+ <field>
+ <childRef ref="meta.authors"/>
+ <childRef ref="title"/>
+ <childRef ref="year"/>
+ <childRef ref="journal"/>
+ <childRef ref="pages"/>
+ <childRef ref="location"/>
+ </field>
+ </struct>
+ <struct name="title" cardinality="{1}">
+ <primitive type="string"/>
+ </struct>
+ <struct name="year" cardinality="{1}">
+ <primitive type="int"/>
+ </struct>
+ <struct name="journal" cardinality="{0-1}">
+ <!-- here some kind of database reference would be better -->
+ <primitive type="string"/>
+ </struct>
+ <struct name="pages" cardinality="{0-1}">
+ <primitive type="cardinality"/>
+ </struct>
+ <struct name="location" cardinality="{0-1}">
+ <!-- here some kind of database reference would be better -->
+ <primitive type="string"/>
+ </struct>
+</ontology>
diff --git a/testdata/osxmlparser/complex_book.osxml b/testdata/osxmlparser/complex_book.osxml
index 222b146..8eb5f26 100644
--- a/testdata/osxmlparser/complex_book.osxml
+++ b/testdata/osxmlparser/complex_book.osxml
@@ -1,13 +1,14 @@
<?xml version="1.0"?>
<document>
- <import rel="ontology" src="book"/>
- <import rel="ontology" src="headings"/>
- <import rel="ontology" src="meta"/>
- <import rel="ontology" src="bibliography"/>
- <!--<import rel="ontology" src="emphasis.oxm"/>
- <import rel="ontology" src="comments.oxm"/>
- <alias tag="paragraph" aka="p"/>
+ <import rel="ontology" src="./book_ontology"/>
+ <import rel="ontology" src="./headings_ontology"/>
+ <import rel="ontology" src="./meta_ontology"/>
+ <import rel="ontology" src="./bibliography_ontology"/>
+ <import rel="ontology" src="./lists_ontology"/>
+ <import rel="ontology" src="./emphasis_ontology"/>
+ <import rel="ontology" src="./comments_ontology"/>
+ <!--<alias tag="paragraph" aka="p"/>
<alias tag="emphasized" aka="em"/>-->
<book>
@@ -87,15 +88,20 @@
<chapter name="content">
<heading>Was ist Aufklärung?</heading>
- Aufklärung ist der Ausgang des Menschen aus seiner
- selbstverschuldeten Unmündigkeit. Unmündigkeit ist
+ <a:start:strong/>Aufklärung ist der Ausgang des Menschen aus seiner
+ selbstverschuldeten Unmündigkeit<a:end:strong/>.
+ <ul>
+ <item><a:start:emphasized/>Unmündigkeit<a:end:emphasized/> ist
das Unvermögen, sich seines Verstandes ohne Leitung eines anderen zu
- bedienen. Selbstverschuldet ist diese Unmündigkeit, wenn
- die Ursache derselben nicht am Mangel des Verstandes, sondern der
- Entschließung und des Mutes liegt, sich seiner ohne Leitung eines
- andern zu bedienen.
- Sapere aude! Habe Mut, dich deines eigenen Verstandes zu
- bedienen! ist also der Wahlspruch der Aufklärung.
+ bedienen.</item>
+ <item><a:start:emphasized/>Selbstverschuldet<a:end:emphasized/>
+ ist diese Unmündigkeit, wenn die Ursache derselben nicht am Mangel
+ des Verstandes, sondern der Entschließung und des Mutes liegt, sich
+ seiner ohne Leitung eines andern zu bedienen.</item>
+ </ul>
+ <a:start:strong/>Sapere aude!<a:end:strong/> Habe Mut, dich deines
+ eigenen Verstandes zu bedienen! ist also der Wahlspruch der
+ Aufklärung.
</chapter>
</book>
</document>
diff --git a/testdata/osxmlparser/email_typesystem.osxml b/testdata/osxmlparser/email_typesystem.osxml
new file mode 100644
index 0000000..325f89a
--- /dev/null
+++ b/testdata/osxmlparser/email_typesystem.osxml
@@ -0,0 +1,8 @@
+<?xml version="1.0" standalone="yes"?>
+<typesystem name="email">
+ <struct name="email">
+ <field name="local" type="string"/>
+ <field name="domainName" type="string"/>
+ <field name="domainSuffix" type="string"/>
+ </struct>
+</typesystem>
diff --git a/testdata/osxmlparser/emphasis_ontology.osxml b/testdata/osxmlparser/emphasis_ontology.osxml
new file mode 100644
index 0000000..0fdd63a
--- /dev/null
+++ b/testdata/osxmlparser/emphasis_ontology.osxml
@@ -0,0 +1,5 @@
+<?xml version="1.0" standalone="yes"?>
+<ontology name="emphasis">
+ <annotation name="emphasized"/>
+ <annotation name="strong"/>
+</ontology>
diff --git a/testdata/osxmlparser/lists_ontology.osxml b/testdata/osxmlparser/lists_ontology.osxml
new file mode 100644
index 0000000..c18494a
--- /dev/null
+++ b/testdata/osxmlparser/lists_ontology.osxml
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<ontology name="lists">
+ <import rel="ontology" src="./book_ontology"/>
+
+ <struct name="ul" isa="book.paragraph">
+ <!-- Here we solve the problem of parents using the isa
+ mechanism, because a list may occur wherever a paragraph
+ may occur. However, we do want to override the default field. -->
+ <field>
+ <childRef ref="item"/>
+ </field>
+ </struct>
+ <struct name="ol" isa="book.paragraph">
+ <!-- Here we solve the problem of parents using the isa
+ mechanism, because a list may occur wherever a paragraph
+ may occur. However, we do want to override the default field. -->
+ <field>
+ <childRef ref="item"/>
+ </field>
+ </struct>
+ <struct name="item">
+ <fieldRef ref="book.paragraph.$default"/>
+ </struct>
+</ontology>
diff --git a/testdata/osxmlparser/meta_ontology.osxml b/testdata/osxmlparser/meta_ontology.osxml
new file mode 100644
index 0000000..eb392ce
--- /dev/null
+++ b/testdata/osxmlparser/meta_ontology.osxml
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<ontology name="meta">
+
+ <import rel="typesystem" src="./affiliation_typesystem"/>
+ <import rel="typesystem" src="./email_typesystem"/>
+ <import rel="typesystem" src="./version_typesystem"/>
+ <import rel="ontology" src="./book_ontology"/>
+
+ <struct name="meta" cardinality="{1}" transparent="true">
+ <field>
+ <childRef ref="authors"/>
+ <childRef ref="version"/>
+ </field>
+ <parentRef ref="book">
+ <field name="meta" subtree="true" optional="true"/>
+ </parentRef>
+ <parentRef ref="chapter">
+ <field name="meta" subtree="true" optional="true"/>
+ </parentRef>
+ <!-- One could also include "article" and other things here -->
+ </struct>
+
+ <struct name="person">
+ <primitive subtree="true" name="firstName" type="string"/>
+ <primitive subtree="true" name="secondNames" type="string[]" optional="true"/>
+ <primitive subtree="true" name="lastName" type="string"/>
+ <primitive subtree="true" name="email" type="email" optional="true"/>
+ <primitive subtree="true" name="affiliation" type="affiliation" optional="true"/>
+ </struct>
+
+ <!-- Wrapper tag for authors, which allows specifying no authors at all.
+ If any author is specified, there must be at least one primary author. -->
+ <struct name="authors" transparent="true" cardinality="{0-1}">
+ <field>
+ <childRef ref="author"/>
+ </field>
+ </struct>
+
+ <!-- no explicit cardinality, because we might have multiple authors -->
+ <struct name="author" isa="person"/>
+
+ <!-- but we need at least one primary author -->
+ <struct name="primaryAuthor" cardinality="{>0}" isa="author"/>
+
+ <!-- version intermediate struct -->
+ <struct name="version" cardinality="{0-1}">
+ <primitive type="version"/>
+ </struct>
+</ontology>
diff --git a/testdata/osxmlparser/simple_annotation.osxml b/testdata/osxmlparser/simple_annotation.osxml
index 7eb1713..c0f426a 100644
--- a/testdata/osxmlparser/simple_annotation.osxml
+++ b/testdata/osxmlparser/simple_annotation.osxml
@@ -1,3 +1,4 @@
+<?xml version="1.0" standalone="yes"?>
<document>
<import rel="ontology" src="./book_ontology.osxml"/>
<import rel="ontology" src="./comments_ontology.osxml"/>
diff --git a/testdata/osxmlparser/simple_book.osxml b/testdata/osxmlparser/simple_book.osxml
index ec1e45a..ac90927 100644
--- a/testdata/osxmlparser/simple_book.osxml
+++ b/testdata/osxmlparser/simple_book.osxml
@@ -1,6 +1,6 @@
<?xml version="1.0"?>
<document>
- <import rel="ontology" src="book_ontology.osxml"/>
+ <import rel="ontology" src="./book_ontology.osxml"/>
<book>
This might be some introductory text or a dedication.
<!-- Note that a better version of the book ontology might specify
diff --git a/testdata/osxmlparser/version_typesystem.osxml b/testdata/osxmlparser/version_typesystem.osxml
new file mode 100644
index 0000000..0d52736
--- /dev/null
+++ b/testdata/osxmlparser/version_typesystem.osxml
@@ -0,0 +1,8 @@
+<?xml version="1.0" standalone="yes"?>
+<typesystem name="version">
+ <struct name="version">
+ <field name="major" type="int"/>
+ <field name="minor" type="int"/>
+ <field name="patch" type="int"/>
+ </struct>
+</typesystem>