diff options
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/html/DemoOutput.cpp | 55 | ||||
-rw-r--r-- | src/plugins/html/DemoOutput.hpp | 17 | ||||
-rw-r--r-- | src/plugins/xml/XmlOutput.cpp | 116 | ||||
-rw-r--r-- | src/plugins/xml/XmlOutput.hpp | 67 |
4 files changed, 207 insertions, 48 deletions
diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp index d041c1d..3c54763 100644 --- a/src/plugins/html/DemoOutput.cpp +++ b/src/plugins/html/DemoOutput.cpp @@ -55,23 +55,13 @@ void DemoHTMLTransformer::writeHTML(Handle<Document> doc, std::ostream &out, // So far was the "preamble". No we have to get to the document content. - // build the start and end map for annotation processing. - AnnoMap startMap; - AnnoMap endMap; - for (auto &a : doc->getAnnotations()) { - // we assume uniquely IDed annotations, which should be checked in the - // validation process. - startMap.emplace(a->getStart()->getName(), a); - endMap.emplace(a->getEnd()->getName(), a); - } - // extract the book root node. Rooted<StructuredEntity> root = doc->getRoot(); if (root->getDescriptor()->getName() != "book") { throw OusiaException("The given documents root is no book node!"); } // transform the book node. - Rooted<xml::Element> book = transformSection(body, root, startMap, endMap); + Rooted<xml::Element> book = transformSection(body, root); // add it as child to the body node. body->addChild(book); @@ -100,8 +90,7 @@ SectionType getSectionType(const std::string &name) } Rooted<xml::Element> DemoHTMLTransformer::transformSection( - Handle<xml::Element> parent, Handle<StructuredEntity> section, - AnnoMap &startMap, AnnoMap &endMap) + Handle<xml::Element> parent, Handle<StructuredEntity> section) { Manager &mgr = section->getManager(); // check the section type. @@ -140,8 +129,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection( Rooted<xml::Element> h{new xml::Element{mgr, sec, headingclass}}; sec->addChild(h); // extract the heading text, enveloped in a paragraph Element. 
- Rooted<xml::Element> h_content = - transformParagraph(h, heading, startMap, endMap); + Rooted<xml::Element> h_content = transformParagraph(h, heading); // We omit the paragraph Element and add the children directly to the // heading Element for (auto &n : h_content->getChildren()) { @@ -165,11 +153,11 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection( const std::string childDescriptorName = s->getDescriptor()->getName(); Rooted<xml::Element> child; if (childDescriptorName == "paragraph") { - child = transformParagraph(sec, s, startMap, endMap); + child = transformParagraph(sec, s); } else if (childDescriptorName == "ul" || childDescriptorName == "ol") { - child = transformList(sec, s, startMap, endMap); + child = transformList(sec, s); } else { - child = transformSection(sec, s, startMap, endMap); + child = transformSection(sec, s); } if (!child.isNull()) { sec->addChild(child); @@ -179,8 +167,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection( } Rooted<xml::Element> DemoHTMLTransformer::transformList( - Handle<xml::Element> parent, Handle<StructuredEntity> list, - AnnoMap &startMap, AnnoMap &endMap) + Handle<xml::Element> parent, Handle<StructuredEntity> list) { Manager &mgr = list->getManager(); // create the list Element, which is either ul or ol (depends on descriptor) @@ -195,8 +182,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformList( Rooted<xml::Element> li{new xml::Element{mgr, l, "li"}}; l->addChild(li); // extract the item text, enveloped in a paragraph Element. 
- Rooted<xml::Element> li_content = - transformParagraph(li, item, startMap, endMap); + Rooted<xml::Element> li_content = transformParagraph(li, item); // We omit the paragraph Element and add the children directly to // the list item for (auto &n : li_content->getChildren()) { @@ -229,8 +215,7 @@ static Rooted<xml::Element> openAnnotation(Manager &mgr, AnnoStack &opened, } Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( - Handle<xml::Element> parent, Handle<StructuredEntity> par, - AnnoMap &startMap, AnnoMap &endMap) + Handle<xml::Element> parent, Handle<StructuredEntity> par) { Manager &mgr = par->getManager(); // create the p Element @@ -245,8 +230,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( Rooted<xml::Element> strong{new xml::Element{mgr, p, "strong"}}; p->addChild(strong); // extract the heading text, enveloped in a paragraph Element. - Rooted<xml::Element> h_content = - transformParagraph(strong, heading, startMap, endMap); + Rooted<xml::Element> h_content = transformParagraph(strong, heading); // We omit the paragraph Element and add the children directly to the // heading Element for (auto &n : h_content->getChildren()) { @@ -267,17 +251,15 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( Rooted<xml::Element> current = p; for (auto &n : par->getField()) { if (n->isa(&RttiTypes::Anchor)) { + Rooted<Anchor> a = n.cast<Anchor>(); // check if this is a start Anchor. - // here we assume, again, that the ids/names of anchors are unique. - auto it = startMap.find(n->getName()); - if (it != startMap.end()) { + if (a->isStart()) { // if we have a start anchor, we open an annotation element. - current = openAnnotation(mgr, opened, it->second, current); + current = + openAnnotation(mgr, opened, a->getAnnotation(), current); continue; - } - // check if this is an end Anchor. - auto it2 = endMap.find(n->getName()); - if (it2 != endMap.end()) { + // check if this is an end Anchor. 
+ } else if (a->isEnd()) { /* * Now it gets somewhat interesting: We have to close all * tags that started after the one that is closed now and @@ -289,7 +271,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( Rooted<AnnotationEntity> closed = opened.top(); current = current->getParent(); opened.pop(); - while (closed->getEnd()->getName() != n->getName()) { + while (closed != a->getAnnotation()) { /* * We implicitly do close tags by climbing up the XML tree * until we are at the right element. @@ -312,6 +294,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( current = openAnnotation(mgr, opened, closed, current); } } + // otherwise it is a disconnected Anchor and we can ignore it. continue; } // if this is not an anchor, we can only handle text. @@ -324,7 +307,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( if (childDescriptorName == "text") { Handle<DocumentPrimitive> primitive = t->getField()[0].cast<DocumentPrimitive>(); - if (primitive.isNull()) { + if (primitive == nullptr) { throw OusiaException("Text field is not primitive!"); } current->addChild(new xml::Text( diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp index 67b7494..4367202 100644 --- a/src/plugins/html/DemoOutput.hpp +++ b/src/plugins/html/DemoOutput.hpp @@ -30,7 +30,6 @@ #ifndef _OUSIA_HTML_DEMO_OUTPUT_HPP_ #define _OUSIA_HTML_DEMO_OUTPUT_HPP_ -#include <map> #include <ostream> #include <core/model/Document.hpp> @@ -39,8 +38,6 @@ namespace ousia { namespace html { -typedef std::map<std::string, Rooted<AnnotationEntity>> AnnoMap; - class DemoHTMLTransformer { private: /** @@ -50,23 +47,20 @@ private: * called recursively. */ Rooted<xml::Element> transformSection(Handle<xml::Element> parent, - Handle<StructuredEntity> sec, - AnnoMap &startMap, AnnoMap &endMap); + Handle<StructuredEntity> sec); /** * This transforms a list entity, namely ul and ol to an XHTML element. 
* For each item, the transformParagraph function is called. */ Rooted<xml::Element> transformList(Handle<xml::Element> parent, - Handle<StructuredEntity> list, - AnnoMap &startMap, AnnoMap &endMap); + Handle<StructuredEntity> list); /** * This transforms a paragraph-like entity, namely heading, item and * paragraph, to an XHTML element including the text and the anchors - * contained. For anchor handling we require the AnnoMaps. + * contained. */ Rooted<xml::Element> transformParagraph(Handle<xml::Element> parent, - Handle<StructuredEntity> par, - AnnoMap &startMap, AnnoMap &endMap); + Handle<StructuredEntity> par); public: /** @@ -89,8 +83,7 @@ public: * @param pretty is a flag that manipulates whether newlines and tabs are * used. */ - void writeHTML(Handle<Document> doc, std::ostream &out, - bool pretty = true); + void writeHTML(Handle<Document> doc, std::ostream &out, bool pretty = true); }; } } diff --git a/src/plugins/xml/XmlOutput.cpp b/src/plugins/xml/XmlOutput.cpp new file mode 100644 index 0000000..00aae04 --- /dev/null +++ b/src/plugins/xml/XmlOutput.cpp @@ -0,0 +1,116 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "XmlOutput.hpp" + +#include <core/common/Variant.hpp> +#include <core/common/VariantWriter.hpp> + +namespace ousia { +namespace xml { + +void XmlTransformer::writeXml(Handle<Document> doc, std::ostream &out, + Logger &logger, bool pretty) +{ + Manager &mgr = doc->getManager(); + // the outermost tag is the document itself. + Rooted<Element> document{new Element{mgr, {nullptr}, "document"}}; + // then write imports for all references domains. + for (auto d : doc->getDomains()) { + Rooted<Element> import{ + new Element{mgr, + document, + "import", + {{"rel", "domain"}, {"src", d->getName() + ".oxm"}}}}; + document->addChild(import); + } + // transform the root element (and, using recursion, everything below it) + Rooted<Element> root = + transformStructuredEntity(document, doc->getRoot(), logger, pretty); + document->addChild(root); + // then serialize. + document->serialize(out, "<?xml version=\"1.0\"?>", pretty); +} + +Rooted<Element> XmlTransformer::transformStructuredEntity( + Handle<Element> parent, Handle<StructuredEntity> s, Logger &logger, + bool pretty) +{ + Manager &mgr = parent->getManager(); + // TODO: Is this the right handling? + // copy the attributes. + Variant attrs = s->getAttributes(); + // build them. + s->getDescriptor()->getAttributesDescriptor()->build(attrs, logger); + // get the array representation. + Variant::arrayType attrArr = attrs.asArray(); + // transform them to string key-value pairs. + NodeVector<Attribute> as = + s->getDescriptor()->getAttributesDescriptor()->getAttributes(); + std::map<std::string, std::string> xmlAttrs; + for (size_t a = 0; a < as.size(); a++) { + xmlAttrs.emplace(as[a]->getName(), + VariantWriter::writeJsonToString(attrArr[a], pretty)); + } + // create the XML element itself. + Rooted<Element> elem{ + new Element{mgr, parent, s->getDescriptor()->getName(), xmlAttrs}}; + // then transform the fields. 
+ NodeVector<FieldDescriptor> fieldDescs = + s->getDescriptor()->getFieldDescriptors(); + for (size_t f = 0; f < fieldDescs.size(); f++) { + NodeVector<StructureNode> field = s->getField(f); + Rooted<FieldDescriptor> fieldDesc = fieldDescs[f]; + // if this is not the default node create an intermediate node for it. + Rooted<Element> par = elem; + if (fieldDesc->getFieldType() != FieldDescriptor::FieldType::TREE && + !fieldDesc->isPrimitive()) { + par = Rooted<Element>{new Element(mgr, elem, fieldDesc->getName())}; + elem->addChild(par); + } + for (auto c : field) { + // transform each child. + Rooted<Node> child; + if (c->isa(&RttiTypes::StructuredEntity)) { + child = transformStructuredEntity( + par, c.cast<StructuredEntity>(), logger, pretty); + } else if (c->isa(&RttiTypes::DocumentPrimitive)) { + child = transformPrimitive(par, c.cast<DocumentPrimitive>(), + logger, pretty); + } + // TODO: Handle Anchors + if (child != nullptr) { + par->addChild(child); + } + } + } + return elem; +} +Rooted<Text> XmlTransformer::transformPrimitive(Handle<Element> parent, + Handle<DocumentPrimitive> p, + Logger &logger, bool pretty) +{ + Manager &mgr = parent->getManager(); + // transform the primitive content. + std::string textcontent = + VariantWriter::writeJsonToString(p->getContent(), pretty); + Rooted<Text> text{new Text(mgr, parent, textcontent)}; + return text; +} +} +}
\ No newline at end of file diff --git a/src/plugins/xml/XmlOutput.hpp b/src/plugins/xml/XmlOutput.hpp new file mode 100644 index 0000000..51d03f9 --- /dev/null +++ b/src/plugins/xml/XmlOutput.hpp @@ -0,0 +1,67 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file XmlOutput.hpp + * + * This provides an Output generator to serialize any given document to XML. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ +#ifndef _OUSIA_XML_OUTPUT_HPP_ +#define _OUSIA_XML_OUTPUT_HPP_ + +#include <ostream> + +#include <core/model/Document.hpp> +#include <core/XML.hpp> + +namespace ousia { +namespace xml { + +class XmlTransformer { +private: + Rooted<Element> transformStructuredEntity(Handle<Element> parent, + Handle<StructuredEntity> s, + Logger &logger, bool pretty); + + Rooted<Text> transformPrimitive(Handle<Element> parent, + Handle<DocumentPrimitive> p, + Logger &logger, bool pretty); + +public: + /** + * This writes an XML serialization of the given document to the given + * output stream. The serialization is equivalent to the input XML format, + * save for the domain references. TODO: Can we change this? If so: how? + * Note, though, that the serialization will not exploit transparency. + * TODO: Can we change that? + * + * @param doc is some Document. 
+ * @param out is the output stream the XML serialization of the document + * shall be written to. + * @param logger is the logger errors shall be written to. + * @param pretty is a flag that manipulates whether newlines and tabs are + * used. + */ + void writeXml(Handle<Document> doc, std::ostream &out, Logger &logger, + bool pretty); +}; +} +} +#endif
\ No newline at end of file |