summary refs log tree commit diff
path: root/src/plugins
diff options
context:
space:
mode:
author: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-15 00:27:11 +0100
committer: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-15 00:27:11 +0100
commit: 253492406f04657fe71e6c0c6603496241280478 (patch)
tree: 5a9c1b785a5559025ff7d26bf9ed880ce98ff0ce /src/plugins
parent: 551b7be64f207845cb05b8ec593f9bf2d7f0c940 (diff)
parent: b708dd4cce828c1089a18fefcc22804f7cdad908 (diff)
Merge branch 'master' into astoecke_parser_stack_new
Conflicts: application/CMakeLists.txt application/src/core/parser/stack/DocumentHandler.hpp application/src/core/parser/stack/DomainHandler.hpp application/src/core/parser/stack/ImportIncludeHandler.hpp
Diffstat (limited to 'src/plugins')
-rw-r--r-- src/plugins/html/DemoOutput.cpp 55
-rw-r--r-- src/plugins/html/DemoOutput.hpp 17
-rw-r--r-- src/plugins/xml/XmlOutput.cpp 116
-rw-r--r-- src/plugins/xml/XmlOutput.hpp 67
4 files changed, 207 insertions, 48 deletions
diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp
index d041c1d..3c54763 100644
--- a/src/plugins/html/DemoOutput.cpp
+++ b/src/plugins/html/DemoOutput.cpp
@@ -55,23 +55,13 @@ void DemoHTMLTransformer::writeHTML(Handle<Document> doc, std::ostream &out,
// So far was the "preamble". Now we have to get to the document content.
- // build the start and end map for annotation processing.
- AnnoMap startMap;
- AnnoMap endMap;
- for (auto &a : doc->getAnnotations()) {
- // we assume uniquely IDed annotations, which should be checked in the
- // validation process.
- startMap.emplace(a->getStart()->getName(), a);
- endMap.emplace(a->getEnd()->getName(), a);
- }
-
// extract the book root node.
Rooted<StructuredEntity> root = doc->getRoot();
if (root->getDescriptor()->getName() != "book") {
throw OusiaException("The given documents root is no book node!");
}
// transform the book node.
- Rooted<xml::Element> book = transformSection(body, root, startMap, endMap);
+ Rooted<xml::Element> book = transformSection(body, root);
// add it as child to the body node.
body->addChild(book);
@@ -100,8 +90,7 @@ SectionType getSectionType(const std::string &name)
}
Rooted<xml::Element> DemoHTMLTransformer::transformSection(
- Handle<xml::Element> parent, Handle<StructuredEntity> section,
- AnnoMap &startMap, AnnoMap &endMap)
+ Handle<xml::Element> parent, Handle<StructuredEntity> section)
{
Manager &mgr = section->getManager();
// check the section type.
@@ -140,8 +129,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(
Rooted<xml::Element> h{new xml::Element{mgr, sec, headingclass}};
sec->addChild(h);
// extract the heading text, enveloped in a paragraph Element.
- Rooted<xml::Element> h_content =
- transformParagraph(h, heading, startMap, endMap);
+ Rooted<xml::Element> h_content = transformParagraph(h, heading);
// We omit the paragraph Element and add the children directly to the
// heading Element
for (auto &n : h_content->getChildren()) {
@@ -165,11 +153,11 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(
const std::string childDescriptorName = s->getDescriptor()->getName();
Rooted<xml::Element> child;
if (childDescriptorName == "paragraph") {
- child = transformParagraph(sec, s, startMap, endMap);
+ child = transformParagraph(sec, s);
} else if (childDescriptorName == "ul" || childDescriptorName == "ol") {
- child = transformList(sec, s, startMap, endMap);
+ child = transformList(sec, s);
} else {
- child = transformSection(sec, s, startMap, endMap);
+ child = transformSection(sec, s);
}
if (!child.isNull()) {
sec->addChild(child);
@@ -179,8 +167,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(
}
Rooted<xml::Element> DemoHTMLTransformer::transformList(
- Handle<xml::Element> parent, Handle<StructuredEntity> list,
- AnnoMap &startMap, AnnoMap &endMap)
+ Handle<xml::Element> parent, Handle<StructuredEntity> list)
{
Manager &mgr = list->getManager();
// create the list Element, which is either ul or ol (depends on descriptor)
@@ -195,8 +182,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformList(
Rooted<xml::Element> li{new xml::Element{mgr, l, "li"}};
l->addChild(li);
// extract the item text, enveloped in a paragraph Element.
- Rooted<xml::Element> li_content =
- transformParagraph(li, item, startMap, endMap);
+ Rooted<xml::Element> li_content = transformParagraph(li, item);
// We omit the paragraph Element and add the children directly to
// the list item
for (auto &n : li_content->getChildren()) {
@@ -229,8 +215,7 @@ static Rooted<xml::Element> openAnnotation(Manager &mgr, AnnoStack &opened,
}
Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(
- Handle<xml::Element> parent, Handle<StructuredEntity> par,
- AnnoMap &startMap, AnnoMap &endMap)
+ Handle<xml::Element> parent, Handle<StructuredEntity> par)
{
Manager &mgr = par->getManager();
// create the p Element
@@ -245,8 +230,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(
Rooted<xml::Element> strong{new xml::Element{mgr, p, "strong"}};
p->addChild(strong);
// extract the heading text, enveloped in a paragraph Element.
- Rooted<xml::Element> h_content =
- transformParagraph(strong, heading, startMap, endMap);
+ Rooted<xml::Element> h_content = transformParagraph(strong, heading);
// We omit the paragraph Element and add the children directly to the
// heading Element
for (auto &n : h_content->getChildren()) {
@@ -267,17 +251,15 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(
Rooted<xml::Element> current = p;
for (auto &n : par->getField()) {
if (n->isa(&RttiTypes::Anchor)) {
+ Rooted<Anchor> a = n.cast<Anchor>();
// check if this is a start Anchor.
- // here we assume, again, that the ids/names of anchors are unique.
- auto it = startMap.find(n->getName());
- if (it != startMap.end()) {
+ if (a->isStart()) {
// if we have a start anchor, we open an annotation element.
- current = openAnnotation(mgr, opened, it->second, current);
+ current =
+ openAnnotation(mgr, opened, a->getAnnotation(), current);
continue;
- }
- // check if this is an end Anchor.
- auto it2 = endMap.find(n->getName());
- if (it2 != endMap.end()) {
+ // check if this is an end Anchor.
+ } else if (a->isEnd()) {
/*
* Now it gets somewhat interesting: We have to close all
* tags that started after the one that is closed now and
@@ -289,7 +271,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(
Rooted<AnnotationEntity> closed = opened.top();
current = current->getParent();
opened.pop();
- while (closed->getEnd()->getName() != n->getName()) {
+ while (closed != a->getAnnotation()) {
/*
* We implicitly do close tags by climbing up the XML tree
* until we are at the right element.
@@ -312,6 +294,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(
current = openAnnotation(mgr, opened, closed, current);
}
}
+ // otherwise it is a disconnected Anchor and we can ignore it.
continue;
}
// if this is not an anchor, we can only handle text.
@@ -324,7 +307,7 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(
if (childDescriptorName == "text") {
Handle<DocumentPrimitive> primitive =
t->getField()[0].cast<DocumentPrimitive>();
- if (primitive.isNull()) {
+ if (primitive == nullptr) {
throw OusiaException("Text field is not primitive!");
}
current->addChild(new xml::Text(
diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp
index 67b7494..4367202 100644
--- a/src/plugins/html/DemoOutput.hpp
+++ b/src/plugins/html/DemoOutput.hpp
@@ -30,7 +30,6 @@
#ifndef _OUSIA_HTML_DEMO_OUTPUT_HPP_
#define _OUSIA_HTML_DEMO_OUTPUT_HPP_
-#include <map>
#include <ostream>
#include <core/model/Document.hpp>
@@ -39,8 +38,6 @@
namespace ousia {
namespace html {
-typedef std::map<std::string, Rooted<AnnotationEntity>> AnnoMap;
-
class DemoHTMLTransformer {
private:
/**
@@ -50,23 +47,20 @@ private:
* called recursively.
*/
Rooted<xml::Element> transformSection(Handle<xml::Element> parent,
- Handle<StructuredEntity> sec,
- AnnoMap &startMap, AnnoMap &endMap);
+ Handle<StructuredEntity> sec);
/**
* This transforms a list entity, namely ul and ol to an XHTML element.
* For each item, the transformParagraph function is called.
*/
Rooted<xml::Element> transformList(Handle<xml::Element> parent,
- Handle<StructuredEntity> list,
- AnnoMap &startMap, AnnoMap &endMap);
+ Handle<StructuredEntity> list);
/**
* This transforms a paragraph-like entity, namely heading, item and
* paragraph, to an XHTML element including the text and the anchors
- * contained. For anchor handling we require the AnnoMaps.
+ * contained.
*/
Rooted<xml::Element> transformParagraph(Handle<xml::Element> parent,
- Handle<StructuredEntity> par,
- AnnoMap &startMap, AnnoMap &endMap);
+ Handle<StructuredEntity> par);
public:
/**
@@ -89,8 +83,7 @@ public:
* @param pretty is a flag that manipulates whether newlines and tabs are
* used.
*/
- void writeHTML(Handle<Document> doc, std::ostream &out,
- bool pretty = true);
+ void writeHTML(Handle<Document> doc, std::ostream &out, bool pretty = true);
};
}
}
diff --git a/src/plugins/xml/XmlOutput.cpp b/src/plugins/xml/XmlOutput.cpp
new file mode 100644
index 0000000..00aae04
--- /dev/null
+++ b/src/plugins/xml/XmlOutput.cpp
@@ -0,0 +1,116 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "XmlOutput.hpp"
+
+#include <core/common/Variant.hpp>
+#include <core/common/VariantWriter.hpp>
+
+namespace ousia {
+namespace xml {
+
+void XmlTransformer::writeXml(Handle<Document> doc, std::ostream &out,
+ Logger &logger, bool pretty)
+{
+ Manager &mgr = doc->getManager();
+ // the outermost tag is the document itself.
+ Rooted<Element> document{new Element{mgr, {nullptr}, "document"}};
+ // then write imports for all referenced domains.
+ for (auto d : doc->getDomains()) {
+ Rooted<Element> import{
+ new Element{mgr,
+ document,
+ "import",
+ {{"rel", "domain"}, {"src", d->getName() + ".oxm"}}}};
+ document->addChild(import);
+ }
+ // transform the root element (and, using recursion, everything below it)
+ Rooted<Element> root =
+ transformStructuredEntity(document, doc->getRoot(), logger, pretty);
+ document->addChild(root);
+ // then serialize.
+ document->serialize(out, "<?xml version=\"1.0\"?>", pretty);
+}
+
+Rooted<Element> XmlTransformer::transformStructuredEntity(
+ Handle<Element> parent, Handle<StructuredEntity> s, Logger &logger,
+ bool pretty)
+{
+ Manager &mgr = parent->getManager();
+ // TODO: Is this the right handling?
+ // copy the attributes.
+ Variant attrs = s->getAttributes();
+ // build them.
+ s->getDescriptor()->getAttributesDescriptor()->build(attrs, logger);
+ // get the array representation.
+ Variant::arrayType attrArr = attrs.asArray();
+ // transform them to string key-value pairs.
+ NodeVector<Attribute> as =
+ s->getDescriptor()->getAttributesDescriptor()->getAttributes();
+ std::map<std::string, std::string> xmlAttrs;
+ for (size_t a = 0; a < as.size(); a++) {
+ xmlAttrs.emplace(as[a]->getName(),
+ VariantWriter::writeJsonToString(attrArr[a], pretty));
+ }
+ // create the XML element itself.
+ Rooted<Element> elem{
+ new Element{mgr, parent, s->getDescriptor()->getName(), xmlAttrs}};
+ // then transform the fields.
+ NodeVector<FieldDescriptor> fieldDescs =
+ s->getDescriptor()->getFieldDescriptors();
+ for (size_t f = 0; f < fieldDescs.size(); f++) {
+ NodeVector<StructureNode> field = s->getField(f);
+ Rooted<FieldDescriptor> fieldDesc = fieldDescs[f];
+ // if this is not the default node create an intermediate node for it.
+ Rooted<Element> par = elem;
+ if (fieldDesc->getFieldType() != FieldDescriptor::FieldType::TREE &&
+ !fieldDesc->isPrimitive()) {
+ par = Rooted<Element>{new Element(mgr, elem, fieldDesc->getName())};
+ elem->addChild(par);
+ }
+ for (auto c : field) {
+ // transform each child.
+ Rooted<Node> child;
+ if (c->isa(&RttiTypes::StructuredEntity)) {
+ child = transformStructuredEntity(
+ par, c.cast<StructuredEntity>(), logger, pretty);
+ } else if (c->isa(&RttiTypes::DocumentPrimitive)) {
+ child = transformPrimitive(par, c.cast<DocumentPrimitive>(),
+ logger, pretty);
+ }
+ // TODO: Handle Anchors
+ if (child != nullptr) {
+ par->addChild(child);
+ }
+ }
+ }
+ return elem;
+}
+Rooted<Text> XmlTransformer::transformPrimitive(Handle<Element> parent,
+ Handle<DocumentPrimitive> p,
+ Logger &logger, bool pretty)
+{
+ Manager &mgr = parent->getManager();
+ // transform the primitive content.
+ std::string textcontent =
+ VariantWriter::writeJsonToString(p->getContent(), pretty);
+ Rooted<Text> text{new Text(mgr, parent, textcontent)};
+ return text;
+}
+}
+}
\ No newline at end of file
diff --git a/src/plugins/xml/XmlOutput.hpp b/src/plugins/xml/XmlOutput.hpp
new file mode 100644
index 0000000..51d03f9
--- /dev/null
+++ b/src/plugins/xml/XmlOutput.hpp
@@ -0,0 +1,67 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file XmlOutput.hpp
+ *
+ * This provides an Output generator to serialize any given document to XML.
+ *
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
+#ifndef _OUSIA_XML_OUTPUT_HPP_
+#define _OUSIA_XML_OUTPUT_HPP_
+
+#include <ostream>
+
+#include <core/model/Document.hpp>
+#include <core/XML.hpp>
+
+namespace ousia {
+namespace xml {
+
+class XmlTransformer {
+private:
+ Rooted<Element> transformStructuredEntity(Handle<Element> parent,
+ Handle<StructuredEntity> s,
+ Logger &logger, bool pretty);
+
+ Rooted<Text> transformPrimitive(Handle<Element> parent,
+ Handle<DocumentPrimitive> p,
+ Logger &logger, bool pretty);
+
+public:
+ /**
+ * This writes an XML serialization of the given document to the given
+ * output stream. The serialization is equivalent to the input XML format,
+ * save for the domain references. TODO: Can we change this? If so: how?
+ * Note, though, that the serialization will not exploit transparency.
+ * TODO: Can we change that?
+ *
+ * @param doc is some Document.
+ * @param out is the output stream the XML serialization of the document
+ * shall be written to.
+ * @param logger is the logger errors shall be written to.
+ * @param pretty is a flag that manipulates whether newlines and tabs are
+ * used.
+ */
+ void writeXml(Handle<Document> doc, std::ostream &out, Logger &logger,
+ bool pretty);
+};
+}
+}
+#endif
\ No newline at end of file