summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-01-08 19:41:54 +0100
committer Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-01-08 19:41:54 +0100
commit b6197efcf5b97ddcaae99425748b2f2e74bde3c3 (patch)
tree d2ccb792ad81f4e7a4594a037c35c20c4f82c6a1 /src
parent f0abafd4367b3b5c58dffdab69edce1d867942cb (diff)
parent 33b92b72ed160f22dc627e841d5f84de4ebc0c6c (diff)
Merge branch 'master' of somweyr.de:ousia
Diffstat (limited to 'src')
-rw-r--r-- src/core/CSS.hpp 5
-rw-r--r-- src/core/CodeTokenizer.hpp 5
-rw-r--r-- src/core/XML.cpp 39
-rw-r--r-- src/core/XML.hpp 130
-rw-r--r-- src/plugins/html/DemoOutput.cpp 170
-rw-r--r-- src/plugins/html/DemoOutput.hpp 13
6 files changed, 273 insertions, 89 deletions
diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp
index 60aa91e..75ac73f 100644
--- a/src/core/CSS.hpp
+++ b/src/core/CSS.hpp
@@ -16,6 +16,11 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+/**
+ * @file CSS.hpp
+
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
#ifndef _OUSIA_CSS_HPP_
#define _OUSIA_CSS_HPP_
diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp
index 4190297..154f949 100644
--- a/src/core/CodeTokenizer.hpp
+++ b/src/core/CodeTokenizer.hpp
@@ -16,6 +16,11 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+/**
+ * @file CodeTokenizer.hpp
+
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
#ifndef _OUSIA_CODE_TOKENIZER_HPP_
#define _OUSIA_CODE_TOKENIZER_HPP_
diff --git a/src/core/XML.cpp b/src/core/XML.cpp
new file mode 100644
index 0000000..038cb86
--- /dev/null
+++ b/src/core/XML.cpp
@@ -0,0 +1,39 @@
+
+#include "XML.hpp"
+
+namespace ousia {
+namespace xml {
+
+void Node::serialize(std::ostream& out){
+ out << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+ doSerialize(out, 0);
+}
+
+void Element::doSerialize(std::ostream& out, unsigned int tabdepth)
+{
+ for (unsigned int t = 0; t < tabdepth; t++) {
+ out << '\t';
+ }
+ out << '<' << name;
+ for (auto &a : attributes) {
+ out << ' ' << a.first << "=\"" << a.second << '\"';
+ }
+ out << ">\n";
+ for (auto &n : children) {
+ n->doSerialize(out, tabdepth + 1);
+ }
+ for (unsigned int t = 0; t < tabdepth; t++) {
+ out << '\t';
+ }
+ out << "</" << name << ">\n";
+}
+
+void Text::doSerialize(std::ostream& out, unsigned int tabdepth)
+{
+ for (unsigned int t = 0; t < tabdepth; t++) {
+ out << '\t';
+ }
+ out << text << '\n';
+}
+}
+}
diff --git a/src/core/XML.hpp b/src/core/XML.hpp
new file mode 100644
index 0000000..9ca124a
--- /dev/null
+++ b/src/core/XML.hpp
@@ -0,0 +1,130 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file XML.hpp
+ *
+ * This header provides XML classes to build an XML tree as well as functions
+ * to serialize that XML tree to text. We do not support the full XML
+ * specification (as described here: http://www.w3.org/TR/REC-xml/ ) but only
+ * a small subset. This subset is defined by the following context-free grammar:
+ *
+ * NODE := ELEMENT | string
+ * ELEMENT := START NODES END
+ * NODES := NODE NODES | epsilon
+ * START := < name ATTRIBUTES >
+ * ATTRIBUTES := ATTRIBUTE ATTRIBUTES | epsilon
+ * ATTRIBUTE := key = "value"
+ * END := </ name >
+ *
+ * where the start symbol of a document is ELEMENT. Note that we accept only a
+ * single root element and no primitive text at root level. Attributes are
+ * key-value pairs of strings. Start and end tag names have to match.
+ *
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
+#ifndef _OUSIA_XML_HPP_
+#define _OUSIA_XML_HPP_
+
+#include <map>
+#include <ostream>
+#include <vector>
+
+#include <core/managed/Managed.hpp>
+#include <core/managed/ManagedContainer.hpp>
+
+namespace ousia {
+namespace xml {
+
+/**
+ * Node is the common super-class of actual elements (tag-bounded) and text.
+ * It declares the pure virtual doSerialize() function that the subclasses
+ * implement.
+ */
+class Node : public Managed {
+
+public:
+ Node(Manager &mgr) : Managed(mgr){};
+
+ /**
+ * This method writes an XML prolog and the XML representing the current
+ * node, including all children, to the given output stream.
+ * @param out is the output stream the serialized data shall be written to.
+ */
+ void serialize(std::ostream &out);
+ /**
+ * This method just writes the XML representation of this node to the
+ * output stream, without the XML prolog.
+ *
+ * @param out the output stream the serialized data shall be written
+ * to.
+ * @param tabdepth the current tabdepth for prettier output.
+ */
+ virtual void doSerialize(std::ostream &out, unsigned int tabdepth) = 0;
+};
+
+/**
+ * An element in XML, as defined by the W3C:
+ *
+ * http://www.w3.org/TR/REC-xml/#sec-starttags
+ *
+ * For us, an element necessarily has a name. It may have key-value pairs as
+ * attributes, where each key is unique (which is enforced by std::map).
+ * Additionally it might have other Nodes as children.
+ */
+class Element : public Node {
+public:
+ const std::string name;
+ std::map<std::string, std::string> attributes;
+ ManagedVector<Node> children;
+
+ Element(Manager &mgr, std::string name) : Node(mgr), name(std::move(name))
+ {
+ }
+
+ Element(Manager &mgr, std::string name,
+ std::map<std::string, std::string> attributes)
+ : Node(mgr), name(std::move(name)), attributes(std::move(attributes))
+ {
+ }
+
+ /**
+ * This writes the following to the output stream:
+ * * The start tag of this element including name and attributes
+ * * The serialized data of all children as ordered by the vector.
+ * * The end tag of this element.
+ *
+ */
+ void doSerialize(std::ostream &out, unsigned int tabdepth) override;
+};
+
+class Text : public Node {
+public:
+ const std::string text;
+
+ Text(Manager &mgr, std::string text) : Node(mgr), text(std::move(text)) {}
+
+ /**
+ * This writes tabdepth tab characters, the text and a newline to the output.
+ *
+ */
+ void doSerialize(std::ostream &out, unsigned int tabdepth) override;
+};
+}
+}
+#endif
diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp
index 463a5d2..035ba25 100644
--- a/src/plugins/html/DemoOutput.cpp
+++ b/src/plugins/html/DemoOutput.cpp
@@ -18,6 +18,7 @@
#include <core/common/Exceptions.hpp>
#include <core/common/Rtti.hpp>
+#include <core/common/Variant.hpp>
#include "DemoOutput.hpp"
@@ -27,24 +28,35 @@ namespace html {
void DemoHTMLTransformer::writeHTML(Handle<model::Document> doc,
std::ostream &out)
{
- // write preamble
- out << "<?xml version=\" 1.0 \"?>\n";
- out << "<html>\n";
- out << "\t<head>\n";
- out << "\t\t<title>Test HTML Output for " << doc->getName() << "</title>\n";
- out << "\t</head>\n";
- out << "\t<body>\n";
-
- // look for the book root node.
+ Manager &mgr = doc->getManager();
+ // Create an XML object tree for the document first.
+ Rooted<xml::Element> html{new xml::Element{mgr, "html"}};
+ // add the head Element
+ Rooted<xml::Element> head{new xml::Element{mgr, "head"}};
+ html->children.push_back(head);
+ // add the title Element with Text
+ Rooted<xml::Element> title{new xml::Element{mgr, "title"}};
+ head->children.push_back(title);
+ title->children.push_back(
+ new xml::Text(mgr, "Test HTML Output for " + doc->getName()));
+ // add the body Element
+ Rooted<xml::Element> body{new xml::Element{mgr, "body"}};
+ html->children.push_back(body);
+
+ // So far this was the "preamble". Now we have to get to the document content.
+
+ // extract the book root node.
Rooted<model::StructuredEntity> root = doc->getRoot();
if (root->getDescriptor()->getName() != "book") {
throw OusiaException("The given documents root is no book node!");
}
- // write it to HTML.
- writeSection(root, out);
- // write end
- out << "\t</body>\n";
- out << "</html>\n";
+ // transform the book node.
+ Rooted<xml::Element> book = transformSection(root);
+ // add it as child to the body node.
+ body->children.push_back(book);
+
+ // After the content has been transformed, we serialize it.
+ html->serialize(out);
}
/**
@@ -67,61 +79,54 @@ SectionType getSectionType(const std::string &name)
}
}
-void DemoHTMLTransformer::writeSection(Handle<model::StructuredEntity> sec,
- std::ostream &out)
+Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::StructuredEntity> section)
{
+ Manager &mgr = section->getManager();
// check the section type.
- SectionType type = getSectionType(sec->getDescriptor()->getName());
+ const std::string secclass = section->getDescriptor()->getName();
+ SectionType type = getSectionType(secclass);
if (type == SectionType::NONE) {
// if the input node is no section, we ignore it.
- return;
+ return {nullptr};
}
+ // create a div tag containing the section's content.
+ Rooted<xml::Element> sec{
+ new xml::Element{mgr, "div", {{"class", secclass}}}};
// check if we have a heading.
- if (sec->hasField("heading")) {
- Rooted<model::StructuredEntity> heading = sec->getField("heading")[0];
- out << "\t\t";
+ if (section->hasField("heading")) {
+ Rooted<model::StructuredEntity> heading =
+ section->getField("heading")[0];
+ std::string headingclass;
switch (type) {
case SectionType::BOOK:
- out << "<h1>";
+ headingclass = "h1";
break;
case SectionType::CHAPTER:
- out << "<h2>";
+ headingclass = "h2";
break;
case SectionType::SECTION:
- out << "<h3>";
+ headingclass = "h3";
break;
case SectionType::SUBSECTION:
- out << "<h4>";
+ headingclass = "h4";
break;
case SectionType::NONE:
// this can not happen;
break;
}
- // the second field marks the heading. So let's write it.
- writeParagraph(heading, out, false);
- // close the heading tag.
- switch (type) {
- case SectionType::BOOK:
- out << "</h1>";
- break;
- case SectionType::CHAPTER:
- out << "</h2>";
- break;
- case SectionType::SECTION:
- out << "</h3>";
- break;
- case SectionType::SUBSECTION:
- out << "</h4>";
- break;
- case SectionType::NONE:
- // this can not happen;
- break;
+ Rooted<xml::Element> h{new xml::Element{mgr, headingclass}};
+ sec->children.push_back(h);
+ // extract the heading text, enveloped in a paragraph Element.
+ Rooted<xml::Element> h_content = transformParagraph(heading);
+ // We omit the paragraph Element and add the children directly to the
+ // heading Element
+ for (auto &n : h_content->children) {
+ h->children.push_back(n);
}
- out << "\n";
}
- // then write the section content recursively.
- NodeVector<model::StructuredEntity> mainField = sec->getField();
+ // Then we get all the children.
+ NodeVector<model::StructuredEntity> mainField = section->getField();
for (auto &n : mainField) {
/*
* Strictly speaking this is the wrong mechanism, because we would have
@@ -130,56 +135,59 @@ void DemoHTMLTransformer::writeSection(Handle<model::StructuredEntity> sec,
* to be a listener structure of transformations that check if they can
* transform this specific node.
*/
- std::string childDescriptorName = n->getDescriptor()->getName();
+ const std::string childDescriptorName = n->getDescriptor()->getName();
+ Rooted<xml::Element> child;
if (childDescriptorName == "paragraph") {
- writeParagraph(n, out);
+ child = transformParagraph(n);
// TODO: Implement
// } else if(childDescriptorName == "ul"){
// writeList(n, out);
} else {
- writeSection(n, out);
+ child = transformSection(n);
+ }
+ if (!child.isNull()) {
+ sec->children.push_back(child);
}
}
+ return sec;
}
-void DemoHTMLTransformer::writeParagraph(Handle<model::StructuredEntity> par,
- std::ostream &out, bool writePTags)
+Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(Handle<model::StructuredEntity> par)
{
- // validate descriptor.
- if (par->getDescriptor()->getName() != "paragraph") {
- throw OusiaException("Expected paragraph!");
- }
+ Manager &mgr = par->getManager();
+ // create the p xml::Element
+ Rooted<xml::Element> p{new xml::Element{mgr, "p"}};
+
// check if we have a heading.
if (par->hasField("heading")) {
Rooted<model::StructuredEntity> heading = par->getField("heading")[0];
- // start the heading tag
- out << "\t\t<h5>";
- // the second field marks the heading. So let's write it.
- writeParagraph(heading, out, false);
- // close the heading tag.
- out << "</h5>\n";
- }
- // write start tag
- if (writePTags) {
- out << "\t\t<p>";
- }
- // write content
- // TODO: What about emphasis?
- for (auto &text : par->getField()) {
- if (text->getDescriptor()->getName() != "text") {
- throw OusiaException("Expected text!");
+ // put the heading in a strong xml::Element.
+ Rooted<xml::Element> strong{new xml::Element{mgr, "strong"}};
+ p->children.push_back(strong);
+ // extract the heading text, enveloped in a paragraph Element.
+ Rooted<xml::Element> h_content = transformParagraph(heading);
+ // We omit the paragraph Element and add the children directly to the
+ // strong Element
+ for (auto &n : h_content->children) {
+ strong->children.push_back(n);
}
- Handle<model::DocumentPrimitive> primitive =
- text->getField()[0].cast<model::DocumentPrimitive>();
- if (primitive.isNull()) {
- throw OusiaException("Text field is not primitive!");
- }
- out << primitive->getContent().asString();
}
- // write end tag
- if (writePTags) {
- out << "</p>\n";
+
+ // transform paragraph children to XML as well
+ for (auto &n : par->getField()) {
+ std::string childDescriptorName = n->getDescriptor()->getName();
+ if (childDescriptorName == "text") {
+ Handle<model::DocumentPrimitive> primitive =
+ n->getField()[0].cast<model::DocumentPrimitive>();
+ if (primitive.isNull()) {
+ throw OusiaException("Text field is not primitive!");
+ }
+ p->children.push_back(
+ new xml::Text(mgr, primitive->getContent().asString()));
+ }
+ // TODO: Handle non-text content
}
+ return p;
}
}
}
diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp
index ca9bcd2..70a5daa 100644
--- a/src/plugins/html/DemoOutput.hpp
+++ b/src/plugins/html/DemoOutput.hpp
@@ -33,6 +33,7 @@
#include <ostream>
#include <core/model/Document.hpp>
+#include <core/XML.hpp>
namespace ousia {
namespace html {
@@ -40,15 +41,11 @@ namespace html {
class DemoHTMLTransformer {
private:
/**
- * This method is to be called recursively to write a chapter, section or
- * subsection to HTML.
+ * These methods are called recursively to transform a document to an XML
+ * tree.
*/
- void writeSection(Handle<model::StructuredEntity> sec, std::ostream& out);
- /**
- * This method is to be called recursively to write a paragraph to HTML.
- */
- void writeParagraph(Handle<model::StructuredEntity> par, std::ostream& out,
- bool writePTags = true);
+ Rooted<xml::Element> transformSection(Handle<model::StructuredEntity> sec);
+ Rooted<xml::Element> transformParagraph(Handle<model::StructuredEntity> par);
/**
* This method is to be called recursively to write a list to HTML.
* TODO: Implement