diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/core/CSS.hpp | 5 | ||||
-rw-r--r-- | src/core/CodeTokenizer.hpp | 5 | ||||
-rw-r--r-- | src/core/XML.cpp | 39 | ||||
-rw-r--r-- | src/core/XML.hpp | 130 | ||||
-rw-r--r-- | src/plugins/html/DemoOutput.cpp | 170 | ||||
-rw-r--r-- | src/plugins/html/DemoOutput.hpp | 13 | ||||
-rw-r--r-- | test/core/XMLTest.cpp | 78 |
8 files changed, 353 insertions, 89 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 10e43ea..e67e2be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,6 +110,7 @@ ADD_LIBRARY(ousia_core src/core/Registry src/core/ResourceLocator src/core/Tokenizer + src/core/XML src/core/common/CharReader src/core/common/Exceptions src/core/common/Function @@ -194,6 +195,7 @@ IF(TEST) test/core/RegistryTest test/core/ResourceLocatorTest test/core/TokenizerTest + test/core/XMLTest test/core/common/CharReaderTest test/core/common/FunctionTest test/core/common/LoggerTest diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 60aa91e..75ac73f 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -16,6 +16,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * @file CSS.hpp + + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ #ifndef _OUSIA_CSS_HPP_ #define _OUSIA_CSS_HPP_ diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 4190297..154f949 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -16,6 +16,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * @file CodeTokenizer.hpp + + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ #ifndef _OUSIA_CODE_TOKENIZER_HPP_ #define _OUSIA_CODE_TOKENIZER_HPP_ diff --git a/src/core/XML.cpp b/src/core/XML.cpp new file mode 100644 index 0000000..038cb86 --- /dev/null +++ b/src/core/XML.cpp @@ -0,0 +1,39 @@ + +#include "XML.hpp" + +namespace ousia { +namespace xml { + +void Node::serialize(std::ostream& out){ + out << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; + doSerialize(out, 0); +} + +void Element::doSerialize(std::ostream& out, unsigned int tabdepth) +{ + for (unsigned int t = 0; t < tabdepth; t++) { + out << '\t'; + } + out << '<' << name; + for (auto &a : attributes) { + out << ' ' << a.first << "=\"" << a.second << '\"'; + } + out << ">\n"; + for (auto &n : children) { + n->doSerialize(out, tabdepth + 1); + } + for (unsigned int t = 0; t < tabdepth; t++) { + out << '\t'; + } + out << "</" << name << ">\n"; +} + +void Text::doSerialize(std::ostream& out, unsigned int tabdepth) +{ + for (unsigned int t = 0; t < tabdepth; t++) { + out << '\t'; + } + out << text << '\n'; +} +} +} diff --git a/src/core/XML.hpp b/src/core/XML.hpp new file mode 100644 index 0000000..9ca124a --- /dev/null +++ b/src/core/XML.hpp @@ -0,0 +1,130 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file XML.hpp + * + * This header provides XML classes to build an XML tree as well as functions + * to serialize that XMl tree to text. We do not support the full XML + * specification (like described here: http://www.w3.org/TR/REC-xml/ ) but only + * a small subset. This subset is defined by the following context-free grammar: + * + * NODE := ELEMENT | string + * ELEMENT := START NODES END + * NODES := NODE NODES | epsilon + * START := < name ATTRIBUTES > + * ATTRIBUTES := ATTRIBUTE ATTRIBUTES | epsilon + * ATTRIBUTE := key = "value" + * END := </ name > + * + * where the Axiom of a document is "Element". Note that we accept only a + * singular root element and no primitive text at root level. Attributes are + * key-value pairs of strings. Start and end tag name have to match. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ +#ifndef _OUSIA_XML_HPP_ +#define _OUSIA_XML_HPP_ + +#include <map> +#include <ostream> +#include <vector> + +#include <core/managed/Managed.hpp> +#include <core/managed/ManagedContainer.hpp> + +namespace ousia { +namespace xml { + +/** + * Node is the common super-class of actual elements (tag-bounded) and text. + * It specifies the pure virtual serialize() function that the subclasses + * implement. + */ +class Node : public Managed { + +public: + Node(Manager &mgr) : Managed(mgr){}; + + /** + * This method writes an XML prolog and the XML representing the current + * node, including all children, to the given output stream. + * @param out is the output stream the serialized data shall be written to. + */ + void serialize(std::ostream &out); + /** + * This method just writes the XML representation of this node to the + * output stream, without the XML prolog. + * + * @param out the output stream the serialized data shall be written + * to. + * @param tabdepth the current tabdepth for prettier output. + */ + virtual void doSerialize(std::ostream &out, unsigned int tabdepth) = 0; +}; + +/** + * An element in XML is defined as by the W3C: + * + * http://www.w3.org/TR/REC-xml/#sec-starttags + * + * For as an element necessarily has a name. It may have key-value pairs as + * attributes, where each key is unique (which is enforced by std::map). + * Additionally it might have other Nodes as children. + */ +class Element : public Node { +public: + const std::string name; + std::map<std::string, std::string> attributes; + ManagedVector<Node> children; + + Element(Manager &mgr, std::string name) : Node(mgr), name(std::move(name)) + { + } + + Element(Manager &mgr, std::string name, + std::map<std::string, std::string> attributes) + : Node(mgr), name(std::move(name)), attributes(std::move(attributes)) + { + } + + /** + * This writes the following to the output stream: + * * The start tag of this element including name and attributes + * * The serialized data of all children as ordered by the vector. + * * The end tag of this element. + * + */ + void doSerialize(std::ostream &out, unsigned int tabdepth) override; +}; + +class Text : public Node { +public: + const std::string text; + + Text(Manager &mgr, std::string text) : Node(mgr), text(std::move(text)) {} + + /** + * This just writes the text to the output. + * + */ + void doSerialize(std::ostream &out, unsigned int tabdepth) override; +}; +} +} +#endif diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp index 463a5d2..035ba25 100644 --- a/src/plugins/html/DemoOutput.cpp +++ b/src/plugins/html/DemoOutput.cpp @@ -18,6 +18,7 @@ #include <core/common/Exceptions.hpp> #include <core/common/Rtti.hpp> +#include <core/common/Variant.hpp> #include "DemoOutput.hpp" @@ -27,24 +28,35 @@ namespace html { void DemoHTMLTransformer::writeHTML(Handle<model::Document> doc, std::ostream &out) { - // write preamble - out << "<?xml version=\" 1.0 \"?>\n"; - out << "<html>\n"; - out << "\t<head>\n"; - out << "\t\t<title>Test HTML Output for " << doc->getName() << "</title>\n"; - out << "\t</head>\n"; - out << "\t<body>\n"; - - // look for the book root node. + Manager &mgr = doc->getManager(); + // Create an XML object tree for the document first. + Rooted<xml::Element> html{new xml::Element{mgr, "html"}}; + // add the head Element + Rooted<xml::Element> head{new xml::Element{mgr, "head"}}; + html->children.push_back(head); + // add the title Element with Text + Rooted<xml::Element> title{new xml::Element{mgr, "title"}}; + head->children.push_back(title); + title->children.push_back( + new xml::Text(mgr, "Test HTML Output for " + doc->getName())); + // add the body Element + Rooted<xml::Element> body{new xml::Element{mgr, "body"}}; + html->children.push_back(body); + + // So far was the "preamble". No we have to get to the document content. + + // extract the book root node. Rooted<model::StructuredEntity> root = doc->getRoot(); if (root->getDescriptor()->getName() != "book") { throw OusiaException("The given documents root is no book node!"); } - // write it to HTML. - writeSection(root, out); - // write end - out << "\t</body>\n"; - out << "</html>\n"; + // transform the book node. + Rooted<xml::Element> book = transformSection(root); + // add it as child to the body node. + body->children.push_back(book); + + // After the content has been transformed, we serialize it. + html->serialize(out); } /** @@ -67,61 +79,54 @@ SectionType getSectionType(const std::string &name) } } -void DemoHTMLTransformer::writeSection(Handle<model::StructuredEntity> sec, - std::ostream &out) +Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::StructuredEntity> section) { + Manager &mgr = section->getManager(); // check the section type. - SectionType type = getSectionType(sec->getDescriptor()->getName()); + const std::string secclass = section->getDescriptor()->getName(); + SectionType type = getSectionType(secclass); if (type == SectionType::NONE) { // if the input node is no section, we ignore it. - return; + return {nullptr}; } + // create a div tag containing the sections content. + Rooted<xml::Element> sec{ + new xml::Element{mgr, "div", {{"class", secclass}}}}; // check if we have a heading. - if (sec->hasField("heading")) { - Rooted<model::StructuredEntity> heading = sec->getField("heading")[0]; - out << "\t\t"; + if (section->hasField("heading")) { + Rooted<model::StructuredEntity> heading = + section->getField("heading")[0]; + std::string headingclass; switch (type) { case SectionType::BOOK: - out << "<h1>"; + headingclass = "h1"; break; case SectionType::CHAPTER: - out << "<h2>"; + headingclass = "h2"; break; case SectionType::SECTION: - out << "<h3>"; + headingclass = "h3"; break; case SectionType::SUBSECTION: - out << "<h4>"; + headingclass = "h4"; break; case SectionType::NONE: // this can not happen; break; } - // the second field marks the heading. So let's write it. - writeParagraph(heading, out, false); - // close the heading tag. - switch (type) { - case SectionType::BOOK: - out << "</h1>"; - break; - case SectionType::CHAPTER: - out << "</h2>"; - break; - case SectionType::SECTION: - out << "</h3>"; - break; - case SectionType::SUBSECTION: - out << "</h4>"; - break; - case SectionType::NONE: - // this can not happen; - break; + Rooted<xml::Element> h{new xml::Element{mgr, headingclass}}; + sec->children.push_back(h); + // extract the heading text, enveloped in a paragraph Element. + Rooted<xml::Element> h_content = transformParagraph(heading); + // We omit the paragraph Element and add the children directly to the + // heading Element + for (auto &n : h_content->children) { + h->children.push_back(n); } - out << "\n"; } - // then write the section content recursively. - NodeVector<model::StructuredEntity> mainField = sec->getField(); + // Then we get all the children. + NodeVector<model::StructuredEntity> mainField = section->getField(); for (auto &n : mainField) { /* * Strictly speaking this is the wrong mechanism, because we would have @@ -130,56 +135,59 @@ void DemoHTMLTransformer::writeSection(Handle<model::StructuredEntity> sec, * to be a listener structure of transformations that check if they can * transform this specific node. */ - std::string childDescriptorName = n->getDescriptor()->getName(); + const std::string childDescriptorName = n->getDescriptor()->getName(); + Rooted<xml::Element> child; if (childDescriptorName == "paragraph") { - writeParagraph(n, out); + child = transformParagraph(n); // TODO: Implement // } else if(childDescriptorName == "ul"){ // writeList(n, out); } else { - writeSection(n, out); + child = transformSection(n); + } + if (!child.isNull()) { + sec->children.push_back(child); } } + return sec; } -void DemoHTMLTransformer::writeParagraph(Handle<model::StructuredEntity> par, - std::ostream &out, bool writePTags) +Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(Handle<model::StructuredEntity> par) { - // validate descriptor. - if (par->getDescriptor()->getName() != "paragraph") { - throw OusiaException("Expected paragraph!"); - } + Manager &mgr = par->getManager(); + // create the p xml::Element + Rooted<xml::Element> p{new xml::Element{mgr, "p"}}; + // check if we have a heading. if (par->hasField("heading")) { Rooted<model::StructuredEntity> heading = par->getField("heading")[0]; - // start the heading tag - out << "\t\t<h5>"; - // the second field marks the heading. So let's write it. - writeParagraph(heading, out, false); - // close the heading tag. - out << "</h5>\n"; - } - // write start tag - if (writePTags) { - out << "\t\t<p>"; - } - // write content - // TODO: What about emphasis? - for (auto &text : par->getField()) { - if (text->getDescriptor()->getName() != "text") { - throw OusiaException("Expected text!"); + // put the heading in a strong xml::Element. + Rooted<xml::Element> strong{new xml::Element{mgr, "strong"}}; + p->children.push_back(strong); + // extract the heading text, enveloped in a paragraph Element. + Rooted<xml::Element> h_content = transformParagraph(heading); + // We omit the paragraph Element and add the children directly to the + // heading Element + for (auto &n : h_content->children) { + strong->children.push_back(n); } - Handle<model::DocumentPrimitive> primitive = - text->getField()[0].cast<model::DocumentPrimitive>(); - if (primitive.isNull()) { - throw OusiaException("Text field is not primitive!"); - } - out << primitive->getContent().asString(); } - // write end tag - if (writePTags) { - out << "</p>\n"; + + // transform paragraph children to XML as well + for (auto &n : par->getField()) { + std::string childDescriptorName = n->getDescriptor()->getName(); + if (childDescriptorName == "text") { + Handle<model::DocumentPrimitive> primitive = + n->getField()[0].cast<model::DocumentPrimitive>(); + if (primitive.isNull()) { + throw OusiaException("Text field is not primitive!"); + } + p->children.push_back( + new xml::Text(mgr, primitive->getContent().asString())); + } + // TODO: Handle non-text content } + return p; } } } diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp index ca9bcd2..70a5daa 100644 --- a/src/plugins/html/DemoOutput.hpp +++ b/src/plugins/html/DemoOutput.hpp @@ -33,6 +33,7 @@ #include <ostream> #include <core/model/Document.hpp> +#include <core/XML.hpp> namespace ousia { namespace html { @@ -40,15 +41,11 @@ namespace html { class DemoHTMLTransformer { private: /** - * This method is to be called recursively to write a chapter, section or - * subsection to HTML. + * These methods are called recursively to transform a document to an XML + * tree. */ - void writeSection(Handle<model::StructuredEntity> sec, std::ostream& out); - /** - * This method is to be called recursively to write a paragraph to HTML. - */ - void writeParagraph(Handle<model::StructuredEntity> par, std::ostream& out, - bool writePTags = true); + Rooted<xml::Element> transformSection(Handle<model::StructuredEntity> sec); + Rooted<xml::Element> transformParagraph(Handle<model::StructuredEntity> par); /** * This method is to be called recursively to write a list to HTML. * TODO: Implement diff --git a/test/core/XMLTest.cpp b/test/core/XMLTest.cpp new file mode 100644 index 0000000..124b58d --- /dev/null +++ b/test/core/XMLTest.cpp @@ -0,0 +1,78 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <core/XML.hpp> + +#include <sstream> + +namespace ousia { +namespace xml { + +TEST(Node, testSerialize) +{ + Manager mgr; + + Rooted<Element> html{new Element{mgr, "html"}}; + Rooted<Element> head{new Element{mgr, "head"}}; + html->children.push_back(head); + Rooted<Element> title{new Element{mgr, "title"}}; + head->children.push_back(title); + title->children.push_back(new Text(mgr, "my title")); + Rooted<Element> body{new Element{mgr, "body"}}; + html->children.push_back(body); + // This div element contains our text. + Rooted<Element> div{ + new Element{mgr, "div", {{"class", "content"}, {"id", "1"}}}}; + body->children.push_back(div); + Rooted<Element> p{new Element{mgr, "p"}}; + div->children.push_back(p); + p->children.push_back(new Text(mgr, "my text")); + Rooted<Element> p2{new Element{mgr, "p"}}; + div->children.push_back(p2); + p2->children.push_back(new Text(mgr, "my text")); + + // Now this is what we expect to see: + std::string expected{ + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<html>\n" + "\t<head>\n" + "\t\t<title>\n" + "\t\t\tmy title\n" + "\t\t</title>\n" + "\t</head>\n" + "\t<body>\n" + "\t\t<div class=\"content\" id=\"1\">\n" + "\t\t\t<p>\n" + "\t\t\t\tmy text\n" + "\t\t\t</p>\n" + "\t\t\t<p>\n" + "\t\t\t\tmy text\n" + "\t\t\t</p>\n" + "\t\t</div>\n" + "\t</body>\n" + "</html>\n"}; + + // check if it is what we see + std::stringstream ss; + html->serialize(ss); + ASSERT_EQ(expected, ss.str()); +} +} +} |