From fd8ce97afb16e17102ec8f109103ed334ad0e939 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Tue, 6 Jan 2015 22:38:49 +0100 Subject: added XML classes including Serialization functions and added a test for it. I tried not to include Managed.hpp to prevent further overhead but I failed miserably. --- src/core/XML.hpp | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 src/core/XML.hpp (limited to 'src/core/XML.hpp') diff --git a/src/core/XML.hpp b/src/core/XML.hpp new file mode 100644 index 0000000..824d6ce --- /dev/null +++ b/src/core/XML.hpp @@ -0,0 +1,123 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/** + * @file XML.hpp + * + * This header provides XML classes to build an XML tree as well as functions + * to serialize that XMl tree to text. We do not support the full XML + * specification (like described here: http://www.w3.org/TR/REC-xml/ ) but only + * a small subset. This subset is defined by the following context-free grammar: + * + * NODE := ELEMENT | string + * ELEMENT := START NODES END + * NODES := NODE NODES | epsilon + * START := < name ATTRIBUTES > + * ATTRIBUTES := ATTRIBUTE ATTRIBUTES | epsilon + * ATTRIBUTE := key = "value" + * END := + * + * where the Axiom of a document is "Element". Note that we accept only a + * singular root element and no primitive text at root level. Attributes are + * key-value pairs of strings. Start and end tag name have to match. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ +#ifndef _OUSIA_XML_HPP_ +#define _OUSIA_XML_HPP_ + +#include +#include +#include + +#include +#include + +namespace ousia { +namespace xml { + +/** + * Node is the common super-class of actual elements (tag-bounded) and text. + * It specifies the pure virtual serialize() function that the subclasses + * implement. + */ +class Node : public Managed { +public: + Node(Manager &mgr) : Managed(mgr){}; + + /** + * When called this Node should serialize its data and write it to the + * given output stream. In case of Elements this includes child elements. + * + * @param out the output stream the serialized data shall be written + * to. + * @param tabdepth the current tabdepth for prettier output. + */ + virtual void serialize(std::ostream &out, unsigned int tabdepth) = 0; +}; + +/** + * An element in XML is defined as by the W3C: + * + * http://www.w3.org/TR/REC-xml/#sec-starttags + * + * For as an element necessarily has a name. It may have key-value pairs as + * attributes, where each key is unique (which is enforced by std::map). + * Additionally it might have other Nodes as children. + */ +class Element : public Node { +public: + const std::string name; + std::map attributes; + ManagedVector children; + + Element(Manager &mgr, std::string name) : Node(mgr), name(std::move(name)) + { + } + + Element(Manager &mgr, std::string name, + std::map attributes) + : Node(mgr), name(std::move(name)), attributes(std::move(attributes)) + { + } + + /** + * This writes the following to the output stream: + * * The start tag of this element including name and attributes + * * The serialized data of all children as ordered by the vector. + * * The end tag of this element. + * + */ + void serialize(std::ostream &out, unsigned int tabdepth = 0) override; +}; + +class Text : public Node { +public: + const std::string text; + + Text(Manager &mgr, std::string text) : Node(mgr), text(std::move(text)) {} + + /** + * This just writes the text to the output. + * + */ + void serialize(std::ostream &out, unsigned int tabdepth = 0) override; +}; +} +} +#endif -- cgit v1.2.3 From 33b92b72ed160f22dc627e841d5f84de4ebc0c6c Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Thu, 8 Jan 2015 15:17:40 +0100 Subject: Changed the DemoOutput algorithm as suggested by Andreas: We first transform the document graph to an XML tree and the XML tree in turn has the methods to serialize to XML text, or, in this case, XHTML text. --- src/core/XML.cpp | 11 ++- src/core/XML.hpp | 17 ++-- src/plugins/html/DemoOutput.cpp | 170 +++++++++++++++++++++------------------- src/plugins/html/DemoOutput.hpp | 13 ++- test/core/XMLTest.cpp | 1 + 5 files changed, 115 insertions(+), 97 deletions(-) (limited to 'src/core/XML.hpp') diff --git a/src/core/XML.cpp b/src/core/XML.cpp index ad69ba1..038cb86 100644 --- a/src/core/XML.cpp +++ b/src/core/XML.cpp @@ -4,7 +4,12 @@ namespace ousia { namespace xml { -void Element::serialize(std::ostream& out, unsigned int tabdepth) +void Node::serialize(std::ostream& out){ + out << "\n"; + doSerialize(out, 0); +} + +void Element::doSerialize(std::ostream& out, unsigned int tabdepth) { for (unsigned int t = 0; t < tabdepth; t++) { out << '\t'; @@ -15,7 +20,7 @@ void Element::serialize(std::ostream& out, unsigned int tabdepth) } out << ">\n"; for (auto &n : children) { - n->serialize(out, tabdepth + 1); + n->doSerialize(out, tabdepth + 1); } for (unsigned int t = 0; t < tabdepth; t++) { out << '\t'; @@ -23,7 +28,7 @@ void Element::serialize(std::ostream& out, unsigned int tabdepth) out << "\n"; } -void Text::serialize(std::ostream& out, unsigned int tabdepth) +void Text::doSerialize(std::ostream& out, unsigned int tabdepth) { for (unsigned int t = 0; t < tabdepth; t++) { out << '\t'; diff --git a/src/core/XML.hpp b/src/core/XML.hpp index 824d6ce..9ca124a 100644 --- a/src/core/XML.hpp +++ b/src/core/XML.hpp @@ -57,18 +57,25 @@ namespace xml { * implement. */ class Node : public Managed { + public: Node(Manager &mgr) : Managed(mgr){}; /** - * When called this Node should serialize its data and write it to the - * given output stream. In case of Elements this includes child elements. + * This method writes an XML prolog and the XML representing the current + * node, including all children, to the given output stream. + * @param out is the output stream the serialized data shall be written to. + */ + void serialize(std::ostream &out); + /** + * This method just writes the XML representation of this node to the + * output stream, without the XML prolog. * * @param out the output stream the serialized data shall be written * to. * @param tabdepth the current tabdepth for prettier output. */ - virtual void serialize(std::ostream &out, unsigned int tabdepth) = 0; + virtual void doSerialize(std::ostream &out, unsigned int tabdepth) = 0; }; /** @@ -103,7 +110,7 @@ public: * * The end tag of this element. * */ - void serialize(std::ostream &out, unsigned int tabdepth = 0) override; + void doSerialize(std::ostream &out, unsigned int tabdepth) override; }; class Text : public Node { @@ -116,7 +123,7 @@ public: * This just writes the text to the output. * */ - void serialize(std::ostream &out, unsigned int tabdepth = 0) override; + void doSerialize(std::ostream &out, unsigned int tabdepth) override; }; } } diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp index 463a5d2..035ba25 100644 --- a/src/plugins/html/DemoOutput.cpp +++ b/src/plugins/html/DemoOutput.cpp @@ -18,6 +18,7 @@ #include #include +#include #include "DemoOutput.hpp" @@ -27,24 +28,35 @@ namespace html { void DemoHTMLTransformer::writeHTML(Handle doc, std::ostream &out) { - // write preamble - out << "\n"; - out << "\n"; - out << "\t\n"; - out << "\t\tTest HTML Output for " << doc->getName() << "\n"; - out << "\t\n"; - out << "\t\n"; - - // look for the book root node. + Manager &mgr = doc->getManager(); + // Create an XML object tree for the document first. + Rooted html{new xml::Element{mgr, "html"}}; + // add the head Element + Rooted head{new xml::Element{mgr, "head"}}; + html->children.push_back(head); + // add the title Element with Text + Rooted title{new xml::Element{mgr, "title"}}; + head->children.push_back(title); + title->children.push_back( + new xml::Text(mgr, "Test HTML Output for " + doc->getName())); + // add the body Element + Rooted body{new xml::Element{mgr, "body"}}; + html->children.push_back(body); + + // So far was the "preamble". No we have to get to the document content. + + // extract the book root node. Rooted root = doc->getRoot(); if (root->getDescriptor()->getName() != "book") { throw OusiaException("The given documents root is no book node!"); } - // write it to HTML. - writeSection(root, out); - // write end - out << "\t\n"; - out << "\n"; + // transform the book node. + Rooted book = transformSection(root); + // add it as child to the body node. + body->children.push_back(book); + + // After the content has been transformed, we serialize it. + html->serialize(out); } /** @@ -67,61 +79,54 @@ SectionType getSectionType(const std::string &name) } } -void DemoHTMLTransformer::writeSection(Handle sec, - std::ostream &out) +Rooted DemoHTMLTransformer::transformSection(Handle section) { + Manager &mgr = section->getManager(); // check the section type. - SectionType type = getSectionType(sec->getDescriptor()->getName()); + const std::string secclass = section->getDescriptor()->getName(); + SectionType type = getSectionType(secclass); if (type == SectionType::NONE) { // if the input node is no section, we ignore it. - return; + return {nullptr}; } + // create a div tag containing the sections content. + Rooted sec{ + new xml::Element{mgr, "div", {{"class", secclass}}}}; // check if we have a heading. - if (sec->hasField("heading")) { - Rooted heading = sec->getField("heading")[0]; - out << "\t\t"; + if (section->hasField("heading")) { + Rooted heading = + section->getField("heading")[0]; + std::string headingclass; switch (type) { case SectionType::BOOK: - out << "

"; + headingclass = "h1"; break; case SectionType::CHAPTER: - out << "

"; + headingclass = "h2"; break; case SectionType::SECTION: - out << "

"; + headingclass = "h3"; break; case SectionType::SUBSECTION: - out << "

"; + headingclass = "h4"; break; case SectionType::NONE: // this can not happen; break; } - // the second field marks the heading. So let's write it. - writeParagraph(heading, out, false); - // close the heading tag. - switch (type) { - case SectionType::BOOK: - out << "

"; - break; - case SectionType::CHAPTER: - out << ""; - break; - case SectionType::SECTION: - out << ""; - break; - case SectionType::SUBSECTION: - out << ""; - break; - case SectionType::NONE: - // this can not happen; - break; + Rooted h{new xml::Element{mgr, headingclass}}; + sec->children.push_back(h); + // extract the heading text, enveloped in a paragraph Element. + Rooted h_content = transformParagraph(heading); + // We omit the paragraph Element and add the children directly to the + // heading Element + for (auto &n : h_content->children) { + h->children.push_back(n); } - out << "\n"; } - // then write the section content recursively. - NodeVector mainField = sec->getField(); + // Then we get all the children. + NodeVector mainField = section->getField(); for (auto &n : mainField) { /* * Strictly speaking this is the wrong mechanism, because we would have @@ -130,56 +135,59 @@ void DemoHTMLTransformer::writeSection(Handle sec, * to be a listener structure of transformations that check if they can * transform this specific node. */ - std::string childDescriptorName = n->getDescriptor()->getName(); + const std::string childDescriptorName = n->getDescriptor()->getName(); + Rooted child; if (childDescriptorName == "paragraph") { - writeParagraph(n, out); + child = transformParagraph(n); // TODO: Implement // } else if(childDescriptorName == "ul"){ // writeList(n, out); } else { - writeSection(n, out); + child = transformSection(n); + } + if (!child.isNull()) { + sec->children.push_back(child); } } + return sec; } -void DemoHTMLTransformer::writeParagraph(Handle par, - std::ostream &out, bool writePTags) +Rooted DemoHTMLTransformer::transformParagraph(Handle par) { - // validate descriptor. - if (par->getDescriptor()->getName() != "paragraph") { - throw OusiaException("Expected paragraph!"); - } + Manager &mgr = par->getManager(); + // create the p xml::Element + Rooted p{new xml::Element{mgr, "p"}}; + // check if we have a heading. if (par->hasField("heading")) { Rooted heading = par->getField("heading")[0]; - // start the heading tag - out << "\t\t
"; - // the second field marks the heading. So let's write it. - writeParagraph(heading, out, false); - // close the heading tag. - out << "
\n"; - } - // write start tag - if (writePTags) { - out << "\t\t

"; - } - // write content - // TODO: What about emphasis? - for (auto &text : par->getField()) { - if (text->getDescriptor()->getName() != "text") { - throw OusiaException("Expected text!"); + // put the heading in a strong xml::Element. + Rooted strong{new xml::Element{mgr, "strong"}}; + p->children.push_back(strong); + // extract the heading text, enveloped in a paragraph Element. + Rooted h_content = transformParagraph(heading); + // We omit the paragraph Element and add the children directly to the + // heading Element + for (auto &n : h_content->children) { + strong->children.push_back(n); } - Handle primitive = - text->getField()[0].cast(); - if (primitive.isNull()) { - throw OusiaException("Text field is not primitive!"); - } - out << primitive->getContent().asString(); } - // write end tag - if (writePTags) { - out << "

\n"; + + // transform paragraph children to XML as well + for (auto &n : par->getField()) { + std::string childDescriptorName = n->getDescriptor()->getName(); + if (childDescriptorName == "text") { + Handle primitive = + n->getField()[0].cast(); + if (primitive.isNull()) { + throw OusiaException("Text field is not primitive!"); + } + p->children.push_back( + new xml::Text(mgr, primitive->getContent().asString())); + } + // TODO: Handle non-text content } + return p; } } } diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp index ca9bcd2..70a5daa 100644 --- a/src/plugins/html/DemoOutput.hpp +++ b/src/plugins/html/DemoOutput.hpp @@ -33,6 +33,7 @@ #include #include +#include namespace ousia { namespace html { @@ -40,15 +41,11 @@ namespace html { class DemoHTMLTransformer { private: /** - * This method is to be called recursively to write a chapter, section or - * subsection to HTML. + * These methods are called recursively to transform a document to an XML + * tree. */ - void writeSection(Handle sec, std::ostream& out); - /** - * This method is to be called recursively to write a paragraph to HTML. - */ - void writeParagraph(Handle par, std::ostream& out, - bool writePTags = true); + Rooted transformSection(Handle sec); + Rooted transformParagraph(Handle par); /** * This method is to be called recursively to write a list to HTML. * TODO: Implement diff --git a/test/core/XMLTest.cpp b/test/core/XMLTest.cpp index aeedb86..124b58d 100644 --- a/test/core/XMLTest.cpp +++ b/test/core/XMLTest.cpp @@ -50,6 +50,7 @@ TEST(Node, testSerialize) // Now this is what we expect to see: std::string expected{ + "\n" "\n" "\t\n" "\t\t\n" -- cgit v1.2.3