/*
Ousía
Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include "DemoOutput.hpp"
namespace ousia {
namespace html {
// Stack of the annotations that are currently open; openAnnotation() pushes a
// newly opened annotation on top, so the innermost annotation is at the top.
// NOTE(review): the template arguments of this declaration appear to have been
// lost (as written it does not parse) -- presumably a stack of rooted
// annotation entity handles; confirm against the original file.
typedef std::stack> AnnoStack;
/**
 * Checks whether the annotation an Anchor belongs to can be rendered by this
 * transformer. Only annotations whose descriptor is named "emph" or "strong"
 * are supported.
 *
 * @param a is the Anchor that should be checked.
 * @return true if the Anchor starts or ends an annotation of a supported
 * class, false otherwise (including Anchors that neither start nor end an
 * annotation).
 */
static bool canHandleAnchor(Handle a)
{
    // An Anchor that is neither the start nor the end of an annotation is
    // disconnected. There is nothing to render for it, and presumably it has
    // no annotation whose descriptor could be queried, so it is rejected
    // before the annotation is dereferenced.
    if (!a->isStart() && !a->isEnd()) {
        return false;
    }
    const std::string annoClassName =
        a->getAnnotation()->getDescriptor()->getName();
    return annoClassName == "emph" || annoClassName == "strong";
}
/**
 * Marks an annotation as opened and, unless only the stack should be updated,
 * creates the XML element that represents it.
 *
 * @param mgr is the Manager used for creating the new XML element.
 * @param opened is the stack of currently opened annotations; the given
 * entity is always pushed onto it.
 * @param entity is the annotation that is being opened.
 * @param current is the XML element the new element is appended to. It is not
 * accessed if stackOnly is true.
 * @param stackOnly if true, only the stack is updated and no element is
 * created.
 * @return the newly created element, or nullptr if stackOnly is true.
 */
static Rooted openAnnotation(Manager &mgr, AnnoStack &opened,
                             Handle entity,
                             Handle current,
                             bool stackOnly)
{
    // the annotation counts as opened in both modes.
    opened.push(entity);
    if (!stackOnly) {
        // the element is named after the annotation class, except that
        // "emph" is written as "em".
        const std::string descriptorName = entity->getDescriptor()->getName();
        const std::string tagName =
            (descriptorName == "emph") ? "em" : descriptorName;
        // create the element below the current one and hand it back.
        Rooted element{new xml::Element{mgr, current, tagName}};
        current->addChild(element);
        return element;
    }
    return nullptr;
}
/**
 * Handles a single Anchor by opening or closing the annotation it belongs to.
 *
 * A start Anchor opens its annotation via openAnnotation(). An end Anchor
 * closes its annotation: all annotations that were opened after it are closed
 * implicitly as well and are re-opened afterwards, in their original nesting
 * order. An Anchor that is neither start nor end is ignored.
 *
 * If an end Anchor refers to an annotation that is not on the stack, an error
 * is logged and neither the stack nor the position in the XML tree is
 * changed, so that the remaining content is still rendered consistently.
 *
 * @param mgr is the Manager used for creating new XML elements.
 * @param a is the Anchor that should be handled.
 * @param current is the XML element new content is currently appended to.
 * @param logger receives the error message for an unmatched end Anchor.
 * @param opened is the stack of currently opened annotations; it is updated
 * in place.
 * @param stackOnly if true, only the stack is updated and the XML tree is
 * left untouched. In this mode no elements exist for the stacked annotations,
 * so the returned value carries no position information (the callers that
 * use this mode discard it).
 * @return the XML element subsequent content should be appended to.
 */
static Rooted transformAnchor(Manager &mgr, Handle a,
                              Handle current,
                              Logger &logger, AnnoStack &opened,
                              bool stackOnly)
{
    // a start Anchor simply opens an annotation element.
    if (a->isStart()) {
        return openAnnotation(mgr, opened, a->getAnnotation(), current,
                              stackOnly);
    }
    // a disconnected Anchor can be ignored.
    if (!a->isEnd()) {
        return current;
    }

    /*
     * Look for the annotation on a copy of the stack first. Everything above
     * it was opened later and has to be closed and re-opened; those entries
     * are collected in "reopen" (outermost one on top). Working on a copy
     * makes sure that nothing is modified if the annotation is not found.
     */
    AnnoStack remaining = opened;
    AnnoStack reopen;
    while (!remaining.empty() && remaining.top() != a->getAnnotation()) {
        reopen.push(remaining.top());
        remaining.pop();
    }
    if (remaining.empty()) {
        // the annotation was never opened, that is a malformed document.
        logger.error("An unopened entity was closed!", *a);
        return current;
    }
    // drop the closed annotation itself.
    remaining.pop();

    /*
     * Tags are closed implicitly by climbing up the XML tree: one level for
     * the closed annotation plus one for each annotation opened after it.
     * With stackOnly no elements were created for the stacked annotations, so
     * there is nothing to climb out of.
     */
    if (!stackOnly) {
        for (auto levels = reopen.size() + 1; levels > 0; --levels) {
            current = current->getParent();
        }
    }

    // commit the new stack state and re-open what was closed implicitly.
    opened = remaining;
    while (!reopen.empty()) {
        auto entity = reopen.top();
        reopen.pop();
        current = openAnnotation(mgr, opened, entity, current, stackOnly);
    }
    return current;
}
/**
 * Creates nested XML elements for all annotations on the given stack, the
 * bottom-most (first opened) annotation becoming the outermost element.
 * The stack is taken by value, so the caller's stack is not modified.
 *
 * @param mgr is the Manager used for creating the XML elements.
 * @param opened is a copy of the stack of currently opened annotations.
 * @param parent is the element below which the annotations are re-opened.
 * @return the innermost opened element, or the parent if the stack is empty.
 */
static Rooted reOpenAnnotations(Manager &mgr, AnnoStack opened,
                                Handle parent)
{
    // reverse the stack so that the outermost annotation ends up on top.
    AnnoStack outermostFirst;
    for (; !opened.empty(); opened.pop()) {
        outermostFirst.push(opened.top());
    }
    // open one element per annotation, each nested inside the previous one.
    // openAnnotation() pushes onto the local copy only.
    Rooted innermost = parent;
    for (; !outermostFirst.empty(); outermostFirst.pop()) {
        innermost = openAnnotation(mgr, opened, outermostFirst.top(),
                                   innermost, false);
    }
    return innermost;
}
/**
 * Transforms a paragraph-like entity into a "p" XML element.
 *
 * If the descriptor of the entity has a "heading" field and that field is not
 * empty, its first entry is rendered inside a leading "strong" element. Then
 * all annotations on the given stack are re-opened inside the new element and
 * the entries of the default field are processed: Anchors that can be handled
 * open or close annotation elements, entities whose descriptor is named "text"
 * contribute a text node, everything else is skipped.
 *
 * The new element is created with the given parent but is not added to the
 * parent's children here; that is left to the caller.
 *
 * @param mgr is the Manager used for creating new XML nodes.
 * @param parent is the XML element the new "p" element is created for.
 * @param par is the entity that should be transformed.
 * @param logger is passed on to the Anchor handling.
 * @param opened is the stack of currently opened annotations. It is updated
 * by the Anchors found in the paragraph.
 * @return the new "p" element.
 */
static Rooted transformParagraph(Manager &mgr,
Handle parent,
Handle par,
Logger &logger,
AnnoStack &opened)
{
// create the p Element
Rooted p{new xml::Element{mgr, parent, "p"}};
// check if we have a heading.
if (par->getDescriptor()->hasField("heading") &&
par->getField("heading").size() > 0) {
// only the first entry of the heading field is used.
Handle heading =
par->getField("heading")[0].cast();
// put the heading in a strong xml::Element.
Rooted strong{new xml::Element{mgr, p, "strong"}};
p->addChild(strong);
// extract the heading text, enveloped in a paragraph Element.
// in this case we use an empty annotation stack because annotations do
// not extend on subtree fields.
AnnoStack emptyStack;
Rooted h_content =
transformParagraph(mgr, strong, heading, logger, emptyStack);
// We omit the paragraph Element and add the children directly to the
// heading Element
for (auto &n : h_content->getChildren()) {
strong->addChild(n);
}
}
// reopen all annotations. "current" is the innermost element, to which new
// content is appended.
Rooted current = reOpenAnnotations(mgr, opened, p);
// transform paragraph children to XML as well
for (auto &n : par->getField()) {
if (n->isa(&RttiTypes::Anchor)) {
Rooted a = n.cast();
// Anchors of unsupported annotation classes are skipped silently.
if (canHandleAnchor(a)) {
current =
transformAnchor(mgr, a, current, logger, opened, false);
}
continue;
}
// if this is not an anchor, we can only handle text.
if (!n->isa(&RttiTypes::StructuredEntity)) {
continue;
}
Handle t = n.cast();
std::string childDescriptorName = t->getDescriptor()->getName();
if (childDescriptorName == "text") {
// the text is read from the first entry of the default field of the
// text entity.
// NOTE(review): assumes that this field is never empty -- confirm that
// document validation guarantees this.
Handle primitive =
t->getField()[0].cast();
std::string text_content = primitive->getContent().asString();
current->addChild(new xml::Text(mgr, current, text_content));
}
}
// at this point we implicitly close all annotations that are left opened.
// they will be reopened in the next paragraph.
return p;
}
/**
 * Transforms a list entity into an XML element that is named after the
 * descriptor of the list.
 *
 * Anchors between the items only update the annotation stack. Each entity
 * whose descriptor is named "item" becomes a "li" element; entries that are
 * neither Anchors nor structured entities are skipped, as are entities of any
 * other class.
 *
 * The new element is created with the given parent but is not added to the
 * parent's children here; that is left to the caller.
 *
 * @param mgr is the Manager used for creating new XML nodes.
 * @param parent is the XML element the list element is created for.
 * @param list is the list entity that should be transformed.
 * @param logger is passed on to the Anchor and paragraph handling.
 * @param opened is the stack of currently opened annotations. It is updated
 * by the Anchors found in the list.
 * @return the new list element.
 */
static Rooted transformList(Manager &mgr,
                            Handle parent,
                            Handle list,
                            Logger &logger, AnnoStack &opened)
{
    // create the list Element, which is either ul or ol (depends on descriptor)
    std::string listclass = list->getDescriptor()->getName();
    Rooted l{new xml::Element{mgr, parent, listclass}};
    // iterate through list items.
    for (auto &it : list->getField()) {
        if (it->isa(&RttiTypes::Anchor)) {
            Rooted a = it.cast();
            if (canHandleAnchor(a)) {
                // just put the entity on the AnnoStack, but do not open it
                // explicitly. That will be done inside the next paragraph.
                transformAnchor(mgr, a, l, logger, opened, true);
            }
            continue;
        }
        // anything that is not a structured entity must not be cast to one;
        // this is the same check the paragraph and section transformations
        // perform before casting.
        if (!it->isa(&RttiTypes::StructuredEntity)) {
            continue;
        }
        Handle item = it.cast();
        std::string itDescrName = item->getDescriptor()->getName();
        if (itDescrName == "item") {
            // create the list item.
            Rooted li{new xml::Element{mgr, l, "li"}};
            l->addChild(li);
            // extract the item text, enveloped in a paragraph Element.
            Rooted li_content =
                transformParagraph(mgr, li, item, logger, opened);
            // We omit the paragraph Element and add the children directly to
            // the list item
            for (auto &n : li_content->getChildren()) {
                li->addChild(n);
            }
        }
    }
    return l;
}
/**
 * Internal classification of the section-like entities this transformer
 * knows. NONE stands for everything that is not a section.
 */
enum class SectionType { BOOK, CHAPTER, SECTION, SUBSECTION, NONE };

/**
 * Maps the name of a descriptor to the corresponding SectionType.
 *
 * @param name is the descriptor name. The comparison is exact and thus
 * case-sensitive.
 * @return the matching SectionType, or SectionType::NONE if the name does not
 * denote a section.
 */
static SectionType getSectionType(const std::string &name)
{
    // table of all known section class names.
    static const struct {
        const char *className;
        SectionType type;
    } knownTypes[] = {{"book", SectionType::BOOK},
                      {"chapter", SectionType::CHAPTER},
                      {"section", SectionType::SECTION},
                      {"subsection", SectionType::SUBSECTION}};

    for (const auto &entry : knownTypes) {
        if (name == entry.className) {
            return entry.type;
        }
    }
    return SectionType::NONE;
}
/**
 * Transforms a section-like entity ("book", "chapter", "section" or
 * "subsection") into a "section" XML element whose "class" attribute holds the
 * name of the entity's descriptor.
 *
 * If the descriptor has a "heading" field and that field is not empty, its
 * first entry is rendered as an h1 to h4 element, depending on the section
 * type. The entries of the default field are then transformed: Anchors only
 * update the annotation stack, "paragraph" entities and "ul"/"ol" entities are
 * handled by the respective transformations and every other structured entity
 * is treated as a nested section.
 *
 * The new element is created with the given parent but is not added to the
 * parent's children here; that is left to the caller.
 *
 * @param mgr is the Manager used for creating new XML nodes.
 * @param parent is the XML element the section element is created for.
 * @param section is the entity that should be transformed.
 * @param logger is passed on to the nested transformations.
 * @param opened is the stack of currently opened annotations. It is updated
 * by the Anchors found in the section and its children.
 * @return the new "section" element, or a null handle if the descriptor of
 * the entity does not name a known section type.
 */
static Rooted transformSection(Manager &mgr,
Handle parent,
Handle section,
Logger &logger, AnnoStack &opened)
{
// check the section type.
const std::string secclass = section->getDescriptor()->getName();
SectionType type = getSectionType(secclass);
if (type == SectionType::NONE) {
// if the input node is no section, we ignore it.
return {nullptr};
}
// create a section tag containing the sections content.
Rooted sec{
new xml::Element{mgr, parent, "section", {{"class", secclass}}}};
// check if we have a heading.
if (section->getDescriptor()->hasField("heading") &&
section->getField("heading").size() > 0) {
// only the first entry of the heading field is used.
Handle heading =
section->getField("heading")[0].cast();
// the heading level follows the nesting depth of the section type.
std::string headingclass;
switch (type) {
case SectionType::BOOK:
headingclass = "h1";
break;
case SectionType::CHAPTER:
headingclass = "h2";
break;
case SectionType::SECTION:
headingclass = "h3";
break;
case SectionType::SUBSECTION:
headingclass = "h4";
break;
case SectionType::NONE:
// this can not happen, NONE was handled by the early return above.
break;
}
Rooted h{new xml::Element{mgr, sec, headingclass}};
sec->addChild(h);
// extract the heading text, wrapped in a paragraph Element.
// in this case we use an empty annotation stack because annotations do
// not extend on subtree fields.
AnnoStack emptyStack;
Rooted h_content =
transformParagraph(mgr, h, heading, logger, emptyStack);
// We omit the paragraph element and add the children directly to the
// heading Element
for (auto &n : h_content->getChildren()) {
h->addChild(n);
}
}
// Then we get all the children.
for (auto &n : section->getField()) {
if (n->isa(&RttiTypes::Anchor)) {
Rooted a = n.cast();
if (canHandleAnchor(a)) {
// just put the entity on the AnnoStack, but do not open it
// explicitly. That will be done inside the next paragraph.
transformAnchor(mgr, a, sec, logger, opened, true);
}
continue;
}
// everything else that is not a structured entity is skipped.
if (!n->isa(&RttiTypes::StructuredEntity)) {
continue;
}
Handle s = n.cast();
/*
 * Strictly speaking this is the wrong mechanism, because we would have
 * to make an "isa" call here: we can not rely on our knowledge that the
 * children can only be paragraphs, lists or sections. There would have
 * to be a listener structure of transformations that check if they can
 * transform this specific node.
 */
const std::string childDescriptorName = s->getDescriptor()->getName();
Rooted child;
if (childDescriptorName == "paragraph") {
child = transformParagraph(mgr, sec, s, logger, opened);
} else if (childDescriptorName == "ul" || childDescriptorName == "ol") {
child = transformList(mgr, sec, s, logger, opened);
} else {
// anything else is tried as a nested section; the result is null if it
// is not a known section type.
child = transformSection(mgr, sec, s, logger, opened);
}
// children that could not be transformed are left out.
if (!child.isNull()) {
sec->addChild(child);
}
}
return sec;
}
/**
 * Writes the given document as HTML to the given output stream.
 *
 * The document is validated first; if validation fails nothing is written.
 * Otherwise an XML tree with a fixed "head" (meta, title and style elements)
 * and a "body" containing the transformed book is built and serialized.
 *
 * @param doc is the document that should be written. Its root entity must
 * have a descriptor named "book".
 * @param out is the stream the HTML is serialized to.
 * @param logger receives validation and transformation messages.
 * @param pretty is passed on to the XML serialization.
 * @throws OusiaException if the root of the document is not a "book" entity.
 */
void DemoHTMLTransformer::writeHTML(Handle doc, std::ostream &out,
Logger &logger, bool pretty)
{
// validate the document. An invalid document is not written at all.
if (!doc->validate(logger)) {
return;
}
Manager &mgr = doc->getManager();
// initialize an empty annotation Stack.
AnnoStack opened;
// Create an XML object tree for the document first.
Rooted html{new xml::Element{mgr, {nullptr}, "html"}};
// add the head Element
Rooted head{new xml::Element{mgr, html, "head"}};
html->addChild(head);
// add the meta element.
Rooted meta{
new xml::Element{mgr,
head,
"meta",
{{"http-equiv", "Content-Type"},
{"content", "text/html; charset=utf-8"}}}};
head->addChild(meta);
// add the title Element with Text
Rooted title{new xml::Element{mgr, head, "title"}};
head->addChild(title);
title->addChild(
new xml::Text(mgr, title, "Test HTML Output for " + doc->getName()));
// add some stylish styles
Rooted style{
new xml::Element{mgr, head, "style", {{"type", "text/css"}}}};
head->addChild(style);
Rooted css{new xml::Text{mgr, style,
"body { font-family: 'CMU Serif', "
"serif;}\n p { text-align: justify; "
"hyphens: auto; }"}};
style->addChild(css);
// add the body Element
Rooted body{new xml::Element{mgr, html, "body"}};
html->addChild(body);
// So far this was the "preamble". Now we have to get to the document
// content.
// extract the book root node.
Rooted root = doc->getRoot();
if (root->getDescriptor()->getName() != "book") {
throw OusiaException("The given documents root is no book node!");
}
// transform the book node. The check above makes sure the root is a known
// section type.
Rooted book =
transformSection(mgr, body, root, logger, opened);
// add it as child to the body node.
body->addChild(book);
// After the content has been transformed, we serialize it.
html->serialize(out, "", pretty);
}
}
}