diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/XML.cpp | 29 | ||||
-rw-r--r-- | src/core/XML.hpp | 7 | ||||
-rw-r--r-- | src/core/model/Document.cpp | 52 | ||||
-rw-r--r-- | src/core/model/Document.hpp | 58 | ||||
-rw-r--r-- | src/core/model/Domain.hpp | 7 | ||||
-rw-r--r-- | src/plugins/html/DemoOutput.cpp | 131 | ||||
-rw-r--r-- | src/plugins/html/DemoOutput.hpp | 32 |
7 files changed, 256 insertions, 60 deletions
diff --git a/src/core/XML.cpp b/src/core/XML.cpp index 038cb86..7f03b35 100644 --- a/src/core/XML.cpp +++ b/src/core/XML.cpp @@ -4,12 +4,16 @@ namespace ousia { namespace xml { -void Node::serialize(std::ostream& out){ +void Node::serialize(std::ostream &out, const std::string &doctype) +{ out << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; + if (doctype != "") { + out << doctype << "\n"; + } doSerialize(out, 0); } -void Element::doSerialize(std::ostream& out, unsigned int tabdepth) +void Element::doSerialize(std::ostream &out, unsigned int tabdepth) { for (unsigned int t = 0; t < tabdepth; t++) { out << '\t'; @@ -18,17 +22,22 @@ void Element::doSerialize(std::ostream& out, unsigned int tabdepth) for (auto &a : attributes) { out << ' ' << a.first << "=\"" << a.second << '\"'; } - out << ">\n"; - for (auto &n : children) { - n->doSerialize(out, tabdepth + 1); - } - for (unsigned int t = 0; t < tabdepth; t++) { - out << '\t'; + // if we have no children, we close the tag immediately. + if (children.size() == 0) { + out << "/>\n"; + } else { + out << ">\n"; + for (auto &n : children) { + n->doSerialize(out, tabdepth + 1); + } + for (unsigned int t = 0; t < tabdepth; t++) { + out << '\t'; + } + out << "</" << name << ">\n"; } - out << "</" << name << ">\n"; } -void Text::doSerialize(std::ostream& out, unsigned int tabdepth) +void Text::doSerialize(std::ostream &out, unsigned int tabdepth) { for (unsigned int t = 0; t < tabdepth; t++) { out << '\t'; diff --git a/src/core/XML.hpp b/src/core/XML.hpp index 9ca124a..51ef6fd 100644 --- a/src/core/XML.hpp +++ b/src/core/XML.hpp @@ -64,9 +64,12 @@ public: /** * This method writes an XML prolog and the XML representing the current * node, including all children, to the given output stream. - * @param out is the output stream the serialized data shall be written to. + * @param out is the output stream the serialized data shall be + * written to. + * @param doctype enables you to add a prefix after the XML prolog + * specifying the doctype. */ - void serialize(std::ostream &out); + void serialize(std::ostream &out, const std::string & doctype = ""); /** * This method just writes the XML representation of this node to the * output stream, without the XML prolog. diff --git a/src/core/model/Document.cpp b/src/core/model/Document.cpp index e43337f..945fb3e 100644 --- a/src/core/model/Document.cpp +++ b/src/core/model/Document.cpp @@ -153,6 +153,9 @@ Rooted<StructuredEntity> StructuredEntity::buildEntity( return {nullptr}; } // append the new entity to the right field. + if (!parent->hasField(fieldName)) { + return {nullptr}; + } NodeVector<StructuredEntity> &field = parent->getField(fieldName); field.push_back(entity); @@ -176,13 +179,60 @@ Rooted<DocumentPrimitive> DocumentPrimitive::buildEntity( return {nullptr}; } // append the new entity to the right field. + if (!parent->hasField(fieldName)) { + return {nullptr}; + } NodeVector<StructuredEntity> &field = parent->getField(fieldName); field.push_back(entity); - // and return it. return entity; } +Rooted<AnnotationEntity::Anchor> AnnotationEntity::buildAnchor( + Handle<DocumentEntity> parent, std::string id, const std::string &fieldName) +{ + // If the parent is not set, we can not build the anchor. + if (parent == nullptr) { + return {nullptr}; + } + // Then construct the Anchor itself + Rooted<Anchor> anchor{ + new AnnotationEntity::Anchor(parent->getManager(), parent, id)}; + // append the new entity to the right field. + if (!parent->hasField(fieldName)) { + return {nullptr}; + } + NodeVector<StructuredEntity> &field = parent->getField(fieldName); + field.push_back(anchor); + // and return it. + return anchor; +} + +Rooted<AnnotationEntity> AnnotationEntity::buildEntity( + Handle<Document> parent, std::vector<Handle<Domain>> domains, + const std::string &className, Handle<AnnotationEntity::Anchor> start, + Handle<AnnotationEntity::Anchor> end, Variant attributes, std::string name) +{ + // If the parent is not set, we can not build the AnnotationEntity. + if (parent == nullptr) { + return {nullptr}; + } + // If we can not find the correct descriptor, we can not build the entity + // either. + Rooted<StructuredClass> descriptor = resolveDescriptor(domains, className); + if (descriptor == nullptr) { + return {nullptr}; + } + // Then construct the AnnotationEntity itself + Rooted<AnnotationEntity> anno{ + new AnnotationEntity(parent->getManager(), parent, descriptor, + attributes, start, end, name)}; + // append the new entity to the document + parent->getAnnotations().push_back(anno); + // and return it. + return anno; +} + /* Type registrations */ } diff --git a/src/core/model/Document.hpp b/src/core/model/Document.hpp index 7523962..993df9e 100644 --- a/src/core/model/Document.hpp +++ b/src/core/model/Document.hpp @@ -207,21 +207,15 @@ public: * information please refer to the header documentation above. */ class StructuredEntity : public DocumentEntity { -private: - NodeVector<AnnotationEntity> annotations; - public: StructuredEntity(Manager &mgr, Handle<Node> parent, Handle<StructuredClass> descriptor, Variant attributes, std::string name = "") : DocumentEntity(mgr, parent, descriptor, std::move(attributes), - std::move(name)), - annotations(this) + std::move(name)) { } - NodeVector<AnnotationEntity> &getAnnotations() { return annotations; } - /** * This builds the root StructuredEntity for the given document. It * automatically appends the newly build entity to the given document. @@ -343,12 +337,11 @@ public: public: /** * @param mgr is the Manager instance. - * @param name is the Anchor id. * @param parent is the parent of this Anchor in the Structure Tree (!), * not the AnnotationEntity that references this Anchor. + * @param name is the Anchor id. */ - Anchor(Manager &mgr, Handle<StructuredEntity> parent, - std::string name = "") + Anchor(Manager &mgr, Handle<DocumentEntity> parent, std::string name) : StructuredEntity(mgr, parent, nullptr, Variant(), std::move(name)) { } @@ -372,6 +365,45 @@ public: Rooted<Anchor> getStart() { return start; } Rooted<Anchor> getEnd() { return end; } + + /** + * This builds an Anchor as child of the given DocumentEntity. It + * automatically appends the newly build Anchor to its parent. + * + * @param parent is the parent DocumentEntity. The newly constructed + * Anchor will automatically be appended to it. + * @param id is the id of this Anchor. + * @param fieldName is the name of the field where the newly constructed + * Anchor shall be appended. + * + * @return the newly created Anchor or a nullptr if some + * input handle was empty. + */ + static Rooted<Anchor> buildAnchor(Handle<DocumentEntity> parent, + std::string id, + const std::string &fieldName = ""); + /** + * This builds an AnnotationEntity as child of the given DocumentEntity. It + * automatically appends the newly build entity to its parent. + * + * @param parent is the document the newly constructed AnnotationEntity + * will be appended to. + * @param domains are the domains that are used to find the + * AnnotationClass for the new node. The domains will be + * searched in the given order. + * @param className is the name of the AnnotationClass. + * @param attributes are the attributes of the new node in terms of a Struct + * variant (empty per default). + * @param name is the name of this AnnotationEntity (empty per + * default). + * @return the newly created AnnotationEntity or a nullptr if some + * input handle was empty or the given domains did not + * contain a AnnotationClass with the given name. + */ + static Rooted<AnnotationEntity> buildEntity(Handle<Document> parent, std::vector<Handle<Domain>> domains, + const std::string &className, + Handle<Anchor> start, Handle<Anchor> end, + Variant attributes = Variant(), std::string name = ""); }; /** @@ -382,17 +414,21 @@ class Document : public Node { private: // TODO: Might there be several roots? E.g. metadata? Owned<StructuredEntity> root; + NodeVector<AnnotationEntity> annotations; public: Document(Manager &mgr, std::string name) // TODO: Can a document have a parent? - : Node(mgr, std::move(name), nullptr) + : Node(mgr, std::move(name), nullptr), + annotations(this) { } void setRoot(Handle<StructuredEntity> root) { this->root = acquire(root); }; Rooted<StructuredEntity> getRoot() const { return root; } + + NodeVector<AnnotationEntity> getAnnotations() { return annotations; } }; } diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 18ebfb4..7412ef4 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -521,6 +521,13 @@ public: * This class has no special properties and is in essence just a Descriptor. */ class AnnotationClass : public Descriptor { +public: + AnnotationClass(Manager &mgr, std::string name, Handle<Domain> domain, + // TODO: What would be a wise default value for attributes? + Handle<StructType> attributesDescriptor) + : Descriptor(mgr, std::move(name), domain, attributesDescriptor) + { + } }; /** diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp index 035ba25..92ff88c 100644 --- a/src/plugins/html/DemoOutput.cpp +++ b/src/plugins/html/DemoOutput.cpp @@ -16,6 +16,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <stack> + + #include <core/common/Exceptions.hpp> #include <core/common/Rtti.hpp> #include <core/common/Variant.hpp> @@ -30,10 +33,18 @@ void DemoHTMLTransformer::writeHTML(Handle<model::Document> doc, { Manager &mgr = doc->getManager(); // Create an XML object tree for the document first. - Rooted<xml::Element> html{new xml::Element{mgr, "html"}}; + Rooted<xml::Element> html{new xml::Element{ + mgr, "html", {{"xlmns", "http://www.w3.org/1999/xhtml"}}}}; // add the head Element Rooted<xml::Element> head{new xml::Element{mgr, "head"}}; html->children.push_back(head); + // add the meta element. + Rooted<xml::Element> meta{ + new xml::Element{mgr, + "meta", + {{"http-equiv", "Content-Type"}, + {"content", "text/html; charset=utf-8"}}}}; + head->children.push_back(meta); // add the title Element with Text Rooted<xml::Element> title{new xml::Element{mgr, "title"}}; head->children.push_back(title); @@ -45,31 +56,42 @@ void DemoHTMLTransformer::writeHTML(Handle<model::Document> doc, // So far was the "preamble". No we have to get to the document content. + // build the start and end map for annotation processing. + AnnoMap startMap; + AnnoMap endMap; + for (auto &a : doc->getAnnotations()) { + // we assume uniquely IDed annotations, which should be checked in the + // validation process. + startMap.emplace(a->getStart()->getName(), a); + endMap.emplace(a->getEnd()->getName(), a); + } + // extract the book root node. Rooted<model::StructuredEntity> root = doc->getRoot(); if (root->getDescriptor()->getName() != "book") { throw OusiaException("The given documents root is no book node!"); } // transform the book node. - Rooted<xml::Element> book = transformSection(root); + Rooted<xml::Element> book = transformSection(root, startMap, endMap); // add it as child to the body node. body->children.push_back(book); // After the content has been transformed, we serialize it. - html->serialize(out); + html->serialize( + out, + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n" + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"); } /** * This is just for easier internal handling. */ -enum class SectionType { BOOK, CHAPTER, SECTION, SUBSECTION, NONE }; +enum class SectionType { BOOK, SECTION, SUBSECTION, NONE }; SectionType getSectionType(const std::string &name) { if (name == "book") { return SectionType::BOOK; - } else if (name == "chapter") { - return SectionType::CHAPTER; } else if (name == "section") { return SectionType::SECTION; } else if (name == "subsection") { @@ -79,7 +101,8 @@ SectionType getSectionType(const std::string &name) } } -Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::StructuredEntity> section) +Rooted<xml::Element> DemoHTMLTransformer::transformSection( + Handle<model::StructuredEntity> section, AnnoMap &startMap, AnnoMap &endMap) { Manager &mgr = section->getManager(); // check the section type. @@ -93,7 +116,8 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu Rooted<xml::Element> sec{ new xml::Element{mgr, "div", {{"class", secclass}}}}; // check if we have a heading. - if (section->hasField("heading")) { + if (section->hasField("heading") && + section->getField("heading").size() > 0) { Rooted<model::StructuredEntity> heading = section->getField("heading")[0]; std::string headingclass; @@ -101,14 +125,11 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu case SectionType::BOOK: headingclass = "h1"; break; - case SectionType::CHAPTER: - headingclass = "h2"; - break; case SectionType::SECTION: - headingclass = "h3"; + headingclass = "h2"; break; case SectionType::SUBSECTION: - headingclass = "h4"; + headingclass = "h3"; break; case SectionType::NONE: // this can not happen; @@ -117,7 +138,8 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu Rooted<xml::Element> h{new xml::Element{mgr, headingclass}}; sec->children.push_back(h); // extract the heading text, enveloped in a paragraph Element. - Rooted<xml::Element> h_content = transformParagraph(heading); + Rooted<xml::Element> h_content = + transformParagraph(heading, startMap, endMap); // We omit the paragraph Element and add the children directly to the // heading Element for (auto &n : h_content->children) { @@ -138,12 +160,11 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu const std::string childDescriptorName = n->getDescriptor()->getName(); Rooted<xml::Element> child; if (childDescriptorName == "paragraph") { - child = transformParagraph(n); - // TODO: Implement - // } else if(childDescriptorName == "ul"){ - // writeList(n, out); + child = transformParagraph(n, startMap, endMap); + } else if (childDescriptorName == "ul" || childDescriptorName == "ol") { + child = transformList(n, startMap, endMap); } else { - child = transformSection(n); + child = transformSection(n, startMap, endMap); } if (!child.isNull()) { sec->children.push_back(child); @@ -152,29 +173,90 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu return sec; } -Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(Handle<model::StructuredEntity> par) +Rooted<xml::Element> DemoHTMLTransformer::transformList( + Handle<model::StructuredEntity> list, AnnoMap &startMap, AnnoMap &endMap) +{ + Manager &mgr = list->getManager(); + // create the list Element, which is either ul or ol (depends on descriptor) + std::string listclass = list->getDescriptor()->getName(); + Rooted<xml::Element> l{new xml::Element{mgr, listclass}}; + // iterate through list items. + for (auto &item : list->getField()) { + std::string itDescrName = item->getDescriptor()->getName(); + if (itDescrName == "item") { + // create the list item. + Rooted<xml::Element> li{new xml::Element{mgr, "li"}}; + l->children.push_back(li); + // extract the item text, enveloped in a paragraph Element. + Rooted<xml::Element> li_content = + transformParagraph(item, startMap, endMap); + // We omit the paragraph Element and add the children directly to + // the list item + for (auto &n : li_content->children) { + li->children.push_back(n); + } + } + } + return l; +} + +typedef model::AnnotationEntity::Anchor Anchor; +typedef std::stack<Rooted<model::AnnotationEntity>> AnnoStack; + +Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( + Handle<model::StructuredEntity> par, AnnoMap &startMap, AnnoMap &endMap) { Manager &mgr = par->getManager(); - // create the p xml::Element + // create the p Element Rooted<xml::Element> p{new xml::Element{mgr, "p"}}; // check if we have a heading. - if (par->hasField("heading")) { + if (par->hasField("heading") && par->getField("heading").size() > 0) { Rooted<model::StructuredEntity> heading = par->getField("heading")[0]; // put the heading in a strong xml::Element. Rooted<xml::Element> strong{new xml::Element{mgr, "strong"}}; p->children.push_back(strong); // extract the heading text, enveloped in a paragraph Element. - Rooted<xml::Element> h_content = transformParagraph(heading); + Rooted<xml::Element> h_content = + transformParagraph(heading, startMap, endMap); // We omit the paragraph Element and add the children directly to the // heading Element for (auto &n : h_content->children) { strong->children.push_back(n); } } - + // transform paragraph children to XML as well for (auto &n : par->getField()) { + if (n->isa(typeOf<Anchor>())) { + //TODO: This needs some more brain work. +// // check if this is a start Anchor. +// auto it = startMap.find(n->getName()); +// if(it != startMap.end()){ +// // if we have a start Anchor, we put another AnnotationEntity +// // on top the stack. +// opened.push(it->second); +// // and we create an open tag. +// +// continue; +// } +// // check if this is an end Anchor. +// auto it = endMap.find(n->getName()); +// if(it != endMap.end()){ +// /* +// * Now it gets somewhat interesting: We have to close all +// * tags that started after the one that is closed now and +// * re-open them afterwards. So we create a lokal stack to +// * temporarily store all AnnotationEntities that need to +// * be re-opened. +// */ +// AnnoStack tmp; +// Rooted< +// while(!opened.empty() && ) +// } +// + continue; + } std::string childDescriptorName = n->getDescriptor()->getName(); if (childDescriptorName == "text") { Handle<model::DocumentPrimitive> primitive = @@ -185,7 +267,6 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(Handle<model::Struc p->children.push_back( new xml::Text(mgr, primitive->getContent().asString())); } - // TODO: Handle non-text content } return p; } diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp index 70a5daa..e08ec2b 100644 --- a/src/plugins/html/DemoOutput.hpp +++ b/src/plugins/html/DemoOutput.hpp @@ -30,6 +30,7 @@ #ifndef _OUSIA_HTML_DEMO_OUTPUT_HPP_ #define _OUSIA_HTML_DEMO_OUTPUT_HPP_ +#include <map> #include <ostream> #include <core/model/Document.hpp> @@ -38,22 +39,31 @@ namespace ousia { namespace html { +typedef std::map<std::string, Rooted<model::AnnotationEntity>> AnnoMap; + class DemoHTMLTransformer { private: /** - * These methods are called recursively to transform a document to an XML - * tree. + * This transforms a section-like entity, namely book, section + * and subsection, to an XHTML element, including its header. For the + * children of the default field the respective transform function is + * called recursively. */ - Rooted<xml::Element> transformSection(Handle<model::StructuredEntity> sec); - Rooted<xml::Element> transformParagraph(Handle<model::StructuredEntity> par); + Rooted<xml::Element> transformSection(Handle<model::StructuredEntity> sec, + AnnoMap& startMap, AnnoMap& endMap); /** - * This method is to be called recursively to write a list to HTML. - * TODO: Implement + * This transforms a list entity, namely ul and ol to an XHTML element. + * For each item, the transformParagraph function is called. */ -// void writeList(Handle<StructuredEntity> sec, std::ostream& out, -// int tabdepth); - - //TODO: Implement emphasis. + Rooted<xml::Element> transformList(Handle<model::StructuredEntity> list, + AnnoMap& startMap, AnnoMap& endMap); + /** + * This transforms a paragraph-like entity, namely heading, item and + * paragraph, to an XHTML element including the text and the anchors + * contained. For anchor handling we require the AnnoMaps. + */ + Rooted<xml::Element> transformParagraph(Handle<model::StructuredEntity> par, + AnnoMap& startMap, AnnoMap& endMap); public: /** @@ -74,7 +84,7 @@ public: * and lists domains but no other. * @param out is the output stream the data shall be written to. */ - void writeHTML(Handle<model::Document> doc, std::ostream& out); + void writeHTML(Handle<model::Document> doc, std::ostream &out); }; } } |