diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-01-04 22:21:52 +0100 |
---|---|---|
committer | Andreas Stöckel <andreas@somweyr.de> | 2015-01-04 22:21:52 +0100 |
commit | 0d671f899da720ff5035bdab7adf6b11cbf80cb1 (patch) | |
tree | 9839d502ff323198d0f9cb479e9809a978c5c550 | |
parent | 7e1c72a5a5d5dc890a49f79c5daec3edcb38a33b (diff) | |
parent | 319ad738f677a20403cc27192f1df7bb65ce8c0e (diff) |
Merge branch 'master' of somweyr.de:ousia
-rw-r--r-- | CMakeLists.txt | 21 | ||||
-rw-r--r-- | src/core/model/Document.cpp | 20 | ||||
-rw-r--r-- | src/core/model/Document.hpp | 97 | ||||
-rw-r--r-- | src/core/model/Domain.hpp | 229 | ||||
-rw-r--r-- | src/plugins/css/CSSParser.hpp | 10 | ||||
-rw-r--r-- | src/plugins/html/DemoOutput.cpp | 185 | ||||
-rw-r--r-- | src/plugins/html/DemoOutput.hpp | 85 | ||||
-rw-r--r-- | test/core/model/DocumentTest.cpp | 62 | ||||
-rw-r--r-- | test/core/model/TestDocument.hpp | 26 | ||||
-rw-r--r-- | test/core/model/TestDomain.hpp | 15 | ||||
-rw-r--r-- | test/plugins/html/DemoOutputTest.cpp | 49 |
11 files changed, 671 insertions, 128 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a494c3..10e43ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,6 +153,14 @@ TARGET_LINK_LIBRARIES(ousia_css ousia_core ) +ADD_LIBRARY(ousia_html + src/plugins/html/DemoOutput +) + +TARGET_LINK_LIBRARIES(ousia_html + ousia_core +) + ADD_LIBRARY(ousia_xml src/plugins/xml/XmlParser ) @@ -233,6 +241,16 @@ IF(TEST) ousia_css ) + ADD_EXECUTABLE(ousia_test_html + test/plugins/html/DemoOutputTest + ) + + TARGET_LINK_LIBRARIES(ousia_test_html + ${GTEST_LIBRARIES} + ousia_core + ousia_html + ) + ADD_EXECUTABLE(ousia_test_xml test/plugins/xml/XmlParserTest ) @@ -256,8 +274,9 @@ IF(TEST) # Register the unit tests ADD_TEST(ousia_test_core ousia_test_core) ADD_TEST(ousia_test_boost ousia_test_boost) - ADD_TEST(ousia_test_xml ousia_test_xml) ADD_TEST(ousia_test_css ousia_test_css) + ADD_TEST(ousia_test_html ousia_test_html) + ADD_TEST(ousia_test_xml ousia_test_xml) # ADD_TEST(ousia_test_mozjs ousia_test_mozjs) ENDIF() diff --git a/src/core/model/Document.cpp b/src/core/model/Document.cpp index 854e717..e5d0755 100644 --- a/src/core/model/Document.cpp +++ b/src/core/model/Document.cpp @@ -57,20 +57,22 @@ int DocumentEntity::getFieldDescriptorIndex(const std::string &fieldName) return -1; } -void DocumentEntity::getField(NodeVector<StructuredEntity> &res, - const std::string &fieldName) +NodeVector<StructuredEntity> &DocumentEntity::getField( + const std::string &fieldName) { int f = getFieldDescriptorIndex(fieldName); if (f < 0) { - NodeVector<StructuredEntity> empty{this}; - res = NodeVector<StructuredEntity>(this); + throw OusiaException("No field for the given name exists!"); } - res = fields[f]; + return fields[f]; } NodeVector<StructuredEntity> &DocumentEntity::getField( - Rooted<FieldDescriptor> fieldDescriptor) + Handle<FieldDescriptor> fieldDescriptor) { + if(fieldDescriptor.isNull()){ + throw OusiaException("The given FieldDescriptor handle is null!"); + } const NodeVector<FieldDescriptor> &fds = descriptor->getFieldDescriptors(); int f = 0; for (auto &fd : fds) { @@ -155,8 +157,7 @@ Rooted<StructuredEntity> StructuredEntity::buildEntity( return {nullptr}; } // append the new entity to the right field. - NodeVector<StructuredEntity> field(parent); - parent->getField(field, fieldName); + NodeVector<StructuredEntity>& field = parent->getField(fieldName); field.push_back(entity); // and return it. @@ -179,8 +180,7 @@ Rooted<DocumentPrimitive> DocumentPrimitive::buildEntity( return {nullptr}; } // append the new entity to the right field. - NodeVector<StructuredEntity> field(parent); - parent->getField(field, fieldName); + NodeVector<StructuredEntity>& field = parent->getField(fieldName); field.push_back(entity); // and return it. diff --git a/src/core/model/Document.hpp b/src/core/model/Document.hpp index 2d792c5..fabdcaf 100644 --- a/src/core/model/Document.hpp +++ b/src/core/model/Document.hpp @@ -32,21 +32,33 @@ * Structure Nodes, effectively resulting in a Document Graph instead of a * Document Tree (other references may introduce cycles as well). * - * Consider this simplified XML representation of a document (TODO: Use - * non-simplified XML as soon as possible): + * Consider this XML representation of a document using the "book" domain: * - * <Document implements="book"> - * <StructureEntity class="book"> - * <StructureEntity class="section"> - * <DocumentPrimitive> - * This is some text with some <Anchor id="1"/>emphasized and - * <Anchor id="2"/>strong<Anchor id="3"/> text. - * </DocumentPrimitive> - * <AnnotationEntity class="emphasized" start="1", end="3"/> - * <AnnotationEntity class="strong" start="2", end="3"/> - * </StructureEntity> - * </StructureEntity> - * </Document> + * <doc> + * <head> + * <import rel="domain" src="book_domain.oxm"/> + * <import rel="domain" src="emphasized_domain.oxm"/> + * <alias tag="paragraph" aka="p"/> + * </head> + * <book> + * This might be some introductory text or a dedication. Ideally, of + * course, such elements would be semantically specified as such in + * additional domains (or in this one). + * <chapter name="myFirstChapter"> + * Here we might have an introduction to the chapter, including some + * overview of the chapters structure. + * <section name="myFirstSection"> + * Here we might find the actual section content. + * </section> + * <section name="mySndSection"> + * Here we might find the actual section <em>content</em>. + * + * + * And there might even be another paragraph. + * </section> + * </chapter> + * </book> + * </doc> * * As can be seen the StructureEntities inherently follow a tree structure that * is restricted by the implicit context free grammar of the "book" Domain @@ -56,12 +68,32 @@ * Another interesting fact is the special place of AnnotationEntities: They are * Defined by start and end Anchors in the text. Note that this allows for * overlapping annotations and provides a more intuitive (and semantically - * sound) handling of such span-like concepts. + * sound) handling of such span-like concepts. So the + * + * <em>content</em> + * + * is implicitly expanded to: + * + * <a id="1"/>content<a id="2"/> + * <emphasized start="1" end="2"/> + * * Note that the place of an AnnotationEntity within the XML above is not * strictly defined. It might as well be placed as a child of the "book" node. * In general it is recommended to use the lowest possible place in the * StructureTree to include the AnnotationEntity for better readability. * + * Also note that text content like + * + * Here we might find the actual section content. + * + * is implicitly expanded using transparency to: + * + * <paragraph> + * <text> + * Here we might find the actual section content. + * </text> + * </paragraph> + * * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ @@ -124,12 +156,6 @@ public: Variant getAttributes() const { return attributes; } /** - * This allows a direct manipulation of the internal data structure of a - * DocumentEntity and is not recommended. TODO: Delete this? - */ - std::vector<NodeVector<StructuredEntity>> &getFields() { return fields; } - - /** * This returns true if there is a FieldDescriptor in the Descriptor for * this DocumentEntity which has the given name. If an empty name is * given it is assumed that the 'default' FieldDescriptor is referenced, @@ -148,29 +174,18 @@ public: /** * This returns the vector of entities containing all members of the field - * for which the FieldDescriptor has the specified name. If an empty name is - * given it is assumed that the 'default' FieldDescriptor is referenced, - * where 'default' means either: + * with the given name. If an empty name is given it is assumed that the + * 'default' FieldDescriptor is referenced, where 'default' means either: * 1.) The only TREE typed FieldDescriptor (if present) or * 2.) the only FieldDescriptor (if only one is specified). * - * Note that the output of this method might well be ambigous: If no - * FieldDescriptor matches the given name an empty NodeVector is - * returned. This is also the case, however, if there are no members for an - * existing field. Therefore it is recommended to additionally check the - * output of "hasField" or use the version of this method with - * a FieldDescriptor as input. + * If the name is unknown an exception is thrown. * - * @param fieldName is the name of the field as specified in the + * @param fieldName is the name of a field as specified in the * FieldDescriptor in the Domain description. - * @param res is a NodeVector reference where the result will be - * stored. After using this method the reference will - * either refer to all StructuredEntities in that field. If - * the field is unknown or if no members exist in that - * field yet, the NodeVector will be empty. + * @return a NodeVector of all StructuredEntities in that field. */ - void getField(NodeVector<StructuredEntity> &res, - const std::string &fieldName = ""); + NodeVector<StructuredEntity> &getField(const std::string &fieldName = ""); /** * This returns the vector of entities containing all members of the field @@ -184,7 +199,7 @@ public: * @return a NodeVector of all StructuredEntities in that field. */ NodeVector<StructuredEntity> &getField( - Rooted<FieldDescriptor> fieldDescriptor); + Handle<FieldDescriptor> fieldDescriptor); }; /** @@ -365,7 +380,7 @@ public: */ class Document : public Node { private: - //TODO: Might there be several roots? E.g. metadata? + // TODO: Might there be several roots? E.g. metadata? Owned<StructuredEntity> root; public: @@ -375,7 +390,7 @@ public: { } - void setRoot(Handle<StructuredEntity> root) { root = acquire(root); }; + void setRoot(Handle<StructuredEntity> root) { this->root = acquire(root); }; Rooted<StructuredEntity> getRoot() const { return root; } }; diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index cd74a19..7e4e9f7 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -30,24 +30,65 @@ * terms "StructuredClass" and "FieldDescriptor". * On the top level you would start with a StructuredClass, say "book", which * in turn might contain two FieldDescriptors, one for the meta data of ones - * book and one for the actual structure. Consider the following (simplified) - * XML notation (TODO: Use a non-simplified notation as soon as the format is - * clear.) - * - * <StructuredClass name="book"> - * <FieldDescriptor name="structure", type="TREE", optional="false"> - * <children> - * Here we would reference the possible child classes, e.g. section, - * paragraph, etc. - * </children> - * </FieldDescriptor> - * <FieldDescriptor name="meta", type="SUBTREE", optional="true"> - * <children> - * Here we would reference the possible child classes for meta, - * information, e.g. authors, date, version, etc. - * </children> - * </FieldDescriptor> - * </StructuredClass> + * book and one for the actual structure. Consider the following XML: + * + * <domain name="book"> + * <structs> + * <struct name="book" cardinality="1" isRoot="true"> + * <fields> + * <field> + * <children> + * <child name="book.chapter"/> + * <child name="book.paragraph"/> + * </children> + * </field> + * </fields> + * </struct> + * <struct name="chapter"> + * <fields> + * <field> + * <children> + * <child name="book.section"/> + * <child name="book.paragraph"/> + * </children> + * </field> + * </fields> + * </struct> + * <struct name="section"> + * <fields> + * <field> + * <children> + * <child name="book.subsection"/> + * <child name="book.paragraph"/> + * </children> + * </field> + * </fields> + * </struct> + * <struct name="subsection"> + * <fields> + * <field> + * <children> + * <child name="book.paragraph"/> + * </children> + * </field> + * </fields> + * </struct> + * <struct name="paragraph" transparent="true" role="paragraph"> + * <fields> + * <field> + * <children> + * <child name="book.text"/> + * </children> + * </field> + * </fields> + * </struct> + * <struct name="text" transparent="true" role="text"> + * <fields> + * <field name="content" type="PRIMITIVE" primitiveType="string"/> + * </fields> + * </struct> + * </structs> + * </domain> * * Note that we define one field as the TREE (meaning the main or default * document structure) and one mearly as SUBTREE, relating to supporting @@ -58,11 +99,19 @@ * TREE field and at least one permitted child must exist, either primitive or * as another StructuredClass. * - * The translation to context free grammars is roughly as follows: + * The translation to context free grammars is as follows: * - * BOOK := book BOOK_STRUCTURE BOOK_META - * BOOK_STRUCTURE := SECTION BOOK_STRUCTURE | PARAGRAPH BOOK_STRUCTURE | epsilon - * BOOK_META := AUTHOR BOOK_META | DATE BOOK_META + * BOOK := <book> BOOK_TREE </book> + * BOOK_TREE := CHAPTER BOOK_TREE | PARAGRAPH BOOK_TREE | epsilon + * CHAPTER := <chapter> CHAPTER_TREE </chapter> + * CHAPTER_TREE := SECTION CHAPTER_TREE | PARAGRAPH CHAPTER_TREE | epsilon + * SECTION := <section> SECTION_TREE </section> + * SECTION_TREE := SUBSECTION SECTION_TREE | PARAGRAPH SECTION_TREE | + * epsilon + * SUBSECTION := <subsection> SUBSECTION_TREE </subsection> + * SUBSECTION_TREE := PARAGRAPH SUBSECTION_TREE | epsilon + * PARAGRAPH := <paragraph> PARAGRAPH_CONTENT </paragraph> + * PARAGRAPH_CONTENT := string * * Note that this translation recurs to further nonterminals like SECTION but * necessarily produces one "book" terminal. Also note that, in principle, @@ -70,11 +119,72 @@ * the proper StructuredClass. This can be regulated by the "cardinality" * property of a StructuredClass. * + * It is possible to add further fields, like we would in the "headings" domain + * to add titles to our structure. + * + * <domain name="headings"> + * <head> + * <import rel="domain" src="book.oxm"/> + * </head> + * <structs> + * <struct name="heading" cardinality="0-1" transparent="true"> + * <parents> + * <parent name="book.book"> + * <field name="heading" type="SUBTREE"/> + * </parent> + * ... + * </parents> + * <fields> + * <fieldRef name="book.paragraph."> + * </fields> + * </structs> + * </domain> + * + * This would change the context free grammar as follows: + * + * BOOK := <book> HEADING BOOK_TREE </book> + * HEADING := <heading> PARAGRAPH </heading> + * * AnnotationClasses on the other hand do not specify a context free grammar. * They merely specify what kinds of Annotations are allowed within this domain * and which fields or attributes they have. Note that Annotations are allowed * to define structured children that manifest e.g. meta information of that - * Annotation. + * Annotation. An example for that would be the "comment" domain: + * + * <domain name="comments"> + * <head> + * <import rel="domain" src="book.oxm"/> + * </head> + * <annos> + * <anno name="comment"> + * <fields> + * <field name="replies" type="SUBTREE"> + * <children> + * <child name="reply"/> + * </children> + * </field> + * </fields> + * </anno> + * </annos> + * <structs> + * <struct name="reply"> + * <fields> + * <field name="replies" type="SUBTREE"> + * <children> + * <child name="reply"/> + * </children> + * </field> + * <field name="content" type="SUBTREE"> + * <children> + * <child name="book.paragraph"/> + * </children> + * </field> + * </fields> + * </struct> + * </structs> + * </domain> + * + * Here we have comment annotations, which have a reply tree as sub structure. * * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) */ @@ -105,13 +215,17 @@ class Domain; * Hierarchy. * * As an example consider the "paragraph" StructuredClass, which might allow - * the actual text content. Here is the according simplified XML (TODO: replace - * with a non-simplified version as soon as the XML syntax is clear.) + * the actual text content. Here is the according XML: * - * <StructuredClass name="paragraph"> - * <FieldDescriptor name="text", type="PRIMITIVE", optional="false", - * primitiveType="string"/> - * </StructuredClass> + * <struct name="paragraph" transparent="true" role="paragraph"> + * <fields> + * <field> + * <children> + * <child name="book.text"/> + * </children> + * </field> + * </fields> + * </struct> * * Accordingly the primitiveType field of a FieldDescriptor may only be * defined if the type is set to "PRIMITIVE". If the type is something else @@ -286,36 +400,28 @@ typedef RangeSet<size_t> Cardinality; * consult the Header documentation above. * * Note that a StructuredClass may "invade" an existing Domain description by - * defining itself as a viable child in one existing field. Consider a "section" - * StructuredClass (continuing the example in the header documentation): - * - * <StructuredClass name="section"> - * <FieldDescriptor name="structure", type="TREE", optional="false"> - * <children> - * <classRef>paragraph</classRef> - * </children> - * </FieldDescriptor> - * </StructuredClass> - * - * Of course in most cases we do not only want to allow paragraphs inside - * sections, but also (for example) lists. How would one add that - * without manipulating the existing domain or having to define an entirely - * new domain in which section allows for lists? - * - * Our solution to this problem is the parent mechanism. The simplified XML - * (TODO: Use non-simplified version as soon as possible) for the "list" - * StructuredClass would look like this: - * - * <StructuredClass name="list"> - * <FieldDescriptor name="structure", type="TREE", optional="false"> - * <children> - * <classRef>item</classRef> - * </children> - * </FieldDescriptor> - * <parents> - * <fieldRef>section.structure</fieldRef> - * </parents> - * </StructuredClass> + * defining itself as a viable child in one existing field. Consider the + * example of the "heading" domain from the header documentation again: + * + * <domain name="headings"> + * <head> + * <import rel="domain" src="book.oxm"/> + * </head> + * <structs> + * <struct name="heading" cardinality="0-1" transparent="true"> + * <parents> + * <parent name="book.book"> + * <field name="heading" type="SUBTREE"/> + * </parent> + * ... + * </parents> + * <fields> + * <fieldRef name="book.paragraph."> + * </fields> + * </structs> + * </domain> + * + * The "parent" construct allows to "invade" another domain. * * This does indeed interfere with an existing domain and one must carefully * craft such parent references to not create undesired side effects. However @@ -404,8 +510,7 @@ public: Handle<StructType> attributesDescriptor = nullptr, // TODO: What would be a wise default value for isa? Handle<StructuredClass> isa = nullptr, - bool transparent = false, - bool root = false) + bool transparent = false, bool root = false) : Descriptor(mgr, std::move(name), domain, attributesDescriptor), cardinality(cardinality), isa(acquire(isa)), @@ -497,9 +602,7 @@ extern const Rtti<model::Descriptor> Descriptor; extern const Rtti<model::StructuredClass> StructuredClass; extern const Rtti<model::AnnotationClass> AnnotationClass; extern const Rtti<model::Domain> Domain; - } - } #endif /* _OUSIA_MODEL_DOMAIN_HPP_ */ diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp index 6d84dbf..1ec54f5 100644 --- a/src/plugins/css/CSSParser.hpp +++ b/src/plugins/css/CSSParser.hpp @@ -15,7 +15,15 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ - +/** + * @file CSSParser.hpp + * + * Contains the classes needed to transform a CSS string to a CSS SelectorTree + * with attached RuleSets. The details are explained in the class + * documentations. + * + * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de + */ #ifndef _OUSIA_CSS_PARSER_HPP_ #define _OUSIA_CSS_PARSER_HPP_ diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp new file mode 100644 index 0000000..463a5d2 --- /dev/null +++ b/src/plugins/html/DemoOutput.cpp @@ -0,0 +1,185 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <core/common/Exceptions.hpp> +#include <core/common/Rtti.hpp> + +#include "DemoOutput.hpp" + +namespace ousia { +namespace html { + +void DemoHTMLTransformer::writeHTML(Handle<model::Document> doc, + std::ostream &out) +{ + // write preamble + out << "<?xml version=\" 1.0 \"?>\n"; + out << "<html>\n"; + out << "\t<head>\n"; + out << "\t\t<title>Test HTML Output for " << doc->getName() << "</title>\n"; + out << "\t</head>\n"; + out << "\t<body>\n"; + + // look for the book root node. + Rooted<model::StructuredEntity> root = doc->getRoot(); + if (root->getDescriptor()->getName() != "book") { + throw OusiaException("The given documents root is no book node!"); + } + // write it to HTML. + writeSection(root, out); + // write end + out << "\t</body>\n"; + out << "</html>\n"; +} + +/** + * This is just for easier internal handling. + */ +enum class SectionType { BOOK, CHAPTER, SECTION, SUBSECTION, NONE }; + +SectionType getSectionType(const std::string &name) +{ + if (name == "book") { + return SectionType::BOOK; + } else if (name == "chapter") { + return SectionType::CHAPTER; + } else if (name == "section") { + return SectionType::SECTION; + } else if (name == "subsection") { + return SectionType::SUBSECTION; + } else { + return SectionType::NONE; + } +} + +void DemoHTMLTransformer::writeSection(Handle<model::StructuredEntity> sec, + std::ostream &out) +{ + // check the section type. + SectionType type = getSectionType(sec->getDescriptor()->getName()); + if (type == SectionType::NONE) { + // if the input node is no section, we ignore it. + return; + } + // check if we have a heading. + if (sec->hasField("heading")) { + Rooted<model::StructuredEntity> heading = sec->getField("heading")[0]; + out << "\t\t"; + switch (type) { + case SectionType::BOOK: + out << "<h1>"; + break; + case SectionType::CHAPTER: + out << "<h2>"; + break; + case SectionType::SECTION: + out << "<h3>"; + break; + case SectionType::SUBSECTION: + out << "<h4>"; + break; + case SectionType::NONE: + // this can not happen; + break; + } + // the second field marks the heading. So let's write it. + writeParagraph(heading, out, false); + // close the heading tag. + switch (type) { + case SectionType::BOOK: + out << "</h1>"; + break; + case SectionType::CHAPTER: + out << "</h2>"; + break; + case SectionType::SECTION: + out << "</h3>"; + break; + case SectionType::SUBSECTION: + out << "</h4>"; + break; + case SectionType::NONE: + // this can not happen; + break; + } + out << "\n"; + } + + // then write the section content recursively. + NodeVector<model::StructuredEntity> mainField = sec->getField(); + for (auto &n : mainField) { + /* + * Strictly speaking this is the wrong mechanism, because we would have + * to make an "isa" call here because we can not rely on our knowledge + * that paragraphs can only be paragraphs or lists. There would have + * to be a listener structure of transformations that check if they can + * transform this specific node. + */ + std::string childDescriptorName = n->getDescriptor()->getName(); + if (childDescriptorName == "paragraph") { + writeParagraph(n, out); + // TODO: Implement + // } else if(childDescriptorName == "ul"){ + // writeList(n, out); + } else { + writeSection(n, out); + } + } +} + +void DemoHTMLTransformer::writeParagraph(Handle<model::StructuredEntity> par, + std::ostream &out, bool writePTags) +{ + // validate descriptor. + if (par->getDescriptor()->getName() != "paragraph") { + throw OusiaException("Expected paragraph!"); + } + // check if we have a heading. + if (par->hasField("heading")) { + Rooted<model::StructuredEntity> heading = par->getField("heading")[0]; + // start the heading tag + out << "\t\t<h5>"; + // the second field marks the heading. So let's write it. + writeParagraph(heading, out, false); + // close the heading tag. + out << "</h5>\n"; + } + // write start tag + if (writePTags) { + out << "\t\t<p>"; + } + // write content + // TODO: What about emphasis? + for (auto &text : par->getField()) { + if (text->getDescriptor()->getName() != "text") { + throw OusiaException("Expected text!"); + } + Handle<model::DocumentPrimitive> primitive = + text->getField()[0].cast<model::DocumentPrimitive>(); + if (primitive.isNull()) { + throw OusiaException("Text field is not primitive!"); + } + out << primitive->getContent().asString(); + } + // write end tag + if (writePTags) { + out << "</p>\n"; + } +} +} +} diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp new file mode 100644 index 0000000..ca9bcd2 --- /dev/null +++ b/src/plugins/html/DemoOutput.hpp @@ -0,0 +1,85 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file DemoOutput.hpp + * + * This implements a Demo HTML output for the following domains: + * * book + * * headings + * * emphasis + * * lists + * + * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de + */ +#ifndef _OUSIA_HTML_DEMO_OUTPUT_HPP_ +#define _OUSIA_HTML_DEMO_OUTPUT_HPP_ + +#include <ostream> + +#include <core/model/Document.hpp> + +namespace ousia { +namespace html { + +class DemoHTMLTransformer { +private: + /** + * This method is to be called recursively to write a chapter, section or + * subsection to HTML. + */ + void writeSection(Handle<model::StructuredEntity> sec, std::ostream& out); + /** + * This method is to be called recursively to write a paragraph to HTML. + */ + void writeParagraph(Handle<model::StructuredEntity> par, std::ostream& out, + bool writePTags = true); + /** + * This method is to be called recursively to write a list to HTML. + * TODO: Implement + */ +// void writeList(Handle<StructuredEntity> sec, std::ostream& out, +// int tabdepth); + + //TODO: Implement emphasis. + +public: + /** + * This writes a HTML representation of the given document to the given + * output stream. Note that this method lacks the generality of our Ousia + * approach with respect to two important points: + * 1.) It hardcodes the dependency to a certain set of domains in the C++ + * code. + * 2.) It does not use the proposed pipeline of first copying the document + * graph, then attaching style attributes and then transforming it to a + * specific output format but does all of these steps at once. + * 3.) It does not use different transformers for the different domains but + * does all transformations at once. + * Therefore this is not an adequate model of our algorithms but only a + * Demo. + * + * @param doc is a Document using concepts of the book, headings, emphasis + * and lists domains but no other. + * @param out is the output stream the data shall be written to. + */ + void writeHTML(Handle<model::Document> doc, std::ostream& out); +}; +} +} + +#endif diff --git a/test/core/model/DocumentTest.cpp b/test/core/model/DocumentTest.cpp index 9e3229c..a671d2c 100644 --- a/test/core/model/DocumentTest.cpp +++ b/test/core/model/DocumentTest.cpp @@ -37,8 +37,68 @@ TEST(Document, testDocumentConstruction) // Construct the document. Rooted<Document> doc = constructBookDocument(mgr, domain); - // If that works we are happy already. + // Check the document content. ASSERT_FALSE(doc.isNull()); + // get root node. + Rooted<StructuredEntity> root = doc->getRoot(); + ASSERT_FALSE(root.isNull()); + ASSERT_EQ("book", root->getDescriptor()->getName()); + ASSERT_TRUE(root->hasField()); + ASSERT_EQ(2, root->getField().size()); + // get foreword (paragraph) + { + Rooted<StructuredEntity> foreword = root->getField()[0]; + ASSERT_FALSE(foreword.isNull()); + ASSERT_EQ("paragraph", foreword->getDescriptor()->getName()); + // it should contain one text node + ASSERT_TRUE(foreword->hasField()); + ASSERT_EQ(1, foreword->getField().size()); + // which in turn should have a primitive content field containing the + // right text. + { + Rooted<StructuredEntity> text = foreword->getField()[0]; + ASSERT_FALSE(text.isNull()); + ASSERT_EQ("text", text->getDescriptor()->getName()); + ASSERT_TRUE(text->hasField()); + ASSERT_EQ(1, text->getField().size()); + ASSERT_TRUE(text->getField()[0]->isa(typeOf<DocumentPrimitive>())); + Variant content = + text->getField()[0].cast<DocumentPrimitive>()->getContent(); + ASSERT_EQ("Some introductory text", content.asString()); + } + } + // get section + { + Rooted<StructuredEntity> section = root->getField()[1]; + ASSERT_FALSE(section.isNull()); + ASSERT_EQ("section", section->getDescriptor()->getName()); + // it should contain one paragraph + ASSERT_TRUE(section->hasField()); + ASSERT_EQ(1, section->getField().size()); + { + Rooted<StructuredEntity> par = section->getField()[0]; + ASSERT_FALSE(par.isNull()); + ASSERT_EQ("paragraph", par->getDescriptor()->getName()); + // it should contain one text node + ASSERT_TRUE(par->hasField()); + ASSERT_EQ(1, par->getField().size()); + // which in turn should have a primitive content field containing + // the + // right text. + { + Rooted<StructuredEntity> text = par->getField()[0]; + ASSERT_FALSE(text.isNull()); + ASSERT_EQ("text", text->getDescriptor()->getName()); + ASSERT_TRUE(text->hasField()); + ASSERT_EQ(1, text->getField().size()); + ASSERT_TRUE( + text->getField()[0]->isa(typeOf<DocumentPrimitive>())); + Variant content = + text->getField()[0].cast<DocumentPrimitive>()->getContent(); + ASSERT_EQ("Some actual text", content.asString()); + } + } + } } } } diff --git a/test/core/model/TestDocument.hpp b/test/core/model/TestDocument.hpp index a1a3434..6b0267a 100644 --- a/test/core/model/TestDocument.hpp +++ b/test/core/model/TestDocument.hpp @@ -50,13 +50,18 @@ static Rooted<Document> constructBookDocument(Manager &mgr, return {nullptr}; } // Add its text. - Variant text{std::map<std::string, Variant>{ - {"content", Variant("Some introductory text")}}}; - Rooted<DocumentPrimitive> foreword_text = - DocumentPrimitive::buildEntity(foreword, text, "text"); + Rooted<StructuredEntity> foreword_text = + StructuredEntity::buildEntity(foreword, {bookDomain}, "text"); if (foreword_text.isNull()) { return {nullptr}; } + // And its primitive content + Variant text{"Some introductory text"}; + Rooted<DocumentPrimitive> foreword_primitive = + DocumentPrimitive::buildEntity(foreword_text, text, "content"); + if (foreword_primitive.isNull()) { + return {nullptr}; + } // Add a section. Rooted<StructuredEntity> section = StructuredEntity::buildEntity(root, {bookDomain}, "section"); @@ -67,13 +72,18 @@ static Rooted<Document> constructBookDocument(Manager &mgr, return {nullptr}; } // Add its text. - text = Variant{std::map<std::string, Variant>{ - {"content", Variant("Some introductory text")}}}; - Rooted<DocumentPrimitive> main_text = - DocumentPrimitive::buildEntity(foreword, text, "text"); + Rooted<StructuredEntity> main_text = + StructuredEntity::buildEntity(main, {bookDomain}, "text"); if (main_text.isNull()) { return {nullptr}; } + // And its primitive content + text = Variant{"Some actual text"}; + Rooted<DocumentPrimitive> main_primitive = + DocumentPrimitive::buildEntity(main_text, text, "content"); + if (main_primitive.isNull()) { + return {nullptr}; + } return doc; } diff --git a/test/core/model/TestDomain.hpp b/test/core/model/TestDomain.hpp index d55bff7..f457531 100644 --- a/test/core/model/TestDomain.hpp +++ b/test/core/model/TestDomain.hpp @@ -81,11 +81,20 @@ static Rooted<Domain> constructBookDomain(Manager &mgr, Logger &logger) section_field->getChildren().push_back(paragraph); book_field->getChildren().push_back(paragraph); domain->getStructureClasses().push_back(paragraph); + // And the field of it. + Rooted<FieldDescriptor> paragraph_field{new FieldDescriptor(mgr, paragraph)}; + paragraph->getFieldDescriptors().push_back(paragraph_field); + + // Finally we add the "text" node, which is transparent as well. + Rooted<StructuredClass> text{new StructuredClass( + mgr, "text", domain, any, {nullptr}, {nullptr}, true)}; + paragraph_field->getChildren().push_back(text); + domain->getStructureClasses().push_back(text); // ... and has a primitive field. - Rooted<FieldDescriptor> paragraph_field{new FieldDescriptor( - mgr, paragraph, domain->getTypesystems()[0]->getTypes()[1], "text", + Rooted<FieldDescriptor> text_field{new FieldDescriptor( + mgr, text, domain->getTypesystems()[0]->getTypes()[0], "content", false)}; - paragraph->getFieldDescriptors().push_back(paragraph_field); + text->getFieldDescriptors().push_back(text_field); return domain; } diff --git a/test/plugins/html/DemoOutputTest.cpp b/test/plugins/html/DemoOutputTest.cpp new file mode 100644 index 0000000..b81a001 --- /dev/null +++ b/test/plugins/html/DemoOutputTest.cpp @@ -0,0 +1,49 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <iostream> + +#include <plugins/html/DemoOutput.hpp> + +#include <core/model/Document.hpp> +#include <core/model/Domain.hpp> + +#include <core/model/TestDocument.hpp> +#include <core/model/TestDomain.hpp> + +namespace ousia { +namespace html { + +TEST(DemoHTMLTransformer, writeHTML) +{ + // Construct Manager + Logger logger; + Manager mgr{1}; + // Get the domain. + Rooted<model::Domain> domain = model::constructBookDomain(mgr, logger); + // Construct the document. + Rooted<model::Document> doc = model::constructBookDocument(mgr, domain); + + // print it + DemoHTMLTransformer transformer; + transformer.writeHTML(doc, std::cout); +} +} +} |