summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-01-04 22:21:52 +0100
committerAndreas Stöckel <andreas@somweyr.de>2015-01-04 22:21:52 +0100
commit0d671f899da720ff5035bdab7adf6b11cbf80cb1 (patch)
tree9839d502ff323198d0f9cb479e9809a978c5c550
parent7e1c72a5a5d5dc890a49f79c5daec3edcb38a33b (diff)
parent319ad738f677a20403cc27192f1df7bb65ce8c0e (diff)
Merge branch 'master' of somweyr.de:ousia
-rw-r--r--CMakeLists.txt21
-rw-r--r--src/core/model/Document.cpp20
-rw-r--r--src/core/model/Document.hpp97
-rw-r--r--src/core/model/Domain.hpp229
-rw-r--r--src/plugins/css/CSSParser.hpp10
-rw-r--r--src/plugins/html/DemoOutput.cpp185
-rw-r--r--src/plugins/html/DemoOutput.hpp85
-rw-r--r--test/core/model/DocumentTest.cpp62
-rw-r--r--test/core/model/TestDocument.hpp26
-rw-r--r--test/core/model/TestDomain.hpp15
-rw-r--r--test/plugins/html/DemoOutputTest.cpp49
11 files changed, 671 insertions, 128 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5a494c3..10e43ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -153,6 +153,14 @@ TARGET_LINK_LIBRARIES(ousia_css
ousia_core
)
+ADD_LIBRARY(ousia_html
+ src/plugins/html/DemoOutput
+)
+
+TARGET_LINK_LIBRARIES(ousia_html
+ ousia_core
+)
+
ADD_LIBRARY(ousia_xml
src/plugins/xml/XmlParser
)
@@ -233,6 +241,16 @@ IF(TEST)
ousia_css
)
+ ADD_EXECUTABLE(ousia_test_html
+ test/plugins/html/DemoOutputTest
+ )
+
+ TARGET_LINK_LIBRARIES(ousia_test_html
+ ${GTEST_LIBRARIES}
+ ousia_core
+ ousia_html
+ )
+
ADD_EXECUTABLE(ousia_test_xml
test/plugins/xml/XmlParserTest
)
@@ -256,8 +274,9 @@ IF(TEST)
# Register the unit tests
ADD_TEST(ousia_test_core ousia_test_core)
ADD_TEST(ousia_test_boost ousia_test_boost)
- ADD_TEST(ousia_test_xml ousia_test_xml)
ADD_TEST(ousia_test_css ousia_test_css)
+ ADD_TEST(ousia_test_html ousia_test_html)
+ ADD_TEST(ousia_test_xml ousia_test_xml)
# ADD_TEST(ousia_test_mozjs ousia_test_mozjs)
ENDIF()
diff --git a/src/core/model/Document.cpp b/src/core/model/Document.cpp
index 854e717..e5d0755 100644
--- a/src/core/model/Document.cpp
+++ b/src/core/model/Document.cpp
@@ -57,20 +57,22 @@ int DocumentEntity::getFieldDescriptorIndex(const std::string &fieldName)
return -1;
}
-void DocumentEntity::getField(NodeVector<StructuredEntity> &res,
- const std::string &fieldName)
+NodeVector<StructuredEntity> &DocumentEntity::getField(
+ const std::string &fieldName)
{
int f = getFieldDescriptorIndex(fieldName);
if (f < 0) {
- NodeVector<StructuredEntity> empty{this};
- res = NodeVector<StructuredEntity>(this);
+ throw OusiaException("No field for the given name exists!");
}
- res = fields[f];
+ return fields[f];
}
NodeVector<StructuredEntity> &DocumentEntity::getField(
- Rooted<FieldDescriptor> fieldDescriptor)
+ Handle<FieldDescriptor> fieldDescriptor)
{
+ if(fieldDescriptor.isNull()){
+ throw OusiaException("The given FieldDescriptor handle is null!");
+ }
const NodeVector<FieldDescriptor> &fds = descriptor->getFieldDescriptors();
int f = 0;
for (auto &fd : fds) {
@@ -155,8 +157,7 @@ Rooted<StructuredEntity> StructuredEntity::buildEntity(
return {nullptr};
}
// append the new entity to the right field.
- NodeVector<StructuredEntity> field(parent);
- parent->getField(field, fieldName);
+ NodeVector<StructuredEntity>& field = parent->getField(fieldName);
field.push_back(entity);
// and return it.
@@ -179,8 +180,7 @@ Rooted<DocumentPrimitive> DocumentPrimitive::buildEntity(
return {nullptr};
}
// append the new entity to the right field.
- NodeVector<StructuredEntity> field(parent);
- parent->getField(field, fieldName);
+ NodeVector<StructuredEntity>& field = parent->getField(fieldName);
field.push_back(entity);
// and return it.
diff --git a/src/core/model/Document.hpp b/src/core/model/Document.hpp
index 2d792c5..fabdcaf 100644
--- a/src/core/model/Document.hpp
+++ b/src/core/model/Document.hpp
@@ -32,21 +32,33 @@
* Structure Nodes, effectively resulting in a Document Graph instead of a
* Document Tree (other references may introduce cycles as well).
*
- * Consider this simplified XML representation of a document (TODO: Use
- * non-simplified XML as soon as possible):
+ * Consider this XML representation of a document using the "book" domain:
*
- * <Document implements="book">
- * <StructureEntity class="book">
- * <StructureEntity class="section">
- * <DocumentPrimitive>
- * This is some text with some <Anchor id="1"/>emphasized and
- * <Anchor id="2"/>strong<Anchor id="3"/> text.
- * </DocumentPrimitive>
- * <AnnotationEntity class="emphasized" start="1", end="3"/>
- * <AnnotationEntity class="strong" start="2", end="3"/>
- * </StructureEntity>
- * </StructureEntity>
- * </Document>
+ * <doc>
+ * <head>
+ * <import rel="domain" src="book_domain.oxm"/>
+ * <import rel="domain" src="emphasized_domain.oxm"/>
+ * <alias tag="paragraph" aka="p"/>
+ * </head>
+ * <book>
+ * This might be some introductory text or a dedication. Ideally, of
+ * course, such elements would be semantically specified as such in
+ * additional domains (or in this one).
+ * <chapter name="myFirstChapter">
+ * Here we might have an introduction to the chapter, including some
+ * overview of the chapter's structure.
+ * <section name="myFirstSection">
+ * Here we might find the actual section content.
+ * </section>
+ * <section name="mySndSection">
+ * Here we might find the actual section <em>content</em>.
+ *
+ *
+ * And there might even be another paragraph.
+ * </section>
+ * </chapter>
+ * </book>
+ * </doc>
*
* As can be seen the StructureEntities inherently follow a tree structure that
* is restricted by the implicit context free grammar of the "book" Domain
@@ -56,12 +68,32 @@
* Another interesting fact is the special place of AnnotationEntities: They are
* Defined by start and end Anchors in the text. Note that this allows for
* overlapping annotations and provides a more intuitive (and semantically
- * sound) handling of such span-like concepts.
+ * sound) handling of such span-like concepts. So the
+ *
+ * <em>content</em>
+ *
+ * is implicitly expanded to:
+ *
+ * <a id="1"/>content<a id="2"/>
+ * <emphasized start="1" end="2"/>
+ *
* Note that the place of an AnnotationEntity within the XML above is not
* strictly defined. It might as well be placed as a child of the "book" node.
* In general it is recommended to use the lowest possible place in the
* StructureTree to include the AnnotationEntity for better readability.
*
+ * Also note that text content like
+ *
+ * Here we might find the actual section content.
+ *
+ * is implicitly expanded using transparency to:
+ *
+ * <paragraph>
+ * <text>
+ * Here we might find the actual section content.
+ * </text>
+ * </paragraph>
+ *
* @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
*/
@@ -124,12 +156,6 @@ public:
Variant getAttributes() const { return attributes; }
/**
- * This allows a direct manipulation of the internal data structure of a
- * DocumentEntity and is not recommended. TODO: Delete this?
- */
- std::vector<NodeVector<StructuredEntity>> &getFields() { return fields; }
-
- /**
* This returns true if there is a FieldDescriptor in the Descriptor for
* this DocumentEntity which has the given name. If an empty name is
* given it is assumed that the 'default' FieldDescriptor is referenced,
@@ -148,29 +174,18 @@ public:
/**
* This returns the vector of entities containing all members of the field
- * for which the FieldDescriptor has the specified name. If an empty name is
- * given it is assumed that the 'default' FieldDescriptor is referenced,
- * where 'default' means either:
+ * with the given name. If an empty name is given it is assumed that the
+ * 'default' FieldDescriptor is referenced, where 'default' means either:
* 1.) The only TREE typed FieldDescriptor (if present) or
* 2.) the only FieldDescriptor (if only one is specified).
*
- * Note that the output of this method might well be ambigous: If no
- * FieldDescriptor matches the given name an empty NodeVector is
- * returned. This is also the case, however, if there are no members for an
- * existing field. Therefore it is recommended to additionally check the
- * output of "hasField" or use the version of this method with
- * a FieldDescriptor as input.
+ * If the name is unknown an exception is thrown.
*
- * @param fieldName is the name of the field as specified in the
+ * @param fieldName is the name of a field as specified in the
* FieldDescriptor in the Domain description.
- * @param res is a NodeVector reference where the result will be
- * stored. After using this method the reference will
- * either refer to all StructuredEntities in that field. If
- * the field is unknown or if no members exist in that
- * field yet, the NodeVector will be empty.
+ * @return a NodeVector of all StructuredEntities in that field.
*/
- void getField(NodeVector<StructuredEntity> &res,
- const std::string &fieldName = "");
+ NodeVector<StructuredEntity> &getField(const std::string &fieldName = "");
/**
* This returns the vector of entities containing all members of the field
@@ -184,7 +199,7 @@ public:
* @return a NodeVector of all StructuredEntities in that field.
*/
NodeVector<StructuredEntity> &getField(
- Rooted<FieldDescriptor> fieldDescriptor);
+ Handle<FieldDescriptor> fieldDescriptor);
};
/**
@@ -365,7 +380,7 @@ public:
*/
class Document : public Node {
private:
- //TODO: Might there be several roots? E.g. metadata?
+ // TODO: Might there be several roots? E.g. metadata?
Owned<StructuredEntity> root;
public:
@@ -375,7 +390,7 @@ public:
{
}
- void setRoot(Handle<StructuredEntity> root) { root = acquire(root); };
+ void setRoot(Handle<StructuredEntity> root) { this->root = acquire(root); };
Rooted<StructuredEntity> getRoot() const { return root; }
};
diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp
index cd74a19..7e4e9f7 100644
--- a/src/core/model/Domain.hpp
+++ b/src/core/model/Domain.hpp
@@ -30,24 +30,65 @@
* terms "StructuredClass" and "FieldDescriptor".
* On the top level you would start with a StructuredClass, say "book", which
* in turn might contain two FieldDescriptors, one for the meta data of one's
- * book and one for the actual structure. Consider the following (simplified)
- * XML notation (TODO: Use a non-simplified notation as soon as the format is
- * clear.)
- *
- * <StructuredClass name="book">
- * <FieldDescriptor name="structure", type="TREE", optional="false">
- * <children>
- * Here we would reference the possible child classes, e.g. section,
- * paragraph, etc.
- * </children>
- * </FieldDescriptor>
- * <FieldDescriptor name="meta", type="SUBTREE", optional="true">
- * <children>
- * Here we would reference the possible child classes for meta,
- * information, e.g. authors, date, version, etc.
- * </children>
- * </FieldDescriptor>
- * </StructuredClass>
+ * book and one for the actual structure. Consider the following XML:
+ *
+ * <domain name="book">
+ * <structs>
+ * <struct name="book" cardinality="1" isRoot="true">
+ * <fields>
+ * <field>
+ * <children>
+ * <child name="book.chapter"/>
+ * <child name="book.paragraph"/>
+ * </children>
+ * </field>
+ * </fields>
+ * </struct>
+ * <struct name="chapter">
+ * <fields>
+ * <field>
+ * <children>
+ * <child name="book.section"/>
+ * <child name="book.paragraph"/>
+ * </children>
+ * </field>
+ * </fields>
+ * </struct>
+ * <struct name="section">
+ * <fields>
+ * <field>
+ * <children>
+ * <child name="book.subsection"/>
+ * <child name="book.paragraph"/>
+ * </children>
+ * </field>
+ * </fields>
+ * </struct>
+ * <struct name="subsection">
+ * <fields>
+ * <field>
+ * <children>
+ * <child name="book.paragraph"/>
+ * </children>
+ * </field>
+ * </fields>
+ * </struct>
+ * <struct name="paragraph" transparent="true" role="paragraph">
+ * <fields>
+ * <field>
+ * <children>
+ * <child name="book.text"/>
+ * </children>
+ * </field>
+ * </fields>
+ * </struct>
+ * <struct name="text" transparent="true" role="text">
+ * <fields>
+ * <field name="content" type="PRIMITIVE" primitiveType="string"/>
+ * </fields>
+ * </struct>
+ * </structs>
+ * </domain>
*
* Note that we define one field as the TREE (meaning the main or default
* document structure) and one merely as SUBTREE, relating to supporting
@@ -58,11 +99,19 @@
* TREE field and at least one permitted child must exist, either primitive or
* as another StructuredClass.
*
- * The translation to context free grammars is roughly as follows:
+ * The translation to context free grammars is as follows:
*
- * BOOK := book BOOK_STRUCTURE BOOK_META
- * BOOK_STRUCTURE := SECTION BOOK_STRUCTURE | PARAGRAPH BOOK_STRUCTURE | epsilon
- * BOOK_META := AUTHOR BOOK_META | DATE BOOK_META
+ * BOOK := <book> BOOK_TREE </book>
+ * BOOK_TREE := CHAPTER BOOK_TREE | PARAGRAPH BOOK_TREE | epsilon
+ * CHAPTER := <chapter> CHAPTER_TREE </chapter>
+ * CHAPTER_TREE := SECTION CHAPTER_TREE | PARAGRAPH CHAPTER_TREE | epsilon
+ * SECTION := <section> SECTION_TREE </section>
+ * SECTION_TREE := SUBSECTION SECTION_TREE | PARAGRAPH SECTION_TREE |
+ * epsilon
+ * SUBSECTION := <subsection> SUBSECTION_TREE </subsection>
+ * SUBSECTION_TREE := PARAGRAPH SUBSECTION_TREE | epsilon
+ * PARAGRAPH := <paragraph> PARAGRAPH_CONTENT </paragraph>
+ * PARAGRAPH_CONTENT := string
*
* Note that this translation recurs to further nonterminals like SECTION but
* necessarily produces one "book" terminal. Also note that, in principle,
@@ -70,11 +119,72 @@
* the proper StructuredClass. This can be regulated by the "cardinality"
* property of a StructuredClass.
*
+ * It is possible to add further fields, like we would in the "headings" domain
+ * to add titles to our structure.
+ *
+ * <domain name="headings">
+ * <head>
+ * <import rel="domain" src="book.oxm"/>
+ * </head>
+ * <structs>
+ * <struct name="heading" cardinality="0-1" transparent="true">
+ * <parents>
+ * <parent name="book.book">
+ * <field name="heading" type="SUBTREE"/>
+ * </parent>
+ * ...
+ * </parents>
+ * <fields>
+ * <fieldRef name="book.paragraph."/>
+ * </fields>
+ * </struct>
+ * </structs>
+ * </domain>
+ *
+ * This would change the context free grammar as follows:
+ *
+ * BOOK := <book> HEADING BOOK_TREE </book>
+ * HEADING := <heading> PARAGRAPH </heading>
+ *
* AnnotationClasses on the other hand do not specify a context free grammar.
* They merely specify what kinds of Annotations are allowed within this domain
* and which fields or attributes they have. Note that Annotations are allowed
* to define structured children that manifest e.g. meta information of that
- * Annotation.
+ * Annotation. An example for that would be the "comments" domain:
+ *
+ * <domain name="comments">
+ * <head>
+ * <import rel="domain" src="book.oxm"/>
+ * </head>
+ * <annos>
+ * <anno name="comment">
+ * <fields>
+ * <field name="replies" type="SUBTREE">
+ * <children>
+ * <child name="reply"/>
+ * </children>
+ * </field>
+ * </fields>
+ * </anno>
+ * </annos>
+ * <structs>
+ * <struct name="reply">
+ * <fields>
+ * <field name="replies" type="SUBTREE">
+ * <children>
+ * <child name="reply"/>
+ * </children>
+ * </field>
+ * <field name="content" type="SUBTREE">
+ * <children>
+ * <child name="book.paragraph"/>
+ * </children>
+ * </field>
+ * </fields>
+ * </struct>
+ * </structs>
+ * </domain>
+ *
+ * Here we have comment annotations, which have a reply tree as sub structure.
*
* @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
*/
@@ -105,13 +215,17 @@ class Domain;
* Hierarchy.
*
* As an example consider the "paragraph" StructuredClass, which might allow
- * the actual text content. Here is the according simplified XML (TODO: replace
- * with a non-simplified version as soon as the XML syntax is clear.)
+ * the actual text content. Here is the corresponding XML:
*
- * <StructuredClass name="paragraph">
- * <FieldDescriptor name="text", type="PRIMITIVE", optional="false",
- * primitiveType="string"/>
- * </StructuredClass>
+ * <struct name="paragraph" transparent="true" role="paragraph">
+ * <fields>
+ * <field>
+ * <children>
+ * <child name="book.text"/>
+ * </children>
+ * </field>
+ * </fields>
+ * </struct>
*
* Accordingly the primitiveType field of a FieldDescriptor may only be
* defined if the type is set to "PRIMITIVE". If the type is something else
@@ -286,36 +400,28 @@ typedef RangeSet<size_t> Cardinality;
* consult the Header documentation above.
*
* Note that a StructuredClass may "invade" an existing Domain description by
- * defining itself as a viable child in one existing field. Consider a "section"
- * StructuredClass (continuing the example in the header documentation):
- *
- * <StructuredClass name="section">
- * <FieldDescriptor name="structure", type="TREE", optional="false">
- * <children>
- * <classRef>paragraph</classRef>
- * </children>
- * </FieldDescriptor>
- * </StructuredClass>
- *
- * Of course in most cases we do not only want to allow paragraphs inside
- * sections, but also (for example) lists. How would one add that
- * without manipulating the existing domain or having to define an entirely
- * new domain in which section allows for lists?
- *
- * Our solution to this problem is the parent mechanism. The simplified XML
- * (TODO: Use non-simplified version as soon as possible) for the "list"
- * StructuredClass would look like this:
- *
- * <StructuredClass name="list">
- * <FieldDescriptor name="structure", type="TREE", optional="false">
- * <children>
- * <classRef>item</classRef>
- * </children>
- * </FieldDescriptor>
- * <parents>
- * <fieldRef>section.structure</fieldRef>
- * </parents>
- * </StructuredClass>
+ * defining itself as a viable child in one existing field. Consider the
+ * example of the "headings" domain from the header documentation again:
+ *
+ * <domain name="headings">
+ * <head>
+ * <import rel="domain" src="book.oxm"/>
+ * </head>
+ * <structs>
+ * <struct name="heading" cardinality="0-1" transparent="true">
+ * <parents>
+ * <parent name="book.book">
+ * <field name="heading" type="SUBTREE"/>
+ * </parent>
+ * ...
+ * </parents>
+ * <fields>
+ * <fieldRef name="book.paragraph."/>
+ * </fields>
+ * </struct>
+ * </structs>
+ * </domain>
+ *
+ * The "parent" construct allows a StructuredClass to "invade" another domain.
*
* This does indeed interfere with an existing domain and one must carefully
* craft such parent references to not create undesired side effects. However
@@ -404,8 +510,7 @@ public:
Handle<StructType> attributesDescriptor = nullptr,
// TODO: What would be a wise default value for isa?
Handle<StructuredClass> isa = nullptr,
- bool transparent = false,
- bool root = false)
+ bool transparent = false, bool root = false)
: Descriptor(mgr, std::move(name), domain, attributesDescriptor),
cardinality(cardinality),
isa(acquire(isa)),
@@ -497,9 +602,7 @@ extern const Rtti<model::Descriptor> Descriptor;
extern const Rtti<model::StructuredClass> StructuredClass;
extern const Rtti<model::AnnotationClass> AnnotationClass;
extern const Rtti<model::Domain> Domain;
-
}
-
}
#endif /* _OUSIA_MODEL_DOMAIN_HPP_ */
diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp
index 6d84dbf..1ec54f5 100644
--- a/src/plugins/css/CSSParser.hpp
+++ b/src/plugins/css/CSSParser.hpp
@@ -15,7 +15,15 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-
+/**
+ * @file CSSParser.hpp
+ *
+ * Contains the classes needed to transform a CSS string to a CSS SelectorTree
+ * with attached RuleSets. The details are explained in the class
+ * documentations.
+ *
+ * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de
+ */
#ifndef _OUSIA_CSS_PARSER_HPP_
#define _OUSIA_CSS_PARSER_HPP_
diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp
new file mode 100644
index 0000000..463a5d2
--- /dev/null
+++ b/src/plugins/html/DemoOutput.cpp
@@ -0,0 +1,185 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <core/common/Exceptions.hpp>
+#include <core/common/Rtti.hpp>
+
+#include "DemoOutput.hpp"
+
+namespace ousia {
+namespace html {
+
+void DemoHTMLTransformer::writeHTML(Handle<model::Document> doc, // Renders doc as a complete demo HTML page on out.
+ std::ostream &out)
+{
+ // Write the fixed preamble: XML declaration, <html>, <head> with the title, and the opening <body>.
+ out << "<?xml version=\" 1.0 \"?>\n"; // NOTE(review): the spaces inside the version value look unintended and make the declaration malformed - confirm against the XML spec
+ out << "<html>\n";
+ out << "\t<head>\n";
+ out << "\t\t<title>Test HTML Output for " << doc->getName() << "</title>\n"; // the document name is inserted without any HTML escaping
+ out << "\t</head>\n";
+ out << "\t<body>\n";
+
+ // The root entity must have a descriptor named "book"; any other root is rejected with an OusiaException.
+ Rooted<model::StructuredEntity> root = doc->getRoot(); // NOTE(review): root is dereferenced below without a null check - confirm a root is always set here
+ if (root->getDescriptor()->getName() != "book") {
+ throw OusiaException("The given documents root is no book node!");
+ }
+ // Emit the book and, recursively, everything below it.
+ writeSection(root, out);
+ // Close the tags that the preamble left open.
+ out << "\t</body>\n";
+ out << "</html>\n";
+}
+
+/**
+ * Internal classification of a descriptor name; NONE stands for any name that is not book, chapter, section or subsection.
+ */
+enum class SectionType { BOOK, CHAPTER, SECTION, SUBSECTION, NONE };
+
+SectionType getSectionType(const std::string &name) // Maps a descriptor name to its SectionType (exact, case-sensitive string comparison).
+{ // NOTE(review): this helper is neither static nor in an anonymous namespace although it looks file-local - confirm the linkage is intended
+ if (name == "book") {
+ return SectionType::BOOK;
+ } else if (name == "chapter") {
+ return SectionType::CHAPTER;
+ } else if (name == "section") {
+ return SectionType::SECTION;
+ } else if (name == "subsection") {
+ return SectionType::SUBSECTION;
+ } else { // every other name is treated as "not a section"
+ return SectionType::NONE;
+ }
+}
+
+void DemoHTMLTransformer::writeSection(Handle<model::StructuredEntity> sec, // Writes sec (book/chapter/section/subsection) and, recursively, its children to out.
+ std::ostream &out)
+{
+ // Classify the node by the name of its descriptor.
+ SectionType type = getSectionType(sec->getDescriptor()->getName());
+ if (type == SectionType::NONE) {
+ // if the input node is no section, we ignore it.
+ return;
+ }
+ // If a field named "heading" exists, emit its first entry as <h1>..<h4>, chosen by the section type.
+ if (sec->hasField("heading")) {
+ Rooted<model::StructuredEntity> heading = sec->getField("heading")[0]; // NOTE(review): [0] is taken without checking that the field has any members - confirm it cannot be empty
+ out << "\t\t";
+ switch (type) {
+ case SectionType::BOOK:
+ out << "<h1>";
+ break;
+ case SectionType::CHAPTER:
+ out << "<h2>";
+ break;
+ case SectionType::SECTION:
+ out << "<h3>";
+ break;
+ case SectionType::SUBSECTION:
+ out << "<h4>";
+ break;
+ case SectionType::NONE:
+ // unreachable: NONE already returned early above.
+ break;
+ }
+ // write the heading paragraph inline, i.e. without surrounding <p> tags.
+ writeParagraph(heading, out, false);
+ // close the heading tag.
+ switch (type) {
+ case SectionType::BOOK:
+ out << "</h1>";
+ break;
+ case SectionType::CHAPTER:
+ out << "</h2>";
+ break;
+ case SectionType::SECTION:
+ out << "</h3>";
+ break;
+ case SectionType::SUBSECTION:
+ out << "</h4>";
+ break;
+ case SectionType::NONE:
+ // unreachable: NONE already returned early above.
+ break;
+ }
+ out << "\n";
+ }
+
+ // then write the members of the field returned by getField() (called without a name) recursively.
+ NodeVector<model::StructuredEntity> mainField = sec->getField(); // NOTE(review): declared by value, so this presumably copies the vector - a reference may have been intended
+ for (auto &n : mainField) {
+ /*
+ * Dispatch on the child's descriptor name. Strictly speaking this is
+ * the wrong mechanism: an "isa" check would be needed, because we can
+ * not rely on children being exactly paragraphs, lists or sections.
+ * Ideally there would be a listener structure of transformations that
+ * each check whether they can transform this specific node.
+ */
+ std::string childDescriptorName = n->getDescriptor()->getName();
+ if (childDescriptorName == "paragraph") {
+ writeParagraph(n, out);
+ // TODO: Implement
+ // } else if(childDescriptorName == "ul"){
+ // writeList(n, out);
+ } else { // anything else is tried as a section; non-section nodes are silently skipped by the NONE check above
+ writeSection(n, out);
+ }
+ }
+}
+
+void DemoHTMLTransformer::writeParagraph(Handle<model::StructuredEntity> par, // Writes the text of paragraph par to out.
+ std::ostream &out, bool writePTags) // writePTags: whether to wrap the text in <p>...</p>
+{
+ // Only entities whose descriptor is named "paragraph" are accepted; anything else throws.
+ if (par->getDescriptor()->getName() != "paragraph") {
+ throw OusiaException("Expected paragraph!");
+ }
+ // If a field named "heading" exists, its first entry is rendered first as an <h5> line.
+ if (par->hasField("heading")) {
+ Rooted<model::StructuredEntity> heading = par->getField("heading")[0]; // NOTE(review): [0] is taken without checking that the field has any members - confirm it cannot be empty
+ // start the heading tag
+ out << "\t\t<h5>";
+ // write the heading paragraph inline, i.e. without surrounding <p> tags.
+ writeParagraph(heading, out, false);
+ // close the heading tag.
+ out << "</h5>\n";
+ }
+ // write start tag (skipped for inline rendering, e.g. when called for a heading)
+ if (writePTags) {
+ out << "\t\t<p>";
+ }
+ // write content: every member of the field returned by getField() must have a descriptor named "text".
+ // TODO: What about emphasis?
+ for (auto &text : par->getField()) {
+ if (text->getDescriptor()->getName() != "text") {
+ throw OusiaException("Expected text!");
+ }
+ Handle<model::DocumentPrimitive> primitive =
+ text->getField()[0].cast<model::DocumentPrimitive>(); // NOTE(review): [0] is unchecked - an empty text field is not handled here
+ if (primitive.isNull()) {
+ throw OusiaException("Text field is not primitive!");
+ }
+ out << primitive->getContent().asString(); // NOTE(review): written without HTML escaping, so <, > and & in the content reach the output verbatim
+ }
+ // write end tag
+ if (writePTags) {
+ out << "</p>\n";
+ }
+}
+}
+}
diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp
new file mode 100644
index 0000000..ca9bcd2
--- /dev/null
+++ b/src/plugins/html/DemoOutput.hpp
@@ -0,0 +1,85 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file DemoOutput.hpp
+ *
+ * This implements a Demo HTML output for the following domains:
+ * * book
+ * * headings
+ * * emphasis
+ * * lists
+ *
+ * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de
+ */
+#ifndef _OUSIA_HTML_DEMO_OUTPUT_HPP_
+#define _OUSIA_HTML_DEMO_OUTPUT_HPP_
+
+#include <ostream>
+
+#include <core/model/Document.hpp>
+
+namespace ousia {
+namespace html {
+
+class DemoHTMLTransformer { // Demo-only writer that renders a "book" Document as HTML.
+private:
+ /**
+ * This method is to be called recursively to write a book, chapter,
+ * section or subsection to HTML.
+ */
+ void writeSection(Handle<model::StructuredEntity> sec, std::ostream& out);
+ /**
+ * This method writes a paragraph to HTML; writePTags controls whether the surrounding <p> tags are emitted.
+ */
+ void writeParagraph(Handle<model::StructuredEntity> par, std::ostream& out,
+ bool writePTags = true);
+ /**
+ * This method is to be called recursively to write a list to HTML.
+ * TODO: Implement
+ */
+// void writeList(Handle<StructuredEntity> sec, std::ostream& out,
+// int tabdepth);
+
+ //TODO: Implement emphasis.
+
+public:
+ /**
+ * This writes a HTML representation of the given document to the given
+ * output stream. Note that this method lacks the generality of our Ousia
+ * approach with respect to three important points:
+ * 1.) It hardcodes the dependency to a certain set of domains in the C++
+ * code.
+ * 2.) It does not use the proposed pipeline of first copying the document
+ * graph, then attaching style attributes and then transforming it to a
+ * specific output format but does all of these steps at once.
+ * 3.) It does not use different transformers for the different domains but
+ * does all transformations at once.
+ * Therefore this is not an adequate model of our algorithms but only a
+ * Demo.
+ *
+ * @param doc is a Document using concepts of the book, headings, emphasis
+ * and lists domains but no other.
+ * @param out is the output stream the data shall be written to.
+ */
+ void writeHTML(Handle<model::Document> doc, std::ostream& out);
+};
+}
+}
+
+#endif
diff --git a/test/core/model/DocumentTest.cpp b/test/core/model/DocumentTest.cpp
index 9e3229c..a671d2c 100644
--- a/test/core/model/DocumentTest.cpp
+++ b/test/core/model/DocumentTest.cpp
@@ -37,8 +37,68 @@ TEST(Document, testDocumentConstruction)
// Construct the document.
Rooted<Document> doc = constructBookDocument(mgr, domain);
- // If that works we are happy already.
+ // Check the document content.
ASSERT_FALSE(doc.isNull());
+ // get root node.
+ Rooted<StructuredEntity> root = doc->getRoot();
+ ASSERT_FALSE(root.isNull());
+ ASSERT_EQ("book", root->getDescriptor()->getName());
+ ASSERT_TRUE(root->hasField());
+ ASSERT_EQ(2, root->getField().size());
+ // get foreword (paragraph)
+ {
+ Rooted<StructuredEntity> foreword = root->getField()[0];
+ ASSERT_FALSE(foreword.isNull());
+ ASSERT_EQ("paragraph", foreword->getDescriptor()->getName());
+ // it should contain one text node
+ ASSERT_TRUE(foreword->hasField());
+ ASSERT_EQ(1, foreword->getField().size());
+ // which in turn should have a primitive content field containing the
+ // right text.
+ {
+ Rooted<StructuredEntity> text = foreword->getField()[0];
+ ASSERT_FALSE(text.isNull());
+ ASSERT_EQ("text", text->getDescriptor()->getName());
+ ASSERT_TRUE(text->hasField());
+ ASSERT_EQ(1, text->getField().size());
+ ASSERT_TRUE(text->getField()[0]->isa(typeOf<DocumentPrimitive>()));
+ Variant content =
+ text->getField()[0].cast<DocumentPrimitive>()->getContent();
+ ASSERT_EQ("Some introductory text", content.asString());
+ }
+ }
+ // get section
+ {
+ Rooted<StructuredEntity> section = root->getField()[1];
+ ASSERT_FALSE(section.isNull());
+ ASSERT_EQ("section", section->getDescriptor()->getName());
+ // it should contain one paragraph
+ ASSERT_TRUE(section->hasField());
+ ASSERT_EQ(1, section->getField().size());
+ {
+ Rooted<StructuredEntity> par = section->getField()[0];
+ ASSERT_FALSE(par.isNull());
+ ASSERT_EQ("paragraph", par->getDescriptor()->getName());
+ // it should contain one text node
+ ASSERT_TRUE(par->hasField());
+ ASSERT_EQ(1, par->getField().size());
+ // which in turn should have a primitive content field containing
+ // the
+ // right text.
+ {
+ Rooted<StructuredEntity> text = par->getField()[0];
+ ASSERT_FALSE(text.isNull());
+ ASSERT_EQ("text", text->getDescriptor()->getName());
+ ASSERT_TRUE(text->hasField());
+ ASSERT_EQ(1, text->getField().size());
+ ASSERT_TRUE(
+ text->getField()[0]->isa(typeOf<DocumentPrimitive>()));
+ Variant content =
+ text->getField()[0].cast<DocumentPrimitive>()->getContent();
+ ASSERT_EQ("Some actual text", content.asString());
+ }
+ }
+ }
}
}
}
diff --git a/test/core/model/TestDocument.hpp b/test/core/model/TestDocument.hpp
index a1a3434..6b0267a 100644
--- a/test/core/model/TestDocument.hpp
+++ b/test/core/model/TestDocument.hpp
@@ -50,13 +50,18 @@ static Rooted<Document> constructBookDocument(Manager &mgr,
return {nullptr};
}
// Add its text.
- Variant text{std::map<std::string, Variant>{
- {"content", Variant("Some introductory text")}}};
- Rooted<DocumentPrimitive> foreword_text =
- DocumentPrimitive::buildEntity(foreword, text, "text");
+ Rooted<StructuredEntity> foreword_text =
+ StructuredEntity::buildEntity(foreword, {bookDomain}, "text");
if (foreword_text.isNull()) {
return {nullptr};
}
+ // And its primitive content
+ Variant text{"Some introductory text"};
+ Rooted<DocumentPrimitive> foreword_primitive =
+ DocumentPrimitive::buildEntity(foreword_text, text, "content");
+ if (foreword_primitive.isNull()) {
+ return {nullptr};
+ }
// Add a section.
Rooted<StructuredEntity> section =
StructuredEntity::buildEntity(root, {bookDomain}, "section");
@@ -67,13 +72,18 @@ static Rooted<Document> constructBookDocument(Manager &mgr,
return {nullptr};
}
// Add its text.
- text = Variant{std::map<std::string, Variant>{
- {"content", Variant("Some introductory text")}}};
- Rooted<DocumentPrimitive> main_text =
- DocumentPrimitive::buildEntity(foreword, text, "text");
+ Rooted<StructuredEntity> main_text =
+ StructuredEntity::buildEntity(main, {bookDomain}, "text");
if (main_text.isNull()) {
return {nullptr};
}
+ // And its primitive content
+ text = Variant{"Some actual text"};
+ Rooted<DocumentPrimitive> main_primitive =
+ DocumentPrimitive::buildEntity(main_text, text, "content");
+ if (main_primitive.isNull()) {
+ return {nullptr};
+ }
return doc;
}
diff --git a/test/core/model/TestDomain.hpp b/test/core/model/TestDomain.hpp
index d55bff7..f457531 100644
--- a/test/core/model/TestDomain.hpp
+++ b/test/core/model/TestDomain.hpp
@@ -81,11 +81,20 @@ static Rooted<Domain> constructBookDomain(Manager &mgr, Logger &logger)
section_field->getChildren().push_back(paragraph);
book_field->getChildren().push_back(paragraph);
domain->getStructureClasses().push_back(paragraph);
+ // And the field of it.
+ Rooted<FieldDescriptor> paragraph_field{new FieldDescriptor(mgr, paragraph)};
+ paragraph->getFieldDescriptors().push_back(paragraph_field);
+
+ // Finally we add the "text" node, which is transparent as well.
+ Rooted<StructuredClass> text{new StructuredClass(
+ mgr, "text", domain, any, {nullptr}, {nullptr}, true)};
+ paragraph_field->getChildren().push_back(text);
+ domain->getStructureClasses().push_back(text);
// ... and has a primitive field.
- Rooted<FieldDescriptor> paragraph_field{new FieldDescriptor(
- mgr, paragraph, domain->getTypesystems()[0]->getTypes()[1], "text",
+ Rooted<FieldDescriptor> text_field{new FieldDescriptor(
+ mgr, text, domain->getTypesystems()[0]->getTypes()[0], "content",
false)};
- paragraph->getFieldDescriptors().push_back(paragraph_field);
+ text->getFieldDescriptors().push_back(text_field);
return domain;
}
diff --git a/test/plugins/html/DemoOutputTest.cpp b/test/plugins/html/DemoOutputTest.cpp
new file mode 100644
index 0000000..b81a001
--- /dev/null
+++ b/test/plugins/html/DemoOutputTest.cpp
@@ -0,0 +1,49 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <iostream>
+
+#include <plugins/html/DemoOutput.hpp>
+
+#include <core/model/Document.hpp>
+#include <core/model/Domain.hpp>
+
+#include <core/model/TestDocument.hpp>
+#include <core/model/TestDomain.hpp>
+
+namespace ousia {
+namespace html {
+
+TEST(DemoHTMLTransformer, writeHTML)
+{
+ // Construct Manager
+ Logger logger;
+ Manager mgr{1};
+ // Build the book test domain.
+ Rooted<model::Domain> domain = model::constructBookDomain(mgr, logger);
+ // Build the test document on top of that domain.
+ Rooted<model::Document> doc = model::constructBookDocument(mgr, domain);
+
+ // Smoke test only: render to std::cout; nothing about the produced HTML is asserted.
+ DemoHTMLTransformer transformer;
+ transformer.writeHTML(doc, std::cout);
+}
+}
+}