diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/XML.cpp | 29 | ||||
| -rw-r--r-- | src/core/XML.hpp | 7 | ||||
| -rw-r--r-- | src/core/model/Document.cpp | 52 | ||||
| -rw-r--r-- | src/core/model/Document.hpp | 58 | ||||
| -rw-r--r-- | src/core/model/Domain.hpp | 7 | ||||
| -rw-r--r-- | src/plugins/html/DemoOutput.cpp | 131 | ||||
| -rw-r--r-- | src/plugins/html/DemoOutput.hpp | 32 | 
7 files changed, 256 insertions, 60 deletions
diff --git a/src/core/XML.cpp b/src/core/XML.cpp index 038cb86..7f03b35 100644 --- a/src/core/XML.cpp +++ b/src/core/XML.cpp @@ -4,12 +4,16 @@  namespace ousia {  namespace xml { -void Node::serialize(std::ostream& out){ +void Node::serialize(std::ostream &out, const std::string &doctype) +{  	out << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; +	if (doctype != "") { +		out << doctype << "\n"; +	}  	doSerialize(out, 0);  } -void Element::doSerialize(std::ostream& out, unsigned int tabdepth) +void Element::doSerialize(std::ostream &out, unsigned int tabdepth)  {  	for (unsigned int t = 0; t < tabdepth; t++) {  		out << '\t'; @@ -18,17 +22,22 @@ void Element::doSerialize(std::ostream& out, unsigned int tabdepth)  	for (auto &a : attributes) {  		out << ' ' << a.first << "=\"" << a.second << '\"';  	} -	out << ">\n"; -	for (auto &n : children) { -		n->doSerialize(out, tabdepth + 1); -	} -	for (unsigned int t = 0; t < tabdepth; t++) { -		out << '\t'; +	// if we have no children, we close the tag immediately. +	if (children.size() == 0) { +		out << "/>\n"; +	} else { +		out << ">\n"; +		for (auto &n : children) { +			n->doSerialize(out, tabdepth + 1); +		} +		for (unsigned int t = 0; t < tabdepth; t++) { +			out << '\t'; +		} +		out << "</" << name << ">\n";  	} -	out << "</" << name << ">\n";  } -void Text::doSerialize(std::ostream& out, unsigned int tabdepth) +void Text::doSerialize(std::ostream &out, unsigned int tabdepth)  {  	for (unsigned int t = 0; t < tabdepth; t++) {  		out << '\t'; diff --git a/src/core/XML.hpp b/src/core/XML.hpp index 9ca124a..51ef6fd 100644 --- a/src/core/XML.hpp +++ b/src/core/XML.hpp @@ -64,9 +64,12 @@ public:  	/**  	 * This method writes an XML prolog and the XML representing the current  	 * node, including all children, to the given output stream. -	 * @param out is the output stream the serialized data shall be written to. +	 * @param out     is the output stream the serialized data shall be +	 *                written to. +	 * @param doctype enables you to add a prefix after the XML prolog +	 *                specifying the doctype.  	 */ -	void serialize(std::ostream &out); +	void serialize(std::ostream &out, const std::string & doctype = "");  	/**  	 * This method just writes the XML representation of this node to the  	 * output stream, without the XML prolog. diff --git a/src/core/model/Document.cpp b/src/core/model/Document.cpp index e43337f..945fb3e 100644 --- a/src/core/model/Document.cpp +++ b/src/core/model/Document.cpp @@ -153,6 +153,9 @@ Rooted<StructuredEntity> StructuredEntity::buildEntity(  		return {nullptr};  	}  	// append the new entity to the right field. +	if (!parent->hasField(fieldName)) { +		return {nullptr}; +	}  	NodeVector<StructuredEntity> &field = parent->getField(fieldName);  	field.push_back(entity); @@ -176,13 +179,60 @@ Rooted<DocumentPrimitive> DocumentPrimitive::buildEntity(  		return {nullptr};  	}  	// append the new entity to the right field. +	if (!parent->hasField(fieldName)) { +		return {nullptr}; +	}  	NodeVector<StructuredEntity> &field = parent->getField(fieldName);  	field.push_back(entity); -  	// and return it.  	return entity;  } +Rooted<AnnotationEntity::Anchor> AnnotationEntity::buildAnchor( +    Handle<DocumentEntity> parent, std::string id, const std::string &fieldName) +{ +	// If the parent is not set, we can not build the anchor. +	if (parent == nullptr) { +		return {nullptr}; +	} +	// Then construct the Anchor itself +	Rooted<Anchor> anchor{ +	    new AnnotationEntity::Anchor(parent->getManager(), parent, id)}; +	// append the new entity to the right field. +	if (!parent->hasField(fieldName)) { +		return {nullptr}; +	} +	NodeVector<StructuredEntity> &field = parent->getField(fieldName); +	field.push_back(anchor); +	// and return it. +	return anchor; +} + +Rooted<AnnotationEntity> AnnotationEntity::buildEntity( +    Handle<Document> parent, std::vector<Handle<Domain>> domains, +    const std::string &className, Handle<AnnotationEntity::Anchor> start, +    Handle<AnnotationEntity::Anchor> end, Variant attributes, std::string name) +{ +	// If the parent is not set, we can not build the AnnotationEntity. +	if (parent == nullptr) { +		return {nullptr}; +	} +	// If we can not find the correct descriptor, we can not build the entity +	// either. +	Rooted<StructuredClass> descriptor = resolveDescriptor(domains, className); +	if (descriptor == nullptr) { +		return {nullptr}; +	} +	// Then construct the AnnotationEntity itself +	Rooted<AnnotationEntity> anno{ +	    new AnnotationEntity(parent->getManager(), parent, descriptor, +	                         attributes, start, end, name)}; +	// append the new entity to the document +	parent->getAnnotations().push_back(anno); +	// and return it. +	return anno; +} +  /* Type registrations */  } diff --git a/src/core/model/Document.hpp b/src/core/model/Document.hpp index 7523962..993df9e 100644 --- a/src/core/model/Document.hpp +++ b/src/core/model/Document.hpp @@ -207,21 +207,15 @@ public:   * information please refer to the header documentation above.   */  class StructuredEntity : public DocumentEntity { -private: -	NodeVector<AnnotationEntity> annotations; -  public:  	StructuredEntity(Manager &mgr, Handle<Node> parent,  	                 Handle<StructuredClass> descriptor, Variant attributes,  	                 std::string name = "")  	    : DocumentEntity(mgr, parent, descriptor, std::move(attributes), -	                     std::move(name)), -	      annotations(this) +	                     std::move(name))  	{  	} -	NodeVector<AnnotationEntity> &getAnnotations() { return annotations; } -  	/**  	 * This builds the root StructuredEntity for the given document. It  	 * automatically appends the newly build entity to the given document. @@ -343,12 +337,11 @@ public:  	public:  		/**  		 * @param mgr    is the Manager instance. -		 * @param name   is the Anchor id.  		 * @param parent is the parent of this Anchor in the Structure Tree (!),  		 *               not the AnnotationEntity that references this Anchor. +		 * @param name   is the Anchor id.  		 */ -		Anchor(Manager &mgr, Handle<StructuredEntity> parent, -		       std::string name = "") +		Anchor(Manager &mgr, Handle<DocumentEntity> parent, std::string name)  		    : StructuredEntity(mgr, parent, nullptr, Variant(), std::move(name))  		{  		} @@ -372,6 +365,45 @@ public:  	Rooted<Anchor> getStart() { return start; }  	Rooted<Anchor> getEnd() { return end; } + +	/** +	 * This builds an Anchor as child of the given DocumentEntity. It +	 * automatically appends the newly build Anchor to its parent. +	 * +	 * @param parent     is the parent DocumentEntity. The newly constructed +	 *                   Anchor will automatically be appended to it. +	 * @param id         is the id of this Anchor. +	 * @param fieldName  is the name of the field where the newly constructed +	 *                   Anchor shall be appended. +	 * +	 * @return           the newly created Anchor or a nullptr if some +	 *                   input handle was empty. +	 */ +	static Rooted<Anchor> buildAnchor(Handle<DocumentEntity> parent, +	                                  std::string id, +	                                  const std::string &fieldName = ""); +	/** +	 * This builds an AnnotationEntity as child of the given DocumentEntity. It +	 * automatically appends the newly build entity to its parent. +	 * +	 * @param parent     is the document the newly constructed AnnotationEntity +	 *                   will be appended to. +	 * @param domains    are the domains that are used to find the +	 *                   AnnotationClass for the new node. The domains will be +	 *                   searched in the given order. +	 * @param className  is the name of the AnnotationClass. +	 * @param attributes are the attributes of the new node in terms of a Struct +	 *                   variant (empty per default). +	 * @param name       is the name of this AnnotationEntity (empty per +	 *                   default). +	 * @return           the newly created AnnotationEntity or a nullptr if some +	 *                   input handle was empty or the given domains did not +	 *                   contain a AnnotationClass with the given name. +	 */ +	static Rooted<AnnotationEntity> buildEntity(Handle<Document> parent, std::vector<Handle<Domain>> domains, +	    const std::string &className, +	    Handle<Anchor> start, Handle<Anchor> end, +	    Variant attributes = Variant(), std::string name = "");  };  /** @@ -382,17 +414,21 @@ class Document : public Node {  private:  	// TODO: Might there be several roots? E.g. metadata?  	Owned<StructuredEntity> root; +	NodeVector<AnnotationEntity> annotations;  public:  	Document(Manager &mgr, std::string name)  	    // TODO: Can a document have a parent? -	    : Node(mgr, std::move(name), nullptr) +	    : Node(mgr, std::move(name), nullptr), +	      annotations(this)  	{  	}  	void setRoot(Handle<StructuredEntity> root) { this->root = acquire(root); };  	Rooted<StructuredEntity> getRoot() const { return root; } + +	NodeVector<AnnotationEntity> getAnnotations() { return annotations; }  };  } diff --git a/src/core/model/Domain.hpp b/src/core/model/Domain.hpp index 18ebfb4..7412ef4 100644 --- a/src/core/model/Domain.hpp +++ b/src/core/model/Domain.hpp @@ -521,6 +521,13 @@ public:   * This class has no special properties and is in essence just a Descriptor.   */  class AnnotationClass : public Descriptor { +public: +	AnnotationClass(Manager &mgr, std::string name, Handle<Domain> domain, +	                // TODO: What would be a wise default value for attributes? +	                Handle<StructType> attributesDescriptor) +	    : Descriptor(mgr, std::move(name), domain, attributesDescriptor) +	{ +	}  };  /** diff --git a/src/plugins/html/DemoOutput.cpp b/src/plugins/html/DemoOutput.cpp index 035ba25..92ff88c 100644 --- a/src/plugins/html/DemoOutput.cpp +++ b/src/plugins/html/DemoOutput.cpp @@ -16,6 +16,9 @@      along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ +#include <stack> + +  #include <core/common/Exceptions.hpp>  #include <core/common/Rtti.hpp>  #include <core/common/Variant.hpp> @@ -30,10 +33,18 @@ void DemoHTMLTransformer::writeHTML(Handle<model::Document> doc,  {  	Manager &mgr = doc->getManager();  	// Create an XML object tree for the document first. -	Rooted<xml::Element> html{new xml::Element{mgr, "html"}}; +	Rooted<xml::Element> html{new xml::Element{ +	    mgr, "html", {{"xlmns", "http://www.w3.org/1999/xhtml"}}}};  	// add the head Element  	Rooted<xml::Element> head{new xml::Element{mgr, "head"}};  	html->children.push_back(head); +	// add the meta element. +	Rooted<xml::Element> meta{ +	    new xml::Element{mgr, +	                     "meta", +	                     {{"http-equiv", "Content-Type"}, +	                      {"content", "text/html; charset=utf-8"}}}}; +	head->children.push_back(meta);  	// add the title Element with Text  	Rooted<xml::Element> title{new xml::Element{mgr, "title"}};  	head->children.push_back(title); @@ -45,31 +56,42 @@ void DemoHTMLTransformer::writeHTML(Handle<model::Document> doc,  	// So far was the "preamble". No we have to get to the document content. +	// build the start and end map for annotation processing. +	AnnoMap startMap; +	AnnoMap endMap; +	for (auto &a : doc->getAnnotations()) { +		// we assume uniquely IDed annotations, which should be checked in the +		// validation process. +		startMap.emplace(a->getStart()->getName(), a); +		endMap.emplace(a->getEnd()->getName(), a); +	} +  	// extract the book root node.  	Rooted<model::StructuredEntity> root = doc->getRoot();  	if (root->getDescriptor()->getName() != "book") {  		throw OusiaException("The given documents root is no book node!");  	}  	// transform the book node. -	Rooted<xml::Element> book = transformSection(root); +	Rooted<xml::Element> book = transformSection(root, startMap, endMap);  	// add it as child to the body node.  	body->children.push_back(book);  	// After the content has been transformed, we serialize it. -	html->serialize(out); +	html->serialize( +	    out, +	    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n" +	    "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">");  }  /**   * This is just for easier internal handling.   */ -enum class SectionType { BOOK, CHAPTER, SECTION, SUBSECTION, NONE }; +enum class SectionType { BOOK, SECTION, SUBSECTION, NONE };  SectionType getSectionType(const std::string &name)  {  	if (name == "book") {  		return SectionType::BOOK; -	} else if (name == "chapter") { -		return SectionType::CHAPTER;  	} else if (name == "section") {  		return SectionType::SECTION;  	} else if (name == "subsection") { @@ -79,7 +101,8 @@ SectionType getSectionType(const std::string &name)  	}  } -Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::StructuredEntity> section) +Rooted<xml::Element> DemoHTMLTransformer::transformSection( +    Handle<model::StructuredEntity> section, AnnoMap &startMap, AnnoMap &endMap)  {  	Manager &mgr = section->getManager();  	// check the section type. @@ -93,7 +116,8 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu  	Rooted<xml::Element> sec{  	    new xml::Element{mgr, "div", {{"class", secclass}}}};  	// check if we have a heading. -	if (section->hasField("heading")) { +	if (section->hasField("heading") && +	    section->getField("heading").size() > 0) {  		Rooted<model::StructuredEntity> heading =  		    section->getField("heading")[0];  		std::string headingclass; @@ -101,14 +125,11 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu  			case SectionType::BOOK:  				headingclass = "h1";  				break; -			case SectionType::CHAPTER: -				headingclass = "h2"; -				break;  			case SectionType::SECTION: -				headingclass = "h3"; +				headingclass = "h2";  				break;  			case SectionType::SUBSECTION: -				headingclass = "h4"; +				headingclass = "h3";  				break;  			case SectionType::NONE:  				// this can not happen; @@ -117,7 +138,8 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu  		Rooted<xml::Element> h{new xml::Element{mgr, headingclass}};  		sec->children.push_back(h);  		// extract the heading text, enveloped in a paragraph Element. -		Rooted<xml::Element> h_content = transformParagraph(heading); +		Rooted<xml::Element> h_content = +		    transformParagraph(heading, startMap, endMap);  		// We omit the paragraph Element and add the children directly to the  		// heading Element  		for (auto &n : h_content->children) { @@ -138,12 +160,11 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu  		const std::string childDescriptorName = n->getDescriptor()->getName();  		Rooted<xml::Element> child;  		if (childDescriptorName == "paragraph") { -			child = transformParagraph(n); -			// TODO: Implement -			//		} else if(childDescriptorName == "ul"){ -			//			writeList(n, out); +			child = transformParagraph(n, startMap, endMap); +		} else if (childDescriptorName == "ul" || childDescriptorName == "ol") { +			child = transformList(n, startMap, endMap);  		} else { -			child = transformSection(n); +			child = transformSection(n, startMap, endMap);  		}  		if (!child.isNull()) {  			sec->children.push_back(child); @@ -152,29 +173,90 @@ Rooted<xml::Element> DemoHTMLTransformer::transformSection(Handle<model::Structu  	return sec;  } -Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(Handle<model::StructuredEntity> par) +Rooted<xml::Element> DemoHTMLTransformer::transformList( +    Handle<model::StructuredEntity> list, AnnoMap &startMap, AnnoMap &endMap) +{ +	Manager &mgr = list->getManager(); +	// create the list Element, which is either ul or ol (depends on descriptor) +	std::string listclass = list->getDescriptor()->getName(); +	Rooted<xml::Element> l{new xml::Element{mgr, listclass}}; +	// iterate through list items. +	for (auto &item : list->getField()) { +		std::string itDescrName = item->getDescriptor()->getName(); +		if (itDescrName == "item") { +			// create the list item. +			Rooted<xml::Element> li{new xml::Element{mgr, "li"}}; +			l->children.push_back(li); +			// extract the item text, enveloped in a paragraph Element. +			Rooted<xml::Element> li_content = +			    transformParagraph(item, startMap, endMap); +			// We omit the paragraph Element and add the children directly to +			// the list item +			for (auto &n : li_content->children) { +				li->children.push_back(n); +			} +		} +	} +	return l; +} + +typedef model::AnnotationEntity::Anchor Anchor; +typedef std::stack<Rooted<model::AnnotationEntity>> AnnoStack; + +Rooted<xml::Element> DemoHTMLTransformer::transformParagraph( +    Handle<model::StructuredEntity> par, AnnoMap &startMap, AnnoMap &endMap)  {  	Manager &mgr = par->getManager(); -	// create the p xml::Element +	// create the p Element  	Rooted<xml::Element> p{new xml::Element{mgr, "p"}};  	// check if we have a heading. -	if (par->hasField("heading")) { +	if (par->hasField("heading") && par->getField("heading").size() > 0) {  		Rooted<model::StructuredEntity> heading = par->getField("heading")[0];  		// put the heading in a strong xml::Element.  		Rooted<xml::Element> strong{new xml::Element{mgr, "strong"}};  		p->children.push_back(strong);  		// extract the heading text, enveloped in a paragraph Element. -		Rooted<xml::Element> h_content = transformParagraph(heading); +		Rooted<xml::Element> h_content = +		    transformParagraph(heading, startMap, endMap);  		// We omit the paragraph Element and add the children directly to the  		// heading Element  		for (auto &n : h_content->children) {  			strong->children.push_back(n);  		}  	} -	 +  	// transform paragraph children to XML as well  	for (auto &n : par->getField()) { +		if (n->isa(typeOf<Anchor>())) { +			//TODO: This needs some more brain work. +//			// check if this is a start Anchor. +//			auto it = startMap.find(n->getName()); +//			if(it != startMap.end()){ +//				// if we have a start Anchor, we put another AnnotationEntity +//				// on top the stack. +//				opened.push(it->second); +//				// and we create an open tag. +//				 +//				continue; +//			} +//			// check if this is an end Anchor. +//			auto it = endMap.find(n->getName()); +//			if(it != endMap.end()){ +//				/* +//				 * Now it gets somewhat interesting: We have to close all +//				 * tags that started after the one that is closed now and +//				 * re-open them afterwards. So we create a lokal stack to +//				 * temporarily store all AnnotationEntities that need to +//				 * be re-opened. +//				 */ +//				 AnnoStack tmp; +//				 Rooted< +//				 while(!opened.empty() && ) +//			} +//			 +			continue; +		}  		std::string childDescriptorName = n->getDescriptor()->getName();  		if (childDescriptorName == "text") {  			Handle<model::DocumentPrimitive> primitive = @@ -185,7 +267,6 @@ Rooted<xml::Element> DemoHTMLTransformer::transformParagraph(Handle<model::Struc  			p->children.push_back(  			    new xml::Text(mgr, primitive->getContent().asString()));  		} -		// TODO: Handle non-text content  	}  	return p;  } diff --git a/src/plugins/html/DemoOutput.hpp b/src/plugins/html/DemoOutput.hpp index 70a5daa..e08ec2b 100644 --- a/src/plugins/html/DemoOutput.hpp +++ b/src/plugins/html/DemoOutput.hpp @@ -30,6 +30,7 @@  #ifndef _OUSIA_HTML_DEMO_OUTPUT_HPP_  #define _OUSIA_HTML_DEMO_OUTPUT_HPP_ +#include <map>  #include <ostream>  #include <core/model/Document.hpp> @@ -38,22 +39,31 @@  namespace ousia {  namespace html { +typedef std::map<std::string, Rooted<model::AnnotationEntity>> AnnoMap; +  class DemoHTMLTransformer {  private:  	/** -	 * These methods are called recursively to transform a document to an XML -	 * tree. +	 * This transforms a section-like entity, namely book, section +	 * and subsection, to an XHTML element, including its header. For the +	 * children of the default field the respective transform function is +	 * called recursively.  	 */ -	Rooted<xml::Element> transformSection(Handle<model::StructuredEntity> sec); -	Rooted<xml::Element> transformParagraph(Handle<model::StructuredEntity> par); +	Rooted<xml::Element> transformSection(Handle<model::StructuredEntity> sec, +	                                       AnnoMap& startMap,  AnnoMap& endMap);  	/** -	 * This method is to be called recursively to write a list to HTML. -	 * TODO: Implement +	 * This transforms a list entity, namely ul and ol to an XHTML element. +	 * For each item, the transformParagraph function is called.  	 */ -//	void writeList(Handle<StructuredEntity> sec, std::ostream& out, -//	               int tabdepth); - -	//TODO: Implement emphasis. +	Rooted<xml::Element> transformList(Handle<model::StructuredEntity> list, +	                                    AnnoMap& startMap, AnnoMap& endMap); +	/** +	 * This transforms a paragraph-like entity, namely heading, item and +	 * paragraph, to an XHTML element including the text and the anchors +	 * contained. For anchor handling we require the AnnoMaps. +	 */ +	Rooted<xml::Element> transformParagraph(Handle<model::StructuredEntity> par, +	                                        AnnoMap& startMap, AnnoMap& endMap);  public:  	/** @@ -74,7 +84,7 @@ public:  	 *            and lists domains but no other.  	 * @param out is the output stream the data shall be written to.  	 */ -	void writeHTML(Handle<model::Document> doc, std::ostream& out); +	void writeHTML(Handle<model::Document> doc, std::ostream &out);  };  }  }  | 
