From fd8ce97afb16e17102ec8f109103ed334ad0e939 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Tue, 6 Jan 2015 22:38:49 +0100 Subject: added XML classes including Serialization functions and added a test for it. I tried not to include Managed.hpp to prevent further overhead but I failed miserably. --- src/core/CSS.hpp | 5 ++ src/core/CodeTokenizer.hpp | 5 ++ src/core/XML.cpp | 34 +++++++++++++ src/core/XML.hpp | 123 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 167 insertions(+) create mode 100644 src/core/XML.cpp create mode 100644 src/core/XML.hpp (limited to 'src') diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 60aa91e..75ac73f 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -16,6 +16,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * @file CSS.hpp + + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ #ifndef _OUSIA_CSS_HPP_ #define _OUSIA_CSS_HPP_ diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 4190297..154f949 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -16,6 +16,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+/**
+ * @file CodeTokenizer.hpp
+
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
 #ifndef _OUSIA_CODE_TOKENIZER_HPP_
 #define _OUSIA_CODE_TOKENIZER_HPP_
diff --git a/src/core/XML.cpp b/src/core/XML.cpp
new file mode 100644
index 0000000..ad69ba1
--- /dev/null
+++ b/src/core/XML.cpp
@@ -0,0 +1,50 @@
+
+#include "XML.hpp"
+
+namespace ousia {
+namespace xml {
+
+namespace {
+/**
+ * Writes one tab character per indentation level to the given stream.
+ */
+void writeIndent(std::ostream &out, unsigned int tabdepth)
+{
+	for (unsigned int level = 0; level < tabdepth; ++level) {
+		out << '\t';
+	}
+}
+}
+
+/**
+ * Writes the indented start tag (name and attributes), every child one level
+ * deeper and finally the indented end tag. Name, attribute keys and values are
+ * written verbatim; no XML escaping is applied.
+ */
+void Element::serialize(std::ostream &out, unsigned int tabdepth)
+{
+	writeIndent(out, tabdepth);
+	out << '<' << name;
+	for (const auto &attribute : attributes) {
+		out << ' ' << attribute.first << "=\"" << attribute.second << '\"';
+	}
+	out << ">\n";
+	for (auto &child : children) {
+		child->serialize(out, tabdepth + 1);
+	}
+	writeIndent(out, tabdepth);
+	// The end tag repeats the element name so that start and end tag match.
+	out << "</" << name << ">\n";
+}
+
+/**
+ * Writes the text on a line of its own, indented by tabdepth tabs. The text is
+ * written verbatim; no XML escaping is applied.
+ */
+void Text::serialize(std::ostream &out, unsigned int tabdepth)
+{
+	writeIndent(out, tabdepth);
+	out << text << '\n';
+}
+}
+}
diff --git a/src/core/XML.hpp b/src/core/XML.hpp
new file mode 100644
index 0000000..824d6ce
--- /dev/null
+++ b/src/core/XML.hpp
@@ -0,0 +1,123 @@
+/*
+    Ousía
+    Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file XML.hpp
+ *
+ * This header provides XML classes to build an XML tree as well as functions
+ * to serialize that XML tree to text.
+ * We do not support the full XML specification (as described here:
+ * http://www.w3.org/TR/REC-xml/ ) but only a small subset. This subset is
+ * defined by the following context-free grammar:
+ *
+ * NODE := ELEMENT | string
+ * ELEMENT := START NODES END
+ * NODES := NODE NODES | epsilon
+ * START := < name ATTRIBUTES >
+ * ATTRIBUTES := ATTRIBUTE ATTRIBUTES | epsilon
+ * ATTRIBUTE := key = "value"
+ * END := </ name >
+ *
+ * where the axiom of a document is ELEMENT. Note that we accept only a
+ * singular root element and no primitive text at root level. Attributes are
+ * key-value pairs of strings. Start and end tag name have to match.
+ *
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
+#ifndef _OUSIA_XML_HPP_
+#define _OUSIA_XML_HPP_
+
+// NOTE(review): the include targets were lost when this patch was extracted.
+// The headers below are reconstructed from the identifiers this file uses
+// (std::map, std::ostream, std::string, Managed, ManagedVector) -- confirm the
+// exact names and paths against the repository.
+#include <map>
+#include <ostream>
+#include <string>
+
+#include <core/managed/Managed.hpp>
+#include <core/managed/ManagedContainer.hpp>
+
+namespace ousia {
+namespace xml {
+
+/**
+ * Node is the common super-class of actual elements (tag-bounded) and text.
+ * It specifies the pure virtual serialize() function that the subclasses
+ * implement.
+ */
+class Node : public Managed {
+public:
+	/**
+	 * Creates a new Node and forwards the given Manager to the Managed base
+	 * class.
+	 *
+	 * @param mgr the Manager instance passed on to Managed.
+	 */
+	Node(Manager &mgr) : Managed(mgr) {}
+
+	/**
+	 * When called this Node should serialize its data and write it to the
+	 * given output stream. In case of Elements this includes child elements.
+	 *
+	 * @param out the output stream the serialized data shall be written
+	 * to.
+	 * @param tabdepth the current tabdepth for prettier output.
+	 */
+	virtual void serialize(std::ostream &out, unsigned int tabdepth) = 0;
+};
+
+/**
+ * An element in XML is defined by the W3C:
+ *
+ * http://www.w3.org/TR/REC-xml/#sec-starttags
+ *
+ * An element necessarily has a name. It may have key-value pairs as
+ * attributes, where each key is unique (which is enforced by std::map).
+ * Additionally it might have other Nodes as children.
+ */
+class Element : public Node {
+public:
+	/** Tag name, written in both the start and the end tag. */
+	const std::string name;
+	// NOTE(review): the template arguments of the two containers below were
+	// lost when this patch was extracted. They are reconstructed from the file
+	// documentation (attributes are key-value pairs of strings, children are
+	// Nodes) -- confirm against the repository.
+	/** Attribute key-value pairs; std::map keeps each key unique. */
+	std::map<std::string, std::string> attributes;
+	/** Child nodes in the order in which they are serialized. */
+	ManagedVector<Node> children;
+
+	/**
+	 * Creates an element without attributes.
+	 *
+	 * @param mgr the Manager instance passed on to Node.
+	 * @param name the tag name of this element.
+	 */
+	Element(Manager &mgr, std::string name) : Node(mgr), name(std::move(name))
+	{
+	}
+
+	/**
+	 * Creates an element with the given attributes.
+	 *
+	 * @param mgr the Manager instance passed on to Node.
+	 * @param name the tag name of this element.
+	 * @param attributes the attribute key-value pairs, moved into this element.
+	 */
+	Element(Manager &mgr, std::string name,
+	        std::map<std::string, std::string> attributes)
+	    : Node(mgr), name(std::move(name)), attributes(std::move(attributes))
+	{
+	}
+
+	/**
+	 * This writes the following to the output stream:
+	 * * The start tag of this element including name and attributes
+	 * * The serialized data of all children as ordered by the vector.
+	 * * The end tag of this element.
+	 *
+	 * @param out the output stream the serialized data is written to.
+	 * @param tabdepth the indentation depth of this element. The default of 0
+	 * is declared on this override only, not on Node::serialize.
+	 */
+	void serialize(std::ostream &out, unsigned int tabdepth = 0) override;
+};
+
+/**
+ * Text is a Node that carries a plain string instead of a tag.
+ */
+class Text : public Node {
+public:
+	/** The string handed to the constructor. */
+	const std::string text;
+
+	/**
+	 * Creates a text node.
+	 *
+	 * @param mgr the Manager instance passed on to Node.
+	 * @param text the string stored in this node.
+	 */
+	Text(Manager &mgr, std::string text) : Node(mgr), text(std::move(text)) {}
+
+	/**
+	 * This just writes the text to the output.
+	 *
+	 * @param out the output stream the text is written to.
+	 * @param tabdepth the indentation depth of this text. The default of 0 is
+	 * declared on this override only, not on Node::serialize.
+	 */
+	void serialize(std::ostream &out, unsigned int tabdepth = 0) override;
+};
+}
+}
+#endif
-- 
cgit v1.2.3