diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/core/CSS.hpp | 5 | ||||
-rw-r--r-- | src/core/CodeTokenizer.hpp | 5 | ||||
-rw-r--r-- | src/core/XML.cpp | 34 | ||||
-rw-r--r-- | src/core/XML.hpp | 123 | ||||
-rw-r--r-- | test/core/XMLTest.cpp | 77 |
6 files changed, 246 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 10e43ea..e67e2be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,6 +110,7 @@ ADD_LIBRARY(ousia_core src/core/Registry src/core/ResourceLocator src/core/Tokenizer + src/core/XML src/core/common/CharReader src/core/common/Exceptions src/core/common/Function @@ -194,6 +195,7 @@ IF(TEST) test/core/RegistryTest test/core/ResourceLocatorTest test/core/TokenizerTest + test/core/XMLTest test/core/common/CharReaderTest test/core/common/FunctionTest test/core/common/LoggerTest diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp index 60aa91e..75ac73f 100644 --- a/src/core/CSS.hpp +++ b/src/core/CSS.hpp @@ -16,6 +16,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * @file CSS.hpp + + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ #ifndef _OUSIA_CSS_HPP_ #define _OUSIA_CSS_HPP_ diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp index 4190297..154f949 100644 --- a/src/core/CodeTokenizer.hpp +++ b/src/core/CodeTokenizer.hpp @@ -16,6 +16,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * @file CodeTokenizer.hpp + + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ #ifndef _OUSIA_CODE_TOKENIZER_HPP_ #define _OUSIA_CODE_TOKENIZER_HPP_ diff --git a/src/core/XML.cpp b/src/core/XML.cpp new file mode 100644 index 0000000..ad69ba1 --- /dev/null +++ b/src/core/XML.cpp @@ -0,0 +1,34 @@ + +#include "XML.hpp" + +namespace ousia { +namespace xml { + +void Element::serialize(std::ostream& out, unsigned int tabdepth) +{ + for (unsigned int t = 0; t < tabdepth; t++) { + out << '\t'; + } + out << '<' << name; + for (auto &a : attributes) { + out << ' ' << a.first << "=\"" << a.second << '\"'; + } + out << ">\n"; + for (auto &n : children) { + n->serialize(out, tabdepth + 1); + } + for (unsigned int t = 0; t < tabdepth; t++) { + out << '\t'; + } + out << "</" << name << ">\n"; +} + +void Text::serialize(std::ostream& out, unsigned int tabdepth) +{ + for (unsigned int t = 0; t < tabdepth; t++) { + out << '\t'; + } + out << text << '\n'; +} +} +} diff --git a/src/core/XML.hpp b/src/core/XML.hpp new file mode 100644 index 0000000..824d6ce --- /dev/null +++ b/src/core/XML.hpp @@ -0,0 +1,123 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * @file XML.hpp + * + * This header provides XML classes to build an XML tree as well as functions + * to serialize that XMl tree to text. We do not support the full XML + * specification (like described here: http://www.w3.org/TR/REC-xml/ ) but only + * a small subset. This subset is defined by the following context-free grammar: + * + * NODE := ELEMENT | string + * ELEMENT := START NODES END + * NODES := NODE NODES | epsilon + * START := < name ATTRIBUTES > + * ATTRIBUTES := ATTRIBUTE ATTRIBUTES | epsilon + * ATTRIBUTE := key = "value" + * END := </ name > + * + * where the Axiom of a document is "Element". Note that we accept only a + * singular root element and no primitive text at root level. Attributes are + * key-value pairs of strings. Start and end tag name have to match. + * + * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de) + */ +#ifndef _OUSIA_XML_HPP_ +#define _OUSIA_XML_HPP_ + +#include <map> +#include <ostream> +#include <vector> + +#include <core/managed/Managed.hpp> +#include <core/managed/ManagedContainer.hpp> + +namespace ousia { +namespace xml { + +/** + * Node is the common super-class of actual elements (tag-bounded) and text. + * It specifies the pure virtual serialize() function that the subclasses + * implement. + */ +class Node : public Managed { +public: + Node(Manager &mgr) : Managed(mgr){}; + + /** + * When called this Node should serialize its data and write it to the + * given output stream. In case of Elements this includes child elements. + * + * @param out the output stream the serialized data shall be written + * to. + * @param tabdepth the current tabdepth for prettier output. + */ + virtual void serialize(std::ostream &out, unsigned int tabdepth) = 0; +}; + +/** + * An element in XML is defined as by the W3C: + * + * http://www.w3.org/TR/REC-xml/#sec-starttags + * + * For as an element necessarily has a name. It may have key-value pairs as + * attributes, where each key is unique (which is enforced by std::map). + * Additionally it might have other Nodes as children. + */ +class Element : public Node { +public: + const std::string name; + std::map<std::string, std::string> attributes; + ManagedVector<Node> children; + + Element(Manager &mgr, std::string name) : Node(mgr), name(std::move(name)) + { + } + + Element(Manager &mgr, std::string name, + std::map<std::string, std::string> attributes) + : Node(mgr), name(std::move(name)), attributes(std::move(attributes)) + { + } + + /** + * This writes the following to the output stream: + * * The start tag of this element including name and attributes + * * The serialized data of all children as ordered by the vector. + * * The end tag of this element. + * + */ + void serialize(std::ostream &out, unsigned int tabdepth = 0) override; +}; + +class Text : public Node { +public: + const std::string text; + + Text(Manager &mgr, std::string text) : Node(mgr), text(std::move(text)) {} + + /** + * This just writes the text to the output. + * + */ + void serialize(std::ostream &out, unsigned int tabdepth = 0) override; +}; +} +} +#endif diff --git a/test/core/XMLTest.cpp b/test/core/XMLTest.cpp new file mode 100644 index 0000000..aeedb86 --- /dev/null +++ b/test/core/XMLTest.cpp @@ -0,0 +1,77 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <gtest/gtest.h> + +#include <core/XML.hpp> + +#include <sstream> + +namespace ousia { +namespace xml { + +TEST(Node, testSerialize) +{ + Manager mgr; + + Rooted<Element> html{new Element{mgr, "html"}}; + Rooted<Element> head{new Element{mgr, "head"}}; + html->children.push_back(head); + Rooted<Element> title{new Element{mgr, "title"}}; + head->children.push_back(title); + title->children.push_back(new Text(mgr, "my title")); + Rooted<Element> body{new Element{mgr, "body"}}; + html->children.push_back(body); + // This div element contains our text. + Rooted<Element> div{ + new Element{mgr, "div", {{"class", "content"}, {"id", "1"}}}}; + body->children.push_back(div); + Rooted<Element> p{new Element{mgr, "p"}}; + div->children.push_back(p); + p->children.push_back(new Text(mgr, "my text")); + Rooted<Element> p2{new Element{mgr, "p"}}; + div->children.push_back(p2); + p2->children.push_back(new Text(mgr, "my text")); + + // Now this is what we expect to see: + std::string expected{ + "<html>\n" + "\t<head>\n" + "\t\t<title>\n" + "\t\t\tmy title\n" + "\t\t</title>\n" + "\t</head>\n" + "\t<body>\n" + "\t\t<div class=\"content\" id=\"1\">\n" + "\t\t\t<p>\n" + "\t\t\t\tmy text\n" + "\t\t\t</p>\n" + "\t\t\t<p>\n" + "\t\t\t\tmy text\n" + "\t\t\t</p>\n" + "\t\t</div>\n" + "\t</body>\n" + "</html>\n"}; + + // check if it is what we see + std::stringstream ss; + html->serialize(ss); + ASSERT_EQ(expected, ss.str()); +} +} +} |