summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Paassen <bpaassen@techfak.uni-bielefeld.de>2015-01-06 22:38:49 +0100
committerBenjamin Paassen <bpaassen@techfak.uni-bielefeld.de>2015-01-06 22:38:49 +0100
commitfd8ce97afb16e17102ec8f109103ed334ad0e939 (patch)
treebe609366b106940252c36bdfb3f02e677e9c3e3a
parent0778a9446dc3475b887d20515165a4dc63ed0cd0 (diff)
added XML classes including Serialization functions and added a test for it. I tried not to include Managed.hpp to prevent further overhead but I failed miserably.
-rw-r--r--CMakeLists.txt2
-rw-r--r--src/core/CSS.hpp5
-rw-r--r--src/core/CodeTokenizer.hpp5
-rw-r--r--src/core/XML.cpp34
-rw-r--r--src/core/XML.hpp123
-rw-r--r--test/core/XMLTest.cpp77
6 files changed, 246 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10e43ea..e67e2be 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -110,6 +110,7 @@ ADD_LIBRARY(ousia_core
src/core/Registry
src/core/ResourceLocator
src/core/Tokenizer
+ src/core/XML
src/core/common/CharReader
src/core/common/Exceptions
src/core/common/Function
@@ -194,6 +195,7 @@ IF(TEST)
test/core/RegistryTest
test/core/ResourceLocatorTest
test/core/TokenizerTest
+ test/core/XMLTest
test/core/common/CharReaderTest
test/core/common/FunctionTest
test/core/common/LoggerTest
diff --git a/src/core/CSS.hpp b/src/core/CSS.hpp
index 60aa91e..75ac73f 100644
--- a/src/core/CSS.hpp
+++ b/src/core/CSS.hpp
@@ -16,6 +16,11 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+/**
+ * @file CSS.hpp
+
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
#ifndef _OUSIA_CSS_HPP_
#define _OUSIA_CSS_HPP_
diff --git a/src/core/CodeTokenizer.hpp b/src/core/CodeTokenizer.hpp
index 4190297..154f949 100644
--- a/src/core/CodeTokenizer.hpp
+++ b/src/core/CodeTokenizer.hpp
@@ -16,6 +16,11 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+/**
+ * @file CodeTokenizer.hpp
+
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
#ifndef _OUSIA_CODE_TOKENIZER_HPP_
#define _OUSIA_CODE_TOKENIZER_HPP_
diff --git a/src/core/XML.cpp b/src/core/XML.cpp
new file mode 100644
index 0000000..ad69ba1
--- /dev/null
+++ b/src/core/XML.cpp
@@ -0,0 +1,34 @@
+
+#include "XML.hpp"
+
+namespace ousia {
+namespace xml {
+
+/**
+ * Writes an attribute value to the given stream and replaces every character
+ * that must not appear literally inside a double-quoted XML attribute value
+ * by the corresponding predefined entity reference.
+ *
+ * @param out   the output stream the escaped value is written to.
+ * @param value the raw (unescaped) attribute value.
+ */
+static void writeEscapedAttributeValue(std::ostream &out,
+                                       const std::string &value)
+{
+    for (const char c : value) {
+        switch (c) {
+            case '&':
+                out << "&amp;";
+                break;
+            case '<':
+                out << "&lt;";
+                break;
+            case '>':
+                out << "&gt;";
+                break;
+            case '"':
+                // The value is delimited by double quotes, so a literal quote
+                // would terminate the attribute prematurely.
+                out << "&quot;";
+                break;
+            default:
+                out << c;
+                break;
+        }
+    }
+}
+
+/**
+ * Serializes this element: the indented start tag including all attributes,
+ * then every child one indentation level deeper, then the indented end tag.
+ * Start tag and end tag are each terminated by a newline.
+ *
+ * Attribute values are escaped so that special characters cannot break the
+ * generated markup. Element and attribute names are written unchanged.
+ *
+ * @param out      the output stream the serialized data is written to.
+ * @param tabdepth the number of tab characters written in front of the start
+ *                 and the end tag.
+ */
+void Element::serialize(std::ostream &out, unsigned int tabdepth)
+{
+    // Start tag: indentation, name and attributes.
+    for (unsigned int t = 0; t < tabdepth; t++) {
+        out << '\t';
+    }
+    out << '<' << name;
+    for (const auto &a : attributes) {
+        out << ' ' << a.first << "=\"";
+        writeEscapedAttributeValue(out, a.second);
+        out << '"';
+    }
+    out << ">\n";
+
+    // Children are indented one level deeper than this element.
+    for (auto &n : children) {
+        n->serialize(out, tabdepth + 1);
+    }
+
+    // End tag, indented like the start tag.
+    for (unsigned int t = 0; t < tabdepth; t++) {
+        out << '\t';
+    }
+    out << "</" << name << ">\n";
+}
+
+/**
+ * Writes character data to the given stream and replaces the characters that
+ * would otherwise be interpreted as markup ('&', '<' and '>') by the
+ * corresponding predefined entity references.
+ *
+ * @param out  the output stream the escaped text is written to.
+ * @param data the raw (unescaped) character data.
+ */
+static void writeEscapedCharacterData(std::ostream &out,
+                                      const std::string &data)
+{
+    for (const char c : data) {
+        switch (c) {
+            case '&':
+                out << "&amp;";
+                break;
+            case '<':
+                out << "&lt;";
+                break;
+            case '>':
+                // Escaped as well so that the sequence "]]>" can never occur
+                // literally in the output.
+                out << "&gt;";
+                break;
+            default:
+                out << c;
+                break;
+        }
+    }
+}
+
+/**
+ * Serializes this text node: the indentation, the escaped text and a
+ * terminating newline.
+ *
+ * @param out      the output stream the serialized data is written to.
+ * @param tabdepth the number of tab characters written in front of the text.
+ */
+void Text::serialize(std::ostream &out, unsigned int tabdepth)
+{
+    for (unsigned int t = 0; t < tabdepth; t++) {
+        out << '\t';
+    }
+    writeEscapedCharacterData(out, text);
+    out << '\n';
+}
+}
+}
diff --git a/src/core/XML.hpp b/src/core/XML.hpp
new file mode 100644
index 0000000..824d6ce
--- /dev/null
+++ b/src/core/XML.hpp
@@ -0,0 +1,123 @@
+/*
+ Ousía
+ Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file XML.hpp
+ *
+ * This header provides XML classes to build an XML tree as well as functions
+ * to serialize that XML tree to text. We do not support the full XML
+ * specification (as described here: http://www.w3.org/TR/REC-xml/ ) but only
+ * a small subset. This subset is defined by the following context-free grammar:
+ *
+ * NODE := ELEMENT | string
+ * ELEMENT := START NODES END
+ * NODES := NODE NODES | epsilon
+ * START := < name ATTRIBUTES >
+ * ATTRIBUTES := ATTRIBUTE ATTRIBUTES | epsilon
+ * ATTRIBUTE := key = "value"
+ * END := </ name >
+ *
+ * where the axiom (start symbol) of a document is ELEMENT. Note that we accept
+ * only a single root element and no primitive text at root level. Attributes
+ * are key-value pairs of strings. Start and end tag names have to match.
+ *
+ * @author Benjamin Paaßen (bpaassen@techfak.uni-bielefeld.de)
+ */
+#ifndef _OUSIA_XML_HPP_
+#define _OUSIA_XML_HPP_
+
+#include <map>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <core/managed/Managed.hpp>
+#include <core/managed/ManagedContainer.hpp>
+
+namespace ousia {
+namespace xml {
+
+/**
+ * Abstract base class of everything that can occur in the XML tree. Both
+ * tag-bounded elements and plain text derive from it. The only thing a Node
+ * has to provide is the serialize() function, which is pure virtual here and
+ * implemented by the subclasses.
+ */
+class Node : public Managed {
+public:
+    /**
+     * Creates the node and hands the given manager over to the Managed base
+     * class.
+     *
+     * @param mgr the manager that is passed to the Managed constructor.
+     */
+    Node(Manager &mgr) : Managed(mgr) {}
+
+    /**
+     * Writes the textual representation of this node to the given output
+     * stream. For elements this includes the representation of all child
+     * nodes.
+     *
+     * @param out      the output stream that receives the serialized data.
+     * @param tabdepth the current indentation depth, used for prettier
+     *                 output. The implementations in XML.cpp write this many
+     *                 tab characters in front of each line they emit.
+     */
+    virtual void serialize(std::ostream &out, unsigned int tabdepth) = 0;
+};
+
+/**
+ * A tag-bounded XML element, following the W3C notion of an element:
+ *
+ * http://www.w3.org/TR/REC-xml/#sec-starttags
+ *
+ * Every element has a name. It may carry attributes, stored as key-value
+ * pairs of strings in which each key is unique (enforced by std::map), and it
+ * may contain other Nodes as children.
+ */
+class Element : public Node {
+public:
+    /** The tag name, written in both the start and the end tag. */
+    const std::string name;
+
+    /** The attributes of this element; std::map keeps them sorted by key. */
+    std::map<std::string, std::string> attributes;
+
+    /** The child nodes in the order in which they are serialized. */
+    ManagedVector<Node> children;
+
+    /**
+     * Creates an element with the given name and attributes and no children.
+     *
+     * @param mgr        the manager that is passed on to the Node base class.
+     * @param name       the tag name of the element.
+     * @param attributes the initial attributes of the element.
+     */
+    Element(Manager &mgr, std::string name,
+            std::map<std::string, std::string> attributes)
+        : Node(mgr), name(std::move(name)), attributes(std::move(attributes))
+    {
+    }
+
+    /**
+     * Creates an element with the given name, no attributes and no children.
+     *
+     * @param mgr  the manager that is passed on to the Node base class.
+     * @param name the tag name of the element.
+     */
+    Element(Manager &mgr, std::string name)
+        : Element(mgr, std::move(name), std::map<std::string, std::string>())
+    {
+    }
+
+    /**
+     * Writes this element to the output stream, in this order:
+     *  * the start tag including the name and all attributes,
+     *  * the serialized data of all children, in vector order,
+     *  * the end tag.
+     *
+     * The default value of tabdepth only applies when the function is called
+     * through an Element; the declaration in Node has no default argument.
+     *
+     * @param out      the output stream that receives the serialized data.
+     * @param tabdepth the indentation depth of this element (root: 0).
+     */
+    void serialize(std::ostream &out, unsigned int tabdepth = 0) override;
+};
+
+/**
+ * A leaf node of the XML tree that holds a piece of primitive text. It
+ * corresponds to the "string" alternative of NODE in the grammar given at
+ * the top of this file.
+ */
+class Text : public Node {
+public:
+    /** The text content of this node; it cannot be changed after creation. */
+    const std::string text;
+
+    /**
+     * Creates a text node holding the given text.
+     *
+     * @param mgr  the manager that is passed on to the Node base class.
+     * @param text the content of the text node.
+     */
+    Text(Manager &mgr, std::string text)
+        : Node(mgr), text(std::move(text))
+    {
+    }
+
+    /**
+     * Writes the text of this node to the output stream.
+     *
+     * The default value of tabdepth only applies when the function is called
+     * through a Text; the declaration in Node has no default argument.
+     *
+     * @param out      the output stream that receives the text.
+     * @param tabdepth the indentation depth of this node.
+     */
+    void serialize(std::ostream &out, unsigned int tabdepth = 0) override;
+};
+}
+}
+#endif
diff --git a/test/core/XMLTest.cpp b/test/core/XMLTest.cpp
new file mode 100644
index 0000000..aeedb86
--- /dev/null
+++ b/test/core/XMLTest.cpp
@@ -0,0 +1,77 @@
+/*
+ Ousía
+ Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <gtest/gtest.h>
+
+#include <core/XML.hpp>
+
+#include <sstream>
+
+namespace ousia {
+namespace xml {
+
+/**
+ * Builds a small HTML-like document out of Element and Text nodes and checks
+ * that serializing the root element yields the expected tab-indented markup.
+ */
+TEST(Node, testSerialize)
+{
+    Manager mgr;
+
+    // Root element of the document.
+    Rooted<Element> root{new Element(mgr, "html")};
+
+    // <head> with a <title> that holds a single text node.
+    Rooted<Element> headElem{new Element(mgr, "head")};
+    root->children.push_back(headElem);
+    Rooted<Element> titleElem{new Element(mgr, "title")};
+    headElem->children.push_back(titleElem);
+    titleElem->children.push_back(new Text(mgr, "my title"));
+
+    // <body> with a single <div> that carries two attributes.
+    Rooted<Element> bodyElem{new Element(mgr, "body")};
+    root->children.push_back(bodyElem);
+    Rooted<Element> contentDiv{
+        new Element{mgr, "div", {{"class", "content"}, {"id", "1"}}}};
+    bodyElem->children.push_back(contentDiv);
+
+    // The div contains two paragraphs with identical text.
+    Rooted<Element> firstParagraph{new Element(mgr, "p")};
+    contentDiv->children.push_back(firstParagraph);
+    firstParagraph->children.push_back(new Text(mgr, "my text"));
+    Rooted<Element> secondParagraph{new Element(mgr, "p")};
+    contentDiv->children.push_back(secondParagraph);
+    secondParagraph->children.push_back(new Text(mgr, "my text"));
+
+    // Every nesting level adds one tab; every tag and every text node is
+    // written on its own line.
+    const std::string expected =
+        "<html>\n"
+        "\t<head>\n"
+        "\t\t<title>\n"
+        "\t\t\tmy title\n"
+        "\t\t</title>\n"
+        "\t</head>\n"
+        "\t<body>\n"
+        "\t\t<div class=\"content\" id=\"1\">\n"
+        "\t\t\t<p>\n"
+        "\t\t\t\tmy text\n"
+        "\t\t\t</p>\n"
+        "\t\t\t<p>\n"
+        "\t\t\t\tmy text\n"
+        "\t\t\t</p>\n"
+        "\t\t</div>\n"
+        "\t</body>\n"
+        "</html>\n";
+
+    // Serialize the whole tree starting at the root and compare.
+    std::ostringstream output;
+    root->serialize(output);
+    ASSERT_EQ(expected, output.str());
+}
+}
+}