summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Paassen <bpaassen@techfak.uni-bielefeld.de>2015-02-12 20:12:07 +0100
committerBenjamin Paassen <bpaassen@techfak.uni-bielefeld.de>2015-02-12 20:12:07 +0100
commit37a691eb6a5ed31dfc28e15db9d076322ab5fdde (patch)
tree04d5c1f901d8fcc1f3e90d3ca24e40f37bb0d5de
parent89f01a0a49f4fd23034d532b37d54d3f3f612082 (diff)
improved XML parser to support transparency on default fields.
-rw-r--r--src/plugins/xml/XmlParser.cpp122
-rw-r--r--testdata/xmlparser/simple_book.oxd28
2 files changed, 85 insertions, 65 deletions
diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp
index 63d9df5..df56e62 100644
--- a/src/plugins/xml/XmlParser.cpp
+++ b/src/plugins/xml/XmlParser.cpp
@@ -117,6 +117,17 @@ public:
}
}
+ void createPath(const NodeVector<Node> &path, DocumentEntity *&parent)
+ { // Descends from `parent` along `path`, creating one child structured entity per step.
+ size_t S = path.size(); // cached once; an empty or single-element path creates nothing
+ for (size_t p = 1; p < S; p = p + 2) { // odd entries are cast to StructuredClass, the even entry before each supplies the field name
+ parent = static_cast<DocumentEntity *>( // `parent` is a reference, so the caller's pointer ends up at the last entity created
+ parent->createChildStructuredEntity(
+ path[p].cast<StructuredClass>(), Variant::mapType{}, // intermediate entities get an empty argument map
+ path[p - 1]->getName(), "").get()); // last argument is an empty string (presumably the entity name, judging by the other call in this file)
+ }
+ }
+
void start(Variant::mapType &args) override
{
scope().setFlag(ParserFlag::POST_HEAD, true);
@@ -176,13 +187,7 @@ public:
}
// create all transparent entities until the last field.
- for (size_t p = 1; p < path.size() - 1; p = p + 2) {
- parent = static_cast<DocumentEntity *>(
- parent->createChildStructuredEntity(
- path[p].cast<StructuredClass>(),
- Variant::mapType{}, path[p - 1]->getName(),
- "").get());
- }
+ createPath(path, parent);
entity = parent->createChildStructuredEntity(strct, args, fieldName,
name);
}
@@ -204,38 +209,81 @@ public:
preamble(parentNode, fieldName, parent, inField);
- // retrieve the correct FieldDescriptor.
- // TODO: Consider fields of transparent classes
Rooted<Descriptor> desc = parent->getDescriptor();
- Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName);
- if (field == nullptr) {
- logger().error(
- std::string("Can't handle data because no field with name \"") +
- fieldName + "\" exists in descriptor\"" + desc->getName() +
- "\".",
- location());
- return;
- }
- if (!field->isPrimitive()) {
- logger().error(std::string("Can't handle data because field \"") +
- fieldName + "\" of descriptor \"" +
- desc->getName() + "\" is not primitive!",
- location());
- return;
- }
-
- // try to parse the content.
- auto res = VariantReader::parseGenericString(
- data, logger(), location().getSourceId(), location().getStart());
- if (!res.first) {
- return;
- }
- // try to convert it to the correct type.
- if (!field->getPrimitiveType()->build(res.second, logger())) {
- return;
+ /*
+ * We distinguish two cases here: the first is for explicitly named fields.
+ */
+ if (fieldName != DEFAULT_FIELD_NAME) {
+ // retrieve the actual FieldDescriptor
+ Rooted<FieldDescriptor> field = desc->getFieldDescriptor(fieldName);
+ if (field == nullptr) {
+ logger().error(
+ std::string(
+ "Can't handle data because no field with name \"") +
+ fieldName + "\" exists in descriptor\"" +
+ desc->getName() + "\".",
+ location());
+ return;
+ }
+ // if it is not primitive at all, we can't parse the content.
+ if (!field->isPrimitive()) {
+ logger().error(
+ std::string("Can't handle data because field \"") +
+ fieldName + "\" of descriptor \"" + desc->getName() +
+ "\" is not primitive!",
+ location());
+ return;
+ }
+ // then try to parse the content using the type specification.
+ // TODO: Improve with new parse method.
+ // try to parse the content.
+ auto res = VariantReader::parseGenericString(
+ data, logger(), location().getSourceId(),
+ location().getStart());
+ if (!res.first) {
+ return;
+ }
+ // try to convert it to the correct type.
+ if (!field->getPrimitiveType()->build(res.second, logger())) {
+ return;
+ }
+ // add it as primitive content.
+ parent->createChildDocumentPrimitive(res.second, fieldName);
+ } else {
+ /*
+ * The second case is for the default field. Here we search through
+ * all FieldDescriptors that allow primitive content at this point
+ * and could be constructed via transparent intermediate entities.
+ * We then try to parse the data using the type specified by the
+ * respective field. If that does not work we proceed to the next
+ * possible field.
+ */
+ // retrieve all fields.
+ NodeVector<FieldDescriptor> fields = desc->getDefaultFields();
+ // TODO: Improve with new parse method.
+ // try to parse the content
+ auto res = VariantReader::parseGenericString(
+ data, logger(), location().getSourceId(),
+ location().getStart());
+ if (!res.first) {
+ return;
+ }
+ for (auto field : fields) {
+ // then try to parse the content using the type specification.
+ // TODO: Improve with new parse method.
+ // try to convert it to the correct type.
+ if (!field->getPrimitiveType()->build(res.second, logger())) {
+ continue;
+ }
+ // if that worked, construct the necessary path.
+ auto pathRes = desc->pathTo(field, logger());
+ assert(pathRes.second);
+ NodeVector<Node> path = pathRes.first;
+ createPath(path, parent);
+ // then create the primitive element.
+ parent->createChildDocumentPrimitive(res.second, fieldName);
+ }
}
- // add it as primitive content.
- parent->createChildDocumentPrimitive(res.second, fieldName);
}
static Handler *create(const HandlerData &handlerData)
diff --git a/testdata/xmlparser/simple_book.oxd b/testdata/xmlparser/simple_book.oxd
index de33536..abf575f 100644
--- a/testdata/xmlparser/simple_book.oxd
+++ b/testdata/xmlparser/simple_book.oxd
@@ -1,45 +1,17 @@
<?xml version="1.0"?>
<document>
<import rel="domain" src="book_domain.oxm"/>
- <!-- Currently we have only one root. Thus we need no wrapper. -->
- <!-- Note that we only reference "book" here, which resolves to the book
- domain as well as the book StructuredClass. This is unambigous however,
- because we are looking for a StructuredClass. The resolving mechanism
- should be able to handle this. -->
<book>
- <!-- implicitly:
- <book name="">
- -->
- <!-- note that we do not refer to the attributes explicitly. Attributes are
- referenced by their key-value pairs as defined in the according
- StructType. For an example please refer to the more complex book
- domain. -->
- <!--<paragraph>-->
- <text>
This might be some introductory text or a dedication.
- </text>
- <!--</paragraph>-->
<!-- Note that a better version of the book domain might specify
headings here. -->
<chapter name="myFirstChapter">
- <paragraph>
- <text>
Here we might have an introduction to the chapter.
- </text>
- </paragraph>
<section name="myFirstSection">
- <paragraph>
- <text>
Here we might find the actual section content.
- </text>
- </paragraph>
</section>
<section name="mySndSection">
- <paragraph>
- <text>
Here we might find the actual section content.
- </text>
- </paragraph>
</section>
</chapter>
</book>