summaryrefslogtreecommitdiff
path: root/src/plugins/xml
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-03 17:05:54 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-03 17:05:54 +0100
commitf21e8c6d72eb498435094f07ec5ca120d58e1e02 (patch)
tree01f53dc9abacc9124a5d3d6c58184e384def99b7 /src/plugins/xml
parentbc12a7af4061247a16a62089089793c3c41c5091 (diff)
Implemented small state machine retrieving the byte offsets of each argument of the current xml tag
Diffstat (limited to 'src/plugins/xml')
-rw-r--r--src/plugins/xml/XmlParser.cpp144
1 files changed, 137 insertions, 7 deletions
diff --git a/src/plugins/xml/XmlParser.cpp b/src/plugins/xml/XmlParser.cpp
index 48dd1c2..f4e5caf 100644
--- a/src/plugins/xml/XmlParser.cpp
+++ b/src/plugins/xml/XmlParser.cpp
@@ -17,6 +17,8 @@
*/
#include <iostream>
+#include <map>
+#include <sstream>
#include <vector>
#include <expat.h>
@@ -488,8 +490,7 @@ static const std::multimap<std::string, const ParserState *> XmlStates{
{"field", &TypesystemStructField},
{"constant", &TypesystemConstant},
{"import", &Import},
- {"include", &Include}
- };
+ {"include", &Include}};
}
/**
@@ -508,12 +509,19 @@ struct XMLUserData {
ParserStack *stack;
/**
+ * Pointer to the CharReader instance.
+ */
+ CharReader *reader;
+
+ /**
* Constructor of the XMLUserData struct.
*
* @param stack is a pointer at the ParserStack instance.
+ * @param reader is a pointer at the CharReader instance.
*/
- XMLUserData(ParserStack *stack) : depth(0), stack(stack){
-
+ XMLUserData(ParserStack *stack, CharReader *reader)
+ : depth(0), stack(stack), reader(reader)
+ {
}
};
@@ -582,6 +590,111 @@ static SourceLocation syncLoggerPosition(XML_Parser p, size_t len = 0)
return loc;
}
+/**
+ * States of the small state machine used by reconstructXMLAttributeOffsets()
+ * while it scans the raw text of a single XML start tag.
+ */
+enum class XMLAttributeState {
+    /** Initial state: inside the tag name, left at the first whitespace. */
+    IN_TAG_NAME,
+
+    /** Skipping whitespace until the first character of an attribute name. */
+    SEARCH_ATTR,
+
+    /** Collecting the characters of an attribute name. */
+    IN_ATTR_NAME,
+
+    /** The attribute name was ended by whitespace, a '=' is expected next. */
+    HAS_ATTR_NAME,
+
+    /** The '=' has been read, the opening quotation mark is expected next. */
+    HAS_ATTR_EQUALS,
+
+    /** Inside the quoted attribute value, left at the closing quotation mark. */
+    IN_ATTR_DATA
+};
+
+/**
+ * Scans the raw text of the XML start tag beginning at the given location and
+ * records, for each attribute found, a SourceLocation for its value.
+ *
+ * The scan runs on a fork of the given reader, so the read position of the
+ * reader itself is not touched by this function. The offset stored for an
+ * attribute is whatever the fork reports via getOffset() right after the
+ * opening quotation mark of the value has been read (presumably the offset of
+ * the first byte of the value -- confirm against CharReader::getOffset()).
+ *
+ * Attribute values may be delimited by either double or single quotes; the
+ * value ends at the next occurrence of the quote character that opened it.
+ *
+ * @param reader is the CharReader the XML document is being read from.
+ * @param location is the location of the start tag; its start offset is used
+ * as the position at which the scan begins.
+ * @return a map from attribute name to the location recorded for its value.
+ * The map is empty if the location is invalid or the fork cannot seek to its
+ * start offset. If the same name occurs more than once the first occurrence
+ * is kept.
+ */
+static std::map<std::string, SourceLocation> reconstructXMLAttributeOffsets(
+    CharReader &reader, SourceLocation location)
+{
+    std::map<std::string, SourceLocation> res;
+
+    // Fork the reader, we don't want to mess up the XML parsing process
+    CharReaderFork readerFork = reader.fork();
+
+    // Move the read cursor to the start location, abort if this does not work
+    size_t offs = location.getStart();
+    if (!location.isValid() || offs != readerFork.seek(offs)) {
+        return res;
+    }
+
+    // What follows is deliberately only a rough approximation of an XML
+    // tokenizer. All we want are the offsets of the attribute values for
+    // pretty error messages, and the XML has already been accepted by expat,
+    // so malformed input only needs best-effort recovery.
+    XMLAttributeState state = XMLAttributeState::IN_TAG_NAME;
+    char c;
+
+    // Quote character that opened the attribute value currently being skipped
+    char quote = '"';
+
+    // Name of the attribute currently being assembled. A plain string is used
+    // on purpose: resetting a std::stringstream via str(s) leaves the write
+    // position at the start, so the next insertion would overwrite s.
+    std::string attrName;
+
+    while (readerFork.read(c)) {
+        // Abort at the end of the tag. A '>' inside an attribute value is
+        // ordinary data and must not end the scan.
+        if (c == '>' && state != XMLAttributeState::IN_ATTR_DATA) {
+            return res;
+        }
+
+        switch (state) {
+            case XMLAttributeState::IN_TAG_NAME:
+                // The tag name ends at the first whitespace character
+                if (Utils::isWhitespace(c)) {
+                    state = XMLAttributeState::SEARCH_ATTR;
+                }
+                break;
+            case XMLAttributeState::SEARCH_ATTR:
+                // The first non-whitespace character starts an attribute name
+                if (!Utils::isWhitespace(c)) {
+                    state = XMLAttributeState::IN_ATTR_NAME;
+                    attrName.push_back(c);
+                }
+                break;
+            case XMLAttributeState::IN_ATTR_NAME:
+                if (Utils::isWhitespace(c)) {
+                    state = XMLAttributeState::HAS_ATTR_NAME;
+                } else if (c == '=') {
+                    state = XMLAttributeState::HAS_ATTR_EQUALS;
+                } else {
+                    attrName.push_back(c);
+                }
+                break;
+            case XMLAttributeState::HAS_ATTR_NAME:
+                if (!Utils::isWhitespace(c)) {
+                    if (c == '=') {
+                        state = XMLAttributeState::HAS_ATTR_EQUALS;
+                        break;
+                    }
+                    // We expected to see a '=' here. Continue with the
+                    // handling of the "HAS_ATTR_EQUALS" state, which includes
+                    // the error recovery for unexpected characters.
+                } else {
+                    // Skip whitespace between the name and the '='
+                    break;
+                }
+                // Fallthrough
+            case XMLAttributeState::HAS_ATTR_EQUALS:
+                if (!Utils::isWhitespace(c)) {
+                    if (c == '"' || c == '\'') {
+                        // Beginning of an attribute value: remember which
+                        // quote opened it and store the current offset in
+                        // the result map
+                        quote = c;
+                        res.emplace(attrName,
+                                    SourceLocation{reader.getSourceId(),
+                                                   readerFork.getOffset()});
+                        attrName.clear();
+                        state = XMLAttributeState::IN_ATTR_DATA;
+                    } else {
+                        // Not well formed. Assume this character starts a
+                        // new attribute name and keep it as its first
+                        // character.
+                        attrName.assign(1, c);
+                        state = XMLAttributeState::IN_ATTR_NAME;
+                    }
+                }
+                break;
+            case XMLAttributeState::IN_ATTR_DATA:
+                // Only the quote character that opened the value closes it
+                if (c == quote) {
+                    state = XMLAttributeState::SEARCH_ATTR;
+                }
+                break;
+        }
+    }
+    return res;
+}
+
static void xmlStartElementHandler(void *p, const XML_Char *name,
const XML_Char **attrs)
{
@@ -591,14 +704,31 @@ static void xmlStartElementHandler(void *p, const XML_Char *name,
SourceLocation loc = syncLoggerPosition(parser);
+ // Read the argument locations -- this is only a stupid and slow hack,
+ // but it is necessary, as expat doesn't give us the byte offset of the
+ // arguments.
+ std::map<std::string, SourceLocation> offs =
+ reconstructXMLAttributeOffsets(*userData->reader, loc);
+
// Assemble the arguments
Variant::mapType args;
const XML_Char **attr = attrs;
while (*attr) {
+ // Convert the C string to a std::string
const std::string key{*(attr++)};
+
+ // Search the location of the key
+ SourceLocation keyLoc;
+ auto it = offs.find(key);
+ if (it != offs.end()) {
+ keyLoc = it->second;
+ }
+
+ // Parse the string, pass the location of the key
std::pair<bool, Variant> value = VariantReader::parseGenericString(
- *(attr++), stack->getContext().getLogger());
- args.emplace(std::make_pair(key, value.second));
+ *(attr++), stack->getContext().getLogger(), keyLoc.getSourceId(),
+ keyLoc.getStart());
+ args.emplace(key, value.second);
}
// Call the start function
@@ -660,7 +790,7 @@ void XmlParser::doParse(CharReader &reader, ParserContext &ctx)
}
// Pass the reference to the ParserStack to the XML handler
- XMLUserData data(&stack);
+ XMLUserData data(&stack, &reader);
XML_SetUserData(&p, &data);
XML_UseParserAsHandlerArg(&p);