/*
Ousía
Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include
#include
#include
#include "OsxmlAttributeLocator.hpp"
namespace ousia {
/**
* Enum used internally in the statemachine of the xml argument parser.
*/
enum class XmlAttributeState {
IN_TAG_NAME,
SEARCH_ATTR,
IN_ATTR_NAME,
HAS_ATTR_NAME,
HAS_ATTR_EQUALS,
IN_ATTR_DATA
};
std::map OsxmlAttributeLocator::locate(
CharReader &reader, size_t offs)
{
std::map res;
// Fork the reader, we don't want to mess up the XML parsing process, do we?
CharReaderFork readerFork = reader.fork();
// Move the read cursor to the start location, abort if this does not work
if (offs != readerFork.seek(offs)) {
return res;
}
// Now all we need to do is to implement one half of an XML parser. As this
// is inherently complicated we'll totaly fail at it. Don't care. All we
// want to get is those darn offsets for pretty error messages... (and we
// can assume the XML is valid as it was already read by expat)
XmlAttributeState state = XmlAttributeState::IN_TAG_NAME;
char c;
std::stringstream attrName;
while (readerFork.read(c)) {
// Abort at the end of the tag
if (c == '>' && state != XmlAttributeState::IN_ATTR_DATA) {
return res;
}
// One state machine to rule them all, one state machine to find them,
// One state machine to bring them all and in the darkness bind them
// (the byte offsets)
switch (state) {
case XmlAttributeState::IN_TAG_NAME:
if (Utils::isWhitespace(c)) {
res.emplace("$tag",
SourceLocation{reader.getSourceId(), offs + 1,
readerFork.getOffset() - 1});
state = XmlAttributeState::SEARCH_ATTR;
}
break;
case XmlAttributeState::SEARCH_ATTR:
if (!Utils::isWhitespace(c)) {
state = XmlAttributeState::IN_ATTR_NAME;
attrName << c;
}
break;
case XmlAttributeState::IN_ATTR_NAME:
if (Utils::isWhitespace(c)) {
state = XmlAttributeState::HAS_ATTR_NAME;
} else if (c == '=') {
state = XmlAttributeState::HAS_ATTR_EQUALS;
} else {
attrName << c;
}
break;
case XmlAttributeState::HAS_ATTR_NAME:
if (!Utils::isWhitespace(c)) {
if (c == '=') {
state = XmlAttributeState::HAS_ATTR_EQUALS;
break;
}
// Well, this is a strange XML file... We expected to
// see a '=' here! Try to continue with the
// "HAS_ATTR_EQUALS" state as this state will hopefully
// inlcude some error recovery
} else {
// Skip whitespace here
break;
}
// Fallthrough
case XmlAttributeState::HAS_ATTR_EQUALS:
if (!Utils::isWhitespace(c)) {
if (c == '"') {
// Here we are! We have found the beginning of an
// attribute. Let's quickly lock the current offset away
// in the result map
res.emplace(attrName.str(),
SourceLocation{reader.getSourceId(),
readerFork.getOffset()});
state = XmlAttributeState::IN_ATTR_DATA;
} else {
// No, this XML file is not well formed. Assume we're in
// an attribute name once again
attrName.str(std::string{&c, 1});
state = XmlAttributeState::IN_ATTR_NAME;
}
}
break;
case XmlAttributeState::IN_ATTR_DATA:
if (c == '"') {
// We're at the end of the attribute data, set the end
// location
auto it = res.find(attrName.str());
if (it != res.end()) {
it->second.setEnd(readerFork.getOffset() - 1);
}
// Reset the attribute name and restart the search
attrName.str(std::string{});
state = XmlAttributeState::SEARCH_ATTR;
}
break;
}
}
return res;
}
}