/*
    Ousía
    Copyright (C) 2014, 2015  Benjamin Paaßen, Andreas Stöckel
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include "OsxmlAttributeLocator.hpp"
#include "OsxmlEventParser.hpp"
namespace ousia {
/* Class OsxmlEventParserData */
/**
 * Mutable state shared by the expat callback functions in this file: the
 * pending character data, the current element depth and the depth of a
 * currently open annotation end tag.
 */
class OsxmlEventParserData {
public:
	/**
	 * Buffer collecting character data until it is issued as a "data" event.
	 */
	TokenizedData data;
	/**
	 * Contains the current depth of the parsing process, i.e. the number of
	 * elements that are currently open.
	 */
	ssize_t depth;
	/**
	 * Depth at which the currently open annotation end tag was opened. The
	 * constructor and decrDepth() set it to -1 while the parser is not inside
	 * an annotation end tag.
	 */
	ssize_t annotationEndTagDepth;
	/**
	 * Constructor taking the sourceId of the file from which the XML is being
	 * parsed. Declared explicit so that a SourceId is never silently converted
	 * into a parser state object.
	 *
	 * @param sourceId is the source id of the XML file from which the data is
	 * currently being parsed.
	 */
	explicit OsxmlEventParserData(SourceId sourceId);
	/**
	 * Increments the depth.
	 */
	void incrDepth();
	/**
	 * Decrements the depth (never below zero) and resets the
	 * annotationEndTagDepth flag once the depth has dropped below it.
	 */
	void decrDepth();
	/**
	 * Returns true if we're currently inside an annotation end tag.
	 *
	 * @return true if an annotation end tag is open at or above the current
	 * depth.
	 */
	bool inAnnotationEndTag();
	/**
	 * Returns true if character data is available.
	 *
	 * @return true if the character data buffer is not empty.
	 */
	bool hasText();
};
/* Class GuardedExpatXmlParser */
/**
 * Wrapper class around the XML_Parser pointer which frees it whenever the
 * scope is left (e.g. because an exception was thrown). The wrapper is the
 * only owner of the parser instance and can therefore not be copied -- a copy
 * would free the same parser a second time.
 */
class GuardedExpatXmlParser {
private:
	/**
	 * Internal pointer to the XML_Parser instance.
	 */
	XML_Parser parser;
public:
	/**
	 * Constructor of the GuardedExpatXmlParser class. Calls XML_ParserCreate
	 * from the expat library. Throws a LoggableException if the XML parser
	 * cannot be initialized.
	 *
	 * @param encoding is the protocol-defined encoding passed to expat (or
	 * nullptr if expat should determine the encoding by itself).
	 */
	explicit GuardedExpatXmlParser(const XML_Char *encoding)
	    : parser(XML_ParserCreate(encoding))
	{
		if (!parser) {
			throw LoggableException{
			    "Internal error: Could not create expat XML parser!"};
		}
	}
	/**
	 * Copying is disabled, the parser handle has exactly one owner.
	 */
	GuardedExpatXmlParser(const GuardedExpatXmlParser &) = delete;
	GuardedExpatXmlParser &operator=(const GuardedExpatXmlParser &) = delete;
	/**
	 * Destructor of the GuardedExpatXmlParser, frees the XML parser instance.
	 */
	~GuardedExpatXmlParser()
	{
		if (parser) {
			XML_ParserFree(parser);
			parser = nullptr;
		}
	}
	/**
	 * Returns the XML_Parser pointer. Note that this overloads the address-of
	 * operator: "&p" yields the expat handle, not the address of the wrapper.
	 */
	XML_Parser operator&() { return parser; }
};
/**
 * Tag name of the special outer element which allows an xml file to contain
 * multiple top-level elements.
 */
static const std::string TOP_LEVEL_TAG = "ousia";
/**
 * Tag name prefix marking the start of an annotation (the trailing colon is
 * part of the prefix).
 */
static const std::string ANNOTATION_START_PREFIX = "a:start:";
/**
 * Tag name prefix marking the end of an annotation (without trailing colon).
 */
static const std::string ANNOTATION_END_PREFIX = "a:end";
/**
 * Synchronizes the position of the xml parser with the default location of the
 * logger instance.
 *
 * @param p is the xml parser instance. Its user data must point at the
 * OsxmlEventParser that drives the parsing process.
 * @param len is the length of the string that should be referred to.
 * @return the SourceLocation that has been set in the logger.
 */
static SourceLocation xmlSyncLoggerPosition(XML_Parser p, size_t len = 0)
{
	// The OsxmlEventParser instance is stored as user data of the xml parser
	OsxmlEventParser *parser =
	    static_cast<OsxmlEventParser *>(XML_GetUserData(p));
	// Build the location from the current byte offset in the XML file; it
	// spans the range [offs, offs + len)
	size_t offs = XML_GetCurrentByteIndex(p);
	SourceLocation loc =
	    SourceLocation{parser->getReader().getSourceId(), offs, offs + len};
	// Make this location the default location of the logger
	parser->getLogger().setDefaultLocation(loc);
	// Return the fetched location
	return loc;
}
/**
 * Callback called by eXpat whenever a start handler is reached.
 */
static void xmlStartElementHandler(void *ref, const XML_Char *name,
                                   const XML_Char **attrs)
{
	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
	XML_Parser p = static_cast(ref);
	OsxmlEventParser *parser =
	    static_cast(XML_GetUserData(p));
	// If there is any text data in the buffer, issue that first
	if (parser->getData().hasText()) {
		TokenizedData &data = parser->getData().data;
		parser->getEvents().data(data);
		data.clear();
	}
	// Read the argument locations -- this is only a stupid and slow hack,
	// but it is necessary, as expat doesn't give use the byte offset of the
	// arguments.
	std::map attributeOffsets =
	    OsxmlAttributeLocator::locate(parser->getReader(),
	                                  XML_GetCurrentByteIndex(p));
	// Update the logger position
	SourceLocation loc = xmlSyncLoggerPosition(p);
	// Fetch the location of the name
	SourceLocation nameLoc = loc;
	auto it = attributeOffsets.find("$tag");
	if (it != attributeOffsets.end()) {
		nameLoc = it->second;
	}
	// Increment the current depth
	parser->getData().incrDepth();
	// Make sure we're currently not inside an annotation end tag -- this would
	// be highly illegal!
	if (parser->getData().inAnnotationEndTag()) {
		parser->getLogger().error(
		    "No tags allowed inside an annotation end tag", nameLoc);
		return;
	}
	// Assemble the arguments
	Variant::mapType args;
	const XML_Char **attr = attrs;
	while (*attr) {
		// Convert the C string to a std::string
		const std::string key{*(attr++)};
		// Ignore xml namespace declarations
		if (Utils::startsWith(key, "xmlns:") && parser->getData().depth == 1) {
			attr++;
			continue;
		}
		// Search the location of the key
		SourceLocation keyLoc;
		auto it = attributeOffsets.find(key);
		if (it != attributeOffsets.end()) {
			keyLoc = it->second;
		}
		// Parse the string, pass the location of the key
		std::pair value = VariantReader::parseGenericString(
		    *(attr++), parser->getLogger(), keyLoc.getSourceId(),
		    keyLoc.getStart());
		// Set the overall location of the parsed element to the attribute
		// location
		value.second.setLocation(keyLoc);
		// Store the keys in the map
		args.emplace(key, value.second);
	}
	// Fetch the name of the tag, check for special tags
	std::string nameStr(name);
	if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 1) {
		// We're in the top-level and the magic tag is reached -- just
		// ignore it and issue a warning for each argument that has been given
		for (const auto &arg : args) {
			parser->getLogger().warning(std::string("Ignoring attribute \"") +
			                                arg.first +
			                                std::string("\" for magic tag \"") +
			                                TOP_LEVEL_TAG + std::string("\""),
			                            arg.second);
		}
	} else if (Utils::startsWith(nameStr, ANNOTATION_START_PREFIX)) {
		// Assemble a name variant containing the name minus the prefix
		Variant nameVar =
		    Variant::fromString(nameStr.substr(ANNOTATION_START_PREFIX.size()));
		nameVar.setLocation(nameLoc);
		// Issue the "annotationStart" event
		parser->getEvents().annotationStart(nameVar, args);
	} else if (Utils::startsWith(nameStr, ANNOTATION_END_PREFIX)) {
		// Assemble a name variant containing the name minus the prefix
		nameStr = nameStr.substr(ANNOTATION_END_PREFIX.size());
		// Discard a potentially leading colon
		if (!nameStr.empty() && nameStr[0] == ':') {
			nameStr = nameStr.substr(1);
		}
		// Assemble the variant containing the name and its location
		Variant nameVar = Variant::fromString(nameStr);
		nameVar.setLocation(nameLoc);
		// Check whether a "name" attribute was given
		Variant elementName;
		for (const auto &arg : args) {
			if (arg.first == "name") {
				elementName = arg.second;
			} else {
				parser->getLogger().warning(
				    std::string("Ignoring attribute \"") + arg.first +
				        "\" in annotation end tag",
				    arg.second);
			}
		}
		// Set the annotationEndTagDepth to disallow any further tags to be
		// opened inside the annotation end tag.
		parser->getData().annotationEndTagDepth = parser->getData().depth;
		// Issue the "annotationEnd" event
		parser->getEvents().annotationEnd(nameVar, args);
	} else {
		// Just issue a "commandStart" event in any other case
		Variant nameVar = Variant::fromString(nameStr);
		nameVar.setLocation(nameLoc);
		parser->getEvents().commandStart(nameVar, args);
	}
}
static void xmlEndElementHandler(void *ref, const XML_Char *name)
{
	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
	XML_Parser p = static_cast(ref);
	OsxmlEventParser *parser =
	    static_cast(XML_GetUserData(p));
	// Synchronize the position of the logger with teh position
	xmlSyncLoggerPosition(p);
	// Abort as long as we're in an annotation end tag
	if (parser->getData().inAnnotationEndTag()) {
		parser->getData().decrDepth();
		return;
	}
	// Decrement the current depth
	parser->getData().decrDepth();
	// If there is any text data in the buffer, issue that first
	if (parser->getData().hasText()) {
		TokenizedData &data = parser->getData().data;
		parser->getEvents().data(data);
		data.clear();
	}
	// Abort if the special ousia tag ends here
	std::string nameStr{name};
	if (nameStr == TOP_LEVEL_TAG && parser->getData().depth == 0) {
		return;
	}
	// Issue the "rangeEnd" event
	parser->getEvents().rangeEnd();
}
static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len)
{
	// Fetch the XML_Parser pointer p and a pointer at the OsxmlEventParser
	XML_Parser p = static_cast(ref);
	OsxmlEventParser *parser =
	    static_cast(XML_GetUserData(p));
	// Abort as long as we're in an annotation end tag
	if (parser->getData().inAnnotationEndTag()) {
		return;
	}
	// Convert the signed (smell the 90's C library here?) length to an usigned
	// value
	size_t ulen = len > 0 ? static_cast(len) : 0;
	// Synchronize the logger position
	SourceLocation loc = xmlSyncLoggerPosition(p, ulen);
	// Append the data to the buffer
	parser->getData().data.append(std::string(s, ulen), loc.getStart());
}
/* Class OsxmlEvents */
// Out-of-line destructor definition, there is nothing to clean up.
OsxmlEvents::~OsxmlEvents() = default;
/* Class OsxmlEventParserData */
// Starts with an empty buffer for the given source, at depth zero and outside
// of any annotation end tag (marked by the value -1).
OsxmlEventParserData::OsxmlEventParserData(SourceId sourceId)
    : data(sourceId),
      depth(0),
      annotationEndTagDepth(-1)
{
}
// Enters one more nesting level.
void OsxmlEventParserData::incrDepth()
{
	depth += 1;
}
// Leaves one nesting level and forgets about the annotation end tag once the
// level at which it was opened has been left.
void OsxmlEventParserData::decrDepth()
{
	// Never let the depth become negative
	if (depth > 0) {
		depth -= 1;
	}
	const bool leftAnnotationEndTag = annotationEndTagDepth > depth;
	if (leftAnnotationEndTag) {
		annotationEndTagDepth = -1;
	}
}
// True while an annotation end tag is open at or above the current depth.
bool OsxmlEventParserData::inAnnotationEndTag()
{
	// No annotation end tag has been recorded
	if (annotationEndTagDepth <= 0) {
		return false;
	}
	return annotationEndTagDepth <= depth;
}
// True if the character data buffer is not empty.
bool OsxmlEventParserData::hasText()
{
	const bool bufferEmpty = data.empty();
	return !bufferEmpty;
}
/* Class OsxmlEventParser */
// Stores the references to the reader, the event receiver and the logger and
// allocates the internal parser state for the source the reader refers to.
OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events,
                                   Logger &logger)
    : reader(reader),
      events(events),
      logger(logger),
      data(new OsxmlEventParserData(reader.getSourceId()))
{
	// Nothing else to do, parsing only starts once parse() is called
}
// Defined in this file, where OsxmlEventParserData is a complete type.
OsxmlEventParser::~OsxmlEventParser() = default;
void OsxmlEventParser::parse()
{
	// Create the parser object
	GuardedExpatXmlParser p{"UTF-8"};
	// Reset the depth
	data->depth = 0;
	// Pass the reference to this parser instance to the XML handler
	XML_SetUserData(&p, this);
	XML_UseParserAsHandlerArg(&p);
	// Set the callback functions
	XML_SetStartElementHandler(&p, xmlStartElementHandler);
	XML_SetEndElementHandler(&p, xmlEndElementHandler);
	XML_SetCharacterDataHandler(&p, xmlCharacterDataHandler);
	// Feed data into expat while there is data to process
	constexpr size_t BUFFER_SIZE = 64 * 1024;
	while (true) {
		// Fetch a buffer from expat for the input data
		char *buf = static_cast(XML_GetBuffer(&p, BUFFER_SIZE));
		if (!buf) {
			throw OusiaException{"Internal error: XML parser out of memory!"};
		}
		// Read into the buffer
		size_t bytesRead = reader.readRaw(buf, BUFFER_SIZE);
		// Parse the data and handle any XML error as exception
		if (!XML_ParseBuffer(&p, bytesRead, bytesRead == 0)) {
			throw LoggableException{
			    "XML: " + std::string{XML_ErrorString(XML_GetErrorCode(&p))},
			    xmlSyncLoggerPosition(&p)};
		}
		// Abort once there are no more bytes in the stream
		if (bytesRead == 0) {
			break;
		}
	}
}
// Returns the reader the XML data is read from.
CharReader &OsxmlEventParser::getReader() const
{
	return reader;
}
// Returns the logger messages are written to.
Logger &OsxmlEventParser::getLogger() const
{
	return logger;
}
// Returns the receiver of the events issued while parsing.
OsxmlEvents &OsxmlEventParser::getEvents() const
{
	return events;
}
// Returns the internal parser state used by the callback functions.
OsxmlEventParserData &OsxmlEventParser::getData() const
{
	return *data;
}
}