diff options
Diffstat (limited to 'src/plugins/plain/PlainFormatStreamReader.cpp')
-rw-r--r-- | src/plugins/plain/PlainFormatStreamReader.cpp | 641 |
1 files changed, 0 insertions, 641 deletions
diff --git a/src/plugins/plain/PlainFormatStreamReader.cpp b/src/plugins/plain/PlainFormatStreamReader.cpp deleted file mode 100644 index 05769f0..0000000 --- a/src/plugins/plain/PlainFormatStreamReader.cpp +++ /dev/null @@ -1,641 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <core/common/CharReader.hpp> -#include <core/common/Logger.hpp> -#include <core/common/Utils.hpp> -#include <core/common/VariantReader.hpp> - -#include "PlainFormatStreamReader.hpp" - -namespace ousia { - -/** - * Plain format default tokenizer. - */ -class PlainFormatTokens : public DynamicTokenizer { -public: - /** - * Id of the backslash token. - */ - TokenTypeId Backslash; - - /** - * Id of the line comment token. - */ - TokenTypeId LineComment; - - /** - * Id of the block comment start token. - */ - TokenTypeId BlockCommentStart; - - /** - * Id of the block comment end token. - */ - TokenTypeId BlockCommentEnd; - - /** - * Id of the field start token. - */ - TokenTypeId FieldStart; - - /** - * Id of the field end token. - */ - TokenTypeId FieldEnd; - - /** - * Registers the plain format tokens in the internal tokenizer. - */ - PlainFormatTokens() - { - Backslash = registerToken("\\"); - LineComment = registerToken("%"); - BlockCommentStart = registerToken("%{"); - BlockCommentEnd = registerToken("}%"); - FieldStart = registerToken("{"); - FieldEnd = registerToken("}"); - } -}; - -static const PlainFormatTokens Tokens; - -/** - * Class used internally to collect data issued via "DATA" event. - */ -class DataHandler { -private: - /** - * Internal character buffer. - */ - std::vector<char> buf; - - /** - * Start location of the character data. - */ - SourceOffset start; - - /** - * End location of the character data. - */ - SourceOffset end; - -public: - /** - * Default constructor, initializes start and end with zeros. - */ - DataHandler() : start(0), end(0) {} - - /** - * Returns true if the internal buffer is empty. - * - * @return true if no characters were added to the internal buffer, false - * otherwise. - */ - bool isEmpty() { return buf.empty(); } - - /** - * Appends a single character to the internal buffer. - * - * @param c is the character that should be added to the internal buffer. - * @param charStart is the start position of the character. - * @param charEnd is the end position of the character. - */ - void append(char c, SourceOffset charStart, SourceOffset charEnd) - { - if (isEmpty()) { - start = charStart; - } - buf.push_back(c); - end = charEnd; - } - - /** - * Appends a string to the internal buffer. - * - * @param s is the string that should be added to the internal buffer. - * @param stringStart is the start position of the string. - * @param stringEnd is the end position of the string. - */ - void append(const std::string &s, SourceOffset stringStart, - SourceOffset stringEnd) - { - if (isEmpty()) { - start = stringStart; - } - std::copy(s.c_str(), s.c_str() + s.size(), back_inserter(buf)); - end = stringEnd; - } - - /** - * Converts the internal buffer to a variant with attached location - * information. - * - * @param sourceId is the source id which is needed for building the - * location information. - * @return a Variant with the internal buffer content as string and - * the correct start and end location. - */ - Variant toVariant(SourceId sourceId) - { - Variant res = Variant::fromString(std::string(buf.data(), buf.size())); - res.setLocation({sourceId, start, end}); - return res; - } -}; - -PlainFormatStreamReader::PlainFormatStreamReader(CharReader &reader, - Logger &logger) - : reader(reader), logger(logger), tokenizer(Tokens) -{ - // Place an intial command representing the complete file on the stack - commands.push(Command{"", Variant::mapType{}, true, true, true}); -} - -Variant PlainFormatStreamReader::parseIdentifier(size_t start, bool allowNSSep) -{ - bool first = true; - bool hasCharSiceNSSep = false; - std::vector<char> identifier; - size_t end = reader.getPeekOffset(); - char c, c2; - while (reader.peek(c)) { - // Abort if this character is not a valid identifer character - if ((first && Utils::isIdentifierStartCharacter(c)) || - (!first && Utils::isIdentifierCharacter(c))) { - identifier.push_back(c); - } else if (c == ':' && hasCharSiceNSSep && reader.fetchPeek(c2) && Utils::isIdentifierStartCharacter(c2)) { - identifier.push_back(c); - } else { - if (c == ':' && allowNSSep) { - logger.error( - "Expected character before and after namespace separator \":\"", - reader); - } - reader.resetPeek(); - break; - } - - // This is no longer the first character - first = false; - - // Advance the hasCharSiceNSSep flag - hasCharSiceNSSep = allowNSSep && (c != ':'); - - end = reader.getPeekOffset(); - reader.consumePeek(); - } - - // Return the identifier at its location - Variant res = - Variant::fromString(std::string(identifier.data(), identifier.size())); - res.setLocation({reader.getSourceId(), start, end}); - return res; -} - -PlainFormatStreamReader::State PlainFormatStreamReader::parseBeginCommand() -{ - // Expect a '{' after the command - reader.consumeWhitespace(); - if (!reader.expect('{')) { - logger.error("Expected \"{\" after \\begin", reader); - return State::NONE; - } - - // Parse the name of the command that should be opened - Variant commandName = parseIdentifier(reader.getOffset(), true); - if (commandName.asString().empty()) { - logger.error("Expected identifier", commandName); - return State::ERROR; - } - - // Check whether the next character is a '#', indicating the start of the - // command name - Variant commandArgName; - SourceOffset start = reader.getOffset(); - if (reader.expect('#')) { - commandArgName = parseIdentifier(start); - if (commandArgName.asString().empty()) { - logger.error("Expected identifier after \"#\"", commandArgName); - } - } - - if (!reader.expect('}')) { - logger.error("Expected \"}\"", reader); - return State::ERROR; - } - - // Parse the arguments - Variant commandArguments = parseCommandArguments(std::move(commandArgName)); - - // Push the command onto the command stack - pushCommand(std::move(commandName), std::move(commandArguments), true); - - return State::COMMAND; -} - -static bool checkStillInField(const PlainFormatStreamReader::Command &cmd, - const Variant &endName, Logger &logger) -{ - if (cmd.inField && !cmd.inRangeField) { - logger.error(std::string("\\end in open field of command \"") + - cmd.name.asString() + std::string("\""), - endName); - logger.note(std::string("Open command started here:"), cmd.name); - return true; - } - return false; -} - -PlainFormatStreamReader::State PlainFormatStreamReader::parseEndCommand() -{ - // Expect a '{' after the command - if (!reader.expect('{')) { - logger.error("Expected \"{\" after \\end", reader); - return State::NONE; - } - - // Fetch the name of the command that should be ended here - Variant name = parseIdentifier(reader.getOffset(), true); - - // Make sure the given command name is not empty - if (name.asString().empty()) { - logger.error("Expected identifier", name); - return State::ERROR; - } - - // Make sure the command name is terminated with a '}' - if (!reader.expect('}')) { - logger.error("Expected \"}\"", reader); - return State::ERROR; - } - - // Unroll the command stack up to the last range command - while (!commands.top().hasRange) { - if (checkStillInField(commands.top(), name, logger)) { - return State::ERROR; - } - commands.pop(); - } - - // Make sure we're not in an open field of this command - if (checkStillInField(commands.top(), name, logger)) { - return State::ERROR; - } - - // Special error message if the top-level command is reached - if (commands.size() == 1) { - logger.error(std::string("Cannot end command \"") + name.asString() + - std::string("\" here, no command open"), - name); - return State::ERROR; - } - - // Inform the about command mismatches - const Command &cmd = commands.top(); - if (commands.top().name.asString() != name.asString()) { - logger.error(std::string("Trying to end command \"") + - cmd.name.asString() + - std::string("\", but open command is \"") + - name.asString() + std::string("\""), - name); - logger.note("Last command was opened here:", cmd.name); - return State::ERROR; - } - - // Set the location to the location of the command that was ended, then end - // the current command - location = name.getLocation(); - commands.pop(); - return cmd.inRangeField ? State::FIELD_END : State::NONE; -} - -Variant PlainFormatStreamReader::parseCommandArguments(Variant commandArgName) -{ - // Parse the arguments using the universal VariantReader - Variant commandArguments; - if (reader.expect('[')) { - auto res = VariantReader::parseObject(reader, logger, ']'); - commandArguments = res.second; - } else { - commandArguments = Variant::mapType{}; - } - - // Insert the parsed name, make sure "name" was not specified in the - // arguments - if (commandArgName.isString()) { - auto res = - commandArguments.asMap().emplace("name", std::move(commandArgName)); - if (!res.second) { - logger.error("Name argument specified multiple times", - SourceLocation{}, MessageMode::NO_CONTEXT); - logger.note("First occurance is here: ", commandArgName); - logger.note("Second occurance is here: ", res.first->second); - } - } - return commandArguments; -} - -void PlainFormatStreamReader::pushCommand(Variant commandName, - Variant commandArguments, - bool hasRange) -{ - // Store the location on the stack - location = commandName.getLocation(); - - // Place the command on the command stack, remove the last commands if we're - // not currently inside a field of these commands - while (!commands.top().inField) { - commands.pop(); - } - commands.push(Command{std::move(commandName), std::move(commandArguments), - hasRange, false, false}); -} - -PlainFormatStreamReader::State PlainFormatStreamReader::parseCommand( - size_t start) -{ - // Parse the commandName as a first identifier - Variant commandName = parseIdentifier(start, true); - if (commandName.asString().empty()) { - logger.error("Empty command name", reader); - return State::NONE; - } - - // Handle the special "begin" and "end" commands - const auto commandNameComponents = - Utils::split(commandName.asString(), ':'); - const bool isBegin = commandNameComponents[0] == "begin"; - const bool isEnd = commandNameComponents[0] == "end"; - if (isBegin || isEnd) { - if (commandNameComponents.size() > 1) { - logger.error( - "Special commands \"\\begin\" and \"\\end\" may not contain a " - "namespace separator \":\"", - commandName); - } - if (isBegin) { - return parseBeginCommand(); - } else if (isEnd) { - return parseEndCommand(); - } - } - - // Check whether the next character is a '#', indicating the start of the - // command name - Variant commandArgName; - start = reader.getOffset(); - if (reader.expect('#')) { - commandArgName = parseIdentifier(start); - if (commandArgName.asString().empty()) { - logger.error("Expected identifier after \"#\"", commandArgName); - } - } - - // Parse the arugments - Variant commandArguments = parseCommandArguments(std::move(commandArgName)); - - // Push the command onto the command stack - pushCommand(std::move(commandName), std::move(commandArguments), false); - - return State::COMMAND; -} - -void PlainFormatStreamReader::parseBlockComment() -{ - DynamicToken token; - size_t depth = 1; - while (tokenizer.read(reader, token)) { - if (token.type == Tokens.BlockCommentEnd) { - depth--; - if (depth == 0) { - return; - } - } - if (token.type == Tokens.BlockCommentStart) { - depth++; - } - } - - // Issue an error if the file ends while we are in a block comment - logger.error("File ended while being in a block comment", reader); -} - -void PlainFormatStreamReader::parseLineComment() -{ - char c; - while (reader.read(c)) { - if (c == '\n') { - return; - } - } -} - -bool PlainFormatStreamReader::checkIssueData(DataHandler &handler) -{ - if (!handler.isEmpty()) { - data = handler.toVariant(reader.getSourceId()); - location = data.getLocation(); - reader.resetPeek(); - return true; - } - return false; -} - -bool PlainFormatStreamReader::checkIssueFieldStart() -{ - // Fetch the current command, and check whether we're currently inside a - // field of this command - Command &cmd = commands.top(); - if (!cmd.inField) { - // If this is a range command, we're now implicitly inside the field of - // this command -- we'll have to issue a field start command! - if (cmd.hasRange) { - cmd.inField = true; - cmd.inRangeField = true; - reader.resetPeek(); - return true; - } - - // This was not a range command, so obviously we're now inside within - // a field of some command -- so unroll the commands stack until a - // command with open field is reached - while (!commands.top().inField) { - commands.pop(); - } - } - return false; -} - -PlainFormatStreamReader::State PlainFormatStreamReader::parse() -{ - // Handler for incomming data - DataHandler handler; - - // Read tokens until the outer loop should be left - DynamicToken token; - while (tokenizer.peek(reader, token)) { - const TokenTypeId type = token.type; - - // Special handling for Backslash and Text - if (type == Tokens.Backslash) { - // Before appending anything to the output data or starting a new - // command, check whether FIELD_START has to be issued, as the - // current command is a command with range - if (checkIssueFieldStart()) { - location = token.location; - return State::FIELD_START; - } - - // Check whether a command starts now, without advancing the peek - // cursor - char c; - if (!reader.fetchPeek(c)) { - logger.error("Trailing backslash at the end of the file.", - token); - return State::END; - } - - // Try to parse a command - if (Utils::isIdentifierStartCharacter(c)) { - // Make sure to issue any data before it is to late - if (checkIssueData(handler)) { - return State::DATA; - } - - // Parse the actual command - State res = parseCommand(token.location.getStart()); - switch (res) { - case State::ERROR: - throw LoggableException( - "Last error was irrecoverable, ending parsing " - "process"); - case State::NONE: - continue; - default: - return res; - } - } - - // This was not a special character, just append the given character - // to the data buffer, use the escape character start as start - // location and the peek offset as end location - reader.peek(c); // Peek the previously fetched character - handler.append(c, token.location.getStart(), - reader.getPeekOffset()); - reader.consumePeek(); - continue; - } else if (type == TextToken) { - // Check whether FIELD_START has to be issued before appending text - if (checkIssueFieldStart()) { - location = token.location; - return State::FIELD_START; - } - - // Append the text to the data handler - handler.append(token.content, token.location.getStart(), - token.location.getEnd()); - - reader.consumePeek(); - continue; - } - - // A non-text token was reached, make sure all pending data commands - // have been issued - if (checkIssueData(handler)) { - return State::DATA; - } - - // We will handle the token now, consume the peeked characters - reader.consumePeek(); - - // Update the location to the current token location - location = token.location; - - if (token.type == Tokens.LineComment) { - parseLineComment(); - } else if (token.type == Tokens.BlockCommentStart) { - parseBlockComment(); - } else if (token.type == Tokens.FieldStart) { - Command &cmd = commands.top(); - if (!cmd.inField) { - cmd.inField = true; - return State::FIELD_START; - } - logger.error( - "Got field start token \"{\", but no command for which to " - "start the field. Did you mean \"\\{\"?", - token); - } else if (token.type == Tokens.FieldEnd) { - // Try to end an open field of the current command -- if the current - // command is not inside an open field, end this command and try to - // close the next one - for (int i = 0; i < 2 && commands.size() > 1; i++) { - Command &cmd = commands.top(); - if (!cmd.inRangeField) { - if (cmd.inField) { - cmd.inField = false; - return State::FIELD_END; - } - commands.pop(); - } else { - break; - } - } - logger.error( - "Got field end token \"}\", but there is no field to end. Did " - "you mean \"\\}\"?", - token); - } else { - logger.error("Unexpected token \"" + token.content + "\"", token); - } - } - - // Issue available data - if (checkIssueData(handler)) { - return State::DATA; - } - - // Make sure all open commands and fields have been ended at the end of the - // stream - while (commands.size() > 1) { - Command &cmd = commands.top(); - if (cmd.inField || cmd.hasRange) { - logger.error("Reached end of stream, but command \"" + - cmd.name.asString() + "\" has not been ended", - cmd.name); - } - commands.pop(); - } - - location = SourceLocation{reader.getSourceId(), reader.getOffset()}; - return State::END; -} - -const Variant &PlainFormatStreamReader::getCommandName() -{ - return commands.top().name; -} - -const Variant &PlainFormatStreamReader::getCommandArguments() -{ - return commands.top().arguments; -} -} - |