diff options
Diffstat (limited to 'src/plugins/plain/PlainFormatStreamReader.cpp')
-rw-r--r-- | src/plugins/plain/PlainFormatStreamReader.cpp | 261 |
1 files changed, 203 insertions, 58 deletions
diff --git a/src/plugins/plain/PlainFormatStreamReader.cpp b/src/plugins/plain/PlainFormatStreamReader.cpp index 4469536..1bff24b 100644 --- a/src/plugins/plain/PlainFormatStreamReader.cpp +++ b/src/plugins/plain/PlainFormatStreamReader.cpp @@ -25,7 +25,56 @@ namespace ousia { -namespace { +/** + * Plain format default tokenizer. + */ +class PlainFormatTokens : public DynamicTokenizer { +public: + /** + * Id of the backslash token. + */ + TokenTypeId Backslash; + + /** + * Id of the line comment token. + */ + TokenTypeId LineComment; + + /** + * Id of the block comment start token. + */ + TokenTypeId BlockCommentStart; + + /** + * Id of the block comment end token. + */ + TokenTypeId BlockCommentEnd; + + /** + * Id of the field start token. + */ + TokenTypeId FieldStart; + + /** + * Id of the field end token. + */ + TokenTypeId FieldEnd; + + /** + * Registers the plain format tokens in the internal tokenizer. + */ + PlainFormatTokens() + { + Backslash = registerToken("\\"); + LineComment = registerToken("%"); + BlockCommentStart = registerToken("%{"); + BlockCommentEnd = registerToken("}%"); + FieldStart = registerToken("{"); + FieldEnd = registerToken("}"); + } +}; + +static const PlainFormatTokens Tokens; /** * Class used internally to collect data issued via "DATA" event. @@ -110,17 +159,13 @@ public: return res; } }; -} PlainFormatStreamReader::PlainFormatStreamReader(CharReader &reader, Logger &logger) - : reader(reader), logger(logger), fieldIdx(0) + : reader(reader), logger(logger), tokenizer(Tokens) { - tokenBackslash = tokenizer.registerToken("\\"); - tokenLinebreak = tokenizer.registerToken("\n"); - tokenLineComment = tokenizer.registerToken("%"); - tokenBlockCommentStart = tokenizer.registerToken("%{"); - tokenBlockCommentEnd = tokenizer.registerToken("}%"); + // Place an intial command representing the complete file on the stack + commands.push(Command{"", Variant::mapType{}, true, true, true}); } Variant PlainFormatStreamReader::parseIdentifier(size_t start) @@ -155,7 +200,7 @@ Variant PlainFormatStreamReader::parseIdentifier(size_t start) void PlainFormatStreamReader::parseCommand(size_t start) { // Parse the commandName as a first identifier - commandName = parseIdentifier(start); + Variant commandName = parseIdentifier(start); // Check whether the next character is a '#', indicating the start of the // command name @@ -169,6 +214,7 @@ void PlainFormatStreamReader::parseCommand(size_t start) } // Read the arguments (if they are available), otherwise reset them + Variant commandArguments; if (reader.expect('[')) { auto res = VariantReader::parseObject(reader, logger, ']'); commandArguments = res.second; @@ -187,6 +233,13 @@ void PlainFormatStreamReader::parseCommand(size_t start) logger.note("Second occurance is here: ", res.first->second); } } + + // Place the command on the command stack, remove the last commands if we're + // not currently inside a field of these commands + while (!commands.top().inField) { + commands.pop(); + } + commands.push(Command{commandName, commandArguments, false, false, false}); } void PlainFormatStreamReader::parseBlockComment() @@ -194,13 +247,13 @@ void PlainFormatStreamReader::parseBlockComment() DynamicToken token; size_t depth = 1; while (tokenizer.read(reader, token)) { - if (token.type == tokenBlockCommentEnd) { + if (token.type == Tokens.BlockCommentEnd) { depth--; if (depth == 0) { return; } } - if (token.type == tokenBlockCommentStart) { + if (token.type == Tokens.BlockCommentStart) { depth++; } } @@ -212,7 +265,6 @@ void PlainFormatStreamReader::parseBlockComment() void PlainFormatStreamReader::parseLineComment() { char c; - reader.consumePeek(); while (reader.read(c)) { if (c == '\n') { return; @@ -220,78 +272,171 @@ void PlainFormatStreamReader::parseLineComment() } } -PlainFormatStreamReader::State PlainFormatStreamReader::parse() +bool PlainFormatStreamReader::checkIssueData(DataHandler &handler) { -// Macro (sorry for that) used for checking whether there is data to issue, and -// if yes, aborting the loop, allowing for a reentry on a later parse call by -// resetting the peek cursor -#define CHECK_ISSUE_DATA() \ - { \ - if (!dataHandler.isEmpty()) { \ - reader.resetPeek(); \ - abort = true; \ - break; \ - } \ + if (!handler.isEmpty()) { + data = handler.toVariant(reader.getSourceId()); + location = data.getLocation(); + reader.resetPeek(); + return true; } + return false; +} - // Handler for incomming data - DataHandler dataHandler; +bool PlainFormatStreamReader::checkIssueFieldStart() +{ + // Fetch the current command, and check whether we're currently inside a + // field of this command + Command &cmd = commands.top(); + if (!cmd.inField) { + // If this is a range command, we're now implicitly inside the field of + // this command -- we'll have to issue a field start command! + if (cmd.hasRange) { + cmd.inField = true; + reader.resetPeek(); + return true; + } - // Variable set to true if the parser loop should be left - bool abort = false; + // This was not a range command, so obviously we're now inside within + // a field of some command -- so unroll the commands stack until a + // command with open field is reached + while (!commands.top().inField) { + commands.pop(); + } + } + return false; +} + +PlainFormatStreamReader::State PlainFormatStreamReader::parse() +{ + // Handler for incomming data + DataHandler handler; // Read tokens until the outer loop should be left DynamicToken token; - while (!abort && tokenizer.peek(reader, token)) { - // Check whether this backslash just escaped some special or - // whitespace character or was the beginning of a command - if (token.type == tokenBackslash) { - // Check whether this character could be the start of a command + while (tokenizer.peek(reader, token)) { + const TokenTypeId type = token.type; + + // Special handling for Backslash and Text + if (type == Tokens.Backslash) { + // Check whether a command starts now, without advancing the peek + // cursor char c; - reader.consumePeek(); - reader.peek(c); + if (!reader.fetchPeek(c)) { + logger.error("Trailing backslash at the end of the file.", + token); + return State::END; + } + + // Try to parse a command if (Utils::isIdentifierStartCharacter(c)) { - CHECK_ISSUE_DATA(); - reader.resetPeek(); parseCommand(token.location.getStart()); + if (checkIssueData(handler)) { + return State::DATA; + } + location = commands.top().name.getLocation(); return State::COMMAND; } + // Before appending anything to the output data, check whether + // FIELD_START has to be issued, as the current command is a command + // with range + if (checkIssueFieldStart()) { + location = token.location; + return State::FIELD_START; + } + // This was not a special character, just append the given character // to the data buffer, use the escape character start as start // location and the peek offset as end location - dataHandler.append(c, token.location.getStart(), - reader.getPeekOffset()); - } else if (token.type == tokenLineComment) { - CHECK_ISSUE_DATA(); - reader.consumePeek(); - parseLineComment(); - } else if (token.type == tokenBlockCommentStart) { - CHECK_ISSUE_DATA(); + reader.peek(c); // Peek the previously fetched character + handler.append(c, token.location.getStart(), + reader.getPeekOffset()); reader.consumePeek(); - parseBlockComment(); - } else if (token.type == tokenLinebreak) { - CHECK_ISSUE_DATA(); + continue; + } else if (type == TextToken) { + // Check whether FIELD_START has to be issued before appending text + if (checkIssueFieldStart()) { + location = token.location; + return State::FIELD_START; + } + + // Append the text to the data handler + handler.append(token.content, token.location.getStart(), + token.location.getEnd()); + reader.consumePeek(); - return State::LINEBREAK; - } else if (token.type == TextToken) { - dataHandler.append(token.content, token.location.getStart(), - token.location.getEnd()); + continue; } - // Consume the peeked character if we did not abort, otherwise abort - if (!abort) { - reader.consumePeek(); + // A non-text token was reached, make sure all pending data commands + // have been issued + if (checkIssueData(handler)) { + return State::DATA; + } + + // We will handle the token now, consume the peeked characters + reader.consumePeek(); + + // Update the location to the current token location + location = token.location; + + if (token.type == Tokens.LineComment) { + parseLineComment(); + } else if (token.type == Tokens.BlockCommentStart) { + parseBlockComment(); + } else if (token.type == Tokens.FieldStart) { + Command &cmd = commands.top(); + if (!cmd.inField) { + cmd.inField = true; + return State::FIELD_START; + } + logger.error( + "Got field start token \"{\", but no command for which to " + "start the field. Did you mean to write \"\\{\"?", + token); + } else if (token.type == Tokens.FieldEnd) { + // Try to end an open field of the current command -- if the current + // command is not inside an open field, end this command and try to + // close the next one + for (int i = 0; i < 2 && commands.size() > 1; i++) { + Command &cmd = commands.top(); + if (!cmd.inRangeField) { + if (cmd.inField) { + cmd.inField = false; + return State::FIELD_END; + } + commands.pop(); + } else { + break; + } + } + logger.error( + "Got field end token \"}\" but there is no field to end. Did you " + "mean to write \"\\}\"?", + token); + } else { + logger.error("Unexpected token \"" + token.content + "\"", token); } } - // Send out pending output data, otherwise we are at the end of the stream - if (!dataHandler.isEmpty()) { - data = dataHandler.toVariant(reader.getSourceId()); + // Issue available data + if (checkIssueData(handler)) { return State::DATA; } + + location = SourceLocation{reader.getSourceId(), reader.getOffset()}; return State::END; -#undef CHECK_ISSUE_DATA +} + +const Variant &PlainFormatStreamReader::getCommandName() +{ + return commands.top().name; +} + +const Variant &PlainFormatStreamReader::getCommandArguments() +{ + return commands.top().arguments; } } |