summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-12 16:21:36 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-15 00:06:07 +0100
commit205810b44c980998958dcd857c2cb34a914dc760 (patch)
tree47f2a5a1b5415442773b8a8255a46f26f33ca4c4 /src
parent856fa8298d55c07313d9638d9f8b8c0913202b2c (diff)
Implemented annotation start and end field
Diffstat (limited to 'src')
-rw-r--r--src/formats/osml/OsmlStreamParser.cpp116
-rw-r--r--src/formats/osml/OsmlStreamParser.hpp16
2 files changed, 109 insertions, 23 deletions
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 6606120..0174fa4 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -66,6 +66,16 @@ public:
TokenTypeId DefaultFieldStart;
/**
+ * Id of the annotation start token.
+ */
+ TokenTypeId AnnotationStart;
+
+ /**
+ * Id of the annotation end token.
+ */
+ TokenTypeId AnnotationEnd;
+
+ /**
* Registers the plain format tokens in the internal tokenizer.
*/
PlainFormatTokens()
@@ -77,6 +87,8 @@ public:
FieldStart = registerToken("{");
FieldEnd = registerToken("}");
DefaultFieldStart = registerToken("{!");
+ AnnotationStart = registerToken("<\\");
+ AnnotationEnd = registerToken("\\>");
}
};
@@ -374,7 +386,8 @@ void OsmlStreamParser::pushCommand(Variant commandName,
hasRange, false, false, false});
}
-OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
+OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start,
+ bool isAnnotation)
{
// Parse the commandName as a first identifier
Variant commandName = parseIdentifier(start, true);
@@ -388,6 +401,9 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
Utils::split(commandName.asString(), ':');
const bool isBegin = commandNameComponents[0] == "begin";
const bool isEnd = commandNameComponents[0] == "end";
+
+ // Parse the begin or end command
+ State res = State::COMMAND;
if (isBegin || isEnd) {
if (commandNameComponents.size() > 1) {
logger.error(
@@ -396,30 +412,76 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
commandName);
}
if (isBegin) {
- return parseBeginCommand();
+ res = parseBeginCommand();
} else if (isEnd) {
- return parseEndCommand();
+ res = parseEndCommand();
+ }
+ } else {
+ // Check whether the next character is a '#', indicating the start of
+ // the command name
+ Variant commandArgName;
+ start = reader.getOffset();
+ if (reader.expect('#')) {
+ commandArgName = parseIdentifier(start);
+ if (commandArgName.asString().empty()) {
+ logger.error("Expected identifier after \"#\"", commandArgName);
+ }
}
+
+ // Parse the arguments
+ Variant commandArguments =
+ parseCommandArguments(std::move(commandArgName));
+
+ // Push the command onto the command stack
+ pushCommand(std::move(commandName), std::move(commandArguments), false);
}
- // Check whether the next character is a '#', indicating the start of the
- // command name
- Variant commandArgName;
- start = reader.getOffset();
- if (reader.expect('#')) {
- commandArgName = parseIdentifier(start);
- if (commandArgName.asString().empty()) {
- logger.error("Expected identifier after \"#\"", commandArgName);
+ // Check whether a ">" character is the next character that is to be read.
+ // In that case the current command could be an annotation end command!
+ char c;
+ if (reader.fetch(c) && c == '>') {
+ // Ignore the character after a begin or end command
+ if (isBegin || isEnd) {
+ logger.warning(
+ "Ignoring annotation end character \">\" after special "
+ "commands \"begin\" or \"end\". Write \"\\>\" to end a "
+ "\"begin\"/\"end\" enclosed annotation.",
+ reader);
+ return res;
}
- }
- // Parse the arugments
- Variant commandArguments = parseCommandArguments(std::move(commandArgName));
+ // If this should be an annotation, ignore the character
+ if (isAnnotation) {
+ logger.warning(
+ "Ignoring annotation end character \">\" after annotation "
+ "start command. Write \"\\>\" to end the annotation.",
+ reader);
+ } else {
+ // Make sure no arguments apart from the "name" argument are given
+ // to an annotation end
+ Variant::mapType &map = commands.top().arguments.asMap();
+ if (!map.empty()) {
+ if (map.count("name") == 0 || map.size() > 1U) {
+ logger.error(
+ "An annotation end command may not have any arguments "
+ "other than \"name\"");
+ return res;
+ }
+ }
- // Push the command onto the command stack
- pushCommand(std::move(commandName), std::move(commandArguments), false);
+ // If we got here, this is a valid ANNOTATION_END command, issue it
+ reader.peek(c);
+ reader.consumePeek();
+ return State::ANNOTATION_END;
+ }
+ }
- return State::COMMAND;
+ // If we're starting an annotation, return the command as annotation start
+ // instead of command
+ if (isAnnotation && res == State::COMMAND) {
+ return State::ANNOTATION_START;
+ }
+ return res;
}
void OsmlStreamParser::parseBlockComment()
@@ -522,7 +584,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
const TokenTypeId type = token.type;
// Special handling for Backslash and Text
- if (type == Tokens.Backslash) {
+ if (type == Tokens.Backslash || type == Tokens.AnnotationStart) {
// Before appending anything to the output data or starting a new
// command, check whether FIELD_START has to be issued, as the
// current command is a command with range
@@ -548,7 +610,8 @@ OsmlStreamParser::State OsmlStreamParser::parse()
}
// Parse the actual command
- State res = parseCommand(token.location.getStart());
+ State res = parseCommand(token.location.getStart(),
+ type == Tokens.AnnotationStart);
switch (res) {
case State::ERROR:
throw LoggableException(
@@ -565,6 +628,14 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// to the data buffer, use the escape character start as start
// location and the peek offset as end location
reader.peek(c); // Peek the previously fetched character
+
+ // If this was an annotation start token, add the parsed < to the
+ // output
+ if (type == Tokens.AnnotationStart) {
+ handler.append('<', token.location.getStart(),
+ token.location.getStart() + 1);
+ }
+
handler.append(c, token.location.getStart(),
reader.getPeekOffset());
reader.consumePeek();
@@ -632,6 +703,13 @@ OsmlStreamParser::State OsmlStreamParser::parse()
"which to start the field. Write \"\\{!\" to insert this "
"sequence as text",
token);
+ } else if (token.type == Tokens.AnnotationEnd) {
+ // We got a single annotation end token "\>" -- simply issue the
+ // ANNOTATION_END event
+ Variant annotationName = Variant::fromString("");
+ annotationName.setLocation(token.location);
+ pushCommand(annotationName, Variant::mapType{}, false);
+ return State::ANNOTATION_END;
} else {
logger.error("Unexpected token \"" + token.content + "\"", token);
}
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index bb5db65..3827118 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -161,7 +161,13 @@ public:
/**
* Default constructor.
*/
- Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {}
+ Command()
+ : hasRange(false),
+ inField(false),
+ inRangeField(false),
+ inDefaultField()
+ {
+ }
/**
* Constructor of the Command class.
@@ -179,8 +185,8 @@ public:
* @param inDefaultField is set to true if we currently are in a
* specially marked default field.
*/
- Command(Variant name, Variant arguments, bool hasRange, bool inField,
- bool inRangeField, bool inDefaultField)
+ Command(Variant name, Variant arguments, bool hasRange,
+ bool inField, bool inRangeField, bool inDefaultField)
: name(std::move(name)),
arguments(std::move(arguments)),
hasRange(hasRange),
@@ -266,9 +272,11 @@ private:
*
* @param start is the start byte offset of the command (including the
* backslash)
+ * @param isAnnotation if true, the command is not returned as command, but
+ * as annotation start.
* @return true if a command was actuall parsed, false otherwise.
*/
- State parseCommand(size_t start);
+ State parseCommand(size_t start, bool isAnnotation);
/**
* Function used internally to parse a block comment.