summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-12 16:21:36 +0100
committer Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-15 00:06:07 +0100
commit 205810b44c980998958dcd857c2cb34a914dc760 (patch)
tree 47f2a5a1b5415442773b8a8255a46f26f33ca4c4
parent 856fa8298d55c07313d9638d9f8b8c0913202b2c (diff)
Implemented annotation start and end field
-rw-r--r-- contrib/test.osml (renamed from contrib/test.osdm) 0
-rw-r--r-- src/formats/osml/OsmlStreamParser.cpp 116
-rw-r--r-- src/formats/osml/OsmlStreamParser.hpp 16
-rw-r--r-- test/formats/osml/OsmlStreamParserTest.cpp 228
4 files changed, 334 insertions, 26 deletions
diff --git a/contrib/test.osdm b/contrib/test.osml
index 100bc77..100bc77 100644
--- a/contrib/test.osdm
+++ b/contrib/test.osml
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 6606120..0174fa4 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -66,6 +66,16 @@ public:
TokenTypeId DefaultFieldStart;
/**
+ * Id of the annotation start token.
+ */
+ TokenTypeId AnnotationStart;
+
+ /**
+ * Id of the annotation end token.
+ */
+ TokenTypeId AnnotationEnd;
+
+ /**
* Registers the plain format tokens in the internal tokenizer.
*/
PlainFormatTokens()
@@ -77,6 +87,8 @@ public:
FieldStart = registerToken("{");
FieldEnd = registerToken("}");
DefaultFieldStart = registerToken("{!");
+ AnnotationStart = registerToken("<\\");
+ AnnotationEnd = registerToken("\\>");
}
};
@@ -374,7 +386,8 @@ void OsmlStreamParser::pushCommand(Variant commandName,
hasRange, false, false, false});
}
-OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
+OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start,
+ bool isAnnotation)
{
// Parse the commandName as a first identifier
Variant commandName = parseIdentifier(start, true);
@@ -388,6 +401,9 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
Utils::split(commandName.asString(), ':');
const bool isBegin = commandNameComponents[0] == "begin";
const bool isEnd = commandNameComponents[0] == "end";
+
+ // Parse the begin or end command
+ State res = State::COMMAND;
if (isBegin || isEnd) {
if (commandNameComponents.size() > 1) {
logger.error(
@@ -396,30 +412,76 @@ OsmlStreamParser::State OsmlStreamParser::parseCommand(size_t start)
commandName);
}
if (isBegin) {
- return parseBeginCommand();
+ res = parseBeginCommand();
} else if (isEnd) {
- return parseEndCommand();
+ res = parseEndCommand();
+ }
+ } else {
+ // Check whether the next character is a '#', indicating the start of
+ // the value of the command's "name" argument
+ Variant commandArgName;
+ start = reader.getOffset();
+ if (reader.expect('#')) {
+ commandArgName = parseIdentifier(start);
+ if (commandArgName.asString().empty()) {
+ logger.error("Expected identifier after \"#\"", commandArgName);
+ }
}
+
+ // Parse the arguments
+ Variant commandArguments =
+ parseCommandArguments(std::move(commandArgName));
+
+ // Push the command onto the command stack
+ pushCommand(std::move(commandName), std::move(commandArguments), false);
}
- // Check whether the next character is a '#', indicating the start of the
- // command name
- Variant commandArgName;
- start = reader.getOffset();
- if (reader.expect('#')) {
- commandArgName = parseIdentifier(start);
- if (commandArgName.asString().empty()) {
- logger.error("Expected identifier after \"#\"", commandArgName);
+ // Check whether a ">" character is the next character that is to be read.
+ // In that case the current command could be an annotation end command!
+ char c;
+ if (reader.fetch(c) && c == '>') {
+ // Ignore the character after a begin or end command
+ if (isBegin || isEnd) {
+ logger.warning(
+ "Ignoring annotation end character \">\" after special "
+ "commands \"begin\" or \"end\". Write \"\\>\" to end a "
+ "\"begin\"/\"end\" enclosed annotation.",
+ reader);
+ return res;
}
- }
- // Parse the arugments
- Variant commandArguments = parseCommandArguments(std::move(commandArgName));
+ // If this should be an annotation, ignore the character
+ if (isAnnotation) {
+ logger.warning(
+ "Ignoring annotation end character \">\" after annotation "
+ "start command. Write \"\\>\" to end the annotation.",
+ reader);
+ } else {
+ // Make sure no arguments apart from the "name" argument are given
+ // to an annotation end
+ Variant::mapType &map = commands.top().arguments.asMap();
+ if (!map.empty()) {
+ if (map.count("name") == 0 || map.size() > 1U) {
+ logger.error(
+ "An annotation end command may not have any arguments "
+ "other than \"name\"");
+ return res;
+ }
+ }
- // Push the command onto the command stack
- pushCommand(std::move(commandName), std::move(commandArguments), false);
+ // If we got here, this is a valid ANNOTATION_END command, issue it
+ reader.peek(c);
+ reader.consumePeek();
+ return State::ANNOTATION_END;
+ }
+ }
- return State::COMMAND;
+ // If we're starting an annotation, return the command as annotation start
+ // instead of command
+ if (isAnnotation && res == State::COMMAND) {
+ return State::ANNOTATION_START;
+ }
+ return res;
}
void OsmlStreamParser::parseBlockComment()
@@ -522,7 +584,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
const TokenTypeId type = token.type;
// Special handling for Backslash and Text
- if (type == Tokens.Backslash) {
+ if (type == Tokens.Backslash || type == Tokens.AnnotationStart) {
// Before appending anything to the output data or starting a new
// command, check whether FIELD_START has to be issued, as the
// current command is a command with range
@@ -548,7 +610,8 @@ OsmlStreamParser::State OsmlStreamParser::parse()
}
// Parse the actual command
- State res = parseCommand(token.location.getStart());
+ State res = parseCommand(token.location.getStart(),
+ type == Tokens.AnnotationStart);
switch (res) {
case State::ERROR:
throw LoggableException(
@@ -565,6 +628,14 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// to the data buffer, use the escape character start as start
// location and the peek offset as end location
reader.peek(c); // Peek the previously fetched character
+
+ // If this was an annotation start token, add the parsed < to the
+ // output
+ if (type == Tokens.AnnotationStart) {
+ handler.append('<', token.location.getStart(),
+ token.location.getStart() + 1);
+ }
+
handler.append(c, token.location.getStart(),
reader.getPeekOffset());
reader.consumePeek();
@@ -632,6 +703,13 @@ OsmlStreamParser::State OsmlStreamParser::parse()
"which to start the field. Write \"\\{!\" to insert this "
"sequence as text",
token);
+ } else if (token.type == Tokens.AnnotationEnd) {
+ // We got a single annotation end token "\>" -- simply issue the
+ // ANNOTATION_END event
+ Variant annotationName = Variant::fromString("");
+ annotationName.setLocation(token.location);
+ pushCommand(annotationName, Variant::mapType{}, false);
+ return State::ANNOTATION_END;
} else {
logger.error("Unexpected token \"" + token.content + "\"", token);
}
diff --git a/src/formats/osml/OsmlStreamParser.hpp b/src/formats/osml/OsmlStreamParser.hpp
index bb5db65..3827118 100644
--- a/src/formats/osml/OsmlStreamParser.hpp
+++ b/src/formats/osml/OsmlStreamParser.hpp
@@ -161,7 +161,13 @@ public:
/**
* Default constructor.
*/
- Command() : hasRange(false), inField(false), inRangeField(false), inDefaultField() {}
+ Command()
+ : hasRange(false),
+ inField(false),
+ inRangeField(false),
+ inDefaultField()
+ {
+ }
/**
* Constructor of the Command class.
@@ -179,8 +185,8 @@ public:
* @param inDefaultField is set to true if we currently are in a
* specially marked default field.
*/
- Command(Variant name, Variant arguments, bool hasRange, bool inField,
- bool inRangeField, bool inDefaultField)
+ Command(Variant name, Variant arguments, bool hasRange,
+ bool inField, bool inRangeField, bool inDefaultField)
: name(std::move(name)),
arguments(std::move(arguments)),
hasRange(hasRange),
@@ -266,9 +272,11 @@ private:
*
* @param start is the start byte offset of the command (including the
* backslash)
+ * @param isAnnotation if true, the command is not returned as command, but
+ * as annotation start.
* @return true if a command was actuall parsed, false otherwise.
*/
- State parseCommand(size_t start);
+ State parseCommand(size_t start, bool isAnnotation);
/**
* Function used internally to parse a block comment.
diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp
index 5f23822..d52fa5b 100644
--- a/test/formats/osml/OsmlStreamParserTest.cpp
+++ b/test/formats/osml/OsmlStreamParserTest.cpp
@@ -98,6 +98,56 @@ static void assertFieldEnd(OsmlStreamParser &reader,
}
}
+static void assertAnnotationStart(OsmlStreamParser &reader,
+ const std::string &name,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset)
+{
+ ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_START, reader.parse());
+ EXPECT_EQ(name, reader.getCommandName().asString());
+ if (start != InvalidSourceOffset) {
+ EXPECT_EQ(start, reader.getCommandName().getLocation().getStart());
+ EXPECT_EQ(start, reader.getLocation().getStart());
+ }
+ if (end != InvalidSourceOffset) {
+ EXPECT_EQ(end, reader.getCommandName().getLocation().getEnd());
+ EXPECT_EQ(end, reader.getLocation().getEnd());
+ }
+}
+
+static void assertAnnotationStart(OsmlStreamParser &reader,
+ const std::string &name,
+ const Variant::mapType &args,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset)
+{
+ assertAnnotationStart(reader, name, start, end);
+ EXPECT_EQ(args, reader.getCommandArguments());
+}
+
+static void assertAnnotationEnd(OsmlStreamParser &reader,
+ const std::string &name,
+ const std::string &elementName,
+ SourceOffset start = InvalidSourceOffset,
+ SourceOffset end = InvalidSourceOffset)
+{
+ ASSERT_EQ(OsmlStreamParser::State::ANNOTATION_END, reader.parse());
+ ASSERT_EQ(name, reader.getCommandName().asString());
+ if (!elementName.empty()) {
+ ASSERT_EQ(1U, reader.getCommandArguments().asMap().size());
+ ASSERT_EQ(1U, reader.getCommandArguments().asMap().count("name"));
+
+ auto it = reader.getCommandArguments().asMap().find("name");
+ ASSERT_EQ(elementName, it->second.asString());
+ }
+ if (start != InvalidSourceOffset) {
+ EXPECT_EQ(start, reader.getLocation().getStart());
+ }
+ if (end != InvalidSourceOffset) {
+ EXPECT_EQ(end, reader.getLocation().getEnd());
+ }
+}
+
static void assertEnd(OsmlStreamParser &reader,
SourceOffset start = InvalidSourceOffset,
SourceOffset end = InvalidSourceOffset)
@@ -184,9 +234,6 @@ TEST(OsmlStreamParser, escapeSpecialCharacters)
testEscapeSpecialCharacter("\\");
testEscapeSpecialCharacter("{");
testEscapeSpecialCharacter("}");
- testEscapeSpecialCharacter("<");
- testEscapeSpecialCharacter(">");
- testEscapeSpecialCharacter("|");
}
TEST(OsmlStreamParser, simpleSingleLineComment)
@@ -1035,5 +1082,180 @@ TEST(OsmlStreamParser, errorFieldAfterExplicitDefaultField)
assertEnd(reader, 10, 10);
}
+TEST(OsmlStreamParser, annotationStart)
+{
+ const char *testString = "<\\a";
+ // 0 12
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
+ assertEnd(reader, 3, 3);
+}
+
+TEST(OsmlStreamParser, annotationStartWithName)
+{
+ const char *testString = "<\\annotationWithName#aName";
+ // 0 1234567890123456789012345
+ // 0 1 2
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertAnnotationStart(reader, "annotationWithName",
+ Variant::mapType{{"name", "aName"}}, 0, 20);
+ assertEnd(reader, 26, 26);
+}
+
+TEST(OsmlStreamParser, annotationStartWithArguments)
+{
+ const char *testString = "<\\annotationWithName#aName[a=1,b=2]";
+ // 0 1234567890123456789012345678901234
+ // 0 1 2 3
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertAnnotationStart(
+ reader, "annotationWithName",
+ Variant::mapType{{"name", "aName"}, {"a", 1}, {"b", 2}}, 0, 20);
+ assertEnd(reader, 35, 35);
+}
+
+TEST(OsmlStreamParser, simpleAnnotationStartBeginEnd)
+{
+ const char *testString = "<\\begin{ab#name}[a=1,b=2] a \\end{ab}\\>";
+ // 0 123456789012345678901234567 89012345 67
+ // 0 1 2 3
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertAnnotationStart(
+ reader, "ab", Variant::mapType{{"name", "name"}, {"a", 1}, {"b", 2}}, 8,
+ 10);
+ assertFieldStart(reader, true, 26, 27);
+ assertData(reader, "a", 26, 27);
+ assertFieldEnd(reader, 33, 35);
+ assertAnnotationEnd(reader, "", "", 36, 38);
+ assertEnd(reader, 38, 38);
+}
+
+TEST(OsmlStreamParser, annotationEnd)
+{
+ const char *testString = "\\a>";
+ // 012
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertAnnotationEnd(reader, "a", "", 0, 2);
+ assertEnd(reader, 3, 3);
+}
+
+TEST(OsmlStreamParser, annotationEndWithName)
+{
+ const char *testString = "\\a#name>";
+ // 01234567
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertAnnotationEnd(reader, "a", "name", 0, 2);
+ assertEnd(reader, 8, 8);
+}
+
+TEST(OsmlStreamParser, annotationEndWithNameAsArgs)
+{
+ const char *testString = "\\a[name=name]>";
+ // 01234567890123
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertAnnotationEnd(reader, "a", "name", 0, 2);
+ assertEnd(reader, 14, 14);
+}
+
+TEST(OsmlStreamParser, errorAnnotationEndWithArguments)
+{
+ const char *testString = "\\a[foo=bar]>";
+ // 012345678901
+ // 0 1
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ logger.reset();
+ ASSERT_FALSE(logger.hasError());
+ assertCommand(reader, "a", Variant::mapType{{"foo", "bar"}}, 0, 2);
+ ASSERT_TRUE(logger.hasError());
+ assertData(reader, ">", 11, 12);
+ assertEnd(reader, 12, 12);
+}
+
+TEST(OsmlStreamParser, closingAnnotation)
+{
+ const char *testString = "<\\a>";
+ // 0 123
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertAnnotationStart(reader, "a", Variant::mapType{}, 0, 3);
+ assertData(reader, ">", 3, 4);
+ assertEnd(reader, 4, 4);
+}
+
+TEST(OsmlStreamParser, annotationWithFields)
+{
+ const char *testString = "a <\\b{c}{d}{!e} f \\> g";
+ // 012 345678901234567 8901
+ // 0 1 2
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertData(reader, "a", 0, 1);
+ assertAnnotationStart(reader, "b", Variant::mapType{}, 2, 5);
+ assertFieldStart(reader, false, 5, 6);
+ assertData(reader, "c", 6, 7);
+ assertFieldEnd(reader, 7, 8);
+ assertFieldStart(reader, false, 8, 9);
+ assertData(reader, "d", 9, 10);
+ assertFieldEnd(reader, 10, 11);
+ assertFieldStart(reader, true, 11, 13);
+ assertData(reader, "e", 13, 14);
+ assertFieldEnd(reader, 14, 15);
+ assertData(reader, "f", 16, 17);
+ assertAnnotationEnd(reader, "", "", 18, 20);
+ assertData(reader, "g", 21, 22);
+ assertEnd(reader, 22, 22);
+}
+
+TEST(OsmlStreamParser, annotationStartEscape)
+{
+ const char *testString = "<\\%test";
+ // 0 123456
+ // 0
+
+ CharReader charReader(testString);
+
+ OsmlStreamParser reader(charReader, logger);
+
+ assertData(reader, "<%test", 0, 7);
+ assertEnd(reader, 7, 7);
+}
}