From 3899cd3c8fb3eccb73a43208e90d88cfcc64c41c Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Fri, 3 Apr 2015 22:24:35 +0200 Subject: Identifiers may not end with an underscore --- src/core/common/Utils.cpp | 4 ++-- src/core/common/Utils.hpp | 8 ++++++++ src/formats/osml/OsmlStreamParser.cpp | 10 +++++++++- test/core/common/UtilsTest.cpp | 1 + test/formats/osml/OsmlStreamParserTest.cpp | 30 ++++++++++++++++++++++++++++++ 5 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/core/common/Utils.cpp b/src/core/common/Utils.cpp index a87ff6d..67920c2 100644 --- a/src/core/common/Utils.cpp +++ b/src/core/common/Utils.cpp @@ -36,7 +36,7 @@ bool Utils::isIdentifier(const std::string &name) } first = false; } - return !first; + return !first && isIdentifierEndCharacter(name.back()); } bool Utils::isIdentifierOrEmpty(const std::string &name) @@ -150,4 +150,4 @@ bool Utils::isUserDefinedToken(const std::string &token) } return false; } -} \ No newline at end of file +} diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index d9e26da..c3b49a0 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -65,6 +65,14 @@ public: return isAlphabetic(c); } + /** + * Returns true if the given character is in [A-Za-z0-9]. + */ + static bool isIdentifierEndCharacter(const char c) + { + return isAlphanumeric(c); + } + /** * Returns true if the given character is in [A-Za-z0-9_-]. 
*/ diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index daf800a..acad57b 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -441,7 +441,15 @@ Variant OsmlStreamParserImpl::parseIdentifier(size_t start, bool allowNSSep) // Abort if this character is not a valid identifer character if ((first && Utils::isIdentifierStartCharacter(c)) || (!first && Utils::isIdentifierCharacter(c))) { - identifier.push_back(c); + if (Utils::isIdentifierEndCharacter(c) || + (reader.fetchPeek(c2) && Utils::isIdentifierCharacter(c2))) { + identifier.push_back(c); + } else { + // Break if a non-identifier-end character is reached and the + // next character is a non-identifier character + reader.resetPeek(); + break; + } } else if (c == ':' && hasCharSinceNSSep && reader.fetchPeek(c2) && Utils::isIdentifierStartCharacter(c2)) { identifier.push_back(c); diff --git a/test/core/common/UtilsTest.cpp b/test/core/common/UtilsTest.cpp index 2aaa430..f1a9af3 100644 --- a/test/core/common/UtilsTest.cpp +++ b/test/core/common/UtilsTest.cpp @@ -31,6 +31,7 @@ TEST(Utils, isIdentifier) EXPECT_FALSE(Utils::isIdentifier("0t-_EST")); EXPECT_FALSE(Utils::isIdentifier("_A")); EXPECT_FALSE(Utils::isIdentifier("invalid key")); + EXPECT_FALSE(Utils::isIdentifier("A_")); EXPECT_FALSE(Utils::isIdentifier("")); } diff --git a/test/formats/osml/OsmlStreamParserTest.cpp b/test/formats/osml/OsmlStreamParserTest.cpp index d47f529..d45a799 100644 --- a/test/formats/osml/OsmlStreamParserTest.cpp +++ b/test/formats/osml/OsmlStreamParserTest.cpp @@ -1435,5 +1435,35 @@ TEST(OsmlStreamParser, userDefinedTokens) assertText(reader, " said.", tokens, WhitespaceMode::PRESERVE, 34, 40); assertEnd(reader); } + +TEST(OsmlStreamParser, commandWithUnderscoreAndEnd) +{ + const char *testString = "\\sum_"; + // 01234 + // 0 + + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, 
"sum", false, Variant::mapType{}, 0, 4); + assertData(parser, "_", 4, 5); + assertEnd(parser); +} + +TEST(OsmlStreamParser, commandWithUnderscore) +{ + const char *testString = "\\sum_ a"; + // 0123456 + // 0 + + CharReader charReader(testString); + + OsmlStreamParser parser(charReader, logger); + + assertCommandStart(parser, "sum", false, Variant::mapType{}, 0, 4); + assertData(parser, "_ a", 4, 7); + assertEnd(parser); +} } -- cgit v1.2.3