 src/core/parser/utils/TokenTrie.cpp      | 20
 src/core/parser/utils/TokenTrie.hpp      | 23
 src/core/parser/utils/Tokenizer.cpp      | 38
 src/core/parser/utils/Tokenizer.hpp      | 73
 src/formats/osml/OsmlStreamParser.cpp    | 50
 test/core/parser/utils/TokenTrieTest.cpp | 24
 test/core/parser/utils/TokenizerTest.cpp | 80
 7 files changed, 121 insertions(+), 187 deletions(-)
diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp
index 4a0430b..80cc945 100644
--- a/src/core/parser/utils/TokenTrie.cpp
+++ b/src/core/parser/utils/TokenTrie.cpp
@@ -22,12 +22,12 @@ namespace ousia {
-/* Class DynamicTokenTree::Node */
+/* Class TokenTrie::Node */
-TokenTrie::Node::Node() : type(EmptyToken) {}
+TokenTrie::Node::Node() : type(Tokens::Empty) {}
-/* Class DynamicTokenTree */
+/* Class TokenTrie */
bool TokenTrie::registerToken(const std::string &token,
- TokenTypeId type) noexcept
+ TokenId type) noexcept
{
// Abort if the token is empty -- this would taint the root node
if (token.empty()) {
@@ -48,7 +48,7 @@ bool TokenTrie::registerToken(const std::string &token,
}
// If the resulting node already has a type set, we're screwed.
- if (node->type != EmptyToken) {
+ if (node->type != Tokens::Empty) {
return false;
}
@@ -78,22 +78,22 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept
// Reset the subtree handler if this node has another type
node = it->second.get();
- if ((node->type != EmptyToken || node->children.size() > 1) &&
+ if ((node->type != Tokens::Empty || node->children.size() > 1) &&
(i + 1 != token.size())) {
subtreeRoot = node;
subtreeKey = token[i + 1];
}
}
- // If the node type is already EmptyToken, we cannot do anything here
- if (node->type == EmptyToken) {
+ // If the node type is already Tokens::Empty, we cannot do anything here
+ if (node->type == Tokens::Empty) {
return false;
}
// If the target node has children, we cannot delete the subtree. Set the
- // type to EmptyToken instead
+ // type to Tokens::Empty instead
if (!node->children.empty()) {
- node->type = EmptyToken;
+ node->type = Tokens::Empty;
return true;
}
@@ -102,14 +102,14 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept
return true;
}
-TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept
+TokenId TokenTrie::hasToken(const std::string &token) const noexcept
{
Node const *node = &root;
for (size_t i = 0; i < token.size(); i++) {
const char c = token[i];
auto it = node->children.find(c);
if (it == node->children.end()) {
- return EmptyToken;
+ return Tokens::Empty;
}
node = it->second.get();
}
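
As context for the lookup changes above, here is a minimal, self-contained sketch of the per-character traversal that hasToken() performs. It is an illustration only: the Node layout is simplified, and Tokens::Empty is assumed to be a sentinel constant from the new Token.hpp, which is not part of this diff.

// Sketch of the trie lookup in TokenTrie::hasToken(); simplified, not
// the real ousia class. Tokens::Empty is assumed from Token.hpp.
#include <cstdint>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <unordered_map>

using TokenId = uint32_t;
namespace Tokens {
constexpr TokenId Empty = std::numeric_limits<TokenId>::max();
}

struct Node {
	std::unordered_map<char, std::unique_ptr<Node>> children;
	TokenId type = Tokens::Empty;
};

// Walk the trie one character at a time; any miss means "no such token".
static TokenId hasToken(const Node &root, const std::string &token)
{
	const Node *node = &root;
	for (char c : token) {
		auto it = node->children.find(c);
		if (it == node->children.end()) {
			return Tokens::Empty;
		}
		node = it->second.get();
	}
	return node->type;
}

int main()
{
	Node root;
	// Build the path for "ab" by hand; registerToken() does this itself.
	auto b = std::make_unique<Node>();
	b->type = 1;
	auto a = std::make_unique<Node>();
	a->children.emplace('b', std::move(b));
	root.children.emplace('a', std::move(a));

	std::cout << hasToken(root, "ab") << "\n";                    // 1
	std::cout << (hasToken(root, "a") == Tokens::Empty) << "\n";  // 1 (true)
	return 0;
}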
diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp
index 36c2ffa..b2d1539 100644
--- a/src/core/parser/utils/TokenTrie.hpp
+++ b/src/core/parser/utils/TokenTrie.hpp
@@ -33,22 +33,9 @@
#include <limits>
#include <unordered_map>
-namespace ousia {
-
-/**
- * The TokenTypeId is used to give each token type a unique id.
- */
-using TokenTypeId = uint32_t;
-
-/**
- * Token which is not a token.
- */
-constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max();
+#include "Token.hpp"
-/**
- * Token which represents a text token.
- */
-constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1;
+namespace ousia {
/**
 * The Tokenizer internally uses a TokenTrie to efficiently identify
@@ -91,7 +78,7 @@ public:
-	 * Reference at the corresponding token descriptor. Set to nullptr if
-	 * no token is attached to this node.
+	 * Id of the token ending at this node, or Tokens::Empty if no token
+	 * is attached to this node.
*/
- TokenTypeId type;
+ TokenId type;
/**
-	 * Default constructor, initializes the descriptor with nullptr.
+	 * Default constructor, initializes the type with Tokens::Empty.
@@ -115,7 +102,7 @@ public:
* @param type is the descriptor that should be set for this token.
* @return true if the operation is successful, false otherwise.
*/
- bool registerToken(const std::string &token, TokenTypeId type) noexcept;
+ bool registerToken(const std::string &token, TokenId type) noexcept;
/**
* Unregisters the token from the token tree. Returns true if the token was
@@ -134,7 +121,7 @@ public:
-	 * @return the attached token descriptor or nullptr if the given token is
-	 * not found.
+	 * @return the id of the attached token, or Tokens::Empty if the given
+	 * token is not found.
*/
- TokenTypeId hasToken(const std::string &token) const noexcept;
+ TokenId hasToken(const std::string &token) const noexcept;
/**
* Returns a reference at the root node to be used for traversing the token
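
The new Token.hpp included above is not part of this diff. Judging from its usages here and from the Token struct that this patch removes from Tokenizer.hpp further below, it presumably declares something along these lines (a hypothetical reconstruction, not the actual header):

// Hypothetical reconstruction of Token.hpp, inferred from this diff;
// the real header may differ.
#include <cstdint>
#include <limits>
#include <string>

#include <core/common/Location.hpp>

namespace ousia {

/* Unique id of a token type */
using TokenId = uint32_t;

namespace Tokens {
/* Sentinel id representing "no token" */
constexpr TokenId Empty = std::numeric_limits<TokenId>::max();
/* Id assigned to plain data (text) tokens */
constexpr TokenId Data = std::numeric_limits<TokenId>::max() - 1;
}

/* Same fields as the struct removed from Tokenizer.hpp below, with
   "type" renamed to "id" */
struct Token {
	TokenId id;
	std::string content;
	SourceLocation location;

	Token() : id(Tokens::Empty) {}
	Token(TokenId id, const std::string &content, SourceLocation location)
	    : id(id), content(content), location(location)
	{
	}
	const SourceLocation &getLocation() const { return location; }
};
}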
diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp
index 3c8177d..2e0ac13 100644
--- a/src/core/parser/utils/Tokenizer.cpp
+++ b/src/core/parser/utils/Tokenizer.cpp
@@ -61,7 +61,7 @@ struct TokenMatch {
/**
* Returns true if this TokenMatch instance actually represents a match.
*/
- bool hasMatch() { return token.type != EmptyToken; }
+ bool hasMatch() { return token.id != Tokens::Empty; }
};
/* Internal class TokenLookup */
@@ -138,7 +138,7 @@ public:
// Check whether the new node represents a complete token and whether it
// is longer than the current token. If yes, replace the current token.
node = it->second.get();
- if (node->type != EmptyToken) {
+ if (node->type != Tokens::Empty) {
const std::string &str = tokens[node->type];
size_t len = str.size();
if (len > match.token.content.size()) {
@@ -157,14 +157,14 @@ public:
};
/**
- * Transforms the given token into a text token containing the extracted
+ * Transforms the given token into a data token containing the extracted
* text.
*
* @param handler is the WhitespaceHandler containing the collected data.
* @param token is the output token to which the text should be written.
* @param sourceId is the source id of the underlying file.
*/
-static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
+static void buildDataToken(const WhitespaceHandler &handler, TokenMatch &match,
SourceId sourceId)
{
if (match.hasMatch()) {
@@ -177,14 +177,14 @@ static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
match.token.location =
SourceLocation{sourceId, handler.textStart, handler.textEnd};
}
- match.token.type = TextToken;
+ match.token.id = Tokens::Data;
}
}
/* Class Tokenizer */
Tokenizer::Tokenizer(WhitespaceMode whitespaceMode)
- : whitespaceMode(whitespaceMode), nextTokenTypeId(0)
+ : whitespaceMode(whitespaceMode), nextTokenId(0)
{
}
@@ -248,7 +248,7 @@ bool Tokenizer::next(CharReader &reader, Token &token)
// If we found text, emit that text
if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) {
- buildTextToken(textHandler, match, sourceId);
+ buildDataToken(textHandler, match, sourceId);
}
// Move the read/peek cursor to the end of the token, abort if an error
@@ -299,16 +299,16 @@ bool Tokenizer::peek(CharReader &reader, Token &token)
return false;
}
-TokenTypeId Tokenizer::registerToken(const std::string &token)
+TokenId Tokenizer::registerToken(const std::string &token)
{
// Abort if an empty token should be registered
if (token.empty()) {
- return EmptyToken;
+ return Tokens::Empty;
}
// Search for a new slot in the tokens list
- TokenTypeId type = EmptyToken;
- for (size_t i = nextTokenTypeId; i < tokens.size(); i++) {
+ TokenId type = Tokens::Empty;
+ for (size_t i = nextTokenId; i < tokens.size(); i++) {
if (tokens[i].empty()) {
tokens[i] = token;
type = i;
@@ -318,37 +318,37 @@ TokenTypeId Tokenizer::registerToken(const std::string &token)
// No existing slot was found, add a new one -- make sure we do not
// override the special token type handles
- if (type == EmptyToken) {
+ if (type == Tokens::Empty) {
type = tokens.size();
- if (type == TextToken || type == EmptyToken) {
+ if (type == Tokens::Data || type == Tokens::Empty) {
throw OusiaException{"Token type ids depleted!"};
}
tokens.emplace_back(token);
}
- nextTokenTypeId = type + 1;
+ nextTokenId = type + 1;
// Try to register the token in the trie -- if this fails, remove it
// from the tokens list
if (!trie.registerToken(token, type)) {
tokens[type] = std::string{};
- nextTokenTypeId = type;
- return EmptyToken;
+ nextTokenId = type;
+ return Tokens::Empty;
}
return type;
}
-bool Tokenizer::unregisterToken(TokenTypeId type)
+bool Tokenizer::unregisterToken(TokenId type)
{
// Unregister the token from the trie, abort if an invalid type is given
if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
tokens[type] = std::string{};
- nextTokenTypeId = type;
+ nextTokenId = type;
return true;
}
return false;
}
-std::string Tokenizer::getTokenString(TokenTypeId type)
+std::string Tokenizer::getTokenString(TokenId type)
{
if (type < tokens.size()) {
return tokens[type];
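
The id allocation strategy above is easy to miss among the renames: registerToken() first recycles slots freed by unregisterToken() (scanning from nextTokenId), only appends when no free slot exists, and guards the two reserved ids. A standalone sketch of just this bookkeeping, with the trie registration and its rollback left out:

// Sketch of the token-id slot reuse in Tokenizer::registerToken();
// the real code additionally registers the token in the TokenTrie and
// rolls the slot back if that fails.
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

using TokenId = uint32_t;
constexpr TokenId Empty = std::numeric_limits<TokenId>::max();
constexpr TokenId Data = Empty - 1;

struct Registry {
	std::vector<std::string> tokens;
	size_t nextTokenId = 0;

	TokenId registerToken(const std::string &token)
	{
		if (token.empty()) {
			return Empty;
		}
		// Prefer a slot freed by unregisterToken()
		TokenId id = Empty;
		for (size_t i = nextTokenId; i < tokens.size(); i++) {
			if (tokens[i].empty()) {
				tokens[i] = token;
				id = static_cast<TokenId>(i);
				break;
			}
		}
		// Otherwise append, making sure the reserved ids are never used
		if (id == Empty) {
			id = static_cast<TokenId>(tokens.size());
			if (id == Data || id == Empty) {
				throw std::logic_error{"Token ids depleted!"};
			}
			tokens.emplace_back(token);
		}
		nextTokenId = id + 1;
		return id;
	}

	bool unregisterToken(TokenId id)
	{
		if (id < tokens.size() && !tokens[id].empty()) {
			tokens[id] = std::string{};
			nextTokenId = id;  // hand this slot out first next time
			return true;
		}
		return false;
	}
};

int main()
{
	Registry r;
	TokenId a = r.registerToken("a");  // 0
	TokenId b = r.registerToken("b");  // 1
	r.unregisterToken(a);
	TokenId c = r.registerToken("c");  // 0 again: the freed slot is reused
	return (a == 0 && b == 1 && c == 0) ? 0 : 1;
}

This is the same behaviour the tokenRegistration test below asserts when it re-registers "d" under id 1 after unregistering token 1.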
diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp
index 6b4e116..f21c6a3 100644
--- a/src/core/parser/utils/Tokenizer.hpp
+++ b/src/core/parser/utils/Tokenizer.hpp
@@ -35,6 +35,7 @@
#include <core/common/Location.hpp>
#include <core/common/Whitespace.hpp>
+#include "Token.hpp"
#include "TokenTrie.hpp"
namespace ousia {
@@ -43,60 +44,6 @@ namespace ousia {
class CharReader;
/**
- * The Token structure describes a token discovered by the Tokenizer.
- */
-struct Token {
- /**
- * Id of the type of this token.
- */
- TokenTypeId type;
-
- /**
- * String that was matched.
- */
- std::string content;
-
- /**
- * Location from which the string was extracted.
- */
- SourceLocation location;
-
- /**
- * Default constructor.
- */
- Token() : type(EmptyToken) {}
-
- /**
- * Constructor of the Token struct.
- *
- * @param id represents the token type.
- * @param content is the string content that has been extracted.
- * @param location is the location of the extracted string content in the
- * source file.
- */
- Token(TokenTypeId type, const std::string &content,
- SourceLocation location)
- : type(type), content(content), location(location)
- {
- }
-
- /**
- * Constructor of the Token struct, only initializes the token type
- *
- * @param type is the id corresponding to the type of the token.
- */
- Token(TokenTypeId type) : type(type) {}
-
- /**
- * The getLocation function allows the tokens to be directly passed as
- * parameter to Logger or LoggableException instances.
- *
- * @return a reference at the location field
- */
- const SourceLocation &getLocation() const { return location; }
-};
-
-/**
* The Tokenizer is used to extract tokens and chunks of text from a
 * CharReader. Tokens can be registered and unregistered while parsing,
 * and the handling of whitespace characters can be adjusted. Note that the
@@ -123,7 +70,7 @@ private:
/**
* Next index in the tokens list where to search for a new token id.
*/
- size_t nextTokenTypeId;
+ size_t nextTokenId;
/**
* Templated function used internally to read the current token. The
@@ -158,31 +105,31 @@ public:
-	 * @return a unique identifier for the registered token or EmptyToken if
-	 * an error occured.
+	 * @return a unique identifier for the registered token or Tokens::Empty
+	 * if an error occurred.
*/
- TokenTypeId registerToken(const std::string &token);
+ TokenId registerToken(const std::string &token);
/**
- * Unregisters the token belonging to the given TokenTypeId.
+ * Unregisters the token belonging to the given TokenId.
*
	 * @param type is the token type that should be unregistered. The
-	 * TokenTypeId must have been returned by registerToken.
+	 * TokenId must have been returned by registerToken.
* @return true if the operation was successful, false otherwise (e.g.
* because the given TokenDescriptor was already unregistered).
*/
- bool unregisterToken(TokenTypeId type);
+ bool unregisterToken(TokenId type);
	/**
-	 * Returns the token that was registered under the given TokenTypeId, or
-	 * an empty string if an invalid TokenTypeId is given.
+	 * Returns the token that was registered under the given TokenId, or an
+	 * empty string if an invalid TokenId is given.
	 *
-	 * @param type is the TokenTypeId for which the corresponding token
-	 * string should be returned.
+	 * @param type is the TokenId for which the corresponding token string
+	 * should be returned.
* @return the registered token string or an empty string if the given type
* was invalid.
*/
- std::string getTokenString(TokenTypeId type);
+ std::string getTokenString(TokenId type);
/**
* Sets the whitespace mode.
diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp
index 0174fa4..f61ac7d 100644
--- a/src/formats/osml/OsmlStreamParser.cpp
+++ b/src/formats/osml/OsmlStreamParser.cpp
@@ -33,47 +33,47 @@ public:
/**
* Id of the backslash token.
*/
- TokenTypeId Backslash;
+ TokenId Backslash;
/**
* Id of the line comment token.
*/
- TokenTypeId LineComment;
+ TokenId LineComment;
/**
* Id of the block comment start token.
*/
- TokenTypeId BlockCommentStart;
+ TokenId BlockCommentStart;
/**
* Id of the block comment end token.
*/
- TokenTypeId BlockCommentEnd;
+ TokenId BlockCommentEnd;
/**
* Id of the field start token.
*/
- TokenTypeId FieldStart;
+ TokenId FieldStart;
/**
* Id of the field end token.
*/
- TokenTypeId FieldEnd;
+ TokenId FieldEnd;
/**
* Id of the default field start token.
*/
- TokenTypeId DefaultFieldStart;
+ TokenId DefaultFieldStart;
/**
* Id of the annotation start token.
*/
- TokenTypeId AnnotationStart;
+ TokenId AnnotationStart;
/**
* Id of the annotation end token.
*/
- TokenTypeId AnnotationEnd;
+ TokenId AnnotationEnd;
/**
* Registers the plain format tokens in the internal tokenizer.
@@ -92,7 +92,7 @@ public:
}
};
-static const PlainFormatTokens Tokens;
+static const PlainFormatTokens OsmlTokens;
/**
* Class used internally to collect data issued via "DATA" event.
@@ -179,7 +179,7 @@ public:
};
OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger)
- : reader(reader), logger(logger), tokenizer(Tokens)
+ : reader(reader), logger(logger), tokenizer(OsmlTokens)
{
// Place an initial command representing the complete file on the stack
commands.push(Command{"", Variant::mapType{}, true, true, true, false});
@@ -489,13 +489,13 @@ void OsmlStreamParser::parseBlockComment()
Token token;
size_t depth = 1;
while (tokenizer.read(reader, token)) {
- if (token.type == Tokens.BlockCommentEnd) {
+ if (token.id == OsmlTokens.BlockCommentEnd) {
depth--;
if (depth == 0) {
return;
}
}
- if (token.type == Tokens.BlockCommentStart) {
+ if (token.id == OsmlTokens.BlockCommentStart) {
depth++;
}
}
@@ -581,10 +581,11 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// Read tokens until the outer loop should be left
Token token;
while (tokenizer.peek(reader, token)) {
- const TokenTypeId type = token.type;
+ const TokenId type = token.id;
// Special handling for Backslash and Text
- if (type == Tokens.Backslash || type == Tokens.AnnotationStart) {
+ if (type == OsmlTokens.Backslash ||
+ type == OsmlTokens.AnnotationStart) {
// Before appending anything to the output data or starting a new
// command, check whether FIELD_START has to be issued, as the
// current command is a command with range
@@ -611,7 +612,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// Parse the actual command
State res = parseCommand(token.location.getStart(),
- type == Tokens.AnnotationStart);
+ type == OsmlTokens.AnnotationStart);
switch (res) {
case State::ERROR:
throw LoggableException(
@@ -631,7 +632,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// If this was an annotation start token, add the parsed < to the
// output
- if (type == Tokens.AnnotationStart) {
+ if (type == OsmlTokens.AnnotationStart) {
handler.append('<', token.location.getStart(),
token.location.getStart() + 1);
}
@@ -640,7 +641,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
reader.getPeekOffset());
reader.consumePeek();
continue;
- } else if (type == TextToken) {
+ } else if (type == Tokens::Data) {
// Check whether FIELD_START has to be issued before appending text
if (checkIssueFieldStart()) {
location = token.location;
@@ -667,11 +668,11 @@ OsmlStreamParser::State OsmlStreamParser::parse()
// Update the location to the current token location
location = token.location;
- if (token.type == Tokens.LineComment) {
+ if (token.id == OsmlTokens.LineComment) {
parseLineComment();
- } else if (token.type == Tokens.BlockCommentStart) {
+ } else if (token.id == OsmlTokens.BlockCommentStart) {
parseBlockComment();
- } else if (token.type == Tokens.FieldStart) {
+ } else if (token.id == OsmlTokens.FieldStart) {
Command &cmd = commands.top();
if (!cmd.inField) {
cmd.inField = true;
@@ -682,7 +683,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
"start the field. Write \"\\{\" to insert this sequence as "
"text.",
token);
- } else if (token.type == Tokens.FieldEnd) {
+ } else if (token.id == OsmlTokens.FieldEnd) {
if (closeField()) {
return State::FIELD_END;
}
@@ -690,7 +691,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
"Got field end token \"}\", but there is no field to end. "
"Write \"\\}\" to insert this sequence as text.",
token);
- } else if (token.type == Tokens.DefaultFieldStart) {
+ } else if (token.id == OsmlTokens.DefaultFieldStart) {
// Try to start a default field the first time the token is reached
Command &topCmd = commands.top();
if (!topCmd.inField) {
@@ -703,7 +704,7 @@ OsmlStreamParser::State OsmlStreamParser::parse()
"which to start the field. Write \"\\{!\" to insert this "
"sequence as text",
token);
- } else if (token.type == Tokens.AnnotationEnd) {
+ } else if (token.id == OsmlTokens.AnnotationEnd) {
// We got a single annotation end token "\>" -- simply issue the
// ANNOTATION_END event
Variant annotationName = Variant::fromString("");
@@ -751,4 +752,3 @@ bool OsmlStreamParser::inDefaultField() const
return commands.top().inRangeField || commands.top().inDefaultField;
}
}
-
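
The depth counter in parseBlockComment() above is the usual way to support nested block comments: every further comment-start token seen inside the comment must be matched by an end token before the comment is really over. A minimal sketch over a pre-tokenized stream (the token ids and the flat stream are stand-ins, not the real Tokenizer/CharReader API):

// Sketch of the nesting logic in OsmlStreamParser::parseBlockComment();
// operates on a flat id stream instead of the real tokenizer.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

using TokenId = uint32_t;
constexpr TokenId BlockCommentStart = 0;
constexpr TokenId BlockCommentEnd = 1;
constexpr TokenId Other = 2;

// Consumes tokens until the already-open comment is closed; returns
// false if the stream ends first (unterminated comment).
static bool skipBlockComment(const std::vector<TokenId> &stream, size_t &pos)
{
	size_t depth = 1;  // one comment is already open
	for (; pos < stream.size(); pos++) {
		if (stream[pos] == BlockCommentEnd && --depth == 0) {
			pos++;
			return true;
		}
		if (stream[pos] == BlockCommentStart) {
			depth++;
		}
	}
	return false;
}

int main()
{
	// start .. start .. end .. end -> the outer comment ends at index 4
	std::vector<TokenId> stream = {Other, BlockCommentStart, Other,
	                               BlockCommentEnd, BlockCommentEnd, Other};
	size_t pos = 0;  // a BlockCommentStart has just been consumed
	std::cout << skipBlockComment(stream, pos) << " " << pos << "\n";  // 1 5
	return 0;
}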
diff --git a/test/core/parser/utils/TokenTrieTest.cpp b/test/core/parser/utils/TokenTrieTest.cpp
index 087e6e6..d9d5164 100644
--- a/test/core/parser/utils/TokenTrieTest.cpp
+++ b/test/core/parser/utils/TokenTrieTest.cpp
@@ -22,10 +22,10 @@
namespace ousia {
-static const TokenTypeId t1 = 0;
-static const TokenTypeId t2 = 1;
-static const TokenTypeId t3 = 2;
-static const TokenTypeId t4 = 3;
+static const TokenId t1 = 0;
+static const TokenId t2 = 1;
+static const TokenId t3 = 2;
+static const TokenId t4 = 3;
TEST(TokenTrie, registerToken)
{
@@ -46,8 +46,8 @@ TEST(TokenTrie, registerToken)
ASSERT_EQ(t2, tree.hasToken("ab"));
ASSERT_EQ(t3, tree.hasToken("b"));
ASSERT_EQ(t4, tree.hasToken("hello"));
- ASSERT_EQ(EmptyToken, tree.hasToken(""));
- ASSERT_EQ(EmptyToken, tree.hasToken("abc"));
+ ASSERT_EQ(Tokens::Empty, tree.hasToken(""));
+ ASSERT_EQ(Tokens::Empty, tree.hasToken("abc"));
}
TEST(TokenTrie, unregisterToken)
@@ -70,23 +70,23 @@ TEST(TokenTrie, unregisterToken)
ASSERT_TRUE(tree.unregisterToken("a"));
ASSERT_FALSE(tree.unregisterToken("a"));
- ASSERT_EQ(EmptyToken, tree.hasToken("a"));
+ ASSERT_EQ(Tokens::Empty, tree.hasToken("a"));
ASSERT_EQ(t2, tree.hasToken("ab"));
ASSERT_EQ(t3, tree.hasToken("b"));
ASSERT_TRUE(tree.unregisterToken("b"));
ASSERT_FALSE(tree.unregisterToken("b"));
- ASSERT_EQ(EmptyToken, tree.hasToken("a"));
+ ASSERT_EQ(Tokens::Empty, tree.hasToken("a"));
ASSERT_EQ(t2, tree.hasToken("ab"));
- ASSERT_EQ(EmptyToken, tree.hasToken("b"));
+ ASSERT_EQ(Tokens::Empty, tree.hasToken("b"));
ASSERT_TRUE(tree.unregisterToken("ab"));
ASSERT_FALSE(tree.unregisterToken("ab"));
- ASSERT_EQ(EmptyToken, tree.hasToken("a"));
- ASSERT_EQ(EmptyToken, tree.hasToken("ab"));
- ASSERT_EQ(EmptyToken, tree.hasToken("b"));
+ ASSERT_EQ(Tokens::Empty, tree.hasToken("a"));
+ ASSERT_EQ(Tokens::Empty, tree.hasToken("ab"));
+ ASSERT_EQ(Tokens::Empty, tree.hasToken("b"));
}
}
diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp
index 8565057..3809a12 100644
--- a/test/core/parser/utils/TokenizerTest.cpp
+++ b/test/core/parser/utils/TokenizerTest.cpp
@@ -27,18 +27,18 @@ TEST(Tokenizer, tokenRegistration)
{
Tokenizer tokenizer;
- ASSERT_EQ(EmptyToken, tokenizer.registerToken(""));
+ ASSERT_EQ(Tokens::Empty, tokenizer.registerToken(""));
ASSERT_EQ(0U, tokenizer.registerToken("a"));
- ASSERT_EQ(EmptyToken, tokenizer.registerToken("a"));
+ ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("a"));
ASSERT_EQ("a", tokenizer.getTokenString(0U));
ASSERT_EQ(1U, tokenizer.registerToken("b"));
- ASSERT_EQ(EmptyToken, tokenizer.registerToken("b"));
+ ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("b"));
ASSERT_EQ("b", tokenizer.getTokenString(1U));
ASSERT_EQ(2U, tokenizer.registerToken("c"));
- ASSERT_EQ(EmptyToken, tokenizer.registerToken("c"));
+ ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("c"));
ASSERT_EQ("c", tokenizer.getTokenString(2U));
ASSERT_TRUE(tokenizer.unregisterToken(1U));
@@ -46,7 +46,7 @@ TEST(Tokenizer, tokenRegistration)
ASSERT_EQ("", tokenizer.getTokenString(1U));
ASSERT_EQ(1U, tokenizer.registerToken("d"));
- ASSERT_EQ(EmptyToken, tokenizer.registerToken("d"));
+ ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("d"));
ASSERT_EQ("d", tokenizer.getTokenString(1U));
}
@@ -60,7 +60,7 @@ TEST(Tokenizer, textTokenPreserveWhitespace)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ(" this \t is only a \n\n test text ", token.content);
SourceLocation loc = token.location;
@@ -78,7 +78,7 @@ TEST(Tokenizer, textTokenPreserveWhitespace)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("this \t is only a \n\n test text", token.content);
SourceLocation loc = token.location;
@@ -99,7 +99,7 @@ TEST(Tokenizer, textTokenTrimWhitespace)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("this \t is only a \n\n test text", token.content);
SourceLocation loc = token.location;
@@ -117,7 +117,7 @@ TEST(Tokenizer, textTokenTrimWhitespace)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("this \t is only a \n\n test text", token.content);
SourceLocation loc = token.location;
@@ -138,7 +138,7 @@ TEST(Tokenizer, textTokenCollapseWhitespace)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("this is only a test text", token.content);
SourceLocation loc = token.location;
@@ -156,7 +156,7 @@ TEST(Tokenizer, textTokenCollapseWhitespace)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("this is only a test text", token.content);
SourceLocation loc = token.location;
@@ -172,14 +172,14 @@ TEST(Tokenizer, simpleReadToken)
CharReader reader{"test1:test2"};
Tokenizer tokenizer;
- const TokenTypeId tid = tokenizer.registerToken(":");
+ const TokenId tid = tokenizer.registerToken(":");
ASSERT_EQ(0U, tid);
{
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("test1", token.content);
SourceLocation loc = token.location;
@@ -195,7 +195,7 @@ TEST(Tokenizer, simpleReadToken)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(tid, token.type);
+ ASSERT_EQ(tid, token.id);
ASSERT_EQ(":", token.content);
SourceLocation loc = token.location;
@@ -211,7 +211,7 @@ TEST(Tokenizer, simpleReadToken)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("test2", token.content);
SourceLocation loc = token.location;
@@ -228,14 +228,14 @@ TEST(Tokenizer, simplePeekToken)
CharReader reader{"test1:test2"};
Tokenizer tokenizer;
- const TokenTypeId tid = tokenizer.registerToken(":");
+ const TokenId tid = tokenizer.registerToken(":");
ASSERT_EQ(0U, tid);
{
Token token;
ASSERT_TRUE(tokenizer.peek(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("test1", token.content);
SourceLocation loc = token.location;
@@ -249,7 +249,7 @@ TEST(Tokenizer, simplePeekToken)
Token token;
ASSERT_TRUE(tokenizer.peek(reader, token));
- ASSERT_EQ(tid, token.type);
+ ASSERT_EQ(tid, token.id);
ASSERT_EQ(":", token.content);
SourceLocation loc = token.location;
@@ -263,7 +263,7 @@ TEST(Tokenizer, simplePeekToken)
Token token;
ASSERT_TRUE(tokenizer.peek(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("test2", token.content);
SourceLocation loc = token.location;
@@ -277,7 +277,7 @@ TEST(Tokenizer, simplePeekToken)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("test1", token.content);
SourceLocation loc = token.location;
@@ -291,7 +291,7 @@ TEST(Tokenizer, simplePeekToken)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(tid, token.type);
+ ASSERT_EQ(tid, token.id);
ASSERT_EQ(":", token.content);
SourceLocation loc = token.location;
@@ -305,7 +305,7 @@ TEST(Tokenizer, simplePeekToken)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("test2", token.content);
SourceLocation loc = token.location;
@@ -321,8 +321,8 @@ TEST(Tokenizer, ambiguousTokens)
CharReader reader{"abc"};
Tokenizer tokenizer;
- TokenTypeId t1 = tokenizer.registerToken("abd");
- TokenTypeId t2 = tokenizer.registerToken("bc");
+ TokenId t1 = tokenizer.registerToken("abd");
+ TokenId t2 = tokenizer.registerToken("bc");
ASSERT_EQ(0U, t1);
ASSERT_EQ(1U, t2);
@@ -330,7 +330,7 @@ TEST(Tokenizer, ambiguousTokens)
Token token;
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(TextToken, token.type);
+ ASSERT_EQ(Tokens::Data, token.id);
ASSERT_EQ("a", token.content);
SourceLocation loc = token.location;
@@ -339,7 +339,7 @@ TEST(Tokenizer, ambiguousTokens)
ASSERT_TRUE(tokenizer.read(reader, token));
- ASSERT_EQ(t2, token.type);
+ ASSERT_EQ(t2, token.id);
ASSERT_EQ("bc", token.content);
loc = token.location;
@@ -356,22 +356,22 @@ TEST(Tokenizer, commentTestWhitespacePreserve)
// 0 1 2
Tokenizer tokenizer(WhitespaceMode::PRESERVE);
- const TokenTypeId t1 = tokenizer.registerToken("/");
- const TokenTypeId t2 = tokenizer.registerToken("/*");
- const TokenTypeId t3 = tokenizer.registerToken("*/");
+ const TokenId t1 = tokenizer.registerToken("/");
+ const TokenId t2 = tokenizer.registerToken("/*");
+ const TokenId t3 = tokenizer.registerToken("*/");
std::vector<Token> expected = {
- {TextToken, "Test", SourceLocation{0, 0, 4}},
+ {Tokens::Data, "Test", SourceLocation{0, 0, 4}},
{t1, "/", SourceLocation{0, 4, 5}},
- {TextToken, "Test ", SourceLocation{0, 5, 10}},
+ {Tokens::Data, "Test ", SourceLocation{0, 5, 10}},
{t2, "/*", SourceLocation{0, 10, 12}},
- {TextToken, " Block Comment ", SourceLocation{0, 12, 27}},
+ {Tokens::Data, " Block Comment ", SourceLocation{0, 12, 27}},
{t3, "*/", SourceLocation{0, 27, 29}}};
Token t;
for (auto &te : expected) {
EXPECT_TRUE(tokenizer.read(reader, t));
- EXPECT_EQ(te.type, t.type);
+ EXPECT_EQ(te.id, t.id);
EXPECT_EQ(te.content, t.content);
EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
EXPECT_EQ(te.location.getStart(), t.location.getStart());
@@ -387,22 +387,22 @@ TEST(Tokenizer, commentTestWhitespaceCollapse)
// 0 1 2
Tokenizer tokenizer(WhitespaceMode::COLLAPSE);
- const TokenTypeId t1 = tokenizer.registerToken("/");
- const TokenTypeId t2 = tokenizer.registerToken("/*");
- const TokenTypeId t3 = tokenizer.registerToken("*/");
+ const TokenId t1 = tokenizer.registerToken("/");
+ const TokenId t2 = tokenizer.registerToken("/*");
+ const TokenId t3 = tokenizer.registerToken("*/");
std::vector<Token> expected = {
- {TextToken, "Test", SourceLocation{0, 0, 4}},
+ {Tokens::Data, "Test", SourceLocation{0, 0, 4}},
{t1, "/", SourceLocation{0, 4, 5}},
- {TextToken, "Test", SourceLocation{0, 5, 9}},
+ {Tokens::Data, "Test", SourceLocation{0, 5, 9}},
{t2, "/*", SourceLocation{0, 10, 12}},
- {TextToken, "Block Comment", SourceLocation{0, 13, 26}},
+ {Tokens::Data, "Block Comment", SourceLocation{0, 13, 26}},
{t3, "*/", SourceLocation{0, 27, 29}}};
Token t;
for (auto &te : expected) {
EXPECT_TRUE(tokenizer.read(reader, t));
- EXPECT_EQ(te.type, t.type);
+ EXPECT_EQ(te.id, t.id);
EXPECT_EQ(te.content, t.content);
EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId());
EXPECT_EQ(te.location.getStart(), t.location.getStart());
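
The expectations in textTokenCollapseWhitespace and commentTestWhitespaceCollapse pin down what WhitespaceMode::COLLAPSE is supposed to produce for data tokens: interior whitespace runs become a single space and both ends are trimmed. A minimal sketch of that string transformation, independent of the Tokenizer (the real implementation additionally has to track source offsets, which this ignores):

// Sketch of COLLAPSE-style whitespace handling, mirroring the test
// expectations above; not the ousia WhitespaceHandler itself.
#include <cctype>
#include <iostream>
#include <string>

static std::string collapse(const std::string &s)
{
	std::string out;
	bool pendingSpace = false;
	for (char c : s) {
		if (std::isspace(static_cast<unsigned char>(c))) {
			// Remember the gap, but never emit leading whitespace
			pendingSpace = !out.empty();
		} else {
			if (pendingSpace) {
				out.push_back(' ');
				pendingSpace = false;
			}
			out.push_back(c);
		}
	}
	return out;  // trailing whitespace is dropped implicitly
}

int main()
{
	// Collapses to "this is only a test text", as in the tests above
	std::cout << collapse(" this \t is only a \n\n test text ") << "\n";
	return 0;
}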