diff options
-rw-r--r-- | src/core/parser/utils/TokenTrie.cpp | 20 | ||||
-rw-r--r-- | src/core/parser/utils/TokenTrie.hpp | 23 | ||||
-rw-r--r-- | src/core/parser/utils/Tokenizer.cpp | 38 | ||||
-rw-r--r-- | src/core/parser/utils/Tokenizer.hpp | 73 | ||||
-rw-r--r-- | src/formats/osml/OsmlStreamParser.cpp | 50 | ||||
-rw-r--r-- | test/core/parser/utils/TokenTrieTest.cpp | 24 | ||||
-rw-r--r-- | test/core/parser/utils/TokenizerTest.cpp | 80 |
7 files changed, 121 insertions, 187 deletions
diff --git a/src/core/parser/utils/TokenTrie.cpp b/src/core/parser/utils/TokenTrie.cpp index 4a0430b..80cc945 100644 --- a/src/core/parser/utils/TokenTrie.cpp +++ b/src/core/parser/utils/TokenTrie.cpp @@ -22,12 +22,12 @@ namespace ousia { /* Class DynamicTokenTree::Node */ -TokenTrie::Node::Node() : type(EmptyToken) {} +TokenTrie::Node::Node() : type(Tokens::Empty) {} /* Class DynamicTokenTree */ bool TokenTrie::registerToken(const std::string &token, - TokenTypeId type) noexcept + TokenId type) noexcept { // Abort if the token is empty -- this would taint the root node if (token.empty()) { @@ -48,7 +48,7 @@ bool TokenTrie::registerToken(const std::string &token, } // If the resulting node already has a type set, we're screwed. - if (node->type != EmptyToken) { + if (node->type != Tokens::Empty) { return false; } @@ -78,22 +78,22 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept // Reset the subtree handler if this node has another type node = it->second.get(); - if ((node->type != EmptyToken || node->children.size() > 1) && + if ((node->type != Tokens::Empty || node->children.size() > 1) && (i + 1 != token.size())) { subtreeRoot = node; subtreeKey = token[i + 1]; } } - // If the node type is already EmptyToken, we cannot do anything here - if (node->type == EmptyToken) { + // If the node type is already Tokens::Empty, we cannot do anything here + if (node->type == Tokens::Empty) { return false; } // If the target node has children, we cannot delete the subtree. Set the - // type to EmptyToken instead + // type to Tokens::Empty instead if (!node->children.empty()) { - node->type = EmptyToken; + node->type = Tokens::Empty; return true; } @@ -102,14 +102,14 @@ bool TokenTrie::unregisterToken(const std::string &token) noexcept return true; } -TokenTypeId TokenTrie::hasToken(const std::string &token) const noexcept +TokenId TokenTrie::hasToken(const std::string &token) const noexcept { Node const *node = &root; for (size_t i = 0; i < token.size(); i++) { const char c = token[i]; auto it = node->children.find(c); if (it == node->children.end()) { - return EmptyToken; + return Tokens::Empty; } node = it->second.get(); } diff --git a/src/core/parser/utils/TokenTrie.hpp b/src/core/parser/utils/TokenTrie.hpp index 36c2ffa..b2d1539 100644 --- a/src/core/parser/utils/TokenTrie.hpp +++ b/src/core/parser/utils/TokenTrie.hpp @@ -33,22 +33,9 @@ #include <limits> #include <unordered_map> -namespace ousia { - -/** - * The TokenTypeId is used to give each token type a unique id. - */ -using TokenTypeId = uint32_t; - -/** - * Token which is not a token. - */ -constexpr TokenTypeId EmptyToken = std::numeric_limits<TokenTypeId>::max(); +#include "Token.hpp" -/** - * Token which represents a text token. - */ -constexpr TokenTypeId TextToken = std::numeric_limits<TokenTypeId>::max() - 1; +namespace ousia { /** * The Tokenizer internally uses a TokenTrie to be efficiently able to identify @@ -91,7 +78,7 @@ public: * Reference at the corresponding token descriptor. Set to nullptr if * no token is attached to this node. */ - TokenTypeId type; + TokenId type; /** * Default constructor, initializes the descriptor with nullptr. @@ -115,7 +102,7 @@ public: * @param type is the descriptor that should be set for this token. * @return true if the operation is successful, false otherwise. */ - bool registerToken(const std::string &token, TokenTypeId type) noexcept; + bool registerToken(const std::string &token, TokenId type) noexcept; /** * Unregisters the token from the token tree. Returns true if the token was @@ -134,7 +121,7 @@ public: * @return the attached token descriptor or nullptr if the given token is * not found. */ - TokenTypeId hasToken(const std::string &token) const noexcept; + TokenId hasToken(const std::string &token) const noexcept; /** * Returns a reference at the root node to be used for traversing the token diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp index 3c8177d..2e0ac13 100644 --- a/src/core/parser/utils/Tokenizer.cpp +++ b/src/core/parser/utils/Tokenizer.cpp @@ -61,7 +61,7 @@ struct TokenMatch { /** * Returns true if this TokenMatch instance actually represents a match. */ - bool hasMatch() { return token.type != EmptyToken; } + bool hasMatch() { return token.id != Tokens::Empty; } }; /* Internal class TokenLookup */ @@ -138,7 +138,7 @@ public: // Check whether the new node represents a complete token a whether it // is longer than the current token. If yes, replace the current token. node = it->second.get(); - if (node->type != EmptyToken) { + if (node->type != Tokens::Empty) { const std::string &str = tokens[node->type]; size_t len = str.size(); if (len > match.token.content.size()) { @@ -157,14 +157,14 @@ public: }; /** - * Transforms the given token into a text token containing the extracted + * Transforms the given token into a data token containing the extracted * text. * * @param handler is the WhitespaceHandler containing the collected data. * @param token is the output token to which the text should be written. * @param sourceId is the source id of the underlying file. */ -static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match, +static void buildDataToken(const WhitespaceHandler &handler, TokenMatch &match, SourceId sourceId) { if (match.hasMatch()) { @@ -177,14 +177,14 @@ static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match, match.token.location = SourceLocation{sourceId, handler.textStart, handler.textEnd}; } - match.token.type = TextToken; + match.token.id = Tokens::Data; } } /* Class Tokenizer */ Tokenizer::Tokenizer(WhitespaceMode whitespaceMode) - : whitespaceMode(whitespaceMode), nextTokenTypeId(0) + : whitespaceMode(whitespaceMode), nextTokenId(0) { } @@ -248,7 +248,7 @@ bool Tokenizer::next(CharReader &reader, Token &token) // If we found text, emit that text if (textHandler.hasText() && (!match.hasMatch() || match.textLength > 0)) { - buildTextToken(textHandler, match, sourceId); + buildDataToken(textHandler, match, sourceId); } // Move the read/peek cursor to the end of the token, abort if an error @@ -299,16 +299,16 @@ bool Tokenizer::peek(CharReader &reader, Token &token) return false; } -TokenTypeId Tokenizer::registerToken(const std::string &token) +TokenId Tokenizer::registerToken(const std::string &token) { // Abort if an empty token should be registered if (token.empty()) { - return EmptyToken; + return Tokens::Empty; } // Search for a new slot in the tokens list - TokenTypeId type = EmptyToken; - for (size_t i = nextTokenTypeId; i < tokens.size(); i++) { + TokenId type = Tokens::Empty; + for (size_t i = nextTokenId; i < tokens.size(); i++) { if (tokens[i].empty()) { tokens[i] = token; type = i; @@ -318,37 +318,37 @@ TokenTypeId Tokenizer::registerToken(const std::string &token) // No existing slot was found, add a new one -- make sure we do not // override the special token type handles - if (type == EmptyToken) { + if (type == Tokens::Empty) { type = tokens.size(); - if (type == TextToken || type == EmptyToken) { + if (type == Tokens::Data || type == Tokens::Empty) { throw OusiaException{"Token type ids depleted!"}; } tokens.emplace_back(token); } - nextTokenTypeId = type + 1; + nextTokenId = type + 1; // Try to register the token in the trie -- if this fails, remove it // from the tokens list if (!trie.registerToken(token, type)) { tokens[type] = std::string{}; - nextTokenTypeId = type; - return EmptyToken; + nextTokenId = type; + return Tokens::Empty; } return type; } -bool Tokenizer::unregisterToken(TokenTypeId type) +bool Tokenizer::unregisterToken(TokenId type) { // Unregister the token from the trie, abort if an invalid type is given if (type < tokens.size() && trie.unregisterToken(tokens[type])) { tokens[type] = std::string{}; - nextTokenTypeId = type; + nextTokenId = type; return true; } return false; } -std::string Tokenizer::getTokenString(TokenTypeId type) +std::string Tokenizer::getTokenString(TokenId type) { if (type < tokens.size()) { return tokens[type]; diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp index 6b4e116..f21c6a3 100644 --- a/src/core/parser/utils/Tokenizer.hpp +++ b/src/core/parser/utils/Tokenizer.hpp @@ -35,6 +35,7 @@ #include <core/common/Location.hpp> #include <core/common/Whitespace.hpp> +#include "Token.hpp" #include "TokenTrie.hpp" namespace ousia { @@ -43,60 +44,6 @@ namespace ousia { class CharReader; /** - * The Token structure describes a token discovered by the Tokenizer. - */ -struct Token { - /** - * Id of the type of this token. - */ - TokenTypeId type; - - /** - * String that was matched. - */ - std::string content; - - /** - * Location from which the string was extracted. - */ - SourceLocation location; - - /** - * Default constructor. - */ - Token() : type(EmptyToken) {} - - /** - * Constructor of the Token struct. - * - * @param id represents the token type. - * @param content is the string content that has been extracted. - * @param location is the location of the extracted string content in the - * source file. - */ - Token(TokenTypeId type, const std::string &content, - SourceLocation location) - : type(type), content(content), location(location) - { - } - - /** - * Constructor of the Token struct, only initializes the token type - * - * @param type is the id corresponding to the type of the token. - */ - Token(TokenTypeId type) : type(type) {} - - /** - * The getLocation function allows the tokens to be directly passed as - * parameter to Logger or LoggableException instances. - * - * @return a reference at the location field - */ - const SourceLocation &getLocation() const { return location; } -}; - -/** * The Tokenizer is used to extract tokens and chunks of text from a * CharReader. It allows to register and unregister tokens while parsing and * to modify the handling of whitespace characters. Note that the @@ -123,7 +70,7 @@ private: /** * Next index in the tokens list where to search for a new token id. */ - size_t nextTokenTypeId; + size_t nextTokenId; /** * Templated function used internally to read the current token. The @@ -158,31 +105,31 @@ public: * @return a unique identifier for the registered token or EmptyToken if * an error occured. */ - TokenTypeId registerToken(const std::string &token); + TokenId registerToken(const std::string &token); /** - * Unregisters the token belonging to the given TokenTypeId. + * Unregisters the token belonging to the given TokenId. * * @param type is the token type that should be unregistered. The - *TokenTypeId + *TokenId * must have been returned by registerToken. * @return true if the operation was successful, false otherwise (e.g. * because the given TokenDescriptor was already unregistered). */ - bool unregisterToken(TokenTypeId type); + bool unregisterToken(TokenId type); /** - * Returns the token that was registered under the given TokenTypeId id or + * Returns the token that was registered under the given TokenId id or *an - * empty string if an invalid TokenTypeId id is given. + * empty string if an invalid TokenId id is given. * - * @param type is the TokenTypeId id for which the corresponding token + * @param type is the TokenId id for which the corresponding token *string * should be returned. * @return the registered token string or an empty string if the given type * was invalid. */ - std::string getTokenString(TokenTypeId type); + std::string getTokenString(TokenId type); /** * Sets the whitespace mode. diff --git a/src/formats/osml/OsmlStreamParser.cpp b/src/formats/osml/OsmlStreamParser.cpp index 0174fa4..f61ac7d 100644 --- a/src/formats/osml/OsmlStreamParser.cpp +++ b/src/formats/osml/OsmlStreamParser.cpp @@ -33,47 +33,47 @@ public: /** * Id of the backslash token. */ - TokenTypeId Backslash; + TokenId Backslash; /** * Id of the line comment token. */ - TokenTypeId LineComment; + TokenId LineComment; /** * Id of the block comment start token. */ - TokenTypeId BlockCommentStart; + TokenId BlockCommentStart; /** * Id of the block comment end token. */ - TokenTypeId BlockCommentEnd; + TokenId BlockCommentEnd; /** * Id of the field start token. */ - TokenTypeId FieldStart; + TokenId FieldStart; /** * Id of the field end token. */ - TokenTypeId FieldEnd; + TokenId FieldEnd; /** * Id of the default field start token. */ - TokenTypeId DefaultFieldStart; + TokenId DefaultFieldStart; /** * Id of the annotation start token. */ - TokenTypeId AnnotationStart; + TokenId AnnotationStart; /** * Id of the annotation end token. */ - TokenTypeId AnnotationEnd; + TokenId AnnotationEnd; /** * Registers the plain format tokens in the internal tokenizer. @@ -92,7 +92,7 @@ public: } }; -static const PlainFormatTokens Tokens; +static const PlainFormatTokens OsmlTokens; /** * Class used internally to collect data issued via "DATA" event. @@ -179,7 +179,7 @@ public: }; OsmlStreamParser::OsmlStreamParser(CharReader &reader, Logger &logger) - : reader(reader), logger(logger), tokenizer(Tokens) + : reader(reader), logger(logger), tokenizer(OsmlTokens) { // Place an intial command representing the complete file on the stack commands.push(Command{"", Variant::mapType{}, true, true, true, false}); @@ -489,13 +489,13 @@ void OsmlStreamParser::parseBlockComment() Token token; size_t depth = 1; while (tokenizer.read(reader, token)) { - if (token.type == Tokens.BlockCommentEnd) { + if (token.id == OsmlTokens.BlockCommentEnd) { depth--; if (depth == 0) { return; } } - if (token.type == Tokens.BlockCommentStart) { + if (token.id == OsmlTokens.BlockCommentStart) { depth++; } } @@ -581,10 +581,11 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Read tokens until the outer loop should be left Token token; while (tokenizer.peek(reader, token)) { - const TokenTypeId type = token.type; + const TokenId type = token.id; // Special handling for Backslash and Text - if (type == Tokens.Backslash || type == Tokens.AnnotationStart) { + if (type == OsmlTokens.Backslash || + type == OsmlTokens.AnnotationStart) { // Before appending anything to the output data or starting a new // command, check whether FIELD_START has to be issued, as the // current command is a command with range @@ -611,7 +612,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Parse the actual command State res = parseCommand(token.location.getStart(), - type == Tokens.AnnotationStart); + type == OsmlTokens.AnnotationStart); switch (res) { case State::ERROR: throw LoggableException( @@ -631,7 +632,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() // If this was an annotation start token, add the parsed < to the // output - if (type == Tokens.AnnotationStart) { + if (type == OsmlTokens.AnnotationStart) { handler.append('<', token.location.getStart(), token.location.getStart() + 1); } @@ -640,7 +641,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() reader.getPeekOffset()); reader.consumePeek(); continue; - } else if (type == TextToken) { + } else if (type == Tokens::Data) { // Check whether FIELD_START has to be issued before appending text if (checkIssueFieldStart()) { location = token.location; @@ -667,11 +668,11 @@ OsmlStreamParser::State OsmlStreamParser::parse() // Update the location to the current token location location = token.location; - if (token.type == Tokens.LineComment) { + if (token.id == OsmlTokens.LineComment) { parseLineComment(); - } else if (token.type == Tokens.BlockCommentStart) { + } else if (token.id == OsmlTokens.BlockCommentStart) { parseBlockComment(); - } else if (token.type == Tokens.FieldStart) { + } else if (token.id == OsmlTokens.FieldStart) { Command &cmd = commands.top(); if (!cmd.inField) { cmd.inField = true; @@ -682,7 +683,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() "start the field. Write \"\\{\" to insert this sequence as " "text.", token); - } else if (token.type == Tokens.FieldEnd) { + } else if (token.id == OsmlTokens.FieldEnd) { if (closeField()) { return State::FIELD_END; } @@ -690,7 +691,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() "Got field end token \"}\", but there is no field to end. " "Write \"\\}\" to insert this sequence as text.", token); - } else if (token.type == Tokens.DefaultFieldStart) { + } else if (token.id == OsmlTokens.DefaultFieldStart) { // Try to start a default field the first time the token is reached Command &topCmd = commands.top(); if (!topCmd.inField) { @@ -703,7 +704,7 @@ OsmlStreamParser::State OsmlStreamParser::parse() "which to start the field. Write \"\\{!\" to insert this " "sequence as text", token); - } else if (token.type == Tokens.AnnotationEnd) { + } else if (token.id == OsmlTokens.AnnotationEnd) { // We got a single annotation end token "\>" -- simply issue the // ANNOTATION_END event Variant annotationName = Variant::fromString(""); @@ -751,4 +752,3 @@ bool OsmlStreamParser::inDefaultField() const return commands.top().inRangeField || commands.top().inDefaultField; } } - diff --git a/test/core/parser/utils/TokenTrieTest.cpp b/test/core/parser/utils/TokenTrieTest.cpp index 087e6e6..d9d5164 100644 --- a/test/core/parser/utils/TokenTrieTest.cpp +++ b/test/core/parser/utils/TokenTrieTest.cpp @@ -22,10 +22,10 @@ namespace ousia { -static const TokenTypeId t1 = 0; -static const TokenTypeId t2 = 1; -static const TokenTypeId t3 = 2; -static const TokenTypeId t4 = 3; +static const TokenId t1 = 0; +static const TokenId t2 = 1; +static const TokenId t3 = 2; +static const TokenId t4 = 3; TEST(TokenTrie, registerToken) { @@ -46,8 +46,8 @@ TEST(TokenTrie, registerToken) ASSERT_EQ(t2, tree.hasToken("ab")); ASSERT_EQ(t3, tree.hasToken("b")); ASSERT_EQ(t4, tree.hasToken("hello")); - ASSERT_EQ(EmptyToken, tree.hasToken("")); - ASSERT_EQ(EmptyToken, tree.hasToken("abc")); + ASSERT_EQ(Tokens::Empty, tree.hasToken("")); + ASSERT_EQ(Tokens::Empty, tree.hasToken("abc")); } TEST(TokenTrie, unregisterToken) @@ -70,23 +70,23 @@ TEST(TokenTrie, unregisterToken) ASSERT_TRUE(tree.unregisterToken("a")); ASSERT_FALSE(tree.unregisterToken("a")); - ASSERT_EQ(EmptyToken, tree.hasToken("a")); + ASSERT_EQ(Tokens::Empty, tree.hasToken("a")); ASSERT_EQ(t2, tree.hasToken("ab")); ASSERT_EQ(t3, tree.hasToken("b")); ASSERT_TRUE(tree.unregisterToken("b")); ASSERT_FALSE(tree.unregisterToken("b")); - ASSERT_EQ(EmptyToken, tree.hasToken("a")); + ASSERT_EQ(Tokens::Empty, tree.hasToken("a")); ASSERT_EQ(t2, tree.hasToken("ab")); - ASSERT_EQ(EmptyToken, tree.hasToken("b")); + ASSERT_EQ(Tokens::Empty, tree.hasToken("b")); ASSERT_TRUE(tree.unregisterToken("ab")); ASSERT_FALSE(tree.unregisterToken("ab")); - ASSERT_EQ(EmptyToken, tree.hasToken("a")); - ASSERT_EQ(EmptyToken, tree.hasToken("ab")); - ASSERT_EQ(EmptyToken, tree.hasToken("b")); + ASSERT_EQ(Tokens::Empty, tree.hasToken("a")); + ASSERT_EQ(Tokens::Empty, tree.hasToken("ab")); + ASSERT_EQ(Tokens::Empty, tree.hasToken("b")); } } diff --git a/test/core/parser/utils/TokenizerTest.cpp b/test/core/parser/utils/TokenizerTest.cpp index 8565057..3809a12 100644 --- a/test/core/parser/utils/TokenizerTest.cpp +++ b/test/core/parser/utils/TokenizerTest.cpp @@ -27,18 +27,18 @@ TEST(Tokenizer, tokenRegistration) { Tokenizer tokenizer; - ASSERT_EQ(EmptyToken, tokenizer.registerToken("")); + ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("")); ASSERT_EQ(0U, tokenizer.registerToken("a")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("a")); + ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("a")); ASSERT_EQ("a", tokenizer.getTokenString(0U)); ASSERT_EQ(1U, tokenizer.registerToken("b")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("b")); + ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("b")); ASSERT_EQ("b", tokenizer.getTokenString(1U)); ASSERT_EQ(2U, tokenizer.registerToken("c")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("c")); + ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("c")); ASSERT_EQ("c", tokenizer.getTokenString(2U)); ASSERT_TRUE(tokenizer.unregisterToken(1U)); @@ -46,7 +46,7 @@ TEST(Tokenizer, tokenRegistration) ASSERT_EQ("", tokenizer.getTokenString(1U)); ASSERT_EQ(1U, tokenizer.registerToken("d")); - ASSERT_EQ(EmptyToken, tokenizer.registerToken("d")); + ASSERT_EQ(Tokens::Empty, tokenizer.registerToken("d")); ASSERT_EQ("d", tokenizer.getTokenString(1U)); } @@ -60,7 +60,7 @@ TEST(Tokenizer, textTokenPreserveWhitespace) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ(" this \t is only a \n\n test text ", token.content); SourceLocation loc = token.location; @@ -78,7 +78,7 @@ TEST(Tokenizer, textTokenPreserveWhitespace) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("this \t is only a \n\n test text", token.content); SourceLocation loc = token.location; @@ -99,7 +99,7 @@ TEST(Tokenizer, textTokenTrimWhitespace) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("this \t is only a \n\n test text", token.content); SourceLocation loc = token.location; @@ -117,7 +117,7 @@ TEST(Tokenizer, textTokenTrimWhitespace) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("this \t is only a \n\n test text", token.content); SourceLocation loc = token.location; @@ -138,7 +138,7 @@ TEST(Tokenizer, textTokenCollapseWhitespace) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("this is only a test text", token.content); SourceLocation loc = token.location; @@ -156,7 +156,7 @@ TEST(Tokenizer, textTokenCollapseWhitespace) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("this is only a test text", token.content); SourceLocation loc = token.location; @@ -172,14 +172,14 @@ TEST(Tokenizer, simpleReadToken) CharReader reader{"test1:test2"}; Tokenizer tokenizer; - const TokenTypeId tid = tokenizer.registerToken(":"); + const TokenId tid = tokenizer.registerToken(":"); ASSERT_EQ(0U, tid); { Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("test1", token.content); SourceLocation loc = token.location; @@ -195,7 +195,7 @@ TEST(Tokenizer, simpleReadToken) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(tid, token.type); + ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); SourceLocation loc = token.location; @@ -211,7 +211,7 @@ TEST(Tokenizer, simpleReadToken) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("test2", token.content); SourceLocation loc = token.location; @@ -228,14 +228,14 @@ TEST(Tokenizer, simplePeekToken) CharReader reader{"test1:test2"}; Tokenizer tokenizer; - const TokenTypeId tid = tokenizer.registerToken(":"); + const TokenId tid = tokenizer.registerToken(":"); ASSERT_EQ(0U, tid); { Token token; ASSERT_TRUE(tokenizer.peek(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("test1", token.content); SourceLocation loc = token.location; @@ -249,7 +249,7 @@ TEST(Tokenizer, simplePeekToken) Token token; ASSERT_TRUE(tokenizer.peek(reader, token)); - ASSERT_EQ(tid, token.type); + ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); SourceLocation loc = token.location; @@ -263,7 +263,7 @@ TEST(Tokenizer, simplePeekToken) Token token; ASSERT_TRUE(tokenizer.peek(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("test2", token.content); SourceLocation loc = token.location; @@ -277,7 +277,7 @@ TEST(Tokenizer, simplePeekToken) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("test1", token.content); SourceLocation loc = token.location; @@ -291,7 +291,7 @@ TEST(Tokenizer, simplePeekToken) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(tid, token.type); + ASSERT_EQ(tid, token.id); ASSERT_EQ(":", token.content); SourceLocation loc = token.location; @@ -305,7 +305,7 @@ TEST(Tokenizer, simplePeekToken) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("test2", token.content); SourceLocation loc = token.location; @@ -321,8 +321,8 @@ TEST(Tokenizer, ambiguousTokens) CharReader reader{"abc"}; Tokenizer tokenizer; - TokenTypeId t1 = tokenizer.registerToken("abd"); - TokenTypeId t2 = tokenizer.registerToken("bc"); + TokenId t1 = tokenizer.registerToken("abd"); + TokenId t2 = tokenizer.registerToken("bc"); ASSERT_EQ(0U, t1); ASSERT_EQ(1U, t2); @@ -330,7 +330,7 @@ TEST(Tokenizer, ambiguousTokens) Token token; ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(TextToken, token.type); + ASSERT_EQ(Tokens::Data, token.id); ASSERT_EQ("a", token.content); SourceLocation loc = token.location; @@ -339,7 +339,7 @@ TEST(Tokenizer, ambiguousTokens) ASSERT_TRUE(tokenizer.read(reader, token)); - ASSERT_EQ(t2, token.type); + ASSERT_EQ(t2, token.id); ASSERT_EQ("bc", token.content); loc = token.location; @@ -356,22 +356,22 @@ TEST(Tokenizer, commentTestWhitespacePreserve) // 0 1 2 Tokenizer tokenizer(WhitespaceMode::PRESERVE); - const TokenTypeId t1 = tokenizer.registerToken("/"); - const TokenTypeId t2 = tokenizer.registerToken("/*"); - const TokenTypeId t3 = tokenizer.registerToken("*/"); + const TokenId t1 = tokenizer.registerToken("/"); + const TokenId t2 = tokenizer.registerToken("/*"); + const TokenId t3 = tokenizer.registerToken("*/"); std::vector<Token> expected = { - {TextToken, "Test", SourceLocation{0, 0, 4}}, + {Tokens::Data, "Test", SourceLocation{0, 0, 4}}, {t1, "/", SourceLocation{0, 4, 5}}, - {TextToken, "Test ", SourceLocation{0, 5, 10}}, + {Tokens::Data, "Test ", SourceLocation{0, 5, 10}}, {t2, "/*", SourceLocation{0, 10, 12}}, - {TextToken, " Block Comment ", SourceLocation{0, 12, 27}}, + {Tokens::Data, " Block Comment ", SourceLocation{0, 12, 27}}, {t3, "*/", SourceLocation{0, 27, 29}}}; Token t; for (auto &te : expected) { EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.type, t.type); + EXPECT_EQ(te.id, t.id); EXPECT_EQ(te.content, t.content); EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); EXPECT_EQ(te.location.getStart(), t.location.getStart()); @@ -387,22 +387,22 @@ TEST(Tokenizer, commentTestWhitespaceCollapse) // 0 1 2 Tokenizer tokenizer(WhitespaceMode::COLLAPSE); - const TokenTypeId t1 = tokenizer.registerToken("/"); - const TokenTypeId t2 = tokenizer.registerToken("/*"); - const TokenTypeId t3 = tokenizer.registerToken("*/"); + const TokenId t1 = tokenizer.registerToken("/"); + const TokenId t2 = tokenizer.registerToken("/*"); + const TokenId t3 = tokenizer.registerToken("*/"); std::vector<Token> expected = { - {TextToken, "Test", SourceLocation{0, 0, 4}}, + {Tokens::Data, "Test", SourceLocation{0, 0, 4}}, {t1, "/", SourceLocation{0, 4, 5}}, - {TextToken, "Test", SourceLocation{0, 5, 9}}, + {Tokens::Data, "Test", SourceLocation{0, 5, 9}}, {t2, "/*", SourceLocation{0, 10, 12}}, - {TextToken, "Block Comment", SourceLocation{0, 13, 26}}, + {Tokens::Data, "Block Comment", SourceLocation{0, 13, 26}}, {t3, "*/", SourceLocation{0, 27, 29}}}; Token t; for (auto &te : expected) { EXPECT_TRUE(tokenizer.read(reader, t)); - EXPECT_EQ(te.type, t.type); + EXPECT_EQ(te.id, t.id); EXPECT_EQ(te.content, t.content); EXPECT_EQ(te.location.getSourceId(), t.location.getSourceId()); EXPECT_EQ(te.location.getStart(), t.location.getStart()); |