summary refs log tree commit diff
path: root/src/core/parser/utils/Tokenizer.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/parser/utils/Tokenizer.hpp')
-rw-r--r-- src/core/parser/utils/Tokenizer.hpp 73
1 files changed, 10 insertions, 63 deletions
diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp
index 6b4e116..f21c6a3 100644
--- a/src/core/parser/utils/Tokenizer.hpp
+++ b/src/core/parser/utils/Tokenizer.hpp
@@ -35,6 +35,7 @@
#include <core/common/Location.hpp>
#include <core/common/Whitespace.hpp>
+#include "Token.hpp"
#include "TokenTrie.hpp"
namespace ousia {
@@ -43,60 +44,6 @@ namespace ousia {
class CharReader;
/**
- * The Token structure describes a token discovered by the Tokenizer.
- */
-struct Token {
- /**
- * Id of the type of this token.
- */
- TokenTypeId type;
-
- /**
- * String that was matched.
- */
- std::string content;
-
- /**
- * Location from which the string was extracted.
- */
- SourceLocation location;
-
- /**
- * Default constructor.
- */
- Token() : type(EmptyToken) {}
-
- /**
- * Constructor of the Token struct.
- *
- * @param id represents the token type.
- * @param content is the string content that has been extracted.
- * @param location is the location of the extracted string content in the
- * source file.
- */
- Token(TokenTypeId type, const std::string &content,
- SourceLocation location)
- : type(type), content(content), location(location)
- {
- }
-
- /**
- * Constructor of the Token struct, only initializes the token type
- *
- * @param type is the id corresponding to the type of the token.
- */
- Token(TokenTypeId type) : type(type) {}
-
- /**
- * The getLocation function allows the tokens to be directly passed as
- * parameter to Logger or LoggableException instances.
- *
- * @return a reference at the location field
- */
- const SourceLocation &getLocation() const { return location; }
-};
-
-/**
* The Tokenizer is used to extract tokens and chunks of text from a
* CharReader. It allows registering and unregistering tokens while parsing and
* to modify the handling of whitespace characters. Note that the
@@ -123,7 +70,7 @@ private:
/**
* Next index in the tokens list where to search for a new token id.
*/
- size_t nextTokenTypeId;
+ size_t nextTokenId;
/**
* Templated function used internally to read the current token. The
@@ -158,31 +105,31 @@ public:
* @return a unique identifier for the registered token or EmptyToken if
* an error occurred.
*/
- TokenTypeId registerToken(const std::string &token);
+ TokenId registerToken(const std::string &token);
/**
- * Unregisters the token belonging to the given TokenTypeId.
+ * Unregisters the token belonging to the given TokenId.
*
* @param type is the token type that should be unregistered. The
- *TokenTypeId
+ *TokenId
* must have been returned by registerToken.
* @return true if the operation was successful, false otherwise (e.g.
* because the given token was already unregistered).
*/
- bool unregisterToken(TokenTypeId type);
+ bool unregisterToken(TokenId type);
/**
- * Returns the token that was registered under the given TokenTypeId id or
+ * Returns the token that was registered under the given TokenId id or
*an
- * empty string if an invalid TokenTypeId id is given.
+ * empty string if an invalid TokenId id is given.
*
- * @param type is the TokenTypeId id for which the corresponding token
+ * @param type is the TokenId id for which the corresponding token
*string
* should be returned.
* @return the registered token string or an empty string if the given type
* was invalid.
*/
- std::string getTokenString(TokenTypeId type);
+ std::string getTokenString(TokenId type);
/**
* Sets the whitespace mode.