summary | refs | log | tree | commit | diff
path: root/src/core/parser/utils
diff options
context:
space:
mode:
author: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-14 23:58:55 +0100
committer: Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-14 23:58:55 +0100
commit: 9f9e51974e782c4eb6f393ca3d4c3382df093bf1 (patch)
tree: 2bf507b16e9c1bf1999c20af8c212ef4557441fe /src/core/parser/utils
parent: 0b93aa3ff50756fbb1d93c7134fe2cc7f093fa75 (diff)
Moved Tokenizer to core/parser/utils and adapted name
Diffstat (limited to 'src/core/parser/utils')
-rw-r--r-- src/core/parser/utils/Tokenizer.cpp 56
-rw-r--r-- src/core/parser/utils/Tokenizer.hpp 34
2 files changed, 45 insertions, 45 deletions
diff --git a/src/core/parser/utils/Tokenizer.cpp b/src/core/parser/utils/Tokenizer.cpp
index 1fac25a..3c8177d 100644
--- a/src/core/parser/utils/Tokenizer.cpp
+++ b/src/core/parser/utils/Tokenizer.cpp
@@ -24,7 +24,7 @@
#include <core/common/Utils.hpp>
#include <core/common/WhitespaceHandler.hpp>
-#include "DynamicTokenizer.hpp"
+#include "Tokenizer.hpp"
namespace ousia {
@@ -39,7 +39,7 @@ struct TokenMatch {
/**
* Token that was matched.
*/
- DynamicToken token;
+ Token token;
/**
* Current length of the data within the text handler. The text buffer needs
@@ -117,10 +117,10 @@ public:
* @param c is the character that should be appended to the current prefix.
* @param lookups is a list to which new TokenLookup instances are added --
* which could potentially be expanded in the next iteration.
- * @param match is the DynamicToken instance to which the matching token
+ * @param match is the Token instance to which the matching token
* should be written.
* @param tokens is a reference to the internal token list of the
- * DynamicTokenizer.
+ * Tokenizer.
* @param end is the end byte offset of the current character.
* @param sourceId is the source id of this file.
*/
@@ -143,7 +143,7 @@ public:
size_t len = str.size();
if (len > match.token.content.size()) {
match.token =
- DynamicToken{node->type, str, {sourceId, start, end}};
+ Token{node->type, str, {sourceId, start, end}};
match.textLength = textLength;
match.textEnd = textEnd;
}
@@ -181,15 +181,15 @@ static void buildTextToken(const WhitespaceHandler &handler, TokenMatch &match,
}
}
-/* Class DynamicTokenizer */
+/* Class Tokenizer */
-DynamicTokenizer::DynamicTokenizer(WhitespaceMode whitespaceMode)
+Tokenizer::Tokenizer(WhitespaceMode whitespaceMode)
: whitespaceMode(whitespaceMode), nextTokenTypeId(0)
{
}
template <typename TextHandler, bool read>
-bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
+bool Tokenizer::next(CharReader &reader, Token &token)
{
// If we're in the read mode, reset the char reader peek position to the
// current read position
@@ -268,12 +268,12 @@ bool DynamicTokenizer::next(CharReader &reader, DynamicToken &token)
}
token = match.token;
} else {
- token = DynamicToken{};
+ token = Token{};
}
return match.hasMatch();
}
-bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token)
+bool Tokenizer::read(CharReader &reader, Token &token)
{
switch (whitespaceMode) {
case WhitespaceMode::PRESERVE:
@@ -286,7 +286,7 @@ bool DynamicTokenizer::read(CharReader &reader, DynamicToken &token)
return false;
}
-bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token)
+bool Tokenizer::peek(CharReader &reader, Token &token)
{
switch (whitespaceMode) {
case WhitespaceMode::PRESERVE:
@@ -299,7 +299,7 @@ bool DynamicTokenizer::peek(CharReader &reader, DynamicToken &token)
return false;
}
-TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
+TokenTypeId Tokenizer::registerToken(const std::string &token)
{
// Abort if an empty token should be registered
if (token.empty()) {
@@ -337,7 +337,7 @@ TokenTypeId DynamicTokenizer::registerToken(const std::string &token)
return type;
}
-bool DynamicTokenizer::unregisterToken(TokenTypeId type)
+bool Tokenizer::unregisterToken(TokenTypeId type)
{
// Unregister the token from the trie, abort if an invalid type is given
if (type < tokens.size() && trie.unregisterToken(tokens[type])) {
@@ -348,7 +348,7 @@ bool DynamicTokenizer::unregisterToken(TokenTypeId type)
return false;
}
-std::string DynamicTokenizer::getTokenString(TokenTypeId type)
+std::string Tokenizer::getTokenString(TokenTypeId type)
{
if (type < tokens.size()) {
return tokens[type];
@@ -356,26 +356,26 @@ std::string DynamicTokenizer::getTokenString(TokenTypeId type)
return std::string{};
}
-void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode)
+void Tokenizer::setWhitespaceMode(WhitespaceMode mode)
{
whitespaceMode = mode;
}
-WhitespaceMode DynamicTokenizer::getWhitespaceMode() { return whitespaceMode; }
+WhitespaceMode Tokenizer::getWhitespaceMode() { return whitespaceMode; }
/* Explicitly instantiate all possible instantiations of the "next" member
function */
-template bool DynamicTokenizer::next<PreservingWhitespaceHandler, false>(
- CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, false>(
- CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, false>(
- CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<PreservingWhitespaceHandler, true>(
- CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<TrimmingWhitespaceHandler, true>(
- CharReader &reader, DynamicToken &token);
-template bool DynamicTokenizer::next<CollapsingWhitespaceHandler, true>(
- CharReader &reader, DynamicToken &token);
+template bool Tokenizer::next<PreservingWhitespaceHandler, false>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<TrimmingWhitespaceHandler, false>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<CollapsingWhitespaceHandler, false>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<PreservingWhitespaceHandler, true>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<TrimmingWhitespaceHandler, true>(
+ CharReader &reader, Token &token);
+template bool Tokenizer::next<CollapsingWhitespaceHandler, true>(
+ CharReader &reader, Token &token);
}
diff --git a/src/core/parser/utils/Tokenizer.hpp b/src/core/parser/utils/Tokenizer.hpp
index 3e5aeb3..6b4e116 100644
--- a/src/core/parser/utils/Tokenizer.hpp
+++ b/src/core/parser/utils/Tokenizer.hpp
@@ -17,7 +17,7 @@
*/
/**
- * @file DynamicTokenizer.hpp
+ * @file Tokenizer.hpp
*
* Tokenizer that can be reconfigured at runtime used for parsing the plain
* text format.
@@ -43,9 +43,9 @@ namespace ousia {
class CharReader;
/**
- * The DynamicToken structure describes a token discovered by the Tokenizer.
+ * The Token structure describes a token discovered by the Tokenizer.
*/
-struct DynamicToken {
+struct Token {
/**
* Id of the type of this token.
*/
@@ -64,28 +64,28 @@ struct DynamicToken {
/**
* Default constructor.
*/
- DynamicToken() : type(EmptyToken) {}
+ Token() : type(EmptyToken) {}
/**
- * Constructor of the DynamicToken struct.
+ * Constructor of the Token struct.
*
* @param type represents the token type.
* @param content is the string content that has been extracted.
* @param location is the location of the extracted string content in the
* source file.
*/
- DynamicToken(TokenTypeId type, const std::string &content,
+ Token(TokenTypeId type, const std::string &content,
SourceLocation location)
: type(type), content(content), location(location)
{
}
/**
- * Constructor of the DynamicToken struct, only initializes the token type
+ * Constructor of the Token struct, only initializes the token type
*
* @param type is the id corresponding to the type of the token.
*/
- DynamicToken(TokenTypeId type) : type(type) {}
+ Token(TokenTypeId type) : type(type) {}
/**
* The getLocation function allows the tokens to be directly passed as
@@ -97,13 +97,13 @@ struct DynamicToken {
};
/**
- * The DynamicTokenizer is used to extract tokens and chunks of text from a
+ * The Tokenizer is used to extract tokens and chunks of text from a
* CharReader. It allows registering and unregistering tokens while parsing and
* modifying the handling of whitespace characters. Note that the
- * DynamicTokenizer always tries to extract the longest possible token from the
+ * Tokenizer always tries to extract the longest possible token from the
* tokenizer.
*/
-class DynamicTokenizer {
+class Tokenizer {
private:
/**
* Internally used token trie. This object holds all registered tokens.
@@ -140,15 +140,15 @@ private:
* @return false if the end of the stream has been reached, true otherwise.
*/
template <typename TextHandler, bool read>
- bool next(CharReader &reader, DynamicToken &token);
+ bool next(CharReader &reader, Token &token);
public:
/**
- * Constructor of the DynamicTokenizer class.
+ * Constructor of the Tokenizer class.
*
* @param whitespaceMode specifies how whitespace should be handled.
*/
- DynamicTokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
+ Tokenizer(WhitespaceMode whitespaceMode = WhitespaceMode::COLLAPSE);
/**
* Registers the given string as a token. Returns a const pointer at a
@@ -201,7 +201,7 @@ public:
/**
* Reads a new token from the CharReader and stores it in the given
- * DynamicToken instance.
+ * Token instance.
*
* @param reader is the CharReader instance from which the data should be
* read.
@@ -210,7 +210,7 @@ public:
* @return true if a token could be read, false if the end of the stream
* has been reached.
*/
- bool read(CharReader &reader, DynamicToken &token);
+ bool read(CharReader &reader, Token &token);
/**
* The peek method does not advance the read position of the char reader,
@@ -223,7 +223,7 @@ public:
* @return true if a token could be read, false if the end of the stream
* has been reached.
*/
- bool peek(CharReader &reader, DynamicToken &token);
+ bool peek(CharReader &reader, Token &token);
};
}