diff options
author | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2014-10-31 14:56:13 +0000 |
---|---|---|
committer | benjamin <benjamin@daaaf23c-2e50-4459-9457-1e69db5a47bf> | 2014-10-31 14:56:13 +0000 |
commit | 72c1845961e77f7625db47ebd3de129aa90f4f5d (patch) | |
tree | 790cfaba53fee7b02038bc7513d5bf62b974a4c7 /src/core/utils/Tokenizer.hpp | |
parent | 9e233b3f13daebb9ac4c5cae0da073d0c6f782c0 (diff) |
finished first draft of tokenizer code.
git-svn-id: file:///var/local/svn/basicwriter@90 daaaf23c-2e50-4459-9457-1e69db5a47bf
Diffstat (limited to 'src/core/utils/Tokenizer.hpp')
-rw-r--r-- | src/core/utils/Tokenizer.hpp | 48 |
1 files changed, 32 insertions, 16 deletions
```diff
diff --git a/src/core/utils/Tokenizer.hpp b/src/core/utils/Tokenizer.hpp
index 24c4f30..924b670 100644
--- a/src/core/utils/Tokenizer.hpp
+++ b/src/core/utils/Tokenizer.hpp
@@ -21,7 +21,9 @@
 
 #include <istream>
 #include <map>
-#include <queue>
+#include <deque>
+
+#include "BufferedCharReader.hpp"
 
 namespace ousia {
 namespace utils {
@@ -44,33 +46,47 @@ public:
 };
 
 struct Token {
-	const int tokenId;
-	const std::string content;
-	const int column;
-	const int line;
-
-	Token(int tokenId, std::string content, int column, int line)
-		: tokenId(tokenId), content(content), column(column), line(line)
+	int tokenId;
+	std::string content;
+	int startColumn;
+	int startLine;
+	int endColumn;
+	int endLine;
+
+	Token(int tokenId, std::string content, int startColumn, int startLine,
+		int endColumn, int endLine)
+		: tokenId(tokenId),
+		content(content),
+		startColumn(startColumn),
+		startLine(startLine),
+		endColumn(endColumn),
+		endLine(endLine)
 	{
 	}
 };
 
+static const int TOKEN_NONE = -1;
+static const int TOKEN_TEXT = -2;
+
 class Tokenizer {
 private:
-	const std::istream &input;
-	const TokenTreeNode root;
-	const std::queue<Token> peekQueue;
+	BufferedCharReader &input;
+	const TokenTreeNode &root;
+	std::deque<Token> peeked;
+	unsigned int peekCursor = 0;
+
+	bool prepare();
 
 public:
-	Tokenizer(const TokenTreeNode &root, std::istream &input);
+	Tokenizer(BufferedCharReader &input, const TokenTreeNode &root);
 
-	bool hasNext();
+	bool next(Token &t);
 
-	const Token &next();
+	bool peek(Token &t);
 
-	const Token &peek();
+	void resetPeek();
 
-	void reset();
+	void consumePeek();
 };
 }
 }
```