From 72c1845961e77f7625db47ebd3de129aa90f4f5d Mon Sep 17 00:00:00 2001
From: Benjamin Paassen
Date: Fri, 31 Oct 2014 14:56:13 +0000
Subject: finished first draft of tokenizer code.

git-svn-id: file:///var/local/svn/basicwriter@90 daaaf23c-2e50-4459-9457-1e69db5a47bf
---
 src/core/utils/Tokenizer.hpp | 48 +++++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 16 deletions(-)

(limited to 'src/core/utils/Tokenizer.hpp')

diff --git a/src/core/utils/Tokenizer.hpp b/src/core/utils/Tokenizer.hpp
index 24c4f30..924b670 100644
--- a/src/core/utils/Tokenizer.hpp
+++ b/src/core/utils/Tokenizer.hpp
@@ -21,7 +21,9 @@
 #include
 #include
-#include
+#include
+
+#include "BufferedCharReader.hpp"
 
 namespace ousia {
 namespace utils {
 
@@ -44,33 +46,47 @@ public:
 };
 
 struct Token {
-	const int tokenId;
-	const std::string content;
-	const int column;
-	const int line;
-
-	Token(int tokenId, std::string content, int column, int line)
-	    : tokenId(tokenId), content(content), column(column), line(line)
+	int tokenId;
+	std::string content;
+	int startColumn;
+	int startLine;
+	int endColumn;
+	int endLine;
+
+	Token(int tokenId, std::string content, int startColumn, int startLine,
+	      int endColumn, int endLine)
+	    : tokenId(tokenId),
+	      content(content),
+	      startColumn(startColumn),
+	      startLine(startLine),
+	      endColumn(endColumn),
+	      endLine(endLine)
 	{
 	}
 };
 
+static const int TOKEN_NONE = -1;
+static const int TOKEN_TEXT = -2;
+
 class Tokenizer {
 private:
-	const std::istream &input;
-	const TokenTreeNode root;
-	const std::queue<Token> peekQueue;
+	BufferedCharReader &input;
+	const TokenTreeNode &root;
+	std::deque<Token> peeked;
+	unsigned int peekCursor = 0;
+
+	bool prepare();
 
 public:
-	Tokenizer(const TokenTreeNode &root, std::istream &input);
+	Tokenizer(BufferedCharReader &input, const TokenTreeNode &root);
 
-	bool hasNext();
+	bool next(Token &t);
 
-	const Token &next();
+	bool peek(Token &t);
 
-	const Token &peek();
+	void resetPeek();
 
-	void reset();
+	void consumePeek();
 };
 }
 }
--
cgit v1.2.3
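
Note on the new interface: the private members added in this commit (a std::deque<Token> named "peeked" plus an unsigned peekCursor) suggest a read-ahead buffer behind the new next()/peek()/resetPeek()/consumePeek() methods. The standalone sketch below illustrates one plausible semantics for that pattern, assuming peek() looks one token further ahead on each call, resetPeek() rewinds the look-ahead, and consumePeek() accepts everything peeked so far. The PeekBuffer class, its vector-backed token source, and all internal logic are illustrative assumptions, not the implementation added by this commit.

// Minimal standalone sketch of the peek-cursor pattern implied by the diff.
#include <cstddef>
#include <deque>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Token {
	int tokenId;
	std::string content;
};

class PeekBuffer {
private:
	std::vector<Token> source;   // stands in for tokens produced from the char reader
	std::size_t readPos = 0;     // next token not yet handed out by next()
	std::deque<Token> peeked;    // tokens read ahead but not yet consumed
	unsigned int peekCursor = 0; // position of peek() inside "peeked"

public:
	explicit PeekBuffer(std::vector<Token> tokens) : source(std::move(tokens)) {}

	// Returns the next token, preferring tokens that were already peeked.
	bool next(Token &t)
	{
		if (!peeked.empty()) {
			t = peeked.front();
			peeked.pop_front();
		} else if (readPos < source.size()) {
			t = source[readPos++];
		} else {
			return false;
		}
		resetPeek();
		return true;
	}

	// Looks one token further ahead on every call without consuming anything.
	bool peek(Token &t)
	{
		if (peekCursor >= peeked.size()) {
			if (readPos >= source.size()) {
				return false;
			}
			peeked.push_back(source[readPos++]);
		}
		t = peeked[peekCursor++];
		return true;
	}

	// Rewinds peeking; next() will return the peeked tokens again.
	void resetPeek() { peekCursor = 0; }

	// Accepts everything peeked so far; next() continues after it.
	void consumePeek()
	{
		peeked.erase(peeked.begin(), peeked.begin() + peekCursor);
		peekCursor = 0;
	}
};

int main()
{
	PeekBuffer buf({{0, "a"}, {1, "b"}, {2, "c"}});
	Token t;
	buf.peek(t);      // looks at "a"
	buf.peek(t);      // looks at "b"
	buf.resetPeek();  // decide not to take the peeked tokens
	buf.next(t);      // returns "a" again
	std::cout << t.content << std::endl;
	return 0;
}

Under this reading, a parser can speculatively peek several tokens, back out with resetPeek() if the look-ahead does not match, or commit to it with consumePeek(); how the actual Tokenizer::prepare() fills the buffer from BufferedCharReader is not shown in this header and is left open here.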