diff options
| author | Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> | 2014-10-31 14:56:13 +0000 | 
|---|---|---|
| committer | benjamin <benjamin@daaaf23c-2e50-4459-9457-1e69db5a47bf> | 2014-10-31 14:56:13 +0000 | 
| commit | 72c1845961e77f7625db47ebd3de129aa90f4f5d (patch) | |
| tree | 790cfaba53fee7b02038bc7513d5bf62b974a4c7 /src/core/utils/Tokenizer.hpp | |
| parent | 9e233b3f13daebb9ac4c5cae0da073d0c6f782c0 (diff) | |
finished first draft of tokenizer code.
git-svn-id: file:///var/local/svn/basicwriter@90 daaaf23c-2e50-4459-9457-1e69db5a47bf
Diffstat (limited to 'src/core/utils/Tokenizer.hpp')
| -rw-r--r-- | src/core/utils/Tokenizer.hpp | 48 | 
1 file changed, 32 insertions, 16 deletions
```diff
diff --git a/src/core/utils/Tokenizer.hpp b/src/core/utils/Tokenizer.hpp
index 24c4f30..924b670 100644
--- a/src/core/utils/Tokenizer.hpp
+++ b/src/core/utils/Tokenizer.hpp
@@ -21,7 +21,9 @@
 
 #include <istream>
 #include <map>
-#include <queue>
+#include <deque>
+
+#include "BufferedCharReader.hpp"
 
 namespace ousia {
 namespace utils {
@@ -44,33 +46,47 @@ public:
 };
 
 struct Token {
-	const int tokenId;
-	const std::string content;
-	const int column;
-	const int line;
-
-	Token(int tokenId, std::string content, int column, int line)
-	    : tokenId(tokenId), content(content), column(column), line(line)
+	int tokenId;
+	std::string content;
+	int startColumn;
+	int startLine;
+	int endColumn;
+	int endLine;
+
+	Token(int tokenId, std::string content, int startColumn, int startLine,
+	      int endColumn, int endLine)
+	    : tokenId(tokenId),
+	      content(content),
+	      startColumn(startColumn),
+	      startLine(startLine),
+	      endColumn(endColumn),
+	      endLine(endLine)
 	{
 	}
 };
 
+static const int TOKEN_NONE = -1;
+static const int TOKEN_TEXT = -2;
+
 class Tokenizer {
 private:
-	const std::istream &input;
-	const TokenTreeNode root;
-	const std::queue<Token> peekQueue;
+	BufferedCharReader &input;
+	const TokenTreeNode &root;
+	std::deque<Token> peeked;
+	unsigned int peekCursor = 0;
+
+	bool prepare();
 
 public:
-	Tokenizer(const TokenTreeNode &root, std::istream &input);
+	Tokenizer(BufferedCharReader &input, const TokenTreeNode &root);
 
-	bool hasNext();
+	bool next(Token &t);
 
-	const Token &next();
+	bool peek(Token &t);
 
-	const Token &peek();
+	void resetPeek();
 
-	void reset();
+	void consumePeek();
 };
 }
 }
```
