summary | refs | log | tree | commit | diff
path: root/src/core/utils/Tokenizer.hpp
diff options
context:
space:
mode:
author: Benjamin Paassen <bpaassen@techfak.uni-bielefeld.de> 2014-10-31 14:56:13 +0000
committer: benjamin <benjamin@daaaf23c-2e50-4459-9457-1e69db5a47bf> 2014-10-31 14:56:13 +0000
commit: 72c1845961e77f7625db47ebd3de129aa90f4f5d (patch)
tree: 790cfaba53fee7b02038bc7513d5bf62b974a4c7 /src/core/utils/Tokenizer.hpp
parent: 9e233b3f13daebb9ac4c5cae0da073d0c6f782c0 (diff)
finished first draft of tokenizer code.
git-svn-id: file:///var/local/svn/basicwriter@90 daaaf23c-2e50-4459-9457-1e69db5a47bf
Diffstat (limited to 'src/core/utils/Tokenizer.hpp')
-rw-r--r-- src/core/utils/Tokenizer.hpp 48
1 files changed, 32 insertions, 16 deletions
diff --git a/src/core/utils/Tokenizer.hpp b/src/core/utils/Tokenizer.hpp
index 24c4f30..924b670 100644
--- a/src/core/utils/Tokenizer.hpp
+++ b/src/core/utils/Tokenizer.hpp
@@ -21,7 +21,9 @@
#include <istream>
#include <map>
-#include <queue>
+#include <deque>
+
+#include "BufferedCharReader.hpp"
namespace ousia {
namespace utils {
@@ -44,33 +46,47 @@ public:
};
struct Token {
- const int tokenId;
- const std::string content;
- const int column;
- const int line;
-
- Token(int tokenId, std::string content, int column, int line)
- : tokenId(tokenId), content(content), column(column), line(line)
+ int tokenId;
+ std::string content;
+ int startColumn;
+ int startLine;
+ int endColumn;
+ int endLine;
+
+ Token(int tokenId, std::string content, int startColumn, int startLine,
+ int endColumn, int endLine)
+ : tokenId(tokenId),
+ content(content),
+ startColumn(startColumn),
+ startLine(startLine),
+ endColumn(endColumn),
+ endLine(endLine)
{
}
};
+static const int TOKEN_NONE = -1;
+static const int TOKEN_TEXT = -2;
+
class Tokenizer {
private:
- const std::istream &input;
- const TokenTreeNode root;
- const std::queue<Token> peekQueue;
+ BufferedCharReader &input;
+ const TokenTreeNode &root;
+ std::deque<Token> peeked;
+ unsigned int peekCursor = 0;
+
+ bool prepare();
public:
- Tokenizer(const TokenTreeNode &root, std::istream &input);
+ Tokenizer(BufferedCharReader &input, const TokenTreeNode &root);
- bool hasNext();
+ bool next(Token &t);
- const Token &next();
+ bool peek(Token &t);
- const Token &peek();
+ void resetPeek();
- void reset();
+ void consumePeek();
};
}
}