From 165cf9a5c6ab03dab64d5eb5a5577f8c216bb832 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Fri, 14 Nov 2014 17:41:03 +0100 Subject: implemented tokenizer test and started implementing CodeTokenizer under supervision of Maester Stoeckel. --- src/core/utils/CodeTokenizer.hpp | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/core/utils/CodeTokenizer.hpp (limited to 'src/core/utils/CodeTokenizer.hpp') diff --git a/src/core/utils/CodeTokenizer.hpp b/src/core/utils/CodeTokenizer.hpp new file mode 100644 index 0000000..f26a74c --- /dev/null +++ b/src/core/utils/CodeTokenizer.hpp @@ -0,0 +1,83 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#ifndef _OUSIA_UTILS_CODE_TOKENIZER_HPP_ +#define _OUSIA_UTILS_CODE_TOKENIZER_HPP_ + +#include +#include + +#include "BufferedCharReader.hpp" +#include "Tokenizer.hpp" + +namespace ousia { +namespace utils { + +enum class CodeTokenMode { + STRING_START_END, + LINE_COMMENT, + BLOCK_COMMENT_START, + BLOCK_COMMENT_END, + LINEBREAK, + ESCAPE, + NONE +}; + +struct CodeTokenDescriptor { + CodeTokenMode mode; + int id; + + CodeTokenDescriptor(CodeTokenMode mode, int id) : mode(mode), id(id) {} +}; + + +enum class CodeTokenizerState { + NORMAL, + IN_BLOCK_COMMENT, + IN_LINE_COMMENT, + IN_STRING +}; + +class CodeTokenizer : public Tokenizer { +private: + std::map descriptors; + CodeTokenizerState state; + std::stringstream buf; + Token startToken; + int returnTokenId; + bool escaped = false; + + Token constructToken(const Token& t); + void buffer(const Token& t); + +protected: + bool doPrepare(const Token &t, std::deque &peeked) override; + +public: + bool ignoreComments = false; + + CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, + std::map descriptors) + : Tokenizer(input, root), descriptors(descriptors) + { + } +}; +} +} + +#endif -- cgit v1.2.3