From ca2040ba16459f5b3dfe8a86a14680eecea7ef7d Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Fri, 21 Nov 2014 10:51:11 +0100 Subject: According to my lords holy command I enacted his graces order of moving all peasants and lordlings, big and small, young and old, to our common land and inner sanctuary to be safe from outer threats by those usurpers. --- src/core/utils/CodeTokenizer.cpp | 166 --------------------------------------- 1 file changed, 166 deletions(-) delete mode 100644 src/core/utils/CodeTokenizer.cpp (limited to 'src/core/utils/CodeTokenizer.cpp') diff --git a/src/core/utils/CodeTokenizer.cpp b/src/core/utils/CodeTokenizer.cpp deleted file mode 100644 index e5b8610..0000000 --- a/src/core/utils/CodeTokenizer.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include - -#include "CodeTokenizer.hpp" - -namespace ousia { -namespace utils { - -Token CodeTokenizer::constructToken(const Token &t) -{ - std::string content = buf.str(); - buf.str(std::string()); - return Token{returnTokenId, content, startToken.startColumn, - startToken.startLine, t.endColumn, t.endLine}; -} - -void CodeTokenizer::buffer(const Token &t) { buf << t.content; } - -bool CodeTokenizer::doPrepare(const Token &t, std::deque &peeked) -{ - auto it = descriptors.find(t.tokenId); - CodeTokenMode mode = CodeTokenMode::NONE; - if (it != descriptors.end()) { - mode = it->second.mode; - } - - if (t.startLine != t.endLine && mode != CodeTokenMode::LINEBREAK) { - throw TokenizerException( - "We did not expect a multiline token (except linebreaks). Most " - "likely you did not add a linebreak token to your tokenizer!"); - } - - switch (state) { - case CodeTokenizerState::NORMAL: - switch (mode) { - case CodeTokenMode::STRING_START_END: - state = CodeTokenizerState::IN_STRING; - break; - case CodeTokenMode::BLOCK_COMMENT_START: - state = CodeTokenizerState::IN_BLOCK_COMMENT; - break; - case CodeTokenMode::LINE_COMMENT: - state = CodeTokenizerState::IN_LINE_COMMENT; - break; - case CodeTokenMode::LINEBREAK: - peeked.push_back({it->second.id, t.content, t.startColumn, - t.startLine, t.endColumn, t.endLine}); - return true; - default: - if (t.tokenId == TOKEN_TEXT) { - int begin = -1; - for (size_t c = 0; c < t.content.length(); c++) { - bool isWhitespace = - t.content[c] == ' ' || t.content[c] == '\t'; - if (begin < 0) { - // if we have not yet set our beginning, - // we wait for the first - // non-whitespace-character to set it. - if (!isWhitespace) { - begin = c; - } - } else { - // if we have set our beginning, we wait for the - // first whitespace character, which marks the - // end of the current word. - if (isWhitespace) { - peeked.push_back(Token{ - TOKEN_TEXT, - t.content.substr(begin, (int)c - begin), - t.startColumn + begin, t.startLine, - t.startColumn + (int)c, t.endLine}); - begin = -1; - } - } - } - if(begin >= 0){ - peeked.push_back(Token{ - TOKEN_TEXT, - t.content.substr(begin), - t.startColumn + begin, t.startLine, - t.endColumn, t.endLine}); - } - } else { - peeked.push_back(t); - } - return true; - } - startToken = t; - returnTokenId = it->second.id; - return false; - case CodeTokenizerState::IN_LINE_COMMENT: - switch (mode) { - case CodeTokenMode::LINEBREAK: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_BLOCK_COMMENT: - switch (mode) { - case CodeTokenMode::BLOCK_COMMENT_END: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_STRING: - switch (mode) { - case CodeTokenMode::ESCAPE: - if (escaped) { - buffer(t); - } - escaped = !escaped; - return false; - case CodeTokenMode::STRING_START_END: - if (escaped) { - buffer(t); - escaped = false; - return false; - } else { - peeked.push_back(constructToken(t)); - state = CodeTokenizerState::NORMAL; - return true; - } - default: - if (escaped) { - // TODO: handle escaped characters? - escaped = false; - } - buffer(t); - return false; - } - } - assert(false); -} -} -} -- cgit v1.2.3