diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 21:32:54 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 21:32:54 +0100 |
commit | 8e5e08c4f293434585d2a88f7f331f8ce49b67b9 (patch) | |
tree | fa82a937b1ea80f45d7955938c333f68f8a0f3f6 /src/core/CodeTokenizer.cpp | |
parent | 2544749215bc2465bfeca431e271110ca86d8a83 (diff) | |
parent | 40f4666c43211d9071a827ad8a2524688e7f678f (diff) |
Merge branch 'astoecke_parser_stack_new'
Conflicts:
application/src/core/parser/stack/DocumentHandler.cpp
application/src/core/parser/stack/DocumentHandler.hpp
Diffstat (limited to 'src/core/CodeTokenizer.cpp')
-rw-r--r-- | src/core/CodeTokenizer.cpp | 169 |
1 files changed, 0 insertions, 169 deletions
diff --git a/src/core/CodeTokenizer.cpp b/src/core/CodeTokenizer.cpp deleted file mode 100644 index d65c514..0000000 --- a/src/core/CodeTokenizer.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <cassert> - -#include "CodeTokenizer.hpp" - -namespace ousia { - -Token CodeTokenizer::constructToken(const Token &t) -{ - std::string content = buf.str(); - buf.str(std::string()); - return Token{ - returnTokenId, content, - SourceLocation{t.location.getSourceId(), startToken.location.getStart(), - t.location.getEnd()}}; -} - -void CodeTokenizer::buffer(const Token &t) { buf << t.content; } - -bool CodeTokenizer::doPrepare(const Token &t, std::deque<Token> &peeked) -{ - auto it = descriptors.find(t.tokenId); - CodeTokenMode mode = CodeTokenMode::NONE; - if (it != descriptors.end()) { - mode = it->second.mode; - } - - switch (state) { - case CodeTokenizerState::NORMAL: - switch (mode) { - case CodeTokenMode::STRING_START_END: - state = CodeTokenizerState::IN_STRING; - break; - case CodeTokenMode::BLOCK_COMMENT_START: - state = CodeTokenizerState::IN_BLOCK_COMMENT; - break; - case CodeTokenMode::LINE_COMMENT: - state = CodeTokenizerState::IN_LINE_COMMENT; - break; - case CodeTokenMode::LINEBREAK: - if (!ignoreLinebreaks) { - peeked.push_back( - {it->second.id, t.content, t.location}); - } - return !ignoreLinebreaks; - default: - bool empty = true; - if (t.tokenId == TOKEN_TEXT) { - int begin = -1; - for (size_t c = 0; c < t.content.length(); c++) { - bool isWhitespace = - t.content[c] == ' ' || t.content[c] == '\t'; - if (begin < 0) { - // if we have not yet set our beginning, - // we wait for the first - // non-whitespace-character to set it. - if (!isWhitespace) { - begin = c; - } - } else { - // if we have set our beginning, we wait for the - // first whitespace character, which marks the - // end of the current word. - if (isWhitespace) { - peeked.push_back(Token{ - TOKEN_TEXT, - t.content.substr(begin, (int)c - begin), - SourceLocation{ - t.location.getSourceId(), - t.location.getStart() + begin, - t.location.getStart() + c}}); - begin = -1; - empty = false; - } - } - } - if (begin >= 0) { - peeked.push_back(Token{ - TOKEN_TEXT, t.content.substr(begin), - SourceLocation{t.location.getSourceId(), - t.location.getStart() + begin, - t.location.getEnd()}}); - empty = false; - } - } else { - empty = false; - peeked.push_back(t); - } - return !empty; - } - startToken = t; - returnTokenId = it->second.id; - return false; - case CodeTokenizerState::IN_LINE_COMMENT: - switch (mode) { - case CodeTokenMode::LINEBREAK: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_BLOCK_COMMENT: - switch (mode) { - case CodeTokenMode::BLOCK_COMMENT_END: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_STRING: - switch (mode) { - case CodeTokenMode::ESCAPE: - if (escaped) { - buffer(t); - } - escaped = !escaped; - return false; - case CodeTokenMode::STRING_START_END: - if (escaped) { - buffer(t); - escaped = false; - return false; - } else { - peeked.push_back(constructToken(t)); - state = CodeTokenizerState::NORMAL; - return true; - } - default: - if (escaped) { - // TODO: handle escaped characters? - escaped = false; - } - buffer(t); - return false; - } - } - assert(false); - return false; -} -} |