summary | refs | log | tree | commit | diff
path: root/src/core/CodeTokenizer.cpp
diff options
context:
space:
mode:
author Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-15 21:32:54 +0100
committer Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-02-15 21:32:54 +0100
commit 8e5e08c4f293434585d2a88f7f331f8ce49b67b9 (patch)
tree fa82a937b1ea80f45d7955938c333f68f8a0f3f6 /src/core/CodeTokenizer.cpp
parent 2544749215bc2465bfeca431e271110ca86d8a83 (diff)
parent 40f4666c43211d9071a827ad8a2524688e7f678f (diff)
Merge branch 'astoecke_parser_stack_new'
Conflicts: application/src/core/parser/stack/DocumentHandler.cpp application/src/core/parser/stack/DocumentHandler.hpp
Diffstat (limited to 'src/core/CodeTokenizer.cpp')
-rw-r--r-- src/core/CodeTokenizer.cpp 169
1 files changed, 0 insertions, 169 deletions
diff --git a/src/core/CodeTokenizer.cpp b/src/core/CodeTokenizer.cpp
deleted file mode 100644
index d65c514..0000000
--- a/src/core/CodeTokenizer.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <cassert>
-
-#include "CodeTokenizer.hpp"
-
-namespace ousia {
-
-Token CodeTokenizer::constructToken(const Token &t)
-{
- std::string content = buf.str();
- buf.str(std::string());
- return Token{
- returnTokenId, content,
- SourceLocation{t.location.getSourceId(), startToken.location.getStart(),
- t.location.getEnd()}};
-}
-
-void CodeTokenizer::buffer(const Token &t) { buf << t.content; }
-
-bool CodeTokenizer::doPrepare(const Token &t, std::deque<Token> &peeked)
-{
- auto it = descriptors.find(t.tokenId);
- CodeTokenMode mode = CodeTokenMode::NONE;
- if (it != descriptors.end()) {
- mode = it->second.mode;
- }
-
- switch (state) {
- case CodeTokenizerState::NORMAL:
- switch (mode) {
- case CodeTokenMode::STRING_START_END:
- state = CodeTokenizerState::IN_STRING;
- break;
- case CodeTokenMode::BLOCK_COMMENT_START:
- state = CodeTokenizerState::IN_BLOCK_COMMENT;
- break;
- case CodeTokenMode::LINE_COMMENT:
- state = CodeTokenizerState::IN_LINE_COMMENT;
- break;
- case CodeTokenMode::LINEBREAK:
- if (!ignoreLinebreaks) {
- peeked.push_back(
- {it->second.id, t.content, t.location});
- }
- return !ignoreLinebreaks;
- default:
- bool empty = true;
- if (t.tokenId == TOKEN_TEXT) {
- int begin = -1;
- for (size_t c = 0; c < t.content.length(); c++) {
- bool isWhitespace =
- t.content[c] == ' ' || t.content[c] == '\t';
- if (begin < 0) {
- // if we have not yet set our beginning,
- // we wait for the first
- // non-whitespace-character to set it.
- if (!isWhitespace) {
- begin = c;
- }
- } else {
- // if we have set our beginning, we wait for the
- // first whitespace character, which marks the
- // end of the current word.
- if (isWhitespace) {
- peeked.push_back(Token{
- TOKEN_TEXT,
- t.content.substr(begin, (int)c - begin),
- SourceLocation{
- t.location.getSourceId(),
- t.location.getStart() + begin,
- t.location.getStart() + c}});
- begin = -1;
- empty = false;
- }
- }
- }
- if (begin >= 0) {
- peeked.push_back(Token{
- TOKEN_TEXT, t.content.substr(begin),
- SourceLocation{t.location.getSourceId(),
- t.location.getStart() + begin,
- t.location.getEnd()}});
- empty = false;
- }
- } else {
- empty = false;
- peeked.push_back(t);
- }
- return !empty;
- }
- startToken = t;
- returnTokenId = it->second.id;
- return false;
- case CodeTokenizerState::IN_LINE_COMMENT:
- switch (mode) {
- case CodeTokenMode::LINEBREAK:
- state = CodeTokenizerState::NORMAL;
- if (!ignoreComments) {
- peeked.push_back(constructToken(t));
- }
- return !ignoreComments;
- default:
- if (!ignoreComments) {
- buffer(t);
- }
- return false;
- }
- case CodeTokenizerState::IN_BLOCK_COMMENT:
- switch (mode) {
- case CodeTokenMode::BLOCK_COMMENT_END:
- state = CodeTokenizerState::NORMAL;
- if (!ignoreComments) {
- peeked.push_back(constructToken(t));
- }
- return !ignoreComments;
- default:
- if (!ignoreComments) {
- buffer(t);
- }
- return false;
- }
- case CodeTokenizerState::IN_STRING:
- switch (mode) {
- case CodeTokenMode::ESCAPE:
- if (escaped) {
- buffer(t);
- }
- escaped = !escaped;
- return false;
- case CodeTokenMode::STRING_START_END:
- if (escaped) {
- buffer(t);
- escaped = false;
- return false;
- } else {
- peeked.push_back(constructToken(t));
- state = CodeTokenizerState::NORMAL;
- return true;
- }
- default:
- if (escaped) {
- // TODO: handle escaped characters?
- escaped = false;
- }
- buffer(t);
- return false;
- }
- }
- assert(false);
- return false;
-}
-}