Diffstat (limited to 'src/core/utils')
-rw-r--r-- | src/core/utils/BufferedCharReader.cpp | 216
-rw-r--r-- | src/core/utils/BufferedCharReader.hpp | 240
-rw-r--r-- | src/core/utils/CSSParser.cpp | 81
-rw-r--r-- | src/core/utils/CSSParser.hpp | 167
-rw-r--r-- | src/core/utils/CodeTokenizer.cpp | 166
-rw-r--r-- | src/core/utils/CodeTokenizer.hpp | 130
-rw-r--r-- | src/core/utils/RangeSet.hpp | 326
-rw-r--r-- | src/core/utils/Tokenizer.cpp | 212
-rw-r--r-- | src/core/utils/Tokenizer.hpp | 231
-rw-r--r-- | src/core/utils/Utils.cpp | 39
-rw-r--r-- | src/core/utils/Utils.hpp | 65
11 files changed, 0 insertions, 1873 deletions
diff --git a/src/core/utils/BufferedCharReader.cpp b/src/core/utils/BufferedCharReader.cpp deleted file mode 100644 index c13628f..0000000 --- a/src/core/utils/BufferedCharReader.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include "BufferedCharReader.hpp" - -namespace ousia { -namespace utils { - -// Constants used within the linebreak statemachine. -static const uint8_t LB_STATE_NONE = 0x00; -static const uint8_t LB_STATE_ONE = 0x01; -static const uint8_t LB_STATE_LF = 0x10; -static const uint8_t LB_STATE_CR = 0x20; -static const uint8_t LB_STATE_MASK_CNT = 0x0F; -static const uint8_t LB_STATE_MASK_TYPE = 0xF0; - -/******************************************************************************* - * Struct BufferedCharReader::ReadCursor - ******************************************************************************/ - -BufferedCharReader::ReadCursor::ReadCursor(const bool destructive) : - destructive(destructive) -{ - reset(); -} - -void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor) -{ - this->line = cursor.line; - this->column = cursor.column; - this->bufferElem = cursor.bufferElem; - this->bufferPos = cursor.bufferPos; - this->lbState = cursor.lbState; -} - -void BufferedCharReader::ReadCursor::reset() -{ - this->line = 1; - this->column = 1; - this->bufferElem = 0; - this->bufferPos = 0; - this->lbState = LB_STATE_NONE; -} - -/******************************************************************************* - * Class BufferedCharReader - ******************************************************************************/ - -BufferedCharReader::BufferedCharReader() : - readCursor(true), peekCursor(false) -{ - reset(); -} - -void BufferedCharReader::reset() -{ - readCursor.reset(); - peekCursor.reset(); - buffer.clear(); - closed = false; -} - -bool BufferedCharReader::feed(const std::string &data) -{ - // Abort if the BufferedCharReader was closed - if (closed) { - return false; - } - - // Append the data onto the queue - buffer.push_back(data); - return true; -} - -void BufferedCharReader::close() -{ - closed = true; -} - -bool BufferedCharReader::substituteLinebreaks(ReadCursor *cursor, char *c) -{ - // Handle line breaks, inserts breakes after the following character - // combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n - if ((*c == '\n') || (*c == '\r')) { - // Determine the type of the current linebreak character - const uint8_t type = (*c == '\n') ? 
LB_STATE_LF : LB_STATE_CR; - - // Read the last count and the last type from the state - const uint8_t lastCount = cursor->lbState & LB_STATE_MASK_CNT; - const uint8_t lastType = cursor->lbState & LB_STATE_MASK_TYPE; - - // Set the current linebreak type and counter in the state - cursor->lbState = ((lastCount + 1) & 1) | type; - - // If either this is the first instance of this character or the same - // return character is repeated - if (!lastCount || (lastType == type)) { - *c = '\n'; - return true; - } - return false; - } - - // Find the state - cursor->lbState = LB_STATE_NONE; - return true; -} - -bool BufferedCharReader::readCharacterAtCursor(ReadCursor *cursor, - char *c) -{ - bool hasChar = false; - while (!hasChar) { - // Abort if the current buffer element does not point to a valid entry - // in the buffer -- we must wait until another data block has been fed - // into the buffer - if (cursor->bufferElem >= buffer.size()) { - return false; - } - - // Fetch the current element the peek pointer points to - const std::string &data = buffer[cursor->bufferElem]; - - // Handle the "no data" case -- either in a destructive or - // non-destructive manner. - if (cursor->bufferPos >= data.length()) { - if (cursor->destructive) { - buffer.pop_front(); - } else { - cursor->bufferElem++; - } - cursor->bufferPos = 0; - continue; - } - - // Read the character, advance the buffer position - *c = *(data.data() + cursor->bufferPos); - cursor->bufferPos++; - - // Substitute linebreaks with a single LF (0x0A) - hasChar = substituteLinebreaks(cursor, c); - } - - // Update the position counter - if (*c == '\n') { - cursor->line++; - cursor->column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((*c & 0x80) && !(*c & 0x40))) { - cursor->column++; - } - } - - return true; -} - -bool BufferedCharReader::peek(char *c) -{ - return readCharacterAtCursor(&peekCursor, c); -} - -bool BufferedCharReader::read(char *c) -{ - resetPeek(); - return readCharacterAtCursor(&readCursor, c); -} - -void BufferedCharReader::consumePeek() -{ - // Remove all no longer needed buffer elements - for (unsigned int i = 0; i < peekCursor.bufferElem; i++) { - buffer.pop_front(); - } - peekCursor.bufferElem = 0; - - // Copy the peek cursor to the read cursor - readCursor.assign(peekCursor); -} - -void BufferedCharReader::resetPeek() -{ - // Reset the peek cursor to the read cursor - peekCursor.assign(readCursor); -} - -bool BufferedCharReader::atEnd() -{ - if (closed) { - if (buffer.size() <= 0) { - return true; - } else if (buffer.size() == 1) { - return buffer[0].size() == readCursor.bufferPos; - } - } - return false; -} - -} -} - diff --git a/src/core/utils/BufferedCharReader.hpp b/src/core/utils/BufferedCharReader.hpp deleted file mode 100644 index b13cde6..0000000 --- a/src/core/utils/BufferedCharReader.hpp +++ /dev/null @@ -1,240 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. 
If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ -#define _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ - -#include <deque> -#include <string> -#include <cstdint> - -namespace ousia { -namespace utils { - -/** - * The BufferedCharReader class is used for storing incomming data that - * is fed into the pipeline as well as reading/peeking single characters - * from that buffer. Additionally it counts the current column/row - * (with correct handling for UTF-8) and contains an internal state - * machine that handles the detection of linebreaks. - * - * Additionally the BufferedCharReader performs the following tasks: - * 1. Convert the incomming character encoding to UTF-8 (TODO: implement) - * 2. Convert arbitrary linebreaks to a single "\n" - */ -class BufferedCharReader { - -private: - - /** - * The ReadCursor structure is responsible for representing the read - * position within the text an all state machine states belonging to the - * cursor. There are two types of read cursors: destructive and - * non-destructive read cursors. - */ - struct ReadCursor { - /** - * Specifies whether this is a destructive cursor (bytes are discarded - * once they were read from the buffer). - */ - const bool destructive; - - /** - * The line the cursor currently points to. - */ - unsigned int line; - - /** - * The column the cursor currently points to. - */ - unsigned int column; - - /** - * The index of the element in the data buffer we're currently reading - * from. - */ - unsigned int bufferElem; - - /** - * The byte position within this data buffer. - */ - unsigned int bufferPos; - - /** - * State variable used in the internal state machine of the - * line feed detection. - */ - uint8_t lbState; - - /** - * Constructor of the ReadCursor structure. - * - * @param destructive specifies whether the ReadCursor is destructive - * (consumes all read characters, as used in the "read cursor") or - * non-destructive (as used in the "peek cursor"). - */ - ReadCursor(const bool destructive); - - /** - * Copys the data from another ReadCursor without overriding the - * "destructive" flag. - */ - void assign(const ReadCursor &cursor); - - /** - * Resets the cursor without changing the "destructive" flag. - */ - void reset(); - }; - - /** - * Queue containing the data that has been fed into the char reader. - */ - std::deque<std::string> buffer; - - /** - * The read and the peek cursor. - */ - ReadCursor readCursor, peekCursor; - - /** - * Determines whether the reader has been closed. - */ - bool closed; - - /** - * Substitute any combination of linebreaks in the incomming code with "\n". - * Returns true if the current character is meant as output, false - * otherwise. - */ - bool substituteLinebreaks(ReadCursor *cursor, char *c); - - /** - * Reads a character from the input buffer and advances the given read - * cursor. - * - * @param cursor is a reference to the read cursor that should be used - * for reading. - * @param hasChar is set to true, if a character is available, false if - * no character is available (e.g. because line breaks are substituted or - * the end of a buffer boundary is reached -- in this case this function - * should be called again with the same parameters.) - * @param c is a output parameter, which will be set to the read character. - * @param returns true if there was enough data in the buffer, false - * otherwise. 
- */ - bool readCharacterAtCursor(ReadCursor *cursor, char *c); - - /** - * Function that is called for each read character -- updates the row and - * column count. - */ - void updatePositionCounters(const char c); - -public: - - /** - * Constructor of the buffered char reader class. - */ - BufferedCharReader(); - - /** - * Resets the reader to its initial state. - */ - void reset(); - - /** - * Feeds new data into the internal buffer of the BufferedCharReader - * class. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - * @return true if the operation was successful, false otherwise (e.g. - * because the reader is closed). - */ - bool feed(const std::string &data); - - /** - * Marks the end of the input, allowing successors in the pipeline - * to react properly (e.g. creating the end of stream token). - */ - void close(); - - /** - * Peeks a single character. If called multiple times, returns the - * character after the previously peeked character. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char *c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char *c); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - */ - bool atEnd(); - - /** - * Returns the current line (starting with one). - */ - inline int getLine() - { - return readCursor.line; - } - - /** - * Returns the current column (starting with one). - */ - inline int getColumn() - { - return readCursor.column; - } - -}; - -} -} - -#endif /* _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ */ - diff --git a/src/core/utils/CSSParser.cpp b/src/core/utils/CSSParser.cpp deleted file mode 100644 index 1763cc2..0000000 --- a/src/core/utils/CSSParser.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
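The feed/peek/consumePeek interface of the BufferedCharReader declared above is easiest to see in a small driver. The following is a minimal, purely illustrative sketch: it assumes the header compiles as declared here, and the input strings are invented for the example.

#include <iostream>
#include <string>

#include "BufferedCharReader.hpp"

int main()
{
    ousia::utils::BufferedCharReader reader;
    reader.feed("first\r\n");   // the CR/LF pair is collapsed to a single '\n'
    reader.feed("second");      // data may arrive in several chunks
    reader.close();             // no further data will follow

    // Peek characters up to the first linebreak without consuming them ...
    char c;
    std::string firstLine;
    while (reader.peek(&c) && c != '\n') {
        firstLine += c;
    }
    // ... then either accept the peeked range (consumePeek) or rewind to the
    // read position (resetPeek).
    reader.consumePeek();

    std::cout << "first line: " << firstLine << '\n'
              << "cursor now at line " << reader.getLine() << ", column "
              << reader.getColumn() << '\n';

    // Reading consumes characters; once the closed reader is drained,
    // read() returns false and atEnd() becomes true.
    while (reader.read(&c)) {
        std::cout << c;
    }
    std::cout << std::endl;
    return 0;
}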
-*/ - -#include "BufferedCharReader.hpp" -#include "CodeTokenizer.hpp" -#include "Tokenizer.hpp" - -#include "CSSParser.hpp" - -namespace ousia { -namespace utils { - -// CSS code tokens -static const int CURLY_OPEN = 1; -static const int CURLY_CLOSE = 2; -static const int COLON = 3; -static const int SEMICOLON = 4; -static const int HASH = 5; -static const int BRACKET_OPEN = 6; -static const int BRACKET_CLOSE = 7; -static const int PAREN_OPEN = 8; -static const int PAREN_CLOSE = 9; -// comments -static const int COMMENT = 100; -static const int COMMENT_OPEN = 101; -static const int COMMENT_CLOSE = 102; -// strings -static const int STRING = 200; -static const int SINGLE_QUOTE = 201; -static const int DOUBLE_QUOTE = 202; -static const int ESCAPE = 203; -// general syntax -static const int LINEBREAK = 300; - -static const TokenTreeNode CSS_ROOT{{{"{", CURLY_OPEN}, - {"}", CURLY_CLOSE}, - {":", COLON}, - {";", SEMICOLON}, - {"#", HASH}, - {"[", BRACKET_OPEN}, - {"]", BRACKET_CLOSE}, - {"(", PAREN_OPEN}, - {")", PAREN_CLOSE}, - {"/*", COMMENT_OPEN}, - {"*/", COMMENT_CLOSE}, - {"\\", ESCAPE}, - {"\''", SINGLE_QUOTE}, - {"\"", DOUBLE_QUOTE}, - {"\n", LINEBREAK}}}; - -static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = { - {COMMENT_OPEN, {CodeTokenMode::BLOCK_COMMENT_START, COMMENT}}, - {COMMENT_CLOSE, {CodeTokenMode::BLOCK_COMMENT_END, COMMENT}}, - {SINGLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}}, - {DOUBLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}}, - {ESCAPE, {CodeTokenMode::ESCAPE, ESCAPE}}, - {LINEBREAK, {CodeTokenMode::LINEBREAK, LINEBREAK}}}; - -StyleNode CSSParser::parse(BufferedCharReader &input) -{ - CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; - tokenizer.ignoreComments = true; - // TODO: implement -} -} -} diff --git a/src/core/utils/CSSParser.hpp b/src/core/utils/CSSParser.hpp deleted file mode 100644 index c8b772d..0000000 --- a/src/core/utils/CSSParser.hpp +++ /dev/null @@ -1,167 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef _OUSIA_UTILS_CSS_PARSER_HPP_ -#define _OUSIA_UTILS_CSS_PARSER_HPP_ - -#include <istream> -#include <map> -#include <vector> -#include <tuple> - -#include <core/Managed.hpp> -#include <core/dom/Node.hpp> - -#include "BufferedCharReader.hpp" - -namespace ousia { -namespace utils { - -/* - * The Specificity or Precedence of a CSS RuleSet, which decides which - * rules are applied when different RuleSets contain conflicting information. - * - * The Specificity is calculated using the official W3C recommendation - * http://www.w3.org/TR/CSS2/cascade.html#specificity - * - * Note that we do not need to use the integer 'a', since we do not allow - * local style definitions for single nodes. 
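The comparison operators below order specificities lexicographically over (b, c, d). As a standalone, illustrative sketch of that ordering, using plain tuples so it needs nothing from this header (the selector strings in the comments are only examples):

#include <cassert>
#include <tuple>

int main()
{
    // b counts ID selectors, c counts classes/attributes/pseudo-classes,
    // d counts type selectors -- compared lexicographically.
    std::tuple<int, int, int> idRule{1, 0, 0};     // e.g. "#toc"
    std::tuple<int, int, int> classRule{0, 1, 1};  // e.g. "p.note"
    assert(classRule < idRule);  // a single ID outranks class + type selectors
    return 0;
}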
- */ -struct Specificity { - int b; - int c; - int d; - - Specificity(int b, int c, int d) : b(b), c(c), d(d) {} -}; - -bool operator<(const Specificity &x, const Specificity &y) -{ - return std::tie(x.b, x.c, x.d) < std::tie(y.b, y.c, y.d); -} - -bool operator>(const Specificity &x, const Specificity &y) -{ - return std::tie(x.b, x.c, x.d) > std::tie(y.b, y.c, y.d); -} - -bool operator==(const Specificity &x, const Specificity &y) -{ - return std::tie(x.b, x.c, x.d) == std::tie(y.b, y.c, y.d); -} - -class RuleSet : public Managed { -private: - const std::map<std::string, std::string> values; - const Specificity specificity; - -public: - RuleSet(Manager &mgr, std::map<std::string, std::string> values, - Specificity specificity) - : Managed(mgr), values(std::move(values)), specificity(specificity) - { - } - - const std::map<std::string, std::string> &getValues() const - { - return values; - } - - const Specificity &getSpecificity() const { return specificity; } -}; - -class PseudoSelector { -private: - const std::string name; - const std::vector<std::string> args; - const bool generative; - -public: - PseudoSelector(std::string name, std::vector<std::string> args, - bool generative) - : name(std::move(name)), args(std::move(args)), generative(generative) - { - } - - const std::string &getName() const { return name; } - - const std::vector<std::string> &getArgs() const { return args; } - - const bool &isGenerative() const { return generative; } -}; - -enum class SelectionOperator { DESCENDANT, DIRECT_DESCENDANT }; - -class StyleNode : public dom::Node { -public: - class StyleEdge : public Managed { - private: - Owned<StyleNode> target; - const SelectionOperator selectionOperator; - - public: - StyleEdge(Manager &mgr, Handle<StyleNode> target, - SelectionOperator selectionOperator) - : Managed(mgr), - target(acquire(target)), - selectionOperator(selectionOperator) - { - } - - Rooted<StyleNode> getTarget() const { return target; } - - const SelectionOperator &getSelectionOperator() const - { - return selectionOperator; - } - }; - -private: - const PseudoSelector pseudoSelector; - std::vector<Owned<StyleEdge>> edges; - const std::vector<Owned<RuleSet>> ruleSets; - -public: - StyleNode(Manager &mgr, std::string name, - PseudoSelector pseudoSelector, - const std::vector<Handle<StyleEdge>> &edges, - const std::vector<Handle<RuleSet>> &ruleSets) - : dom::Node(mgr, std::move(name)), - pseudoSelector(std::move(pseudoSelector)), - edges(acquire(edges)), - ruleSets(acquire(ruleSets)) - { - } - - const PseudoSelector &getPseudoSelector() const { return pseudoSelector; } - - const std::vector<Owned<StyleEdge>> &getEdges() const { return edges; } - - const std::vector<Owned<RuleSet>> &getRuleSets() const { return ruleSets; } -}; - -class CSSParser { - -private: - -public: - StyleNode parse(BufferedCharReader &input); -}; -} -} -#endif diff --git a/src/core/utils/CodeTokenizer.cpp b/src/core/utils/CodeTokenizer.cpp deleted file mode 100644 index e5b8610..0000000 --- a/src/core/utils/CodeTokenizer.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <cassert> - -#include "CodeTokenizer.hpp" - -namespace ousia { -namespace utils { - -Token CodeTokenizer::constructToken(const Token &t) -{ - std::string content = buf.str(); - buf.str(std::string()); - return Token{returnTokenId, content, startToken.startColumn, - startToken.startLine, t.endColumn, t.endLine}; -} - -void CodeTokenizer::buffer(const Token &t) { buf << t.content; } - -bool CodeTokenizer::doPrepare(const Token &t, std::deque<Token> &peeked) -{ - auto it = descriptors.find(t.tokenId); - CodeTokenMode mode = CodeTokenMode::NONE; - if (it != descriptors.end()) { - mode = it->second.mode; - } - - if (t.startLine != t.endLine && mode != CodeTokenMode::LINEBREAK) { - throw TokenizerException( - "We did not expect a multiline token (except linebreaks). Most " - "likely you did not add a linebreak token to your tokenizer!"); - } - - switch (state) { - case CodeTokenizerState::NORMAL: - switch (mode) { - case CodeTokenMode::STRING_START_END: - state = CodeTokenizerState::IN_STRING; - break; - case CodeTokenMode::BLOCK_COMMENT_START: - state = CodeTokenizerState::IN_BLOCK_COMMENT; - break; - case CodeTokenMode::LINE_COMMENT: - state = CodeTokenizerState::IN_LINE_COMMENT; - break; - case CodeTokenMode::LINEBREAK: - peeked.push_back({it->second.id, t.content, t.startColumn, - t.startLine, t.endColumn, t.endLine}); - return true; - default: - if (t.tokenId == TOKEN_TEXT) { - int begin = -1; - for (size_t c = 0; c < t.content.length(); c++) { - bool isWhitespace = - t.content[c] == ' ' || t.content[c] == '\t'; - if (begin < 0) { - // if we have not yet set our beginning, - // we wait for the first - // non-whitespace-character to set it. - if (!isWhitespace) { - begin = c; - } - } else { - // if we have set our beginning, we wait for the - // first whitespace character, which marks the - // end of the current word. 
- if (isWhitespace) { - peeked.push_back(Token{ - TOKEN_TEXT, - t.content.substr(begin, (int)c - begin), - t.startColumn + begin, t.startLine, - t.startColumn + (int)c, t.endLine}); - begin = -1; - } - } - } - if(begin >= 0){ - peeked.push_back(Token{ - TOKEN_TEXT, - t.content.substr(begin), - t.startColumn + begin, t.startLine, - t.endColumn, t.endLine}); - } - } else { - peeked.push_back(t); - } - return true; - } - startToken = t; - returnTokenId = it->second.id; - return false; - case CodeTokenizerState::IN_LINE_COMMENT: - switch (mode) { - case CodeTokenMode::LINEBREAK: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_BLOCK_COMMENT: - switch (mode) { - case CodeTokenMode::BLOCK_COMMENT_END: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_STRING: - switch (mode) { - case CodeTokenMode::ESCAPE: - if (escaped) { - buffer(t); - } - escaped = !escaped; - return false; - case CodeTokenMode::STRING_START_END: - if (escaped) { - buffer(t); - escaped = false; - return false; - } else { - peeked.push_back(constructToken(t)); - state = CodeTokenizerState::NORMAL; - return true; - } - default: - if (escaped) { - // TODO: handle escaped characters? - escaped = false; - } - buffer(t); - return false; - } - } - assert(false); -} -} -} diff --git a/src/core/utils/CodeTokenizer.hpp b/src/core/utils/CodeTokenizer.hpp deleted file mode 100644 index 0fc0862..0000000 --- a/src/core/utils/CodeTokenizer.hpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef _OUSIA_UTILS_CODE_TOKENIZER_HPP_ -#define _OUSIA_UTILS_CODE_TOKENIZER_HPP_ - -#include <map> -#include <sstream> - -#include "BufferedCharReader.hpp" -#include "Tokenizer.hpp" - -namespace ousia { -namespace utils { - -/* - * This enum contains all special Token the CodeTokenizer supports, namely: - * - * 1.) An ambigous Tokens - in post programming languages single-quotes ' or - * double-quotes " - to delimit string tokens. - * 2.) A start token for line comments, which would e.g. be // in Java. - * 3.) A start token for a block comment - * 4.) An end token for a block comment. - * 5.) A linebreak token - * 6.) The escape token, which would e.g. be \ in java. - */ -enum class CodeTokenMode { - STRING_START_END, - LINE_COMMENT, - BLOCK_COMMENT_START, - BLOCK_COMMENT_END, - LINEBREAK, - ESCAPE, - NONE -}; - -/** - * A CodeTokenDescriptor defines the id the user likes to have returned for - * a Token of the mode specified, e.g. 
if you want to get the id 4 for a - * String Token the corresponding CodeTokenDescriptor would be inizialized - * with CodeTokenDescriptor myDesc {CodeTokenMode::STRING_START_END, 4}; - */ -struct CodeTokenDescriptor { - CodeTokenMode mode; - int id; - - CodeTokenDescriptor(CodeTokenMode mode, int id) : mode(mode), id(id) {} -}; - -/** - * The CodeTokenizer is a finite state machine with the states NORMAL, being - * IN_BLOCK_COMMENT, being IN_LINE_COMMENT or being IN_STRING. - */ -enum class CodeTokenizerState { - NORMAL, - IN_BLOCK_COMMENT, - IN_LINE_COMMENT, - IN_STRING -}; - -/** - * The purpose of a CodeTokenizer is to make it easier to parse classical - * programming Code. It adds the following features to a regular Tokenizer: - * 1.) String tokens (e.g. "string" in Java Code) instead of 3 separate tokens - * for the opening delimiter, the text and the closing delimiter. - * 2.) Escaping in String tokens. - * 3.) Comment Tokens (for line comments as well as block comments) - */ -class CodeTokenizer : public Tokenizer { -private: - std::map<int, CodeTokenDescriptor> descriptors; - CodeTokenizerState state; - std::stringstream buf; - Token startToken; - int returnTokenId; - bool escaped = false; - - Token constructToken(const Token &t); - void buffer(const Token &t); - -protected: - bool doPrepare(const Token &t, std::deque<Token> &peeked) override; - -public: - /** - * If you do not want comment tokens to be returned you can set this to - * true. - */ - bool ignoreComments = false; - - /** - * - * @param input a BufferedCharReader containing the input for this - *tokenizer, - * as with a regular tokenizer. - * @param root a TokenTreeNode representing the root of the TokenTree. - * Please note that you have to specify all tokenIDs here that you use - * in the descriptors map. - * @param descriptors a map mapping tokenIDs to CodeTokenDescriptors. - * In this way you can specify the meaning of certain Tokens. Say you - * specified the Token "//" with the id 1 in the TokenTree. Then you could - * add the entry "1" with the Mode "LINE_COMMENT" to the descriptors map - * and this CodeTokenizer would recognize the token "//" as starting a - * line comment. - */ - CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, - std::map<int, CodeTokenDescriptor> descriptors) - : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) - { - } -}; -} -} - -#endif diff --git a/src/core/utils/RangeSet.hpp b/src/core/utils/RangeSet.hpp deleted file mode 100644 index 841d476..0000000 --- a/src/core/utils/RangeSet.hpp +++ /dev/null @@ -1,326 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
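To make the constructor description above concrete, here is a purely illustrative sketch that wires up a CodeTokenizer for a toy language with "//" line comments and double-quoted strings. The token ids and the input text are invented, and it assumes the headers from this directory compile as shown.

#include <iostream>
#include <map>

#include "BufferedCharReader.hpp"
#include "CodeTokenizer.hpp"

using namespace ousia::utils;

// Invented token ids for this example.
static const int LINE_COMMENT = 1;
static const int DOUBLE_QUOTE = 2;
static const int LINEBREAK = 3;
static const int STRING = 4;

int main()
{
    BufferedCharReader reader;
    reader.feed("\"hello world\"// comment\n");
    reader.close();

    // The TokenTree must contain every token id referenced by the descriptors.
    TokenTreeNode root{{{"//", LINE_COMMENT}, {"\"", DOUBLE_QUOTE}, {"\n", LINEBREAK}}};
    std::map<int, CodeTokenDescriptor> descriptors{
        {LINE_COMMENT, {CodeTokenMode::LINE_COMMENT, LINE_COMMENT}},
        {DOUBLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}},
        {LINEBREAK, {CodeTokenMode::LINEBREAK, LINEBREAK}}};

    CodeTokenizer tokenizer{reader, root, descriptors};
    tokenizer.ignoreComments = true;  // drop the comment token entirely

    Token t;
    while (tokenizer.next(t)) {
        std::cout << t.tokenId << ": '" << t.content << "'\n";
    }
    // With the declarations above this should print just: 4: 'hello world'
    // (the comment is dropped, the quoted text is assembled into one token).
    return 0;
}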
-*/ - -#ifndef _OUSIA_MODEL_RANGE_SET_HPP_ -#define _OUSIA_MODEL_RANGE_SET_HPP_ - -#include <limits> -#include <set> - -namespace ousia { -namespace model { -/** - * The Range structure represents an interval of numerical values of type T. - */ -template <typename T> -struct Range { - /** - * Start is the start value of the range. - */ - T start; - - /** - * End is the end value of the range (inclusively). - */ - T end; - - /** - * Default constructor of the range class. The range is initialized as - * invalid, with start being set to the maximum possible value of the - * numerical type T, and end being set to the minimum possible value. - */ - Range() : - start(std::numeric_limits<T>::max()), - end(std::numeric_limits<T>::min()) - { - // Do nothing here - } - - /** - * Copies the given start and end value. The given values are not checked - * for validity. Use the "isValid" - * - * @param start is the minimum value the range still covers. - * @param end is the maximum value the range still covers. - */ - Range(const T &start, const T &end) : - start(start), end(end) - { - // Do nothing here - } - - /** - * Creates a range that covers exactly one element, namely the value given - * as parameter n. - */ - Range(const T &n) : - start(n), end(n) - { - // Do nothing here - } - - /** - * Returns true if this range is valid, e.g. its start value is smaller or - * equal to its end value. - * - * @return true if start is smaller or equal to end, false otherwise. - */ - bool isValid() const - { - return start <= end; - } - - /** - * Checks whether the given value lies inside the range. - * - * @param v is the value that is being checked. - * @return true if the value lies within the range, false otherwise. - */ - bool inRange(T v) const - { - return (v >= start) && (v <= end); - } - - /** - * Checks whether the given range overlaps with another range. Not that - * this check is only meaningful if both ranges are valid. - * - * @param r is the range that should be checked for overlapping with this - * range. - */ - bool overlaps(const Range<T> &r) const - { - return (((r.start >= start) || (r.end >= start)) - && ((r.start <= end) || (r.end <= end))); - } - - /** - * Returns true if the two given ranges are neighbours (their limits only - * differ in the smallest representable difference between them). - */ - bool neighbours(const Range<T> &r) const - { - constexpr T eps = std::numeric_limits<T>::is_integer - ? 1 : std::numeric_limits<T>::epsilon(); - return ((r.start > end) && ((r.start - eps) <= end)) - || ((r.end < start) && ((r.end + eps) >= start)); - } - - /** - * Checks whether the given range completely covers this range. - */ - bool coveredBy(const Range<T> &r) const - { - return (r.start <= start) && (r.end >= end); - } - - /** - * Checks whether this range completely covers the given range. - */ - bool covers(const Range<T> &r) const - { - return r.coveredBy(*this); - } - - /** - * Calculates the union of the two ranges -- not that this operation is only - * valid if the ranges overlapp. Use the RangeSet class if you cannot - * guarantee that. - */ - Range<T> merge(const Range<T> &r) const - { - return Range(std::min(start, r.start), std::max(end, r.end)); - } - - /** - * Returns a range that represents the spans the complete set defined by the - * given type T. 
- */ - static Range<T> typeRange() - { - return Range(std::numeric_limits<T>::min(), - std::numeric_limits<T>::max()); - } - - /** - * Returns a range that represents the spans the complete set defined by the - * given type T up to a given value. - * - * @param till is the value up to which the range should be defined (till is - * included in the set). - */ - static Range<T> typeRangeUntil(const T &till) - { - return Range(std::numeric_limits<T>::min(), till); - } - - /** - * Returns a range that represents the spans the complete set defined by the - * given type T up to a given value. - * - * @param from is the value from which the range should be defined (from is - * included in the set). - */ - static Range<T> typeRangeFrom(const T &from) - { - return Range(from, std::numeric_limits<T>::max()); - } -}; - -/** - * RangeComp is a comperator used to order to sort the ranges within the - * ranges list. Sorts by the start element. - */ -template<typename T> -struct RangeComp { - bool operator() (const Range<T>& lhs, const Range<T>& rhs) const - { - return lhs.start < rhs.start; - } -}; - -/** - * RangeSet represents a set of ranges of the given numerical type and is thus - * capable of representing any possible subset of the given numerical type T. - */ -template<typename T> -class RangeSet { - -protected: - /** - * Set of ranges used internally. - */ - std::set<Range<T>, RangeComp<T>> ranges; - - /** - * Returns an iterator to the first element in the ranges list that overlaps - * with the given range. - * - * @param r is the range for which the first overlapping element should be - * found. - * @return an iterator pointing to the first overlapping element or to the - * end of the list if no such element was found. - */ - typename std::set<Range<T>, RangeComp<T>>::iterator firstOverlapping( - const Range<T> &r, const bool allowNeighbours) - { - // Find the element with the next larger start value compared to the - // start value given in r. - auto it = ranges.upper_bound(r); - - // Go back one element - if (it != ranges.begin()) { - it--; - } - - // Iterate until an overlapping element is found - while (!(it->overlaps(r) || (allowNeighbours && it->neighbours(r))) - && (it != ranges.end())) { - it++; - } - return it; - } - -public: - /** - * Calculates the union of this range set and the given range. - * - * @param range is the range that should be merged into this range set. - */ - void merge(Range<T> r) - { - // Calculate a new range that covers both the new range and all old - // ranges in the set -- delete all old elements on the way - auto it = firstOverlapping(r, true); - while ((it->overlaps(r) || it->neighbours(r)) && it != ranges.end()) { - r = r.merge(*it); - it = ranges.erase(it); - } - - // Insert the new range - ranges.insert(r); - } - - /** - * Calculates the union of this range set and the given range set. - * - * @param ranges is another range set for which the union with this set - * should be calculated. - */ - void merge(const RangeSet<T> &s) - { - for (Range<T> &r : s.ranges) { - merge(r); - } - } - - /** - * Checks whether this range set S contains the given range R: - * S u R = R - * (The intersection between R and S equals the given range) - * - * @param r is the range for which the containment should be checked. - * @return true if the above condition is met, false otherwise. 
- */ - bool contains(const Range<T> &r) - { - auto it = firstOverlapping(r, false); - if (it != ranges.end()) { - return (*it).covers(r); - } - return false; - } - - /** - * Checks whether this range set S1 contains the given range set S2: - * - * @param s is the range for which the containment should be checked. - * @return true if the above condition is met, false otherwise. - */ - bool contains(const RangeSet<T> &s) - { - bool res = true; - for (Range<T> &r : s.ranges) { - res = res && contains(r); - } - return res; - } - - /** - * Empties the set. - */ - void clear() - { - ranges.clear(); - } - - /** - * Returns the current list of ranges as a const reference. - */ - const std::set<Range<T>, RangeComp<T>>& getRanges() - { - return this->ranges; - } - -}; - -} -} - -#endif /* _OUSIA_MODEL_RANGE_SET_HPP_ */ - diff --git a/src/core/utils/Tokenizer.cpp b/src/core/utils/Tokenizer.cpp deleted file mode 100644 index a0ca3aa..0000000 --- a/src/core/utils/Tokenizer.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <sstream> - -#include "Tokenizer.hpp" - -namespace ousia { -namespace utils { - -static std::map<char, TokenTreeNode> buildChildren( - const std::map<std::string, int> &inputs) -{ - std::map<char, TokenTreeNode> children; - std::map<char, std::map<std::string, int>> nexts; - - for (auto &e : inputs) { - const std::string &s = e.first; - const int id = e.second; - if (s.empty()) { - continue; - } - char start = s[0]; - const std::string suffix = s.substr(1); - if (nexts.find(start) != nexts.end()) { - nexts[start].insert(std::make_pair(suffix, id)); - } else { - nexts.insert(std::make_pair( - start, std::map<std::string, int>{{suffix, id}})); - } - } - - for (auto &n : nexts) { - children.insert(std::make_pair(n.first, TokenTreeNode{n.second})); - } - - return children; -} - -static int buildId(const std::map<std::string, int> &inputs) -{ - int tokenId = TOKEN_NONE; - for (auto &e : inputs) { - if (e.first.empty()) { - if (tokenId != TOKEN_NONE) { - throw TokenizerException{std::string{"Ambigous token found: "} + - std::to_string(e.second)}; - } else { - tokenId = e.second; - } - } - } - return tokenId; -} - -TokenTreeNode::TokenTreeNode(const std::map<std::string, int> &inputs) - : children(buildChildren(inputs)), tokenId(buildId(inputs)) -{ -} - -Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root) - : input(input), root(root) -{ -} - -bool Tokenizer::prepare() -{ - std::stringstream buffer; - char c; - int startColumn = input.getColumn(); - int startLine = input.getLine(); - bool bufEmpty = true; - while (input.peek(&c)) { - if (root.children.find(c) != root.children.end()) { - // if there might be a special token, keep peeking forward - // until we find the token (or we don't). 
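The overlap/neighbour/merge semantics documented for Range above can be illustrated with a few assertions. This is a sketch only; it assumes RangeSet.hpp as shown here (note the Range template lives in the ousia::model namespace).

#include <cassert>

#include "RangeSet.hpp"

int main()
{
    using ousia::model::Range;

    Range<int> a{1, 10};
    Range<int> b{11, 20};  // touches a, so the two are neighbours, not overlapping
    Range<int> c{5, 15};   // overlaps both a and b

    assert(!a.overlaps(b) && a.neighbours(b));
    assert(a.overlaps(c) && b.overlaps(c));
    assert(a.merge(c).covers(Range<int>{1, 15}));  // union of overlapping ranges
    assert(!Range<int>().isValid());               // default-constructed range is invalid
    return 0;
}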
- TokenTreeNode const *n = &root; - std::stringstream tBuf; - int match = TOKEN_NONE; - while (true) { - tBuf << c; - n = &(n->children.at(c)); - if (n->tokenId != TOKEN_NONE) { - match = n->tokenId; - // from here on we found a token. If we have something - // in our buffer already, we end the search now. - if (!bufEmpty) { - break; - } else { - // if we want to return this token ( = we have nothing - // in our buffer yet) we look greedily for the longest - // possible token we can construct. - input.consumePeek(); - } - } - if (!input.peek(&c)) { - // if we are at the end we break off the search. - break; - } - if (n->children.find(c) == n->children.end()) { - // if we do not find a possible continuation anymore, - // break off the search. - break; - } - } - //reset the peek pointer to the last valid position. - input.resetPeek(); - // check if we did indeed find a special token. - if (match != TOKEN_NONE) { - if (bufEmpty) { - // if we did not have text before, construct that token. - if (doPrepare( - Token{match, tBuf.str(), startColumn, startLine, - input.getColumn(), input.getLine()}, - peeked)) { - return true; - } else { - startColumn = input.getColumn(); - startLine = input.getLine(); - continue; - } - } else { - // otherwise we return the text before the token. - if (doPrepare(Token{TOKEN_TEXT, buffer.str(), startColumn, - startLine, input.getColumn(), - input.getLine()}, - peeked)) { - return true; - } else{ - //we need to clear the buffer here. After all the token - //corresponding to this buffer segment is already - //constructed. - buffer.str(std::string()); - bufEmpty = true; - startColumn = input.getColumn(); - startLine = input.getLine(); - continue; - } - } - } else{ - //if we found nothing, read at least one character. - input.peek(&c); - } - } - buffer << c; - bufEmpty = false; - input.consumePeek(); - } - if (!bufEmpty) { - return doPrepare(Token{TOKEN_TEXT, buffer.str(), startColumn, startLine, - input.getColumn(), input.getLine()}, - peeked); - } - return false; -} - -bool Tokenizer::doPrepare(const Token &t, std::deque<Token> &peeked) -{ - peeked.push_back(t); - return true; -} - -bool Tokenizer::next(Token &t) -{ - if (peeked.empty()) { - if (!prepare()) { - return false; - } - } - t = peeked.front(); - peeked.pop_front(); - resetPeek(); - return true; -} - -bool Tokenizer::peek(Token &t) -{ - if (peekCursor >= peeked.size()) { - if (!prepare()) { - return false; - } - } - t = peeked[peekCursor]; - return true; -} - -void Tokenizer::resetPeek() { peekCursor = 0; } - -void Tokenizer::consumePeek() -{ - while (peekCursor > 0) { - peeked.pop_front(); - peekCursor--; - } -} -} -} diff --git a/src/core/utils/Tokenizer.hpp b/src/core/utils/Tokenizer.hpp deleted file mode 100644 index 2debc75..0000000 --- a/src/core/utils/Tokenizer.hpp +++ /dev/null @@ -1,231 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
-*/ - -#ifndef _OUSIA_UTILS_TOKENIZER_HPP_ -#define _OUSIA_UTILS_TOKENIZER_HPP_ - -#include <istream> -#include <map> -#include <deque> - -#include "BufferedCharReader.hpp" - -namespace ousia { -namespace utils { - -/** - * This exception is currently only thrown if errors are made during the - * initialization of the Tokenizer. Have a closer look at the documentation - * of the TokenTreeNode constructor for more information. - */ -class TokenizerException : public std::exception { -public: - const std::string msg; - - TokenizerException(const std::string &msg) : msg(msg){}; - - virtual const char *what() const noexcept override { return msg.c_str(); } -}; - -/** - * The Tokenizer internally uses a TokenTree to be efficiently able to identify - * the longest consecutive token in the text. This is equivalent to a prefix - * trie. - * - * The TokenTree is a construct that structures all special tokens this - * Tokenizer recognizes. Consider the Tokens "aab", "a" and "aac". Then - * the TokenTree would look like this: - * - * a - * | \ - * a $ - * | \ - * b c - * | | - * $ $ - * - * Every node in the TokenTree is a valid end state that has a $ attached to it. - * During the search algorithm the Tokenizer goes through the tree and stores - * the last valid position. If a character follows that does not lead to a new - * node in the TokenTree the search ends (and starts again at this character). - * The token corresponding to the last valid position is returned. - * - * This allows us to uniquely identify the matching token given a certain - * input text. Note that this is a greedy matching approach that does not - * work if you're using truly ambiguous tokens (that have the same text). - * - * It is also not allowed that tokens have common middle parts but varying - * pre- and suffixes. Consider the example of two tokens "abd" and "bc" and - * the input string "abc". In that case we start looking for "abd" at the - * start, won't find it, wenn we hit "c" and start the scanning process - * anew. Thus the "bc" token is not found. - * - * For most (well-behaved) tokenization schemes this is not the case, - * though. - */ -class TokenTreeNode { -public: - const std::map<char, TokenTreeNode> children; - const int tokenId; - - /** - * The TokenTreeNode constructor builds a TokenTree from the given token - * specifications. The node returned by this constructor then is the root of - * said TokenTree. - * @param inputs Specifications of tokens in map form. Each specification - * is a tuple of the text that should be matched and some unique ID (>= 0) - * that is returned to you if that Token is found in the text. - * An example for such a map would be - * { - * { "#" , 1}, - * { "##", 2}, - * { "/" , 3} - * } - * Note that IDs below zero are reserved for system Ids, mainly TOKEN_NONE - * (-1) and TOKEN_TEXT (-2). - */ - TokenTreeNode(const std::map<std::string, int> &inputs); -}; - -/** - * This is a reserved constant for the empty token. - */ -static const int TOKEN_NONE = -1; -/** - * This is a reserved constant for every part of the input text that is not a - * specified token. - */ -static const int TOKEN_TEXT = -2; - -/** - * A token for us is identified by an integer tokenID (either one of the - * constants TOKEN_NONE or TOKEN_TEXT or one of the user-defined constants). - * Additionally we return the matched text (which should only be really - * interesting in case of TOKEN_TEXT tokens) and the position in the input text. 
- */ -struct Token { - int tokenId; - std::string content; - int startColumn; - int startLine; - int endColumn; - int endLine; - - Token(int tokenId, std::string content, int startColumn, int startLine, - int endColumn, int endLine) - : tokenId(tokenId), - content(content), - startColumn(startColumn), - startLine(startLine), - endColumn(endColumn), - endLine(endLine) - { - } - - Token() : tokenId(TOKEN_NONE) {} -}; - -/** - * A Tokenizer has the purpose of subdividing an input text into tokens. In our - * definition here we distinguish between two kinds of tokens: - * 1.) User-specified tokens that match a fixed text. - * 2.) Any other text between those tokens. - * The user might want to specify the tokens '#{' and '#}' for example, because - * they have some meaning in her code. The user sets the IDs to 1 and 2. - * Given the input text - * "some text #{ special command #} some text" - * the tokenizer would return the tokens: - * 1.) "some text " with the id TOKEN_TEXT (-2). - * 2.) "#{" with the id 1. - * 3.) " special command " with the id TOKEN_TEXT (-2). - * 4.) "#}" with the id 2. - * 5.) " some text" with the id TOKEN_TEXT (-2). - * This makes the subsequent parsing of files of a specific type easier. - * Note that in case of tokens with that are prefixes of other tokens the - * longest possible match is returned. - */ -class Tokenizer { -private: - BufferedCharReader &input; - const TokenTreeNode &root; - std::deque<Token> peeked; - unsigned int peekCursor = 0; - - bool prepare(); - -protected: - /** - * This method is an interface to build multiple tokens from a single one in - * derived classes. This might be interesting if you want to implement - * further logic on text tokens or similar applications. - * - * @param t a Token the "basic" tokenizer found. - * @param peeked a reference to the deque containing all temporary Tokens. - * You are supposed to append your tokens there. In the trivial case you just - * put the given Token on top of the deque. - * @return false if no token was appended to the deque (meaning that you want - * to ignore the given token explicitly) and true in all other cases. - */ - virtual bool doPrepare(const Token &t, std::deque<Token> &peeked); - -public: - /** - * @param input The input of a Tokenizer is given in the form of a - * BufferedCharReader. Please refer to the respective documentation. - * @param root This is meant to be the root of a TokenTree giving the - * specification of user-defined tokens this Tokenizer should recognize. - * The Tokenizer promises to not change the TokenTree such that you can - * re-use the same specification for multiple inputs. - * Please refer to the TokenTreeNode documentation for more information. - */ - Tokenizer(BufferedCharReader &input, const TokenTreeNode &root); - - /** - * The next method consumes one Token from the input stream and gives - * it to the user (stored in the input argument). - * - * @param t a Token reference that is set to the next found token. - * @return true if a next token was found and false if the input is at its - * end. - */ - bool next(Token &t); - /** - * The peek method does not consume the next Token but buffers it and - * shows it to the user (stored in the input argument). - * - * @param t a Token reference that is set to the next found token. - * @return true if a next token was found and false if the input is at its - * end. - */ - bool peek(Token &t); - - /** - * Resets the peek pointer to the current position in the stream (to the - * beginning of the buffer). 
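The "#{ ... #}" walk-through above corresponds to a small driver like the following. It is an illustrative sketch that assumes the Tokenizer and BufferedCharReader headers from this directory; the token ids 1 and 2 are chosen arbitrarily.

#include <iostream>

#include "BufferedCharReader.hpp"
#include "Tokenizer.hpp"

int main()
{
    using namespace ousia::utils;

    BufferedCharReader reader;
    reader.feed("some text #{ special command #} some text");
    reader.close();

    // "#{" and "#}" are the user-defined tokens; any other text comes back
    // with the id TOKEN_TEXT (-2).
    TokenTreeNode root{{{"#{", 1}, {"#}", 2}}};
    Tokenizer tokenizer{reader, root};

    Token t;
    while (tokenizer.next(t)) {
        std::cout << t.tokenId << ": '" << t.content << "'\n";
    }
    // Following the description above, the ids come out as -2, 1, -2, 2, -2.
    return 0;
}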
- */ - void resetPeek(); - - /** - * Clears the peek buffer, such that all peeked Tokens are consumed. - */ - void consumePeek(); -}; -} -} - -#endif diff --git a/src/core/utils/Utils.cpp b/src/core/utils/Utils.cpp deleted file mode 100644 index 184fdd0..0000000 --- a/src/core/utils/Utils.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include "Utils.hpp" - -namespace ousia { - -bool Utils::isIdentifier(const std::string &name) -{ - bool first = true; - for (char c : name) { - if (first && !(isAlphabetic(c) || c == '_')) { - return false; - } - if (first && !(isAlphanumeric(c) || c == '_' || c == '-')) { - return false; - } - first = false; - } - return true; -} - -} - diff --git a/src/core/utils/Utils.hpp b/src/core/utils/Utils.hpp deleted file mode 100644 index 2fcd794..0000000 --- a/src/core/utils/Utils.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - - -#ifndef _OUSIA_UTILS_H_ -#define _OUSIA_UTILS_H_ - -#include <string> - -namespace ousia { - -class Utils { - -public: - - /** - * Returns true if the given character is in [A-Za-z] - */ - static bool isAlphabetic(const char c) - { - return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); - } - - /** - * Returns true if the given character is in [0-9] - */ - static bool isNumeric(const char c) - { - return (c >= '0') && (c <= '9'); - } - - /** - * Returns true if the given character is in [A-Za-z0-9] - */ - static bool isAlphanumeric(const char c) - { - return isAlphabetic(c) || isNumeric(c); - } - - /** - * Returns true if the given character is in [A-Za-z_][A-Za-z0-9_-]* - */ - static bool isIdentifier(const std::string &name); - -}; - -} - -#endif /* _OUSIA_UTILS_H_ */ - |
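A few illustrative checks for the character-class helpers above; this sketch assumes Utils.hpp as declared (note that Utils sits directly in the ousia namespace).

#include <cassert>

#include "Utils.hpp"

int main()
{
    using ousia::Utils;

    assert(Utils::isAlphabetic('z') && !Utils::isAlphabetic('3'));
    assert(Utils::isNumeric('3') && Utils::isAlphanumeric('z'));

    // Identifiers follow [A-Za-z_][A-Za-z0-9_-]*.
    assert(Utils::isIdentifier("_my-name2"));
    assert(!Utils::isIdentifier("2fast"));  // must not start with a digit
    return 0;
}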