From ca2040ba16459f5b3dfe8a86a14680eecea7ef7d Mon Sep 17 00:00:00 2001
From: Benjamin Paassen
Date: Fri, 21 Nov 2014 10:51:11 +0100
Subject: According to my lord's holy command I enacted his grace's order of
 moving all peasants and lordlings, big and small, young and old, to our
 common land and inner sanctuary to be safe from outer threats by those
 usurpers.

---
 src/core/utils/BufferedCharReader.cpp | 216 ----------------------
 src/core/utils/BufferedCharReader.hpp | 240 -------------------------
 src/core/utils/CSSParser.cpp          |  81 ---------
 src/core/utils/CSSParser.hpp          | 167 -----------------
 src/core/utils/CodeTokenizer.cpp      | 166 -----------------
 src/core/utils/CodeTokenizer.hpp      | 130 --------------
 src/core/utils/RangeSet.hpp           | 326 ----------------------------------
 src/core/utils/Tokenizer.cpp          | 212 ----------------------
 src/core/utils/Tokenizer.hpp          | 231 ------------------------
 src/core/utils/Utils.cpp              |  39 ----
 src/core/utils/Utils.hpp              |  65 -------
 11 files changed, 1873 deletions(-)
 delete mode 100644 src/core/utils/BufferedCharReader.cpp
 delete mode 100644 src/core/utils/BufferedCharReader.hpp
 delete mode 100644 src/core/utils/CSSParser.cpp
 delete mode 100644 src/core/utils/CSSParser.hpp
 delete mode 100644 src/core/utils/CodeTokenizer.cpp
 delete mode 100644 src/core/utils/CodeTokenizer.hpp
 delete mode 100644 src/core/utils/RangeSet.hpp
 delete mode 100644 src/core/utils/Tokenizer.cpp
 delete mode 100644 src/core/utils/Tokenizer.hpp
 delete mode 100644 src/core/utils/Utils.cpp
 delete mode 100644 src/core/utils/Utils.hpp

(limited to 'src/core/utils')

diff --git a/src/core/utils/BufferedCharReader.cpp b/src/core/utils/BufferedCharReader.cpp
deleted file mode 100644
index c13628f..0000000
--- a/src/core/utils/BufferedCharReader.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
-    Ousía
-    Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "BufferedCharReader.hpp"
-
-namespace ousia {
-namespace utils {
-
-// Constants used within the linebreak statemachine.
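// The state constants below pack two pieces of information into one byte:
// the low nibble (LB_STATE_MASK_CNT) keeps a tiny counter for linebreak
// characters seen in a row (the code only ever stores 0 or 1 there), and the
// high nibble (LB_STATE_MASK_TYPE) remembers whether the last one was an LF
// or a CR. A purely illustrative sketch of the packing, not part of the
// deleted file:
//
//   uint8_t state = ((lastCount + 1) & 1) | LB_STATE_LF;  // "one LF seen"
//   uint8_t count = state & LB_STATE_MASK_CNT;            // -> 1
//   uint8_t type  = state & LB_STATE_MASK_TYPE;           // -> LB_STATE_LF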
-static const uint8_t LB_STATE_NONE = 0x00; -static const uint8_t LB_STATE_ONE = 0x01; -static const uint8_t LB_STATE_LF = 0x10; -static const uint8_t LB_STATE_CR = 0x20; -static const uint8_t LB_STATE_MASK_CNT = 0x0F; -static const uint8_t LB_STATE_MASK_TYPE = 0xF0; - -/******************************************************************************* - * Struct BufferedCharReader::ReadCursor - ******************************************************************************/ - -BufferedCharReader::ReadCursor::ReadCursor(const bool destructive) : - destructive(destructive) -{ - reset(); -} - -void BufferedCharReader::ReadCursor::assign(const ReadCursor &cursor) -{ - this->line = cursor.line; - this->column = cursor.column; - this->bufferElem = cursor.bufferElem; - this->bufferPos = cursor.bufferPos; - this->lbState = cursor.lbState; -} - -void BufferedCharReader::ReadCursor::reset() -{ - this->line = 1; - this->column = 1; - this->bufferElem = 0; - this->bufferPos = 0; - this->lbState = LB_STATE_NONE; -} - -/******************************************************************************* - * Class BufferedCharReader - ******************************************************************************/ - -BufferedCharReader::BufferedCharReader() : - readCursor(true), peekCursor(false) -{ - reset(); -} - -void BufferedCharReader::reset() -{ - readCursor.reset(); - peekCursor.reset(); - buffer.clear(); - closed = false; -} - -bool BufferedCharReader::feed(const std::string &data) -{ - // Abort if the BufferedCharReader was closed - if (closed) { - return false; - } - - // Append the data onto the queue - buffer.push_back(data); - return true; -} - -void BufferedCharReader::close() -{ - closed = true; -} - -bool BufferedCharReader::substituteLinebreaks(ReadCursor *cursor, char *c) -{ - // Handle line breaks, inserts breakes after the following character - // combinations: \n, \r, \n\r, \r\n TODO: Change behaviour to \n, \n\r, \r\n - if ((*c == '\n') || (*c == '\r')) { - // Determine the type of the current linebreak character - const uint8_t type = (*c == '\n') ? LB_STATE_LF : LB_STATE_CR; - - // Read the last count and the last type from the state - const uint8_t lastCount = cursor->lbState & LB_STATE_MASK_CNT; - const uint8_t lastType = cursor->lbState & LB_STATE_MASK_TYPE; - - // Set the current linebreak type and counter in the state - cursor->lbState = ((lastCount + 1) & 1) | type; - - // If either this is the first instance of this character or the same - // return character is repeated - if (!lastCount || (lastType == type)) { - *c = '\n'; - return true; - } - return false; - } - - // Find the state - cursor->lbState = LB_STATE_NONE; - return true; -} - -bool BufferedCharReader::readCharacterAtCursor(ReadCursor *cursor, - char *c) -{ - bool hasChar = false; - while (!hasChar) { - // Abort if the current buffer element does not point to a valid entry - // in the buffer -- we must wait until another data block has been fed - // into the buffer - if (cursor->bufferElem >= buffer.size()) { - return false; - } - - // Fetch the current element the peek pointer points to - const std::string &data = buffer[cursor->bufferElem]; - - // Handle the "no data" case -- either in a destructive or - // non-destructive manner. 
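// (For the read cursor, which is destructive, the exhausted chunk is popped
// off the deque for good; the non-destructive peek cursor merely steps on to
// the next chunk and leaves the buffered data in place.)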
- if (cursor->bufferPos >= data.length()) { - if (cursor->destructive) { - buffer.pop_front(); - } else { - cursor->bufferElem++; - } - cursor->bufferPos = 0; - continue; - } - - // Read the character, advance the buffer position - *c = *(data.data() + cursor->bufferPos); - cursor->bufferPos++; - - // Substitute linebreaks with a single LF (0x0A) - hasChar = substituteLinebreaks(cursor, c); - } - - // Update the position counter - if (*c == '\n') { - cursor->line++; - cursor->column = 1; - } else { - // Ignore UTF-8 continuation bytes - if (!((*c & 0x80) && !(*c & 0x40))) { - cursor->column++; - } - } - - return true; -} - -bool BufferedCharReader::peek(char *c) -{ - return readCharacterAtCursor(&peekCursor, c); -} - -bool BufferedCharReader::read(char *c) -{ - resetPeek(); - return readCharacterAtCursor(&readCursor, c); -} - -void BufferedCharReader::consumePeek() -{ - // Remove all no longer needed buffer elements - for (unsigned int i = 0; i < peekCursor.bufferElem; i++) { - buffer.pop_front(); - } - peekCursor.bufferElem = 0; - - // Copy the peek cursor to the read cursor - readCursor.assign(peekCursor); -} - -void BufferedCharReader::resetPeek() -{ - // Reset the peek cursor to the read cursor - peekCursor.assign(readCursor); -} - -bool BufferedCharReader::atEnd() -{ - if (closed) { - if (buffer.size() <= 0) { - return true; - } else if (buffer.size() == 1) { - return buffer[0].size() == readCursor.bufferPos; - } - } - return false; -} - -} -} - diff --git a/src/core/utils/BufferedCharReader.hpp b/src/core/utils/BufferedCharReader.hpp deleted file mode 100644 index b13cde6..0000000 --- a/src/core/utils/BufferedCharReader.hpp +++ /dev/null @@ -1,240 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ -#define _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ - -#include -#include -#include - -namespace ousia { -namespace utils { - -/** - * The BufferedCharReader class is used for storing incomming data that - * is fed into the pipeline as well as reading/peeking single characters - * from that buffer. Additionally it counts the current column/row - * (with correct handling for UTF-8) and contains an internal state - * machine that handles the detection of linebreaks. - * - * Additionally the BufferedCharReader performs the following tasks: - * 1. Convert the incomming character encoding to UTF-8 (TODO: implement) - * 2. Convert arbitrary linebreaks to a single "\n" - */ -class BufferedCharReader { - -private: - - /** - * The ReadCursor structure is responsible for representing the read - * position within the text an all state machine states belonging to the - * cursor. There are two types of read cursors: destructive and - * non-destructive read cursors. - */ - struct ReadCursor { - /** - * Specifies whether this is a destructive cursor (bytes are discarded - * once they were read from the buffer). 
- */ - const bool destructive; - - /** - * The line the cursor currently points to. - */ - unsigned int line; - - /** - * The column the cursor currently points to. - */ - unsigned int column; - - /** - * The index of the element in the data buffer we're currently reading - * from. - */ - unsigned int bufferElem; - - /** - * The byte position within this data buffer. - */ - unsigned int bufferPos; - - /** - * State variable used in the internal state machine of the - * line feed detection. - */ - uint8_t lbState; - - /** - * Constructor of the ReadCursor structure. - * - * @param destructive specifies whether the ReadCursor is destructive - * (consumes all read characters, as used in the "read cursor") or - * non-destructive (as used in the "peek cursor"). - */ - ReadCursor(const bool destructive); - - /** - * Copys the data from another ReadCursor without overriding the - * "destructive" flag. - */ - void assign(const ReadCursor &cursor); - - /** - * Resets the cursor without changing the "destructive" flag. - */ - void reset(); - }; - - /** - * Queue containing the data that has been fed into the char reader. - */ - std::deque buffer; - - /** - * The read and the peek cursor. - */ - ReadCursor readCursor, peekCursor; - - /** - * Determines whether the reader has been closed. - */ - bool closed; - - /** - * Substitute any combination of linebreaks in the incomming code with "\n". - * Returns true if the current character is meant as output, false - * otherwise. - */ - bool substituteLinebreaks(ReadCursor *cursor, char *c); - - /** - * Reads a character from the input buffer and advances the given read - * cursor. - * - * @param cursor is a reference to the read cursor that should be used - * for reading. - * @param hasChar is set to true, if a character is available, false if - * no character is available (e.g. because line breaks are substituted or - * the end of a buffer boundary is reached -- in this case this function - * should be called again with the same parameters.) - * @param c is a output parameter, which will be set to the read character. - * @param returns true if there was enough data in the buffer, false - * otherwise. - */ - bool readCharacterAtCursor(ReadCursor *cursor, char *c); - - /** - * Function that is called for each read character -- updates the row and - * column count. - */ - void updatePositionCounters(const char c); - -public: - - /** - * Constructor of the buffered char reader class. - */ - BufferedCharReader(); - - /** - * Resets the reader to its initial state. - */ - void reset(); - - /** - * Feeds new data into the internal buffer of the BufferedCharReader - * class. - * - * @param data is a string containing the data that should be - * appended to the internal buffer. - * @return true if the operation was successful, false otherwise (e.g. - * because the reader is closed). - */ - bool feed(const std::string &data); - - /** - * Marks the end of the input, allowing successors in the pipeline - * to react properly (e.g. creating the end of stream token). - */ - void close(); - - /** - * Peeks a single character. If called multiple times, returns the - * character after the previously peeked character. - * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool peek(char *c); - - /** - * Reads a character from the input data. If "peek" was called - * beforehand resets the peek pointer. 
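	 *
	 * A minimal usage sketch -- illustrative only, not part of the original
	 * header:
	 *
	 *   BufferedCharReader reader;
	 *   reader.feed("abc");
	 *   char c;
	 *   reader.peek(&c);      // c == 'a', read position unchanged
	 *   reader.peek(&c);      // c == 'b'
	 *   reader.consumePeek(); // accept the peeked "ab"
	 *   reader.read(&c);      // c == 'c'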
- * - * @param c is a reference to the character to which the result should be - * writtern. - * @return true if the character was successfully read, false if there are - * no more characters to be read in the buffer. - */ - bool read(char *c); - - /** - * Advances the read pointer to the peek pointer -- so if the "peek" - * function was called, "read" will now return the character after - * the last peeked character. - */ - void consumePeek(); - - /** - * Resets the peek pointer to the "read" pointer. - */ - void resetPeek(); - - /** - * Returns true if there are no more characters as the stream was - * closed. - */ - bool atEnd(); - - /** - * Returns the current line (starting with one). - */ - inline int getLine() - { - return readCursor.line; - } - - /** - * Returns the current column (starting with one). - */ - inline int getColumn() - { - return readCursor.column; - } - -}; - -} -} - -#endif /* _OUSIA_UTILS_BUFFERED_CHAR_READER_H_ */ - diff --git a/src/core/utils/CSSParser.cpp b/src/core/utils/CSSParser.cpp deleted file mode 100644 index 1763cc2..0000000 --- a/src/core/utils/CSSParser.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "BufferedCharReader.hpp" -#include "CodeTokenizer.hpp" -#include "Tokenizer.hpp" - -#include "CSSParser.hpp" - -namespace ousia { -namespace utils { - -// CSS code tokens -static const int CURLY_OPEN = 1; -static const int CURLY_CLOSE = 2; -static const int COLON = 3; -static const int SEMICOLON = 4; -static const int HASH = 5; -static const int BRACKET_OPEN = 6; -static const int BRACKET_CLOSE = 7; -static const int PAREN_OPEN = 8; -static const int PAREN_CLOSE = 9; -// comments -static const int COMMENT = 100; -static const int COMMENT_OPEN = 101; -static const int COMMENT_CLOSE = 102; -// strings -static const int STRING = 200; -static const int SINGLE_QUOTE = 201; -static const int DOUBLE_QUOTE = 202; -static const int ESCAPE = 203; -// general syntax -static const int LINEBREAK = 300; - -static const TokenTreeNode CSS_ROOT{{{"{", CURLY_OPEN}, - {"}", CURLY_CLOSE}, - {":", COLON}, - {";", SEMICOLON}, - {"#", HASH}, - {"[", BRACKET_OPEN}, - {"]", BRACKET_CLOSE}, - {"(", PAREN_OPEN}, - {")", PAREN_CLOSE}, - {"/*", COMMENT_OPEN}, - {"*/", COMMENT_CLOSE}, - {"\\", ESCAPE}, - {"\''", SINGLE_QUOTE}, - {"\"", DOUBLE_QUOTE}, - {"\n", LINEBREAK}}}; - -static const std::map CSS_DESCRIPTORS = { - {COMMENT_OPEN, {CodeTokenMode::BLOCK_COMMENT_START, COMMENT}}, - {COMMENT_CLOSE, {CodeTokenMode::BLOCK_COMMENT_END, COMMENT}}, - {SINGLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}}, - {DOUBLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}}, - {ESCAPE, {CodeTokenMode::ESCAPE, ESCAPE}}, - {LINEBREAK, {CodeTokenMode::LINEBREAK, LINEBREAK}}}; - -StyleNode CSSParser::parse(BufferedCharReader &input) -{ - CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; - tokenizer.ignoreComments = true; - // TODO: implement -} -} -} diff --git a/src/core/utils/CSSParser.hpp b/src/core/utils/CSSParser.hpp deleted file mode 100644 index c8b772d..0000000 --- a/src/core/utils/CSSParser.hpp +++ /dev/null @@ -1,167 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_UTILS_CSS_PARSER_HPP_ -#define _OUSIA_UTILS_CSS_PARSER_HPP_ - -#include -#include -#include -#include - -#include -#include - -#include "BufferedCharReader.hpp" - -namespace ousia { -namespace utils { - -/* - * The Specificity or Precedence of a CSS RuleSet, which decides which - * rules are applied when different RuleSets contain conflicting information. - * - * The Specificity is calculated using the official W3C recommendation - * http://www.w3.org/TR/CSS2/cascade.html#specificity - * - * Note that we do not need to use the integer 'a', since we do not allow - * local style definitions for single nodes. 
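 *
 * For illustration, using the W3C counting rules (b counts ID attributes,
 * c counts other attributes and pseudo-classes, d counts element names and
 * pseudo-elements): a selector such as "#menu" would get
 * Specificity(1, 0, 0), while "div.warning" would get Specificity(0, 1, 1);
 * since (1, 0, 0) > (0, 1, 1), conflicting rules attached to "#menu" win.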
- */ -struct Specificity { - int b; - int c; - int d; - - Specificity(int b, int c, int d) : b(b), c(c), d(d) {} -}; - -bool operator<(const Specificity &x, const Specificity &y) -{ - return std::tie(x.b, x.c, x.d) < std::tie(y.b, y.c, y.d); -} - -bool operator>(const Specificity &x, const Specificity &y) -{ - return std::tie(x.b, x.c, x.d) > std::tie(y.b, y.c, y.d); -} - -bool operator==(const Specificity &x, const Specificity &y) -{ - return std::tie(x.b, x.c, x.d) == std::tie(y.b, y.c, y.d); -} - -class RuleSet : public Managed { -private: - const std::map values; - const Specificity specificity; - -public: - RuleSet(Manager &mgr, std::map values, - Specificity specificity) - : Managed(mgr), values(std::move(values)), specificity(specificity) - { - } - - const std::map &getValues() const - { - return values; - } - - const Specificity &getSpecificity() const { return specificity; } -}; - -class PseudoSelector { -private: - const std::string name; - const std::vector args; - const bool generative; - -public: - PseudoSelector(std::string name, std::vector args, - bool generative) - : name(std::move(name)), args(std::move(args)), generative(generative) - { - } - - const std::string &getName() const { return name; } - - const std::vector &getArgs() const { return args; } - - const bool &isGenerative() const { return generative; } -}; - -enum class SelectionOperator { DESCENDANT, DIRECT_DESCENDANT }; - -class StyleNode : public dom::Node { -public: - class StyleEdge : public Managed { - private: - Owned target; - const SelectionOperator selectionOperator; - - public: - StyleEdge(Manager &mgr, Handle target, - SelectionOperator selectionOperator) - : Managed(mgr), - target(acquire(target)), - selectionOperator(selectionOperator) - { - } - - Rooted getTarget() const { return target; } - - const SelectionOperator &getSelectionOperator() const - { - return selectionOperator; - } - }; - -private: - const PseudoSelector pseudoSelector; - std::vector> edges; - const std::vector> ruleSets; - -public: - StyleNode(Manager &mgr, std::string name, - PseudoSelector pseudoSelector, - const std::vector> &edges, - const std::vector> &ruleSets) - : dom::Node(mgr, std::move(name)), - pseudoSelector(std::move(pseudoSelector)), - edges(acquire(edges)), - ruleSets(acquire(ruleSets)) - { - } - - const PseudoSelector &getPseudoSelector() const { return pseudoSelector; } - - const std::vector> &getEdges() const { return edges; } - - const std::vector> &getRuleSets() const { return ruleSets; } -}; - -class CSSParser { - -private: - -public: - StyleNode parse(BufferedCharReader &input); -}; -} -} -#endif diff --git a/src/core/utils/CodeTokenizer.cpp b/src/core/utils/CodeTokenizer.cpp deleted file mode 100644 index e5b8610..0000000 --- a/src/core/utils/CodeTokenizer.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include "CodeTokenizer.hpp" - -namespace ousia { -namespace utils { - -Token CodeTokenizer::constructToken(const Token &t) -{ - std::string content = buf.str(); - buf.str(std::string()); - return Token{returnTokenId, content, startToken.startColumn, - startToken.startLine, t.endColumn, t.endLine}; -} - -void CodeTokenizer::buffer(const Token &t) { buf << t.content; } - -bool CodeTokenizer::doPrepare(const Token &t, std::deque &peeked) -{ - auto it = descriptors.find(t.tokenId); - CodeTokenMode mode = CodeTokenMode::NONE; - if (it != descriptors.end()) { - mode = it->second.mode; - } - - if (t.startLine != t.endLine && mode != CodeTokenMode::LINEBREAK) { - throw TokenizerException( - "We did not expect a multiline token (except linebreaks). Most " - "likely you did not add a linebreak token to your tokenizer!"); - } - - switch (state) { - case CodeTokenizerState::NORMAL: - switch (mode) { - case CodeTokenMode::STRING_START_END: - state = CodeTokenizerState::IN_STRING; - break; - case CodeTokenMode::BLOCK_COMMENT_START: - state = CodeTokenizerState::IN_BLOCK_COMMENT; - break; - case CodeTokenMode::LINE_COMMENT: - state = CodeTokenizerState::IN_LINE_COMMENT; - break; - case CodeTokenMode::LINEBREAK: - peeked.push_back({it->second.id, t.content, t.startColumn, - t.startLine, t.endColumn, t.endLine}); - return true; - default: - if (t.tokenId == TOKEN_TEXT) { - int begin = -1; - for (size_t c = 0; c < t.content.length(); c++) { - bool isWhitespace = - t.content[c] == ' ' || t.content[c] == '\t'; - if (begin < 0) { - // if we have not yet set our beginning, - // we wait for the first - // non-whitespace-character to set it. - if (!isWhitespace) { - begin = c; - } - } else { - // if we have set our beginning, we wait for the - // first whitespace character, which marks the - // end of the current word. - if (isWhitespace) { - peeked.push_back(Token{ - TOKEN_TEXT, - t.content.substr(begin, (int)c - begin), - t.startColumn + begin, t.startLine, - t.startColumn + (int)c, t.endLine}); - begin = -1; - } - } - } - if(begin >= 0){ - peeked.push_back(Token{ - TOKEN_TEXT, - t.content.substr(begin), - t.startColumn + begin, t.startLine, - t.endColumn, t.endLine}); - } - } else { - peeked.push_back(t); - } - return true; - } - startToken = t; - returnTokenId = it->second.id; - return false; - case CodeTokenizerState::IN_LINE_COMMENT: - switch (mode) { - case CodeTokenMode::LINEBREAK: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_BLOCK_COMMENT: - switch (mode) { - case CodeTokenMode::BLOCK_COMMENT_END: - state = CodeTokenizerState::NORMAL; - if (!ignoreComments) { - peeked.push_back(constructToken(t)); - } - return !ignoreComments; - default: - if (!ignoreComments) { - buffer(t); - } - return false; - } - case CodeTokenizerState::IN_STRING: - switch (mode) { - case CodeTokenMode::ESCAPE: - if (escaped) { - buffer(t); - } - escaped = !escaped; - return false; - case CodeTokenMode::STRING_START_END: - if (escaped) { - buffer(t); - escaped = false; - return false; - } else { - peeked.push_back(constructToken(t)); - state = CodeTokenizerState::NORMAL; - return true; - } - default: - if (escaped) { - // TODO: handle escaped characters? 
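					// (As written, the escape character itself is dropped and
					// the token that follows is buffered verbatim; no
					// translation of sequences such as \n happens yet.)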
- escaped = false; - } - buffer(t); - return false; - } - } - assert(false); -} -} -} diff --git a/src/core/utils/CodeTokenizer.hpp b/src/core/utils/CodeTokenizer.hpp deleted file mode 100644 index 0fc0862..0000000 --- a/src/core/utils/CodeTokenizer.hpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_UTILS_CODE_TOKENIZER_HPP_ -#define _OUSIA_UTILS_CODE_TOKENIZER_HPP_ - -#include -#include - -#include "BufferedCharReader.hpp" -#include "Tokenizer.hpp" - -namespace ousia { -namespace utils { - -/* - * This enum contains all special Token the CodeTokenizer supports, namely: - * - * 1.) An ambigous Tokens - in post programming languages single-quotes ' or - * double-quotes " - to delimit string tokens. - * 2.) A start token for line comments, which would e.g. be // in Java. - * 3.) A start token for a block comment - * 4.) An end token for a block comment. - * 5.) A linebreak token - * 6.) The escape token, which would e.g. be \ in java. - */ -enum class CodeTokenMode { - STRING_START_END, - LINE_COMMENT, - BLOCK_COMMENT_START, - BLOCK_COMMENT_END, - LINEBREAK, - ESCAPE, - NONE -}; - -/** - * A CodeTokenDescriptor defines the id the user likes to have returned for - * a Token of the mode specified, e.g. if you want to get the id 4 for a - * String Token the corresponding CodeTokenDescriptor would be inizialized - * with CodeTokenDescriptor myDesc {CodeTokenMode::STRING_START_END, 4}; - */ -struct CodeTokenDescriptor { - CodeTokenMode mode; - int id; - - CodeTokenDescriptor(CodeTokenMode mode, int id) : mode(mode), id(id) {} -}; - -/** - * The CodeTokenizer is a finite state machine with the states NORMAL, being - * IN_BLOCK_COMMENT, being IN_LINE_COMMENT or being IN_STRING. - */ -enum class CodeTokenizerState { - NORMAL, - IN_BLOCK_COMMENT, - IN_LINE_COMMENT, - IN_STRING -}; - -/** - * The purpose of a CodeTokenizer is to make it easier to parse classical - * programming Code. It adds the following features to a regular Tokenizer: - * 1.) String tokens (e.g. "string" in Java Code) instead of 3 separate tokens - * for the opening delimiter, the text and the closing delimiter. - * 2.) Escaping in String tokens. - * 3.) Comment Tokens (for line comments as well as block comments) - */ -class CodeTokenizer : public Tokenizer { -private: - std::map descriptors; - CodeTokenizerState state; - std::stringstream buf; - Token startToken; - int returnTokenId; - bool escaped = false; - - Token constructToken(const Token &t); - void buffer(const Token &t); - -protected: - bool doPrepare(const Token &t, std::deque &peeked) override; - -public: - /** - * If you do not want comment tokens to be returned you can set this to - * true. - */ - bool ignoreComments = false; - - /** - * - * @param input a BufferedCharReader containing the input for this - *tokenizer, - * as with a regular tokenizer. 
- * @param root a TokenTreeNode representing the root of the TokenTree. - * Please note that you have to specify all tokenIDs here that you use - * in the descriptors map. - * @param descriptors a map mapping tokenIDs to CodeTokenDescriptors. - * In this way you can specify the meaning of certain Tokens. Say you - * specified the Token "//" with the id 1 in the TokenTree. Then you could - * add the entry "1" with the Mode "LINE_COMMENT" to the descriptors map - * and this CodeTokenizer would recognize the token "//" as starting a - * line comment. - */ - CodeTokenizer(BufferedCharReader &input, const TokenTreeNode &root, - std::map descriptors) - : Tokenizer(input, root), descriptors(descriptors), state(CodeTokenizerState::NORMAL) - { - } -}; -} -} - -#endif diff --git a/src/core/utils/RangeSet.hpp b/src/core/utils/RangeSet.hpp deleted file mode 100644 index 841d476..0000000 --- a/src/core/utils/RangeSet.hpp +++ /dev/null @@ -1,326 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_MODEL_RANGE_SET_HPP_ -#define _OUSIA_MODEL_RANGE_SET_HPP_ - -#include -#include - -namespace ousia { -namespace model { -/** - * The Range structure represents an interval of numerical values of type T. - */ -template -struct Range { - /** - * Start is the start value of the range. - */ - T start; - - /** - * End is the end value of the range (inclusively). - */ - T end; - - /** - * Default constructor of the range class. The range is initialized as - * invalid, with start being set to the maximum possible value of the - * numerical type T, and end being set to the minimum possible value. - */ - Range() : - start(std::numeric_limits::max()), - end(std::numeric_limits::min()) - { - // Do nothing here - } - - /** - * Copies the given start and end value. The given values are not checked - * for validity. Use the "isValid" - * - * @param start is the minimum value the range still covers. - * @param end is the maximum value the range still covers. - */ - Range(const T &start, const T &end) : - start(start), end(end) - { - // Do nothing here - } - - /** - * Creates a range that covers exactly one element, namely the value given - * as parameter n. - */ - Range(const T &n) : - start(n), end(n) - { - // Do nothing here - } - - /** - * Returns true if this range is valid, e.g. its start value is smaller or - * equal to its end value. - * - * @return true if start is smaller or equal to end, false otherwise. - */ - bool isValid() const - { - return start <= end; - } - - /** - * Checks whether the given value lies inside the range. - * - * @param v is the value that is being checked. - * @return true if the value lies within the range, false otherwise. - */ - bool inRange(T v) const - { - return (v >= start) && (v <= end); - } - - /** - * Checks whether the given range overlaps with another range. Not that - * this check is only meaningful if both ranges are valid. 
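	 * For example, Range<int>(1, 5) overlaps Range<int>(4, 10) and
	 * Range<int>(5, 7), but not Range<int>(7, 10).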
- * - * @param r is the range that should be checked for overlapping with this - * range. - */ - bool overlaps(const Range &r) const - { - return (((r.start >= start) || (r.end >= start)) - && ((r.start <= end) || (r.end <= end))); - } - - /** - * Returns true if the two given ranges are neighbours (their limits only - * differ in the smallest representable difference between them). - */ - bool neighbours(const Range &r) const - { - constexpr T eps = std::numeric_limits::is_integer - ? 1 : std::numeric_limits::epsilon(); - return ((r.start > end) && ((r.start - eps) <= end)) - || ((r.end < start) && ((r.end + eps) >= start)); - } - - /** - * Checks whether the given range completely covers this range. - */ - bool coveredBy(const Range &r) const - { - return (r.start <= start) && (r.end >= end); - } - - /** - * Checks whether this range completely covers the given range. - */ - bool covers(const Range &r) const - { - return r.coveredBy(*this); - } - - /** - * Calculates the union of the two ranges -- not that this operation is only - * valid if the ranges overlapp. Use the RangeSet class if you cannot - * guarantee that. - */ - Range merge(const Range &r) const - { - return Range(std::min(start, r.start), std::max(end, r.end)); - } - - /** - * Returns a range that represents the spans the complete set defined by the - * given type T. - */ - static Range typeRange() - { - return Range(std::numeric_limits::min(), - std::numeric_limits::max()); - } - - /** - * Returns a range that represents the spans the complete set defined by the - * given type T up to a given value. - * - * @param till is the value up to which the range should be defined (till is - * included in the set). - */ - static Range typeRangeUntil(const T &till) - { - return Range(std::numeric_limits::min(), till); - } - - /** - * Returns a range that represents the spans the complete set defined by the - * given type T up to a given value. - * - * @param from is the value from which the range should be defined (from is - * included in the set). - */ - static Range typeRangeFrom(const T &from) - { - return Range(from, std::numeric_limits::max()); - } -}; - -/** - * RangeComp is a comperator used to order to sort the ranges within the - * ranges list. Sorts by the start element. - */ -template -struct RangeComp { - bool operator() (const Range& lhs, const Range& rhs) const - { - return lhs.start < rhs.start; - } -}; - -/** - * RangeSet represents a set of ranges of the given numerical type and is thus - * capable of representing any possible subset of the given numerical type T. - */ -template -class RangeSet { - -protected: - /** - * Set of ranges used internally. - */ - std::set, RangeComp> ranges; - - /** - * Returns an iterator to the first element in the ranges list that overlaps - * with the given range. - * - * @param r is the range for which the first overlapping element should be - * found. - * @return an iterator pointing to the first overlapping element or to the - * end of the list if no such element was found. - */ - typename std::set, RangeComp>::iterator firstOverlapping( - const Range &r, const bool allowNeighbours) - { - // Find the element with the next larger start value compared to the - // start value given in r. 
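		// (RangeComp orders the set by Range::start, so upper_bound(r) yields
		// the first stored range whose start lies strictly after r.start.)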
- auto it = ranges.upper_bound(r); - - // Go back one element - if (it != ranges.begin()) { - it--; - } - - // Iterate until an overlapping element is found - while (!(it->overlaps(r) || (allowNeighbours && it->neighbours(r))) - && (it != ranges.end())) { - it++; - } - return it; - } - -public: - /** - * Calculates the union of this range set and the given range. - * - * @param range is the range that should be merged into this range set. - */ - void merge(Range r) - { - // Calculate a new range that covers both the new range and all old - // ranges in the set -- delete all old elements on the way - auto it = firstOverlapping(r, true); - while ((it->overlaps(r) || it->neighbours(r)) && it != ranges.end()) { - r = r.merge(*it); - it = ranges.erase(it); - } - - // Insert the new range - ranges.insert(r); - } - - /** - * Calculates the union of this range set and the given range set. - * - * @param ranges is another range set for which the union with this set - * should be calculated. - */ - void merge(const RangeSet &s) - { - for (Range &r : s.ranges) { - merge(r); - } - } - - /** - * Checks whether this range set S contains the given range R: - * S u R = R - * (The intersection between R and S equals the given range) - * - * @param r is the range for which the containment should be checked. - * @return true if the above condition is met, false otherwise. - */ - bool contains(const Range &r) - { - auto it = firstOverlapping(r, false); - if (it != ranges.end()) { - return (*it).covers(r); - } - return false; - } - - /** - * Checks whether this range set S1 contains the given range set S2: - * - * @param s is the range for which the containment should be checked. - * @return true if the above condition is met, false otherwise. - */ - bool contains(const RangeSet &s) - { - bool res = true; - for (Range &r : s.ranges) { - res = res && contains(r); - } - return res; - } - - /** - * Empties the set. - */ - void clear() - { - ranges.clear(); - } - - /** - * Returns the current list of ranges as a const reference. - */ - const std::set, RangeComp>& getRanges() - { - return this->ranges; - } - -}; - -} -} - -#endif /* _OUSIA_MODEL_RANGE_SET_HPP_ */ - diff --git a/src/core/utils/Tokenizer.cpp b/src/core/utils/Tokenizer.cpp deleted file mode 100644 index a0ca3aa..0000000 --- a/src/core/utils/Tokenizer.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include - -#include "Tokenizer.hpp" - -namespace ousia { -namespace utils { - -static std::map buildChildren( - const std::map &inputs) -{ - std::map children; - std::map> nexts; - - for (auto &e : inputs) { - const std::string &s = e.first; - const int id = e.second; - if (s.empty()) { - continue; - } - char start = s[0]; - const std::string suffix = s.substr(1); - if (nexts.find(start) != nexts.end()) { - nexts[start].insert(std::make_pair(suffix, id)); - } else { - nexts.insert(std::make_pair( - start, std::map{{suffix, id}})); - } - } - - for (auto &n : nexts) { - children.insert(std::make_pair(n.first, TokenTreeNode{n.second})); - } - - return children; -} - -static int buildId(const std::map &inputs) -{ - int tokenId = TOKEN_NONE; - for (auto &e : inputs) { - if (e.first.empty()) { - if (tokenId != TOKEN_NONE) { - throw TokenizerException{std::string{"Ambigous token found: "} + - std::to_string(e.second)}; - } else { - tokenId = e.second; - } - } - } - return tokenId; -} - -TokenTreeNode::TokenTreeNode(const std::map &inputs) - : children(buildChildren(inputs)), tokenId(buildId(inputs)) -{ -} - -Tokenizer::Tokenizer(BufferedCharReader &input, const TokenTreeNode &root) - : input(input), root(root) -{ -} - -bool Tokenizer::prepare() -{ - std::stringstream buffer; - char c; - int startColumn = input.getColumn(); - int startLine = input.getLine(); - bool bufEmpty = true; - while (input.peek(&c)) { - if (root.children.find(c) != root.children.end()) { - // if there might be a special token, keep peeking forward - // until we find the token (or we don't). - TokenTreeNode const *n = &root; - std::stringstream tBuf; - int match = TOKEN_NONE; - while (true) { - tBuf << c; - n = &(n->children.at(c)); - if (n->tokenId != TOKEN_NONE) { - match = n->tokenId; - // from here on we found a token. If we have something - // in our buffer already, we end the search now. - if (!bufEmpty) { - break; - } else { - // if we want to return this token ( = we have nothing - // in our buffer yet) we look greedily for the longest - // possible token we can construct. - input.consumePeek(); - } - } - if (!input.peek(&c)) { - // if we are at the end we break off the search. - break; - } - if (n->children.find(c) == n->children.end()) { - // if we do not find a possible continuation anymore, - // break off the search. - break; - } - } - //reset the peek pointer to the last valid position. - input.resetPeek(); - // check if we did indeed find a special token. - if (match != TOKEN_NONE) { - if (bufEmpty) { - // if we did not have text before, construct that token. - if (doPrepare( - Token{match, tBuf.str(), startColumn, startLine, - input.getColumn(), input.getLine()}, - peeked)) { - return true; - } else { - startColumn = input.getColumn(); - startLine = input.getLine(); - continue; - } - } else { - // otherwise we return the text before the token. - if (doPrepare(Token{TOKEN_TEXT, buffer.str(), startColumn, - startLine, input.getColumn(), - input.getLine()}, - peeked)) { - return true; - } else{ - //we need to clear the buffer here. After all the token - //corresponding to this buffer segment is already - //constructed. - buffer.str(std::string()); - bufEmpty = true; - startColumn = input.getColumn(); - startLine = input.getLine(); - continue; - } - } - } else{ - //if we found nothing, read at least one character. 
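				// (This re-peeks the character that triggered the search so
				// that the "buffer << c" below appends it to the current
				// plain-text token.)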
- input.peek(&c); - } - } - buffer << c; - bufEmpty = false; - input.consumePeek(); - } - if (!bufEmpty) { - return doPrepare(Token{TOKEN_TEXT, buffer.str(), startColumn, startLine, - input.getColumn(), input.getLine()}, - peeked); - } - return false; -} - -bool Tokenizer::doPrepare(const Token &t, std::deque &peeked) -{ - peeked.push_back(t); - return true; -} - -bool Tokenizer::next(Token &t) -{ - if (peeked.empty()) { - if (!prepare()) { - return false; - } - } - t = peeked.front(); - peeked.pop_front(); - resetPeek(); - return true; -} - -bool Tokenizer::peek(Token &t) -{ - if (peekCursor >= peeked.size()) { - if (!prepare()) { - return false; - } - } - t = peeked[peekCursor]; - return true; -} - -void Tokenizer::resetPeek() { peekCursor = 0; } - -void Tokenizer::consumePeek() -{ - while (peekCursor > 0) { - peeked.pop_front(); - peekCursor--; - } -} -} -} diff --git a/src/core/utils/Tokenizer.hpp b/src/core/utils/Tokenizer.hpp deleted file mode 100644 index 2debc75..0000000 --- a/src/core/utils/Tokenizer.hpp +++ /dev/null @@ -1,231 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_UTILS_TOKENIZER_HPP_ -#define _OUSIA_UTILS_TOKENIZER_HPP_ - -#include -#include -#include - -#include "BufferedCharReader.hpp" - -namespace ousia { -namespace utils { - -/** - * This exception is currently only thrown if errors are made during the - * initialization of the Tokenizer. Have a closer look at the documentation - * of the TokenTreeNode constructor for more information. - */ -class TokenizerException : public std::exception { -public: - const std::string msg; - - TokenizerException(const std::string &msg) : msg(msg){}; - - virtual const char *what() const noexcept override { return msg.c_str(); } -}; - -/** - * The Tokenizer internally uses a TokenTree to be efficiently able to identify - * the longest consecutive token in the text. This is equivalent to a prefix - * trie. - * - * The TokenTree is a construct that structures all special tokens this - * Tokenizer recognizes. Consider the Tokens "aab", "a" and "aac". Then - * the TokenTree would look like this: - * - * a - * | \ - * a $ - * | \ - * b c - * | | - * $ $ - * - * Every node in the TokenTree is a valid end state that has a $ attached to it. - * During the search algorithm the Tokenizer goes through the tree and stores - * the last valid position. If a character follows that does not lead to a new - * node in the TokenTree the search ends (and starts again at this character). - * The token corresponding to the last valid position is returned. - * - * This allows us to uniquely identify the matching token given a certain - * input text. Note that this is a greedy matching approach that does not - * work if you're using truly ambiguous tokens (that have the same text). - * - * It is also not allowed that tokens have common middle parts but varying - * pre- and suffixes. 
Consider the example of two tokens "abd" and "bc" and - * the input string "abc". In that case we start looking for "abd" at the - * start, won't find it, wenn we hit "c" and start the scanning process - * anew. Thus the "bc" token is not found. - * - * For most (well-behaved) tokenization schemes this is not the case, - * though. - */ -class TokenTreeNode { -public: - const std::map children; - const int tokenId; - - /** - * The TokenTreeNode constructor builds a TokenTree from the given token - * specifications. The node returned by this constructor then is the root of - * said TokenTree. - * @param inputs Specifications of tokens in map form. Each specification - * is a tuple of the text that should be matched and some unique ID (>= 0) - * that is returned to you if that Token is found in the text. - * An example for such a map would be - * { - * { "#" , 1}, - * { "##", 2}, - * { "/" , 3} - * } - * Note that IDs below zero are reserved for system Ids, mainly TOKEN_NONE - * (-1) and TOKEN_TEXT (-2). - */ - TokenTreeNode(const std::map &inputs); -}; - -/** - * This is a reserved constant for the empty token. - */ -static const int TOKEN_NONE = -1; -/** - * This is a reserved constant for every part of the input text that is not a - * specified token. - */ -static const int TOKEN_TEXT = -2; - -/** - * A token for us is identified by an integer tokenID (either one of the - * constants TOKEN_NONE or TOKEN_TEXT or one of the user-defined constants). - * Additionally we return the matched text (which should only be really - * interesting in case of TOKEN_TEXT tokens) and the position in the input text. - */ -struct Token { - int tokenId; - std::string content; - int startColumn; - int startLine; - int endColumn; - int endLine; - - Token(int tokenId, std::string content, int startColumn, int startLine, - int endColumn, int endLine) - : tokenId(tokenId), - content(content), - startColumn(startColumn), - startLine(startLine), - endColumn(endColumn), - endLine(endLine) - { - } - - Token() : tokenId(TOKEN_NONE) {} -}; - -/** - * A Tokenizer has the purpose of subdividing an input text into tokens. In our - * definition here we distinguish between two kinds of tokens: - * 1.) User-specified tokens that match a fixed text. - * 2.) Any other text between those tokens. - * The user might want to specify the tokens '#{' and '#}' for example, because - * they have some meaning in her code. The user sets the IDs to 1 and 2. - * Given the input text - * "some text #{ special command #} some text" - * the tokenizer would return the tokens: - * 1.) "some text " with the id TOKEN_TEXT (-2). - * 2.) "#{" with the id 1. - * 3.) " special command " with the id TOKEN_TEXT (-2). - * 4.) "#}" with the id 2. - * 5.) " some text" with the id TOKEN_TEXT (-2). - * This makes the subsequent parsing of files of a specific type easier. - * Note that in case of tokens with that are prefixes of other tokens the - * longest possible match is returned. - */ -class Tokenizer { -private: - BufferedCharReader &input; - const TokenTreeNode &root; - std::deque peeked; - unsigned int peekCursor = 0; - - bool prepare(); - -protected: - /** - * This method is an interface to build multiple tokens from a single one in - * derived classes. This might be interesting if you want to implement - * further logic on text tokens or similar applications. - * - * @param t a Token the "basic" tokenizer found. - * @param peeked a reference to the deque containing all temporary Tokens. - * You are supposed to append your tokens there. 
In the trivial case you just - * put the given Token on top of the deque. - * @return false if no token was appended to the deque (meaning that you want - * to ignore the given token explicitly) and true in all other cases. - */ - virtual bool doPrepare(const Token &t, std::deque &peeked); - -public: - /** - * @param input The input of a Tokenizer is given in the form of a - * BufferedCharReader. Please refer to the respective documentation. - * @param root This is meant to be the root of a TokenTree giving the - * specification of user-defined tokens this Tokenizer should recognize. - * The Tokenizer promises to not change the TokenTree such that you can - * re-use the same specification for multiple inputs. - * Please refer to the TokenTreeNode documentation for more information. - */ - Tokenizer(BufferedCharReader &input, const TokenTreeNode &root); - - /** - * The next method consumes one Token from the input stream and gives - * it to the user (stored in the input argument). - * - * @param t a Token reference that is set to the next found token. - * @return true if a next token was found and false if the input is at its - * end. - */ - bool next(Token &t); - /** - * The peek method does not consume the next Token but buffers it and - * shows it to the user (stored in the input argument). - * - * @param t a Token reference that is set to the next found token. - * @return true if a next token was found and false if the input is at its - * end. - */ - bool peek(Token &t); - - /** - * Resets the peek pointer to the current position in the stream (to the - * beginning of the buffer). - */ - void resetPeek(); - - /** - * Clears the peek buffer, such that all peeked Tokens are consumed. - */ - void consumePeek(); -}; -} -} - -#endif diff --git a/src/core/utils/Utils.cpp b/src/core/utils/Utils.cpp deleted file mode 100644 index 184fdd0..0000000 --- a/src/core/utils/Utils.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "Utils.hpp" - -namespace ousia { - -bool Utils::isIdentifier(const std::string &name) -{ - bool first = true; - for (char c : name) { - if (first && !(isAlphabetic(c) || c == '_')) { - return false; - } - if (first && !(isAlphanumeric(c) || c == '_' || c == '-')) { - return false; - } - first = false; - } - return true; -} - -} - diff --git a/src/core/utils/Utils.hpp b/src/core/utils/Utils.hpp deleted file mode 100644 index 2fcd794..0000000 --- a/src/core/utils/Utils.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - - -#ifndef _OUSIA_UTILS_H_ -#define _OUSIA_UTILS_H_ - -#include - -namespace ousia { - -class Utils { - -public: - - /** - * Returns true if the given character is in [A-Za-z] - */ - static bool isAlphabetic(const char c) - { - return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); - } - - /** - * Returns true if the given character is in [0-9] - */ - static bool isNumeric(const char c) - { - return (c >= '0') && (c <= '9'); - } - - /** - * Returns true if the given character is in [A-Za-z0-9] - */ - static bool isAlphanumeric(const char c) - { - return isAlphabetic(c) || isNumeric(c); - } - - /** - * Returns true if the given character is in [A-Za-z_][A-Za-z0-9_-]* - */ - static bool isIdentifier(const std::string &name); - -}; - -} - -#endif /* _OUSIA_UTILS_H_ */ - -- cgit v1.2.3