diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/BufferedCharReader.cpp | 2 | ||||
-rw-r--r-- | src/core/BufferedCharReader.hpp | 6 | ||||
-rw-r--r-- | src/core/CSSParser.cpp | 307 | ||||
-rw-r--r-- | src/core/CSSParser.hpp | 118 | ||||
-rw-r--r-- | src/core/parser/Parser.hpp | 14 | ||||
-rw-r--r-- | src/core/parser/Scope.hpp | 7 | ||||
-rw-r--r-- | src/core/variant/Reader.cpp | 14 | ||||
-rw-r--r-- | src/core/variant/Reader.hpp | 54 |
8 files changed, 37 insertions, 485 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp index cf481df..23c219a 100644 --- a/src/core/BufferedCharReader.cpp +++ b/src/core/BufferedCharReader.cpp @@ -224,7 +224,7 @@ void BufferedCharReader::resetPeek() peekCursor.assign(readCursor); } -bool BufferedCharReader::atEnd() +bool BufferedCharReader::atEnd() const { if (depleted || !inputStream) { if (buffer.size() <= 0) { diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp index ec76b03..bd19d4a 100644 --- a/src/core/BufferedCharReader.hpp +++ b/src/core/BufferedCharReader.hpp @@ -252,21 +252,21 @@ public: * * @return true if there is no more data. */ - bool atEnd(); + bool atEnd() const; /** * Returns the current line (starting with one). * * @return the current line number. */ - inline int getLine() { return readCursor.line; } + int getLine() const { return readCursor.line; } /** * Returns the current column (starting with one). * * @return the current column number. */ - inline int getColumn() { return readCursor.column; } + int getColumn() const { return readCursor.column; } }; } diff --git a/src/core/CSSParser.cpp b/src/core/CSSParser.cpp deleted file mode 100644 index b762844..0000000 --- a/src/core/CSSParser.cpp +++ /dev/null @@ -1,307 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
-*/ - -#include "CSSParser.hpp" - -namespace ousia { - -// CSS code tokens -static const int CURLY_OPEN = 1; -static const int CURLY_CLOSE = 2; -static const int COLON = 3; -static const int DOUBLE_COLON = 4; -static const int SEMICOLON = 5; -static const int HASH = 6; -static const int BRACKET_OPEN = 7; -static const int BRACKET_CLOSE = 8; -static const int PAREN_OPEN = 9; -static const int PAREN_CLOSE = 10; -static const int EQUALS = 11; -static const int ARROW = 12; -static const int COMMA = 13; -// comments -static const int COMMENT = 100; -static const int COMMENT_OPEN = 101; -static const int COMMENT_CLOSE = 102; -// strings -static const int STRING = 200; -static const int DOUBLE_QUOTE = 201; -static const int ESCAPE = 202; -// general syntax -static const int LINEBREAK = 300; - -static const TokenTreeNode CSS_ROOT{{{"{", CURLY_OPEN}, - {"}", CURLY_CLOSE}, - {":", COLON}, - {"::", DOUBLE_COLON}, - {";", SEMICOLON}, - {"#", HASH}, - {"[", BRACKET_OPEN}, - {"]", BRACKET_CLOSE}, - {"(", PAREN_OPEN}, - {")", PAREN_CLOSE}, - {"=", EQUALS}, - {">", ARROW}, - {",", COMMA}, - {"/*", COMMENT_OPEN}, - {"*/", COMMENT_CLOSE}, - {"\"", DOUBLE_QUOTE}, - {"\\", ESCAPE}, - // linux linebreak - {"\n", LINEBREAK}, - // windows linebreak - {"\r\n", LINEBREAK}, - // Mac OS linebreak - {"\r", LINEBREAK}}}; - -static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = { - {COMMENT_OPEN, {CodeTokenMode::BLOCK_COMMENT_START, COMMENT}}, - {COMMENT_CLOSE, {CodeTokenMode::BLOCK_COMMENT_END, COMMENT}}, - {DOUBLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}}, - {ESCAPE, {CodeTokenMode::ESCAPE, ESCAPE}}, - {LINEBREAK, {CodeTokenMode::LINEBREAK, LINEBREAK}}}; - -Rooted<SelectorNode> CSSParser::parse(BufferedCharReader &input) -{ - CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; - tokenizer.ignoreComments = true; - // TODO: Is this the correct way to retrieve the Manager? 
- Manager mgr; - Rooted<SelectorNode> root = {new SelectorNode{mgr, "root"}}; - parseDocument(root, tokenizer); - return root; -} - -void CSSParser::parseDocument(Rooted<SelectorNode> root, - CodeTokenizer &tokenizer) -{ - Token t; - if (!tokenizer.peek(t)) { - return; - } - tokenizer.resetPeek(); - std::vector<Rooted<SelectorNode>> leafList; - parseSelectors(root, tokenizer, leafList); - // TODO: Parse Ruleset - parseDocument(root, tokenizer); -} - -void CSSParser::parseSelectors(Rooted<SelectorNode> root, - CodeTokenizer &tokenizer, - std::vector<Rooted<SelectorNode>> &leafList) -{ - auto tuple = parseSelector(tokenizer); - // append the SelectorPath to the root node. - std::vector<Rooted<SelectorNode>> unmergedLeafs = - root->append(std::get<0>(tuple)); - // append the leaf to the leafList. - switch (unmergedLeafs.size()) { - case 0: - // if the leaf could be merged we take the leaf reference from the - // parseSelector method. - leafList.push_back(std::get<1>(tuple)); - break; - case 1: - // if the leaf could not be merged we take the existing leaf. - leafList.push_back(unmergedLeafs[0]); - break; - case 2: - // as the parseSelector is supposed to parse only a SelectorPath - // there should not be more than one leaf. - throw LoggableException{ - "Internal Error: More than one leaf in SelectorPath!", true, - tokenizer.getInput()}; - } - // if we find a comma, we can proceed parsing selectors. - Token t; - if (expect(COMMA, tokenizer, t, false)) { - parseSelectors(root, tokenizer, leafList); - } -} - -std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> CSSParser::parseSelector( - CodeTokenizer &tokenizer) -{ - Rooted<SelectorNode> s = parsePrimitiveSelector(tokenizer); - Token t; - if (!tokenizer.peek(t)) { - // if we are at the end the found selector is the immediate child as - // well as the leaf. 
- return std::make_tuple(s, s); - } - switch (t.tokenId) { - case TOKEN_TEXT: { - // if we find text there is a next token in a DESCENDANT - // relationship (A B) - tokenizer.resetPeek(); - // so we parse the rest of the subsequent SelectorPath - auto tuple = parseSelector(tokenizer); - // then we establish the DESCENDANT relationship - // TODO: Is this the correct way to retrieve the Manager? - Manager mgr; - s->getEdges().push_back( - new SelectorNode::SelectorEdge(mgr, std::get<0>(tuple))); - // and we return this node as well as the leaf. - return std::make_tuple(s, std::get<1>(tuple)); - } - case ARROW: { - tokenizer.consumePeek(); - // if we find an arrow there is a next token in a CHILD - // relationship (A > B) - // so we parse the rest of the subsequent SelectorPath - auto tuple = parseSelector(tokenizer); - // then we establish the DESCENDANT relationship - // TODO: Is this the correct way to retrieve the Manager? - Manager mgr; - s->getEdges().push_back(new SelectorNode::SelectorEdge( - mgr, std::get<0>(tuple), SelectionOperator::DIRECT_DESCENDANT)); - // and we return this node as well as the leaf. - return std::make_tuple(s, std::get<1>(tuple)); - } - default: - // everything else is not part of the SelectorPath anymore. - tokenizer.resetPeek(); - return std::make_tuple(s, s); - } -} - -Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer) -{ - // first and foremost we expect a class name. - Token t; - expect(TOKEN_TEXT, tokenizer, t, true); - const std::string name = t.content; - // TODO: Is this the correct way to retrieve the Manager? - Manager mgr; - if (!tokenizer.peek(t)) { - // if we are at the end, we just return this selector with its name. - Rooted<SelectorNode> n{new SelectorNode(mgr, name)}; - return n; - } - - bool isGenerative = false; - - switch (t.tokenId) { - case DOUBLE_COLON: - // if we find a double colon we have a generative PseudoSelector. 
- isGenerative = true; - case COLON: { - // if we find a colon we have a restrictive PseudoSelector. - tokenizer.consumePeek(); - // get the PseudoSelector name. - expect(TOKEN_TEXT, tokenizer, t, true); - const std::string pseudo_select_name = t.content; - // look for additional arguments. - if (!expect(PAREN_OPEN, tokenizer, t, false)) { - // if we don't have any, we return here. - Rooted<SelectorNode> n{new SelectorNode( - mgr, name, {pseudo_select_name, isGenerative})}; - return n; - } - // parse the argument list. - std::vector<std::string> args; - // we require at least one argument, if parantheses are used - expect(TOKEN_TEXT, tokenizer, t, true); - args.push_back(t.content); - while (expect(COMMA, tokenizer, t, false)) { - // as long as we find commas we expect new arguments. - expect(TOKEN_TEXT, tokenizer, t, true); - args.push_back(t.content); - } - expect(PAREN_CLOSE, tokenizer, t, true); - // and we return with the finished Selector. - Rooted<SelectorNode> n{new SelectorNode( - mgr, name, {pseudo_select_name, args, isGenerative})}; - return n; - } - case HASH: { - // a hash symbol is syntactic sugar for the PseudoSelector - // :has_id(id) - // so we expect an ID now. - Token t; - expect(TOKEN_TEXT, tokenizer, t, true); - std::vector<std::string> args{t.content}; - // and we return the finished Selector - Rooted<SelectorNode> n{ - new SelectorNode(mgr, name, {"has_id", args, false})}; - return n; - } - case BRACKET_OPEN: { - // in case of brackets we have one of two restrictive - // PseudoSelectors - // has_attribute ([attribute_name]) - // or - // has_value [attribute_name="value"] - // in both cases the attribute name comes first. - Token t; - expect(TOKEN_TEXT, tokenizer, t, true); - std::vector<std::string> args{t.content}; - if (!expect(EQUALS, tokenizer, t, false)) { - // if no equals sign follows we have a has_attribute - // PseudoSelector - // we expect a closing bracket. 
- expect(BRACKET_CLOSE, tokenizer, t, true); - // and then we can return the result. - Rooted<SelectorNode> n{new SelectorNode( - mgr, name, {"has_attribute", args, false})}; - return n; - } else { - // with an equals sign we have a has_value PseudoSelector and - // expect the value next. - expect(STRING, tokenizer, t, true); - args.push_back(t.content); - // then we expect a closing bracket. - expect(BRACKET_CLOSE, tokenizer, t, true); - // and then we can return the result. - Rooted<SelectorNode> n{ - new SelectorNode(mgr, name, {"has_value", args, false})}; - return n; - } - } - default: - // everything else is not part of the Selector anymore. - tokenizer.resetPeek(); - Rooted<SelectorNode> n{new SelectorNode(mgr, name)}; - return n; - } -} - -// TODO: Add RuleSet parsing methods. - -bool CSSParser::expect(int expectedType, CodeTokenizer &tokenizer, Token &t, - bool force) -{ - bool end = !tokenizer.peek(t); - if (end || t.tokenId != expectedType) { - if (force) { - if (end) { - throw LoggableException{"Unexpected end of file!", true, - tokenizer.getInput()}; - } else { - throw LoggableException{"Unexpected token!", true, - tokenizer.getInput()}; - } - } else { - tokenizer.resetPeek(); - return false; - } - } - tokenizer.consumePeek(); - return true; -} -} diff --git a/src/core/CSSParser.hpp b/src/core/CSSParser.hpp deleted file mode 100644 index 7dfc872..0000000 --- a/src/core/CSSParser.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef _OUSIA_CSS_PARSER_HPP_ -#define _OUSIA_CSS_PARSER_HPP_ - -#include <vector> -#include <tuple> - -#include "BufferedCharReader.hpp" -#include "CodeTokenizer.hpp" -#include "CSS.hpp" -#include "Exceptions.hpp" - -namespace ousia { - -/** - * This is a context free, recursive parser for a subset of the CSS3 language - * as defined by W3C. We allow the following grammar: - * - * DOC := SELECT RULESET DOC | epsilon - * SELECTORS := SELECT , SELECTORS | SELECT - * SELECT := SELECT' OPERATOR SELECT | SELECT' - * SELECT' := TYPE | TYPE:PSEUDO | TYPE::GEN_PSEUDO | - * TYPE:PSEUDO(ARGUMENTS) | - * TYPE::GEN_PSEUDO(ARGUMENTS) | TYPE#ID | - * TYPE[ATTRIBUTE] | TYPE[ATTRIBUTE=VALUE] - * TYPE := string - * PSEUDO := string - * GEN_PSEUDO := string - * ARGUMENTS := string , ARGUMENTS - * ID := string - * ATTRIBUTE := string - * VALUE := string - * OPERATOR := epsilon | > - * RULESET := epsilon | { RULES } - * RULES := RULE RULES | epsilon - * RULE := KEY : VALUE ; - * KEY := string - * VALUE := type-specific parser - * - * - * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de - */ -class CSSParser { -private: - /** - * Implements the DOC Nonterminal - */ - void parseDocument(Rooted<SelectorNode> root, CodeTokenizer &tokenizer); - /** - * Implements the SELECTORS Nonterminal and adds all leaf nodes of the - * resulting SelectorTree to the input leafList so that a parsed RuleSet can - * be inserted there. - */ - void parseSelectors(Rooted<SelectorNode> root, CodeTokenizer &tokenizer, - std::vector<Rooted<SelectorNode>> &leafList); - /** - * Implements the SELECT Nonterminal, which in effect parses a SelectorPath - * of the SelectorTree and returns the beginning node of the path as first - * element as well as the leaf of the path as second tuple element. 
- */ - std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> parseSelector( - CodeTokenizer &tokenizer); - - /** - * Implements the SELECT' Nonterminal, which parses a single Selector with - * its PseudoSelector and returns it. - */ - Rooted<SelectorNode> parsePrimitiveSelector(CodeTokenizer &tokenizer); - - // TODO: Add RuleSet parsing methods. - - /** - * A convenience function to wrap around the tokenizer peek() function that - * only returns true if an instance of the expected type occurs. - * - * @param expectedType the ID of the expected type according to the - * CodeTokenizer specification. - * @param tokenizer the tokenizer for the input. - * @param t an empty token that gets the parsed token content - * if it has the expected type. - * @param force a flag to be set if it would be fatal for the - * parsing process to get the wrong type. In that case - * an exception is thrown. - * @return true iff a token of the expected type was found. - */ - bool expect(int expectedType, CodeTokenizer &tokenizer, Token &t, - bool force); - -public: - /** - * This parses the given input as CSS content as specified by the grammar - * seen above. The return value is a Rooted reference to the root of the - * SelectorTree. - * TODO: The RuleSet at the respective node at the tree lists all CSS Style - * rules that apply. - */ - Rooted<SelectorNode> parse(BufferedCharReader &input); -}; -} - -#endif diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index fa5dd49..5dac956 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -70,6 +70,10 @@ struct ParserContext { * Reference to the Logger the parser should log any messages to. */ Logger &logger; + /** + * Reference to the Manager the parser should append nodes to. + */ + Manager &manager; /** * Constructor of the ParserContext class. @@ -81,9 +85,12 @@ struct ParserContext { * implementations. 
* @param logger is a reference to the Logger instance that should be used * to log error messages and warnings that occur while parsing the document. + * @param manager is a Reference to the Manager the parser should append + *nodes to. */ - ParserContext(Scope &scope, Registry &registry, Logger &logger) - : scope(scope), registry(registry), logger(logger){}; + ParserContext(Scope &scope, Registry &registry, Logger &logger, + Manager &manager) + : scope(scope), registry(registry), logger(logger), manager(manager){}; }; struct StandaloneParserContext : public ParserContext { @@ -91,10 +98,11 @@ private: Logger logger; Scope scope; Registry registry; + Manager manager; public: StandaloneParserContext() - : ParserContext(scope, registry, logger), + : ParserContext(scope, registry, logger, manager), scope(nullptr), registry(logger){}; }; diff --git a/src/core/parser/Scope.hpp b/src/core/parser/Scope.hpp index 9c5504f..5b19b3d 100644 --- a/src/core/parser/Scope.hpp +++ b/src/core/parser/Scope.hpp @@ -55,7 +55,7 @@ public: * Creates a new ScopedScope instance. * * @param scope is the backing Scope instance. - * @param node is the Node instance that should be poped onto the stack of + * @param node is the Node instance that should be pushed onto the stack of * the Scope instance. */ ScopedScope(Scope *scope, Handle<Node> node); @@ -108,11 +108,6 @@ public: Scope(Handle<Node> rootNode) { nodes.push_back(rootNode); } /** - * Returns a reference at the Manager instance all nodes belong to. - */ - Manager &getManager() { return getRoot()->getManager(); } - - /** * Pushes a new node onto the scope. * * @param node is the node that should be used for local lookup. 
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp index 6142ecf..e9a58a1 100644 --- a/src/core/variant/Reader.cpp +++ b/src/core/variant/Reader.cpp @@ -26,12 +26,17 @@ namespace ousia { namespace variant { +static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character"; +static const char *ERR_UNEXPECTED_END = "Unexpected end"; +static const char *ERR_UNTERMINATED = "Unterminated literal"; + static const int STATE_INIT = 0; static const int STATE_IN_STRING = 1; static const int STATE_ESCAPE = 2; static std::pair<Err, std::string> parseString( - BufferedCharReader &reader, const unordered_set<char> *delims = nullptr) + BufferedCharReader &reader, const unordered_set<char> *delims = nullptr, + Logger *logger = nullptr) { // Initialize the internal state Err errCode = Err::OK; @@ -51,9 +56,13 @@ static std::pair<Err, std::string> parseString( quote = c; state = STATE_IN_STRING; } else if (delims && delims.count(c)) { + Logger.log(ERR_UNTERMINATED, reader); return std::make_pair(Err::UNEXPECTED_END, res.str()); + } else if (Utils::isWhitespace(c)) { + reader.consumePeek(); + continue; } - reader.consumePeek(); + return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str()); break; case STATE_IN_STRING: if (c == q) { @@ -171,7 +180,6 @@ static std::pair<Err, Variant> parseGeneric(BufferedCharReader &reader, } return std::make_pair(Err::UNEXPECTED_END, res.str()); } - } } diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp index 3f945f0..339127f 100644 --- a/src/core/variant/Reader.hpp +++ b/src/core/variant/Reader.hpp @@ -32,6 +32,7 @@ #include <utility> #include <core/BufferedCharReader.hpp> +#include <core/Logger.hpp> #include "Variant.hpp" @@ -40,44 +41,6 @@ namespace variant { class Reader { public: - // TODO: Pass logger instance instead of using error codes? - - /** - * The Err enum describes possible error codes that may be encountered when - * parsing the microtypes. 
- */ - enum class Err : int { - /** - * Reached the end of the stream, but expected more data. - */ - ERR_UNEXPECTED_END = -1, - - /** - * The stream is malformed. - */ - ERR_MALFORMED = -2, - - /** - * Unexpected character. - */ - ERR_UNEXPECTED_CHARACTER = -3, - - /** - * Unterminated literal. - */ - ERR_UNTERMINATED = -4, - - /** - * Invalid escape character. - */ - ERR_INVALID_ESCAPE = -5, - - /** - * A value of the requested type was extracted successfully. - */ - OK = 0 - }; - /** * Parses a string which may either be enclosed by " or ', unescapes * entities in the string as specified for JavaScript. @@ -91,9 +54,10 @@ public: * outside). If nullptr is given, no delimiter is used and a complete string * is read. */ - static std::pair<Err, std::string> parseString( + static std::pair<bool, std::string> parseString( BufferedCharReader &reader, - const unordered_set<char> *delims = nullptr); + const unordered_set<char> *delims = nullptr, + Logger *logger = nullptr); /** * Extracts an unescaped string from the given buffered char reader @@ -106,8 +70,9 @@ public: * @param delims is a set of characters which will terminate the string. * These characters are not included in the result. May not be nullptr. */ - static std::pair<Err, std::string> parseUnescapedString( - BufferedCharReader &reader, const unordered_set<char> *delims); + static std::pair<bool, std::string> parseUnescapedString( + BufferedCharReader &reader, const unordered_set<char> *delims, + Logger *logger = nullptr); /** * Tries to parse the most specific item from the given stream until one of @@ -120,8 +85,9 @@ public: * @param delims is a set of characters which will terminate the string. * These characters are not included in the result. May not be nullptr. 
*/ - static std::pair<Err, Variant> parseGeneric( - BufferedCharReader &reader, const unordered_set<char> *delims); + static std::pair<bool, Variant> parseGeneric( + BufferedCharReader &reader, const unordered_set<char> *delims, + Logger *logger = nullptr); }; } } |