From d98fc013878ab28cb062a4f25a45199b9ee9574a Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Fri, 5 Dec 2014 12:18:02 +0100 Subject: Made the CSSParser a valid subclass of Parser. --- test/core/CSSParserTest.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'test/core/CSSParserTest.cpp') diff --git a/test/core/CSSParserTest.cpp b/test/core/CSSParserTest.cpp index a717093..c231092 100644 --- a/test/core/CSSParserTest.cpp +++ b/test/core/CSSParserTest.cpp @@ -18,14 +18,18 @@ #include +#include + #include namespace ousia { +namespace parser { +namespace css { TEST(CSSParser, testParseSelectors) { - // create a selector Tree as input. - BufferedCharReader reader; - reader.feed("A>B,A B:r, C#a A[bla=\"blub\"], A::g(4,2,3)"); + // create a string describing a SelectorTree as input. + std::stringstream input; + input << "A>B,A B:r, C#a A[bla=\"blub\"], A::g(4,2,3)"; /* This should describe the tree: * root_____ * | \ \ @@ -34,9 +38,12 @@ TEST(CSSParser, testParseSelectors) * B B::r A[bla="blub"] */ + // initialize an empty parser context. + StandaloneParserContext ctx; + // parse the input. CSSParser instance; - Rooted root = instance.parse(reader); + Rooted root = instance.parse(input, ctx).cast(); // we expect three children of the root node overall. ASSERT_EQ(3, root->getEdges().size()); @@ -109,4 +116,5 @@ TEST(CSSParser, testParseSelectors) ASSERT_EQ(0, Ag->getEdges().size()); } } - +} +} -- cgit v1.2.3 From 0a4fbfe17951498f9a5f650f6da81d90f4a533b4 Mon Sep 17 00:00:00 2001 From: Benjamin Paassen Date: Fri, 5 Dec 2014 12:35:58 +0100 Subject: refactored CSSParser into plugins. 
--- CMakeLists.txt | 20 ++- src/core/CSSParser.cpp | 319 ------------------------------------- src/core/CSSParser.hpp | 142 ----------------- src/plugins/css/CSSParser.cpp | 319 +++++++++++++++++++++++++++++++++++++ src/plugins/css/CSSParser.hpp | 140 ++++++++++++++++ test/core/CSSParserTest.cpp | 120 -------------- test/plugins/css/CSSParserTest.cpp | 120 ++++++++++++++ 7 files changed, 597 insertions(+), 583 deletions(-) delete mode 100644 src/core/CSSParser.cpp delete mode 100644 src/core/CSSParser.hpp create mode 100644 src/plugins/css/CSSParser.cpp create mode 100644 src/plugins/css/CSSParser.hpp delete mode 100644 test/core/CSSParserTest.cpp create mode 100644 test/plugins/css/CSSParserTest.cpp (limited to 'test/core/CSSParserTest.cpp') diff --git a/CMakeLists.txt b/CMakeLists.txt index ceae5d1..3e469a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,7 +100,6 @@ ADD_LIBRARY(ousia_core src/core/BufferedCharReader src/core/CodeTokenizer src/core/CSS - src/core/CSSParser src/core/Exceptions src/core/Logger src/core/Managed @@ -118,6 +117,14 @@ ADD_LIBRARY(ousia_core src/core/variant/Variant ) +ADD_LIBRARY(ousia_css + src/plugins/css/CSSParser +) + +TARGET_LINK_LIBRARIES(ousia_css + ousia_core +) + ADD_LIBRARY(ousia_xml src/plugins/xml/XmlParser ) @@ -147,7 +154,6 @@ IF(TEST) test/core/BufferedCharReaderTest test/core/CodeTokenizerTest test/core/CSSTest - test/core/CSSParserTest test/core/LoggerTest test/core/ManagedTest test/core/ManagedContainersTest @@ -167,6 +173,16 @@ IF(TEST) ousia_core ) + ADD_EXECUTABLE(ousia_test_css + test/plugins/css/CSSParserTest + ) + + TARGET_LINK_LIBRARIES(ousia_test_css + ${GTEST_LIBRARIES} + ousia_core + ousia_css + ) + ADD_EXECUTABLE(ousia_test_xml test/plugins/xml/XmlParserTest ) diff --git a/src/core/CSSParser.cpp b/src/core/CSSParser.cpp deleted file mode 100644 index d239359..0000000 --- a/src/core/CSSParser.cpp +++ /dev/null @@ -1,319 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - 
This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "CSSParser.hpp" - -namespace ousia { -namespace parser { -namespace css { - -// CSS code tokens -static const int CURLY_OPEN = 1; -static const int CURLY_CLOSE = 2; -static const int COLON = 3; -static const int DOUBLE_COLON = 4; -static const int SEMICOLON = 5; -static const int HASH = 6; -static const int BRACKET_OPEN = 7; -static const int BRACKET_CLOSE = 8; -static const int PAREN_OPEN = 9; -static const int PAREN_CLOSE = 10; -static const int EQUALS = 11; -static const int ARROW = 12; -static const int COMMA = 13; -// comments -static const int COMMENT = 100; -static const int COMMENT_OPEN = 101; -static const int COMMENT_CLOSE = 102; -// strings -static const int STRING = 200; -static const int DOUBLE_QUOTE = 201; -static const int ESCAPE = 202; -// general syntax -static const int LINEBREAK = 300; - -static const TokenTreeNode CSS_ROOT{{{"{", CURLY_OPEN}, - {"}", CURLY_CLOSE}, - {":", COLON}, - {"::", DOUBLE_COLON}, - {";", SEMICOLON}, - {"#", HASH}, - {"[", BRACKET_OPEN}, - {"]", BRACKET_CLOSE}, - {"(", PAREN_OPEN}, - {")", PAREN_CLOSE}, - {"=", EQUALS}, - {">", ARROW}, - {",", COMMA}, - {"/*", COMMENT_OPEN}, - {"*/", COMMENT_CLOSE}, - {"\"", DOUBLE_QUOTE}, - {"\\", ESCAPE}, - // linux linebreak - {"\n", LINEBREAK}, - // windows linebreak - {"\r\n", LINEBREAK}, - // Mac OS linebreak - {"\r", LINEBREAK}}}; - -static const std::map CSS_DESCRIPTORS = { - 
{COMMENT_OPEN, {CodeTokenMode::BLOCK_COMMENT_START, COMMENT}}, - {COMMENT_CLOSE, {CodeTokenMode::BLOCK_COMMENT_END, COMMENT}}, - {DOUBLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}}, - {ESCAPE, {CodeTokenMode::ESCAPE, ESCAPE}}, - {LINEBREAK, {CodeTokenMode::LINEBREAK, LINEBREAK}}}; - -Rooted CSSParser::parse(std::istream &is, ParserContext &ctx) -{ - BufferedCharReader input{is}; - CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; - tokenizer.ignoreComments = true; - Rooted root = {new SelectorNode{ctx.manager, "root"}}; - parseDocument(root, tokenizer, ctx); - return root; -} - -void CSSParser::parseDocument(Rooted root, - CodeTokenizer &tokenizer, ParserContext &ctx) -{ - Token t; - if (!tokenizer.peek(t)) { - return; - } - tokenizer.resetPeek(); - std::vector> leafList; - parseSelectors(root, tokenizer, leafList, ctx); - // TODO: Parse Ruleset - parseDocument(root, tokenizer, ctx); -} - -void CSSParser::parseSelectors(Rooted root, - CodeTokenizer &tokenizer, - std::vector> &leafList, - ParserContext &ctx) -{ - auto tuple = parseSelector(tokenizer, ctx); - // append the SelectorPath to the root node. - std::vector> unmergedLeafs = - root->append(std::get<0>(tuple)); - // append the leaf to the leafList. - switch (unmergedLeafs.size()) { - case 0: - // if the leaf could be merged we take the leaf reference from the - // parseSelector method. - leafList.push_back(std::get<1>(tuple)); - break; - case 1: - // if the leaf could not be merged we take the existing leaf. - leafList.push_back(unmergedLeafs[0]); - break; - case 2: - // as the parseSelector is supposed to parse only a SelectorPath - // there should not be more than one leaf. - throw ParserException{ - "Internal Error: More than one leaf in SelectorPath!", "", - // TODO: Line handling? - // tokenizer.getInput().getLine(), - // tokenizer.getInput().getColumn() - }; - } - // if we find a comma, we can proceed parsing selectors. 
- Token t; - if (expect(COMMA, tokenizer, t, false, ctx)) { - parseSelectors(root, tokenizer, leafList, ctx); - } -} - -std::tuple, Rooted> CSSParser::parseSelector( - CodeTokenizer &tokenizer, ParserContext &ctx) -{ - Rooted s = parsePrimitiveSelector(tokenizer, ctx); - Token t; - if (!tokenizer.peek(t)) { - // if we are at the end the found selector is the immediate child as - // well as the leaf. - return std::make_tuple(s, s); - } - switch (t.tokenId) { - case TOKEN_TEXT: { - // if we find text there is a next token in a DESCENDANT - // relationship (A B) - tokenizer.resetPeek(); - // so we parse the rest of the subsequent SelectorPath - auto tuple = parseSelector(tokenizer, ctx); - // then we establish the DESCENDANT relationship - s->getEdges().push_back(new SelectorNode::SelectorEdge( - ctx.manager, std::get<0>(tuple))); - // and we return this node as well as the leaf. - return std::make_tuple(s, std::get<1>(tuple)); - } - case ARROW: { - tokenizer.consumePeek(); - // if we find an arrow there is a next token in a CHILD - // relationship (A > B) - // so we parse the rest of the subsequent SelectorPath - auto tuple = parseSelector(tokenizer, ctx); - // then we establish the DESCENDANT relationship - s->getEdges().push_back(new SelectorNode::SelectorEdge( - ctx.manager, std::get<0>(tuple), - SelectionOperator::DIRECT_DESCENDANT)); - // and we return this node as well as the leaf. - return std::make_tuple(s, std::get<1>(tuple)); - } - default: - // everything else is not part of the SelectorPath anymore. - tokenizer.resetPeek(); - return std::make_tuple(s, s); - } -} - -Rooted CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer, - ParserContext &ctx) -{ - // first and foremost we expect a class name. - Token t; - expect(TOKEN_TEXT, tokenizer, t, true, ctx); - const std::string name = t.content; - if (!tokenizer.peek(t)) { - // if we are at the end, we just return this selector with its name. 
- Rooted n{new SelectorNode(ctx.manager, name)}; - return n; - } - - bool isGenerative = false; - - switch (t.tokenId) { - case DOUBLE_COLON: - // if we find a double colon we have a generative PseudoSelector. - isGenerative = true; - // this is supposed to fall through; no missing break. - case COLON: { - // if we find a colon we have a restrictive PseudoSelector. - tokenizer.consumePeek(); - // get the PseudoSelector name. - expect(TOKEN_TEXT, tokenizer, t, true, ctx); - const std::string pseudo_select_name = t.content; - // look for additional arguments. - if (!expect(PAREN_OPEN, tokenizer, t, false, ctx)) { - // if we don't have any, we return here. - Rooted n{new SelectorNode( - ctx.manager, name, {pseudo_select_name, isGenerative})}; - return n; - } - // parse the argument list. - std::vector args; - // we require at least one argument, if parantheses are used - expect(TOKEN_TEXT, tokenizer, t, true, ctx); - args.push_back(t.content); - while (expect(COMMA, tokenizer, t, false, ctx)) { - // as long as we find commas we expect new arguments. - expect(TOKEN_TEXT, tokenizer, t, true, ctx); - args.push_back(t.content); - } - expect(PAREN_CLOSE, tokenizer, t, true, ctx); - // and we return with the finished Selector. - Rooted n{new SelectorNode( - ctx.manager, name, {pseudo_select_name, args, isGenerative})}; - return n; - } - case HASH: { - // a hash symbol is syntactic sugar for the PseudoSelector - // :has_id(id) - // so we expect an ID now. - Token t; - expect(TOKEN_TEXT, tokenizer, t, true, ctx); - std::vector args{t.content}; - // and we return the finished Selector - Rooted n{ - new SelectorNode(ctx.manager, name, {"has_id", args, false})}; - return n; - } - case BRACKET_OPEN: { - // in case of brackets we have one of two restrictive - // PseudoSelectors - // has_attribute ([attribute_name]) - // or - // has_value [attribute_name="value"] - // in both cases the attribute name comes first. 
- Token t; - expect(TOKEN_TEXT, tokenizer, t, true, ctx); - std::vector args{t.content}; - if (!expect(EQUALS, tokenizer, t, false, ctx)) { - // if no equals sign follows we have a has_attribute - // PseudoSelector - // we expect a closing bracket. - expect(BRACKET_CLOSE, tokenizer, t, true, ctx); - // and then we can return the result. - Rooted n{new SelectorNode( - ctx.manager, name, {"has_attribute", args, false})}; - return n; - } else { - // with an equals sign we have a has_value PseudoSelector and - // expect the value next. - expect(STRING, tokenizer, t, true, ctx); - args.push_back(t.content); - // then we expect a closing bracket. - expect(BRACKET_CLOSE, tokenizer, t, true, ctx); - // and then we can return the result. - Rooted n{new SelectorNode( - ctx.manager, name, {"has_value", args, false})}; - return n; - } - } - default: - // everything else is not part of the Selector anymore. - tokenizer.resetPeek(); - Rooted n{new SelectorNode(ctx.manager, name)}; - return n; - } -} - -// TODO: Add RuleSet parsing methods. - -bool CSSParser::expect(int expectedType, CodeTokenizer &tokenizer, Token &t, - bool force, ParserContext &ctx) -{ - bool end = !tokenizer.peek(t); - if (end || t.tokenId != expectedType) { - if (force) { - if (end) { - throw ParserException{ - "Unexpected end of file!", "", - // TODO: Line handling? - // tokenizer.getInput().getLine(), - // tokenizer.getInput().getColumn() - }; - } else { - throw ParserException{ - "Unexpected token!", "", - // TODO: Line handling? 
- // tokenizer.getInput().getLine(), - // tokenizer.getInput().getColumn() - }; - } - } else { - tokenizer.resetPeek(); - return false; - } - } - tokenizer.consumePeek(); - return true; -} -} -} -} diff --git a/src/core/CSSParser.hpp b/src/core/CSSParser.hpp deleted file mode 100644 index 870ce37..0000000 --- a/src/core/CSSParser.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - Ousía - Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _OUSIA_CSS_PARSER_HPP_ -#define _OUSIA_CSS_PARSER_HPP_ - -#include -#include - -#include - -#include "BufferedCharReader.hpp" -#include "CodeTokenizer.hpp" -#include "CSS.hpp" -#include "Exceptions.hpp" - -namespace ousia { -namespace parser { -namespace css { - -/** - * This is a context free, recursive parser for a subset of the CSS3 language - * as defined by W3C. 
We allow the following grammar: - * - * DOC := SELECT RULESET DOC | epsilon - * SELECTORS := SELECT , SELECTORS | SELECT - * SELECT := SELECT' OPERATOR SELECT | SELECT' - * SELECT' := TYPE | TYPE:PSEUDO | TYPE::GEN_PSEUDO | - * TYPE:PSEUDO(ARGUMENTS) | - * TYPE::GEN_PSEUDO(ARGUMENTS) | TYPE#ID | - * TYPE[ATTRIBUTE] | TYPE[ATTRIBUTE=VALUE] - * TYPE := string - * PSEUDO := string - * GEN_PSEUDO := string - * ARGUMENTS := string , ARGUMENTS - * ID := string - * ATTRIBUTE := string - * VALUE := string - * OPERATOR := epsilon | > - * RULESET := epsilon | { RULES } - * RULES := RULE RULES | epsilon - * RULE := KEY : VALUE ; - * KEY := string - * VALUE := type-specific parser - * - * - * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de - */ -class CSSParser : public Parser { -private: - /** - * Implements the DOC Nonterminal - */ - void parseDocument(Rooted root, CodeTokenizer &tokenizer, - ParserContext &ctx); - /** - * Implements the SELECTORS Nonterminal and adds all leaf nodes of the - * resulting SelectorTree to the input leafList so that a parsed RuleSet can - * be inserted there. - */ - void parseSelectors(Rooted root, CodeTokenizer &tokenizer, - std::vector> &leafList, - ParserContext &ctx); - /** - * Implements the SELECT Nonterminal, which in effect parses a SelectorPath - * of the SelectorTree and returns the beginning node of the path as first - * element as well as the leaf of the path as second tuple element. - */ - std::tuple, Rooted> parseSelector( - CodeTokenizer &tokenizer, ParserContext &ctx); - - /** - * Implements the SELECT' Nonterminal, which parses a single Selector with - * its PseudoSelector and returns it. - */ - Rooted parsePrimitiveSelector(CodeTokenizer &tokenizer, - ParserContext &ctx); - - // TODO: Add RuleSet parsing methods. - - /** - * A convenience function to wrap around the tokenizer peek() function that - * only returns true if an instance of the expected type occurs. 
- * - * @param expectedType the ID of the expected type according to the - * CodeTokenizer specification. - * @param tokenizer the tokenizer for the input. - * @param t an empty token that gets the parsed token content - * if it has the expected type. - * @param force a flag to be set if it would be fatal for the - * parsing process to get the wrong type. In that case - * an exception is thrown. - * @return true iff a token of the expected type was found. - */ - bool expect(int expectedType, CodeTokenizer &tokenizer, Token &t, - bool force, ParserContext &ctx); - -public: - /** - * This parses the given input as CSS content as specified by the grammar - * seen above. The return value is a Rooted reference to the root of the - * SelectorTree. - * TODO: The RuleSet at the respective node at the tree lists all CSS Style - * rules that apply. - * - * @param is is a reference to the input stream that should be parsed. - * @param ctx is a reference to the context that should be used while - * parsing the document. - * @return returns the root node of the resulting SelectorTree. For more - * information on the return conventions consult the Parser.hpp. - */ - Rooted parse(std::istream &is, ParserContext &ctx) override; - - /** - * As befits a class called CSSParser, this Parser parses CSS. - */ - std::set mimetypes() - { - std::set out{"text/css"}; - return out; - } -}; -} -} -} - -#endif diff --git a/src/plugins/css/CSSParser.cpp b/src/plugins/css/CSSParser.cpp new file mode 100644 index 0000000..d239359 --- /dev/null +++ b/src/plugins/css/CSSParser.cpp @@ -0,0 +1,319 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "CSSParser.hpp" + +namespace ousia { +namespace parser { +namespace css { + +// CSS code tokens +static const int CURLY_OPEN = 1; +static const int CURLY_CLOSE = 2; +static const int COLON = 3; +static const int DOUBLE_COLON = 4; +static const int SEMICOLON = 5; +static const int HASH = 6; +static const int BRACKET_OPEN = 7; +static const int BRACKET_CLOSE = 8; +static const int PAREN_OPEN = 9; +static const int PAREN_CLOSE = 10; +static const int EQUALS = 11; +static const int ARROW = 12; +static const int COMMA = 13; +// comments +static const int COMMENT = 100; +static const int COMMENT_OPEN = 101; +static const int COMMENT_CLOSE = 102; +// strings +static const int STRING = 200; +static const int DOUBLE_QUOTE = 201; +static const int ESCAPE = 202; +// general syntax +static const int LINEBREAK = 300; + +static const TokenTreeNode CSS_ROOT{{{"{", CURLY_OPEN}, + {"}", CURLY_CLOSE}, + {":", COLON}, + {"::", DOUBLE_COLON}, + {";", SEMICOLON}, + {"#", HASH}, + {"[", BRACKET_OPEN}, + {"]", BRACKET_CLOSE}, + {"(", PAREN_OPEN}, + {")", PAREN_CLOSE}, + {"=", EQUALS}, + {">", ARROW}, + {",", COMMA}, + {"/*", COMMENT_OPEN}, + {"*/", COMMENT_CLOSE}, + {"\"", DOUBLE_QUOTE}, + {"\\", ESCAPE}, + // linux linebreak + {"\n", LINEBREAK}, + // windows linebreak + {"\r\n", LINEBREAK}, + // Mac OS linebreak + {"\r", LINEBREAK}}}; + +static const std::map CSS_DESCRIPTORS = { + {COMMENT_OPEN, {CodeTokenMode::BLOCK_COMMENT_START, COMMENT}}, + {COMMENT_CLOSE, {CodeTokenMode::BLOCK_COMMENT_END, COMMENT}}, + {DOUBLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}}, + {ESCAPE, {CodeTokenMode::ESCAPE, ESCAPE}}, + {LINEBREAK, 
{CodeTokenMode::LINEBREAK, LINEBREAK}}}; + +Rooted CSSParser::parse(std::istream &is, ParserContext &ctx) +{ + BufferedCharReader input{is}; + CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS}; + tokenizer.ignoreComments = true; + Rooted root = {new SelectorNode{ctx.manager, "root"}}; + parseDocument(root, tokenizer, ctx); + return root; +} + +void CSSParser::parseDocument(Rooted root, + CodeTokenizer &tokenizer, ParserContext &ctx) +{ + Token t; + if (!tokenizer.peek(t)) { + return; + } + tokenizer.resetPeek(); + std::vector> leafList; + parseSelectors(root, tokenizer, leafList, ctx); + // TODO: Parse Ruleset + parseDocument(root, tokenizer, ctx); +} + +void CSSParser::parseSelectors(Rooted root, + CodeTokenizer &tokenizer, + std::vector> &leafList, + ParserContext &ctx) +{ + auto tuple = parseSelector(tokenizer, ctx); + // append the SelectorPath to the root node. + std::vector> unmergedLeafs = + root->append(std::get<0>(tuple)); + // append the leaf to the leafList. + switch (unmergedLeafs.size()) { + case 0: + // if the leaf could be merged we take the leaf reference from the + // parseSelector method. + leafList.push_back(std::get<1>(tuple)); + break; + case 1: + // if the leaf could not be merged we take the existing leaf. + leafList.push_back(unmergedLeafs[0]); + break; + case 2: + // as the parseSelector is supposed to parse only a SelectorPath + // there should not be more than one leaf. + throw ParserException{ + "Internal Error: More than one leaf in SelectorPath!", "", + // TODO: Line handling? + // tokenizer.getInput().getLine(), + // tokenizer.getInput().getColumn() + }; + } + // if we find a comma, we can proceed parsing selectors. 
+ Token t; + if (expect(COMMA, tokenizer, t, false, ctx)) { + parseSelectors(root, tokenizer, leafList, ctx); + } +} + +std::tuple, Rooted> CSSParser::parseSelector( + CodeTokenizer &tokenizer, ParserContext &ctx) +{ + Rooted s = parsePrimitiveSelector(tokenizer, ctx); + Token t; + if (!tokenizer.peek(t)) { + // if we are at the end the found selector is the immediate child as + // well as the leaf. + return std::make_tuple(s, s); + } + switch (t.tokenId) { + case TOKEN_TEXT: { + // if we find text there is a next token in a DESCENDANT + // relationship (A B) + tokenizer.resetPeek(); + // so we parse the rest of the subsequent SelectorPath + auto tuple = parseSelector(tokenizer, ctx); + // then we establish the DESCENDANT relationship + s->getEdges().push_back(new SelectorNode::SelectorEdge( + ctx.manager, std::get<0>(tuple))); + // and we return this node as well as the leaf. + return std::make_tuple(s, std::get<1>(tuple)); + } + case ARROW: { + tokenizer.consumePeek(); + // if we find an arrow there is a next token in a CHILD + // relationship (A > B) + // so we parse the rest of the subsequent SelectorPath + auto tuple = parseSelector(tokenizer, ctx); + // then we establish the DESCENDANT relationship + s->getEdges().push_back(new SelectorNode::SelectorEdge( + ctx.manager, std::get<0>(tuple), + SelectionOperator::DIRECT_DESCENDANT)); + // and we return this node as well as the leaf. + return std::make_tuple(s, std::get<1>(tuple)); + } + default: + // everything else is not part of the SelectorPath anymore. + tokenizer.resetPeek(); + return std::make_tuple(s, s); + } +} + +Rooted CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer, + ParserContext &ctx) +{ + // first and foremost we expect a class name. + Token t; + expect(TOKEN_TEXT, tokenizer, t, true, ctx); + const std::string name = t.content; + if (!tokenizer.peek(t)) { + // if we are at the end, we just return this selector with its name. 
+ Rooted n{new SelectorNode(ctx.manager, name)}; + return n; + } + + bool isGenerative = false; + + switch (t.tokenId) { + case DOUBLE_COLON: + // if we find a double colon we have a generative PseudoSelector. + isGenerative = true; + // this is supposed to fall through; no missing break. + case COLON: { + // if we find a colon we have a restrictive PseudoSelector. + tokenizer.consumePeek(); + // get the PseudoSelector name. + expect(TOKEN_TEXT, tokenizer, t, true, ctx); + const std::string pseudo_select_name = t.content; + // look for additional arguments. + if (!expect(PAREN_OPEN, tokenizer, t, false, ctx)) { + // if we don't have any, we return here. + Rooted n{new SelectorNode( + ctx.manager, name, {pseudo_select_name, isGenerative})}; + return n; + } + // parse the argument list. + std::vector args; + // we require at least one argument, if parantheses are used + expect(TOKEN_TEXT, tokenizer, t, true, ctx); + args.push_back(t.content); + while (expect(COMMA, tokenizer, t, false, ctx)) { + // as long as we find commas we expect new arguments. + expect(TOKEN_TEXT, tokenizer, t, true, ctx); + args.push_back(t.content); + } + expect(PAREN_CLOSE, tokenizer, t, true, ctx); + // and we return with the finished Selector. + Rooted n{new SelectorNode( + ctx.manager, name, {pseudo_select_name, args, isGenerative})}; + return n; + } + case HASH: { + // a hash symbol is syntactic sugar for the PseudoSelector + // :has_id(id) + // so we expect an ID now. + Token t; + expect(TOKEN_TEXT, tokenizer, t, true, ctx); + std::vector args{t.content}; + // and we return the finished Selector + Rooted n{ + new SelectorNode(ctx.manager, name, {"has_id", args, false})}; + return n; + } + case BRACKET_OPEN: { + // in case of brackets we have one of two restrictive + // PseudoSelectors + // has_attribute ([attribute_name]) + // or + // has_value [attribute_name="value"] + // in both cases the attribute name comes first. 
+ Token t; + expect(TOKEN_TEXT, tokenizer, t, true, ctx); + std::vector args{t.content}; + if (!expect(EQUALS, tokenizer, t, false, ctx)) { + // if no equals sign follows we have a has_attribute + // PseudoSelector + // we expect a closing bracket. + expect(BRACKET_CLOSE, tokenizer, t, true, ctx); + // and then we can return the result. + Rooted n{new SelectorNode( + ctx.manager, name, {"has_attribute", args, false})}; + return n; + } else { + // with an equals sign we have a has_value PseudoSelector and + // expect the value next. + expect(STRING, tokenizer, t, true, ctx); + args.push_back(t.content); + // then we expect a closing bracket. + expect(BRACKET_CLOSE, tokenizer, t, true, ctx); + // and then we can return the result. + Rooted n{new SelectorNode( + ctx.manager, name, {"has_value", args, false})}; + return n; + } + } + default: + // everything else is not part of the Selector anymore. + tokenizer.resetPeek(); + Rooted n{new SelectorNode(ctx.manager, name)}; + return n; + } +} + +// TODO: Add RuleSet parsing methods. + +bool CSSParser::expect(int expectedType, CodeTokenizer &tokenizer, Token &t, + bool force, ParserContext &ctx) +{ + bool end = !tokenizer.peek(t); + if (end || t.tokenId != expectedType) { + if (force) { + if (end) { + throw ParserException{ + "Unexpected end of file!", "", + // TODO: Line handling? + // tokenizer.getInput().getLine(), + // tokenizer.getInput().getColumn() + }; + } else { + throw ParserException{ + "Unexpected token!", "", + // TODO: Line handling? 
+ // tokenizer.getInput().getLine(), + // tokenizer.getInput().getColumn() + }; + } + } else { + tokenizer.resetPeek(); + return false; + } + } + tokenizer.consumePeek(); + return true; +} +} +} +} diff --git a/src/plugins/css/CSSParser.hpp b/src/plugins/css/CSSParser.hpp new file mode 100644 index 0000000..eb51dfa --- /dev/null +++ b/src/plugins/css/CSSParser.hpp @@ -0,0 +1,140 @@ +/* + Ousía + Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef _OUSIA_CSS_PARSER_HPP_ +#define _OUSIA_CSS_PARSER_HPP_ + +#include +#include + +#include +#include +#include +#include + +namespace ousia { +namespace parser { +namespace css { + +/** + * This is a context free, recursive parser for a subset of the CSS3 language + * as defined by W3C. 
We allow the following grammar: + * + * DOC := SELECT RULESET DOC | epsilon + * SELECTORS := SELECT , SELECTORS | SELECT + * SELECT := SELECT' OPERATOR SELECT | SELECT' + * SELECT' := TYPE | TYPE:PSEUDO | TYPE::GEN_PSEUDO | + * TYPE:PSEUDO(ARGUMENTS) | + * TYPE::GEN_PSEUDO(ARGUMENTS) | TYPE#ID | + * TYPE[ATTRIBUTE] | TYPE[ATTRIBUTE=VALUE] + * TYPE := string + * PSEUDO := string + * GEN_PSEUDO := string + * ARGUMENTS := string , ARGUMENTS + * ID := string + * ATTRIBUTE := string + * VALUE := string + * OPERATOR := epsilon | > + * RULESET := epsilon | { RULES } + * RULES := RULE RULES | epsilon + * RULE := KEY : VALUE ; + * KEY := string + * VALUE := type-specific parser + * + * + * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de + */ +class CSSParser : public Parser { +private: + /** + * Implements the DOC Nonterminal + */ + void parseDocument(Rooted root, CodeTokenizer &tokenizer, + ParserContext &ctx); + /** + * Implements the SELECTORS Nonterminal and adds all leaf nodes of the + * resulting SelectorTree to the input leafList so that a parsed RuleSet can + * be inserted there. + */ + void parseSelectors(Rooted root, CodeTokenizer &tokenizer, + std::vector> &leafList, + ParserContext &ctx); + /** + * Implements the SELECT Nonterminal, which in effect parses a SelectorPath + * of the SelectorTree and returns the beginning node of the path as first + * element as well as the leaf of the path as second tuple element. + */ + std::tuple, Rooted> parseSelector( + CodeTokenizer &tokenizer, ParserContext &ctx); + + /** + * Implements the SELECT' Nonterminal, which parses a single Selector with + * its PseudoSelector and returns it. + */ + Rooted parsePrimitiveSelector(CodeTokenizer &tokenizer, + ParserContext &ctx); + + // TODO: Add RuleSet parsing methods. + + /** + * A convenience function to wrap around the tokenizer peek() function that + * only returns true if an instance of the expected type occurs. 
+ * + * @param expectedType the ID of the expected type according to the + * CodeTokenizer specification. + * @param tokenizer the tokenizer for the input. + * @param t an empty token that gets the parsed token content + * if it has the expected type. + * @param force a flag to be set if it would be fatal for the + * parsing process to get the wrong type. In that case + * an exception is thrown. + * @return true iff a token of the expected type was found. + */ + bool expect(int expectedType, CodeTokenizer &tokenizer, Token &t, + bool force, ParserContext &ctx); + +public: + /** + * This parses the given input as CSS content as specified by the grammar + * seen above. The return value is a Rooted reference to the root of the + * SelectorTree. + * TODO: The RuleSet at the respective node at the tree lists all CSS Style + * rules that apply. + * + * @param is is a reference to the input stream that should be parsed. + * @param ctx is a reference to the context that should be used while + * parsing the document. + * @return returns the root node of the resulting SelectorTree. For more + * information on the return conventions consult the Parser.hpp. + */ + Rooted parse(std::istream &is, ParserContext &ctx) override; + + /** + * As befits a class called CSSParser, this Parser parses CSS. + */ + std::set mimetypes() + { + std::set out{"text/css"}; + return out; + } +}; +} +} +} + +#endif diff --git a/test/core/CSSParserTest.cpp b/test/core/CSSParserTest.cpp deleted file mode 100644 index c231092..0000000 --- a/test/core/CSSParserTest.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - Ousía - Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include - -#include - -#include - -namespace ousia { -namespace parser { -namespace css { -TEST(CSSParser, testParseSelectors) -{ - // create a string describing a SelectorTree as input. - std::stringstream input; - input << "A>B,A B:r, C#a A[bla=\"blub\"], A::g(4,2,3)"; - /* This should describe the tree: - * root_____ - * | \ \ - * A C#a A::g(4,2,3) - * |\ \ - * B B::r A[bla="blub"] - */ - - // initialize an empty parser context. - StandaloneParserContext ctx; - - // parse the input. - CSSParser instance; - Rooted root = instance.parse(input, ctx).cast(); - - // we expect three children of the root node overall. - ASSERT_EQ(3, root->getEdges().size()); - // get all "A" children, which should be two. 
- std::vector> children = root->getChildren("A"); - ASSERT_EQ(2, children.size()); - // assert A - Rooted A = children[0]; - ASSERT_EQ("A", A->getName()); - { - PseudoSelector select{"true", false}; - ASSERT_EQ(select, A->getPseudoSelector()); - } - ASSERT_EQ(2, A->getEdges().size()); - { - // assert A > B - std::vector> Achildren = - A->getChildren(SelectionOperator::DIRECT_DESCENDANT, "B"); - ASSERT_EQ(1, Achildren.size()); - Rooted B = Achildren[0]; - ASSERT_EQ("B", B->getName()); - { - PseudoSelector select{"true", false}; - ASSERT_EQ(select, B->getPseudoSelector()); - } - ASSERT_EQ(0, B->getEdges().size()); - // assert A B:r - Achildren = A->getChildren(SelectionOperator::DESCENDANT, "B"); - ASSERT_EQ(1, Achildren.size()); - Rooted Br = Achildren[0]; - ASSERT_EQ("B", Br->getName()); - { - PseudoSelector select{"r", false}; - ASSERT_EQ(select, Br->getPseudoSelector()); - } - ASSERT_EQ(0, Br->getEdges().size()); - } - // assert C#a - children = root->getChildren("C"); - ASSERT_EQ(1, children.size()); - Rooted C = children[0]; - ASSERT_EQ("C", C->getName()); - { - PseudoSelector select{"has_id", {"a"}, false}; - ASSERT_EQ(select, C->getPseudoSelector()); - } - ASSERT_EQ(1, C->getEdges().size()); - { - // assert C#a A[bla=\"blub\"] - std::vector> Cchildren = - C->getChildren(SelectionOperator::DESCENDANT, "A"); - ASSERT_EQ(1, Cchildren.size()); - Rooted A = Cchildren[0]; - ASSERT_EQ("A", A->getName()); - { - PseudoSelector select{"has_value", {"bla", "blub"}, false}; - ASSERT_EQ(select, A->getPseudoSelector()); - } - ASSERT_EQ(0, A->getEdges().size()); - } - // assert A::g(4,2,3) - children = root->getChildren("A"); - ASSERT_EQ(2, children.size()); - Rooted Ag = children[1]; - ASSERT_EQ("A", Ag->getName()); - { - PseudoSelector select{"g", {"4", "2", "3"}, true}; - ASSERT_EQ(select, Ag->getPseudoSelector()); - } - ASSERT_EQ(0, Ag->getEdges().size()); -} -} -} -} diff --git a/test/plugins/css/CSSParserTest.cpp b/test/plugins/css/CSSParserTest.cpp new file mode 
100644 index 0000000..84d4893 --- /dev/null +++ b/test/plugins/css/CSSParserTest.cpp @@ -0,0 +1,120 @@ +/* + Ousía + Copyright (C) 2014, 2015 Benjamin Paaßen, Andreas Stöckel + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include + +#include + +#include + +namespace ousia { +namespace parser { +namespace css { +TEST(CSSParser, testParseSelectors) +{ + // create a string describing a SelectorTree as input. + std::stringstream input; + input << "A>B,A B:r, C#a A[bla=\"blub\"], A::g(4,2,3)"; + /* This should describe the tree: + * root_____ + * | \ \ + * A C#a A::g(4,2,3) + * |\ \ + * B B:r A[bla="blub"] + */ + + // initialize an empty parser context. + StandaloneParserContext ctx; + + // parse the input. + CSSParser instance; + Rooted root = instance.parse(input, ctx).cast(); + + // we expect three children of the root node overall. + ASSERT_EQ(3, root->getEdges().size()); + // get all "A" children, which should be two. 
+ std::vector> children = root->getChildren("A"); + ASSERT_EQ(2, children.size()); + // assert A + Rooted A = children[0]; + ASSERT_EQ("A", A->getName()); + { + PseudoSelector select{"true", false}; + ASSERT_EQ(select, A->getPseudoSelector()); + } + ASSERT_EQ(2, A->getEdges().size()); + { + // assert A > B + std::vector> Achildren = + A->getChildren(SelectionOperator::DIRECT_DESCENDANT, "B"); + ASSERT_EQ(1, Achildren.size()); + Rooted B = Achildren[0]; + ASSERT_EQ("B", B->getName()); + { + PseudoSelector select{"true", false}; + ASSERT_EQ(select, B->getPseudoSelector()); + } + ASSERT_EQ(0, B->getEdges().size()); + // assert A B:r + Achildren = A->getChildren(SelectionOperator::DESCENDANT, "B"); + ASSERT_EQ(1, Achildren.size()); + Rooted Br = Achildren[0]; + ASSERT_EQ("B", Br->getName()); + { + PseudoSelector select{"r", false}; + ASSERT_EQ(select, Br->getPseudoSelector()); + } + ASSERT_EQ(0, Br->getEdges().size()); + } + // assert C#a + children = root->getChildren("C"); + ASSERT_EQ(1, children.size()); + Rooted C = children[0]; + ASSERT_EQ("C", C->getName()); + { + PseudoSelector select{"has_id", {"a"}, false}; + ASSERT_EQ(select, C->getPseudoSelector()); + } + ASSERT_EQ(1, C->getEdges().size()); + { + // assert C#a A[bla=\"blub\"] + std::vector> Cchildren = + C->getChildren(SelectionOperator::DESCENDANT, "A"); + ASSERT_EQ(1, Cchildren.size()); + Rooted A = Cchildren[0]; + ASSERT_EQ("A", A->getName()); + { + PseudoSelector select{"has_value", {"bla", "blub"}, false}; + ASSERT_EQ(select, A->getPseudoSelector()); + } + ASSERT_EQ(0, A->getEdges().size()); + } + // assert A::g(4,2,3) + children = root->getChildren("A"); + ASSERT_EQ(2, children.size()); + Rooted Ag = children[1]; + ASSERT_EQ("A", Ag->getName()); + { + PseudoSelector select{"g", {"4", "2", "3"}, true}; + ASSERT_EQ(select, Ag->getPseudoSelector()); + } + ASSERT_EQ(0, Ag->getEdges().size()); +} +} +} +} -- cgit v1.2.3