/*
Ousía
Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file CSSParser.hpp
*
* Contains the classes needed to transform a CSS string to a CSS SelectorTree
* with attached RuleSets. The details are explained in the class
* documentations.
*
* @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de
*/
#ifndef _OUSIA_CSS_PARSER_HPP_
#define _OUSIA_CSS_PARSER_HPP_
#include
#include
#include
#include
#include
#include
#include
namespace ousia {
/**
* This is a recursive parser for a context-free subset of the CSS3 language
* as defined by the W3C, with one parsing method per nonterminal. It accepts
* the following grammar:
*
* DOC := SELECTORS RULESET DOC | epsilon
* SELECTORS := SELECT , SELECTORS | SELECT
* SELECT := SELECT' OPERATOR SELECT | SELECT'
* SELECT' := TYPE | TYPE:PSEUDO | TYPE::GEN_PSEUDO |
* TYPE:PSEUDO(ARGUMENTS) |
* TYPE::GEN_PSEUDO(ARGUMENTS) | TYPE#ID |
* TYPE[ATTRIBUTE] | TYPE[ATTRIBUTE=VALUE]
* TYPE := string
* PSEUDO := string
* GEN_PSEUDO := string
* ARGUMENTS := string , ARGUMENTS | string
* ID := string
* ATTRIBUTE := string
* VALUE := string
* OPERATOR := epsilon | >
* RULESET := epsilon | { RULES }
* RULES := RULE RULES | epsilon
* RULE := KEY : VALUE ;
* KEY := string
* VALUE := type-specific parser
*
*
* @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de
*/
class CSSParser : public Parser {
private:
// Internal helpers: one parse method per grammar nonterminal, plus expect().
// NOTE(review): in this view of the file the template arguments of Rooted,
// std::vector and std::pair appear to be missing from the declarations below;
// restore them from the original source before compiling.
/**
* Implements the DOC nonterminal.
*
* @param root      the node the parsed content is attached to (presumably
*                  the root of the SelectorTree -- confirm against the
*                  implementation).
* @param tokenizer the tokenizer for the input.
* @param ctx       the parser context used while parsing.
*/
void parseDocument(Rooted root,
CodeTokenizer &tokenizer, ParserContext &ctx);
/**
* Implements the SELECTORS nonterminal and adds all leaf nodes of the
* resulting SelectorTree to the input leafList so that a parsed RuleSet can
* be inserted there.
*
* @param root      the node the parsed selectors are attached to (presumably
*                  the root of the SelectorTree -- confirm against the
*                  implementation).
* @param tokenizer the tokenizer for the input.
* @param leafList  output list that receives the leaf nodes of the parsed
*                  selector paths.
* @param ctx       the parser context used while parsing.
*/
void parseSelectors(Rooted root,
CodeTokenizer &tokenizer,
std::vector> &leafList,
ParserContext &ctx);
/**
* Implements the SELECT nonterminal, which in effect parses one SelectorPath
* of the SelectorTree.
*
* @param tokenizer the tokenizer for the input.
* @param ctx       the parser context used while parsing.
* @return a pair whose first element is the beginning node of the parsed
*         path and whose second element is the leaf of that path.
*/
std::pair, Rooted>
parseSelector(CodeTokenizer &tokenizer, ParserContext &ctx);
/**
* Implements the SELECT' nonterminal, which parses a single Selector
* together with its PseudoSelector.
*
* @param tokenizer the tokenizer for the input.
* @param ctx       the parser context used while parsing.
* @return the parsed Selector.
*/
Rooted parsePrimitiveSelector(CodeTokenizer &tokenizer,
ParserContext &ctx);
/**
* Implements the RULESET nonterminal, which parses an entire RuleSet. A
* RuleSet is optional (RULESET may be epsilon in the grammar), so input that
* consists only of Selector expressions is permitted.
*
* @param tokenizer the tokenizer for the input.
* @param ctx       the parser context used while parsing.
* @return the parsed RuleSet.
*/
Rooted parseRuleSet(CodeTokenizer &tokenizer,
ParserContext &ctx);
/**
* Implements the RULES nonterminal, which parses the CSSRules inside a
* RuleSet.
*
* @param tokenizer the tokenizer for the input.
* @param ruleSet   the RuleSet the parsed rules belong to (presumably the
*                  rules are stored in it -- confirm against the
*                  implementation).
* @param ctx       the parser context used while parsing.
*/
void parseRules(CodeTokenizer &tokenizer, Rooted ruleSet,
ParserContext &ctx);
/**
* Implements the RULE nonterminal, which parses one single CSSRule. Key
* and value are stored in the input references.
*
* @param tokenizer the tokenizer for the input.
* @param ctx       the parser context used while parsing.
* @param key       a (possibly empty) string reference that receives the key
*                  found.
* @param value     a (possibly empty) Variant reference that receives the
*                  value found.
*
* @return true if a rule was found.
*/
bool parseRule(CodeTokenizer &tokenizer, ParserContext &ctx,
std::string &key, Variant &value);
/**
* A convenience wrapper around the tokenizer's peek() function that only
* returns true if a token of the expected type occurs.
*
* @param expectedType the ID of the expected type according to the
*                     CodeTokenizer specification.
* @param tokenizer    the tokenizer for the input.
* @param t            an empty token that receives the parsed token content
*                     if it has the expected type.
* @param force        a flag to be set if it would be fatal for the
*                     parsing process to get the wrong type. In that case
*                     an exception is thrown.
* @param ctx          the parser context used while parsing (presumably
*                     needed for error reporting -- confirm against the
*                     implementation).
* @return true iff a token of the expected type was found.
*/
bool expect(int expectedType, CodeTokenizer &tokenizer, Token &t,
bool force, ParserContext &ctx);
protected:
/**
* Parses the given input as CSS content as specified by the grammar in the
* class documentation, producing a SelectorTree. SelectorTrees are
* documented in detail in CSS.hpp. The RuleSet at the respective node of
* the tree lists all CSS style rules that apply.
*
* Note that you are not required to insert CSS code containing actual
* rules. You are permitted to just insert a CSS Selector expression
* specifying some part of a DocumentTree you want to refer to.
*
* NOTE(review): this function is declared void, so the root of the
* resulting SelectorTree is not returned directly; it is presumably made
* available through ctx -- confirm against the implementation and the
* conventions described in Parser.hpp.
*
* @param reader is a reference to the CharReader instance from which the
*               input data should be read.
* @param ctx    is a reference to the context that should be used while
*               parsing the document.
*/
void doParse(CharReader &reader, ParserContext &ctx) override;
};
}
#endif