/*
Ousía
Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "CSSParser.hpp"
#include
#include
namespace ousia {
// Token ids for the CSS punctuation registered in the token tree.
static constexpr int CURLY_OPEN = 1;
static constexpr int CURLY_CLOSE = 2;
static constexpr int COLON = 3;
static constexpr int DOUBLE_COLON = 4;
static constexpr int SEMICOLON = 5;
static constexpr int HASH = 6;
static constexpr int BRACKET_OPEN = 7;
static constexpr int BRACKET_CLOSE = 8;
static constexpr int PAREN_OPEN = 9;
static constexpr int PAREN_CLOSE = 10;
static constexpr int EQUALS = 11;
static constexpr int ARROW = 12;
static constexpr int COMMA = 13;

// Token ids related to block comments.
static constexpr int COMMENT = 100;
static constexpr int COMMENT_OPEN = 101;
static constexpr int COMMENT_CLOSE = 102;

// Token ids related to string literals.
static constexpr int STRING = 200;
static constexpr int DOUBLE_QUOTE = 201;
static constexpr int ESCAPE = 202;

// Token ids for general syntax elements.
static constexpr int LINEBREAK = 300;
// Token table for CSS: pairs every token string with its token id. It is
// handed to the CodeTokenizer constructed in CSSParser::doParse.
// Note that both ":" and "::" are registered, as are the two-character
// comment delimiters "/*" and "*/".
static const TokenTreeNode CSS_ROOT{{{"{", CURLY_OPEN},
{"}", CURLY_CLOSE},
{":", COLON},
{"::", DOUBLE_COLON},
{";", SEMICOLON},
{"#", HASH},
{"[", BRACKET_OPEN},
{"]", BRACKET_CLOSE},
{"(", PAREN_OPEN},
{")", PAREN_CLOSE},
{"=", EQUALS},
{">", ARROW},
{",", COMMA},
{"/*", COMMENT_OPEN},
{"*/", COMMENT_CLOSE},
{"\"", DOUBLE_QUOTE},
{"\\", ESCAPE},
{"\n", LINEBREAK}}};
// Descriptor table handed to the CodeTokenizer in CSSParser::doParse. Each
// entry pairs a token id with a CodeTokenMode and a second id; the second id
// is presumably the id reported for the resulting code token (COMMENT,
// STRING, ...) -- TODO confirm against the CodeTokenizer documentation.
// NOTE(review): std::map is written without template arguments here -- confirm
// the key/value types against the CodeTokenizer header.
static const std::map CSS_DESCRIPTORS = {
{COMMENT_OPEN, {CodeTokenMode::BLOCK_COMMENT_START, COMMENT}},
{COMMENT_CLOSE, {CodeTokenMode::BLOCK_COMMENT_END, COMMENT}},
{DOUBLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}},
{ESCAPE, {CodeTokenMode::ESCAPE, ESCAPE}},
{LINEBREAK, {CodeTokenMode::LINEBREAK, LINEBREAK}}};
/**
 * Parses the CSS input supplied by the given reader.
 *
 * A CodeTokenizer is set up with the CSS token table and descriptors and is
 * configured to ignore comments and linebreaks. A SelectorNode named "root"
 * is created, pushed onto the parser scope while the document is parsed into
 * it, and popped from the scope afterwards.
 *
 * @param reader is the CharReader the CSS code is read from.
 * @param ctx is the parser context providing the manager and the scope.
 */
void CSSParser::doParse(CharReader &reader, ParserContext &ctx)
{
	CodeTokenizer tokenizer{reader, CSS_ROOT, CSS_DESCRIPTORS};
	tokenizer.ignoreComments = true;
	tokenizer.ignoreLinebreaks = true;

	// Create the root node and push it onto the parser scope
	Rooted<model::SelectorNode> root{
	    new model::SelectorNode{ctx.getManager(), "root"}};
	ctx.getScope().push(root);

	// Parse the document into the root node
	parseDocument(root, tokenizer, ctx);

	// Remove the element from the parser scope
	ctx.getScope().pop();
}
void CSSParser::parseDocument(Rooted root,
CodeTokenizer &tokenizer, ParserContext &ctx)
{
Token t;
if (!tokenizer.peek(t)) {
return;
}
tokenizer.resetPeek();
std::vector> leafList;
// parse the SelectorTree for this ruleSet.
parseSelectors(root, tokenizer, leafList, ctx);
// parse the RuleSet itself.
Rooted ruleSet = parseRuleSet(tokenizer, ctx);
for (auto &leaf : leafList) {
/*
* every leaf is an accepting node, if one considers the SelectorTree
* to be a finite state machine. This is relevant, if users do not use
* the CSS Parser to parse actual Ruleset content but to construct a
* SelectorTree just to identify a part of the DocumentTree.
*/
leaf->setAccepting(true);
/*
* similarly we append the found rules to all leafs.
*/
leaf->getRuleSet()->merge(ruleSet);
}
parseDocument(root, tokenizer, ctx);
}
void CSSParser::parseSelectors(
Rooted root, CodeTokenizer &tokenizer,
std::vector> &leafList, ParserContext &ctx)
{
auto tuple = parseSelector(tokenizer, ctx);
// append the SelectorPath to the root node.
std::vector> unmergedLeafs =
root->append(tuple.first);
// append the leaf to the leafList.
switch (unmergedLeafs.size()) {
case 0:
// if the leaf could be merged we take the leaf reference from the
// parseSelector method.
leafList.push_back(tuple.second);
break;
case 1:
// if the leaf could not be merged we take the existing leaf.
leafList.push_back(unmergedLeafs[0]);
break;
case 2:
// as the parseSelector is supposed to parse only a SelectorPath
// there should not be more than one leaf.
throw LoggableException{
"Internal Error: More than one leaf in SelectorPath!",
tokenizer.getInput()};
}
// if we find a comma, we can proceed parsing selectors.
Token t;
if (expect(COMMA, tokenizer, t, false, ctx)) {
parseSelectors(root, tokenizer, leafList, ctx);
}
}
std::pair, Rooted>
CSSParser::parseSelector(CodeTokenizer &tokenizer, ParserContext &ctx)
{
Rooted s = parsePrimitiveSelector(tokenizer, ctx);
Token t;
if (!tokenizer.peek(t)) {
// if we are at the end the found selector is the immediate child as
// well as the leaf.
return std::make_pair(s, s);
}
switch (t.tokenId) {
case TOKEN_TEXT: {
// if we find text there is a next token in a DESCENDANT
// relationship (A B)
tokenizer.resetPeek();
// so we parse the rest of the subsequent SelectorPath
auto tuple = parseSelector(tokenizer, ctx);
// then we establish the DESCENDANT relationship
s->getEdges().push_back(new model::SelectorNode::SelectorEdge(
ctx.getManager(), tuple.first));
// and we return this node as well as the leaf.
return std::make_pair(s, tuple.second);
}
case ARROW: {
tokenizer.consumePeek();
// if we find an arrow there is a next token in a CHILD
// relationship (A > B)
// so we parse the rest of the subsequent SelectorPath
auto tuple = parseSelector(tokenizer, ctx);
// then we establish the DESCENDANT relationship
s->getEdges().push_back(new model::SelectorNode::SelectorEdge(
ctx.getManager(), tuple.first,
model::SelectionOperator::DIRECT_DESCENDANT));
// and we return this node as well as the leaf.
return std::make_pair(s, tuple.second);
}
default:
// everything else is not part of the SelectorPath anymore.
tokenizer.resetPeek();
return std::make_pair(s, s);
}
}
/**
 * Parses a single primitive selector: a name that is optionally followed by
 * exactly one of
 *   - a PseudoSelector introduced by ':' (restrictive) or '::' (generative),
 *     optionally with an argument list in parentheses,
 *   - '#' and an id, which is stored as the PseudoSelector "has_id",
 *   - '[' attribute ']' ("has_attribute") or '[' attribute '=' string ']'
 *     ("has_value").
 *
 * @param tokenizer is the tokenizer the tokens are read from.
 * @param ctx is the parser context.
 * @return the SelectorNode created for the parsed selector.
 * @throws LoggableException (via expect) if a mandatory token is missing.
 */
Rooted<model::SelectorNode> CSSParser::parsePrimitiveSelector(
    CodeTokenizer &tokenizer, ParserContext &ctx)
{
	// first and foremost we expect a class name.
	Token t;
	expect(TOKEN_TEXT, tokenizer, t, true, ctx);
	const std::string name = t.content;
	if (!tokenizer.peek(t)) {
		// if we are at the end, we just return this selector with its name.
		Rooted<model::SelectorNode> n{
		    new model::SelectorNode(ctx.getManager(), name)};
		return n;
	}
	bool isGenerative = false;
	switch (t.tokenId) {
		case DOUBLE_COLON:
			// if we find a double colon we have a generative PseudoSelector.
			isGenerative = true;
		// this is supposed to fall through; no missing break.
		case COLON: {
			// if we find a colon we have a restrictive PseudoSelector.
			tokenizer.consumePeek();
			// get the PseudoSelector name.
			expect(TOKEN_TEXT, tokenizer, t, true, ctx);
			const std::string pseudo_select_name = t.content;
			// look for additional arguments.
			if (!expect(PAREN_OPEN, tokenizer, t, false, ctx)) {
				// if we don't have any, we return here.
				Rooted<model::SelectorNode> n{
				    new model::SelectorNode(ctx.getManager(), name,
				                            {pseudo_select_name, isGenerative})};
				return n;
			}
			// parse the argument list.
			Variant::arrayType args;
			// we require at least one argument, if parentheses are used;
			// as long as we find commas we expect further arguments.
			// XXX
			do {
				args.push_back(
				    VariantReader::parseGeneric(tokenizer.getInput(),
				                                ctx.getLogger(),
				                                {',', ')'}).second);
			} while (expect(COMMA, tokenizer, t, false, ctx));
			expect(PAREN_CLOSE, tokenizer, t, true, ctx);
			// and we return with the finished Selector.
			Rooted<model::SelectorNode> n{new model::SelectorNode(
			    ctx.getManager(), name,
			    {pseudo_select_name, args, isGenerative})};
			return n;
		}
		case HASH: {
			// a hash symbol is syntactic sugar for the PseudoSelector
			// :has_id(id)
			// consume the peeked '#' explicitly, as the COLON branch does
			// for its token.
			tokenizer.consumePeek();
			// so we expect an ID now.
			expect(TOKEN_TEXT, tokenizer, t, true, ctx);
			Variant::arrayType args{Variant(t.content.c_str())};
			// and we return the finished Selector
			Rooted<model::SelectorNode> n{new model::SelectorNode(
			    ctx.getManager(), name, {"has_id", args, false})};
			return n;
		}
		case BRACKET_OPEN: {
			// in case of brackets we have one of two restrictive
			// PseudoSelectors
			// has_attribute ([attribute_name])
			// or
			// has_value [attribute_name="value"]
			// consume the peeked '[' explicitly, as the COLON branch does
			// for its token.
			tokenizer.consumePeek();
			// in both cases the attribute name comes first.
			expect(TOKEN_TEXT, tokenizer, t, true, ctx);
			Variant::arrayType args{Variant(t.content.c_str())};
			if (!expect(EQUALS, tokenizer, t, false, ctx)) {
				// if no equals sign follows we have a has_attribute
				// PseudoSelector
				// we expect a closing bracket.
				expect(BRACKET_CLOSE, tokenizer, t, true, ctx);
				// and then we can return the result.
				Rooted<model::SelectorNode> n{new model::SelectorNode(
				    ctx.getManager(), name, {"has_attribute", args, false})};
				return n;
			}
			// with an equals sign we have a has_value PseudoSelector and
			// expect the value next.
			expect(STRING, tokenizer, t, true, ctx);
			args.push_back(Variant(t.content.c_str()));
			// then we expect a closing bracket.
			expect(BRACKET_CLOSE, tokenizer, t, true, ctx);
			// and then we can return the result.
			Rooted<model::SelectorNode> n{new model::SelectorNode(
			    ctx.getManager(), name, {"has_value", args, false})};
			return n;
		}
		default: {
			// everything else is not part of the Selector anymore.
			tokenizer.resetPeek();
			Rooted<model::SelectorNode> n{
			    new model::SelectorNode(ctx.getManager(), name)};
			return n;
		}
	}
}
/**
 * Parses an optional rule set of the form '{' rules '}'.
 *
 * @param tokenizer is the tokenizer the tokens are read from.
 * @param ctx is the parser context.
 * @return a newly created RuleSet. It is returned without any parsed rules
 * if the next token is not an opening curly brace.
 * @throws LoggableException (via expect) if the closing curly brace is
 * missing after the rules.
 */
Rooted<model::RuleSet> CSSParser::parseRuleSet(CodeTokenizer &tokenizer,
                                               ParserContext &ctx)
{
	Rooted<model::RuleSet> ruleSet{new model::RuleSet(ctx.getManager())};
	// if we have no ruleset content, we return an empty ruleset.
	Token t;
	if (!expect(CURLY_OPEN, tokenizer, t, false, ctx)) {
		return ruleSet;
	}
	// otherwise we parse the rules.
	parseRules(tokenizer, ruleSet, ctx);
	// and we expect closing curly braces.
	expect(CURLY_CLOSE, tokenizer, t, true, ctx);
	return ruleSet;
}
void CSSParser::parseRules(CodeTokenizer &tokenizer,
Rooted ruleSet, ParserContext &ctx)
{
std::string key;
Variant value;
while (parseRule(tokenizer, ctx, key, value)) {
ruleSet->getRules().insert({key, value});
}
}
/**
 * Tries to parse a single rule of the form "key: value;".
 *
 * @param tokenizer is the tokenizer the tokens are read from.
 * @param ctx is the parser context.
 * @param key is set to the text of the key token if a rule was found.
 * @param value is set to the parsed value if a rule was found.
 * @return true if a rule was parsed, false if the next token is not text
 * (in which case key and value are left untouched).
 * @throws LoggableException (via expect) if the colon or the semicolon is
 * missing.
 */
bool CSSParser::parseRule(CodeTokenizer &tokenizer, ParserContext &ctx,
                          std::string &key, Variant &value)
{
	Token token;
	// a rule has to start with text, which is its key.
	const bool foundKey = expect(TOKEN_TEXT, tokenizer, token, false, ctx);
	if (foundKey) {
		key = token.content;
		// the key is separated from the value by a mandatory colon.
		expect(COLON, tokenizer, token, true, ctx);
		// the value is read up to the terminating semicolon.
		// TODO: Resolve key for appropriate parsing function here.
		value = VariantReader::parseGeneric(tokenizer.getInput(),
		                                    ctx.getLogger(), {';'}).second;
		// the rule is closed by a mandatory semicolon.
		expect(SEMICOLON, tokenizer, token, true, ctx);
	}
	return foundKey;
}
/**
 * Checks whether the next token has the expected type and consumes it if so.
 *
 * @param expectedType is the token id the next token should have.
 * @param tokenizer is the tokenizer the token is peeked from.
 * @param t receives the peeked token.
 * @param force decides what happens on a mismatch or at the end of the
 * input: if true an exception is thrown, otherwise the peek is reset and
 * false is returned.
 * @param ctx is the parser context.
 * @return true if the expected token was found and consumed, false
 * otherwise (only possible if force is false).
 * @throws LoggableException if force is true and the input has ended or the
 * next token has a different type.
 */
bool CSSParser::expect(int expectedType, CodeTokenizer &tokenizer, Token &t,
                       bool force, ParserContext &ctx)
{
	const bool hasToken = tokenizer.peek(t);

	// success: the token is there and has the right type.
	if (hasToken && t.tokenId == expectedType) {
		tokenizer.consumePeek();
		return true;
	}

	// optional token: undo the peek and report the miss.
	if (!force) {
		tokenizer.resetPeek();
		return false;
	}

	// mandatory token: report why it could not be read.
	if (!hasToken) {
		throw LoggableException{"Unexpected end of file!",
		                        tokenizer.getInput()};
	}
	throw LoggableException{"Unexpected token!", tokenizer.getInput()};
}
}