diff options
Diffstat (limited to 'src/core')
| -rw-r--r-- | src/core/BufferedCharReader.cpp | 2 | ||||
| -rw-r--r-- | src/core/BufferedCharReader.hpp | 6 | ||||
| -rw-r--r-- | src/core/CSSParser.cpp | 103 | ||||
| -rw-r--r-- | src/core/CSSParser.hpp | 38 | ||||
| -rw-r--r-- | src/core/parser/Parser.hpp | 14 | ||||
| -rw-r--r-- | src/core/parser/Scope.hpp | 7 | 
6 files changed, 99 insertions, 71 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp index cf481df..23c219a 100644 --- a/src/core/BufferedCharReader.cpp +++ b/src/core/BufferedCharReader.cpp @@ -224,7 +224,7 @@ void BufferedCharReader::resetPeek()  	peekCursor.assign(readCursor);  } -bool BufferedCharReader::atEnd() +bool BufferedCharReader::atEnd() const  {  	if (depleted || !inputStream) {  		if (buffer.size() <= 0) { diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp index ec76b03..bd19d4a 100644 --- a/src/core/BufferedCharReader.hpp +++ b/src/core/BufferedCharReader.hpp @@ -252,21 +252,21 @@ public:  	 *  	 * @return true if there is no more data.  	 */ -	bool atEnd(); +	bool atEnd() const;  	/**  	 * Returns the current line (starting with one).  	 *  	 * @return the current line number.  	 */ -	inline int getLine() { return readCursor.line; } +	int getLine() const { return readCursor.line; }  	/**  	 * Returns the current column (starting with one).  	 *  	 * @return the current column number.  	 */ -	inline int getColumn() { return readCursor.column; } +	int getColumn() const { return readCursor.column; }  };  } diff --git a/src/core/CSSParser.cpp b/src/core/CSSParser.cpp index 3a86f45..d239359 100644 --- a/src/core/CSSParser.cpp +++ b/src/core/CSSParser.cpp @@ -19,6 +19,8 @@  #include "CSSParser.hpp"  namespace ousia { +namespace parser { +namespace css {  // CSS code tokens  static const int CURLY_OPEN = 1; @@ -76,19 +78,18 @@ static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = {      {ESCAPE, {CodeTokenMode::ESCAPE, ESCAPE}},      {LINEBREAK, {CodeTokenMode::LINEBREAK, LINEBREAK}}}; -Rooted<SelectorNode> CSSParser::parse(BufferedCharReader &input) +Rooted<Node> CSSParser::parse(std::istream &is, ParserContext &ctx)  { +	BufferedCharReader input{is};  	CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS};  	tokenizer.ignoreComments = true; -	// TODO: Is this the correct way to retrieve the Manager? 
-	Manager mgr; -	Rooted<SelectorNode> root = {new SelectorNode{mgr, "root"}}; -	parseDocument(root, tokenizer); +	Rooted<SelectorNode> root = {new SelectorNode{ctx.manager, "root"}}; +	parseDocument(root, tokenizer, ctx);  	return root;  }  void CSSParser::parseDocument(Rooted<SelectorNode> root, -                              CodeTokenizer &tokenizer) +                              CodeTokenizer &tokenizer, ParserContext &ctx)  {  	Token t;  	if (!tokenizer.peek(t)) { @@ -96,16 +97,17 @@ void CSSParser::parseDocument(Rooted<SelectorNode> root,  	}  	tokenizer.resetPeek();  	std::vector<Rooted<SelectorNode>> leafList; -	parseSelectors(root, tokenizer, leafList); +	parseSelectors(root, tokenizer, leafList, ctx);  	// TODO: Parse Ruleset -	parseDocument(root, tokenizer); +	parseDocument(root, tokenizer, ctx);  }  void CSSParser::parseSelectors(Rooted<SelectorNode> root,                                 CodeTokenizer &tokenizer, -                               std::vector<Rooted<SelectorNode>> &leafList) +                               std::vector<Rooted<SelectorNode>> &leafList, +                               ParserContext &ctx)  { -	auto tuple = parseSelector(tokenizer); +	auto tuple = parseSelector(tokenizer, ctx);  	// append the SelectorPath to the root node.  	std::vector<Rooted<SelectorNode>> unmergedLeafs =  	    root->append(std::get<0>(tuple)); @@ -123,7 +125,7 @@ void CSSParser::parseSelectors(Rooted<SelectorNode> root,  		case 2:  			// as the parseSelector is supposed to parse only a SelectorPath  			// there should not be more than one leaf. -			throw LoggableException{ +			throw ParserException{  			    "Internal Error: More than one leaf in SelectorPath!", "",  			    // TODO: Line handling?  			    //			    tokenizer.getInput().getLine(), @@ -132,15 +134,15 @@ void CSSParser::parseSelectors(Rooted<SelectorNode> root,  	}  	// if we find a comma, we can proceed parsing selectors.  	
Token t; -	if (expect(COMMA, tokenizer, t, false)) { -		parseSelectors(root, tokenizer, leafList); +	if (expect(COMMA, tokenizer, t, false, ctx)) { +		parseSelectors(root, tokenizer, leafList, ctx);  	}  }  std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> CSSParser::parseSelector( -    CodeTokenizer &tokenizer) +    CodeTokenizer &tokenizer, ParserContext &ctx)  { -	Rooted<SelectorNode> s = parsePrimitiveSelector(tokenizer); +	Rooted<SelectorNode> s = parsePrimitiveSelector(tokenizer, ctx);  	Token t;  	if (!tokenizer.peek(t)) {  		// if we are at the end the found selector is the immediate child as @@ -153,12 +155,10 @@ std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> CSSParser::parseSelector(  			// relationship (A B)  			tokenizer.resetPeek();  			// so we parse the rest of the subsequent SelectorPath -			auto tuple = parseSelector(tokenizer); +			auto tuple = parseSelector(tokenizer, ctx);  			// then we establish the DESCENDANT relationship -			// TODO: Is this the correct way to retrieve the Manager? -			Manager mgr; -			s->getEdges().push_back( -			    new SelectorNode::SelectorEdge(mgr, std::get<0>(tuple))); +			s->getEdges().push_back(new SelectorNode::SelectorEdge( +			    ctx.manager, std::get<0>(tuple)));  			// and we return this node as well as the leaf.  			return std::make_tuple(s, std::get<1>(tuple));  		} @@ -167,12 +167,11 @@ std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> CSSParser::parseSelector(  			// if we find an arrow there is a next token in a CHILD  			// relationship (A > B)  			// so we parse the rest of the subsequent SelectorPath -			auto tuple = parseSelector(tokenizer); +			auto tuple = parseSelector(tokenizer, ctx);  			// then we establish the DESCENDANT relationship -			// TODO: Is this the correct way to retrieve the Manager? 
-			Manager mgr;  			s->getEdges().push_back(new SelectorNode::SelectorEdge( -			    mgr, std::get<0>(tuple), SelectionOperator::DIRECT_DESCENDANT)); +			    ctx.manager, std::get<0>(tuple), +			    SelectionOperator::DIRECT_DESCENDANT));  			// and we return this node as well as the leaf.  			return std::make_tuple(s, std::get<1>(tuple));  		} @@ -183,17 +182,16 @@ std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> CSSParser::parseSelector(  	}  } -Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer) +Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer, +                                                       ParserContext &ctx)  {  	// first and foremost we expect a class name.  	Token t; -	expect(TOKEN_TEXT, tokenizer, t, true); +	expect(TOKEN_TEXT, tokenizer, t, true, ctx);  	const std::string name = t.content; -	// TODO: Is this the correct way to retrieve the Manager? -	Manager mgr;  	if (!tokenizer.peek(t)) {  		// if we are at the end, we just return this selector with its name. -		Rooted<SelectorNode> n{new SelectorNode(mgr, name)}; +		Rooted<SelectorNode> n{new SelectorNode(ctx.manager, name)};  		return n;  	} @@ -203,33 +201,34 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer)  		case DOUBLE_COLON:  			// if we find a double colon we have a generative PseudoSelector.  			isGenerative = true; +		// this is supposed to fall through; no missing break.  		case COLON: {  			// if we find a colon we have a restrictive PseudoSelector.  			tokenizer.consumePeek();  			// get the PseudoSelector name. -			expect(TOKEN_TEXT, tokenizer, t, true); +			expect(TOKEN_TEXT, tokenizer, t, true, ctx);  			const std::string pseudo_select_name = t.content;  			// look for additional arguments. -			if (!expect(PAREN_OPEN, tokenizer, t, false)) { +			if (!expect(PAREN_OPEN, tokenizer, t, false, ctx)) {  				// if we don't have any, we return here.  				
Rooted<SelectorNode> n{new SelectorNode( -				    mgr, name, {pseudo_select_name, isGenerative})}; +				    ctx.manager, name, {pseudo_select_name, isGenerative})};  				return n;  			}  			// parse the argument list.  			std::vector<std::string> args;  			// we require at least one argument, if parantheses are used -			expect(TOKEN_TEXT, tokenizer, t, true); +			expect(TOKEN_TEXT, tokenizer, t, true, ctx);  			args.push_back(t.content); -			while (expect(COMMA, tokenizer, t, false)) { +			while (expect(COMMA, tokenizer, t, false, ctx)) {  				// as long as we find commas we expect new arguments. -				expect(TOKEN_TEXT, tokenizer, t, true); +				expect(TOKEN_TEXT, tokenizer, t, true, ctx);  				args.push_back(t.content);  			} -			expect(PAREN_CLOSE, tokenizer, t, true); +			expect(PAREN_CLOSE, tokenizer, t, true, ctx);  			// and we return with the finished Selector.  			Rooted<SelectorNode> n{new SelectorNode( -			    mgr, name, {pseudo_select_name, args, isGenerative})}; +			    ctx.manager, name, {pseudo_select_name, args, isGenerative})};  			return n;  		}  		case HASH: { @@ -237,11 +236,11 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer)  			// :has_id(id)  			// so we expect an ID now.  			Token t; -			expect(TOKEN_TEXT, tokenizer, t, true); +			expect(TOKEN_TEXT, tokenizer, t, true, ctx);  			std::vector<std::string> args{t.content};  			// and we return the finished Selector  			Rooted<SelectorNode> n{ -			    new SelectorNode(mgr, name, {"has_id", args, false})}; +			    new SelectorNode(ctx.manager, name, {"has_id", args, false})};  			return n;  		}  		case BRACKET_OPEN: { @@ -252,34 +251,34 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer)  			// has_value [attribute_name="value"]  			// in both cases the attribute name comes first.  			
Token t; -			expect(TOKEN_TEXT, tokenizer, t, true); +			expect(TOKEN_TEXT, tokenizer, t, true, ctx);  			std::vector<std::string> args{t.content}; -			if (!expect(EQUALS, tokenizer, t, false)) { +			if (!expect(EQUALS, tokenizer, t, false, ctx)) {  				// if no equals sign follows we have a has_attribute  				// PseudoSelector  				// we expect a closing bracket. -				expect(BRACKET_CLOSE, tokenizer, t, true); +				expect(BRACKET_CLOSE, tokenizer, t, true, ctx);  				// and then we can return the result.  				Rooted<SelectorNode> n{new SelectorNode( -				    mgr, name, {"has_attribute", args, false})}; +				    ctx.manager, name, {"has_attribute", args, false})};  				return n;  			} else {  				// with an equals sign we have a has_value PseudoSelector and  				// expect the value next. -				expect(STRING, tokenizer, t, true); +				expect(STRING, tokenizer, t, true, ctx);  				args.push_back(t.content);  				// then we expect a closing bracket. -				expect(BRACKET_CLOSE, tokenizer, t, true); +				expect(BRACKET_CLOSE, tokenizer, t, true, ctx);  				// and then we can return the result. -				Rooted<SelectorNode> n{ -				    new SelectorNode(mgr, name, {"has_value", args, false})}; +				Rooted<SelectorNode> n{new SelectorNode( +				    ctx.manager, name, {"has_value", args, false})};  				return n;  			}  		}  		default:  			// everything else is not part of the Selector anymore.  			tokenizer.resetPeek(); -			Rooted<SelectorNode> n{new SelectorNode(mgr, name)}; +			Rooted<SelectorNode> n{new SelectorNode(ctx.manager, name)};  			return n;  	}  } @@ -287,20 +286,20 @@ Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer)  // TODO: Add RuleSet parsing methods.  
bool CSSParser::expect(int expectedType, CodeTokenizer &tokenizer, Token &t, -                       bool force) +                       bool force, ParserContext &ctx)  {  	bool end = !tokenizer.peek(t);  	if (end || t.tokenId != expectedType) {  		if (force) {  			if (end) { -				throw LoggableException{ +				throw ParserException{  				    "Unexpected end of file!", "",  				    // TODO: Line handling?  				    //			    tokenizer.getInput().getLine(),  				    //			    tokenizer.getInput().getColumn()  				};  			} else { -				throw LoggableException{ +				throw ParserException{  				    "Unexpected token!", "",  				    // TODO: Line handling?  				    //			    tokenizer.getInput().getLine(), @@ -316,3 +315,5 @@ bool CSSParser::expect(int expectedType, CodeTokenizer &tokenizer, Token &t,  	return true;  }  } +} +} diff --git a/src/core/CSSParser.hpp b/src/core/CSSParser.hpp index 7dfc872..870ce37 100644 --- a/src/core/CSSParser.hpp +++ b/src/core/CSSParser.hpp @@ -22,12 +22,16 @@  #include <vector>  #include <tuple> +#include <core/parser/Parser.hpp> +  #include "BufferedCharReader.hpp"  #include "CodeTokenizer.hpp"  #include "CSS.hpp"  #include "Exceptions.hpp"  namespace ousia { +namespace parser { +namespace css {  /**   * This is a context free, recursive parser for a subset of the CSS3 language @@ -57,32 +61,35 @@ namespace ousia {   *   * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de   */ -class CSSParser { +class CSSParser : public Parser {  private:  	/**  	 * Implements the DOC Nonterminal  	 */ -	void parseDocument(Rooted<SelectorNode> root, CodeTokenizer &tokenizer); +	void parseDocument(Rooted<SelectorNode> root, CodeTokenizer &tokenizer, +	                   ParserContext &ctx);  	/**  	 * Implements the SELECTORS Nonterminal and adds all leaf nodes of the  	 * resulting SelectorTree to the input leafList so that a parsed RuleSet can  	 * be inserted there.  	 
*/  	void parseSelectors(Rooted<SelectorNode> root, CodeTokenizer &tokenizer, -	                    std::vector<Rooted<SelectorNode>> &leafList); +	                    std::vector<Rooted<SelectorNode>> &leafList, +	                    ParserContext &ctx);  	/**  	 * Implements the SELECT Nonterminal, which in effect parses a SelectorPath  	 * of the SelectorTree and returns the beginning node of the path as first  	 * element as well as the leaf of the path as second tuple element.  	 */  	std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> parseSelector( -	    CodeTokenizer &tokenizer); +	    CodeTokenizer &tokenizer, ParserContext &ctx);  	/**  	 * Implements the SELECT' Nonterminal, which parses a single Selector with  	 * its PseudoSelector and returns it.  	 */ -	Rooted<SelectorNode> parsePrimitiveSelector(CodeTokenizer &tokenizer); +	Rooted<SelectorNode> parsePrimitiveSelector(CodeTokenizer &tokenizer, +	                                            ParserContext &ctx);  	// TODO: Add RuleSet parsing methods. @@ -101,7 +108,7 @@ private:  	 * @return             true iff a token of the expected type was found.  	 */  	bool expect(int expectedType, CodeTokenizer &tokenizer, Token &t, -	            bool force); +	            bool force, ParserContext &ctx);  public:  	/** @@ -110,9 +117,26 @@ public:  	 * SelectorTree.  	 * TODO: The RuleSet at the respective node at the tree lists all CSS Style  	 * rules that apply. +	 * +	 * @param is  is a reference to the input stream that should be parsed. +	 * @param ctx is a reference to the context that should be used while +	 *            parsing the document. +	 * @return    returns the root node of the resulting SelectorTree. For more +	 *            information on the return conventions consult the Parser.hpp.  	 */ -	Rooted<SelectorNode> parse(BufferedCharReader &input); +	Rooted<Node> parse(std::istream &is, ParserContext &ctx) override; + +	/** +	 * As befits a class called CSSParser, this Parser parses CSS. 
+	 */ +	std::set<std::string> mimetypes() +	{ +		std::set<std::string> out{"text/css"}; +		return out; +	}  };  } +} +}  #endif diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp index fa5dd49..5dac956 100644 --- a/src/core/parser/Parser.hpp +++ b/src/core/parser/Parser.hpp @@ -70,6 +70,10 @@ struct ParserContext {  	 * Reference to the Logger the parser should log any messages to.  	 */  	Logger &logger; +	/** +	 * Reference to the Manager the parser should append nodes to. +	 */ +	Manager &manager;  	/**  	 * Constructor of the ParserContext class. @@ -81,9 +85,12 @@ struct ParserContext {  	 * implementations.  	 * @param logger is a reference to the Logger instance that should be used  	 * to log error messages and warnings that occur while parsing the document. +	 * @param manager is a Reference to the Manager the parser should append +	 *nodes to.  	 */ -	ParserContext(Scope &scope, Registry &registry, Logger &logger) -	    : scope(scope), registry(registry), logger(logger){}; +	ParserContext(Scope &scope, Registry &registry, Logger &logger, +	              Manager &manager) +	    : scope(scope), registry(registry), logger(logger), manager(manager){};  };  struct StandaloneParserContext : public ParserContext { @@ -91,10 +98,11 @@ private:  	Logger logger;  	Scope scope;  	Registry registry; +	Manager manager;  public:  	StandaloneParserContext() -	    : ParserContext(scope, registry, logger), +	    : ParserContext(scope, registry, logger, manager),  	      scope(nullptr),  	      registry(logger){};  }; diff --git a/src/core/parser/Scope.hpp b/src/core/parser/Scope.hpp index 9c5504f..5b19b3d 100644 --- a/src/core/parser/Scope.hpp +++ b/src/core/parser/Scope.hpp @@ -55,7 +55,7 @@ public:  	 * Creates a new ScopedScope instance.  	 *  	 * @param scope is the backing Scope instance. 
-	 * @param node is the Node instance that should be poped onto the stack of +	 * @param node is the Node instance that should be pushed onto the stack of  	 * the Scope instance.  	 */  	ScopedScope(Scope *scope, Handle<Node> node); @@ -108,11 +108,6 @@ public:  	Scope(Handle<Node> rootNode) { nodes.push_back(rootNode); }  	/** -	 * Returns a reference at the Manager instance all nodes belong to. -	 */ -	Manager &getManager() { return getRoot()->getManager(); } - -	/**  	 * Pushes a new node onto the scope.  	 *  	 * @param node is the node that should be used for local lookup.  | 
