summary refs log tree commit diff
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/BufferedCharReader.cpp 2
-rw-r--r-- src/core/BufferedCharReader.hpp 6
-rw-r--r-- src/core/CSSParser.cpp 307
-rw-r--r-- src/core/CSSParser.hpp 118
-rw-r--r-- src/core/parser/Parser.hpp 14
-rw-r--r-- src/core/parser/Scope.hpp 7
-rw-r--r-- src/core/variant/Reader.cpp 14
-rw-r--r-- src/core/variant/Reader.hpp 54
8 files changed, 37 insertions, 485 deletions
diff --git a/src/core/BufferedCharReader.cpp b/src/core/BufferedCharReader.cpp
index cf481df..23c219a 100644
--- a/src/core/BufferedCharReader.cpp
+++ b/src/core/BufferedCharReader.cpp
@@ -224,7 +224,7 @@ void BufferedCharReader::resetPeek()
peekCursor.assign(readCursor);
}
-bool BufferedCharReader::atEnd()
+bool BufferedCharReader::atEnd() const
{
if (depleted || !inputStream) {
if (buffer.size() <= 0) {
diff --git a/src/core/BufferedCharReader.hpp b/src/core/BufferedCharReader.hpp
index ec76b03..bd19d4a 100644
--- a/src/core/BufferedCharReader.hpp
+++ b/src/core/BufferedCharReader.hpp
@@ -252,21 +252,21 @@ public:
*
* @return true if there is no more data.
*/
- bool atEnd();
+ bool atEnd() const;
/**
* Returns the current line (starting with one).
*
* @return the current line number.
*/
- inline int getLine() { return readCursor.line; }
+ int getLine() const { return readCursor.line; }
/**
* Returns the current column (starting with one).
*
* @return the current column number.
*/
- inline int getColumn() { return readCursor.column; }
+ int getColumn() const { return readCursor.column; }
};
}
diff --git a/src/core/CSSParser.cpp b/src/core/CSSParser.cpp
deleted file mode 100644
index b762844..0000000
--- a/src/core/CSSParser.cpp
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "CSSParser.hpp"
-
-namespace ousia {
-
-// CSS code tokens
-static const int CURLY_OPEN = 1;
-static const int CURLY_CLOSE = 2;
-static const int COLON = 3;
-static const int DOUBLE_COLON = 4;
-static const int SEMICOLON = 5;
-static const int HASH = 6;
-static const int BRACKET_OPEN = 7;
-static const int BRACKET_CLOSE = 8;
-static const int PAREN_OPEN = 9;
-static const int PAREN_CLOSE = 10;
-static const int EQUALS = 11;
-static const int ARROW = 12;
-static const int COMMA = 13;
-// comments
-static const int COMMENT = 100;
-static const int COMMENT_OPEN = 101;
-static const int COMMENT_CLOSE = 102;
-// strings
-static const int STRING = 200;
-static const int DOUBLE_QUOTE = 201;
-static const int ESCAPE = 202;
-// general syntax
-static const int LINEBREAK = 300;
-
-static const TokenTreeNode CSS_ROOT{{{"{", CURLY_OPEN},
- {"}", CURLY_CLOSE},
- {":", COLON},
- {"::", DOUBLE_COLON},
- {";", SEMICOLON},
- {"#", HASH},
- {"[", BRACKET_OPEN},
- {"]", BRACKET_CLOSE},
- {"(", PAREN_OPEN},
- {")", PAREN_CLOSE},
- {"=", EQUALS},
- {">", ARROW},
- {",", COMMA},
- {"/*", COMMENT_OPEN},
- {"*/", COMMENT_CLOSE},
- {"\"", DOUBLE_QUOTE},
- {"\\", ESCAPE},
- // linux linebreak
- {"\n", LINEBREAK},
- // windows linebreak
- {"\r\n", LINEBREAK},
- // Mac OS linebreak
- {"\r", LINEBREAK}}};
-
-static const std::map<int, CodeTokenDescriptor> CSS_DESCRIPTORS = {
- {COMMENT_OPEN, {CodeTokenMode::BLOCK_COMMENT_START, COMMENT}},
- {COMMENT_CLOSE, {CodeTokenMode::BLOCK_COMMENT_END, COMMENT}},
- {DOUBLE_QUOTE, {CodeTokenMode::STRING_START_END, STRING}},
- {ESCAPE, {CodeTokenMode::ESCAPE, ESCAPE}},
- {LINEBREAK, {CodeTokenMode::LINEBREAK, LINEBREAK}}};
-
-Rooted<SelectorNode> CSSParser::parse(BufferedCharReader &input)
-{
- CodeTokenizer tokenizer{input, CSS_ROOT, CSS_DESCRIPTORS};
- tokenizer.ignoreComments = true;
- // TODO: Is this the correct way to retrieve the Manager?
- Manager mgr;
- Rooted<SelectorNode> root = {new SelectorNode{mgr, "root"}};
- parseDocument(root, tokenizer);
- return root;
-}
-
-void CSSParser::parseDocument(Rooted<SelectorNode> root,
- CodeTokenizer &tokenizer)
-{
- Token t;
- if (!tokenizer.peek(t)) {
- return;
- }
- tokenizer.resetPeek();
- std::vector<Rooted<SelectorNode>> leafList;
- parseSelectors(root, tokenizer, leafList);
- // TODO: Parse Ruleset
- parseDocument(root, tokenizer);
-}
-
-void CSSParser::parseSelectors(Rooted<SelectorNode> root,
- CodeTokenizer &tokenizer,
- std::vector<Rooted<SelectorNode>> &leafList)
-{
- auto tuple = parseSelector(tokenizer);
- // append the SelectorPath to the root node.
- std::vector<Rooted<SelectorNode>> unmergedLeafs =
- root->append(std::get<0>(tuple));
- // append the leaf to the leafList.
- switch (unmergedLeafs.size()) {
- case 0:
- // if the leaf could be merged we take the leaf reference from the
- // parseSelector method.
- leafList.push_back(std::get<1>(tuple));
- break;
- case 1:
- // if the leaf could not be merged we take the existing leaf.
- leafList.push_back(unmergedLeafs[0]);
- break;
- case 2:
- // as the parseSelector is supposed to parse only a SelectorPath
- // there should not be more than one leaf.
- throw LoggableException{
- "Internal Error: More than one leaf in SelectorPath!", true,
- tokenizer.getInput()};
- }
- // if we find a comma, we can proceed parsing selectors.
- Token t;
- if (expect(COMMA, tokenizer, t, false)) {
- parseSelectors(root, tokenizer, leafList);
- }
-}
-
-std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> CSSParser::parseSelector(
- CodeTokenizer &tokenizer)
-{
- Rooted<SelectorNode> s = parsePrimitiveSelector(tokenizer);
- Token t;
- if (!tokenizer.peek(t)) {
- // if we are at the end the found selector is the immediate child as
- // well as the leaf.
- return std::make_tuple(s, s);
- }
- switch (t.tokenId) {
- case TOKEN_TEXT: {
- // if we find text there is a next token in a DESCENDANT
- // relationship (A B)
- tokenizer.resetPeek();
- // so we parse the rest of the subsequent SelectorPath
- auto tuple = parseSelector(tokenizer);
- // then we establish the DESCENDANT relationship
- // TODO: Is this the correct way to retrieve the Manager?
- Manager mgr;
- s->getEdges().push_back(
- new SelectorNode::SelectorEdge(mgr, std::get<0>(tuple)));
- // and we return this node as well as the leaf.
- return std::make_tuple(s, std::get<1>(tuple));
- }
- case ARROW: {
- tokenizer.consumePeek();
- // if we find an arrow there is a next token in a CHILD
- // relationship (A > B)
- // so we parse the rest of the subsequent SelectorPath
- auto tuple = parseSelector(tokenizer);
- // then we establish the DIRECT_DESCENDANT relationship
- // TODO: Is this the correct way to retrieve the Manager?
- Manager mgr;
- s->getEdges().push_back(new SelectorNode::SelectorEdge(
- mgr, std::get<0>(tuple), SelectionOperator::DIRECT_DESCENDANT));
- // and we return this node as well as the leaf.
- return std::make_tuple(s, std::get<1>(tuple));
- }
- default:
- // everything else is not part of the SelectorPath anymore.
- tokenizer.resetPeek();
- return std::make_tuple(s, s);
- }
-}
-
-Rooted<SelectorNode> CSSParser::parsePrimitiveSelector(CodeTokenizer &tokenizer)
-{
- // first and foremost we expect a class name.
- Token t;
- expect(TOKEN_TEXT, tokenizer, t, true);
- const std::string name = t.content;
- // TODO: Is this the correct way to retrieve the Manager?
- Manager mgr;
- if (!tokenizer.peek(t)) {
- // if we are at the end, we just return this selector with its name.
- Rooted<SelectorNode> n{new SelectorNode(mgr, name)};
- return n;
- }
-
- bool isGenerative = false;
-
- switch (t.tokenId) {
- case DOUBLE_COLON:
- // if we find a double colon we have a generative PseudoSelector.
- isGenerative = true;
- case COLON: {
- // if we find a colon we have a restrictive PseudoSelector.
- tokenizer.consumePeek();
- // get the PseudoSelector name.
- expect(TOKEN_TEXT, tokenizer, t, true);
- const std::string pseudo_select_name = t.content;
- // look for additional arguments.
- if (!expect(PAREN_OPEN, tokenizer, t, false)) {
- // if we don't have any, we return here.
- Rooted<SelectorNode> n{new SelectorNode(
- mgr, name, {pseudo_select_name, isGenerative})};
- return n;
- }
- // parse the argument list.
- std::vector<std::string> args;
- // we require at least one argument, if parentheses are used
- expect(TOKEN_TEXT, tokenizer, t, true);
- args.push_back(t.content);
- while (expect(COMMA, tokenizer, t, false)) {
- // as long as we find commas we expect new arguments.
- expect(TOKEN_TEXT, tokenizer, t, true);
- args.push_back(t.content);
- }
- expect(PAREN_CLOSE, tokenizer, t, true);
- // and we return with the finished Selector.
- Rooted<SelectorNode> n{new SelectorNode(
- mgr, name, {pseudo_select_name, args, isGenerative})};
- return n;
- }
- case HASH: {
- // a hash symbol is syntactic sugar for the PseudoSelector
- // :has_id(id)
- // so we expect an ID now.
- Token t;
- expect(TOKEN_TEXT, tokenizer, t, true);
- std::vector<std::string> args{t.content};
- // and we return the finished Selector
- Rooted<SelectorNode> n{
- new SelectorNode(mgr, name, {"has_id", args, false})};
- return n;
- }
- case BRACKET_OPEN: {
- // in case of brackets we have one of two restrictive
- // PseudoSelectors
- // has_attribute ([attribute_name])
- // or
- // has_value [attribute_name="value"]
- // in both cases the attribute name comes first.
- Token t;
- expect(TOKEN_TEXT, tokenizer, t, true);
- std::vector<std::string> args{t.content};
- if (!expect(EQUALS, tokenizer, t, false)) {
- // if no equals sign follows we have a has_attribute
- // PseudoSelector
- // we expect a closing bracket.
- expect(BRACKET_CLOSE, tokenizer, t, true);
- // and then we can return the result.
- Rooted<SelectorNode> n{new SelectorNode(
- mgr, name, {"has_attribute", args, false})};
- return n;
- } else {
- // with an equals sign we have a has_value PseudoSelector and
- // expect the value next.
- expect(STRING, tokenizer, t, true);
- args.push_back(t.content);
- // then we expect a closing bracket.
- expect(BRACKET_CLOSE, tokenizer, t, true);
- // and then we can return the result.
- Rooted<SelectorNode> n{
- new SelectorNode(mgr, name, {"has_value", args, false})};
- return n;
- }
- }
- default:
- // everything else is not part of the Selector anymore.
- tokenizer.resetPeek();
- Rooted<SelectorNode> n{new SelectorNode(mgr, name)};
- return n;
- }
-}
-
-// TODO: Add RuleSet parsing methods.
-
-bool CSSParser::expect(int expectedType, CodeTokenizer &tokenizer, Token &t,
- bool force)
-{
- bool end = !tokenizer.peek(t);
- if (end || t.tokenId != expectedType) {
- if (force) {
- if (end) {
- throw LoggableException{"Unexpected end of file!", true,
- tokenizer.getInput()};
- } else {
- throw LoggableException{"Unexpected token!", true,
- tokenizer.getInput()};
- }
- } else {
- tokenizer.resetPeek();
- return false;
- }
- }
- tokenizer.consumePeek();
- return true;
-}
-}
diff --git a/src/core/CSSParser.hpp b/src/core/CSSParser.hpp
deleted file mode 100644
index 7dfc872..0000000
--- a/src/core/CSSParser.hpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- Ousía
- Copyright (C) 2014 Benjamin Paaßen, Andreas Stöckel
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _OUSIA_CSS_PARSER_HPP_
-#define _OUSIA_CSS_PARSER_HPP_
-
-#include <vector>
-#include <tuple>
-
-#include "BufferedCharReader.hpp"
-#include "CodeTokenizer.hpp"
-#include "CSS.hpp"
-#include "Exceptions.hpp"
-
-namespace ousia {
-
-/**
- * This is a context free, recursive parser for a subset of the CSS3 language
- * as defined by W3C. We allow the following grammar:
- *
- * DOC := SELECTORS RULESET DOC | epsilon
- * SELECTORS := SELECT , SELECTORS | SELECT
- * SELECT := SELECT' OPERATOR SELECT | SELECT'
- * SELECT' := TYPE | TYPE:PSEUDO | TYPE::GEN_PSEUDO |
- * TYPE:PSEUDO(ARGUMENTS) |
- * TYPE::GEN_PSEUDO(ARGUMENTS) | TYPE#ID |
- * TYPE[ATTRIBUTE] | TYPE[ATTRIBUTE=VALUE]
- * TYPE := string
- * PSEUDO := string
- * GEN_PSEUDO := string
- * ARGUMENTS := string , ARGUMENTS | string
- * ID := string
- * ATTRIBUTE := string
- * VALUE := string
- * OPERATOR := epsilon | &gt;
- * RULESET := epsilon | { RULES }
- * RULES := RULE RULES | epsilon
- * RULE := KEY : VALUE ;
- * KEY := string
- * VALUE := type-specific parser
- *
- *
- * @author Benjamin Paassen - bpaassen@techfak.uni-bielefeld.de
- */
-class CSSParser {
-private:
- /**
- * Implements the DOC Nonterminal
- */
- void parseDocument(Rooted<SelectorNode> root, CodeTokenizer &tokenizer);
- /**
- * Implements the SELECTORS Nonterminal and adds all leaf nodes of the
- * resulting SelectorTree to the input leafList so that a parsed RuleSet can
- * be inserted there.
- */
- void parseSelectors(Rooted<SelectorNode> root, CodeTokenizer &tokenizer,
- std::vector<Rooted<SelectorNode>> &leafList);
- /**
- * Implements the SELECT Nonterminal, which in effect parses a SelectorPath
- * of the SelectorTree and returns the beginning node of the path as first
- * element as well as the leaf of the path as second tuple element.
- */
- std::tuple<Rooted<SelectorNode>, Rooted<SelectorNode>> parseSelector(
- CodeTokenizer &tokenizer);
-
- /**
- * Implements the SELECT' Nonterminal, which parses a single Selector with
- * its PseudoSelector and returns it.
- */
- Rooted<SelectorNode> parsePrimitiveSelector(CodeTokenizer &tokenizer);
-
- // TODO: Add RuleSet parsing methods.
-
- /**
- * A convenience function to wrap around the tokenizer peek() function that
- * only returns true if an instance of the expected type occurs.
- *
- * @param expectedType the ID of the expected type according to the
- * CodeTokenizer specification.
- * @param tokenizer the tokenizer for the input.
- * @param t an empty token that gets the parsed token content
- * if it has the expected type.
- * @param force a flag to be set if it would be fatal for the
- * parsing process to get the wrong type. In that case
- * an exception is thrown.
- * @return true iff a token of the expected type was found.
- */
- bool expect(int expectedType, CodeTokenizer &tokenizer, Token &t,
- bool force);
-
-public:
- /**
- * This parses the given input as CSS content as specified by the grammar
- * seen above. The return value is a Rooted reference to the root of the
- * SelectorTree.
- * TODO: The RuleSet at the respective node at the tree lists all CSS Style
- * rules that apply.
- */
- Rooted<SelectorNode> parse(BufferedCharReader &input);
-};
-}
-
-#endif
diff --git a/src/core/parser/Parser.hpp b/src/core/parser/Parser.hpp
index fa5dd49..5dac956 100644
--- a/src/core/parser/Parser.hpp
+++ b/src/core/parser/Parser.hpp
@@ -70,6 +70,10 @@ struct ParserContext {
* Reference to the Logger the parser should log any messages to.
*/
Logger &logger;
+ /**
+ * Reference to the Manager the parser should append nodes to.
+ */
+ Manager &manager;
/**
* Constructor of the ParserContext class.
@@ -81,9 +85,12 @@ struct ParserContext {
* implementations.
* @param logger is a reference to the Logger instance that should be used
* to log error messages and warnings that occur while parsing the document.
+ * @param manager is a reference to the Manager the parser should append
+ * nodes to.
*/
- ParserContext(Scope &scope, Registry &registry, Logger &logger)
- : scope(scope), registry(registry), logger(logger){};
+ ParserContext(Scope &scope, Registry &registry, Logger &logger,
+ Manager &manager)
+ : scope(scope), registry(registry), logger(logger), manager(manager){};
};
struct StandaloneParserContext : public ParserContext {
@@ -91,10 +98,11 @@ private:
Logger logger;
Scope scope;
Registry registry;
+ Manager manager;
public:
StandaloneParserContext()
- : ParserContext(scope, registry, logger),
+ : ParserContext(scope, registry, logger, manager),
scope(nullptr),
registry(logger){};
};
diff --git a/src/core/parser/Scope.hpp b/src/core/parser/Scope.hpp
index 9c5504f..5b19b3d 100644
--- a/src/core/parser/Scope.hpp
+++ b/src/core/parser/Scope.hpp
@@ -55,7 +55,7 @@ public:
* Creates a new ScopedScope instance.
*
* @param scope is the backing Scope instance.
- * @param node is the Node instance that should be poped onto the stack of
+ * @param node is the Node instance that should be pushed onto the stack of
* the Scope instance.
*/
ScopedScope(Scope *scope, Handle<Node> node);
@@ -108,11 +108,6 @@ public:
Scope(Handle<Node> rootNode) { nodes.push_back(rootNode); }
/**
- * Returns a reference at the Manager instance all nodes belong to.
- */
- Manager &getManager() { return getRoot()->getManager(); }
-
- /**
* Pushes a new node onto the scope.
*
* @param node is the node that should be used for local lookup.
diff --git a/src/core/variant/Reader.cpp b/src/core/variant/Reader.cpp
index 6142ecf..e9a58a1 100644
--- a/src/core/variant/Reader.cpp
+++ b/src/core/variant/Reader.cpp
@@ -26,12 +26,17 @@
namespace ousia {
namespace variant {
+static const char *ERR_UNEXPECTED_CHARACTER = "Unexpected character";
+static const char *ERR_UNEXPECTED_END = "Unexpected end";
+static const char *ERR_UNTERMINATED = "Unterminated literal";
+
static const int STATE_INIT = 0;
static const int STATE_IN_STRING = 1;
static const int STATE_ESCAPE = 2;
static std::pair<Err, std::string> parseString(
- BufferedCharReader &reader, const unordered_set<char> *delims = nullptr)
+ BufferedCharReader &reader, const unordered_set<char> *delims = nullptr,
+ Logger *logger = nullptr)
{
// Initialize the internal state
Err errCode = Err::OK;
@@ -51,9 +56,13 @@ static std::pair<Err, std::string> parseString(
quote = c;
state = STATE_IN_STRING;
} else if (delims && delims.count(c)) {
+ Logger.log(ERR_UNTERMINATED, reader);
return std::make_pair(Err::UNEXPECTED_END, res.str());
+ } else if (Utils::isWhitespace(c)) {
+ reader.consumePeek();
+ continue;
}
- reader.consumePeek();
+ return std::make_pair(Err::UNEXPECTED_CHARACTER, res.str());
break;
case STATE_IN_STRING:
if (c == q) {
@@ -171,7 +180,6 @@ static std::pair<Err, Variant> parseGeneric(BufferedCharReader &reader,
}
return std::make_pair(Err::UNEXPECTED_END, res.str());
}
-
}
}
diff --git a/src/core/variant/Reader.hpp b/src/core/variant/Reader.hpp
index 3f945f0..339127f 100644
--- a/src/core/variant/Reader.hpp
+++ b/src/core/variant/Reader.hpp
@@ -32,6 +32,7 @@
#include <utility>
#include <core/BufferedCharReader.hpp>
+#include <core/Logger.hpp>
#include "Variant.hpp"
@@ -40,44 +41,6 @@ namespace variant {
class Reader {
public:
- // TODO: Pass logger instance instead of using error codes?
-
- /**
- * The Err enum describes possible error codes that may be encountered when
- * parsing the microtypes.
- */
- enum class Err : int {
- /**
- * Reached the end of the stream, but expected more data.
- */
- ERR_UNEXPECTED_END = -1,
-
- /**
- * The stream is malformed.
- */
- ERR_MALFORMED = -2,
-
- /**
- * Unexpected character.
- */
- ERR_UNEXPECTED_CHARACTER = -3,
-
- /**
- * Unterminated literal.
- */
- ERR_UNTERMINATED = -4,
-
- /**
- * Invalid escape character.
- */
- ERR_INVALID_ESCAPE = -5,
-
- /**
- * A value of the requested type was extracted successfully.
- */
- OK = 0
- };
-
/**
* Parses a string which may either be enclosed by " or ', unescapes
* entities in the string as specified for JavaScript.
@@ -91,9 +54,10 @@ public:
* outside). If nullptr is given, no delimiter is used and a complete string
* is read.
*/
- static std::pair<Err, std::string> parseString(
+ static std::pair<bool, std::string> parseString(
BufferedCharReader &reader,
- const unordered_set<char> *delims = nullptr);
+ const unordered_set<char> *delims = nullptr,
+ Logger *logger = nullptr);
/**
* Extracts an unescaped string from the given buffered char reader
@@ -106,8 +70,9 @@ public:
* @param delims is a set of characters which will terminate the string.
* These characters are not included in the result. May not be nullptr.
*/
- static std::pair<Err, std::string> parseUnescapedString(
- BufferedCharReader &reader, const unordered_set<char> *delims);
+ static std::pair<bool, std::string> parseUnescapedString(
+ BufferedCharReader &reader, const unordered_set<char> *delims,
+ Logger *logger = nullptr);
/**
* Tries to parse the most specific item from the given stream until one of
@@ -120,8 +85,9 @@ public:
* @param delims is a set of characters which will terminate the string.
* These characters are not included in the result. May not be nullptr.
*/
- static std::pair<Err, Variant> parseGeneric(
- BufferedCharReader &reader, const unordered_set<char> *delims);
+ static std::pair<bool, Variant> parseGeneric(
+ BufferedCharReader &reader, const unordered_set<char> *delims,
+ Logger *logger = nullptr);
};
}
}