diff options
| -rw-r--r-- | src/core/parser/stack/DocumentHandler.cpp | 74 | ||||
| -rw-r--r-- | src/core/parser/stack/DocumentHandler.hpp | 2 | ||||
| -rw-r--r-- | src/core/parser/stack/Handler.cpp | 17 | ||||
| -rw-r--r-- | src/core/parser/stack/Handler.hpp | 71 | ||||
| -rw-r--r-- | src/core/parser/stack/Stack.cpp | 184 | ||||
| -rw-r--r-- | test/core/parser/stack/StackTest.cpp | 6 | ||||
| -rw-r--r-- | testdata/integration/user_defined_syntax/python_code.in.osml | 34 | ||||
| -rw-r--r-- | testdata/integration/user_defined_syntax/python_code.out.osxml | 51 | 
8 files changed, 310 insertions, 129 deletions
| diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp index 34d4d17..ce5d8a2 100644 --- a/src/core/parser/stack/DocumentHandler.cpp +++ b/src/core/parser/stack/DocumentHandler.cpp @@ -583,60 +583,70 @@ bool DocumentChildHandler::startToken(Handle<Node> node)  	}  } -DocumentChildHandler::EndTokenResult DocumentChildHandler::endToken( -    const Token &token, Handle<Node> node) +EndTokenResult DocumentChildHandler::endToken(Handle<Node> node, size_t maxStackDepth)  { -	// Iterate over the transparent elements in the scope stack +	// Fetch the current scope stack  	const NodeVector<Node> &stack = scope().getStack(); -	ssize_t depth = -1; -	for (auto sit = stack.crbegin(); sit != stack.crend(); sit++, depth++) { + +	bool found = false;            // true once the given node has been found +	bool repeat = false; +	size_t scopeStackDepth = 0;    // # of elems on the scope stack +	size_t currentStackDepth = 0;  // # of "explicit" elems on the parser stack + +	// Iterate over the elements in the scope stack +	for (auto sit = stack.crbegin(); sit != stack.crend(); +	     sit++, scopeStackDepth++) {  		Rooted<Node> leaf = *sit; +		bool isExplicit = false;  		if (leaf->isa(&RttiTypes::DocumentField)) {  			Rooted<DocumentField> field = leaf.cast<DocumentField>();  			if (field->getDescriptor() == node) {  				// If the field is transparent, end it by incrementing the depth  				// counter -- both the field itself and the consecutive element  				// need to be removed +				found = true;  				if (field->transparent) { -					depth += 2; -					break; +					repeat = true; +					scopeStackDepth++;  				} -				return EndTokenResult::ENDED_THIS; -			} - -			// Abort if the field is explicit -			if (!field->transparent) { -				return EndTokenResult::ENDED_NONE;  			} +			isExplicit = field->explicitField; +		} else if (leaf->isa(&RttiTypes::StructuredEntity)) { +			Rooted<StructuredEntity> entity = leaf.cast<StructuredEntity>(); +			found = entity->getDescriptor() == node; +			repeat = found && entity->isTransparent(); +			isExplicit = !entity->isTransparent();  		} -		if (leaf->isa(&RttiTypes::StructuredEntity)) { -			Rooted<StructuredEntity> entity = leaf.cast<StructuredEntity>(); -			if (entity->getDescriptor() == node) { -				// If the entity is transparent, end it by incrementing the -				// depth counter and aborting -				if (entity->isTransparent()) { -					depth++; -					break; -				} -				return EndTokenResult::ENDED_THIS; -			} +		// TODO: End annotations! -			// Abort if this entity is explicit -			if (!entity->isTransparent()) { -				return EndTokenResult::ENDED_NONE; -			} +		// If the given structure is a explicit sturcture (represents a handler) +		// increment the stack depth and abort once the maximum stack depth has +		// been surpassed. +		if (isExplicit) { +			currentStackDepth++; +		} +		if (found || currentStackDepth > maxStackDepth) { +			break;  		} +	} -		// TODO: End annotations! +	// Abort with a value smaller than zero if the element has not been found +	if (!found || currentStackDepth > maxStackDepth) { +		return EndTokenResult(); +	} + +	// If the element has been found, return the number of handlers that have to +	// be popped from the parser stack +	if (currentStackDepth > 0) { +		return EndTokenResult(currentStackDepth, true, repeat);  	}  	// End all elements that were marked for being closed -	for (ssize_t i = 0; i <= depth; i++) { +	for (size_t i = 0; i < scopeStackDepth + 1; i++) {  		scope().pop(logger());  	} -	return (depth >= 0) ? EndTokenResult::ENDED_HIDDEN -	                    : EndTokenResult::ENDED_NONE; +	return EndTokenResult(0, true, false);  }  void DocumentChildHandler::end() diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp index 75e32fd..3ef5f08 100644 --- a/src/core/parser/stack/DocumentHandler.hpp +++ b/src/core/parser/stack/DocumentHandler.hpp @@ -222,7 +222,7 @@ public:  	bool startCommand(Variant::mapType &args) override;  	bool startAnnotation(Variant::mapType &args) override;  	bool startToken(Handle<Node> node) override; -	EndTokenResult endToken(const Token &token, Handle<Node> node) override; +	EndTokenResult endToken(Handle<Node> node, size_t maxStackDepth) override;  	void end() override;  	bool data() override;  	bool fieldStart(bool &isDefault, size_t fieldIdx) override; diff --git a/src/core/parser/stack/Handler.cpp b/src/core/parser/stack/Handler.cpp index 1399fef..69bfc76 100644 --- a/src/core/parser/stack/Handler.cpp +++ b/src/core/parser/stack/Handler.cpp @@ -47,7 +47,8 @@ Handler::Handler(const HandlerData &handlerData)  {  } -Handler::~Handler() { +Handler::~Handler() +{  	while (tokenStackDepth > 0) {  		popTokens();  	} @@ -90,7 +91,8 @@ void Handler::pushTokens(const std::vector<SyntaxDescriptor> &tokens)  	handlerData.callbacks.pushTokens(tokens);  } -void Handler::popTokens() { +void Handler::popTokens() +{  	assert(tokenStackDepth > 0 && "popTokens called too often");  	tokenStackDepth--;  	handlerData.callbacks.popTokens(); @@ -133,11 +135,10 @@ bool EmptyHandler::startToken(Handle<Node> node)  	return false;  } -Handler::EndTokenResult EmptyHandler::endToken(const Token &token, -                                               Handle<Node> node) +EndTokenResult EmptyHandler::endToken(Handle<Node> node, size_t maxStackDepth)  {  	// There are no tokens to end here. -	return EndTokenResult::ENDED_NONE; +	return EndTokenResult();  }  void EmptyHandler::end() @@ -179,10 +180,10 @@ bool StaticHandler::startAnnotation(Variant::mapType &args) { return false; }  bool StaticHandler::startToken(Handle<Node> node) { return false; } -Handler::EndTokenResult StaticHandler::endToken(const Token &token, -                                                Handle<Node> node) +EndTokenResult StaticHandler::endToken(Handle<Node> node, size_t maxStackDepth)  { -	return EndTokenResult::ENDED_NONE; +	// There are no tokens to end here. +	return EndTokenResult();  }  void StaticHandler::end() diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp index 9355e09..bd6ea72 100644 --- a/src/core/parser/stack/Handler.hpp +++ b/src/core/parser/stack/Handler.hpp @@ -60,6 +60,47 @@ class State;  enum class HandlerType { COMMAND, ANNOTATION_START, ANNOTATION_END, TOKEN };  /** + * Structure returned by the endToken method. + */ +struct EndTokenResult { +	/** +	 * Contains the number of explicit elements that need to be unrolled. +	 */ +	size_t depth; + +	/** +	 * Set to true if the given descriptor was found. +	 */ +	bool found; + +	/** +	 * Set to true if the endToken function must be called again after +	 * unrolling. +	 */ +	bool repeat; + +	/** +	 * Default constructor, initializes all members to sane default values, +	 * the descriptor has not been found. +	 */ +	EndTokenResult() : depth(0), found(false), repeat(false) {} + +	/** +	 * Constructor, initializes all member variables with the given values. +	 * +	 * @param depth is the number of explicit elements that need to be unrolled. +	 * @param found if true, an element corresponding to the descriptor has been +	 * found. +	 * @param repeat if true, the endToken method needs to be called again once +	 * the elements have been unrolled. +	 */ +	EndTokenResult(size_t depth, bool found, bool repeat) +	    : depth(depth), found(found), repeat(repeat) +	{ +	} +}; + +/**   * Class collecting all the data that is being passed to a Handler   * instance.   */ @@ -214,11 +255,6 @@ protected:  public:  	/** -	 * Enum type representing the possible outcomes of the endToken() method. -	 */ -	enum class EndTokenResult { ENDED_THIS, ENDED_HIDDEN, ENDED_NONE }; - -	/**  	 * Virtual destructor.  	 */  	virtual ~Handler(); @@ -362,22 +398,15 @@ public:  	/**  	 * Called whenever a token is marked as "end" token and this handler happens -	 * to be the currently active handler. This operation may have three -	 * outcomes: -	 * <ol> -	 *   <li>The token marks the end of the complete handler and the calling -	 *   code should call the "end" method.</li> -	 *   <li>The token marks the end of some element that is unknown the calling -	 *   code. So the operation itself was a success, but the calling code -	 *   should not call the "end" method. -	 *   <li>The token did not match anything in this context. Basically this -	 *   should never happen, but who knows.</li> -	 * </ol> +	 * to be the currently active handler.  	 * -	 * @param id is the Token for which the handler should be started. -	 * @param node is the node for which this token was registered. +	 * @param node is the node for for which a close token was found. +	 * @param maxStackDepth contains the number of handlers on the stack that +	 * can end. +	 * @return an EndTokenResult instance specifiying how to procede.  	 */ -	virtual EndTokenResult endToken(const Token &token, Handle<Node> node) = 0; +	virtual EndTokenResult endToken(Handle<Node> node, +	                                size_t maxStackDepth) = 0;  	/**  	 * Called before the command for which this handler is defined ends (is @@ -439,7 +468,7 @@ public:  	bool startCommand(Variant::mapType &args) override;  	bool startAnnotation(Variant::mapType &args) override;  	bool startToken(Handle<Node> node) override; -	EndTokenResult endToken(const Token &token, Handle<Node> node) override; +	EndTokenResult endToken(Handle<Node> node, size_t maxStackDepth) override;  	void end() override;  	bool fieldStart(bool &isDefault, size_t fieldIdx) override;  	void fieldEnd() override; @@ -464,7 +493,7 @@ public:  	bool startCommand(Variant::mapType &args) override;  	bool startAnnotation(Variant::mapType &args) override;  	bool startToken(Handle<Node> node) override; -	EndTokenResult endToken(const Token &token, Handle<Node> node) override; +	EndTokenResult endToken(Handle<Node> node, size_t maxStackDepth) override;  	void end() override;  	bool fieldStart(bool &isDefault, size_t fieldIdx) override;  	void fieldEnd() override; diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp index 697e663..3c17de7 100644 --- a/src/core/parser/stack/Stack.cpp +++ b/src/core/parser/stack/Stack.cpp @@ -386,13 +386,25 @@ private:  	HandlerInfo &lastInfo();  	/** -	 * Returns a list containing the currently pending close tokens. +	 * Returns the maximum stack depth that can be unrolled.  	 * -	 * @param token is a TokenId for which the result list should be filtered. -	 * If set to Tokens::Empty, all tokens are returned. +	 * @return the number of elements that can currently be removed from the +	 * stack. +	 */ +	size_t maxUnrollStackDepth() const; + +	/** +	 * Returns the index of the next reachable handler on the stack having the +	 * given token as close token -- or in case Tokens::Empty is given, the +	 * index of the next reachable handler with a set close token. +	 * +	 * @param token is the TokenId of the close token that is being searched. If +	 * Tokens::Empty is passed, the first handler with a non-empty close token +	 * will be returned. +	 * @return the index of the corresponding handler on the stack or a negative +	 * value if no such handler could be found.  	 */ -	std::vector<SyntaxDescriptor> pendingCloseTokens( -	    TokenId token = Tokens::Empty) const; +	ssize_t pendingCloseTokenHandlerIdx(TokenId token = Tokens::Empty) const;  	/**  	 * Returns a set containing the tokens that should currently be processed @@ -650,33 +662,40 @@ std::string StackImpl::currentCommandName() const  	return stack.empty() ? std::string{} : stack.back().name();  } -std::vector<SyntaxDescriptor> StackImpl::pendingCloseTokens(TokenId token) const +size_t StackImpl::maxUnrollStackDepth() const  { -	// TODO: Are there cases in which the returned vector will contain more -	// than one element? -	std::vector<SyntaxDescriptor> res; -	for (auto it = stack.crbegin(); it != stack.crend(); it++) { -		if (it->closeToken != Tokens::Empty && -		    (token == Tokens::Empty || token == it->closeToken)) { -			res.push_back(SyntaxDescriptor(Tokens::Empty, it->closeToken, -			                               Tokens::Empty, it->tokenDesciptor, -			                               -1)); -		} -		if (it->range || it->inField) { +	size_t res = 0; +	for (ssize_t i = stack.size() - 1; i >= 0; i--) { +		const HandlerInfo &info = stack[i]; +		if (info.range || (info.inField && !info.inImplicitDefaultField)) {  			break;  		} +		res++;  	}  	return res;  } +ssize_t StackImpl::pendingCloseTokenHandlerIdx(TokenId token) const +{ +	const ssize_t minIdx = std::max<ssize_t>(0, stack.size() - maxUnrollStackDepth() - 1); +	for (ssize_t i = stack.size() - 1; i >= minIdx; i--) { +		const HandlerInfo &info = stack[i]; +		if (info.closeToken != Tokens::Empty && +		    (token == Tokens::Empty || token == info.closeToken)) { +			return i; +		} +	} +	return -1; +} +  TokenSet StackImpl::currentTokens() const  {  	TokenSet res;  	if (currentInfo().state().supportsTokens) {  		res = tokenStack.tokens(); -		std::vector<SyntaxDescriptor> pending = pendingCloseTokens(); -		for (const auto &descr : pending) { -			res.insert(descr.close); +		ssize_t idx = pendingCloseTokenHandlerIdx(); +		if (idx >= 0) { +			res.insert(stack[idx].closeToken);  		}  	}  	return res; @@ -903,24 +922,52 @@ static void strayTokenError(const Token &token, TokenDescriptor &descr,  bool StackImpl::handleCloseTokens(const Token &token,                                    const std::vector<SyntaxDescriptor> &descrs)  { -	// Fetch the current information -	HandlerInfo &info = currentInfo(); +	// Abort if the stack is empty -- nothing can be ended in that case +	if (stack.empty()) { +		return false; +	} -	// Iterate over all possible SyntaxDescriptors and try to end the token -	for (const SyntaxDescriptor &descr : descrs) { -		Handler::EndTokenResult res = -		    info.handler->endToken(token.id, descr.descriptor); -		switch (res) { -			case Handler::EndTokenResult::ENDED_THIS: -				endCurrentHandler(); -				return true; -			case Handler::EndTokenResult::ENDED_HIDDEN: -				return true; -			case Handler::EndTokenResult::ENDED_NONE: -				break; +	// Check whether any of the given token descriptors can be ended -- select +	// the one that needs the fewest unrolling +	const size_t maxStackDepth = maxUnrollStackDepth(); +	const HandlerInfo &info = currentInfo(); +	size_t idx = 0; +	EndTokenResult bestRes = EndTokenResult(); +	for (size_t i = 0; i < descrs.size(); i++) { +		// Try to end the handler +		const EndTokenResult res = +		    info.handler->endToken(descrs[i].descriptor, maxStackDepth); + +		// Abort if the "endToken" function ended a transparent field -- in this +		// case this method has already been successful +		if (res.depth == 0 && res.found) { +			return true; +		} + +		// Otherwise check whether the result is positive and smaller than any +		// previous result +		if (res.found && (!bestRes.found || res.depth < bestRes.depth)) { +			idx = i; +			bestRes = res;  		}  	} -	return false; + +	// Abort if no descriptor can be ended +	if (!bestRes.found) { +		return false; +	} + +	// End as many handlers as indicated by the "depth" counter, repeat the +	// process if needed +	for (size_t i = 0; i < bestRes.depth; i++) { +		endCurrentHandler(); +	} +	if (!stack.empty() && bestRes.repeat) { +		currentInfo().handler->endToken( +	                                descrs[idx].descriptor, +	                                0); +	} +	return true;  }  bool StackImpl::handleOpenTokens(Logger &logger, const Token &token, @@ -978,32 +1025,46 @@ bool StackImpl::handleOpenTokens(Logger &logger, const Token &token,  void StackImpl::handleToken(const Token &token)  { -	// Fetch the TokenDescriptor and the "pendingClose" list +	// If the token matches one from the "pendingCloseTokens" list, then just +	// end the corresponding handler +	const ssize_t pendingCloseIndex = pendingCloseTokenHandlerIdx(token.id); +	if (pendingCloseIndex >= 0) { +		for (ssize_t i = stack.size() - 1; i >= pendingCloseIndex; i--) { +			endCurrentHandler(); +		} +		return; +	} + +	// Fetch the TokenDescriptor  	TokenDescriptor descr = tokenStack.lookup(token.id); -	std::vector<SyntaxDescriptor> pendingClose = pendingCloseTokens(token.id); -	// Iterate until the stack can no longer be unwound +	// First try to close pending open tokens, issue an error if this does not +	// work and no shortForm or open tokens are declared and the token is not +	// a special whitespace token +	if (handleCloseTokens(token, descr.close)) { +		return; +	} else if (descr.shortForm.empty() && descr.open.empty()) { +		if (!Token::isSpecial(token.id)) { +			strayTokenError(token, descr, logger()); +		} +		return; +	} + +	// Now try to handle open or short form tokens. Iterate until the stack can +	// no longer be unwound.  	while (!stack.empty()) {  		LoggerFork loggerFork = logger().fork(); +		// TODO: Instead of using hadError flag here implement a "hasError" +		// method for LoggerFork  		bool hadError = false;  		try { -			// Try to close the current handlers -			if (handleCloseTokens(token, pendingClose) || -			    (pendingClose.empty() && -			     handleCloseTokens(token, descr.close))) { -				return; -			} -  			// Try to open an implicit default field and try to start the token  			// as short form or as start token -			prepareCurrentHandler(pendingClose.empty()); -			if (pendingClose.empty()) { -				if (handleOpenTokens(loggerFork, token, true, -				                     descr.shortForm) || -				    handleOpenTokens(loggerFork, token, false, descr.open)) { -					return; -				} +			prepareCurrentHandler(true); +			if (handleOpenTokens(loggerFork, token, true, descr.shortForm) || +			    handleOpenTokens(loggerFork, token, false, descr.open)) { +				return;  			}  		}  		catch (LoggableException ex) { @@ -1015,19 +1076,14 @@ void StackImpl::handleToken(const Token &token)  		HandlerInfo &info = currentInfo();  		if (info.inImplicitDefaultField && !stack.empty()) {  			endCurrentHandler(); -		} else if (info.inDefaultField && info.closeToken == token.id) { -			endCurrentField();  		} else { -			// Ignore close-only special (whitespace) rules, in all other cases -			// issue an error -			if (!Token::isSpecial(token.id) || descr.close.empty() || -			    !descr.open.empty() || !descr.shortForm.empty()) { -				loggerFork.commit(); - -				// If there was no other error, issue a "stray token" error -				if (!hadError) { -					strayTokenError(token, descr, logger()); -				} +			// Commit all encountered errors +			loggerFork.commit(); + +			// If there was no other error message already, issue a "stray +			// token" error +			if (!hadError) { +				strayTokenError(token, descr, logger());  			}  			return;  		} diff --git a/test/core/parser/stack/StackTest.cpp b/test/core/parser/stack/StackTest.cpp index af2b8e8..e23cde7 100644 --- a/test/core/parser/stack/StackTest.cpp +++ b/test/core/parser/stack/StackTest.cpp @@ -66,7 +66,7 @@ struct Tracker {  	bool startCommandResult;  	bool startAnnotationResult;  	bool startTokenResult; -	Handler::EndTokenResult endTokenResult; +	EndTokenResult endTokenResult;  	bool fieldStartResult;  	bool dataResult; @@ -96,7 +96,7 @@ struct Tracker {  		startCommandResult = true;  		startAnnotationResult = true;  		startTokenResult = true; -		endTokenResult = Handler::EndTokenResult::ENDED_THIS; +		endTokenResult = EndTokenResult();  		fieldStartResult = true;  		dataResult = true; @@ -157,7 +157,7 @@ public:  		return tracker.startTokenResult;  	} -	EndTokenResult endToken(const Token &token, Handle<Node> node) override +	EndTokenResult endToken(Handle<Node> node, size_t maxStackDepth) override  	{  		tracker.endTokenCount++;  		return tracker.endTokenResult; diff --git a/testdata/integration/user_defined_syntax/python_code.in.osml b/testdata/integration/user_defined_syntax/python_code.in.osml new file mode 100644 index 0000000..2d553b4 --- /dev/null +++ b/testdata/integration/user_defined_syntax/python_code.in.osml @@ -0,0 +1,34 @@ +\document + +\ontology#python +	\struct#code[root=true] +		\field +			\childRef[ref=block] +	\struct#block[transparent=true] +		\syntax +			\open{\indent} +			\close{\dedent} +		\field +			\childRef[ref=line] +			\childRef[ref=block] +	\struct#line[transparent=true] +		\primitive[type=string] +		\syntax +			\close{\newline} + +\begin{code} +import random +import sys + +for i in range(int(sys.argv[1])): +	randomNumber = random.randint(1, i) + +	print("Generated a random number between 1 and \{\}." \% i) +	print("It is: \{\}" \% randomNumber) +	for k in range(randomNumber): +		print(k) + +	print("Done with this number!") + +print("Done.") +\end{code} diff --git a/testdata/integration/user_defined_syntax/python_code.out.osxml b/testdata/integration/user_defined_syntax/python_code.out.osxml new file mode 100644 index 0000000..6235833 --- /dev/null +++ b/testdata/integration/user_defined_syntax/python_code.out.osxml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<document> +	<ontology name="python"> +		<struct name="code" root="true"> +			<field> +				<childRef ref="block"/> +			</field> +		</struct> +		<struct name="block" transparent="true"> +			<field> +				<childRef ref="block"/> +				<childRef ref="line"/> +			</field> +			<syntax> +				<open> +					<indent/> +				</open> +				<close> +					<dedent/> +				</close> +			</syntax> +		</struct> +		<struct name="line" transparent="true"> +			<primitive type="string"> +				<syntax> +					<close> +						<newline/> +					</close> +				</syntax> +			</primitive> +		</struct> +	</ontology> +	<python:code> +		<python:block> +			<python:line>import random</python:line> +			<python:line>import sys</python:line> +			<python:line>for i in range(int(sys.argv[1])):</python:line> +			<python:block> +				<python:line>randomNumber = random.randint(1, i)</python:line> +				<python:line>print("Generated a random number between 1 and {}." % i)</python:line> +				<python:line>print("It is: {}" % randomNumber)</python:line> +				<python:line>for k in range(randomNumber):</python:line> +				<python:block> +					<python:line>print(k)</python:line> +				</python:block> +				<python:line>print("Done with this number!")</python:line> +			</python:block> +			<python:line>print("Done.")</python:line> +		</python:block> +	</python:code> +</document> | 
