summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBenjamin Paassen <bpaassen@techfak.uni-bielefeld.de>2015-03-02 15:58:55 +0100
committerBenjamin Paassen <bpaassen@techfak.uni-bielefeld.de>2015-03-02 15:58:55 +0100
commit24c7a8d1e62dc52298ea1abdc8b44d70fff94b54 (patch)
treec82a161ce02e177b6be5e63a65c5a2f42eecf2e6 /src
parent4b5f37d07e4e691848b243ae795bb59893a6379c (diff)
parent5b81f755a5303c3eab05c605711ecca32c071b6d (diff)
Merge branch 'astoecke_tokens' of somweyr.de:ousia into astoecke_tokens
Conflicts: application/src/core/parser/stack/Callbacks.hpp application/src/core/parser/stack/Handler.hpp
Diffstat (limited to 'src')
-rw-r--r--src/core/parser/stack/Callbacks.hpp26
-rw-r--r--src/core/parser/stack/DocumentHandler.cpp18
-rw-r--r--src/core/parser/stack/DocumentHandler.hpp4
-rw-r--r--src/core/parser/stack/Handler.hpp78
-rw-r--r--src/core/parser/stack/Stack.cpp380
-rw-r--r--src/core/parser/stack/Stack.hpp19
-rw-r--r--src/formats/osml/OsmlParser.cpp2
-rw-r--r--src/formats/osxml/OsxmlEventParser.cpp87
-rw-r--r--src/formats/osxml/OsxmlEventParser.hpp10
-rw-r--r--src/formats/osxml/OsxmlParser.cpp17
10 files changed, 366 insertions, 275 deletions
diff --git a/src/core/parser/stack/Callbacks.hpp b/src/core/parser/stack/Callbacks.hpp
index e471881..092664a 100644
--- a/src/core/parser/stack/Callbacks.hpp
+++ b/src/core/parser/stack/Callbacks.hpp
@@ -77,19 +77,9 @@ public:
* Interface defining a set of callback functions that act as a basis for the
* StateStackCallbacks and the ParserCallbacks.
*/
-class HandlerCallbacks: public ParserCallbacks {
+class HandlerCallbacks : public ParserCallbacks {
public:
/**
- * Reads a string variant form the current input stream. This function must
- * be called from the data() method.
- *
- * @return a string variant containing the current text data. The return
- * value depends on the currently set whitespace mode and the tokens that
- * were enabled using the enableTokens callback method.
- */
- Variant readData();
-
- /**
* Pushes a list of TokenSyntaxDescriptor instances onto the internal stack.
* The tokens described in the token list are the tokens that are currently
* enabled.
@@ -98,13 +88,21 @@ public:
* stored on the stack.
*/
void pushTokens(const std::vector<SyntaxDescriptor> &tokens);
-
/**
* Removes the previously pushed list of tokens from the stack.
*/
- void popTokens();
-};
+ virtual void popTokens() = 0;
+ /**
+ * Reads a string variant from the current input stream. This function must
+ * be called from the data() method.
+ *
+ * @return a string variant containing the current text data. The return
+ * value depends on the currently set whitespace mode and the tokens that
+ * were enabled using the enableTokens callback method.
+ */
+ virtual Variant readData() = 0;
+};
}
}
diff --git a/src/core/parser/stack/DocumentHandler.cpp b/src/core/parser/stack/DocumentHandler.cpp
index d44176a..714ab1b 100644
--- a/src/core/parser/stack/DocumentHandler.cpp
+++ b/src/core/parser/stack/DocumentHandler.cpp
@@ -246,8 +246,6 @@ bool DocumentChildHandler::start(Variant::mapType &args)
parent->getDescriptor()->getFieldDescriptorIndex();
}
// create the entity for the new element at last.
- // TODO: REMOVE
- strct_name = strct->getName();
entity = parent->createChildStructuredEntity(strct, lastFieldIdx,
args, nameAttr);
}
@@ -373,15 +371,8 @@ bool DocumentChildHandler::convertData(Handle<FieldDescriptor> field,
return valid && scope().resolveValue(data, type, logger);
}
-bool DocumentChildHandler::data(TokenizedData &data)
+bool DocumentChildHandler::data()
{
- // TODO: Handle this correctly
- Variant text = data.text(WhitespaceMode::TRIM);
- if (text == nullptr) {
- // For now, except "no data" as success
- return true;
- }
-
// We're past the region in which explicit fields can be defined in the
// parent structure element
scope().setFlag(ParserFlag::POST_EXPLICIT_FIELDS, true);
@@ -401,6 +392,7 @@ bool DocumentChildHandler::data(TokenizedData &data)
// If it is a primitive field directly, try to parse the content.
if (field->isPrimitive()) {
// Add it as primitive content.
+ Variant text = readData();
if (!convertData(field, text, logger())) {
return false;
}
@@ -419,6 +411,10 @@ bool DocumentChildHandler::data(TokenizedData &data)
for (auto primitiveField : defaultFields) {
// Then try to parse the content using the type specification.
forks.emplace_back(logger().fork());
+
+ // TODO: Actually the data has to be read after the path has been
+ // created (as createPath may push more tokens onto the stack)
+ Variant text = readData();
if (!convertData(primitiveField, text, forks.back())) {
continue;
}
@@ -428,7 +424,6 @@ bool DocumentChildHandler::data(TokenizedData &data)
// Construct the necessary path
NodeVector<Node> path = field->pathTo(primitiveField, logger());
- // TODO: Create methods with indices instead of names.
createPath(fieldIdx, path, parent);
// Then create the primitive element
@@ -439,6 +434,7 @@ bool DocumentChildHandler::data(TokenizedData &data)
// No field was found that might take the data -- dump the error messages
// from the loggers -- or, if there were no primitive fields, clearly state
// this fact
+ Variant text = readData();
if (defaultFields.empty()) {
logger().error("Got data, but structure \"" + name() +
"\" does not have any primitive field",
diff --git a/src/core/parser/stack/DocumentHandler.hpp b/src/core/parser/stack/DocumentHandler.hpp
index dda7d8b..c51c188 100644
--- a/src/core/parser/stack/DocumentHandler.hpp
+++ b/src/core/parser/stack/DocumentHandler.hpp
@@ -93,8 +93,6 @@ public:
class DocumentChildHandler : public Handler {
private:
bool isExplicitField = false;
- //TODO: REMOVE
- std::string strct_name;
/**
* Code shared by both the start(), fieldStart() and the data() method.
@@ -167,7 +165,7 @@ public:
bool start(Variant::mapType &args) override;
void end() override;
- bool data(TokenizedData &data) override;
+ bool data() override;
bool fieldStart(bool &isDefault, size_t fieldIdx) override;
diff --git a/src/core/parser/stack/Handler.hpp b/src/core/parser/stack/Handler.hpp
index 19660d0..baf31c9 100644
--- a/src/core/parser/stack/Handler.hpp
+++ b/src/core/parser/stack/Handler.hpp
@@ -38,7 +38,7 @@ class Variant;
namespace parser_stack {
// More forward declarations
-class Callbacks;
+class HandlerCallbacks;
class State;
/**
@@ -162,36 +162,26 @@ protected:
const std::string &name() const;
/**
- * Calls the corresponding method in the HandlerCallbacks instance. Reads a
- * string variant form the current input stream. This function must be
- * called from the data() method.
+ * Calls the corresponding function in the HandlerCallbacks instance. This
+ * method registers the given tokens as tokens that are generally available,
+ * tokens must be explicitly enabled using the "pushTokens" and "popTokens"
+ * method. Tokens that have not been registered are not guaranteed to be
+ * reported (except for special tokens, these do not have to be registered).
*
- * @return a string variant containing the current text data. The return
- * value depends on the currently set whitespace mode and the tokens that
- * were enabled using the enableTokens callback method.
+ * @param token is the token string that should be made available.
+ * @return the TokenId that will be used to refer to the token.
*/
- Variant readData();
+ TokenId registerToken(const std::string &token);
/**
- * Calls the corresponding function in the Callbacks instance. Sets the
- * whitespace mode that specifies how string data should be processed. The
- * calls to this function are placed on a stack by the underlying Stack
- * class. This function should be called from the "fieldStart" callback and
- * the "start" callback. If no whitespace mode is pushed in the "start"
- * method the whitespace mode "TRIM" is implicitly assumed.
+ * Calls the corresponding function in the HandlerCallbacks instance. This
+ * method unregisters the given token. Note that for a token to be no longer
+ * reported, this function has to be called as many times as registerToken()
+ * for the corresponding token.
*
- * @param whitespaceMode specifies one of the three WhitespaceMode constants
- * PRESERVE, TRIM or COLLAPSE.
- */
- // void pushWhitespaceMode(WhitespaceMode whitespaceMode);
-
- /**
- * Pops a previously pushed whitespace mode. Calls to this function should
- * occur in the "end" callback and the "fieldEnd" callback. This function
- * can only undo pushs that were performed by the pushWhitespaceMode()
- * method of the same handler.
+ * @param id is the id of the Token that should be unregistered.
*/
- // void popWhitespaceMode();
+ void unregisterToken(TokenId id);
/**
* Pushes a list of TokenSyntaxDescriptor instances onto the internal stack.
@@ -210,26 +200,36 @@ protected:
void popTokens();
/**
- * Calls the corresponding function in the HandlerCallbacks instance. This
- * method registers the given tokens as tokens that are generally available,
- * tokens must be explicitly enabled using the "pushTokens" and "popTokens"
- * method. Tokens that have not been registered are not guaranteed to be
- * reported (except for special tokens, these do not have to be registerd).
+ * Calls the corresponding method in the HandlerCallbacks instance. Reads a
+ * string variant from the current input stream. This function must be
+ * called from the data() method.
*
- * @param token is the token string that should be made available.
- * @return the TokenId that will be used to refer to the token.
+ * @return a string variant containing the current text data. The return
+ * value depends on the currently set whitespace mode and the tokens that
+ * were enabled using the enableTokens callback method.
*/
- TokenId registerToken(const std::string &token);
+ Variant readData();
/**
- * Calls the corresponding function in the HandlerCallbacks instance. This
- * method unregisters the given token. Note that for a token to be no longer
- * reported, this function has to be called as many times as registerToken()
- * for the corresponding token.
+ * Calls the corresponding function in the Callbacks instance. Sets the
+ * whitespace mode that specifies how string data should be processed. The
+ * calls to this function are placed on a stack by the underlying Stack
+ * class. This function should be called from the "fieldStart" callback and
+ * the "start" callback. If no whitespace mode is pushed in the "start"
+ * method the whitespace mode "TRIM" is implicitly assumed.
*
- * @param id is the id of the Token that should be unregistered.
+ * @param whitespaceMode specifies one of the three WhitespaceMode constants
+ * PRESERVE, TRIM or COLLAPSE.
*/
- void unregisterToken(TokenId id);
+ // void pushWhitespaceMode(WhitespaceMode whitespaceMode);
+
+ /**
+ * Pops a previously pushed whitespace mode. Calls to this function should
+ * occur in the "end" callback and the "fieldEnd" callback. This function
+ * can only undo pushes that were performed by the pushWhitespaceMode()
+ * method of the same handler.
+ */
+ // void popWhitespaceMode();
public:
/**
diff --git a/src/core/parser/stack/Stack.cpp b/src/core/parser/stack/Stack.cpp
index 292e7e2..a556999 100644
--- a/src/core/parser/stack/Stack.cpp
+++ b/src/core/parser/stack/Stack.cpp
@@ -23,9 +23,12 @@
#include <core/parser/ParserScope.hpp>
#include <core/parser/ParserContext.hpp>
+#include "Callbacks.hpp"
#include "Handler.hpp"
#include "Stack.hpp"
#include "State.hpp"
+#include "TokenRegistry.hpp"
+#include "TokenStack.hpp"
namespace ousia {
namespace parser_stack {
@@ -209,10 +212,15 @@ static LoggableException buildInvalidCommandException(
/* Class StackImpl */
-class StackImpl {
-
+class StackImpl : public HandlerCallbacks {
private:
/**
+ * Reference at an implementation of the ParserCallbacks instance to which
+ * certain handler callbacks are directed.
+ */
+ ParserCallbacks &parser;
+
+ /**
* Reference at the parser context.
*/
ParserContext &ctx;
@@ -224,6 +232,18 @@ private:
const std::multimap<std::string, const State *> &states;
/**
+ * Registry responsible for registering the tokens proposed by the
+ * Handlers in the parser.
+ */
+ TokenRegistry tokenRegistry;
+
+ /**
+ * Pointer at a TokenizedDataReader instance from which the data should
+ * currently be read.
+ */
+ TokenizedDataReader *dataReader;
+
+ /**
* Internal stack used for managing the currently active Handler instances.
*/
std::vector<HandlerInfo> stack;
@@ -231,7 +251,7 @@ private:
/**
* Return the reference in the Logger instance stored within the context.
*/
- Logger &logger();
+ Logger &logger() { return ctx.getLogger(); }
/**
* Used internally to get all expected command names for the current state.
@@ -311,14 +331,41 @@ private:
* @return true if all handlers on the stack are valid.
*/
bool handlersValid();
-};
+public:
+ StackImpl(ParserCallbacks &parser, ParserContext &ctx,
+ const std::multimap<std::string, const State *> &states);
-/* Class Stack */
+ ~StackImpl();
-Stack::Stack(ParserContext &ctx,
- const std::multimap<std::string, const State *> &states)
- : ctx(ctx), states(states)
+ const State &currentState() const;
+ std::string currentCommandName() const;
+
+ void commandStart(const Variant &name, const Variant::mapType &args,
+ bool range);
+ void annotationStart(const Variant &className, const Variant &args,
+ bool range);
+ void annotationEnd(const Variant &className, const Variant &elementName);
+ void rangeEnd();
+ void fieldStart(bool isDefault);
+ void fieldEnd();
+ void data(const TokenizedData &data);
+
+ TokenId registerToken(const std::string &token) override;
+ void unregisterToken(TokenId id) override;
+ Variant readData() override;
+ bool hasData();
+ void pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens) override;
+ void popTokens() override;
+};
+
+StackImpl::StackImpl(ParserCallbacks &parser, ParserContext &ctx,
+ const std::multimap<std::string, const State *> &states)
+ : parser(parser),
+ ctx(ctx),
+ states(states),
+ tokenRegistry(parser),
+ dataReader(nullptr)
{
// If the scope instance is not empty we need to deduce the current parser
// state
@@ -327,7 +374,7 @@ Stack::Stack(ParserContext &ctx,
}
}
-Stack::~Stack()
+StackImpl::~StackImpl()
{
while (!stack.empty()) {
// Fetch the topmost stack element
@@ -351,7 +398,7 @@ Stack::~Stack()
}
}
-void Stack::deduceState()
+void StackImpl::deduceState()
{
// Assemble all states
std::vector<const State *> states;
@@ -374,8 +421,8 @@ void Stack::deduceState()
HandlerConstructor ctor =
state.elementHandler ? state.elementHandler : EmptyHandler::create;
- std::shared_ptr<Handler> handler =
- std::shared_ptr<Handler>{ctor({ctx, "", state, SourceLocation{}})};
+ std::shared_ptr<Handler> handler = std::shared_ptr<Handler>{
+ ctor({ctx, *this, "", state, SourceLocation{}})};
stack.emplace_back(handler);
// Set the correct flags for this implicit handler
@@ -384,7 +431,7 @@ void Stack::deduceState()
info.fieldStart(true, false, true);
}
-std::set<std::string> Stack::expectedCommands()
+std::set<std::string> StackImpl::expectedCommands()
{
const State *currentState = &(this->currentState());
std::set<std::string> res;
@@ -396,17 +443,17 @@ std::set<std::string> Stack::expectedCommands()
return res;
}
-const State &Stack::currentState()
+const State &StackImpl::currentState() const
{
return stack.empty() ? States::None : stack.back().handler->getState();
}
-std::string Stack::currentCommandName()
+std::string StackImpl::currentCommandName() const
{
return stack.empty() ? std::string{} : stack.back().handler->getName();
}
-const State *Stack::findTargetState(const std::string &name)
+const State *StackImpl::findTargetState(const std::string &name)
{
const State *currentState = &(this->currentState());
auto range = states.equal_range(name);
@@ -420,7 +467,7 @@ const State *Stack::findTargetState(const std::string &name)
return nullptr;
}
-const State *Stack::findTargetStateOrWildcard(const std::string &name)
+const State *StackImpl::findTargetStateOrWildcard(const std::string &name)
{
// Try to find the target state with the given name, if none is found, try
// find a matching "*" state.
@@ -431,16 +478,16 @@ const State *Stack::findTargetStateOrWildcard(const std::string &name)
return targetState;
}
-HandlerInfo &Stack::currentInfo()
+HandlerInfo &StackImpl::currentInfo()
{
return stack.empty() ? EmptyHandlerInfo : stack.back();
}
-HandlerInfo &Stack::lastInfo()
+HandlerInfo &StackImpl::lastInfo()
{
return stack.size() < 2U ? EmptyHandlerInfo : stack[stack.size() - 2];
}
-void Stack::endCurrentHandler()
+void StackImpl::endCurrentHandler()
{
if (!stack.empty()) {
// Fetch the handler info for the current top-level element
@@ -467,7 +514,7 @@ void Stack::endCurrentHandler()
}
}
-void Stack::endOverdueHandlers()
+void StackImpl::endOverdueHandlers()
{
if (!stack.empty()) {
// Fetch the handler info for the current top-level element
@@ -483,7 +530,7 @@ void Stack::endOverdueHandlers()
}
}
-bool Stack::ensureHandlerIsInField()
+bool StackImpl::ensureHandlerIsInField()
{
// If the current handler is not in a field (and actually has a handler)
// try to start a default field
@@ -507,7 +554,7 @@ bool Stack::ensureHandlerIsInField()
return true;
}
-bool Stack::handlersValid()
+bool StackImpl::handlersValid()
{
for (auto it = stack.crbegin(); it != stack.crend(); it++) {
if (!it->valid) {
@@ -517,9 +564,8 @@ bool Stack::handlersValid()
return true;
}
-Logger &Stack::logger() { return ctx.getLogger(); }
-
-void Stack::command(const Variant &name, const Variant::mapType &args)
+void StackImpl::commandStart(const Variant &name, const Variant::mapType &args,
+ bool range)
{
// End handlers that already had a default field and are currently not
// active.
@@ -562,8 +608,8 @@ void Stack::command(const Variant &name, const Variant::mapType &args)
HandlerConstructor ctor = targetState->elementHandler
? targetState->elementHandler
: EmptyHandler::create;
- std::shared_ptr<Handler> handler{
- ctor({ctx, name.asString(), *targetState, name.getLocation()})};
+ std::shared_ptr<Handler> handler{ctor(
+ {ctx, *this, name.asString(), *targetState, name.getLocation()})};
stack.emplace_back(handler);
// Fetch the HandlerInfo for the parent element and the current element
@@ -611,108 +657,113 @@ void Stack::command(const Variant &name, const Variant::mapType &args)
}
}
-void Stack::data(TokenizedData data)
+void StackImpl::annotationStart(const Variant &className, const Variant &args,
+ bool range)
{
- // TODO: Rewrite this function for token handling
- // TODO: This loop needs to be refactored out
- while (!data.atEnd()) {
- // End handlers that already had a default field and are currently not
- // active.
- endOverdueHandlers();
-
- const bool hasNonWhitespaceText = data.hasNonWhitespaceText();
-
- // Check whether there is any command the data can be sent to -- if not,
- // make sure the data actually is data
- if (stack.empty()) {
- if (hasNonWhitespaceText) {
- throw LoggableException("No command here to receive data.", data);
- }
- return;
- }
-
- // Fetch the current command handler information
- HandlerInfo &info = currentInfo();
-
- // Make sure the current handler has an open field
- if (!ensureHandlerIsInField()) {
- endCurrentHandler();
- continue;
- }
-
- // If this field should not get any data, log an error and do not call
- // the "data" handler
- if (!info.inValidField) {
- // If the "hadDefaultField" flag is set, we already issued an error
- // message
- if (!info.hadDefaultField) {
- if (hasNonWhitespaceText) {
- logger().error("Did not expect any data here", data);
- }
- return;
- }
- }
-
- if (handlersValid() && info.inValidField) {
- // Fork the logger and set it as temporary logger for the "start"
- // method. We only want to keep error messages if this was not a try
- // to implicitly open a default field.
- LoggerFork loggerFork = logger().fork();
- info.handler->setLogger(loggerFork);
-
- // Pass the data to the current Handler instance
- bool valid = false;
- try {
- // Create a fork of the TokenizedData and let the handler work
- // on it
- TokenizedData dataFork = data;
- valid = info.handler->data(dataFork);
-
- // If the data was validly handled by the handler, commit the
- // change
- if (valid) {
- data = dataFork;
- }
- }
- catch (LoggableException ex) {
- loggerFork.log(ex);
- }
-
- // Reset the logger instance as soon as possible
- info.handler->resetLogger();
-
- // If placing the data here failed and we're currently in an
- // implicitly opened field, just unroll the stack to the next field
- // and try again
- if (!valid && info.inImplicitDefaultField) {
- endCurrentHandler();
- continue;
- }
-
- // Commit the content of the logger fork. Do not change the valid
- // flag.
- loggerFork.commit();
- }
-
- // There was no reason to unroll the stack any further, so continue
- return;
- }
+ // TODO
}
-void Stack::data(const Variant &stringData)
+void StackImpl::annotationEnd(const Variant &className,
+ const Variant &elementName)
{
- // Fetch the SourceLocation of the given stringData variant
- SourceLocation loc = stringData.getLocation();
+ // TODO
+}
- // Create a TokenizedData instance and feed the given string data into it
- TokenizedData tokenizedData(loc.getSourceId());
- tokenizedData.append(stringData.asString(), loc.getStart());
+void StackImpl::rangeEnd()
+{
+ // TODO
+}
- // Call the actual "data" method
- data(tokenizedData);
+void StackImpl::data(const TokenizedData &data)
+{
+ // TODO: Rewrite this function for token handling
+ // TODO: This loop needs to be refactored out
+ /*while (!data.atEnd()) {
+ // End handlers that already had a default field and are currently not
+ // active.
+ endOverdueHandlers();
+
+ const bool hasNonWhitespaceText = data.hasNonWhitespaceText();
+
+ // Check whether there is any command the data can be sent to -- if not,
+ // make sure the data actually is data
+ if (stack.empty()) {
+ if (hasNonWhitespaceText) {
+ throw LoggableException("No command here to receive data.",
+ data);
+ }
+ return;
+ }
+
+ // Fetch the current command handler information
+ HandlerInfo &info = currentInfo();
+
+ // Make sure the current handler has an open field
+ if (!ensureHandlerIsInField()) {
+ endCurrentHandler();
+ continue;
+ }
+
+ // If this field should not get any data, log an error and do not call
+ // the "data" handler
+ if (!info.inValidField) {
+ // If the "hadDefaultField" flag is set, we already issued an error
+ // message
+ if (!info.hadDefaultField) {
+ if (hasNonWhitespaceText) {
+ logger().error("Did not expect any data here", data);
+ }
+ return;
+ }
+ }
+
+ if (handlersValid() && info.inValidField) {
+ // Fork the logger and set it as temporary logger for the "start"
+ // method. We only want to keep error messages if this was not a try
+ // to implicitly open a default field.
+ LoggerFork loggerFork = logger().fork();
+ info.handler->setLogger(loggerFork);
+
+ // Pass the data to the current Handler instance
+ bool valid = false;
+ try {
+ // Create a fork of the TokenizedData and let the handler work
+ // on it
+ TokenizedData dataFork = data;
+ valid = info.handler->data(dataFork);
+
+ // If the data was validly handled by the handler, commit the
+ // change
+ if (valid) {
+ data = dataFork;
+ }
+ }
+ catch (LoggableException ex) {
+ loggerFork.log(ex);
+ }
+
+ // Reset the logger instance as soon as possible
+ info.handler->resetLogger();
+
+ // If placing the data here failed and we're currently in an
+ // implicitly opened field, just unroll the stack to the next field
+ // and try again
+ if (!valid && info.inImplicitDefaultField) {
+ endCurrentHandler();
+ continue;
+ }
+
+ // Commit the content of the logger fork. Do not change the valid
+ // flag.
+ loggerFork.commit();
+ }
+
+ // There was no reason to unroll the stack any further, so continue
+ return;
+ }*/
}
-void Stack::fieldStart(bool isDefault)
+void StackImpl::fieldStart(bool isDefault)
{
// Make sure the current handler stack is not empty
if (stack.empty()) {
@@ -764,7 +815,7 @@ void Stack::fieldStart(bool isDefault)
info.fieldStart(defaultField, false, valid);
}
-void Stack::fieldEnd()
+void StackImpl::fieldEnd()
{
// Unroll the stack until the next explicitly open field
while (!stack.empty()) {
@@ -799,14 +850,93 @@ void Stack::fieldEnd()
info.fieldEnd();
}
-void Stack::annotationStart(const Variant &className, const Variant &args)
+TokenId StackImpl::registerToken(const std::string &token)
+{
+ return tokenRegistry.registerToken(token);
+}
+
+void StackImpl::unregisterToken(TokenId id)
+{
+ tokenRegistry.unregisterToken(id);
+}
+
+void StackImpl::pushTokens(const std::vector<TokenSyntaxDescriptor> &tokens)
{
// TODO
}
-void Stack::annotationEnd(const Variant &className, const Variant &elementName)
+void StackImpl::popTokens()
{
// TODO
}
+
+Variant StackImpl::readData()
+{
+ if (dataReader != nullptr) {
+ TokenizedDataReaderFork dataReaderFork = dataReader->fork();
+ Token token;
+
+ // TODO: Use correct token set
+ TokenSet tokens;
+
+ // TODO: Use correct whitespace mode
+ WhitespaceMode mode = WhitespaceMode::COLLAPSE;
+
+ dataReaderFork.read(token, tokens, mode);
+ if (token.id == Tokens::Data) {
+ Variant res = Variant::fromString(token.content);
+ res.setLocation(token.getLocation());
+ return res;
+ }
+ }
+ return Variant{};
+}
+
+bool StackImpl::hasData() { return readData() != nullptr; }
+
+/* Class Stack */
+
+Stack::Stack(ParserCallbacks &parser, ParserContext &ctx,
+ const std::multimap<std::string, const State *> &states)
+ : impl(new StackImpl(parser, ctx, states))
+{
+}
+
+Stack::~Stack()
+{
+ // Do nothing here, stub needed because StackImpl is incomplete in hpp
+}
+
+const State &Stack::currentState() const { return impl->currentState(); }
+
+std::string Stack::currentCommandName() const
+{
+ return impl->currentCommandName();
+}
+
+void Stack::commandStart(const Variant &name, const Variant::mapType &args,
+ bool range)
+{
+ impl->commandStart(name, args, range);
+}
+
+void Stack::annotationStart(const Variant &className, const Variant &args,
+ bool range)
+{
+ impl->annotationStart(className, args, range);
+}
+
+void Stack::annotationEnd(const Variant &className, const Variant &elementName)
+{
+ impl->annotationEnd(className, elementName);
+}
+
+void Stack::rangeEnd() { impl->rangeEnd(); }
+
+void Stack::fieldStart(bool isDefault) { impl->fieldStart(isDefault); }
+
+void Stack::fieldEnd() { impl->fieldEnd(); }
+
+void Stack::data(const TokenizedData &data) { impl->data(data); }
}
}
diff --git a/src/core/parser/stack/Stack.hpp b/src/core/parser/stack/Stack.hpp
index e1173d0..de281d4 100644
--- a/src/core/parser/stack/Stack.hpp
+++ b/src/core/parser/stack/Stack.hpp
@@ -42,6 +42,7 @@ class Variant;
namespace parser_stack {
// Forward declarations
+class ParserCallbacks;
class StackImpl;
class State;
@@ -63,11 +64,13 @@ public:
/**
* Creates a new instance of the Stack class.
*
+ * @param parser is an implementation of the ParserCallbacks instance to
+ * which certain calls are directed.
* @param ctx is the parser context the parser stack is working on.
* @param states is a map containing the command names and pointers at the
* corresponding State instances.
*/
- Stack(ParserContext &ctx,
+ Stack(ParserCallbacks &parser, ParserContext &ctx,
const std::multimap<std::string, const State *> &states);
/**
@@ -81,7 +84,7 @@ public:
* @return the state of the currently active Handler instance or
* States::None if no handler is on the stack.
*/
- const State &currentState();
+ const State &currentState() const;
/**
* Returns the command name that is currently being handled.
@@ -89,7 +92,7 @@ public:
* @return the name of the command currently being handled by the active
* Handler instance or an empty string if no handler is currently active.
*/
- std::string currentCommandName();
+ std::string currentCommandName() const;
/**
* Function that should be called whenever a new command is reached.
@@ -154,16 +157,6 @@ public:
* that should be read.
*/
void data(const TokenizedData &data);
-
- /**
- * Function that shuold be called whenever character data is found in the
- * input stream. The given string variant is converted into a TokenizedData
- * instance internally.
- *
- * @param stringData is a string variant containing the data that has been
- * found.
- */
- void data(const Variant &stringData);
};
}
}
diff --git a/src/formats/osml/OsmlParser.cpp b/src/formats/osml/OsmlParser.cpp
index c25974f..36ef2b6 100644
--- a/src/formats/osml/OsmlParser.cpp
+++ b/src/formats/osml/OsmlParser.cpp
@@ -73,7 +73,7 @@ public:
: logger(ctx.getLogger()),
ctx(ctx),
parser(reader, logger),
- stack(ctx, GenericParserStates)
+ stack(parser, ctx, GenericParserStates)
{
}
diff --git a/src/formats/osxml/OsxmlEventParser.cpp b/src/formats/osxml/OsxmlEventParser.cpp
index 83c16f0..79a8dbe 100644
--- a/src/formats/osxml/OsxmlEventParser.cpp
+++ b/src/formats/osxml/OsxmlEventParser.cpp
@@ -25,6 +25,7 @@
#include <core/common/Variant.hpp>
#include <core/common/VariantReader.hpp>
#include <core/common/Utils.hpp>
+#include <core/parser/utils/TokenizedData.hpp>
#include "OsxmlAttributeLocator.hpp"
#include "OsxmlEventParser.hpp"
@@ -39,6 +40,11 @@ namespace ousia {
class OsxmlEventParserData {
public:
/**
+ * Current character data buffer.
+ */
+ TokenizedData data;
+
+ /**
* Contains the current depth of the parsing process.
*/
ssize_t depth;
@@ -51,24 +57,13 @@ public:
ssize_t annotationEndTagDepth;
/**
- * Current character data buffer.
- */
- std::vector<char> textBuf;
-
- /**
- * Current character data start.
- */
- size_t textStart;
-
- /**
- * Current character data end.
- */
- size_t textEnd;
-
- /**
- * Default constructor.
+ * Constructor taking the sourceId of the file from which the XML is being
+ * parsed.
+ *
+ * @param sourceId is the source id of the XML file from which the data is
+ * currently being parsed.
*/
- OsxmlEventParserData();
+ OsxmlEventParserData(SourceId sourceId);
/**
* Increments the depth.
@@ -91,14 +86,6 @@ public:
* @return true if character data is available.
*/
bool hasText();
-
- /**
- * Returns a Variant containing the character data and its location.
- *
- * @return a string variant containing the text data and the character
- * location.
- */
- Variant getText(SourceId sourceId);
};
/* Class GuardedExpatXmlParser */
@@ -156,7 +143,7 @@ public:
static const std::string TOP_LEVEL_TAG{"ousia"};
/**
- * Prefix used to indicate the start of an annoation (note the trailing colon)
+ * Prefix used to indicate the start of an annotation (note the trailing colon).
*/
static const std::string ANNOTATION_START_PREFIX{"a:start:"};
@@ -203,8 +190,9 @@ static void xmlStartElementHandler(void *ref, const XML_Char *name,
// If there is any text data in the buffer, issue that first
if (parser->getData().hasText()) {
- parser->getEvents().data(
- parser->getData().getText(parser->getReader().getSourceId()));
+ TokenizedData &data = parser->getData().data;
+ parser->getEvents().data(data);
+ data.clear();
}
// Read the argument locations -- this is only a stupid and slow hack,
@@ -348,8 +336,9 @@ static void xmlEndElementHandler(void *ref, const XML_Char *name)
// If there is any text data in the buffer, issue that first
if (parser->getData().hasText()) {
- parser->getEvents().data(
- parser->getData().getText(parser->getReader().getSourceId()));
+ TokenizedData &data = parser->getData().data;
+ parser->getEvents().data(data);
+ data.clear();
}
// Abort if the special ousia tag ends here
@@ -381,18 +370,8 @@ static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len)
// Synchronize the logger position
SourceLocation loc = xmlSyncLoggerPosition(p, ulen);
- // Fetch some variables for convenience
- OsxmlEventParserData &data = parser->getData();
- std::vector<char> &textBuf = data.textBuf;
-
- // Update start and end position
- if (textBuf.empty()) {
- data.textStart = loc.getStart();
- }
- data.textEnd = loc.getEnd();
-
- // Insert the data into the text buffer
- textBuf.insert(textBuf.end(), &s[0], &s[ulen]);
+ // Append the data to the buffer
+ parser->getData().data.append(std::string(s, ulen), loc.getStart());
}
/* Class OsxmlEvents */
@@ -401,8 +380,8 @@ OsxmlEvents::~OsxmlEvents() {}
/* Class OsxmlEventParser */
-OsxmlEventParserData::OsxmlEventParserData()
- : depth(0), annotationEndTagDepth(-1), textStart(0), textEnd(0)
+OsxmlEventParserData::OsxmlEventParserData(SourceId sourceId)
+ : data(sourceId), depth(0), annotationEndTagDepth(-1)
{
}
@@ -423,23 +402,7 @@ bool OsxmlEventParserData::inAnnotationEndTag()
return (annotationEndTagDepth > 0) && (depth >= annotationEndTagDepth);
}
-bool OsxmlEventParserData::hasText() { return !textBuf.empty(); }
-
-Variant OsxmlEventParserData::getText(SourceId sourceId)
-{
- // Create a variant containing the string data and the location
- Variant var =
- Variant::fromString(std::string{textBuf.data(), textBuf.size()});
- var.setLocation({sourceId, textStart, textEnd});
-
- // Reset the text buffers
- textBuf.clear();
- textStart = 0;
- textEnd = 0;
-
- // Return the variant
- return var;
-}
+bool OsxmlEventParserData::hasText() { return !data.empty(); }
/* Class OsxmlEventParser */
@@ -448,7 +411,7 @@ OsxmlEventParser::OsxmlEventParser(CharReader &reader, OsxmlEvents &events,
: reader(reader),
events(events),
logger(logger),
- data(new OsxmlEventParserData())
+ data(new OsxmlEventParserData(reader.getSourceId()))
{
}
diff --git a/src/formats/osxml/OsxmlEventParser.hpp b/src/formats/osxml/OsxmlEventParser.hpp
index 7a8c96d..4c5a485 100644
--- a/src/formats/osxml/OsxmlEventParser.hpp
+++ b/src/formats/osxml/OsxmlEventParser.hpp
@@ -96,10 +96,10 @@ public:
/**
* Called whenever string data is found.
*
- * @param data is a Variant containing the string data that was found in the
- * XML file.
+ * @param data is a TokenizedData instance containing the string data that
+ * was found in the XML file.
*/
- virtual void data(const Variant &data) = 0;
+ virtual void data(const TokenizedData &data) = 0;
};
/**
@@ -179,7 +179,9 @@ public:
OsxmlEvents &getEvents() const;
/**
- * Returns a reference at the internal data.
+ * Used internally to fetch a reference at the internal data.
+ *
+ * @return a reference at the internal OsxmlEventParserData structure.
*/
OsxmlEventParserData &getData() const;
};
diff --git a/src/formats/osxml/OsxmlParser.cpp b/src/formats/osxml/OsxmlParser.cpp
index 924d11b..10cc77a 100644
--- a/src/formats/osxml/OsxmlParser.cpp
+++ b/src/formats/osxml/OsxmlParser.cpp
@@ -18,6 +18,7 @@
#include <core/common/Variant.hpp>
#include <core/common/CharReader.hpp>
+#include <core/parser/stack/Callbacks.hpp>
#include <core/parser/stack/GenericParserStates.hpp>
#include <core/parser/stack/Stack.hpp>
#include <core/parser/ParserContext.hpp>
@@ -32,7 +33,7 @@ using namespace parser_stack;
/**
* Class containing the actual OsxmlParser implementation.
*/
-class OsxmlParserImplementation : public OsxmlEvents {
+class OsxmlParserImplementation : public OsxmlEvents, ParserCallbacks {
private:
/**
* Actual xml parser -- converts the xml stream into a set of events.
@@ -56,7 +57,7 @@ public:
*/
OsxmlParserImplementation(CharReader &reader, ParserContext &ctx)
: parser(reader, *this, ctx.getLogger()),
- stack(ctx, GenericParserStates)
+ stack(*this, ctx, GenericParserStates)
{
}
@@ -85,7 +86,17 @@ public:
void rangeEnd() override { stack.rangeEnd(); }
- void data(const Variant &data) override { stack.data(data); }
+ void data(const TokenizedData &data) override { stack.data(data); }
+
+ TokenId registerToken(const std::string &token) override
+ {
+ return Tokens::Empty;
+ }
+
+ void unregisterToken(TokenId id) override
+ {
+ // Do nothing here
+ }
};
/* Class OsxmlParser */