summaryrefslogtreecommitdiff
path: root/src/plugins/plain/PlainFormatStreamReader.hpp
diff options
context:
space:
mode:
authorAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-09 17:29:58 +0100
committerAndreas Stöckel <astoecke@techfak.uni-bielefeld.de>2015-02-09 17:29:58 +0100
commit85a4bb306a648964ddbf166e88e3263ace293768 (patch)
treee6d31d3f5b805bd2d57196ca0acd748718350984 /src/plugins/plain/PlainFormatStreamReader.hpp
parent7092a4e862e784cae34ac1346eebd4038a42144d (diff)
Implemented correct handling of fields in PlainFormatStreamReader and added a lot of unit tests.
Diffstat (limited to 'src/plugins/plain/PlainFormatStreamReader.hpp')
-rw-r--r--src/plugins/plain/PlainFormatStreamReader.hpp144
1 files changed, 100 insertions, 44 deletions
diff --git a/src/plugins/plain/PlainFormatStreamReader.hpp b/src/plugins/plain/PlainFormatStreamReader.hpp
index 737bbe8..4a11b8e 100644
--- a/src/plugins/plain/PlainFormatStreamReader.hpp
+++ b/src/plugins/plain/PlainFormatStreamReader.hpp
@@ -16,9 +16,6 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_
-#define _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_
-
/**
* @file PlainFormatStreamReader.hpp
*
@@ -29,6 +26,11 @@
* @author Andreas Stöckel (astoecke@techfak.uni-bielefeld.de)
*/
+#ifndef _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_
+#define _OUSIA_PLAIN_FORMAT_STREAM_READER_HPP_
+
+#include <stack>
+
#include <core/common/Variant.hpp>
#include "DynamicTokenizer.hpp"
@@ -38,6 +40,7 @@ namespace ousia {
// Forward declarations
class CharReader;
class Logger;
+class DataHandler;
/**
* The PlainFormatStreamReader class provides a low-level reader for the plain
@@ -69,11 +72,6 @@ public:
DATA,
/**
- * State returned if a linebreak has been reached (outside of comments).
- */
- LINEBREAK,
-
- /**
* A user-defined entity has been found. The entity sequence is stored
* in the command name.
*/
@@ -112,6 +110,66 @@ public:
END
};
+ /**
+ * Entry used for the command stack.
+ */
+ struct Command {
+ /**
+ * Name and location of the current command.
+ */
+ Variant name;
+
+ /**
+ * Arguments that were passed to the command.
+ */
+ Variant arguments;
+
+ /**
+ * Set to true if this is a command with clear begin and end.
+ */
+ bool hasRange;
+
+ /**
+ * Set to true if we are currently inside a field of this command.
+ */
+ bool inField;
+
+ /**
+ * Set to true if we are currently in the range field of the command
+ * (implies inField being set to true).
+ */
+ bool inRangeField;
+
+ /**
+ * Default constructor.
+ */
+ Command() : hasRange(false), inField(false), inRangeField(false) {}
+
+ /**
+ * Constructor of the Command class.
+ *
+ * @param name is a string variant with name and location of the
+ * command.
+ * @param arguments is a map variant with the arguments given to the
+ * command.
+ * @param hasRange should be set to true if this is a command with
+ * explicit range.
+ * @param inField is set to true if we currently are inside a field
+ * of this command.
+ * @param inRangeField is set to true if we currently inside the outer
+ * field of the command.
+ */
+ Command(const Variant &name, const Variant &arguments, bool hasRange,
+ bool inField, bool inRangeField)
+ : name(name),
+ arguments(arguments),
+ hasRange(hasRange),
+ inField(inField),
+ inRangeField(inRangeField)
+ {
+ }
+ };
+
private:
/**
* Reference to the CharReader instance from which the incomming bytes are
@@ -130,16 +188,9 @@ private:
DynamicTokenizer tokenizer;
/**
- * Variant containing the current command name (always is a string variant,
- * but additionally contains the correct locatino of the name).
- */
- Variant commandName;
-
- /**
- * Variant containing the command arguments (always is a map or array
- * variant, but additionally contains the source location of the arguments).
+ * Stack containing the current commands.
*/
- Variant commandArguments;
+ std::stack<Command> commands;
/**
* Variant containing the data that has been read (always is a string,
@@ -148,29 +199,9 @@ private:
Variant data;
/**
- * Id of the backslash token.
- */
- TokenTypeId tokenBackslash;
-
- /**
- * Id of the linebreak token.
+ * Contains the location of the last token.
*/
- TokenTypeId tokenLinebreak;
-
- /**
- * Id of the line comment token.
- */
- TokenTypeId tokenLineComment;
-
- /**
- * Id of the block comment start token.
- */
- TokenTypeId tokenBlockCommentStart;
-
- /**
- * If of the block comment end token.
- */
- TokenTypeId tokenBlockCommentEnd;
+ SourceLocation location;
/**
* Contains the field index of the current command.
@@ -189,7 +220,7 @@ private:
* Function used internally to parse a command.
*
* @param start is the start byte offset of the command (including the
- * backslash).
+ * backslash)
*/
void parseCommand(size_t start);
@@ -203,6 +234,24 @@ private:
*/
void parseLineComment();
+ /**
+ * Checks whether there is any data pending to be issued, if yes, issues it.
+ *
+ * @param handler is the data handler that contains the data that may be
+ * returned to the user.
+ * @return true if there was any data and DATA should be returned by the
+ * parse function, false otherwise.
+ */
+ bool checkIssueData(DataHandler &handler);
+
+ /**
+ * Called before any data is appended to the internal data handler. Checks
+ * whether a new field should be started or implicitly ended.
+ *
+ * @return true if FIELD_START should be returned by the parse function.
+ */
+ bool checkIssueFieldStart();
+
public:
/**
* Constructor of the PlainFormatStreamReader class. Attaches the new
@@ -224,14 +273,14 @@ public:
*/
State parse();
- /**
+ /**
* Returns a reference at the internally stored data. Only valid if
* State::DATA was returned by the "parse" function.
*
* @return a reference at a variant containing the data parsed by the
* "parse" function.
*/
- const Variant& getData() {return data;}
+ const Variant &getData() { return data; }
/**
* Returns a reference at the internally stored command name. Only valid if
@@ -240,7 +289,7 @@ public:
* @return a reference at a variant containing name and location of the
* parsed command.
*/
- const Variant& getCommandName() {return commandName;}
+ const Variant &getCommandName();
/**
* Returns a reference at the internally stored command name. Only valid if
@@ -249,7 +298,14 @@ public:
* @return a reference at a variant containing arguments given to the
* command.
*/
- const Variant& getCommandArguments() {return commandArguments;}
+ const Variant &getCommandArguments();
+
+ /**
+ * Returns a reference at the char reader.
+ *
+ * @return the last internal token location.
+ */
+ SourceLocation &getLocation() {return location;}
};
}