summaryrefslogtreecommitdiff
path: root/src/plugins/plain/DynamicTokenizer.cpp
blob: 7690395c69b8e4a7950d32da28618a68008f92d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
    Ousía
    Copyright (C) 2014  Benjamin Paaßen, Andreas Stöckel

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <memory>
#include <string>
#include <unordered_map>

#include <core/common/CharReader.hpp>

#include "DynamicTokenizer.hpp"

namespace ousia {

/**
 * The TokenDescriptor class is a simple wrapper around a standard string
 * containing the character sequence of the token.
 *
 * All members are public: the descriptor instances defined at namespace scope
 * in this file have to be constructible from outside the class.
 */
class TokenDescriptor {
public:
	/**
	 * The character sequence of the token.
	 */
	std::string str;

	/**
	 * Default constructor of the TokenDescriptor class. Used to describe
	 * special tokens. Leaves the character sequence empty.
	 *
	 * The constructor is user-provided (not merely declared) so that const
	 * instances can be default-initialized and no definition is missing at
	 * link time.
	 */
	TokenDescriptor() {}

	/**
	 * Constructor initializing the character sequence of the token.
	 *
	 * @param str is the character sequence copied into the descriptor.
	 */
	TokenDescriptor(const std::string &str) : str(str) {}
};

/* Class DynamicTokenizer */

/**
 * Stores the given whitespace mode in the whitespaceMode member.
 *
 * @param mode is the new whitespace mode.
 */
void DynamicTokenizer::setWhitespaceMode(WhitespaceMode mode)
{
	whitespaceMode = mode;
}

/**
 * Reads back the whitespace mode held in the whitespaceMode member.
 *
 * @return a copy of the current whitespaceMode value.
 */
WhitespaceMode DynamicTokenizer::getWhitespaceMode()
{
	const WhitespaceMode currentMode = whitespaceMode;
	return currentMode;
}


/* Constant initializations */

// File-local descriptor instances that the static pointer members of
// DynamicTokenizer defined below point to.
static const TokenDescriptor Empty;
static const TokenDescriptor Text;

// Out-of-class definitions of static data members must not repeat the
// "static" keyword. The initializers are namespace-qualified because
// unqualified names in a static member's initializer are looked up in the
// scope of the class first, where "Empty" would name the pointer member itself
// instead of the descriptor object above.
const TokenDescriptor* DynamicTokenizer::Empty = &ousia::Empty;
// NOTE(review): the member is called "Token" while the object it points to is
// called "Text" -- confirm the member name against DynamicTokenizer.hpp.
const TokenDescriptor* DynamicTokenizer::Token = &ousia::Text;


}