1 #ifndef HEADER_STRINGS_2_FST_TOKENIZER_H
2 #define HEADER_STRINGS_2_FST_TOKENIZER_H
// Alias for a pair of std::string values.
// NOTE(review): presumably holds one (input symbol, output symbol) pair, as
// suggested by make_pair_vector(input, output) below — confirm against callers.
25 typedef std::pair<std::string,std::string>
StringPair;
// A single backslash character, as a string literal.
29 #define BACKSLASH "\\"
// Placeholder token standing in for a backslash.
// NOTE(review): presumably substituted for escaped backslashes while
// tokenizing; the substitution itself is not visible in this header.
31 #define BACKSLASH_ESC "@_BACKSLASH_@"
// Placeholder token for the epsilon symbol.
// NOTE(review): its relation to the constructor's `eps` argument is not
// visible here — confirm in the implementation file.
32 #define EPSILON_SYMBOL "@_EPSILON_SYMBOL_@"
// A single backslash character, as a char literal (compare BACKSLASH above,
// which is the string-literal form).
36 #define BACKSLASH_CHAR '\\'
// Placeholder tokens named after the colon, tab and space characters.
// NOTE(review): presumably these replace escaped occurrences of those
// characters so they are not treated as separators — where and how they are
// applied is not visible in this header; confirm in the implementation file.
38 #define COL_ESCAPE "@_COLON_@"
39 #define TAB_ESCAPE "@_TAB_@"
40 #define SPACE_ESCAPE "@_SPACE_@"
// NOTE(review): the class body is not visible here. The name suggests a type
// thrown when an empty multi-character symbol is encountered — confirm where
// (and whether) it is thrown in the implementation file.
44 class EmptyMulticharSymbol
// NOTE(review): the class body is not visible here. The name suggests a type
// thrown when unescaped colons are found in a symbol (see check_cols in
// HfstStrings2FstTokenizer) — confirm in the implementation file.
47 class UnescapedColsFound
// Tokenizer class whose two tokenize_* member functions turn a std::string
// into a StringPairVector.
// NOTE(review): braces, access specifiers and data members are not visible
// here, so which members below are public and which are private helpers
// cannot be determined from this view.
50 class HfstStrings2FstTokenizer
// Constructor.
//   multichar_symbols  vector of symbol strings, taken by non-const reference.
//                      NOTE(review): whether it is modified is not visible here.
//   eps                presumably the textual representation of epsilon in the
//                      input strings — TODO confirm.
53 HfstStrings2FstTokenizer
54 (StringVector &multichar_symbols,
const std::string &eps);
// Tokenizes `str` and returns the result as a StringPairVector.
// NOTE(review): the name suggests `str` is a sequence of symbol pairs and that
// `spaces` selects space-separated symbols — confirm the exact input format.
59 StringPairVector tokenize_pair_string(
const std::string &str,
bool spaces);
// Tokenizes `str` and returns the result as a StringPairVector.
// NOTE(review): the name suggests `str` is a pair of whole strings (input and
// output side) rather than a sequence of pairs, and that `spaces` selects
// space-separated symbols — confirm the exact input format.
64 StringPairVector tokenize_string_pair(
const std::string &str,
bool spaces);
// Takes one multi-character symbol string.
// NOTE(review): presumably registers it with the tokenizer; the name
// EmptyMulticharSymbol hints that an empty symbol is rejected — confirm.
72 void add_multichar_symbol(
const std::string &multichar_symbol);
// Takes one multi-character symbol string.
// NOTE(review): "head" presumably refers to a leading part of the symbol;
// the exact meaning is not visible in this header — confirm.
75 void add_multichar_symbol_head(
const std::string &multichar_symbol);
// Builds a StringPairVector from a single StringVector.
// NOTE(review): how the elements of `v` map to pairs is not visible here.
80 StringPairVector make_pair_vector(
const StringVector &v);
// Overload that builds a StringPairVector from two StringVectors.
// NOTE(review): presumably pairs input[i] with output[i]; the handling of
// vectors of different lengths is not visible here — confirm.
84 StringPairVector make_pair_vector(
const StringVector &input,
85 const StringVector &output);
// Predicate over a position `it` in a StringVector, with `end` as the bound.
// NOTE(review): presumably reports whether the symbol at `it` is the input
// side of a pair — confirm.
88 bool is_pair_input_symbol(StringVector::const_iterator it,
89 StringVector::const_iterator end);
// Takes `symbol` by value and returns a std::string.
// NOTE(review): presumably strips escaping from the symbol; which escape
// forms are handled is not visible here.
92 std::string unescape(std::string symbol);
// Returns an int computed from `str`.
// NOTE(review): presumably the position of a colon in `str`; the value
// returned when no colon is present is not visible here — confirm.
95 int get_col_pos(
const std::string &str);
// Returns a StringVector computed from `str`.
// NOTE(review): presumably the pieces of `str` split at space characters;
// the treatment of escaped spaces is not visible here.
98 StringVector split_at_spaces(
const std::string &str);
// Checks `symbol`; returns nothing.
// NOTE(review): presumably signals UnescapedColsFound on failure — confirm.
101 void check_cols(
const std::string &symbol);
106 #endif // HEADER_STRINGS_2_FST_TOKENIZER_H
Declaration of class hfst::HfstTokenizer.
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:70
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:105
Datatypes that are needed when using the HFST API.
A tokenizer for creating transducers from UTF-8 strings.
Definition: HfstTokenizer.h:84
std::vector< std::string > StringVector
A vector of strings.
Definition: HfstDataTypes.h:87