10 #ifndef _TROPICAL_WEIGHT_TRANSDUCER_H_
11 #define _TROPICAL_WEIGHT_TRANSDUCER_H_
18 #include "../../../config.h"
22 #include "back-ends/openfstwin/src/include/fst/fstlib.h"
24 #include "back-ends/openfst/src/include/fst/fstlib.h"
27 #include "HfstExtractStrings.h"
39 namespace implementations
// Shorthand for the state identifier type nested in StdArc.
43 typedef StdArc::StateId StateId;
// A std::vector whose elements are StdArc values.
45 typedef std::vector<StdArc> StdArcVector;
// Function object comparing two StdArc values. Only the declaration of
// operator() is visible here; the comparison criterion is defined elsewhere.
// NOTE(review): the name suggests a strict "less than" ordering suitable for
// sorting arcs -- confirm against the definition.
46 struct StdArcLessThan {
// Takes both arcs by const reference, returns bool, and is callable on a
// const functor.
47 bool operator() (
const StdArc &arc1,
const StdArc &arc2)
const; };
50 using std::ostringstream;
51 using std::stringstream;
// Free function taking a single bool flag and returning nothing.
// NOTE(review): presumably switches a Hopcroft-style minimization setting
// for this back-end on or off -- confirm in the implementation file.
53 void openfst_tropical_set_hopcroft(
bool value);
// Reader wrapper around an input stream that yields StdVectorFst objects.
// The class braces and access specifiers are not visible in this listing,
// so the visibility of each member cannot be determined from here.
55 class TropicalWeightInputStream
// The wrapped stream, held by reference.
60 istream &input_stream;
// Header-skipping helpers; both take no arguments and return nothing.
// NOTE(review): names suggest they consume a version identifier and an
// HFST header from input_stream -- confirm in the implementation.
61 void skip_identifier_version_3_0(
void);
62 void skip_hfst_header(
void);
// Constructors: one without arguments, one taking a file name.
// NOTE(review): which stream the no-argument form binds to is not visible
// here -- confirm.
64 TropicalWeightInputStream(
void);
65 TropicalWeightInputStream(
const std::string &filename);
// Const queries returning bool.
// NOTE(review): presumably they mirror the eof/bad/good state of
// input_stream -- confirm.
67 bool is_eof(
void)
const;
68 bool is_bad(
void)
const;
69 bool is_good(
void)
const;
// Const member overload of is_fst; static overloads taking a FILE* or an
// istream& are declared further below.
70 bool is_fst(
void)
const;
// Const call operator returning bool.
71 bool operator() (
void)
const;
// Takes an unsigned count.
// NOTE(review): presumably skips that many characters of input -- confirm.
72 void ignore(
unsigned int);
// Returns a StdVectorFst pointer.
// NOTE(review): ownership of the returned object is not visible here;
// presumably the caller must free it -- confirm.
73 StdVectorFst * read_transducer();
// Low-level stream helpers: fetch a short, and push back one char.
76 short stream_get_short();
77 void stream_unget(
char c);
// Static overloads of is_fst operating on a C FILE handle or an istream.
79 static bool is_fst(FILE * f);
80 static bool is_fst(istream &s);
// Writer wrapper around an output stream that accepts StdVectorFst objects.
// The class braces and access specifiers are not visible in this listing.
83 class TropicalWeightOutputStream
// The wrapped stream, held by reference.
88 ostream &output_stream;
// Constructor taking only the hfst_format flag, which defaults to true.
92 TropicalWeightOutputStream(
bool hfst_format=
true);
// Constructor taking a file name; here hfst_format defaults to false.
// NOTE(review): the two constructors use opposite defaults for
// hfst_format -- confirm this asymmetry is intentional.
93 TropicalWeightOutputStream
94 (
const std::string &filename,
bool hfst_format=
false);
// Takes a single char by const reference.
96 void write(
const char &c);
// Takes the transducer by non-const pointer.
// NOTE(review): presumably serializes it to output_stream without taking
// ownership -- confirm.
97 void write_transducer(StdVectorFst * transducer);
// Forward declaration; the class itself is declared later in this header.
100 class TropicalWeightTransitionIterator;
// A state of a tropical-weight transducer is represented by a StateId.
102 typedef StateId TropicalWeightState;
// Wrapper around a StateIterator over a StdVectorFst. The class braces,
// access specifiers and any other members (for example advancing or
// end-of-range tests) are not visible in this listing.
104 class TropicalWeightStateIterator
// The wrapped iterator, held by raw pointer.
// NOTE(review): presumably allocated in the constructor and released in the
// destructor -- confirm.
107 StateIterator<StdVectorFst> * iterator;
// Built from a transducer pointer; a destructor is declared explicitly.
109 TropicalWeightStateIterator(StdVectorFst * t);
110 ~TropicalWeightStateIterator(
void);
// Returns a TropicalWeightState.
// NOTE(review): presumably the state the iterator currently points at.
113 TropicalWeightState value(
void);
// Read-only view of one transition, exposing symbols as strings. The class
// braces, access specifiers and data members are not visible in this listing.
117 class TropicalWeightTransition
// Built from an arc plus the transducer it belongs to.
// NOTE(review): the transducer is presumably needed to translate the arc's
// labels into symbol strings -- confirm.
123 TropicalWeightTransition(
const StdArc &arc, StdVectorFst *t);
124 ~TropicalWeightTransition(
void);
// Const accessors; the two symbol getters return std::string by value.
125 std::string get_input_symbol(
void)
const;
126 std::string get_output_symbol(
void)
const;
127 TropicalWeightState get_target_state(
void)
const;
128 TropicalWeight get_weight(
void)
const;
// Wrapper around an ArcIterator over a StdVectorFst. The class braces,
// access specifiers and any other members are not visible in this listing.
132 class TropicalWeightTransitionIterator
// The wrapped arc iterator, held by raw pointer.
// NOTE(review): presumably allocated in the constructor and released in the
// destructor -- confirm.
135 ArcIterator<StdVectorFst> * arc_iterator;
// Built from a transducer pointer and the state whose arcs are of interest.
138 TropicalWeightTransitionIterator(StdVectorFst * t, StateId state);
139 ~TropicalWeightTransitionIterator(
void);
// Returns a TropicalWeightTransition by value.
142 TropicalWeightTransition value(
void);
// Collection of static operations on StdVectorFst pointers. The class braces
// and access specifiers are not visible in this listing.
// NOTE(review): none of the declarations state who owns the returned
// StdVectorFst pointers -- confirm before relying on either convention.
146 class TropicalWeightTransducer
// Factories taking no arguments.
149 static StdVectorFst * create_empty_transducer(
void);
150 static StdVectorFst * create_epsilon_transducer(
void);
// define_transducer overloads keyed on symbol strings: a single symbol, or
// an input/output symbol pair.
153 static StdVectorFst * define_transducer(
const std::string &symbol);
154 static StdVectorFst * define_transducer
155 (
const std::string &isymbol,
const std::string &osymbol);
// NOTE(review): the parameter lists of the next two overloads are missing
// from this listing; only the third (a vector of StringPairSet) is complete.
156 static StdVectorFst * define_transducer
158 static StdVectorFst * define_transducer
160 static StdVectorFst * define_transducer
161 (
const std::vector<StringPairSet> &spsv);
// define_transducer overloads keyed on symbol numbers, parallel to the
// string-based overloads above.
164 static StdVectorFst * define_transducer(
unsigned int number);
165 static StdVectorFst * define_transducer
166 (
unsigned int inumber,
unsigned int onumber);
167 static StdVectorFst * define_transducer
168 (
const hfst::NumberPairVector &npv);
// The set-based overload takes an extra flag, cyclic, defaulting to false.
169 static StdVectorFst * define_transducer
170 (
const hfst::NumberPairSet &nps,
bool cyclic=
false);
171 static StdVectorFst * define_transducer
172 (
const std::vector<NumberPairSet> &npsv);
// Unary operations: each takes a StdVectorFst pointer (some with an extra
// unsigned count) and returns a StdVectorFst pointer.
// NOTE(review): whether the argument is modified in place or a new object is
// returned cannot be told from these declarations -- confirm per function.
174 static StdVectorFst * copy(StdVectorFst * t);
175 static StdVectorFst * determinize(StdVectorFst * t);
176 static StdVectorFst * minimize(StdVectorFst * t);
177 static StdVectorFst * remove_epsilons(StdVectorFst * t);
178 static StdVectorFst * n_best(StdVectorFst * t,
unsigned int n);
179 static StdVectorFst * prune(StdVectorFst * t);
// Repetition operators; repeat_n and repeat_le_n take the count n.
180 static StdVectorFst * repeat_star(StdVectorFst * t);
181 static StdVectorFst * repeat_plus(StdVectorFst * t);
182 static StdVectorFst * repeat_n(StdVectorFst * t,
unsigned int n);
183 static StdVectorFst * repeat_le_n(StdVectorFst * t,
unsigned int n);
184 static StdVectorFst * optionalize(StdVectorFst * t);
185 static StdVectorFst * invert(StdVectorFst * t);
186 static StdVectorFst * reverse(StdVectorFst * transducer);
187 static StdVectorFst * extract_input_language(StdVectorFst * t);
188 static StdVectorFst * extract_output_language(StdVectorFst * t);
// Path extraction delivering results through a callback object passed by
// reference. Visible defaults: cycles = -1, fd = NULL, filter_fd = false.
// NOTE(review): the declaration is cut off after the last default in this
// listing; the meaning of cycles == -1 is not visible here -- confirm.
189 static void extract_paths
190 (StdVectorFst * t, hfst::ExtractStringsCb& callback,
191 int cycles=-1, FdTable<int64>* fd=NULL,
bool filter_fd=
false
// NOTE(review): the parameter lists of the two random-path functions are
// missing from this listing.
194 static void extract_random_paths
197 static void extract_random_paths_fd
// Binary operations returning a StdVectorFst pointer.
// NOTE(review): the declarations of compose, concatenate, the first disjunct,
// intersect and subtract are cut off after their first parameter in this
// listing, so their remaining parameters are not visible.
200 static StdVectorFst * compose(StdVectorFst * t1,
202 static StdVectorFst * concatenate(StdVectorFst * t1,
204 static StdVectorFst * disjunct(StdVectorFst * t1,
// disjunct overloads combining a transducer with a single path given as a
// vector of string pairs or of number pairs.
207 static StdVectorFst * disjunct
208 (StdVectorFst * t,
const StringPairVector &spv);
209 static StdVectorFst * disjunct
210 (StdVectorFst * t,
const NumberPairVector &npv);
// Takes t1 as non-const and t2 as pointer-to-const.
// NOTE(review): this suggests t1 is the operand that gets modified --
// confirm.
212 static fst::StdVectorFst * disjunct_as_tries(fst::StdVectorFst * t1,
213 const fst::StdVectorFst * t2);
215 static StdVectorFst * intersect(StdVectorFst * t1,
217 static StdVectorFst * subtract(StdVectorFst * t1,
// Weight manipulation; weights are passed as float.
219 static StdVectorFst * set_weight(StdVectorFst * t,
float f);
// increment defaults to false.
// NOTE(review): presumably false replaces the final weights and true adds to
// them -- confirm.
220 static StdVectorFst * set_final_weights(StdVectorFst * t,
float weight,
bool increment=
false);
// Takes a plain function pointer mapping float to float.
221 static StdVectorFst * transform_weights
222 (StdVectorFst * t,
float (*func)(
float f));
// The bool selects the push direction (towards the initial state or not).
223 static StdVectorFst * push_weights
224 (StdVectorFst * t,
bool to_initial_state);
// Returns a pair of transducer pointers; unknown_symbols_in_use defaults to
// true.
// NOTE(review): presumably aligns the symbol encodings of t1 and t2 so they
// can be combined -- confirm.
226 static std::pair<StdVectorFst*, StdVectorFst*> harmonize
227 (StdVectorFst *t1, StdVectorFst *t2,
bool unknown_symbols_in_use=
true);
// Text output in AT&T format, to a C FILE handle or to a std::ostream.
// NOTE(review): the _number variants presumably print symbol numbers instead
// of symbol strings -- confirm.
229 static void write_in_att_format(StdVectorFst * t, FILE *ofile);
230 static void write_in_att_format_number(StdVectorFst * t, FILE *ofile);
234 static void write_in_att_format(StdVectorFst * t, std::ostream &os);
235 static void write_in_att_format_number
236 (StdVectorFst * t, std::ostream &os);
// Reads from a C FILE handle and returns a transducer pointer.
238 static StdVectorFst * read_in_att_format(FILE *ifile);
// Boolean queries on one or two transducers.
240 static bool are_equivalent(StdVectorFst *one, StdVectorFst *another);
241 static bool is_cyclic(StdVectorFst * t);
242 static bool is_automaton(StdVectorFst * t);
// Returns a pointer to an FdTable keyed on int64.
// NOTE(review): ownership of the returned table is not visible here.
244 static FdTable<int64>* get_flag_diacritics(StdVectorFst * t);
// Returns void, unlike set_weight and set_final_weights, which return a
// transducer pointer.
246 static void add_to_weights(StdVectorFst * t,
float w);
247 static float get_smallest_weight(StdVectorFst * t);
// Takes the transducer as pointer-to-const.
249 static void print_alphabet(
const StdVectorFst *t);
// String-based insertion and substitution.
252 static StdVectorFst * insert_freely
253 (StdVectorFst * t,
const StringPair &symbol_pair);
// Symbol-for-symbol substitution; both strings are passed by value.
254 static StdVectorFst * substitute
255 (StdVectorFst * t, std::string old_symbol, std::string new_symbol);
// Pair-for-pair substitution; both pairs are passed by value.
256 static StdVectorFst * substitute(StdVectorFst * t,
257 StringPair old_symbol_pair,
258 StringPair new_symbol_pair);
// NOTE(review): the last parameter of this overload is missing from the
// listing.
259 static StdVectorFst * substitute(StdVectorFst * t,
260 StringPair old_symbol_pair,
// Substitution of a symbol pair by a whole transducer.
262 static StdVectorFst * substitute(StdVectorFst * t,
263 const StringPair old_symbol_pair,
264 StdVectorFst *transducer);
// Number-based counterparts of the declarations above.
267 static StdVectorFst * insert_freely
268 (StdVectorFst * t,
const NumberPair &number_pair);
// The two unsigned parameters are unnamed.
// NOTE(review): by analogy with the string overload they are presumably
// (old number, new number) -- confirm.
269 static StdVectorFst * substitute
270 (StdVectorFst * t,
unsigned int,
unsigned int);
271 static StdVectorFst * substitute(StdVectorFst * t,
272 NumberPair old_number_pair,
273 NumberPair new_number_pair);
274 static StdVectorFst * substitute(StdVectorFst * t,
275 const NumberPair old_number_pair,
276 StdVectorFst *transducer);
// Alphabet maintenance for a transducer, one symbol at a time.
278 static void insert_to_alphabet
279 (StdVectorFst *t,
const std::string &symbol);
280 static void remove_from_alphabet
281 (StdVectorFst *t,
const std::string &symbol);
// Returns the alphabet as a StringSet by value.
282 static StringSet get_alphabet(StdVectorFst *t);
// Two overloads: one starts at state s and fills the visited_states and
// symbols out-parameters, the other returns a StringSet for the whole
// transducer.
// NOTE(review): the first is presumably a recursive helper of the second --
// confirm.
283 static void get_first_input_symbols
284 (StdVectorFst *t, StateId s, std::set<StateId> & visited_states, StringSet & symbols);
285 static StringSet get_first_input_symbols(StdVectorFst *t);
// Symbol <-> number queries.
// NOTE(review): behaviour for a symbol that is not in the alphabet is not
// visible here.
286 static unsigned int get_symbol_number(StdVectorFst *t,
287 const std::string &symbol);
288 static unsigned int get_biggest_symbol_number(StdVectorFst *t);
289 static StringVector get_symbol_vector(StdVectorFst *t);
// Symbol-number mapping between two transducers and its application.
291 static NumberNumberMap create_mapping
292 (StdVectorFst * t1, StdVectorFst * t2);
293 static void recode_symbol_numbers
294 (StdVectorFst * t, hfst::NumberNumberMap &km);
// The set of unknown symbols is passed by non-const reference.
295 static StdVectorFst * expand_arcs
296 (StdVectorFst * t, hfst::StringSet &unknown,
297 bool unknown_symbols_in_use);
// NOTE(review): the remaining parameters of compose_intersect are missing
// from this listing.
300 static StdVectorFst * compose_intersect(StdVectorFst * t,
// NOTE(review): declared without `static`, unlike the neighbouring members;
// listing lines around it are missing, so it may sit inside a conditional
// block or a different scope -- confirm.
304 float get_profile_seconds();
// Size queries taking the transducer as pointer-to-const.
306 static unsigned int number_of_states(
const StdVectorFst * t);
307 static unsigned int number_of_arcs(
const StdVectorFst * t);
// NOTE(review): the first parameter line of set_symbol_table is missing from
// this listing; the visible parameter is a vector of (number, symbol) pairs
// passed by value.
310 static void set_symbol_table
312 std::vector<std::pair<unsigned short, std::string> > symbol_mappings);
// Accessors taking and returning a std::ostream pointer; a static member
// named warning_stream of the same type is declared below.
314 static void set_warning_stream(std::ostream * os);
315 static std::ostream * get_warning_stream();
// Symbol table helpers; create_symbol_table returns the table by value.
318 static fst::SymbolTable create_symbol_table(std::string name);
319 static void initialize_symbol_tables(StdVectorFst *t);
320 static void remove_symbol_table(StdVectorFst *t);
// Static data member of type std::ostream pointer.
322 static std::ostream * warning_stream;
// Maps an int state number to a StateId.
326 typedef std::map<int, StateId> StateMap;
// Takes the map by non-const reference and returns a StateId.
// NOTE(review): presumably creates the state on first sight of state_number
// and records it in state_map -- confirm.
327 static StateId add_and_map_state(StdVectorFst *t,
int state_number,
328 StateMap &state_map);
// Returns int rather than bool.
// NOTE(review): the meaning of the result (for example a target state or a
// not-found sentinel) is not visible here -- confirm.
330 static int has_arc(StdVectorFst &t,
331 StdArc::StateId sourcestate,
332 StdArc::Label ilabel,
333 StdArc::Label olabel);
// NOTE(review): the parameter lists of the next two helpers are only partly
// present in this listing.
334 static void disjunct_as_tries(fst::StdVectorFst &t1,
336 const fst::StdVectorFst * t2,
338 static void add_sub_trie(StdVectorFst &t1,
340 const StdVectorFst * t2,
// Low-level construction and inspection helpers.
344 static StateId add_state(StdVectorFst *t);
345 static void set_final_weight(StdVectorFst *t, StateId s,
float w);
// The symbols are taken by non-const reference.
346 static void add_transition
347 (StdVectorFst *t, StateId source,
348 std::string &isymbol, std::string &osymbol,
float w, StateId target);
349 static float get_final_weight(StdVectorFst *t, StateId s);
// NOTE(review): returns float although the name reads like a predicate;
// possibly intended to be bool -- confirm.
350 static float is_final(StdVectorFst *t, StateId s);
351 static StateId get_initial_state(StdVectorFst *t);
// NOTE(review): the parameter list of this declaration is missing from the
// listing.
352 static void represent_empty_transducer_as_having_one_state
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:105
Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:109
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:82
Typedefs and functions for symbols, symbol pairs and sets of symbols.