HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TropicalWeightTransducer.h
Go to the documentation of this file.
1 // Copyright (c) 2016 University of Helsinki
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
10 #ifndef _TROPICAL_WEIGHT_TRANSDUCER_H_
11 #define _TROPICAL_WEIGHT_TRANSDUCER_H_
12 
13 #include "HfstSymbolDefs.h"
14 #include "HfstExceptionDefs.h"
15 #include "HfstFlagDiacritics.h"
16 
17 #if HAVE_CONFIG_H
18  #include "../../../config.h"
19 #endif
20 
21 #ifdef _MSC_VER
22 #include "back-ends/openfstwin/src/include/fst/fstlib.h"
23 #else
24 #include "back-ends/openfst/src/include/fst/fstlib.h"
25 #endif // _MSC_VER
26 
27 #include "HfstExtractStrings.h"
28 #include <cstdio>
29 #include <string>
30 #include <sstream>
31 #include <iostream>
32 //#include "HfstAlphabet.h"
33 
38 namespace hfst {
39 namespace implementations
40 {
// Namespace-scope directive and type aliases used by the declarations that follow.
// NOTE(review): a using-directive inside a header makes every name of namespace
// fst visible to each file that includes this header. Presumably the unqualified
// names used below (StdArc, StdVectorFst, ...) depend on it -- confirm before
// removing.
41  using namespace fst;
// NOTE(review): a lone ';' is an empty declaration; it has no effect.
42  ;
// State identifier type, taken from the nested StateId of StdArc.
43  typedef StdArc::StateId StateId;
44 
// A std::vector whose elements are StdArc objects.
45  typedef std::vector<StdArc> StdArcVector;
// Function object declaring a const binary predicate over two StdArc values.
// NOTE(review): the name suggests a "less than" ordering for arcs; which arc
// fields are compared is decided by the definition, which is not visible here.
46  struct StdArcLessThan {
47  bool operator() (const StdArc &arc1,const StdArc &arc2) const; };
48 
// Make three standard stream class names usable without the std:: prefix
// inside this namespace.
49  using std::ostream;
50  using std::ostringstream;
51  using std::stringstream;
52 
// Free function taking a single boolean flag and returning nothing.
// NOTE(review): judging by the name it switches Hopcroft's algorithm on or off
// for this back-end -- confirm against the definition.
53  void openfst_tropical_set_hopcroft(bool value);
54 
// Input-side stream class: declares read_transducer(), which returns a pointer
// to a StdVectorFst, plus state queries and low-level character access.
// Only declarations are visible here; the definitions live elsewhere.
// NOTE(review): presumably the no-argument constructor reads from standard
// input and the file-name constructor opens that file -- confirm.
55  class TropicalWeightInputStream
56  {
57  private:
58  std::string filename;
    // ifstream and istream are written unqualified; no using-declaration for
    // them appears in this header, so they resolve only through names made
    // visible by `using namespace fst` or by the included headers.
    // Members are initialized in declaration order, so i_stream is constructed
    // before the reference input_stream is bound.
59  ifstream i_stream;
60  istream &input_stream;
    // NOTE(review): the names suggest these consume header bytes that precede
    // the transducer data -- confirm against the definitions.
61  void skip_identifier_version_3_0(void);
62  void skip_hfst_header(void);
63  public:
64  TropicalWeightInputStream(void);
65  TropicalWeightInputStream(const std::string &filename);
66  void close(void);
    // Const state queries returning bool; what each one tests is defined
    // outside this header.
67  bool is_eof(void) const;
68  bool is_bad(void) const;
69  bool is_good(void) const;
70  bool is_fst(void) const;
71  bool operator() (void) const;
72  void ignore(unsigned int);
    // NOTE(review): returns a raw pointer; ownership of the returned
    // transducer is not expressed by the signature.
73  StdVectorFst * read_transducer();
74 
    // Character-level access to the underlying stream.
75  char stream_get();
76  short stream_get_short();
77  void stream_unget(char c);
78 
    // Static overloads of is_fst that take the data source as an argument
    // instead of using this object's stream.
79  static bool is_fst(FILE * f);
80  static bool is_fst(istream &s);
81  };
82 
// Output-side stream class: declares write_transducer(), which takes a pointer
// to a StdVectorFst, and write() for a single character.
// Only declarations are visible here; the definitions live elsewhere.
// NOTE(review): presumably the constructor without a file name writes to
// standard output -- confirm.
83  class TropicalWeightOutputStream
84  {
85  private:
86  std::string filename;
    // ofstream is written unqualified and has no using-declaration in this
    // header; o_stream is declared (and therefore initialized) before the
    // reference output_stream.
87  ofstream o_stream;
88  ostream &output_stream;
89  bool hfst_format;
90  //void write_3_0_library_header(std::ostream &out);
91  public:
    // The two constructors use different defaults for hfst_format: true here,
    // false for the file-name form below.
    // NOTE(review): calling the constructor with a string literal, e.g.
    // TropicalWeightOutputStream("a.fst"), selects THIS overload: the
    // pointer-to-bool standard conversion ranks above the user-defined
    // conversion to std::string. Callers must pass a std::string to reach the
    // file-name constructor.
92  TropicalWeightOutputStream(bool hfst_format=true);
93  TropicalWeightOutputStream
94  (const std::string &filename, bool hfst_format=false);
95  void close(void);
96  void write(const char &c);
    // NOTE(review): takes a raw non-const pointer; whether the transducer is
    // modified or its ownership taken is not expressed by the signature.
97  void write_transducer(StdVectorFst * transducer);
98  };
99 
// Forward declaration; the class itself is declared further down in this header.
100  class TropicalWeightTransitionIterator;
101 
// A state is represented by its StateId.
102  typedef StateId TropicalWeightState;
103 
// Iterator-style class constructed from a StdVectorFst pointer, exposing the
// next() / done() / value() protocol; value() returns a TropicalWeightState.
// NOTE(review): the class stores a raw pointer, declares a destructor and
// declares no copy constructor or copy assignment. If the destructor deletes
// `iterator`, copying an instance would lead to a double delete -- confirm
// against the definition.
104  class TropicalWeightStateIterator
105  {
106  protected:
107  StateIterator<StdVectorFst> * iterator;
108  public:
109  TropicalWeightStateIterator(StdVectorFst * t);
110  ~TropicalWeightStateIterator(void);
111  void next(void);
     // done() and value() are not declared const.
112  bool done(void);
113  TropicalWeightState value(void);
114  };
115 
116 
// One transition: stores a StdArc by value together with a pointer to a
// StdVectorFst, and declares const accessors for input symbol, output symbol,
// target state and weight.
// NOTE(review): presumably `t` is the transducer the arc belongs to and is
// used to turn arc labels into symbol strings; whether this class owns `t` is
// not visible here -- confirm.
117  class TropicalWeightTransition
118  {
119  protected:
120  StdArc arc;
121  StdVectorFst * t;
122  public:
123  TropicalWeightTransition(const StdArc &arc, StdVectorFst *t);
124  ~TropicalWeightTransition(void);
     // Symbols are returned as std::string by value.
125  std::string get_input_symbol(void) const;
126  std::string get_output_symbol(void) const;
127  TropicalWeightState get_target_state(void) const;
128  TropicalWeight get_weight(void) const;
129  };
130 
131 
// Iterator-style class constructed from a StdVectorFst pointer and a StateId,
// exposing the next() / done() / value() protocol; value() returns a
// TropicalWeightTransition by value.
// NOTE(review): presumably it walks the arcs leaving `state`. The class stores
// raw pointers, declares a destructor and declares no copy operations; if the
// destructor deletes `arc_iterator`, copying an instance would lead to a
// double delete -- confirm against the definition.
132  class TropicalWeightTransitionIterator
133  {
134  protected:
135  ArcIterator<StdVectorFst> * arc_iterator;
136  StdVectorFst * t;
137  public:
138  TropicalWeightTransitionIterator(StdVectorFst * t, StateId state);
139  ~TropicalWeightTransitionIterator(void);
140  void next(void);
     // done() and value() are not declared const.
141  bool done(void);
142  TropicalWeightTransition value(void);
143  };
144 
145 
// Collection of operations on StdVectorFst transducers, declared as static
// member functions (get_profile_seconds() is the only non-static one).
// Only declarations are visible here. Remarks that go beyond what the
// signatures show are marked NOTE(review) and need confirming against the
// definitions.
// NOTE(review): most functions take and return raw StdVectorFst pointers; who
// owns a returned transducer, and whether an argument is modified in place,
// is not expressed by the signatures.
146  class TropicalWeightTransducer
147  {
148  public:
     // Factory functions without arguments.
149  static StdVectorFst * create_empty_transducer(void);
150  static StdVectorFst * create_epsilon_transducer(void);
151 
     // define_transducer overloads taking symbols as strings: a single symbol,
     // an input/output symbol pair, a vector of pairs, a set of pairs (with an
     // optional `cyclic` flag, default false) and a vector of pair sets.
152  // string versions
153  static StdVectorFst * define_transducer(const std::string &symbol);
154  static StdVectorFst * define_transducer
155  (const std::string &isymbol, const std::string &osymbol);
156  static StdVectorFst * define_transducer
157  (const hfst::StringPairVector &spv);
158  static StdVectorFst * define_transducer
159  (const hfst::StringPairSet &sps, bool cyclic=false);
160  static StdVectorFst * define_transducer
161  (const std::vector<StringPairSet> &spsv);
162 
     // The same five overloads with symbols given as unsigned numbers.
163  // number versions
164  static StdVectorFst * define_transducer(unsigned int number);
165  static StdVectorFst * define_transducer
166  (unsigned int inumber, unsigned int onumber);
167  static StdVectorFst * define_transducer
168  (const hfst::NumberPairVector &npv);
169  static StdVectorFst * define_transducer
170  (const hfst::NumberPairSet &nps, bool cyclic=false);
171  static StdVectorFst * define_transducer
172  (const std::vector<NumberPairSet> &npsv);
173 
     // Operations on a single transducer; some take an additional count n.
174  static StdVectorFst * copy(StdVectorFst * t);
175  static StdVectorFst * determinize(StdVectorFst * t);
176  static StdVectorFst * minimize(StdVectorFst * t);
177  static StdVectorFst * remove_epsilons(StdVectorFst * t);
178  static StdVectorFst * n_best(StdVectorFst * t, unsigned int n);
179  static StdVectorFst * prune(StdVectorFst * t);
180  static StdVectorFst * repeat_star(StdVectorFst * t);
181  static StdVectorFst * repeat_plus(StdVectorFst * t);
182  static StdVectorFst * repeat_n(StdVectorFst * t, unsigned int n);
183  static StdVectorFst * repeat_le_n(StdVectorFst * t, unsigned int n);
184  static StdVectorFst * optionalize(StdVectorFst * t);
185  static StdVectorFst * invert(StdVectorFst * t);
186  static StdVectorFst * reverse(StdVectorFst * transducer);
187  static StdVectorFst * extract_input_language(StdVectorFst * t);
188  static StdVectorFst * extract_output_language(StdVectorFst * t);
     // Path extraction reporting through a callback object. Defaults:
     // cycles=-1, fd=NULL, filter_fd=false.
     // NOTE(review): presumably cycles=-1 means "no limit on cycles" -- confirm.
189  static void extract_paths
190  (StdVectorFst * t, hfst::ExtractStringsCb& callback,
191  int cycles=-1, FdTable<int64>* fd=NULL, bool filter_fd=false
192  /*bool include_spv=false*/);
193 
     // Random path extraction; results are written into the `results`
     // reference parameter.
194  static void extract_random_paths
195  (StdVectorFst *t, HfstTwoLevelPaths &results, int max_num);
196 
197  static void extract_random_paths_fd
198  (StdVectorFst *t, HfstTwoLevelPaths &results, int max_num, bool filter_fd);
199 
     // Operations on two transducers.
200  static StdVectorFst * compose(StdVectorFst * t1,
201  StdVectorFst * t2);
202  static StdVectorFst * concatenate(StdVectorFst * t1,
203  StdVectorFst * t2);
204  static StdVectorFst * disjunct(StdVectorFst * t1,
205  StdVectorFst * t2);
206 
     // disjunct overloads whose second operand is a vector of symbol pairs or
     // of number pairs instead of a transducer.
207  static StdVectorFst * disjunct
208  (StdVectorFst * t, const StringPairVector &spv);
209  static StdVectorFst * disjunct
210  (StdVectorFst * t, const NumberPairVector &npv);
211 
     // Public overload taking pointers; t2 is const. A private overload taking
     // t1 by reference plus two state ids is declared further down.
212  static fst::StdVectorFst * disjunct_as_tries(fst::StdVectorFst * t1,
213  const fst::StdVectorFst * t2);
214 
215  static StdVectorFst * intersect(StdVectorFst * t1,
216  StdVectorFst * t2);
217  static StdVectorFst * subtract(StdVectorFst * t1,
218  StdVectorFst * t2);
     // Weight handling; weights are passed as float.
219  static StdVectorFst * set_weight(StdVectorFst * t,float f);
220  static StdVectorFst * set_final_weights(StdVectorFst * t, float weight, bool increment=false);
     // Takes a pointer to a float -> float function.
221  static StdVectorFst * transform_weights
222  (StdVectorFst * t,float (*func)(float f));
223  static StdVectorFst * push_weights
224  (StdVectorFst * t, bool to_initial_state);
225 
     // Returns a pair of transducer pointers; unknown_symbols_in_use defaults
     // to true.
226  static std::pair<StdVectorFst*, StdVectorFst*> harmonize
227  (StdVectorFst *t1, StdVectorFst *t2, bool unknown_symbols_in_use=true);
228 
     // AT&T text format output to a C FILE*.
229  static void write_in_att_format(StdVectorFst * t, FILE *ofile);
230  static void write_in_att_format_number(StdVectorFst * t, FILE *ofile);
231 
232  //static void test_minimize(void);
233 
     // AT&T text format output to a std::ostream.
234  static void write_in_att_format(StdVectorFst * t, std::ostream &os);
235  static void write_in_att_format_number
236  (StdVectorFst * t, std::ostream &os);
237 
     // AT&T text format input; only a FILE* overload is declared.
238  static StdVectorFst * read_in_att_format(FILE *ifile);
239 
     // Boolean queries.
240  static bool are_equivalent(StdVectorFst *one, StdVectorFst *another);
241  static bool is_cyclic(StdVectorFst * t);
242  static bool is_automaton(StdVectorFst * t);
243 
     // NOTE(review): returns a raw FdTable pointer; ownership is not expressed
     // by the signature.
244  static FdTable<int64>* get_flag_diacritics(StdVectorFst * t);
245 
246  static void add_to_weights(StdVectorFst * t, float w);
247  static float get_smallest_weight(StdVectorFst * t);
248 
249  static void print_alphabet(const StdVectorFst *t);
250 
     // insert_freely / substitute overloads with symbols given as strings.
     // Except for `const StringPair &symbol_pair`, the symbol arguments are
     // taken by value.
251  // string versions
252  static StdVectorFst * insert_freely
253  (StdVectorFst * t, const StringPair &symbol_pair);
254  static StdVectorFst * substitute
255  (StdVectorFst * t, std::string old_symbol, std::string new_symbol);
256  static StdVectorFst * substitute(StdVectorFst * t,
257  StringPair old_symbol_pair,
258  StringPair new_symbol_pair);
259  static StdVectorFst * substitute(StdVectorFst * t,
260  StringPair old_symbol_pair,
261  StringPairSet new_symbol_pair_set);
262  static StdVectorFst * substitute(StdVectorFst * t,
263  const StringPair old_symbol_pair,
264  StdVectorFst *transducer);
265 
     // insert_freely / substitute overloads with symbols given as numbers.
     // No NumberPairSet counterpart of the StringPairSet overload is declared.
266  // number versions
267  static StdVectorFst * insert_freely
268  (StdVectorFst * t, const NumberPair &number_pair);
269  static StdVectorFst * substitute
270  (StdVectorFst * t, unsigned int, unsigned int);
271  static StdVectorFst * substitute(StdVectorFst * t,
272  NumberPair old_number_pair,
273  NumberPair new_number_pair);
274  static StdVectorFst * substitute(StdVectorFst * t,
275  const NumberPair old_number_pair,
276  StdVectorFst *transducer);
277 
     // Alphabet and symbol-number access.
278  static void insert_to_alphabet
279  (StdVectorFst *t, const std::string &symbol);
280  static void remove_from_alphabet
281  (StdVectorFst *t, const std::string &symbol);
282  static StringSet get_alphabet(StdVectorFst *t);
     // This overload takes a state plus two reference parameters
     // (visited_states, symbols) and returns nothing; the overload below takes
     // only the transducer and returns a StringSet.
283  static void get_first_input_symbols
284  (StdVectorFst *t, StateId s, std::set<StateId> & visited_states, StringSet & symbols);
285  static StringSet get_first_input_symbols(StdVectorFst *t);
286  static unsigned int get_symbol_number(StdVectorFst *t,
287  const std::string &symbol);
288  static unsigned int get_biggest_symbol_number(StdVectorFst *t);
289  static StringVector get_symbol_vector(StdVectorFst *t);
290 
     // Symbol-number mapping between transducers.
291  static NumberNumberMap create_mapping
292  (StdVectorFst * t1, StdVectorFst * t2);
     // The map is taken by non-const reference.
293  static void recode_symbol_numbers
294  (StdVectorFst * t, hfst::NumberNumberMap &km);
295  static StdVectorFst * expand_arcs
296  (StdVectorFst * t, hfst::StringSet &unknown,
297  bool unknown_symbols_in_use);
298 
     // Compiled only when the macro FOO is defined; the type Grammar is not
     // declared anywhere in this header.
299 #ifdef FOO
300  static StdVectorFst * compose_intersect(StdVectorFst * t,
301  Grammar * grammar);
302 #endif
303 
     // NOTE(review): the only non-static member function in this class, so it
     // needs an instance to be called -- confirm this is intended.
304  float get_profile_seconds();
305 
306  static unsigned int number_of_states(const StdVectorFst * t);
307  static unsigned int number_of_arcs(const StdVectorFst * t);
308 
     // The vector of (number, symbol) pairs is taken by value.
309  // for HFST version 2 transducer handling
310  static void set_symbol_table
311  (StdVectorFst * t,
312  std::vector<std::pair<unsigned short, std::string> > symbol_mappings);
313 
     // Accessors for the static warning_stream pointer declared in the private
     // section below.
314  static void set_warning_stream(std::ostream * os);
315  static std::ostream * get_warning_stream();
316 
317  private:
     // Returns a SymbolTable by value.
318  static fst::SymbolTable create_symbol_table(std::string name);
319  static void initialize_symbol_tables(StdVectorFst *t);
320  static void remove_symbol_table(StdVectorFst *t);
321 
     // Declaration only; a static data member needs a definition outside the
     // class, which is not visible here.
322  static std::ostream * warning_stream;
323 
324  /* Maps state numbers in AT&T text format to state ids used by
325  OpenFst transducers. */
326  typedef std::map<int, StateId> StateMap;
327  static StateId add_and_map_state(StdVectorFst *t, int state_number,
328  StateMap &state_map);
329 
     // NOTE(review): returns int rather than bool; the meaning of the value is
     // not visible here.
330  static int has_arc(StdVectorFst &t,
331  StdArc::StateId sourcestate,
332  StdArc::Label ilabel,
333  StdArc::Label olabel);
     // Private helpers taking t1 by reference and a state id in each transducer.
334  static void disjunct_as_tries(fst::StdVectorFst &t1,
335  StateId t1_state,
336  const fst::StdVectorFst * t2,
337  StateId t2_state);
338  static void add_sub_trie(StdVectorFst &t1,
339  StateId t1_state,
340  const StdVectorFst * t2,
341  StateId t2_state);
342 
     // Second public section: state- and transition-level construction and
     // inspection.
343  public:
344  static StateId add_state(StdVectorFst *t);
345  static void set_final_weight(StdVectorFst *t, StateId s, float w);
     // NOTE(review): isymbol and osymbol are non-const references, so callers
     // cannot pass temporaries or string literals.
346  static void add_transition
347  (StdVectorFst *t, StateId source,
348  std::string &isymbol, std::string &osymbol, float w, StateId target);
349  static float get_final_weight(StdVectorFst *t, StateId s);
     // NOTE(review): declared to return float although the name reads like a
     // boolean predicate -- confirm what the returned value means.
350  static float is_final(StdVectorFst *t, StateId s);
351  static StateId get_initial_state(StdVectorFst *t);
352  static void represent_empty_transducer_as_having_one_state
353  (StdVectorFst *t);
354 
355  };
356 
357 } }
358 #endif
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:105
HfstExceptionDefs.h: A file for exceptions.
HfstFlagDiacritics.h: Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:109
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:82
HfstSymbolDefs.h: Typedefs and functions for symbols, symbol pairs and sets of symbols.