HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
LogWeightTransducer.h
Go to the documentation of this file.
1 // Copyright (c) 2016 University of Helsinki
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
10 #ifndef _LOG_WEIGHT_TRANSDUCER_H_
11 #define _LOG_WEIGHT_TRANSDUCER_H_
12 
13 #include "HfstSymbolDefs.h"
14 #include "HfstExceptionDefs.h"
15 #include "HfstFlagDiacritics.h"
16 
17 #if HAVE_CONFIG_H
18  #include "../../../config.h"
19 #endif
20 
21 #ifdef _MSC_VER
22  #include "back-ends/openfstwin/src/include/fst/fstlib.h"
23 #else
24  #include "back-ends/openfst/src/include/fst/fstlib.h"
25 #endif // _MSC_VER
26 
27 #include "HfstExtractStrings.h"
28 #include <cstdio>
29 #include <string>
30 #include <sstream>
31 #include <iostream>
32 //#include "HfstAlphabet.h"
33 
38 namespace hfst {
39 namespace implementations
40 {
41  using namespace fst; // NOTE(review): using-directive in a header; it is in effect for all later code in hfst::implementations in any file that includes this header
42  ; // empty declaration; has no effect
43  typedef LogArc::StateId StateId; // state identifier type, taken from LogArc
44  typedef VectorFst<LogArc> LogFst; // the transducer type used by every declaration below: a VectorFst over LogArc
45 
46  typedef std::vector<LogArc> LogArcVector; // a sequence of LogArc values
47  struct LogArcLessThan { // function object taking two LogArc values and returning bool; only declared here
48  bool operator() (const LogArc &arc1,const LogArc &arc2) const; }; // presumably true when arc1 orders before arc2, going by the struct name; the criterion is defined elsewhere (TODO confirm)
49 
50  using std::ostream;
51  using std::ostringstream;
52  using std::stringstream;
53 
54  void openfst_log_set_hopcroft(bool value); // free function taking a single flag; presumably selects Hopcroft's algorithm for minimization of log-weight transducers (TODO confirm against the implementation)
55 
56  class LogWeightInputStream // input-side stream class; read_transducer() hands back a LogFst pointer
57  {
58  private:
59  std::string filename; // presumably the name passed to the one-argument constructor (TODO confirm)
60  ifstream i_stream; // file stream held by value
61  istream &input_stream; // reference member; the stream it is bound to is chosen in the constructors, which are not visible here
62  void skip_identifier_version_3_0(void); // private helper; presumably skips a version 3.0 identifier in the input (TODO confirm)
63  void skip_hfst_header(void); // private helper; presumably skips an HFST header in the input (TODO confirm)
64  public:
65  LogWeightInputStream(void); // constructor without a file name
66  LogWeightInputStream(const std::string &filename); // constructor taking a file name
67  void close(void);
68  bool is_eof(void) const; // const state queries: is_eof, is_bad, is_good, is_fst, operator()
69  bool is_bad(void) const;
70  bool is_good(void) const;
71  bool is_fst(void) const; // instance overload; static overloads for FILE* and istream& are declared below
72  bool operator() (void) const; // NOTE(review): meaning of the call operator is not visible from this header
73  void ignore(unsigned int); // presumably skips the given number of characters (TODO confirm)
74  LogFst * read_transducer(); // returns a raw pointer; ownership is not stated here, presumably the caller's (TODO confirm)
75 
76  char stream_get(); // single-character and short reads, plus putting a character back
77  short stream_get_short();
78  void stream_unget(char c);
79 
80  static bool is_fst(FILE * f); // static checks on a C FILE* or a C++ istream
81  static bool is_fst(istream &s);
82  };
83 
84  class LogWeightOutputStream // output-side counterpart: writes single characters and LogFst transducers
85  {
86  private:
87  std::string filename; // presumably the name passed to the one-argument constructor (TODO confirm)
88  ofstream o_stream; // file stream held by value
89  ostream &output_stream; // reference member; the stream it is bound to is chosen in the constructors, which are not visible here
90  //void write_3_0_library_header(std::ostream &out);
91  public:
92  LogWeightOutputStream(void); // constructor without a file name
93  LogWeightOutputStream(const std::string &filename); // constructor taking a file name
94  void close(void);
95  void write(const char &c); // writes one character, passed by const reference
96  void write_transducer(LogFst * transducer); // takes a non-const pointer; whether the transducer is modified is not visible here
97  };
98 
99  class LogWeightTransitionIterator; // forward declaration; the full class declaration appears later in this header
100 
101  typedef StateId LogWeightState; // a state is represented directly by its StateId
102 
103  class LogWeightStateIterator // iterator over the states of a LogFst, with a next/done/value interface
104  {
105  protected:
106  StateIterator<LogFst> * iterator; // raw pointer to the underlying StateIterator. NOTE(review): a destructor is declared but no copy operations are; confirm whether the destructor deletes this and whether copying is safe
107  public:
108  LogWeightStateIterator(LogFst * t); // t is the transducer to iterate over
109  ~LogWeightStateIterator(void);
110  void next(void); // advance to the next state
111  bool done(void); // presumably true once all states have been visited (TODO confirm)
112  LogWeightState value(void); // the current state, returned as a LogWeightState (a StateId)
113  };
114 
115 
116  class LogWeightTransition // one transition: a LogArc plus a pointer to a LogFst
117  {
118  protected:
119  LogArc arc; // the arc, stored by value
120  LogFst * t; // raw pointer to a transducer; presumably non-owning and used to turn labels into symbol strings (TODO confirm)
121  public:
122  LogWeightTransition(const LogArc &arc, LogFst *t);
123  ~LogWeightTransition(void);
124  std::string get_input_symbol(void) const; // input and output symbols are returned as strings by value
125  std::string get_output_symbol(void) const;
126  LogWeightState get_target_state(void) const; // target state as a LogWeightState (a StateId)
127  LogWeight get_weight(void) const; // weight returned as a LogWeight
128  };
129 
130 
131  class LogWeightTransitionIterator // iterator over the transitions leaving one state of a LogFst, with a next/done/value interface
132  {
133  protected:
134  ArcIterator<LogFst> * arc_iterator; // raw pointer to the underlying ArcIterator. NOTE(review): a destructor is declared but no copy operations are; confirm ownership
135  LogFst * t; // raw pointer to the transducer being iterated
136  public:
137  LogWeightTransitionIterator(LogFst * t, StateId state); // state is the state whose transitions are iterated
138  ~LogWeightTransitionIterator(void);
139  void next(void); // advance to the next transition
140  bool done(void); // presumably true once all transitions have been visited (TODO confirm)
141  LogWeightTransition value(void); // the current transition, returned by value
142  };
143 
144 
145  class LogWeightTransducer // operations on LogFst transducers; every member function except get_profile_seconds() is declared static
146  {
147  public:
148  static LogFst * create_empty_transducer(void); // the operations below return raw LogFst pointers; ownership is not stated in this header, presumably the caller's (TODO confirm)
149  static LogFst * create_epsilon_transducer(void);
150 
151  // string versions
152  static LogFst * define_transducer(const std::string &symbol); // overloads build a transducer from a symbol, a symbol pair, a vector of pairs, a set of pairs, or a vector of pair sets
153  static LogFst * define_transducer
154  (const std::string &isymbol, const std::string &osymbol);
155  static LogFst * define_transducer
156  (const hfst::StringPairVector &spv);
157  static LogFst * define_transducer
158  (const hfst::StringPairSet &sps, bool cyclic=false); // cyclic defaults to false
159  static LogFst * define_transducer(const std::vector<StringPairSet> &spsv);
160 
161  // number versions
162  static LogFst * define_transducer(unsigned int number); // same overload set as above, taking symbol numbers instead of strings
163  static LogFst * define_transducer
164  (unsigned int inumber, unsigned int onumber);
165  static LogFst * define_transducer(const hfst::NumberPairVector &npv);
166  static LogFst * define_transducer
167  (const hfst::NumberPairSet &nps, bool cyclic=false); // cyclic defaults to false
168  static LogFst * define_transducer
169  (const std::vector<NumberPairSet> &npsv);
170 
171  static LogFst * copy(LogFst * t); // unary operations: take one transducer pointer and return a transducer pointer
172  static LogFst * determinize(LogFst * t);
173  static LogFst * minimize(LogFst * t);
174  static LogFst * remove_epsilons(LogFst * t);
175  static LogFst * n_best(LogFst * t, unsigned int n); // n is presumably the number of best paths to keep (TODO confirm)
176  static LogFst * repeat_star(LogFst * t);
177  static LogFst * repeat_plus(LogFst * t);
178  static LogFst * repeat_n(LogFst * t, unsigned int n);
179  static LogFst * repeat_le_n(LogFst * t, unsigned int n);
180  static LogFst * optionalize(LogFst * t);
181  static LogFst * invert(LogFst * t);
182  static LogFst * reverse(LogFst * transducer);
183  static LogFst * extract_input_language(LogFst * t);
184  static LogFst * extract_output_language(LogFst * t);
185  static void extract_paths // returns void; takes a callback object by non-const reference
186  (LogFst * t, hfst::ExtractStringsCb& callback,
187  int cycles=-1, FdTable<int64>* fd=NULL, bool filter_fd=false
188  /*bool include_spv=false*/);
189  static void extract_random_paths // returns void; results is a non-const reference parameter, presumably filled by the call (TODO confirm)
190  (const LogFst *t, HfstTwoLevelPaths &results, int max_num);
191  static LogFst * compose(LogFst * t1, // binary operations: take two transducer pointers and return a transducer pointer
192  LogFst * t2);
193  static LogFst * concatenate(LogFst * t1,
194  LogFst * t2);
195  static LogFst * disjunct(LogFst * t1,
196  LogFst * t2);
197 
198  static LogFst * disjunct(LogFst * t, const StringPairVector &spv); // disjunct overloads taking a pair vector instead of a second transducer
199  static LogFst * disjunct(LogFst * t, const NumberPairVector &npv);
200 
201  static LogFst * intersect(LogFst * t1,
202  LogFst * t2);
203  static LogFst * subtract(LogFst * t1,
204  LogFst * t2);
205  static LogFst * set_weight(LogFst * t,float f); // weights are passed as plain float in this interface
206  static LogFst * set_final_weights(LogFst * t, float weight);
207  static LogFst * transform_weights(LogFst * t,float (*func)(float f)); // func is a plain function pointer from float to float
208  static LogFst * push_weights(LogFst * t, bool to_initial_state);
209 
210  static std::pair<LogFst*, LogFst*> harmonize // returns a pair of transducer pointers; unknown_symbols_in_use defaults to true
211  (LogFst *t1, LogFst *t2, bool unknown_symbols_in_use=true);
212 
213  static void write_in_att_format(LogFst * t, FILE *ofile); // AT&T-format writers for a C FILE*; std::ostream overloads follow below
214  static void write_in_att_format_number(LogFst * t, FILE *ofile);
215 
216  static void test_minimize(void); // NOTE(review): test hook declared in the public interface
217 
218  static void write_in_att_format(LogFst * t, std::ostream &os);
219  static void write_in_att_format_number(LogFst * t, std::ostream &os);
220 
221  static LogFst * read_in_att_format(FILE *ifile); // reader counterpart; only a FILE* overload is declared
222 
223  static bool are_equivalent(LogFst *one, LogFst *another); // predicates returning bool
224  static bool is_cyclic(LogFst * t);
225  static bool is_automaton(LogFst * t);
226 
227  static FdTable<int64>* get_flag_diacritics(LogFst * t); // returns a raw FdTable pointer; ownership is not stated here (TODO confirm)
228 
229  // string versions
230  static LogFst * insert_freely(LogFst * t, const StringPair &symbol_pair);
231  static LogFst * substitute // substitute overloads: symbol by symbol, pair by pair, pair by pair set, pair by transducer
232  (LogFst * t, std::string old_symbol, std::string new_symbol);
233  static LogFst * substitute(LogFst * t,
234  StringPair old_symbol_pair,
235  StringPair new_symbol_pair);
236  static LogFst * substitute(LogFst * t,
237  StringPair old_symbol_pair,
238  StringPairSet new_symbol_pair_set);
239  static LogFst * substitute(LogFst * t,
240  const StringPair old_symbol_pair,
241  LogFst *transducer);
242 
243  // number versions
244  static LogFst * insert_freely(LogFst * t, const NumberPair &number_pair);
245  static LogFst * substitute(LogFst * t, unsigned int, unsigned int); // unnamed parameters; presumably old and new symbol numbers in that order, by analogy with the string version (TODO confirm)
246  static LogFst * substitute(LogFst * t,
247  NumberPair old_number_pair,
248  NumberPair new_number_pair);
249  static LogFst * substitute(LogFst * t,
250  const NumberPair old_number_pair,
251  LogFst *transducer);
252 
253  static void insert_to_alphabet // alphabet maintenance and lookup on a transducer
254  (LogFst *t, const std::string &symbol);
255  static void remove_from_alphabet
256  (LogFst *t, const std::string &symbol);
257  static StringSet get_alphabet(LogFst *t); // alphabet returned by value as a StringSet
258  static unsigned int get_symbol_number(LogFst *t,
259  const std::string &symbol);
260 
261  static NumberNumberMap create_mapping(LogFst * t1, LogFst * t2); // returns a number-to-number map by value
262  static void recode_symbol_numbers(LogFst * t, hfst::NumberNumberMap &km); // km is taken by non-const reference
263 
264  static LogFst * expand_arcs
265  (LogFst * t, hfst::StringSet &unknown, bool unknown_symbols_in_use); // unknown is taken by non-const reference
266 
267  float get_profile_seconds(); // NOTE(review): the only non-static member function in this class; confirm this is intended
268 
269  static unsigned int number_of_states(const LogFst * t); // takes a pointer to const, unlike most functions above
270 
271  private:
272  static fst::SymbolTable create_symbol_table(std::string name); // returns a SymbolTable by value
273  static void initialize_symbol_tables(LogFst *t);
274  static void remove_symbol_table(LogFst *t);
275 
276  /* Maps state numbers in AT&T text format to state ids used by
277  OpenFst transducers. */
278  typedef std::map<int, StateId> StateMap;
279 
280  static StateId add_and_map_state // takes the StateMap by non-const reference and returns a StateId
281  (LogFst *t, int state_number, StateMap &state_map);
282 
283  static int has_arc(LogFst &t, // NOTE(review): returns int, not bool; the meaning of the value is not visible here
284  LogArc::StateId sourcestate,
285  LogArc::Label ilabel,
286  LogArc::Label olabel);
287  static void disjunct_as_tries(LogFst &t1, // trie helpers: t1 by non-const reference, t2 by pointer to const
288  StateId t1_state,
289  const LogFst * t2,
290  StateId t2_state);
291  static void add_sub_trie(LogFst &t1,
292  StateId t1_state,
293  const LogFst * t2,
294  StateId t2_state);
295 
296  public:
297  static StateId add_state(LogFst *t); // low-level construction and inspection: states, final weights, transitions
298  static void set_final_weight(LogFst *t, StateId s, float w);
299  static void add_transition
300  (LogFst *t, StateId source,
301  std::string &isymbol, std::string &osymbol, float w, StateId target); // NOTE(review): symbols are non-const references, so temporaries and const strings cannot be passed
302  static float get_final_weight(LogFst *t, StateId s);
303  static float is_final(LogFst *t, StateId s); // NOTE(review): returns float although the name reads as a predicate; confirm intended return type
304  static StateId get_initial_state(LogFst *t);
305  static void represent_empty_transducer_as_having_one_state(LogFst *t);
306 
307  };
308 
309 } }
310 #endif
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:105
A file for exceptions.
Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:109
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:82
Typedefs and functions for symbols, symbol pairs and sets of symbols.