HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SfstTransducer.h
Go to the documentation of this file.
1 // Copyright (c) 2016 University of Helsinki
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
10 #ifndef _SFST_TRANSDUCER_H_
11 #define _SFST_TRANSDUCER_H_
12 
13 #include "HfstExceptionDefs.h"
14 #include "HfstFlagDiacritics.h"
15 #include "HfstSymbolDefs.h"
16 #include "HfstExtractStrings.h"
17 #include "back-ends/sfst/interface.h"
18 #include "back-ends/sfst/fst.h"
19 #include <cstdio>
20 #include <string>
21 #include <sstream>
22 #include <iostream>
23 
28 namespace hfst {
29 namespace implementations
30 {
31  //using namespace SFST;
// Alias so that code inside hfst::implementations can write plain
// `Transducer` for the SFST back-end transducer type.
32  typedef SFST::Transducer Transducer;
// NOTE(review): the lone ';' on the next line is an empty declaration and has
// no effect; it looks like a leftover.
33  ;
// Make the two standard stream class names usable unqualified inside this
// namespace.
34  using std::ostream;
35  using std::ostringstream;
36 
// Declaration only; the definition is not part of this header.
// Presumably toggles use of Hopcroft's minimization algorithm in the SFST
// back end -- TODO confirm against the definition.
37  void sfst_set_hopcroft(bool);
38 
// Reader for SFST transducers, built on a C stdio FILE handle.
// This header only declares the members; their definitions live elsewhere,
// so the behavioural notes below are hedged where the declaration alone does
// not establish them.
39  class SfstInputStream
40  {
41  private:
     // Name given to the one-argument constructor (presumably empty for the
     // default-constructed stream -- TODO confirm).
42  std::string filename;
     // Underlying C stream that the transducers are read from.
43  FILE * input_file;
44  bool is_minimal; // whether the next transducer in the stream is minimal
45  // this can be said in the header
     // NOTE(review): "the header" above presumably means the HFST header
     // that precedes each transducer in the stream, not this .h file -- confirm.

     // Private helper taking a string-to-number map, an SFST character code
     // and an SFST alphabet, the first and last by non-const reference.
     // Presumably registers the symbol for `c` in `string_number_map`
     // -- TODO confirm.
46  void add_symbol(StringNumberMap &string_number_map,
47  SFST::Character c,
48  SFST::Alphabet &alphabet);
49 
50  public:
     // Default constructor and constructor taking a file name.
     // NOTE(review): the one-argument constructor is not `explicit`, so a
     // std::string converts implicitly to SfstInputStream.
51  SfstInputStream(void);
52  SfstInputStream(const std::string &filename);
53  void close(void);
     // Stream-state queries; names mirror the std::istream eof/bad/good
     // predicates (exact semantics are defined in the implementation).
54  bool is_eof(void);
55  bool is_bad(void);
56  bool is_good(void);
     // Presumably reports whether the upcoming data looks like an SFST
     // transducer -- TODO confirm. See also the static overloads below.
57  bool is_fst(void);
     // Presumably skips the given number of bytes -- TODO confirm.
58  void ignore(unsigned int);
59 
     // Low-level character access: read one char, read one short, and push
     // a char back.
60  char stream_get();
61  short stream_get_short();
62  void stream_unget(char c);
63 
     // Takes header data as string pairs plus an index into it; returns bool.
     // Presumably consumes the SFST-specific header entry at `index` (such
     // as the minimality flag) -- TODO confirm.
64  bool set_implementation_specific_header_data
65  (StringPairVector &data, unsigned int index);
     // Returns a raw pointer to a transducer read from the stream.
     // NOTE(review): ownership of the returned pointer is not stated here;
     // presumably the caller must delete it -- confirm.
66  SFST::Transducer * read_transducer();
67 
     // Static overloads of is_fst for a C FILE and a C++ istream; usable
     // without constructing an SfstInputStream.
68  static bool is_fst(FILE * f);
69  static bool is_fst(std::istream &s);
70  };
71 
// Writer counterpart of SfstInputStream: emits SFST transducers through a
// C stdio FILE handle. Declarations only; definitions live elsewhere.
72  class SfstOutputStream
73  {
74  private:
     // Name given to the one-argument constructor.
75  std::string filename;
     // Underlying C stream that output is written to.
76  FILE *ofile;
     // The declaration below is commented out in the original source.
77  //void write_3_0_library_header(FILE *file, bool is_minimal);
78  public:
     // Default constructor and constructor taking a file name.
     // NOTE(review): presumably the default constructor targets standard
     // output -- confirm in the implementation.
79  SfstOutputStream(void);
80  SfstOutputStream(const std::string &filename);
81  void close(void);
     // Writes a single character (passed by const reference).
82  void write(const char &c);
     // Takes the header under construction as a mutable byte vector plus a
     // transducer; presumably appends SFST-specific header fields for `t`
     // to `header` -- TODO confirm.
83  void append_implementation_specific_header_data
84  (std::vector<char> &header, SFST::Transducer *t);
     // Writes the given transducer to the stream.
85  void write_transducer(SFST::Transducer * transducer);
86  };
87 
// Maps SFST node pointers to ints, keyed on the pointer value itself.
// The nested functors and the NL typedef come before any access specifier
// in a `class`, so they are private.
88  class HfstNode2Int {
89 
     // Hash functor: the node's address, converted to size_t, is the hash.
90  struct hashf {
91  size_t operator()(const SFST::Node *node) const {
92  return (size_t)node;
93  }
94  };
     // Equality functor: two keys are equal iff they are the same pointer.
     // NOTE(review): returns int (1 or 0) rather than bool.
95  struct equalf {
96  int operator()(const SFST::Node *n1, const SFST::Node *n2) const {
97  return (n1 == n2);
98  }
99  };
     // Hash-map type used for storage: Node* -> int with the functors above.
100  typedef SFST::hash_map<SFST::Node*, int, hashf, equalf> NL;
101 
102  private:
     // The node-to-number table.
103  NL number;
104 
105  public:
     // Returns a mutable reference to the int stored for `node`.
     // If the node has no entry yet, one is inserted with value 0 and a
     // reference to that new value is returned.
     // NOTE(review): the explicit find() is followed by a second lookup
     // inside insert() on the miss path; the trailing ';' after the function
     // body is redundant.
106  int &operator[]( SFST::Node *node ) {
107  NL::iterator it=number.find(node);
108  if (it == number.end())
109  return number.insert(NL::value_type(node, 0)).first->second;
110  return it->second;
111  };
112  };
113 
114 
// Collection of operations on SFST back-end transducers.
// Every member except get_profile_seconds() is static and works on raw
// SFST::Transducer pointers passed in by the caller. Only declarations are
// given here; the definitions live elsewhere.
// NOTE(review): this header does not say whether a returned pointer is a new
// object or the (modified) argument, nor who owns it -- check the
// implementation before relying on either.
115  class SfstTransducer
116  {
117  public:
      // --- Construction of basic transducers ---
118  static SFST::Transducer * create_empty_transducer(void);
119  static SFST::Transducer * create_epsilon_transducer(void);
120 
      // Overloads taking symbol numbers: one number, or an input/output pair.
121  static SFST::Transducer * define_transducer(unsigned int number);
122  static SFST::Transducer * define_transducer
123  (unsigned int inumber, unsigned int onumber);
124 
      // Overloads taking symbol strings: a single symbol, an input/output
      // pair, a vector of string pairs, a set of string pairs (`cyclic`
      // defaults to false), or a vector of string-pair sets.
125  static SFST::Transducer * define_transducer(const std::string &symbol);
126  static SFST::Transducer * define_transducer
127  (const std::string &isymbol, const std::string &osymbol);
128  static SFST::Transducer * define_transducer
129  (const StringPairVector &spv);
130  static SFST::Transducer * define_transducer
131  (const StringPairSet &sps, bool cyclic=false);
132  static SFST::Transducer * define_transducer
133  (const std::vector<StringPairSet> &spsv);
      // --- Unary operations (one transducer in, one transducer pointer out) ---
134  static SFST::Transducer * copy(SFST::Transducer * t);
135  static SFST::Transducer * determinize(SFST::Transducer * t);
136  static SFST::Transducer * minimize(SFST::Transducer * t);
137  static SFST::Transducer * remove_epsilons(SFST::Transducer * t);
138  static SFST::Transducer * repeat_star(SFST::Transducer * t);
139  static SFST::Transducer * repeat_plus(SFST::Transducer * t);
140  static SFST::Transducer * repeat_n(SFST::Transducer * t,unsigned int n);
141  static SFST::Transducer * repeat_le_n(SFST::Transducer * t,unsigned int n);
142  static SFST::Transducer * optionalize(SFST::Transducer * t);
143  static SFST::Transducer * invert(SFST::Transducer * t);
144  static SFST::Transducer * reverse(SFST::Transducer * transducer);
145  static SFST::Transducer * extract_input_language(SFST::Transducer * t);
146  static SFST::Transducer * extract_output_language(SFST::Transducer * t);
      // --- Path extraction ---
      // Returns a vector of transducer pointers (presumably one per path of
      // `t` -- TODO confirm).
147  static std::vector<SFST::Transducer*> extract_path_transducers
148  (SFST::Transducer *t);
      // Reports paths through `callback`. Defaults: cycles = -1, no flag
      // diacritic table (NULL), filter_fd = false.
      // Presumably cycles = -1 means "no limit on cycles" -- TODO confirm.
149  static void extract_paths
150  (SFST::Transducer * t, hfst::ExtractStringsCb& callback, int cycles=-1,
151  FdTable<SFST::Character>* fd=NULL, bool filter_fd=false);
152 
      // Fills `results`; `max_num` is presumably an upper bound on the
      // number of paths -- TODO confirm.
153  static void extract_random_paths
154  (SFST::Transducer *t, HfstTwoLevelPaths &results, int max_num);
155 
      // --- Insertion and substitution ---
156  static SFST::Transducer * insert_freely
157  (SFST::Transducer *t , const StringPair &symbol_pair);
      // Two substitute overloads: symbol-for-symbol (strings taken by
      // value), and symbol-pair-for-transducer.
158  static SFST::Transducer * substitute
159  (SFST::Transducer * t, String old_symbol, String new_symbol);
160  static SFST::Transducer * substitute
161  (SFST::Transducer *t, const StringPair &symbol_pair, SFST::Transducer *tr);
162 
      // --- Binary operations (two transducers in, one transducer pointer out) ---
163  static SFST::Transducer * compose(SFST::Transducer * t1,
164  SFST::Transducer * t2);
165  static SFST::Transducer * concatenate(SFST::Transducer * t1,
166  SFST::Transducer * t2);
167  static SFST::Transducer * disjunct(SFST::Transducer * t1,
168  SFST::Transducer * t2);
169  static SFST::Transducer * intersect(SFST::Transducer * t1,
170  SFST::Transducer * t2);
171  static SFST::Transducer * subtract(SFST::Transducer * t1,
172  SFST::Transducer * t2);
      // Returns a pair of transducer pointers; `unknown_symbols_in_use`
      // defaults to true. Presumably aligns the alphabets of t1 and t2
      // -- TODO confirm.
173  static std::pair<SFST::Transducer*, SFST::Transducer*> harmonize
174  (SFST::Transducer *t1, SFST::Transducer *t2, bool unknown_symbols_in_use=true);
175 
      // --- Boolean queries ---
176  static bool are_equivalent(SFST::Transducer * t1, SFST::Transducer * t2);
177  static bool is_cyclic(SFST::Transducer * t);
178  static bool is_automaton(SFST::Transducer * t);
179 
      // Returns a pointer to a flag-diacritic table keyed on SFST characters.
      // NOTE(review): ownership of the returned table is not stated here.
180  static FdTable<SFST::Character>* get_flag_diacritics(SFST::Transducer * t);
181 
      // --- Debug printing ---
182  static void print_test(SFST::Transducer *t);
183  static void print_alphabet(SFST::Transducer *t);
184 
      // --- Symbol table queries ---
185  static unsigned int get_biggest_symbol_number(SFST::Transducer * t);
186 
187  static StringVector get_symbol_vector(SFST::Transducer * t);
188 
189  static std::map<std::string, unsigned int> get_symbol_map(SFST::Transducer * t);
190 
      // Overload of disjunct that takes a vector of string pairs instead of
      // a second transducer.
191  static SFST::Transducer * disjunct(SFST::Transducer * t, const StringPairVector &spv);
192 
193  static StringPairSet get_symbol_pairs(SFST::Transducer *t);
194 
      // NOTE(review): the only non-static member of this class, so calling
      // it needs an SfstTransducer instance -- confirm this is intended.
195  float get_profile_seconds();
196  static unsigned int number_of_states(SFST::Transducer *t);
197  static unsigned int number_of_arcs(SFST::Transducer *t);
198 
      // --- Alphabet access and modification ---
199  static StringSet get_alphabet(SFST::Transducer *t);
200  static void insert_to_alphabet(SFST::Transducer *t, const std::string &symbol);
201  static void remove_from_alphabet
202  (SFST::Transducer *t, const std::string &symbol);
203  static unsigned int get_symbol_number(SFST::Transducer *t,
204  const std::string &symbol);
205 
206  protected:
      // --- Helpers available to derived classes only ---
207  static void initialize_alphabet(SFST::Transducer *t);
      // Takes a mutable set of strings named `unknown`; presumably expands
      // arcs labelled with the unknown symbol -- TODO confirm.
208  static SFST::Transducer * expand_arcs(SFST::Transducer * t, StringSet &unknown);
209 
      // expand / expand2 / expand_node form a family operating at
      // transducer, node (with a visited-node set) and single-arc level.
      // Presumably expand() drives expand2(), which calls expand_node()
      // -- TODO confirm in the implementation.
210  static void expand_node( SFST::Transducer *t, SFST::Node *origin, SFST::Label &l,
211  SFST::Node *target, hfst::StringSet &s );
212  static void expand2
213  ( SFST::Transducer *t, SFST::Node *node,
214  hfst::StringSet &new_symbols, std::set<SFST::Node*> &visited_nodes );
215  static void expand(SFST::Transducer *t, hfst::StringSet &new_symbols);
216 
217  };
218 } }
219 #endif
std::string String
A UTF-8 symbol in a transition.
Definition: HfstSymbolDefs.h:59
A file for exceptions.
Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:109
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:82
Typedefs and functions for symbols, symbol pairs and sets of symbols.