HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FomaTransducer.h
Go to the documentation of this file.
1 // Copyright (c) 2016 University of Helsinki
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
10 #ifndef _FOMA_TRANSDUCER_H_
11 #define _FOMA_TRANSDUCER_H_
12 
13 #include "HfstSymbolDefs.h"
14 #include "HfstExceptionDefs.h"
15 #include "HfstExtractStrings.h"
16 #include "HfstFlagDiacritics.h"
17 #include <stdlib.h>
18 
19 #ifndef _FOMALIB_H_
20 #define _FOMALIB_H_
21 #include "back-ends/foma/fomalib.h"
22 #endif
23 
24 #include <cstdio>
25 #include <string>
26 #include <sstream>
27 #include <iostream>
28 
29 #include "../FormatSpecifiers.h"
30 
35 namespace hfst {
36  namespace implementations
37 {
// Empty declaration directly after the namespace's opening brace; it declares
// nothing. NOTE(review): presumably kept for editor indentation -- confirm
// before removing.
38  ;
// Using-declarations at namespace scope in a header: every file that includes
// this header gets `ostream` and `ostringstream` as names inside
// hfst::implementations.
39  using std::ostream;
40  using std::ostringstream;
41 
// Input-side stream class for foma networks: it stores a file name and a C
// `FILE *` handle and declares operations for querying the stream state and
// for reading an `fsm` from it. Only declarations are visible in this header;
// the definitions live elsewhere.
//
// The class declares no destructor and no copy constructor/assignment, so the
// compiler-generated ones apply: destroying an object does not touch
// `input_file`, and copying an object copies the raw pointer.
// NOTE(review): callers presumably have to call close() explicitly -- confirm
// in the implementation.
42  class FomaInputStream
43  {
44  private:
// NOTE(review): presumably the path passed to the one-argument constructor.
45  std::string filename;
// Raw C stdio handle held by the stream (see the lifetime note above).
46  FILE * input_file;
// Private helpers; judging by their names they skip header data that precedes
// the network in the file -- TODO confirm against the definitions.
47  void skip_identifier_version_3_0(void);
48  void skip_hfst_header(void);
49  public:
// Default constructor. NOTE(review): which file/stream it binds to is not
// visible here.
50  FomaInputStream(void);
// Not declared `explicit`, so a std::string converts implicitly to a
// FomaInputStream.
51  FomaInputStream(const std::string &filename);
52  void close(void);
// Stream state queries; exact conditions are defined in the implementation.
53  bool is_eof(void);
54  bool is_bad(void);
55  bool is_good(void);
// Non-static overload: takes no argument, so it can only inspect this object.
56  bool is_fst(void);
// The parameter is unnamed; NOTE(review): presumably a count of bytes to
// skip -- confirm.
57  void ignore(unsigned int);
// Returns a raw `fsm *`. NOTE(review): ownership of the returned network and
// the failure result (null vs. exception) are not visible here -- confirm.
58  fsm * read_transducer();
59 
// Character-level access to the underlying stream.
60  char stream_get();
61  short stream_get_short();
62  void stream_unget(char c);
63 
// Static overloads of is_fst that take the stream to inspect as an argument
// instead of using a FomaInputStream object.
// NOTE(review): whether they consume input from `f` / `s` is not visible.
64  static bool is_fst(FILE * f);
65  static bool is_fst(std::istream &s);
66  };
67 
// Output-side counterpart of FomaInputStream: it stores a file name and a C
// `FILE *` handle and declares operations for writing a single char or a
// whole `fsm`. Only declarations are visible in this header.
//
// As with the input stream, no destructor or copy operations are declared, so
// the compiler-generated ones apply and `ofile` is not closed automatically.
// NOTE(review): callers presumably have to call close() -- confirm.
68  class FomaOutputStream
69  {
70  private:
// NOTE(review): presumably the path passed to the one-argument constructor.
71  std::string filename;
// Raw C stdio handle held by the stream.
72  FILE *ofile;
// Declaration below is commented out in the original; kept as-is.
73  //void write_3_0_library_header(FILE *file);
74  public:
// Default constructor. NOTE(review): the output destination it selects is not
// visible here.
75  FomaOutputStream(void);
// Not declared `explicit`, so a std::string converts implicitly to a
// FomaOutputStream.
76  FomaOutputStream(const std::string &filename);
77  void close(void);
// Takes the character by const reference.
78  void write(const char &c);
// NOTE(review): whether `transducer` is modified or freed by the call is not
// visible here -- confirm before reusing the pointer afterwards.
79  void write_transducer(fsm * transducer);
80  };
81 
// Collection of static functions operating on foma `fsm *` networks. The
// class declares no data members and no non-static members, so it serves only
// as a scope for these functions. Only declarations are visible here.
//
// NOTE(review): for every function taking or returning `fsm *`, this header
// does not show whether the argument is consumed/modified or whether the
// result is a newly allocated network -- confirm ownership in the
// implementation before relying on either.
// NOTE(review): std::vector and std::map are used below although <vector> and
// <map> are not included directly by this header; they presumably arrive via
// the Hfst*.h includes.
// NOTE(review): unqualified names such as StringPair, StringPairSet,
// StringSet, StringVector, HfstTwoLevelPaths and FdTable are presumably found
// in the enclosing `hfst` namespace.
82  class FomaTransducer
83  {
84  public:
// --- Construction of networks -------------------------------------------
85  static fsm * create_empty_transducer(void);
86  static fsm * create_epsilon_transducer(void);
// define_transducer has five overloads, distinguished by argument type:
// a vector of string pairs, a set of string pairs (with an optional `cyclic`
// flag defaulting to false), a vector of such sets, a single symbol, and an
// input/output symbol pair.
87  static fsm * define_transducer(const hfst::StringPairVector &spv);
88  static fsm * define_transducer
89  (const hfst::StringPairSet &sps, bool cyclic=false);
90  static fsm * define_transducer(const std::vector<StringPairSet> &spsv);
91  static fsm * define_transducer
92  (const std::string &symbol);
93  static fsm * define_transducer
94  (const std::string &isymbol, const std::string &osymbol);
// --- Operations on a single network -------------------------------------
95  static fsm * copy(fsm * t);
96  static fsm * determinize(fsm * t);
97  static fsm * minimize(fsm * t);
98  static fsm * remove_epsilons(fsm * t);
99  static fsm * repeat_star(fsm * t);
100  static fsm * repeat_plus(fsm * t);
101  static fsm * repeat_n(fsm * t, unsigned int n);
102  static fsm * repeat_le_n(fsm * t, unsigned int n);
103  static fsm * optionalize(fsm * t);
104  static fsm * invert(fsm * t);
105  static fsm * reverse(fsm * t);
106  static fsm * extract_input_language(fsm * t);
107  static fsm * extract_output_language(fsm * t);
108 
109  static fsm * insert_freely(fsm * t, const StringPair &symbol_pair);
110 
// --- Predicates -----------------------------------------------------------
111  static bool are_equivalent(fsm *t1, fsm *t2);
112  static bool is_cyclic(fsm * t);
113 
// Both symbols are taken by value (hfst::String).
114  static fsm * substitute
115  (fsm * t,hfst::String old_symbol,hfst::String new_symbol);
116 
// --- Operations on two networks -----------------------------------------
117  static fsm * compose(fsm * t1,
118  fsm * t2);
119  static fsm * concatenate(fsm * t1,
120  fsm * t2);
121  static fsm * disjunct(fsm * t1,
122  fsm * t2);
123  static fsm * intersect(fsm * t1,
124  fsm * t2);
125  static fsm * subtract(fsm * t1,
126  fsm * t2);
127 
// --- Path extraction ------------------------------------------------------
// Results are delivered through `callback`. Defaults: cycles=-1, fd=NULL,
// filter_fd=false. NOTE(review): -1 presumably means "no cycle limit" and a
// null `fd` presumably disables flag-diacritic handling -- confirm.
128  static void extract_paths(fsm * t, hfst::ExtractStringsCb& callback,
129  int cycles=-1, FdTable<int>* fd=NULL,
130  bool filter_fd=false);
// Writes into the caller-supplied `results`; takes the network as const.
131  static void extract_random_paths
132  (const fsm *t, HfstTwoLevelPaths &results, int max_num);
133 
// Returns a raw pointer. NOTE(review): who deletes the table is not visible.
134  static FdTable<int>* get_flag_diacritics(fsm * t);
135 
// --- Symbol table / alphabet access -------------------------------------
136  static unsigned int get_biggest_symbol_number(fsm * t);
137  static StringVector get_symbol_vector(fsm * t);
138  static std::map<std::string, unsigned int> get_symbol_map(fsm * t);
139 
140  static void insert_to_alphabet(fsm *t, const std::string &symbol);
141  static void remove_from_alphabet(fsm *t, const std::string &symbol);
142  static StringSet get_alphabet(fsm *t);
143  static unsigned int get_symbol_number(fsm *t,
144  const std::string &symbol);
145 
// Returns void and takes both networks as non-const pointers.
// NOTE(review): presumably modifies both in place -- confirm.
146  static void harmonize(fsm *net1, fsm *net2);
147 
// --- Binary I/O on C stdio handles --------------------------------------
148  static fsm * read_net(FILE * file);
// NOTE(review): meaning of the int result (status code vs. count) is not
// visible here.
149  static int write_net(fsm * net, FILE * file);
150 
151  static void delete_foma(fsm * net);
152  static void print_test(fsm * t);
153 
// read_lexc is declared only when GENERATE_LEXC_WRAPPER evaluates to non-zero
// at preprocessing time.
154 #if GENERATE_LEXC_WRAPPER
155  static fsm * read_lexc(const std::string &filename, bool verbose);
156 #endif
157 
// --- Size queries ---------------------------------------------------------
158  static unsigned int number_of_states(fsm * net);
159  static unsigned int number_of_arcs(fsm * net);
160 
// --- Flag handling --------------------------------------------------------
// The one-argument form takes only the network; the two-argument form also
// names a single flag.
161  static fsm * eliminate_flags(fsm * t);
162  static fsm * eliminate_flag(fsm * t, const std::string & flag);
163 
164  };
165 
166 } }
167 #endif
std::string String
A UTF-8 symbol in a transition.
Definition: HfstSymbolDefs.h:59
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:105
HfstExceptionDefs.h
A file for exceptions.
HfstFlagDiacritics.h
Class declarations for flag diacritic handling.
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:109
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:82
HfstSymbolDefs.h
Typedefs and functions for symbols, symbol pairs and sets of symbols.