HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstTropicalTransducerTransitionData.h
1 // Copyright (c) 2016 University of Helsinki
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
#include <cassert>
#include <cstdio>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include "../HfstExceptionDefs.h"
#include "../hfstdll.h"
20 
21 namespace hfst {
22 
23  namespace implementations {
24 
/* Ordering functor for symbol strings: returns true when \a str1
   compares less than \a str2. */
struct string_comparison {
  bool operator() (const std::string &str1, const std::string &str2) const
  {
    return str1 < str2;
  }
};
30 
46  public:
48  typedef std::string SymbolType;
50  typedef float WeightType;
52  typedef std::set<SymbolType> SymbolTypeSet;
53 
54  typedef std::vector<SymbolType>
55  Number2SymbolVector;
56  typedef std::map<SymbolType, unsigned int, string_comparison>
57  Symbol2NumberMap;
58 
59  HFSTDLL static SymbolType get_epsilon()
60  {
61  return SymbolType("@_EPSILON_SYMBOL_@");
62  }
63 
64  HFSTDLL static SymbolType get_unknown()
65  {
66  return SymbolType("@_UNKNOWN_SYMBOL_@");
67  }
68 
69  HFSTDLL static SymbolType get_identity()
70  {
71  return SymbolType("@_IDENTITY_SYMBOL_@");
72  }
73 
74  public: /* FIXME: Should be private. */
75  /* Lookup tables that hold the mapping between symbol strings
76  and the numbers that represent them. */
77  HFSTDLL static Number2SymbolVector number2symbol_map;
78  HFSTDLL static Symbol2NumberMap symbol2number_map;
79  /* The biggest number in use. */
80  HFSTDLL static unsigned int max_number;
81 
82  /* Get the biggest number used to represent a symbol. */
83  HFSTDLL static unsigned int get_max_number() {
84  return max_number;
85  }
86 
87  /*
88  Get a vector that defines how numbers of a transducer must
89  be changed, i.e. harmonized, so that it follows the same
90  number-to-string encoding as all transducers that use the datatype
91  HfstTropicalTransducerTransitionData.
92 
93  \a symbols defines how numbers are mapped to strings in the
94  original transducer so that each index in \a symbols
95  is the number that corresponds to the string at that index.
96  An empty string at an index means that the index is not
97  used in the original transducer.
98 
99  The result is a vector whose each index is the number that
100  must be replaced by the number at that index when a
101  transducer is harmonized. If an index is not used in the
102  transducer, the result will contain a zero at that index.
103  */
104  static std::vector<unsigned int> get_harmonization_vector
105  (const std::vector<SymbolType> &symbols)
106  {
107  std::vector<unsigned int> harmv;
108  harmv.reserve(symbols.size());
109  harmv.resize(symbols.size(), 0);
110  for (unsigned int i=0; i<symbols.size(); i++)
111  {
112  if (symbols.at(i) != "")
113  harmv.at(i) = get_number(symbols.at(i));
114  }
115  return harmv;
116  }
117 
118  static std::vector<unsigned int> get_reverse_harmonization_vector
119  (const std::map<SymbolType, unsigned int> &symbols)
120  {
121  std::vector<unsigned int> harmv;
122  harmv.reserve(max_number+1);
123  harmv.resize(max_number+1, 0);
124  for (unsigned int i=0; i<harmv.size(); i++)
125  {
126  std::map<SymbolType, unsigned int>::const_iterator it
127  = symbols.find(get_symbol(i));
128  if (it != symbols.end())
129  harmv.at(i) = it->second;
130  }
131  return harmv;
132  }
133 
134  protected:
135  /* Get the symbol that is mapped as \a number */
136  static const std::string &get_symbol(unsigned int number)
137  {
138  if (number >= number2symbol_map.size()) {
139  std::string message("HfstTropicalTransducerTransitionData: "
140  "number ");
141  std::ostringstream oss;
142  oss << number;
143  message.append(oss.str());
144  message.append(" is not mapped to any symbol");
146  (HfstFatalException, message);
147  }
148  return number2symbol_map[number];
149  }
150 
151  /* Get the number that is used to represent \a symbol */
152  static unsigned int get_number(const std::string &symbol)
153  {
154  if(symbol == "") { // FAIL
155  Symbol2NumberMap::iterator it = symbol2number_map.find(symbol);
156  if (it == symbol2number_map.end()) {
157  std::cerr << "ERROR: No number for the empty symbol\n"
158  << std::endl;
159  }
160  else {
161  std::cerr << "ERROR: The empty symbol corresdponds to number "
162  << it->second << std::endl;
163  }
164  assert(false);
165  }
166 
167  Symbol2NumberMap::iterator it = symbol2number_map.find(symbol);
168  if (it == symbol2number_map.end())
169  {
170  max_number++;
171  symbol2number_map[symbol] = max_number;
172  number2symbol_map.push_back(symbol);
173  return max_number;
174  }
175  return it->second;
176  }
177 
178  //private: TEST
179  public:
180  /* The actual transition data */
181  unsigned int input_number;
182  unsigned int output_number;
183  WeightType weight;
184 
185  public:
186  HFSTDLL void print_transition_data()
187  {
188  fprintf(stderr, "%i:%i %f\n",
189  input_number, output_number, weight);
190  }
191 
192  public:
193 
197  input_number(0), output_number(0), weight(0) {}
198 
203  input_number = data.input_number;
204  output_number = data.output_number;
205  weight = data.weight;
206  }
207 
212  SymbolType osymbol,
213  WeightType weight) {
214  if (isymbol == "" || osymbol == "")
216  (EmptyStringException,
217  "HfstTropicalTransducerTransitionData"
218  "(SymbolType, SymbolType, WeightType)");
219 
220  input_number = get_number(isymbol);
221  output_number = get_number(osymbol);
222  this->weight = weight;
223  }
224 
226  (unsigned int inumber,
227  unsigned int onumber,
228  WeightType weight) {
229  input_number = inumber;
230  output_number = onumber;
231  this->weight = weight;
232  }
233 
235  HFSTDLL const SymbolType &get_input_symbol() const {
236  return get_symbol(input_number);
237  }
238 
240  HFSTDLL const SymbolType &get_output_symbol() const {
241  return get_symbol(output_number);
242  }
243 
244  HFSTDLL unsigned int get_input_number() const {
245  return input_number;
246  }
247 
248  HFSTDLL unsigned int get_output_number() const {
249  return output_number;
250  }
251 
253  HFSTDLL WeightType get_weight() const {
254  return weight;
255  }
256 
258  HFSTDLL void set_weight(WeightType w) {
259  weight = w;
260  }
261 
262 
263  /* Are these needed? */
264  HFSTDLL static bool is_epsilon(const SymbolType &symbol) {
265  return (symbol.compare("@_EPSILON_SYMBOL_@") == 0);
266  }
267  HFSTDLL static bool is_unknown(const SymbolType &symbol) {
268  return (symbol.compare("@_UNKNOWN_SYMBOL_@") == 0);
269  }
270  HFSTDLL static bool is_identity(const SymbolType &symbol) {
271  return (symbol.compare("@_IDENTITY_SYMBOL_@") == 0);
272  }
273  HFSTDLL static bool is_valid_symbol(const SymbolType &symbol) {
274  if (symbol == "")
275  return false;
276  return true;
277  }
278 
279  HFSTDLL static SymbolType get_marker(const SymbolTypeSet &sts) {
280  (void)sts;
281  return SymbolType("@_MARKER_SYMBOL_@");
282  }
283 
289  HFSTDLL bool operator<(const HfstTropicalTransducerTransitionData &another)
290  const {
291  if (input_number < another.input_number )
292  return true;
293  if (input_number > another.input_number)
294  return false;
295  if (output_number < another.output_number)
296  return true;
297  if (output_number > another.output_number)
298  return false;
299  return (weight < another.weight);
300  }
301 
302  // same as operator< but weight is ignored
303  HFSTDLL bool less_than_ignore_weight(const HfstTropicalTransducerTransitionData &another)
304  const {
305  if (input_number < another.input_number )
306  return true;
307  if (input_number > another.input_number)
308  return false;
309  if (output_number < another.output_number)
310  return true;
311  if (output_number > another.output_number)
312  return false;
313  return false;
314  }
315 
316  HFSTDLL void operator=(const HfstTropicalTransducerTransitionData &another)
317  {
318  input_number = another.input_number;
319  output_number = another.output_number;
320  weight = another.weight;
321  }
322 
323  friend class Number2SymbolVectorInitializer;
324  friend class Symbol2NumberMapInitializer;
325 
326  friend class ComposeIntersectFst;
327  friend class ComposeIntersectLexicon;
328  friend class ComposeIntersectRule;
329  friend class ComposeIntersectRulePair;
330  template <class C> friend class HfstTransitionGraph;
331 
332  };
333 
334  // Initialization of static members in class
335  // HfstTropicalTransducerTransitionData..
336  class Number2SymbolVectorInitializer {
337  public:
338  HFSTDLL Number2SymbolVectorInitializer
339  (HfstTropicalTransducerTransitionData::Number2SymbolVector &vect) {
340  vect.push_back(std::string("@_EPSILON_SYMBOL_@"));
341  vect.push_back(std::string("@_UNKNOWN_SYMBOL_@"));
342  vect.push_back(std::string("@_IDENTITY_SYMBOL_@"));
343  }
344  };
345 
346  class Symbol2NumberMapInitializer {
347  public:
348  HFSTDLL Symbol2NumberMapInitializer
349  (HfstTropicalTransducerTransitionData::Symbol2NumberMap &map) {
350  map["@_EPSILON_SYMBOL_@"] = 0;
351  map["@_UNKNOWN_SYMBOL_@"] = 1;
352  map["@_IDENTITY_SYMBOL_@"] = 2;
353  }
354  };
355 
356  } // namespace implementations
357 
358 } // namespace hfst
HFSTDLL const SymbolType & get_input_symbol() const
Get the input symbol.
Definition: HfstTropicalTransducerTransitionData.h:235
std::set< SymbolType > SymbolTypeSet
A set of symbols.
Definition: HfstTropicalTransducerTransitionData.h:52
float WeightType
The weight type.
Definition: HfstTropicalTransducerTransitionData.h:50
std::string SymbolType
The input and output symbol type.
Definition: HfstTropicalTransducerTransitionData.h:48
HFSTDLL bool operator<(const HfstTropicalTransducerTransitionData &another) const
Whether this transition is less than transition another.
Definition: HfstTropicalTransducerTransitionData.h:289
HFSTDLL HfstTropicalTransducerTransitionData()
Create a HfstTropicalTransducerTransitionData with epsilon input and output strings and weight zero...
Definition: HfstTropicalTransducerTransitionData.h:196
HFSTDLL void set_weight(WeightType w)
Set the weight.
Definition: HfstTropicalTransducerTransitionData.h:258
HFSTDLL HfstTropicalTransducerTransitionData(SymbolType isymbol, SymbolType osymbol, WeightType weight)
Create a HfstTropicalTransducerTransitionData with input symbol isymbol, output symbol osymbol and we...
Definition: HfstTropicalTransducerTransitionData.h:211
An error happened probably due to a bug in the HFST code.
Definition: HfstExceptionDefs.h:390
HFSTDLL const SymbolType & get_output_symbol() const
Get the output symbol.
Definition: HfstTropicalTransducerTransitionData.h:240
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:47
One implementation of template class C in HfstTransition.
Definition: HfstTropicalTransducerTransitionData.h:45
HFSTDLL WeightType get_weight() const
Get the weight.
Definition: HfstTropicalTransducerTransitionData.h:253