HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstFlagDiacritics.h
Go to the documentation of this file.
1 // Copyright (c) 2016 University of Helsinki
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
10 #ifndef _FLAG_DIACRITICS_H_
11 #define _FLAG_DIACRITICS_H_
12 
#include <cassert>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
19 
20 #include "hfstdll.h"
21 
25 namespace hfst {
26 
// The six flag diacritic operators. The enumerators keep their implicit
// values 0..5 in declaration order.
enum FdOperator
{
  Pop, // 'P': positive set
  Nop, // 'N': negative set
  Rop, // 'R': require
  Dop, // 'D': disallow
  Cop, // 'C': clear
  Uop  // 'U': unification
};

// Numeric ID standing in for a feature name.
typedef unsigned short FdFeature;

// Numeric ID standing in for a value string. 0 is the neutral (empty) value;
// the type is signed because a negative set stores the negated ID.
typedef short FdValue;
31 
32 class FdOperation
33 {
34 private:
35  FdOperator op;
36  FdFeature feature;
37  FdValue value;
38  std::string name;
39 public:
40  HFSTDLL FdOperation
41  (FdOperator op, FdFeature feat, FdValue val, const std::string& str):
42  op(op), feature(feat), value(val), name(str) {}
43 
44  HFSTDLL FdOperator Operator(void) const { return op; }
45  HFSTDLL FdFeature Feature(void) const { return feature; }
46  HFSTDLL FdValue Value(void) const { return value; }
47  HFSTDLL std::string Name(void) const { return name; }
48 
49  HFSTDLL static FdOperator char_to_operator(char c)
50  {
51  switch (c) {
52  case 'P': return Pop;
53  case 'N': return Nop;
54  case 'R': return Rop;
55  case 'D': return Dop;
56  case 'C': return Cop;
57  case 'U': return Uop;
58  default:
59  throw;
60  }
61  }
62 
63  HFSTDLL static bool is_diacritic(const std::string& diacritic_str);
64  HFSTDLL static std::string::size_type find_diacritic
65  (const std::string& diacritic_str,
66  std::string::size_type& length);
67 
68  HFSTDLL static std::string get_operator(const std::string& diacritic);
69  HFSTDLL static std::string get_feature(const std::string& diacritic);
70  HFSTDLL static std::string get_value(const std::string& diacritic);
71  HFSTDLL static bool has_value(const std::string& diacritic);
72 };
73 
74 template<class T> class FdState;
75 
79 template<class T>
80 class FdTable
81 {
82 private:
83  // Used for generating IDs that stand in for feature and value strings
84  std::map<std::string, FdFeature> feature_map;
85  std::map<std::string, FdValue> value_map;
86 
87  std::map<T, FdOperation> operations;
88  std::map<std::string, T> symbol_map;
89 public:
90  FdTable(): feature_map(), value_map()
91  { value_map[std::string()] = 0; } // empty value = neutral
92 
93  void define_diacritic(T symbol, const std::string& str)
94  {
95  if(!FdOperation::is_diacritic(str))
96  throw;
97 
98  FdOperator op = FdOperation::char_to_operator(str.at(1));
99  std::string feat;
100  std::string val;
101 
102  // Third character is always the first fullstop.
103  size_t first_full_stop_pos = 2;
104  // Find the second full stop, if there is one.
105  size_t second_full_stop_pos = str.find('.',first_full_stop_pos+1);
106  size_t last_char_pos = str.size() - 1;
107  if(second_full_stop_pos == std::string::npos)
108  {
109  assert(op == Cop || op == Dop || op == Rop);
110  feat = str.substr(first_full_stop_pos+1,
111  last_char_pos-first_full_stop_pos-1);
112  }
113  else
114  {
115  feat = str.substr(first_full_stop_pos+1,
116  second_full_stop_pos-first_full_stop_pos-1);
117  val = str.substr(second_full_stop_pos+1,
118  last_char_pos-second_full_stop_pos-1);
119  }
120 
121  if(feature_map.count(feat) == 0)
122  {
123  FdFeature next = feature_map.size();
124  feature_map[feat] = next;
125  }
126  if(value_map.count(val) == 0)
127  {
128  FdValue next = value_map.size()+1;
129  value_map[val] = next;
130  }
131 
132  operations.insert
133  (std::pair<T,FdOperation>
134  (symbol,
135  FdOperation(op, feature_map[feat], value_map[val], str)));
136  symbol_map.insert(std::pair<std::string,T>(str, symbol));
137  }
138 
139  FdFeature num_features() const { return feature_map.size(); }
140  bool is_diacritic(T symbol) const
141  { return operations.find(symbol) != operations.end(); }
142 
143  const FdOperation* get_operation(T symbol) const
144  {
145  // for some reason this fails to compile???
146  //std::map<T,FdOperation>::const_iterator i
147  // = operations.find(symbol);
148  //return (i==operations.end()) ? NULL : &(i->second);
149 
150  return (operations.find(symbol)==operations.end()) ? NULL :
151  &(operations.find(symbol)->second);
152  }
153  const FdOperation* get_operation(const std::string& symbol) const
154  {
155  return (symbol_map.find(symbol)==symbol_map.end()) ? NULL :
156  get_operation(symbol_map.find(symbol)->second);
157  }
158 
159  bool is_valid_string(const std::vector<T>& symbols) const
160  {
161  FdState<T> state(*this);
162 
163  for(size_t i=0; i<symbols.size(); i++)
164  {
165  if(!state.apply_operation(symbols[i]))
166  break;
167  }
168  return !state.fails();
169  }
170 
171  bool is_valid_string(const std::string& str) const
172  {
173  FdState<T> state(*this);
174  std::string remaining(str);
175  std::string::size_type length;
176 
177  while(true)
178  {
179  std::string::size_type next_diacritic_pos
180  = FdOperation::find_diacritic(remaining, length);
181  if(next_diacritic_pos == std::string::npos)
182  break;
183 
184  std::string diacritic = remaining.substr(0, length);
185  if(!state.apply_operation(diacritic))
186  break;
187  remaining = remaining.substr(length);
188  }
189  return !state.fails();
190  }
191 };
192 
196 template<class T>
197 class FdState
198 {
199 private:
200  const FdTable<T>* table;
201 
202  // This is indexed with values of type FdFeature
203  typename std::vector<FdValue> values;
204  T num_features;
205 
206  bool error_flag;
207 public:
208  FdState(const FdTable<T>& t):
209  table(&t), values(table->num_features()),
210  num_features(table->num_features()), error_flag(false)
211  {}
212 
213  FdState():
214  table(NULL), values(), num_features(0), error_flag(false)
215  {}
216 
217  const FdTable<T>& get_table() const {return *table;}
218 
219  const std::vector<FdValue> & get_values(void) const
220  { return values; }
221 
222  void assign_values(std::vector<FdValue> const & vals)
223  {
224  values = vals;
225  if (values.size() != num_features) {
226  error_flag = true;
227  }
228  }
229 
230  bool apply_operation(T symbol)
231  {
232  const FdOperation* op = table->get_operation(symbol);
233  if(op)
234  return apply_operation(*op);
235  return true; // if the symbol isn't a diacritic
236  }
237  bool apply_operation(const FdOperation& op)
238  {
239  switch(op.Operator()) {
240  case Pop: // positive set
241  values[op.Feature()] = op.Value();
242  return true;
243 
244  case Nop: // negative set (literally, in this implementation)
245  values[op.Feature()] = -1*op.Value();
246  return true;
247 
248  case Rop: // require
249  if (op.Value() == 0) // empty require
250  return (values[op.Feature()] != 0);
251  else // nonempty require
252  return (values[op.Feature()] == op.Value());
253 
254  case Dop: // disallow
255  if (op.Value() == 0) // empty disallow
256  return (values[op.Feature()] == 0);
257  else // nonempty disallow
258  return (values[op.Feature()] != op.Value());
259 
260  case Cop: // clear
261  values[op.Feature()] = 0;
262  return true;
263 
264  case Uop: // unification
265  if(values[op.Feature()] == 0 || /* if the feature is unset or */
266  values[op.Feature()] == op.Value() || /* the feature is at
267  this value already
268  or */
269  (values[op.Feature()] < 0 &&
270  (values[op.Feature()]*(-1) != op.Value())) /* the feature is
271  negatively set
272  to something
273  else */
274  )
275  {
276  values[op.Feature()] = op.Value();
277  return true;
278  }
279  return false;
280  }
281  throw; // for the compiler's peace of mind
282  }
283  bool apply_operation(const std::string& symbol)
284  {
285  const FdOperation* op = table->get_operation(symbol);
286  if(op)
287  return apply_operation(*op);
288  return true;
289  }
290 
291  bool fails() const {return error_flag;}
292  void reset()
293  {
294  error_flag = false;
295  values.clear();
296  values.insert(values.begin(), table->num_features(), 0);
297  }
298 };
299 
300 }
301 #endif