10 #ifndef _HFST_OL_CONVERT_H_
11 #define _HFST_OL_CONVERT_H_
14 #include "fst/fstlib.h"
15 #endif // HAVE_OPENFST
17 #include "transducer.h"
22 typedef std::map<hfst_ol::TransitionTableIndex,unsigned int>
23 HfstOlToBasicStateMap;
25 struct TransitionPlaceholder {
31 TransitionPlaceholder(
unsigned int t, SymbolNumber i, SymbolNumber o,
float w):
43 struct StatePlaceholder {
44 enum indexing_type {empty, simple_zero_index, simple_nonzero_index, nonsimple};
46 unsigned int state_number;
47 unsigned int start_index;
48 unsigned int first_transition;
49 std::vector<unsigned int> symbol_to_transition_placeholder_v;
50 std::vector<std::vector<TransitionPlaceholder> > transition_placeholders;
55 StatePlaceholder (
unsigned int state,
bool finality,
unsigned int first,
58 start_index(UINT_MAX),
59 first_transition(first),
61 final_weight(final_weight),
62 type(state == 0 ? nonsimple: empty),
66 state_number(UINT_MAX),
67 start_index(UINT_MAX),
68 first_transition(UINT_MAX),
75 bool is_simple(
void)
const
77 return type != nonsimple;
80 unsigned int number_of_transitions(
void)
const {
81 unsigned int count = 0;
82 for(std::vector<std::vector<TransitionPlaceholder> >::const_iterator it
83 = transition_placeholders.begin();
84 it != transition_placeholders.end(); ++it) {
90 bool input_present(SymbolNumber input)
const {
91 return input < symbol_to_transition_placeholder_v.size() &&
92 symbol_to_transition_placeholder_v[input] != UINT_MAX;
95 void add_input(SymbolNumber input, std::set<SymbolNumber>
const & flag_symbols)
97 if (input_present(input)) {
100 while (symbol_to_transition_placeholder_v.size() <= input) {
101 symbol_to_transition_placeholder_v.push_back(UINT_MAX);
103 symbol_to_transition_placeholder_v[input] = transition_placeholders.size();
104 transition_placeholders.push_back(std::vector<TransitionPlaceholder>());
106 if (type != nonsimple) {
111 if (input == 0 || flag_symbols.count(input) == 1) {
112 type = simple_zero_index;
114 type = simple_nonzero_index;
116 }
else if (type == simple_zero_index) {
117 if (input != 0 && flag_symbols.count(input) == 0) {
121 if (inputs > 1 || input == 0 || flag_symbols.count(input) == 1) {
128 SymbolNumber get_largest_index(
void)
130 return transition_placeholders[symbol_to_transition_placeholder_v.back()][0].input;
133 void add_transition(TransitionPlaceholder & trans)
135 transition_placeholders[symbol_to_transition_placeholder_v[trans.input]].push_back(trans);
138 std::vector<TransitionPlaceholder> & get_transition_placeholders(SymbolNumber input)
140 return transition_placeholders[symbol_to_transition_placeholder_v[input]];
143 unsigned int symbol_offset(
144 SymbolNumber
const symbol,
145 std::set<SymbolNumber>
const & flag_symbols) {
149 unsigned int offset = 0;
160 if (input_present(0)) {
161 offset = get_transition_placeholders(0).size();
163 for(std::set<SymbolNumber>::iterator flag_it = flag_symbols.begin();
164 flag_it != flag_symbols.end(); ++flag_it) {
165 if (input_present(*flag_it)) {
166 if (symbol == *flag_it) {
170 offset += get_transition_placeholders(*flag_it).size();
173 for(
unsigned int i = 1; i < symbol_to_transition_placeholder_v.size(); ++i) {
174 if (input_present(i)) {
175 if (flag_symbols.count(i) != 0) {
182 offset += get_transition_placeholders(i).size();
185 std::string message(
"error in conversion between optimized lookup "
186 "format and HfstTransducer;\ntried to calculate "
187 "symbol_offset for symbol not present in state");
194 bool compare_states_by_input_size(
195 const StatePlaceholder & lhs,
const StatePlaceholder & rhs);
196 bool compare_states_by_state_number(
197 const StatePlaceholder & lhs,
const StatePlaceholder & rhs);
199 struct IndexPlaceholders
201 std::vector<unsigned int> indices;
202 std::vector<std::pair<unsigned int, SymbolNumber> > targets;
204 bool used(
unsigned int const position)
const
206 return position < indices.size() && indices[position] != NO_TABLE_INDEX;
209 void assign(
unsigned int const position,
unsigned int target, SymbolNumber sym)
211 while (position >= indices.size()) {
212 indices.push_back(NO_TABLE_INDEX);
214 indices[position] = targets.size();
215 targets.push_back(std::pair<unsigned int, SymbolNumber>(target, sym));
218 std::pair<unsigned int, SymbolNumber> get_target(
unsigned int index)
220 return targets[indices[index]];
223 bool fits(StatePlaceholder
const & state,
224 std::set<SymbolNumber>
const & flag_symbols,
225 unsigned int const position)
const
227 if (used(position)) {
230 for (std::vector<std::vector<TransitionPlaceholder> >::const_iterator it = state.transition_placeholders.begin();
231 it != state.transition_placeholders.end(); ++it) {
232 SymbolNumber index_offset = it->at(0).input;
233 if (flag_symbols.count(index_offset) != 0) {
236 if (used(index_offset + position + 1)) {
243 bool unsuitable(
unsigned int const index,
244 SymbolNumber
const symbols,
245 float const packing_aggression)
const
260 unsigned int filled = 0;
261 for (
unsigned int i = 0; i < symbols; ++i) {
262 filled += used(index + i + 1);
263 if (filled >= (packing_aggression*symbols)) {
272 void write_transitions_from_state_placeholders(
273 TransducerTable<TransitionW> & transition_table,
274 std::vector<hfst_ol::StatePlaceholder>
275 & state_placeholders,
276 std::set<SymbolNumber> & flag_symbols);
278 void add_transitions_with(SymbolNumber symbol,
279 std::vector<TransitionPlaceholder> & transitions,
280 TransducerTable<TransitionW> & transition_table,
281 std::vector<hfst_ol::StatePlaceholder>
282 & state_placeholders,
283 std::set<SymbolNumber> & flag_symbols);
285 #if HAVE_OPENFST // Covers remainder of file
286 typedef fst::StdArc::StateId StateId;
287 typedef fst::StdArc StdArc;
288 typedef fst::StdVectorFst TransduceR;
289 typedef fst::ArcIterator<TransduceR> ArcIterator;
290 typedef std::set<StateId> StateIdSet;
291 typedef std::set<int64> OfstSymbolSet;
293 const StateIdNumber NO_ID_NUMBER = UINT_MAX;
294 const StateId NO_STATE_ID = UINT_MAX;
295 const SymbolNumber BIG_STATE_LIMIT = 1;
298 struct transition_label
304 struct compare_transition_labels
306 bool operator() (
const transition_label &l1,
307 const transition_label &l2)
const
309 if (l1.input_symbol == l2.input_symbol)
310 return l1.output_symbol < l2.output_symbol;
311 return l1.input_symbol < l2.input_symbol;
315 typedef std::set<transition_label,compare_transition_labels> LabelSet;
317 enum place_holder {EMPTY, EMPTY_START, OCCUPIED_START, OCCUPIED };
318 typedef std::vector<place_holder> PlaceHolderVector;
323 class ConvertIdNumberMap
326 typedef std::map<StateIdNumber,StateId> IdNumbersToStateIds;
327 typedef std::map<StateId,StateIdNumber> StateIdsToIdNumbers;
329 IdNumbersToStateIds id_to_node;
330 StateIdsToIdNumbers node_to_id;
332 StateIdNumber node_counter;
334 void add_node(StateId n, TransduceR *tr);
340 void set_node_maps(TransduceR * t);
343 ConvertIdNumberMap(TransduceR * t):
345 { set_node_maps(t); }
347 StateIdNumber get_number_of_nodes(
void)
const
348 {
return node_counter; }
350 StateIdNumber get_node_id(StateId n)
const;
351 StateId get_id_node(StateIdNumber i)
const;
354 typedef std::map<int64,unsigned int> OfstSymbolCountMap;
355 typedef std::set<std::string> SymbolSet;
357 class ConvertTransducerAlphabet
360 SymbolTable symbol_table;
362 TransduceR* transducer;
363 fst::SymbolTable * ofst_symbol_table;
366 std::map<int64, SymbolNumber> input_symbols_map;
367 std::map<int64, SymbolNumber> output_symbols_map;
369 void inspect_node(StateId n, StateIdSet& visited_nodes,
370 OfstSymbolCountMap& symbol_count_map, SymbolSet& all_symbol_set);
371 void get_symbol_info(
372 OfstSymbolCountMap &symbol_count_map, SymbolSet& all_symbol_set);
373 void populate_symbol_table(
374 OfstSymbolCountMap &input_symbol_counts, SymbolSet& all_symbol_set);
377 ConvertTransducerAlphabet(TransduceR* t);
379 void display()
const;
381 const SymbolTable& get_symbol_table()
const {
return symbol_table;}
382 SymbolNumber lookup_ofst_input_symbol(int64 s)
const;
383 SymbolNumber lookup_ofst_output_symbol(int64 s)
const;
384 bool is_flag_diacritic(SymbolNumber symbol)
const;
386 TransducerAlphabet to_alphabet()
const;
389 class ConvertTransition
392 SymbolNumber input_symbol;
393 SymbolNumber output_symbol;
398 StateIdNumber target_state_id;
399 TransitionTableIndex target_state_index;
403 TransitionTableIndex table_index;
409 ConvertTransition(
const StdArc &a);
411 void display()
const;
413 SymbolNumber get_input_symbol(
void)
const
414 {
return input_symbol; }
416 void set_target_state_index();
418 void set_table_index(TransitionTableIndex i)
421 TransitionTableIndex get_table_index(
void)
const
422 {
return table_index; }
424 template<
class T> T to_transition()
const;
426 bool numerical_cmp(
const ConvertTransition &another_transition)
const;
427 bool operator<(
const ConvertTransition &another_index)
const;
430 class ConvertTransitionIndex
433 SymbolNumber input_symbol;
438 ConvertTransition* first_transition;
439 TransitionTableIndex first_transition_index;
442 ConvertTransitionIndex(SymbolNumber input, ConvertTransition* transition):
443 input_symbol(input), first_transition(transition) {}
445 void display()
const;
447 SymbolNumber get_input_symbol(
void)
const
448 {
return input_symbol; }
450 ConvertTransition* get_first_transition()
const
451 {
return first_transition; }
453 void set_first_transition_index(TransitionTableIndex i)
454 { first_transition_index = i; }
456 template<
class T> T to_transition_index()
const;
458 bool operator<(
const ConvertTransitionIndex &another_index)
const;
461 struct ConvertTransitionCompare
463 bool operator() (
const ConvertTransition * t1,
464 const ConvertTransition * t2)
const
466 return t1->operator<(*t2);
470 struct ConvertTransitionIndexCompare
472 bool operator() (
const ConvertTransitionIndex * i1,
473 const ConvertTransitionIndex * i2)
const
475 return i1->operator<(*i2);
479 typedef std::set<ConvertTransition*,ConvertTransitionCompare>
480 ConvertTransitionSet;
481 typedef std::set<ConvertTransitionIndex*,ConvertTransitionIndexCompare>
482 ConvertTransitionIndexSet;
484 class ConvertFstState
487 ConvertTransitionSet transitions;
488 ConvertTransitionIndexSet transition_indices;
492 TransitionTableIndex first_transition_index;
498 TransitionTableIndex table_index;
505 void set_transitions(StateId n, TransduceR * tr);
506 void set_transition_indices(
void);
508 friend class fst_state_compare;
510 ConvertFstState(StateId n, TransduceR * tr);
513 void display()
const;
515 TransitionTableIndex set_transition_table_indices(
516 TransitionTableIndex place);
518 TransitionTableIndex get_first_transition_index()
const
519 {
return first_transition_index; }
521 void set_table_index(TransitionTableIndex i)
523 TransitionTableIndex get_table_index(
void)
const
524 {
return table_index; }
526 SymbolNumberSet * get_input_symbols(
void)
const;
528 SymbolNumber number_of_input_symbols(
void)
const
529 {
return transition_indices.size(); }
530 SymbolNumber number_of_transitions(
void)
const
531 {
return transitions.size(); }
532 bool is_final(
void)
const {
return final;}
533 bool is_big_state(
void)
const
535 return (transition_indices.size() > BIG_STATE_LIMIT);
537 bool is_start_state(
void)
const {
return id == 0;}
538 StateIdNumber get_id(
void)
const {
return id;}
541 void set_transition_target_indices();
545 void insert_transition_indices(TransducerTable<T>& index_table)
const;
548 TransitionTableIndex append_transitions(TransducerTable<T>& transition_table,
549 TransitionTableIndex place)
const;
552 typedef std::vector<ConvertFstState*> ConvertFstStateVector;
554 class ConvertTransitionTableIndices
557 PlaceHolderVector indices;
558 PlaceHolderVector::size_type lower_bound;
559 unsigned int lower_bound_test_count;
560 SymbolNumber number_of_input_symbols;
562 bool state_fits(SymbolNumberSet * input_symbols,
564 PlaceHolderVector::size_type index);
566 void insert_state(SymbolNumberSet * input_symbols,
568 PlaceHolderVector::size_type index);
569 void get_more_space(
void);
571 ConvertTransitionTableIndices(SymbolNumber input_symbol_count):
572 lower_bound(0), lower_bound_test_count(0),
573 number_of_input_symbols(input_symbol_count)
578 PlaceHolderVector::size_type add_state(ConvertFstState * state);
579 PlaceHolderVector::size_type size(
void)
const
580 {
return indices.size(); }
582 PlaceHolderVector::size_type last_full_index(
void)
const;
585 class ConvertTransducerHeader
588 static void full_traversal(TransducerHeader& h, TransduceR* tr, StateId n,
589 StateIdSet& visited_nodes, StateIdSet& nodes_in_path,
590 OfstSymbolSet& all_input_symbols);
591 static void find_input_epsilon_cycles(StateId n, StateId t,
592 StateIdSet &epsilon_targets,
593 bool unweighted_only, TransduceR * tr,
594 TransducerHeader& h);
596 static void compute_header(TransducerHeader& header,
597 TransduceR * t, SymbolNumber symbol_count,
598 TransitionTableIndex number_of_index_table_entries,
599 TransitionTableIndex number_of_target_table_entries,
603 class ConvertTransducer
607 ConvertIdNumberMap * id_number_map;
608 ConvertTransitionTableIndices * fst_indices;
609 PlaceHolderVector::size_type index_table_size;
611 TransducerHeader header;
612 ConvertTransducerAlphabet alphabet;
613 ConvertFstStateVector states;
615 void read_nodes(
void);
616 void set_transition_table_indices(
void);
617 void set_index_table_indices(
void);
619 void add_input_symbols(StateId n,
620 SymbolNumberSet &input_symbols,
621 StateIdSet &visited_nodes);
622 SymbolNumber number_of_input_symbols(
void);
624 TransitionTableIndex count_transitions(
void)
const;
626 void display_states()
const;
627 void display_tables()
const;
629 template<
class T> TransducerTable<T> make_index_table(
630 TransitionTableIndex index_table_size)
const;
631 template<
class T> TransducerTable<T> make_transition_table()
const;
633 ConvertTransducer(TransduceR * tr,
bool weighted);
634 ~ConvertTransducer();
636 ConvertIdNumberMap& get_id_number_map() {
return *id_number_map;}
637 ConvertTransducerAlphabet& get_alphabet() {
return alphabet;}
638 ConvertFstState& get_state(StateIdNumber s) {
return *states[s];}
639 bool is_weighted()
const {
return header.probe_flag(Weighted);}
641 Transducer* to_transducer()
const;
644 static ConvertTransducer* constructing_transducer;
647 #endif // HAVE_OPENFST
651 #endif // include guard
An error happened probably due to a bug in the HFST code.
Definition: HfstExceptionDefs.h:390
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:47