11 #ifndef _HFST_OL_TRANSDUCER_TRANSDUCER_H_
12 #define _HFST_OL_TRANSDUCER_TRANSDUCER_H_
#include <cstring>

#include "../../HfstExceptionDefs.h"
#include "../../HfstFlagDiacritics.h"
#include "../../HfstSymbolDefs.h"
40 typedef SSIZE_T ssize_t;
46 using hfst::FdOperation;
53 typedef unsigned short SymbolNumber;
54 typedef unsigned int TransitionTableIndex;
55 typedef unsigned int TransitionNumber;
56 typedef unsigned int StateIdNumber;
57 typedef short ValueNumber;
59 typedef std::set<SymbolNumber> SymbolNumberSet;
60 typedef std::vector<SymbolNumber> SymbolNumberVector;
61 typedef std::set<TransitionTableIndex> TransitionTableIndexSet;
62 typedef std::vector<std::string> SymbolTable;
67 typedef std::vector<std::string> StringVector;
70 typedef std::vector<short> FlagDiacriticState;
71 typedef std::map<SymbolNumber, hfst::FdOperation> OperationMap;
72 typedef std::map<std::string, SymbolNumber> StringSymbolMap;
78 TransitionTableIndex index;
79 FlagDiacriticState flags;
80 TraversalState(TransitionTableIndex i, FlagDiacriticState f):
82 bool operator==(
const TraversalState & rhs)
const;
83 bool operator<(
const TraversalState & rhs)
const;
86 typedef std::set<TraversalState> TraversalStates;
89 const SymbolNumber NO_SYMBOL_NUMBER = (std::numeric_limits<SymbolNumber>::max)();
90 const TransitionTableIndex NO_TABLE_INDEX =
91 (std::numeric_limits<TransitionTableIndex>::max)();
92 const unsigned long NO_COUNTER = (std::numeric_limits<unsigned long>::max)();
93 const Weight INFINITE_WEIGHT =
static_cast<float>(NO_TABLE_INDEX);
95 enum HeaderFlag {Weighted, Deterministic, Input_deterministic, Minimized,
96 Cyclic, Has_epsilon_epsilon_transitions,
97 Has_input_epsilon_transitions, Has_input_epsilon_cycles,
98 Has_unweighted_input_epsilon_cycles};
102 const TransitionTableIndex TRANSITION_TARGET_TABLE_START = 2147483648u;
103 const unsigned int MAX_IO_LEN = 10000;
104 const unsigned int MAX_RECURSION_DEPTH = 5000;
109 bool should_ascii_tokenize(
unsigned char c);
111 inline bool indexes_transition_table(
const TransitionTableIndex i)
113 return i >= TRANSITION_TARGET_TABLE_START;
115 inline bool indexes_transition_index_table(
const TransitionTableIndex i)
117 return i < TRANSITION_TARGET_TABLE_START;
120 class TransducerHeader
123 SymbolNumber number_of_input_symbols;
124 SymbolNumber number_of_symbols;
125 TransitionTableIndex size_of_transition_index_table;
126 TransitionTableIndex size_of_transition_target_table;
128 StateIdNumber number_of_states;
129 TransitionNumber number_of_transitions;
133 bool input_deterministic;
136 bool has_epsilon_epsilon_transitions;
137 bool has_input_epsilon_transitions;
138 bool has_input_epsilon_cycles;
139 bool has_unweighted_input_epsilon_cycles;
141 static void header_error()
// Reads one raw header field of type T from the stream.
//
// Exactly sizeof(T) bytes are copied verbatim from `is` into the result, so
// the field is interpreted in the byte order of the running machine.
//
// The result is value-initialised first: if the stream is exhausted or
// already in a failed state, the function returns a well-defined zero
// instead of an indeterminate value. The failure itself is still visible to
// the caller through the stream's state flags.
template <class T>
static T read_property(std::istream& is)
{
    T p = T();
    is.read(reinterpret_cast<char*>(&p), sizeof(T));
    return p;
}
// Writes one raw header field to the stream: the sizeof(T) bytes of `prop`
// are emitted verbatim, in the byte order of the running machine.
template <class T>
static void write_property(T prop, std::ostream& os)
{
    const char* const raw_bytes = reinterpret_cast<const char*>(&prop);
    os.write(raw_bytes, sizeof(T));
}
156 static bool read_bool_property(std::istream& is)
159 is.read(reinterpret_cast<char*>(&prop),
sizeof(
unsigned int));
// Writes a boolean header flag to the stream as a full unsigned int:
// 1 for true, 0 for false, in the byte order of the running machine.
static void write_bool_property(bool value, std::ostream& os)
{
    unsigned int prop = 0;
    if (value) {
        prop = 1;
    }
    os.write(reinterpret_cast<char*>(&prop), sizeof(unsigned int));
}
173 TransducerHeader(
bool weights):
174 number_of_input_symbols(0),
175 number_of_symbols(1),
176 size_of_transition_index_table(1),
177 size_of_transition_target_table(0),
179 number_of_transitions(0),
182 input_deterministic(true),
185 has_epsilon_epsilon_transitions(false),
186 has_input_epsilon_transitions(false),
187 has_input_epsilon_cycles(false),
188 has_unweighted_input_epsilon_cycles(false)
194 SymbolNumber input_symbols,
195 SymbolNumber symbols,
196 TransitionTableIndex transition_index_table,
197 TransitionTableIndex transition_table,
199 number_of_input_symbols(input_symbols),
200 number_of_symbols(symbols),
201 size_of_transition_index_table(transition_index_table),
202 size_of_transition_target_table(transition_table),
204 number_of_transitions(0),
207 input_deterministic(true),
210 has_epsilon_epsilon_transitions(false),
211 has_input_epsilon_transitions(false),
212 has_input_epsilon_cycles(false),
213 has_unweighted_input_epsilon_cycles(false)
217 TransducerHeader(std::istream& is):
218 number_of_input_symbols(read_property<SymbolNumber>(is)),
219 number_of_symbols(read_property<SymbolNumber>(is)),
220 size_of_transition_index_table(
221 read_property<TransitionTableIndex>(is)),
222 size_of_transition_target_table(
223 read_property<TransitionTableIndex>(is)),
224 number_of_states(read_property<StateIdNumber>(is)),
225 number_of_transitions(read_property<TransitionNumber>(is)),
226 weighted(read_bool_property(is)),
227 deterministic(read_bool_property(is)),
228 input_deterministic(read_bool_property(is)),
229 minimized(read_bool_property(is)),
230 cyclic(read_bool_property(is)),
231 has_epsilon_epsilon_transitions(read_bool_property(is)),
232 has_input_epsilon_transitions(read_bool_property(is)),
233 has_input_epsilon_cycles(read_bool_property(is)),
234 has_unweighted_input_epsilon_cycles(read_bool_property(is))
241 SymbolNumber symbol_count(
void)
const {
return number_of_symbols; }
242 SymbolNumber input_symbol_count(
void)
const {
243 return number_of_input_symbols;
245 void increment_symbol_count(
void)
246 {++number_of_symbols; ++number_of_input_symbols;}
248 TransitionTableIndex index_table_size(
void)
const
249 {
return size_of_transition_index_table; }
250 TransitionTableIndex target_table_size(
void)
const
251 {
return size_of_transition_target_table; }
253 bool probe_flag(HeaderFlag flag)
const
259 return deterministic;
260 case Input_deterministic:
261 return input_deterministic;
266 case Has_epsilon_epsilon_transitions:
267 return has_epsilon_epsilon_transitions;
268 case Has_input_epsilon_transitions:
269 return has_input_epsilon_transitions;
270 case Has_input_epsilon_cycles:
271 return has_input_epsilon_cycles;
272 case Has_unweighted_input_epsilon_cycles:
273 return has_unweighted_input_epsilon_cycles;
278 void set_flag(HeaderFlag flag,
bool value)
285 deterministic =
true;
287 case Input_deterministic:
288 input_deterministic =
true;
296 case Has_epsilon_epsilon_transitions:
297 has_epsilon_epsilon_transitions =
true;
299 case Has_input_epsilon_transitions:
300 has_input_epsilon_transitions =
true;
302 case Has_input_epsilon_cycles:
303 has_input_epsilon_cycles =
true;
305 case Has_unweighted_input_epsilon_cycles:
306 has_unweighted_input_epsilon_cycles =
true;
314 std::cout <<
"Transducer properties:" << std::endl
315 <<
" number_of_symbols: "
316 << number_of_symbols << std::endl
317 <<
" number_of_input_symbols: "
318 << number_of_input_symbols << std::endl
319 <<
" size_of_transition_index_table: "
320 << size_of_transition_index_table << std::endl
321 <<
" size_of_transition_target_table: "
322 << size_of_transition_target_table << std::endl
323 <<
" number_of_states: "
324 << number_of_states << std::endl
325 <<
" number_of_transitions: "
326 << number_of_transitions << std::endl
328 << weighted << std::endl
329 <<
" deterministic: "
330 << deterministic << std::endl
331 <<
" input_deterministic: "
332 << input_deterministic << std::endl
334 << minimized << std::endl
336 << cyclic << std::endl
337 <<
" has_epsilon_epsilon_transitions: "
338 << has_epsilon_epsilon_transitions << std::endl
339 <<
" has_input_epsilon_transitions: "
340 << has_input_epsilon_transitions << std::endl
341 <<
" has_input_epsilon_cycles: "
342 << has_input_epsilon_cycles << std::endl
343 <<
" has_unweighted_input_epsilon_cycles: "
344 << has_unweighted_input_epsilon_cycles << std::endl;
347 void write(std::ostream& os)
const
349 write_property(number_of_input_symbols, os);
350 write_property(number_of_symbols, os);
351 write_property(size_of_transition_index_table, os);
352 write_property(size_of_transition_target_table, os);
353 write_property(number_of_states, os);
354 write_property(number_of_transitions, os);
355 write_bool_property(weighted, os);
356 write_bool_property(deterministic, os);
357 write_bool_property(input_deterministic, os);
358 write_bool_property(minimized, os);
359 write_bool_property(cyclic, os);
360 write_bool_property(has_epsilon_epsilon_transitions, os);
361 write_bool_property(has_input_epsilon_transitions, os);
362 write_bool_property(has_input_epsilon_cycles, os);
363 write_bool_property(has_unweighted_input_epsilon_cycles, os);
366 friend class ConvertTransducerHeader;
369 class TransducerAlphabet
372 SymbolTable symbol_table;
373 hfst::FdTable<SymbolNumber> fd_table;
374 SymbolNumber unknown_symbol;
375 SymbolNumber default_symbol;
376 SymbolNumber identity_symbol;
377 SymbolNumber orig_symbol_count;
382 symbol_table.push_back(
"@_EPSILON_SYMBOL_@");
383 unknown_symbol = NO_SYMBOL_NUMBER;
384 default_symbol = NO_SYMBOL_NUMBER;
385 identity_symbol = NO_SYMBOL_NUMBER;
386 orig_symbol_count = 1;
388 TransducerAlphabet(std::istream& is,
389 SymbolNumber symbol_count,
390 bool preserve_diacritic_strings =
true);
391 TransducerAlphabet(
const SymbolTable& st);
393 void display()
const;
395 void write(std::ostream& os)
const
397 for(SymbolTable::const_iterator i = symbol_table.begin();
398 i != symbol_table.end(); i++)
405 bool has_flag_diacritics()
const
406 {
return fd_table.num_features() > 0; }
407 bool is_flag_diacritic(SymbolNumber symbol)
const
408 {
return fd_table.is_diacritic(symbol); }
409 bool is_like_epsilon(SymbolNumber symbol)
const;
411 const SymbolTable& get_symbol_table()
const
412 {
return symbol_table; }
414 const std::string string_from_symbol(
const SymbolNumber symbol)
const
416 {
return (symbol == 0) ?
"" : symbol_table[symbol]; }
418 SymbolNumber symbol_from_string(
const std::string symbol_string)
const;
419 StringSymbolMap build_string_symbol_map(
void)
const;
420 const hfst::FdTable<SymbolNumber>& get_fd_table()
const
422 const hfst::FdOperation * get_operation(SymbolNumber symbol)
const
424 return fd_table.get_operation(symbol);
426 SymbolNumber get_unknown_symbol(
void)
const
427 {
return unknown_symbol; }
428 SymbolNumber get_default_symbol(
void)
const
429 {
return default_symbol; }
430 SymbolNumber get_identity_symbol(
void)
const
431 {
return identity_symbol; }
432 SymbolNumber get_orig_symbol_count(
void)
const
433 {
return orig_symbol_count; }
434 virtual void add_symbol(
char * symbol);
435 virtual void add_symbol(
const std::string & symbol);
440 class TransitionIndex
443 SymbolNumber input_symbol;
444 TransitionTableIndex first_transition_index;
446 static const size_t size =
447 sizeof(SymbolNumber) +
sizeof(TransitionTableIndex);
448 TransitionIndex(): input_symbol(NO_SYMBOL_NUMBER),
449 first_transition_index(NO_TABLE_INDEX) {}
450 TransitionIndex(SymbolNumber input,
451 TransitionTableIndex first_transition):
452 input_symbol(input), first_transition_index(first_transition) {}
454 TransitionIndex(std::istream& is):
455 input_symbol(NO_SYMBOL_NUMBER), first_transition_index(0)
457 is.read(reinterpret_cast<char*>(&input_symbol),
458 sizeof(SymbolNumber));
459 is.read(reinterpret_cast<char*>(&first_transition_index),
460 sizeof(TransitionTableIndex));
463 TransitionIndex(
char * p):
464 input_symbol(*((SymbolNumber*) p)),
465 first_transition_index((*(TransitionTableIndex*)
466 (p + sizeof(SymbolNumber)))) {}
467 virtual ~TransitionIndex() {}
469 void write(std::ostream& os,
bool weighted)
const
471 os.write(reinterpret_cast<const char*>(&input_symbol),
472 sizeof(SymbolNumber));
473 if(!weighted && input_symbol == NO_SYMBOL_NUMBER &&
474 first_transition_index != NO_TABLE_INDEX) {
476 unsigned int unweighted_final_index = 1;
477 os.write(reinterpret_cast<const char*>(&unweighted_final_index),
478 sizeof(first_transition_index));
480 os.write(reinterpret_cast<const char*>(
481 &first_transition_index),
482 sizeof(first_transition_index));
486 void display()
const;
488 TransitionTableIndex get_target(
void)
const
489 {
return first_transition_index; }
490 SymbolNumber get_input_symbol(
void)
const
491 {
return input_symbol; }
493 bool matches(
const SymbolNumber s)
const;
494 virtual bool final(void)
const;
495 virtual Weight final_weight(
void)
const {
return 0.0; }
497 static TransitionIndex create_final()
498 {
return TransitionIndex(NO_SYMBOL_NUMBER, 1); }
501 class TransitionWIndex :
public TransitionIndex
504 TransitionWIndex(): TransitionIndex() {}
505 TransitionWIndex(SymbolNumber input,
506 TransitionTableIndex first_transition):
507 TransitionIndex(input, first_transition) {}
508 TransitionWIndex(std::istream& is):
509 TransitionIndex(is) {}
510 TransitionWIndex(
char * p):
511 TransitionIndex(p) {}
513 Weight final_weight(
void)
const;
515 static TransitionWIndex create_final()
516 {
return TransitionWIndex(NO_SYMBOL_NUMBER, 0); }
518 static TransitionWIndex create_final(Weight w)
522 TransitionTableIndex i;
526 return TransitionWIndex(NO_SYMBOL_NUMBER, weight.i);
533 SymbolNumber input_symbol;
534 SymbolNumber output_symbol;
535 TransitionTableIndex target_index;
537 static const size_t size = 2 *
sizeof(SymbolNumber) +
538 sizeof(TransitionTableIndex);
539 Transition(SymbolNumber input, SymbolNumber output,
540 TransitionTableIndex target, Weight bogus=0.0f):
541 input_symbol(input), output_symbol(output), target_index(target)
542 {(void)bogus; bogus=0.0f;}
543 Transition(
bool final, Weight bogus=0.0f):
544 input_symbol(NO_SYMBOL_NUMBER), output_symbol(NO_SYMBOL_NUMBER),
545 target_index(final?1:NO_TABLE_INDEX) {(void)bogus; bogus=0.0f;}
546 Transition(std::istream& is):
547 input_symbol(NO_SYMBOL_NUMBER), output_symbol(NO_SYMBOL_NUMBER),
550 is.read(reinterpret_cast<char*>(&input_symbol),
551 sizeof(SymbolNumber));
552 is.read(reinterpret_cast<char*>(&output_symbol),
553 sizeof(SymbolNumber));
554 is.read(reinterpret_cast<char*>(&target_index),
555 sizeof(target_index));
558 Transition(
char * p):
559 input_symbol(*(SymbolNumber*) p),
560 output_symbol(*(SymbolNumber*) (p + sizeof(SymbolNumber))),
561 target_index(*(TransitionTableIndex*) (p + 2 * sizeof(SymbolNumber)))
564 virtual ~Transition() {}
566 virtual void write(std::ostream& os,
bool weighted)
const
568 os.write(reinterpret_cast<const char*>(&input_symbol),
569 sizeof(input_symbol));
570 os.write(reinterpret_cast<const char*>(&output_symbol),
571 sizeof(output_symbol));
572 os.write(reinterpret_cast<const char*>(&target_index),
573 sizeof(target_index));
579 virtual void display()
const;
581 TransitionTableIndex get_target(
void)
const {
return target_index;}
582 SymbolNumber get_output_symbol(
void)
const {
return output_symbol;}
583 SymbolNumber get_input_symbol(
void)
const {
return input_symbol;}
585 bool matches(
const SymbolNumber s)
const;
586 virtual bool final(void)
const;
587 virtual Weight get_weight(
void)
const {
return 0.0; }
590 class TransitionW :
public Transition
593 Weight transition_weight;
595 static const size_t size = 2 *
sizeof(SymbolNumber) +
596 sizeof(TransitionTableIndex) +
sizeof(Weight);
598 TransitionW(SymbolNumber input, SymbolNumber output,
599 TransitionTableIndex target, Weight w):
600 Transition(input, output, target), transition_weight(w) {}
601 TransitionW(
bool final, Weight w):
602 Transition(final), transition_weight(w) {}
603 TransitionW(std::istream& is): Transition(is), transition_weight(0.0f)
604 {is.read(reinterpret_cast<char*>(&transition_weight),
sizeof(Weight));}
605 TransitionW(
char * p):
607 transition_weight(*((Weight*)
608 (p + 2 * sizeof(SymbolNumber)
609 + sizeof(TransitionTableIndex))))
612 void write(std::ostream& os,
bool weighted)
const
614 Transition::write(os,
false);
616 os.write(reinterpret_cast<const char*>(&transition_weight),
617 sizeof(transition_weight));
621 void display()
const;
623 Weight get_weight(
void)
const {
return transition_weight; }
628 class TransducerTable
631 std::vector<T> table;
633 TransducerTable(): table() {}
634 TransducerTable(
size_t size,
const T& entry): table(size, entry) {}
636 std::istream& is, TransitionTableIndex index_count): table()
638 char * p = (
char*) malloc(T::size * index_count);
639 is.read(p, T::size * index_count);
642 table.push_back(T(p));
648 TransducerTable(
const TransducerTable& t): table(t.table) {}
650 void append(
const T& v) {table.push_back(v);}
651 void set(
size_t index,
const T& v) {table[index] = v;}
653 const T& operator[](TransitionTableIndex i)
const
655 return (i < TRANSITION_TARGET_TABLE_START) ?
656 table[i] : table[i-TRANSITION_TARGET_TABLE_START];
659 void display(
bool transition_table)
const
661 for(
size_t i=0;i<table.size();i++)
665 std::cout <<
"/" << i+TRANSITION_TARGET_TABLE_START;
671 unsigned int size()
const {
return table.size();}
674 class TransducerTablesInterface
677 virtual ~TransducerTablesInterface() {}
679 virtual const TransitionIndex& get_index(
680 TransitionTableIndex i)
const = 0;
681 virtual const Transition& get_transition(
682 TransitionTableIndex i)
const = 0;
683 virtual Weight get_weight(
684 TransitionTableIndex i)
const = 0;
685 virtual SymbolNumber get_transition_input(
686 TransitionTableIndex i)
const = 0;
687 virtual SymbolNumber get_transition_output(
688 TransitionTableIndex i)
const = 0;
689 virtual TransitionTableIndex get_transition_target(
690 TransitionTableIndex i)
const = 0;
691 virtual bool get_transition_finality(
692 TransitionTableIndex i)
const = 0;
693 virtual SymbolNumber get_index_input(
694 TransitionTableIndex i)
const = 0;
695 virtual TransitionTableIndex get_index_target(
696 TransitionTableIndex i)
const = 0;
697 virtual bool get_index_finality(
698 TransitionTableIndex i)
const = 0;
699 virtual Weight get_final_weight(
700 TransitionTableIndex i)
const = 0;
702 virtual void display()
const {}
705 template <
class T1,
class T2>
706 class TransducerTables :
public TransducerTablesInterface
709 TransducerTable<T1> index_table;
710 TransducerTable<T2> transition_table;
712 TransducerTables(std::istream& is, TransitionTableIndex index_table_size,
713 TransitionTableIndex transition_table_size):
715 is, index_table_size),
716 transition_table(is, transition_table_size) { }
718 TransducerTables(): index_table(1, T1::create_final()),
719 transition_table() {}
720 TransducerTables(
const TransducerTable<T1>& index_table,
721 const TransducerTable<T2>& transition_table):
722 index_table(index_table), transition_table(transition_table) {}
724 const TransitionIndex& get_index(TransitionTableIndex i)
const
725 {
return index_table[i];}
726 const Transition& get_transition(TransitionTableIndex i)
const
727 {
return transition_table[i];}
728 Weight get_weight(TransitionTableIndex i)
const
729 {
return transition_table[i].get_weight(); }
730 SymbolNumber get_transition_input(TransitionTableIndex i)
const
731 {
return transition_table[i].get_input_symbol(); }
732 SymbolNumber get_transition_output(TransitionTableIndex i)
const
733 {
return transition_table[i].get_output_symbol(); }
734 TransitionTableIndex get_transition_target(TransitionTableIndex i)
const
735 {
return transition_table[i].get_target(); }
736 bool get_transition_finality(TransitionTableIndex i)
const
737 {
return transition_table[i].final(); }
738 SymbolNumber get_index_input(TransitionTableIndex i)
const
739 {
return index_table[i].get_input_symbol(); }
740 TransitionTableIndex get_index_target(TransitionTableIndex i)
const
741 {
return index_table[i].get_target(); }
742 bool get_index_finality(TransitionTableIndex i)
const
743 {
return index_table[i].final(); }
744 Weight get_final_weight(TransitionTableIndex i)
const
745 {
return index_table[i].final_weight(); }
750 std::cout <<
"Transition index table:" << std::endl;
751 index_table.display(
false);
752 std::cout <<
"Transition table:" << std::endl;
753 transition_table.display(
true);
761 typedef std::vector<OlLetterTrie*> OlLetterTrieVector;
766 OlLetterTrieVector letters;
767 SymbolNumberVector symbols;
771 letters(UCHAR_MAX, static_cast<OlLetterTrie*>(NULL)),
772 symbols(UCHAR_MAX,NO_SYMBOL_NUMBER)
776 for (
size_t i=0 ; i<letters.size() ; ++i) {
782 void add_string(
const char * p,SymbolNumber symbol_key);
783 bool has_key_starting_with(
const char c)
const;
785 SymbolNumber find_key(
char ** p);
792 SymbolNumber number_of_input_symbols;
793 OlLetterTrie letters;
794 SymbolNumberVector ascii_symbols;
796 void read_input_symbols(
const SymbolTable & kt);
797 void read_input_symbol(
const char * symbol,
const int symbol_number);
800 Encoder(
const SymbolTable & st, SymbolNumber input_symbol_count):
801 number_of_input_symbols(input_symbol_count),
802 ascii_symbols(128, NO_SYMBOL_NUMBER)
804 read_input_symbols(st);
807 SymbolNumber find_key(
char ** p);
809 friend class Transducer;
810 friend class PmatchContainer;
815 class Tape:
public SymbolNumberVector
818 void write(
unsigned int i, SymbolNumber s)
820 if (this->size() > i) {
821 this->operator[](i) = s;
823 while (this->size() <= i) {
824 this->push_back(NO_SYMBOL_NUMBER);
826 this->operator[](i) = s;
836 TransducerHeader* header;
837 TransducerAlphabet* alphabet;
838 TransducerTablesInterface* tables;
839 void load_tables(std::istream& is);
842 Weight current_weight;
843 HfstOneLevelPaths * lookup_paths;
847 hfst::FdState<SymbolNumber> flag_state;
849 bool found_transition;
852 TraversalStates traversal_states;
855 unsigned int recursion_depth_left;
859 void try_epsilon_transitions(
unsigned int input_tape_pos,
860 unsigned int output_tape_pos,
861 TransitionTableIndex i);
863 void try_epsilon_indices(
unsigned int input_tape_pos,
864 unsigned int output_tape_pos,
865 TransitionTableIndex i);
867 void find_transitions(SymbolNumber input,
868 unsigned int input_tape_pos,
869 unsigned int output_tape_pos,
870 TransitionTableIndex i);
872 void find_index(SymbolNumber input,
873 unsigned int input_tape_pos,
874 unsigned int output_tape_pos,
875 TransitionTableIndex i);
877 void get_analyses(
unsigned int input_tape_pos,
878 unsigned int output_tape_pos,
879 TransitionTableIndex i);
881 void find_loop_epsilon_transitions(
unsigned int input_pos,
882 TransitionTableIndex i);
883 void find_loop_epsilon_indices(
unsigned int input_pos,
884 TransitionTableIndex i);
885 void find_loop_transitions(SymbolNumber input,
886 unsigned int input_pos,
887 TransitionTableIndex i);
888 void find_loop_index(SymbolNumber input,
889 unsigned int input_pos,
890 TransitionTableIndex i);
891 void find_loop(
unsigned int input_pos,
892 TransitionTableIndex i);
901 const TransducerAlphabet& alphabet,
902 const TransducerTable<TransitionIndex>& index_table,
903 const TransducerTable<Transition>& transition_table);
905 const TransducerAlphabet& alphabet,
906 const TransducerTable<TransitionWIndex>& index_table,
907 const TransducerTable<TransitionW>& transition_table);
910 void write(std::ostream& os)
const;
912 void display()
const;
914 const TransducerHeader& get_header()
const
916 const TransducerAlphabet& get_alphabet()
const
917 {
return *alphabet; }
918 const Encoder& get_encoder(
void)
const
920 const hfst::FdTable<SymbolNumber>& get_fd_table()
const
921 {
return alphabet->get_fd_table(); }
922 const SymbolTable& get_symbol_table()
const
923 {
return alphabet->get_symbol_table(); }
926 const TransitionIndex& get_index(TransitionTableIndex i)
const
927 {
return tables->get_index(i); }
928 const Transition& get_transition(TransitionTableIndex i)
const
929 {
return tables->get_transition(i); }
930 bool final_index(TransitionTableIndex i)
const
932 if (indexes_transition_table(i)) {
933 return tables->get_transition_finality(i);
935 return tables->get_index_finality(i);
938 bool is_infinitely_ambiguous(
void)
const
940 return header->probe_flag(Has_input_epsilon_cycles);
943 bool is_lookup_infinitely_ambiguous(
const StringVector & s);
944 bool is_lookup_infinitely_ambiguous(
const std::string & input);
946 TransducerTable<TransitionWIndex> copy_windex_table();
947 TransducerTable<TransitionW> copy_transitionw_table();
948 TransducerTable<TransitionIndex> copy_index_table();
949 TransducerTable<Transition> copy_transition_table();
957 TransitionTableIndexSet get_transitions_from_state(
958 TransitionTableIndex state_index)
const;
961 bool initialize_input(
const char * input_str);
962 HfstOneLevelPaths * lookup_fd(
const StringVector & s, ssize_t limit = -1,
963 double time_cutoff = 0.0);
968 HfstOneLevelPaths * lookup_fd(
const std::string & s, ssize_t limit = -1,
969 double time_cutoff = 0.0);
970 HfstOneLevelPaths * lookup_fd(
const char * s, ssize_t limit = -1,
971 double time_cutoff = 0.0);
972 void note_analysis(
void);
975 SymbolNumber get_unknown_symbol(
void)
const
976 {
return alphabet->get_unknown_symbol(); }
977 StringSymbolMap get_string_symbol_map(
void)
const
978 {
return alphabet->build_string_symbol_map(); }
979 STransition take_epsilons(
const TransitionTableIndex i)
const;
980 STransition take_epsilons_and_flags(
const TransitionTableIndex i);
981 STransition take_non_epsilons(
const TransitionTableIndex i,
982 const SymbolNumber symbol)
const;
983 TransitionTableIndex next(
const TransitionTableIndex i,
984 const SymbolNumber symbol)
const;
985 TransitionTableIndex next_e(
const TransitionTableIndex i)
const;
986 bool has_transitions(
const TransitionTableIndex i,
987 const SymbolNumber symbol)
const;
988 bool has_epsilons_or_flags(
const TransitionTableIndex i);
989 Weight final_weight(
const TransitionTableIndex i)
const;
990 bool is_flag(
const SymbolNumber symbol)
991 {
return alphabet->is_flag_diacritic(symbol); }
992 bool is_weighted(
void)
993 {
return header->probe_flag(Weighted);}
996 friend class ConvertTransducer;
1001 TransitionTableIndex index;
1002 SymbolNumber symbol;
1005 STransition(TransitionTableIndex i,
1012 STransition(TransitionTableIndex i,
1022 typedef std::pair<std::string, Weight> StringWeightPair;
1024 class StringWeightComparison
1031 StringWeightComparison(
bool reverse_result=
false):
1032 reverse(reverse_result)
1035 bool operator() (StringWeightPair lhs, StringWeightPair rhs)
1038 return (lhs.second < rhs.second);
1040 return (lhs.second > rhs.second);
1045 typedef std::priority_queue<StringWeightPair,
1046 std::vector<StringWeightPair>,
1047 StringWeightComparison> CorrectionQueue;
1048 typedef std::priority_queue<StringWeightPair,
1049 std::vector<StringWeightPair>,
1050 StringWeightComparison> AnalysisQueue;
1051 typedef std::priority_queue<StringWeightPair,
1052 std::vector<StringWeightPair>,
1053 StringWeightComparison> HyphenationQueue;
1138 SymbolNumberVector string;
1139 unsigned int input_state;
1140 TransitionTableIndex mutator_state;
1141 TransitionTableIndex lexicon_state;
1142 hfst::FdState<SymbolNumber> flag_state;
1145 TreeNode(SymbolNumberVector prev_string,
1147 TransitionTableIndex mutator,
1148 TransitionTableIndex lexicon,
1149 hfst::FdState<SymbolNumber> state,
1151 string(prev_string),
1153 mutator_state(mutator),
1154 lexicon_state(lexicon),
1159 TreeNode(hfst::FdState<SymbolNumber> start_state):
1160 string(SymbolNumberVector()),
1164 flag_state(start_state),
1168 TreeNode update_lexicon(SymbolNumber next_symbol,
1169 TransitionTableIndex next_lexicon,
1172 TreeNode update_mutator(SymbolNumber next_symbol,
1173 TransitionTableIndex next_mutator,
1176 void increment_mutator(
void);
1178 TreeNode update(SymbolNumber next_symbol,
1179 unsigned int next_input,
1180 TransitionTableIndex next_mutator,
1181 TransitionTableIndex next_lexicon,
1184 TreeNode update(SymbolNumber next_symbol,
1185 TransitionTableIndex next_mutator,
1186 TransitionTableIndex next_lexicon,
1192 typedef std::deque<TreeNode> TreeNodeQueue;
1194 int nByte_utf8(
unsigned char c);
1200 SymbolNumberVector s;
1204 s(SymbolNumberVector())
1207 bool initialize(
const Encoder & encoder,
char * input, SymbolNumber other);
1209 unsigned int len(
void)
1214 SymbolNumber operator[](
unsigned int i)
1221 class AlphabetTranslationException:
public std::runtime_error
1225 AlphabetTranslationException(
const std::string what):
1226 std::runtime_error(what)
1239 TreeNodeQueue queue;
1240 SymbolNumberVector alphabet_translator;
1242 std::vector<std::string> symbol_table;
1245 mutator(mutator_ptr),
1246 lexicon(lexicon_ptr),
1247 input(InputString()),
1248 queue(TreeNodeQueue()),
1249 alphabet_translator(SymbolNumberVector()),
1251 symbol_table(lexicon->get_symbol_table())
1253 build_alphabet_translator();
1256 bool init_input(
char * str,
const Encoder & encoder, SymbolNumber other);
1258 void build_alphabet_translator(
void);
1259 void lexicon_epsilons(
void);
1260 void mutator_epsilons(
void);
1261 void consume_input(
void);
1262 void lexicon_consume(
void);
1265 bool check(
char * line);
1268 CorrectionQueue
correct(
char * line);
1269 std::string stringify(SymbolNumberVector symbol_vector);
A compiled transducer format, suitable for fast lookup operations.
Definition: transducer.h:833
#define HFST_THROW(E)
Macro to throw an exception of type E. Use HFST_THROW instead of a regular throw with subclasses of HfstException.
Definition: HfstExceptionDefs.h:40
A spellchecker, constructed from two optimized-lookup transducer instances. An alphabet translator is built at construction time.
Definition: transducer.h:1233
std::set< HfstOneLevelPath > HfstOneLevelPaths
A set of simple paths.
Definition: HfstDataTypes.h:100
std::pair< float, StringVector > HfstOneLevelPath
A path of one level of arcs with collected weight.
Definition: HfstDataTypes.h:96
CorrectionQueue correct(char *line)
Definition: ospell.cc:290
bool check(char *line)
Definition: ospell.cc:334
Transducer has wrong type.
Definition: HfstExceptionDefs.h:400