HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstTransitionGraph.h
Go to the documentation of this file.
1 // Copyright (c) 2016 University of Helsinki
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
10  #ifndef _HFST_TRANSITION_GRAPH_H_
11  #define _HFST_TRANSITION_GRAPH_H_
12 
16  #include <cstdio>
17  #include <string>
18  #include <set>
19  #include <cassert>
20  #include <iostream>
21  #include <algorithm>
22  #include <stack>
23 
24  #include "../HfstSymbolDefs.h"
25  #include "../HfstExceptionDefs.h"
26  #include "../HfstDataTypes.h"
27  #include "../HarmonizeUnknownAndIdentitySymbols.h"
28  #include "../HfstFlagDiacritics.h"
29  #include "../HfstEpsilonHandler.h"
30  #include "ConvertTransducerFormat.h"
31  #include "HfstTransition.h"
32  #include "HfstTropicalTransducerTransitionData.h"
33 //#include "HfstFastTransitionData.h"
34 
35  #include "../hfstdll.h"
36 
37  namespace hfst {
38 
47  namespace implementations {
48 
50  typedef unsigned int HfstState;
51 
52  typedef std::pair<HfstState, std::vector<std::pair<std::string, std::string> > > HfstReplacement;
53  typedef std::vector<HfstReplacement> HfstReplacements;
54  typedef std::map<HfstState, HfstReplacements > HfstReplacementsMap;
55 
56  typedef std::vector<std::vector<hfst::implementations::HfstBasicTransition> > HfstBasicStates;
57 
125  template <class C> class HfstTransitionGraph
126  {
127 
128  // --- Datatypes and variables ---
129 
130  public:
132  typedef typename C::SymbolType HfstSymbol;
134  typedef std::pair<HfstSymbol, HfstSymbol>
137  typedef std::set<HfstSymbolPair> HfstSymbolPairSet;
139  typedef std::set<HfstSymbol> HfstSymbolSet;
141  typedef std::vector<HfstSymbolPair> HfstSymbolPairVector;
143  typedef std::set<HfstSymbol> HfstTransitionGraphAlphabet;
144 
146  typedef std::vector<HfstTransition<C> > HfstTransitions;
147 
148  /* Datatype for the states of a graph and their transitions.
149  Each index of the vector is a state and the transitions
150  on that index are the transitions of that state. */
151  typedef std::vector<HfstTransitions> HfstStates;
152 
153  /* States of the graph and their transitions. */
154  HfstStates state_vector;
155 
156  protected:
157  /* The initial state number. */
158  static const HfstState INITIAL_STATE = 0;
159 
160  /* Datatype for the final states and their weights in a graph. */
161  typedef std::map<HfstState,typename C::WeightType> FinalWeightMap;
162  /* The final states and their weights in the graph. */
163  FinalWeightMap final_weight_map;
164 
165  /* The alphabet of the graph. */
167 
168  /* Used by substitute function. */
169  typedef unsigned int HfstNumber;
170  typedef std::vector<HfstNumber> HfstNumberVector;
171  typedef std::pair<HfstNumber, HfstNumber> HfstNumberPair;
172  typedef std::map<HfstNumberPair, HfstNumberPair> HfstNumberPairSubstitutions;
173 
174  protected:
175  /* @brief An iterator type that points to a state in a graph.
176 
177  The value pointed by the iterator is of type HfstTransitions. */
178  typedef typename HfstStates::iterator iterator;
179 
180  public:
184  typedef typename HfstStates::const_iterator const_iterator;
185 
187  std::string name;
188 
190  std::vector<HfstState> states() const {
191  std::vector<HfstState> retval(this->get_max_state()+1, 0);
192  for (unsigned int i=0; i<(this->get_max_state()+1); i++)
193  retval[i] = i;
194  return retval;
195  }
196 
198  HfstBasicStates states_and_transitions() const {
199  return state_vector;
200  }
201 
202  // --------------------------------------------------------
203  // --- Construction, assignment, copying and conversion ---
204  // --------------------------------------------------------
205 
208  HFSTDLL HfstTransitionGraph(void) {
209  initialize_alphabet(alphabet);
210  HfstTransitions tr;
211  state_vector.push_back(tr);
212  }
213 
214  HFSTDLL HfstTransitionGraph(FILE *file) {
215  initialize_alphabet(alphabet);
216  HfstTransitions tr;
217  state_vector.push_back(tr);
218  unsigned int linecount=0;
219  this->assign(read_in_att_format(file, "@0@", linecount));
220  }
221 
222 
225  {
226  if (this == &graph)
227  return *this;
228  state_vector = graph.state_vector;
229  final_weight_map = graph.final_weight_map;
230  alphabet = graph.alphabet;
231  assert(alphabet.count(HfstSymbol()) == 0);
232  return *this;
233  }
234 
235  HFSTDLL HfstTransitionGraph &assign(const HfstTransitionGraph &graph)
236  {
237  return this->operator=(graph);
238  }
239 
241  HFSTDLL HfstTransitionGraph(const HfstTransitionGraph &graph) {
242  state_vector = graph.state_vector;
243  final_weight_map = graph.final_weight_map;
244  alphabet = graph.alphabet;
245  assert(alphabet.count(HfstSymbol()) == 0);
246  }
247 
250  HFSTDLL HfstTransitionGraph(const hfst::HfstTransducer &transducer) {
252  *fsm = ConversionFunctions::
253  hfst_transducer_to_hfst_basic_transducer(transducer);
254  state_vector = fsm->state_vector;
255  final_weight_map = fsm->final_weight_map;
256  alphabet = fsm->alphabet;
257  delete fsm;
258  }
259 
260 
261  // --------------------------------------------------
262  // --- Initialization, optimization and debugging ---
263  // --------------------------------------------------
264 
265  protected:
266  /* Add epsilon, unknown and identity symbols to the alphabet
267  \a alpha. */
268  void initialize_alphabet(HfstTransitionGraphAlphabet &alpha) {
269  alpha.insert(C::get_epsilon());
270  alpha.insert(C::get_unknown());
271  alpha.insert(C::get_identity());
272  }
273 
274  /* Check that all symbols that occur in the transitions of the graph
275  are also in the alphabet. */
276  bool check_alphabet()
277  {
278  for (iterator it = begin(); it != end(); it++)
279  {
280  for (typename HfstTransitions::iterator tr_it
281  = it->begin();
282  tr_it != it->end(); tr_it++)
283  {
284  C data = tr_it->get_transition_data();
285 
286  if(alphabet.find(data.get_input_symbol())
287  == alphabet.end()) {
288  return false;
289  }
290  if(alphabet.find(data.get_output_symbol())
291  == alphabet.end()) {
292  return false;
293  }
294  }
295  }
296  return true;
297  }
298 
299  public:
300  /* Print the alphabet of the graph to standard error stream. */
301  HFSTDLL void print_alphabet() const
302  {
303  for (typename HfstTransitionGraphAlphabet::const_iterator it
304  = alphabet.begin(); it != alphabet.end(); it++)
305  {
306  if (it != alphabet.begin())
307  std::cerr << ", ";
308  std::cerr << *it;
309  }
310  std::cerr << std::endl;
311  }
312 
313  protected:
314  /* Get the number of the \a symbol. */
315  unsigned int get_symbol_number
316  (const HfstSymbol &symbol) const {
317  return C::get_number(symbol);
318  }
319 
320  /* For internal optimization: Reserve space for
321  \a number_of_states states. */
322  void initialize_state_vector
323  (unsigned int number_of_states)
324  {
325  state_vector.reserve(number_of_states);
326  }
327 
328  /* For internal optimization: Reserve space for
329  \a number_of_transitions transitions for state number
330  \a state_number. */
331  void initialize_transition_vector
332  (unsigned int state_number, unsigned int number_of_transitions)
333  {
334  add_state(state_number);
335  state_vector[state_number].reserve(number_of_transitions);
336  }
337 
338 
339  // -----------------------------------
340  // ---------- The alphabet -----------
341  // -----------------------------------
342 
343  public:
348  HFSTDLL void add_symbol_to_alphabet(const HfstSymbol &symbol) {
349  alphabet.insert(symbol);
350  }
351 
356  HFSTDLL void remove_symbol_from_alphabet(const HfstSymbol &symbol) {
357  alphabet.erase(symbol);
358  }
359 
360  HFSTDLL void remove_symbols_from_alphabet(const HfstSymbolSet &symbols) {
361  for (typename HfstSymbolSet::const_iterator it = symbols.begin();
362  it != symbols.end(); it++)
363  {
364  alphabet.erase(*it);
365  }
366  }
367 
370  HFSTDLL void add_symbols_to_alphabet(const HfstSymbolSet &symbols)
371  {
372  for (typename HfstSymbolSet::const_iterator it = symbols.begin();
373  it != symbols.end(); it++)
374  {
375  alphabet.insert(*it);
376  }
377  }
378 
379  HFSTDLL void add_symbols_to_alphabet(const HfstSymbolPairSet &symbols)
380  {
381  for (typename HfstSymbolPairSet::const_iterator it = symbols.begin();
382  it != symbols.end(); it++)
383  {
384  alphabet.insert(it->first);
385  alphabet.insert(it->second);
386  }
387  }
388 
389  /* Remove all symbols that are given in \a symbols but do not occur
390  in transitions of the graph from its alphabet. */
391  HFSTDLL void prune_alphabet_after_substitution(const std::set<unsigned int> &symbols)
392  {
393  if (symbols.size() == 0)
394  return;
395 
396  std::vector<bool> symbols_found;
397  symbols_found.resize
398  (C::get_max_number()+1, false);
399 
400  // Go through all transitions
401  for (iterator it = begin(); it != end(); it++)
402  {
403  for (typename HfstTransitions::iterator tr_it
404  = it->begin();
405  tr_it != it->end(); tr_it++)
406  {
407  const C & data = tr_it->get_transition_data();
408  symbols_found.at(data.get_input_number()) = true;
409  symbols_found.at(data.get_output_number()) = true;
410  }
411  }
412 
413  // Remove symbols in \a symbols from the alphabet if they did not
414  // occur in any transitions
415  for (std::set<unsigned int>::const_iterator it = symbols.begin();
416  it != symbols.end(); it++)
417  {
418  if (! symbols_found.at(*it))
419  alphabet.erase(C::get_symbol(*it));
420  }
421 
422  }
423 
424  HFSTDLL HfstTransitionGraphAlphabet symbols_used()
425  {
427  for (iterator it = begin(); it != end(); it++)
428  {
429  for (typename HfstTransitions::iterator tr_it
430  = it->begin();
431  tr_it != it->end(); tr_it++)
432  {
433  C data = tr_it->get_transition_data();
434 
435  retval.insert(data.get_input_symbol());
436  retval.insert(data.get_output_symbol());
437  }
438  }
439  return retval;
440  }
441 
450  HFSTDLL void prune_alphabet(bool force=true) {
451 
452  // Which symbols occur in the graph
453  HfstTransitionGraphAlphabet symbols_found = symbols_used();
454 
455  // Whether unknown or identity symbols are used
456  bool unknowns_or_identities_used =
457  ( (symbols_found.find("@_UNKNOWN_SYMBOL_@") != symbols_found.end()) ||
458  (symbols_found.find("@_IDENTITY_SYMBOL_@") != symbols_found.end()) );
459 
460  // We cannot prune the transducer because unknowns or identities
461  // are used in its transitions.
462  if (!force && unknowns_or_identities_used)
463  return;
464 
465  // Special symbols are always known
466  symbols_found.insert("@_EPSILON_SYMBOL_@");
467  symbols_found.insert("@_UNKNOWN_SYMBOL_@");
468  symbols_found.insert("@_IDENTITY_SYMBOL_@");
469 
470  // Which symbols in the graph's alphabet did not occur in
471  // the graph
472  HfstTransitionGraphAlphabet symbols_not_found;
473 
474  for (typename HfstTransitionGraphAlphabet::iterator it
475  = alphabet.begin();
476  it != alphabet.end(); it++)
477  {
478  if (symbols_found.find(*it) == symbols_found.end())
479  symbols_not_found.insert(*it);
480  }
481 
482  // Remove the symbols that did not occur in the graph
483  // from its alphabet
484  for (typename HfstTransitionGraphAlphabet::iterator it
485  = symbols_not_found.begin();
486  it != symbols_not_found.end(); it++)
487  {
488  alphabet.erase(*it);
489  }
490  }
491 
498  HFSTDLL const HfstTransitionGraphAlphabet &get_alphabet() const {
499  return alphabet;
500  }
501 
502  HFSTDLL StringPairSet get_transition_pairs() const {
503 
504  StringPairSet retval;
505  for (const_iterator it = begin(); it != end(); it++)
506  {
507  for (typename HfstTransitions::const_iterator tr_it
508  = it->begin();
509  tr_it != it->end(); tr_it++)
510  {
511  C data = tr_it->get_transition_data();
512 
513  retval.insert(StringPair(data.get_input_symbol(),
514  data.get_output_symbol()));
515  }
516  }
517  return retval;
518  }
519 
520 
521  // ----------------------------------------------------------------
522  // --- Adding states and transitions and iterating through them ---
523  // ----------------------------------------------------------------
524 
528  HFSTDLL HfstState add_state(void) {
529  HfstTransitions tr;
530  state_vector.push_back(tr);
531  return state_vector.size()-1;
532  }
533 
541  while(state_vector.size() <= s) {
542  HfstTransitions tr;
543  state_vector.push_back(tr);
544  }
545  return s;
546  }
547 
549  HFSTDLL HfstState get_max_state() const {
550  return state_vector.size()-1;
551  }
552 
556  HFSTDLL void add_transition(HfstState s, const HfstTransition<C> & transition,
557  bool add_symbols_to_alphabet=true) {
558 
559  C data = transition.get_transition_data();
560 
561  add_state(s);
562  add_state(transition.get_target_state());
564  alphabet.insert(data.get_input_symbol());
565  alphabet.insert(data.get_output_symbol());
566  }
567  state_vector[s].push_back(transition);
568  }
569 
576  HFSTDLL void remove_transition(HfstState s, const HfstTransition<C> & transition,
577  bool remove_symbols_from_alphabet=false)
578  {
579  if (! (state_vector.size() > s))
580  {
581  return;
582  }
583 
584  HfstTransitions & transitions = state_vector[s];
585  // iterators to transitions to be removed
586  // transitions must be removed in reverse order so that iterators
587  // are not invalidated
588  std::stack<typename HfstTransitions::iterator> elements_to_remove;
589 
590  // find the transitions to be removed
591  for (typename HfstTransitions::iterator it = transitions.begin();
592  it != transitions.end(); it++)
593  {
594  // weight is ignored
595  if (it->get_input_symbol() == transition.get_input_symbol() &&
596  it->get_output_symbol() == transition.get_output_symbol() &&
597  it->get_target_state() == transition.get_target_state())
598  {
599  // schedule transition to be removed
600  elements_to_remove.push(it);
601  }
602  }
603  // remove the transitions in reverse order
604  while (!elements_to_remove.empty())
605  {
606  state_vector[s].erase(elements_to_remove.top());
607  elements_to_remove.pop();
608  }
609 
610  if (remove_symbols_from_alphabet)
611  {
612  HfstTransitionGraphAlphabet alpha = this->symbols_used();
613  if (alpha.find(transition.get_input_symbol()) == alpha.end())
614  this->remove_symbol_from_alphabet(transition.get_input_symbol());
615  if (alpha.find(transition.get_output_symbol()) == alpha.end())
617  }
618  }
619 
622  HFSTDLL bool is_final_state(HfstState s) const {
623  return (final_weight_map.find(s) != final_weight_map.end());
624  }
625 
627  HFSTDLL typename C::WeightType get_final_weight(HfstState s) const {
628  if (s > this->get_max_state())
630  if (final_weight_map.find(s) != final_weight_map.end())
631  return final_weight_map.find(s)->second;
633  }
634 
639  HFSTDLL void set_final_weight(HfstState s,
640  const typename C::WeightType & weight) {
641  add_state(s);
642  final_weight_map[s] = weight;
643  }
644 
648  {
649  for (typename HfstStates::iterator it = state_vector.begin();
650  it != state_vector.end();
651  ++it)
652  {
654  std::sort<typename HfstTransitions::iterator>
655  (transitions.begin(),transitions.end());
656  }
657  return *this;
658  }
659 
664  HFSTDLL iterator begin() { return state_vector.begin(); }
665 
668  HFSTDLL const_iterator begin() const { return state_vector.begin(); }
669 
672  HFSTDLL iterator end() { return state_vector.end(); }
673 
676  HFSTDLL const_iterator end() const { return state_vector.end(); }
677 
678 
684  HFSTDLL const HfstTransitions & operator[](HfstState s) const
685  {
686  if (s >= state_vector.size()) {
688  return state_vector[s];
689  }
690 
697  HFSTDLL const HfstTransitions & transitions(HfstState s) const
698  {
699  return this->operator[](s);
700  }
701 
705  {
706  if (s >= state_vector.size()) {
708  return state_vector[s];
709  }
710 
711  // --------------------------------------------------
712  // ----- Reading and writing in AT&T format -----
713  // --------------------------------------------------
714 
715  protected:
716  /* Change state numbers s1 to s2 and vice versa. */
717  void swap_state_numbers(HfstState s1, HfstState s2) {
718 
719  HfstTransitions s1_copy = state_vector[s1];
720  state_vector[s1] = state_vector[s2];
721  state_vector[s2] = s1_copy;
722 
723  // ----- Go through all states -----
724  for (iterator it = begin(); it != end(); it++)
725  {
726  // Go through all transitions
727  for (unsigned int i=0; i < it->size(); i++)
728  {
729  HfstTransition<C> &tr_it = it->operator[](i);
730 
731  HfstState new_target=tr_it.get_target_state();
732  if (tr_it.get_target_state() == s1)
733  new_target = s2;
734  if (tr_it.get_target_state() == s2)
735  new_target = s1;
736 
737  if (new_target != tr_it.get_target_state())
738  {
740  (new_target,
741  tr_it.get_input_symbol(),
742  tr_it.get_output_symbol(),
743  tr_it.get_weight());
744 
745  it->operator[](i) = tr;
746  }
747 
748  } // all transitions gone through
749 
750  } // ----- all states gone through -----
751 
752  // Swap final states, if needed
753  typename FinalWeightMap::iterator s1_it = final_weight_map.find(s1);
754  typename FinalWeightMap::iterator s2_it = final_weight_map.find(s2);
755  typename FinalWeightMap::iterator end_it = final_weight_map.end();
756 
757  if (s1_it != end_it && s2_it != end_it) {
758  typename C::WeightType s1_weight = s1_it->second;
759  final_weight_map[s1] = s2_it->second;
760  final_weight_map[s2] = s1_weight;
761  }
762  if (s1_it != end_it) {
763  typename C::WeightType w = s1_it->second;
764  final_weight_map.erase(s1);
765  final_weight_map[s2] = w;
766  }
767  if (s2_it != end_it) {
768  typename C::WeightType w = s2_it->second;
769  final_weight_map.erase(s2);
770  final_weight_map[s1] = w;
771  }
772 
773  return;
774 
775  }
776 
777  static void write_weight(FILE * file, float weight)
778  {
779  //if (weight == 0) // avoid unnecessary 0.000000's
780  // fprintf(file, "%i", 0);
781  //else
782  fprintf(file, "%f", weight);
783  }
784 
785  static void write_weight(std::ostream & os, float weight)
786  {
787  //if (weight == 0) // avoid unnecessary 0.000000's
788  // os << 0;
789  //else
790  os << weight;
791  }
792 
793  /* Replace all strings \a str1 in \a symbol with \a str2. */
794  static void replace_all(std::string & symbol,
795  const std::string &str1,
796  const std::string &str2)
797  {
798  size_t pos = symbol.find(str1);
799  while (pos != std::string::npos) // while there are str1:s to replace
800  {
801  symbol.erase(pos, str1.size()); // erase str1
802  symbol.insert(pos, str2); // insert str2 instead
803  pos = symbol.find // find next str1
804  (str1, pos+str2.size());
805  }
806  return;
807  }
808 
809  static void xfstize(std::string & symbol)
810  {
811  std::string escaped_symbol;
812  for (size_t pos = 0; pos < symbol.size(); pos++)
813  {
814  if (symbol[pos] == '%')
815  escaped_symbol.append("\"%\"");
816  else if (symbol[pos] == '"')
817  escaped_symbol.append("%\"");
818  else if (symbol[pos] == '?')
819  escaped_symbol.append("\"?\"");
820  else
821  escaped_symbol.append(1, symbol[pos]);
822  }
823  symbol = escaped_symbol;
824  }
825 
826  static void xfstize_symbol(std::string & symbol)
827  {
828  xfstize(symbol);
829  replace_all(symbol, "@_EPSILON_SYMBOL_@", "0");
830  replace_all(symbol, "@_UNKNOWN_SYMBOL_@", "?");
831  replace_all(symbol, "@_IDENTITY_SYMBOL_@", "?");
832  replace_all(symbol, "\t", "@_TAB_@");
833  }
834 
835  void print_xfst_state(std::ostream & os, HfstState state)
836  {
837  if (state == INITIAL_STATE) { os << "S"; }
838  if (is_final_state(state)) { os << "f"; }
839  os << "s" << state;
840  }
841 
842  void print_xfst_state(FILE * file, HfstState state)
843  {
844  if (state == INITIAL_STATE) { fprintf(file, "S"); }
845  if (is_final_state(state)) { fprintf(file, "f"); }
846  fprintf(file, "s%i", state);
847  }
848 
849  void print_xfst_arc(std::ostream & os, C data)
850  {
851  // replace all spaces, epsilons and tabs
852  if (data.get_input_symbol() !=
853  data.get_output_symbol())
854  {
855  os << "<";
856  }
857  std::string s = data.get_input_symbol();
858  xfstize_symbol(s);
859  os << s;
860  if (data.get_input_symbol() !=
861  data.get_output_symbol() ||
862  data.get_output_symbol() == "@_UNKNOWN_SYMBOL_@")
863  {
864  s = data.get_output_symbol();
865  xfstize_symbol(s);
866  os << ":" << s;
867  }
868  if (data.get_input_symbol() !=
869  data.get_output_symbol())
870  {
871  os << ">";
872  }
873  }
874 
875  void print_xfst_arc(FILE * file, C data)
876  {
877  if (data.get_input_symbol() !=
878  data.get_output_symbol())
879  {
880  fprintf(file, "<");
881  }
882  // replace all spaces, epsilons and tabs
883  std::string s = data.get_input_symbol();
884  xfstize_symbol(s);
885  fprintf(file, "%s", s.c_str());
886 
887  if (data.get_input_symbol() !=
888  data.get_output_symbol() ||
889  data.get_output_symbol() == "@_UNKNOWN_SYMBOL_@")
890  {
891  s = data.get_output_symbol();
892  xfstize_symbol(s);
893  fprintf(file, ":%s", s.c_str());
894  }
895  if (data.get_input_symbol() !=
896  data.get_output_symbol())
897  {
898  fprintf(file, ">");
899  }
900  }
901 
902  public:
903 
906  HFSTDLL void write_in_xfst_format(std::ostream &os, bool write_weights=true)
907  {
908  (void)write_weights; // todo
909  unsigned int source_state=0;
910  for (iterator it = begin(); it != end(); it++)
911  {
912  print_xfst_state(os, source_state);
913  os << ":\t";
914 
915  if (it->begin() == it->end())
916  {
917  os << "(no arcs)";
918  }
919  else
920  {
921  for (typename HfstTransitions::iterator tr_it
922  = it->begin();
923  tr_it != it->end(); tr_it++)
924  {
925  if (tr_it != it->begin())
926  {
927  os << ", ";
928  }
929  C data = tr_it->get_transition_data();
930  print_xfst_arc(os, data);
931 
932  os << " -> ";
933  print_xfst_state(os, tr_it->get_target_state());
934  }
935  }
936  os << "." << std::endl;
937  source_state++;
938  }
939  }
940 
941  // note: unknown and identity are both '?'
942  HFSTDLL static std::string prologize_symbol(const std::string & symbol)
943  {
944  if (symbol == "0")
945  return "%0";
946  if (symbol == "?")
947  return "%?";
948  if (symbol == "@_EPSILON_SYMBOL_@")
949  return "0";
950  if (symbol == "@_UNKNOWN_SYMBOL_@")
951  return "?";
952  if(symbol == "@_IDENTITY_SYMBOL_@")
953  return "?";
954  // prepend a backslash to a double quote and to a backslash
955  std::string retval(symbol);
956  replace_all(retval, "\\", "\\\\");
957  replace_all(retval, "\"", "\\\"");
958  return retval;
959  }
960 
961  // caveat: '?' is always unknown
962  HFSTDLL static std::string deprologize_symbol(const std::string & symbol)
963  {
964  if (symbol == "%0")
965  return "0";
966  if (symbol == "%?")
967  return "?";
968  if (symbol == "0")
969  return "@_EPSILON_SYMBOL_@";
970  if (symbol == "?")
971  return "@_UNKNOWN_SYMBOL_@";
972  // remove the escaping backslash in front of a double quote and
973  // a double quote
974  std::string retval(symbol);
975  replace_all(retval, "\\\"", "\"");
976  replace_all(retval, "\\\\", "\\");
977  return retval;
978  }
979 
980  HFSTDLL static void print_prolog_arc_symbols(FILE * file, C data)
981  {
982  std::string symbol = prologize_symbol(data.get_input_symbol());
983  fprintf(file, "\"%s\"", symbol.c_str());
984 
985  if (data.get_input_symbol() !=
986  data.get_output_symbol() ||
987  data.get_input_symbol() == "@_UNKNOWN_SYMBOL_@")
988  {
989  symbol = prologize_symbol(data.get_output_symbol());
990  fprintf(file, ":\"%s\"", symbol.c_str());
991  }
992  }
993 
994  HFSTDLL static void print_prolog_arc_symbols(std::ostream & os, C data)
995  {
996  std::string symbol = prologize_symbol(data.get_input_symbol());
997  os << "\"" << symbol << "\"";
998 
999  if (data.get_input_symbol() !=
1000  data.get_output_symbol() ||
1001  data.get_input_symbol() == "@_UNKNOWN_SYMBOL_@")
1002  {
1003  symbol = prologize_symbol(data.get_output_symbol());
1004  os << ":\"" << symbol << "\"";
1005  }
1006  }
1007 
1010  HFSTDLL void write_in_prolog_format(FILE * file, const std::string & name,
1011  bool write_weights=true)
1012  {
1013  unsigned int source_state=0;
1014  const char * identifier = name.c_str();
1015  // Print the name.
1016  if (name.find(",") != std::string::npos)
1017  {
1018  std::string msg("no commas allowed in the name of prolog networks");
1020  }
1021  fprintf(file, "network(%s).\n", identifier);
1022 
1023  // Print symbols that are in the alphabet but not used in arcs.
1024  HfstTransitionGraphAlphabet symbols_used_ = symbols_used();
1025  initialize_alphabet(symbols_used_); // exclude special symbols
1026  for (typename HfstTransitionGraphAlphabet::const_iterator it
1027  = alphabet.begin(); it != alphabet.end(); it++)
1028  {
1029  if (symbols_used_.find(*it) == symbols_used_.end())
1030  {
1031  fprintf(file, "symbol(%s, \"%s\").\n", identifier, prologize_symbol(it->c_str()).c_str());
1032  }
1033  }
1034 
1035  // Print arcs.
1036  for (iterator it = begin(); it != end(); it++)
1037  {
1038  for (typename HfstTransitions::iterator tr_it
1039  = it->begin();
1040  tr_it != it->end(); tr_it++)
1041  {
1042  fprintf(file, "arc(%s, %i, %i, ",
1043  identifier, source_state, tr_it->get_target_state());
1044  C data = tr_it->get_transition_data();
1045  print_prolog_arc_symbols(file, data);
1046  if (write_weights) {
1047  fprintf(file, ", ");
1048  write_weight(file, data.get_weight());
1049  }
1050  fprintf(file, ").\n");
1051  }
1052  source_state++;
1053  }
1054 
1055  // Print final states.
1056  for (typename FinalWeightMap::const_iterator it
1057  = this->final_weight_map.begin();
1058  it != this->final_weight_map.end(); it++)
1059  {
1060  fprintf(file, "final(%s, %i", identifier, it->first);
1061  if (write_weights)
1062  {
1063  fprintf(file, ", ");
1064  write_weight(file, it->second);
1065  }
1066  fprintf(file, ").\n");
1067  }
1068  }
1069 
1072  HFSTDLL void write_in_prolog_format(std::ostream & os, const std::string & name,
1073  bool write_weights=true)
1074  {
1075  unsigned int source_state=0;
1076 
1077  // Print the name.
1078  if (name.find(",") != std::string::npos)
1079  {
1080  std::string msg("no commas allowed in the name of prolog networks");
1082  }
1083  os << "network(" << name << ")." << std::endl;
1084 
1085  // Print symbols that are in the alphabet but not used in arcs.
1086  HfstTransitionGraphAlphabet symbols_used_ = symbols_used();
1087  initialize_alphabet(symbols_used_); // exclude special symbols
1088  for (typename HfstTransitionGraphAlphabet::const_iterator it
1089  = alphabet.begin(); it != alphabet.end(); it++)
1090  {
1091  if (symbols_used_.find(*it) == symbols_used_.end())
1092  {
1093  os << "symbol(" << name << ", \"" << prologize_symbol(*it) << "\")." << std::endl;
1094  }
1095  }
1096 
1097  // Print arcs.
1098  for (iterator it = begin(); it != end(); it++)
1099  {
1100  for (typename HfstTransitions::iterator tr_it
1101  = it->begin();
1102  tr_it != it->end(); tr_it++)
1103  {
1104  os << "arc(" << name << ", " << source_state << ", " << tr_it->get_target_state() << ", ";
1105  C data = tr_it->get_transition_data();
1106  print_prolog_arc_symbols(os, data);
1107  if (write_weights) {
1108  os << ", ";
1109  write_weight(os, data.get_weight());
1110  }
1111  os << ")." << std::endl;
1112  }
1113  source_state++;
1114  }
1115 
1116  // Print final states.
1117  for (typename FinalWeightMap::const_iterator it
1118  = this->final_weight_map.begin();
1119  it != this->final_weight_map.end(); it++)
1120  {
1121  os << "final(" << name << ", " << it->first;
1122  if (write_weights) {
1123  os << ", ";
1124  write_weight(os, it->second);
1125  }
1126  os << ")." << std::endl;
1127  }
1128  }
1129 
1130  // If \a str is of format ".+", change it to .+ and return true.
1131  // Else, return false.
1132  HFSTDLL static bool strip_quotes_from_both_sides(std::string & str)
1133  {
1134  if (str.size() < 3)
1135  return false;
1136  if (str[0] != '"' || str[str.length()-1] != '"')
1137  return false;
1138  str.erase(0, 1);
1139  str.erase(str.length()-1, 1);
1140  return true;
1141  }
1142 
1143  // If \a str is of format .+)\.", change it to .+ and return true.
1144  // Else, return false.
1145  HFSTDLL static bool strip_ending_parenthesis_and_comma(std::string & str)
1146  {
1147  if (str.size() < 3)
1148  return false;
1149  if (str[str.length()-2] != ')' || str[str.length()-1] != '.')
1150  return false;
1151  str.erase(str.length()-2);
1152  return true;
1153  }
1154 
1155  HFSTDLL static bool parse_prolog_network_line(const std::string & line, std::string & name)
1156  {
1157  // 'network(NAME).'
1158  char namearr[100];
1159  int n = sscanf(line.c_str(), "network(%s", namearr);
1160  if (n != 1)
1161  return false;
1162 
1163  std::string namestr(namearr);
1164  // strip the ending ")." from namestr
1165  if (!strip_ending_parenthesis_and_comma(namestr))
1166  return false;
1167 
1168  name = namestr;
1169  return true;
1170  }
1171 
1172  // Get positions of \a c in \a str. If \a esc is precedes
1173  // \a c, \a c is not included.
1174  HFSTDLL static std::vector<unsigned int> get_positions_of_unescaped_char
1175  (const std::string & str, char c, char esc)
1176  {
1177  std::vector<unsigned int> retval;
1178  for (size_t i=0; i < str.length(); i++)
1179  {
1180  if (str[i] == c)
1181  {
1182  if (i == 0)
1183  retval.push_back(i);
1184  else if (str[i-1] == esc)
1185  ; // skip escaped chars
1186  else
1187  retval.push_back(i);
1188  }
1189  }
1190  return retval;
1191  }
1192 
1193  // Extract input and output symbols, if possible, from prolog arc
1194  // \a str and store them to \a isymbol and \a osymbol.
1195  // Return whether symbols were successfully extracted.
1196  // \a str must be of format "foo":"bar" or "foo"
1197  HFSTDLL static bool get_prolog_arc_symbols
1198  (const std::string & str, std::string & isymbol, std::string & osymbol)
1199  {
1200  // find positions of non-escaped double quotes (todo: double double-quote?)
1201  std::vector<unsigned int> quote_positions
1202  = get_positions_of_unescaped_char(str, '"', '\\');
1203 
1204  // "foo"
1205  if (quote_positions.size() == 2)
1206  {
1207  if (quote_positions[0] != 0 ||
1208  quote_positions[1] != str.length()-1)
1209  return false; // extra characters outside quotes
1210  }
1211  // "foo":"bar"
1212  else if (quote_positions.size() == 4)
1213  {
1214  if (quote_positions[0] != 0 ||
1215  quote_positions[3] != str.length()-1)
1216  {
1217  return false; // extra characters outside quotes
1218  }
1219  if (quote_positions[2] - quote_positions[1] != 2)
1220  {
1221  return false; // missing colon between inner quotes
1222  }
1223  if (str[quote_positions[1] + 1] != ':')
1224  {
1225  return false; // else than colon between inner quotes
1226  }
1227  }
1228  // not valid prolog arc
1229  else
1230  {
1231  return false;
1232  }
1233 
1234  // "foo"
1235  if (quote_positions.size() == 2)
1236  {
1237  // "foo" -> foo
1238  std::string symbol(str, quote_positions[0]+1, quote_positions[1]-quote_positions[0]-1);
1239  isymbol = deprologize_symbol(symbol);
1240  if (isymbol == "@_UNKNOWN_SYMBOL_@") // single unknown -> identity
1241  isymbol = "@_IDENTITY_SYMBOL_@";
1242  osymbol = isymbol;
1243  }
1244  // "foo":"bar"
1245  else
1246  {
1247  // "foo" -> foo, "bar" -> bar
1248  std::string insymbol(str, quote_positions[0]+1, quote_positions[1]-quote_positions[0]-1);
1249  std::string outsymbol(str, quote_positions[2]+1, quote_positions[3]-quote_positions[2]-1);
1250  isymbol = deprologize_symbol(insymbol);
1251  osymbol = deprologize_symbol(outsymbol);
1252  }
1253 
1254  return true;
1255  }
1256 
1257  HFSTDLL static bool extract_weight(std::string & symbol, float & weight)
1258  {
1259  size_t last_double_quote = symbol.find_last_of('"');
1260  size_t last_space = symbol.find_last_of(' ');
1261 
1262  // at least two double quotes should be found
1263  if (last_double_quote == std::string::npos)
1264  { return false; }
1265 
1266  if (last_space == std::string::npos) {
1267  ; // no weight
1268  }
1269  else if (last_double_quote > last_space) {
1270  ; // no weight, last space is part of a symbol
1271  }
1272  else if (last_double_quote + 2 == last_space && last_space < symbol.size()-1) // + 2 because of the comma
1273  {
1274  std::istringstream buffer(symbol.substr(last_space+1));
1275  buffer >> weight;
1276  if (buffer.fail()) // a float could not be read
1277  { return false; }
1278  symbol.resize(last_space-1); // get rid of the comma and weight
1279  }
1280  else {
1281  return false; // not valid symbol and weight
1282  }
1283  return true;
1284  }
1285 
1286  HFSTDLL static bool parse_prolog_arc_line(const std::string & line, HfstTransitionGraph & graph)
1287  {
1288  // symbolstr can also contain the weight
1289  char namestr[100]; char sourcestr[100];
1290  char targetstr[100]; char symbolstr[100];
1291 
1292  int n = sscanf(line.c_str(), "arc(%[^,], %[^,], %[^,], %[^\t\n]",
1293  namestr, sourcestr, targetstr, symbolstr);
1294 
1295  std::string symbol(symbolstr);
1296 
1297  // strip the ending ")." from symbolstr
1298  if (!strip_ending_parenthesis_and_comma(symbol))
1299  { return false; }
1300 
1301  if (n != 4)
1302  { return false; }
1303  if (std::string(namestr) != graph.name)
1304  { return false; }
1305 
1306  unsigned int source = atoi(sourcestr);
1307  unsigned int target = atoi(targetstr);
1308 
1309  // handle the weight that might be included in symbol string
1310  float weight = 0;
1311  if (! extract_weight(symbol, weight))
1312  { return false; }
1313 
1314  std::string isymbol = "";
1315  std::string osymbol = "";
1316 
1317  if (!get_prolog_arc_symbols(symbol, isymbol, osymbol))
1318  return false;
1319 
1320  graph.add_transition(source, HfstTransition<C>(target, isymbol, osymbol, weight));
1321  return true;
1322  }
1323 
1324  HFSTDLL static bool parse_prolog_final_line(const std::string & line, HfstTransitionGraph & graph)
1325  {
1326  // 'final(NAME, number).' or 'final(NAME, number, weight).'
1327  char namestr[100];
1328  char finalstr[100];
1329  char weightstr[100];
1330  float weight = 0;
1331 
1332  unsigned int number_of_commas = 0;
1333  size_t pos = line.find(',');
1334  while (pos != std::string::npos)
1335  {
1336  number_of_commas++;
1337  pos = line.find(',', pos+1);
1338  }
1339 
1340  if (number_of_commas == 1)
1341  {
1342  int n = sscanf(line.c_str(), "final(%[^,], %[^)]).", namestr, finalstr);
1343  if (n != 2)
1344  { return false; }
1345  }
1346  else if (number_of_commas == 2)
1347  {
1348  int n = sscanf(line.c_str(), "final(%[^,], %[^,], %[^)]).", namestr, finalstr, weightstr);
1349  if (n != 3)
1350  { return false; }
1351  std::istringstream buffer(weightstr);
1352  buffer >> weight;
1353  if (buffer.fail()) // a float could not be read
1354  { return false; }
1355  }
1356  else
1357  {
1358  return false;
1359  }
1360 
1361  if (std::string(namestr) != graph.name)
1362  return false;
1363 
1364  graph.set_final_weight(atoi(finalstr), weight);
1365  return true;
1366  }
1367 
1368  HFSTDLL static bool parse_prolog_symbol_line(const std::string & line, HfstTransitionGraph & graph)
1369  {
1370  // 'symbol(NAME, "foo").'
1371  char namearr[100];
1372  char symbolarr[100];
1373  int n = sscanf(line.c_str(), "symbol(%[^,], %s", namearr, symbolarr);
1374 
1375  if (n != 2)
1376  return false;
1377 
1378  std::string namestr(namearr);
1379  std::string symbolstr(symbolarr);
1380 
1381  if (namestr != graph.name)
1382  return false;
1383 
1384  if (! strip_ending_parenthesis_and_comma(symbolstr))
1385  return false;
1386 
1387  if (! strip_quotes_from_both_sides(symbolstr))
1388  return false;
1389 
1390  graph.add_symbol_to_alphabet(deprologize_symbol(symbolstr));
1391  return true;
1392  }
1393 
1394  // Erase newlines from the end of \a str and return \a str.
1395  HFSTDLL static std::string strip_newlines(std::string & str)
1396  {
1397  for (signed int i=(signed int)str.length()-1; i >= 0; --i)
1398  {
1399  if (str[i] == '\n' || str[i] == '\r')
1400  str.erase(i, 1);
1401  else
1402  break;
1403  }
1404  return str;
1405  }
1406 
1407  // Try to get a line from \a is (if \a file == NULL)
1408  // or from \a file. If successfull, strip the line from newlines,
1409  // increment \a linecount by one and return the line.
1410  // Else, throw an EndOfStreamException.
1411  HFSTDLL static std::string get_stripped_line
1412  (std::istream & is, FILE * file, unsigned int & linecount)
1413  {
1414  char line [255];
1415 
1416  if (file == NULL) { /* we use streams */
1417  if (! is.getline(line,255).eof())
1419  }
1420  else { /* we use FILEs */
1421  if (NULL == fgets(line, 255, file))
1423  }
1424  linecount++;
1425 
1426  std::string linestr(line);
1427  return strip_newlines(linestr);
1428  }
1429 
1430  /* Create an HfstTransitionGraph as defined in prolog format
1431  in istream \a is or FILE \a file.
1432 
1433  The functions is called by functions
1434  read_in_prolog_format(istream&) and
1435  read_in_prolog_format(FILE*).
1436  If \a file is NULL, it is ignored and \a is is used.
1437  If \a file is not NULL, it is used and \a is is ignored. */
1438  HFSTDLL static HfstTransitionGraph read_in_prolog_format
1439  (std::istream &is, FILE *file, unsigned int & linecount)
1440  {
1441 
1442  HfstTransitionGraph retval;
1443  std::string linestr;
1444 
1445  while(true)
1446  {
1447  try
1448  {
1449  linestr = get_stripped_line(is, file, linecount);
1450  }
1451  catch (const EndOfStreamException & e)
1452  {
1453  HFST_THROW(NotValidPrologFormatException);
1454  }
1455 
1456  if (linestr.length() != 0 && linestr[0] == '#')
1457  {
1458  continue; // comment line
1459  }
1460  else
1461  {
1462  break; // first non-comment line
1463  }
1464  }
1465 
1466 
1467  if (! parse_prolog_network_line(linestr, retval.name))
1468  {
1469  std::string message("first line not valid prolog: ");
1470  message.append(linestr);
1471  HFST_THROW_MESSAGE(NotValidPrologFormatException, message);
1472  }
1473 
1474  while(true)
1475  {
1476  try
1477  {
1478  linestr = get_stripped_line(is, file, linecount);
1479  if (linestr == "") // prolog separator
1480  {
1481  return retval;
1482  }
1483  }
1484  catch (const EndOfStreamException & e)
1485  {
1486  return retval;
1487  }
1488 
1489  if (! (parse_prolog_arc_line(linestr, retval) ||
1490  parse_prolog_final_line(linestr, retval) ||
1491  parse_prolog_symbol_line(linestr, retval)) )
1492  {
1493  std::string message("line not valid prolog: ");
1494  message.append(linestr);
1495  HFST_THROW_MESSAGE(NotValidPrologFormatException, message);
1496  }
1497  }
1498  HFST_THROW(NotValidPrologFormatException); // this should not happen
1499  }
1500 
1501  HFSTDLL static HfstTransitionGraph read_in_prolog_format
1502  (std::istream &is,
1503  unsigned int & linecount)
1504  {
1505  return read_in_prolog_format
1506  (is, NULL /* a dummy variable */,
1507  linecount);
1508  }
1509 
1510  HFSTDLL static HfstTransitionGraph read_in_prolog_format
1511  (FILE *file,
1512  unsigned int & linecount)
1513  {
1514  return read_in_prolog_format
1515  (std::cin /* a dummy variable */,
1516  file, linecount);
1517  }
1518 
1519 
1522  HFSTDLL void write_in_xfst_format(FILE * file, bool write_weights=true)
1523  {
1524  (void)write_weights;
1525  unsigned int source_state=0;
1526  for (iterator it = begin(); it != end(); it++)
1527  {
1528  print_xfst_state(file, source_state);
1529  fprintf(file, ":\t");
1530 
1531  if (it->begin() == it->end())
1532  {
1533  fprintf(file, "(no arcs)");
1534  }
1535  else
1536  {
1537  for (typename HfstTransitions::iterator tr_it
1538  = it->begin();
1539  tr_it != it->end(); tr_it++)
1540  {
1541  if (tr_it != it->begin())
1542  {
1543  fprintf(file, ", ");
1544  }
1545  C data = tr_it->get_transition_data();
1546 
1547  print_xfst_arc(file, data);
1548 
1549  fprintf(file, " -> ");
1550  print_xfst_state(file, tr_it->get_target_state());
1551  }
1552  }
1553  fprintf(file, ".\n");
1554  source_state++;
1555  }
1556  }
1557 
1558 
1559 
1560 
1563  HFSTDLL void write_in_att_format(std::ostream &os, bool write_weights=true)
1564  {
1565  unsigned int source_state=0;
1566  for (iterator it = begin(); it != end(); it++)
1567  {
1568  for (typename HfstTransitions::iterator tr_it
1569  = it->begin();
1570  tr_it != it->end(); tr_it++)
1571  {
1572  C data = tr_it->get_transition_data();
1573 
1574  std::string isymbol = data.get_input_symbol();
1575  replace_all(isymbol, " ", "@_SPACE_@");
1576  replace_all(isymbol, "@_EPSILON_SYMBOL_@", "@0@");
1577  replace_all(isymbol, "\t", "@_TAB_@");
1578 
1579  std::string osymbol = data.get_output_symbol();
1580  replace_all(osymbol, " ", "@_SPACE_@");
1581  replace_all(osymbol, "@_EPSILON_SYMBOL_@", "@0@");
1582  replace_all(osymbol, "\t", "@_TAB_@");
1583 
1584  os << source_state << "\t"
1585  << tr_it->get_target_state() << "\t"
1586  << isymbol << "\t"
1587  << osymbol;
1588 
1589  if (write_weights) {
1590  os << "\t";
1591  write_weight(os, data.get_weight());
1592  }
1593  os << "\n";
1594  }
1595  if (is_final_state(source_state))
1596  {
1597  os << source_state;
1598  if (write_weights) {
1599  os << "\t";
1600  write_weight(os, get_final_weight(source_state));
1601  }
1602  os << "\n";
1603  }
1604  source_state++;
1605  }
1606  }
1607 
1610  HFSTDLL void write_in_att_format(FILE *file, bool write_weights=true)
1611  {
1612  unsigned int source_state=0;
1613  for (iterator it = begin(); it != end(); it++)
1614  {
1615  for (typename HfstTransitions::iterator tr_it
1616  = it->begin();
1617  tr_it != it->end(); tr_it++)
1618  {
1619  C data = tr_it->get_transition_data();
1620 
1621  std::string isymbol = data.get_input_symbol();
1622  replace_all(isymbol, " ", "@_SPACE_@");
1623  replace_all(isymbol, "@_EPSILON_SYMBOL_@", "@0@");
1624  replace_all(isymbol, "\t", "@_TAB_@");
1625 
1626  std::string osymbol = data.get_output_symbol();
1627  replace_all(osymbol, " ", "@_SPACE_@");
1628  replace_all(osymbol, "@_EPSILON_SYMBOL_@", "@0@");
1629  replace_all(osymbol, "\t", "@_TAB_@");
1630 
1631  fprintf(file, "%i\t%i\t%s\t%s",
1632  source_state,
1633  tr_it->get_target_state(),
1634  isymbol.c_str(),
1635  osymbol.c_str());
1636 
1637  if (write_weights) {
1638  fprintf(file, "\t");
1639  write_weight(file, data.get_weight());
1640  }
1641  fprintf(file, "\n");
1642  }
1643  if (is_final_state(source_state))
1644  {
1645  fprintf(file, "%i", source_state);
1646  if (write_weights) {
1647  fprintf(file, "\t");
1648  write_weight(file, get_final_weight(source_state));
1649  }
1650  fprintf(file, "\n");
1651  }
1652  source_state++;
1653  }
1654  }
1655 
1656  HFSTDLL void write_in_att_format(char * ptr, bool write_weights=true)
1657  {
1658  unsigned int source_state=0;
1659  size_t cwt = 0; // characters written in total
1660  size_t cw = 0; // characters written in latest call to sprintf
1661  for (iterator it = begin(); it != end(); it++)
1662  {
1663  for (typename HfstTransitions::iterator tr_it
1664  = it->begin();
1665  tr_it != it->end(); tr_it++)
1666  {
1667  C data = tr_it->get_transition_data();
1668 
1669  std::string isymbol = data.get_input_symbol();
1670  replace_all(isymbol, " ", "@_SPACE_@");
1671  replace_all(isymbol, "@_EPSILON_SYMBOL_@", "@0@");
1672  replace_all(isymbol, "\t", "@_TAB_@");
1673 
1674  std::string osymbol = data.get_output_symbol();
1675  replace_all(osymbol, " ", "@_SPACE_@");
1676  replace_all(osymbol, "@_EPSILON_SYMBOL_@", "@0@");
1677  replace_all(osymbol, "\t", "@_TAB_@");
1678 
1679  cw = sprintf(ptr + cwt, "%i\t%i\t%s\t%s",
1680  source_state,
1681  tr_it->get_target_state(),
1682  isymbol.c_str(),
1683  osymbol.c_str());
1684 
1685  cwt = cwt + cw;
1686 
1687  if (write_weights)
1688  cw = sprintf(ptr + cwt, "\t%f",
1689  data.get_weight());
1690  cwt = cwt + cw;
1691  cw = sprintf(ptr + cwt, "\n");
1692  cwt = cwt + cw;
1693  }
1694  if (is_final_state(source_state))
1695  {
1696  cw = sprintf(ptr + cwt, "%i", source_state);
1697  cwt = cwt + cw;
1698  if (write_weights)
1699  cw = sprintf(ptr + cwt, "\t%f",
1700  get_final_weight(source_state));
1701  cwt = cwt + cw;
1702  cw = sprintf(ptr + cwt, "\n");
1703  cwt = cwt + cw;
1704  }
1705  source_state++;
1706  }
1707  }
1708 
1709 
1713  HFSTDLL void write_in_att_format_number(FILE *file, bool write_weights=true)
1714  {
1715  unsigned int source_state=0;
1716  for (iterator it = begin(); it != end(); it++)
1717  {
1718  for (typename HfstTransitions::iterator tr_it
1719  = it->begin();
1720  tr_it != it->end(); tr_it++)
1721  {
1722  C data = tr_it->get_transition_data();
1723 
1724  fprintf(file, "%i\t%i\t%i\t%i",
1725  source_state,
1726  tr_it->get_target_state(),
1727  tr_it->get_input_number(),
1728  tr_it->get_output_number());
1729 
1730  if (write_weights)
1731  fprintf(file, "\t%f",
1732  data.get_weight());
1733  fprintf(file, "\n");
1734 
1735  if (is_final_state(source_state))
1736  {
1737  fprintf(file, "%i", source_state);
1738  if (write_weights)
1739  fprintf(file, "\t%f",
1740  get_final_weight(source_state));
1741  fprintf(file, "\n");
1742  }
1743  }
1744  source_state++;
1745  }
1746  }
1747 
1748 
1749  /* Create an HfstTransitionGraph as defined in AT&T format
1750  in istream \a is or FILE \a file. \a epsilon_symbol defines
1751  how epsilon is represented.
1752 
1753  The functions is called by functions
1754  read_in_att_format(istream&, std::string) and
1755  read_in_att_format(FILE*, std::string).
1756  If \a file is NULL, it is ignored and \a is is used.
1757  If \a file is not NULL, it is used and \a is is ignored. */
1758  HFSTDLL static HfstTransitionGraph read_in_att_format
1759  (std::istream &is,
1760  FILE *file,
1761  std::string epsilon_symbol,
1762  unsigned int & linecount) {
1763 
1764  if (file == NULL) {
1765  if (is.eof()) {
1767  }
1768  }
1769  else {
1770  if (feof(file)) {
1772  }
1773  }
1774 
1775  HfstTransitionGraph retval;
1776  char line [255];
1777  while(true) {
1778 
1779  if (file == NULL) { /* we use streams */
1780  if (! is.getline(line,255).eof())
1781  break;
1782  }
1783  else { /* we use FILEs */
1784  if (NULL == fgets(line, 255, file))
1785  break;
1786  }
1787 
1788  linecount++;
1789 
1790  // an empty line signifying an empty transducer,
1791  // a special case that is accepted if it is the only
1792  // transducer in the stream
1793  if ( // empty line with or without a newline
1794  (line[0] == '\0') ||
1795  (line[0] == '\n' && line[1] == '\0') ||
1796  // windows newline
1797  (line[0] == '\r' && line[1] == '\n' && line[2] == '\0')
1798  ) {
1799 
1800  // make sure that the end-of-file is reached
1801  if (file == NULL)
1802  is.get();
1803  else
1804  fgetc(file);
1805  break;
1806  }
1807 
1808  if (*line == '-') // transducer separator line is "--"
1809  return retval;
1810 
1811  // scan one line that can have a maximum of five fields
1812  char a1 [100]; char a2 [100]; char a3 [100];
1813  char a4 [100]; char a5 [100];
1814  // how many fields could be parsed
1815  int n = sscanf(line, "%s%s%s%s%s", a1, a2, a3, a4, a5);
1816 
1817  // set value of weight
1818  float weight = 0;
1819  if (n == 2) // a final state line with weight
1820  weight = atof(a2);
1821  if (n == 5) // a transition line with weight
1822  weight = atof(a5);
1823 
1824  if (n == 1 || n == 2) // a final state line
1825  retval.set_final_weight( atoi(a1), weight );
1826 
1827  else if (n == 4 || n == 5) { // a transition line
1828  std::string input_symbol=std::string(a3);
1829  std::string output_symbol=std::string(a4);
1830 
1831  // replace "@_SPACE_@"s with " " and "@0@"s with
1832  // "@_EPSILON_SYMBOL_@"
1833  replace_all(input_symbol, "@_SPACE_@", " ");
1834  replace_all(input_symbol, "@0@", "@_EPSILON_SYMBOL_@");
1835  replace_all(input_symbol, "@_TAB_@", "\t");
1836  replace_all(input_symbol, "@_COLON_@", ":");
1837 
1838  replace_all(output_symbol, "@_SPACE_@", " ");
1839  replace_all(output_symbol, "@0@", "@_EPSILON_SYMBOL_@");
1840  replace_all(output_symbol, "@_TAB_@", "\t");
1841  replace_all(output_symbol, "@_COLON_@", ":");
1842 
1843  if (epsilon_symbol.compare(input_symbol) == 0)
1844  input_symbol="@_EPSILON_SYMBOL_@";
1845  if (epsilon_symbol.compare(output_symbol) == 0)
1846  output_symbol="@_EPSILON_SYMBOL_@";
1847 
1848  HfstTransition <C> tr( atoi(a2), input_symbol,
1849  output_symbol, weight );
1850  retval.add_transition( atoi(a1), tr );
1851  }
1852 
1853  else { // line could not be parsed
1854  std::string message(line);
1857  message);
1858  }
1859  }
1860  return retval;
1861  }
1862 
1863 
1870  HFSTDLL static HfstTransitionGraph read_in_att_format
1871  (std::istream &is,
1872  std::string epsilon_symbol,
1873  unsigned int & linecount)
1874  {
1875  return read_in_att_format
1876  (is, NULL /* a dummy variable */,
1877  epsilon_symbol, linecount);
1878  }
1879 
1886  HFSTDLL static HfstTransitionGraph read_in_att_format
1887  (FILE *file,
1888  std::string epsilon_symbol,
1889  unsigned int & linecount)
1890  {
1891  return read_in_att_format
1892  (std::cin /* a dummy variable */,
1893  file, epsilon_symbol, linecount);
1894  }
1895 
1896 
1897  // ----------------------------------------------
1898  // ----- Substitution functions -----
1899  // ----------------------------------------------
1900 
1901  protected:
1902 
1903  /* A function that performs in-place-substitution in the graph. */
1904 
1905  void substitute_(HfstSymbol old_symbol,
1906  HfstSymbol new_symbol,
1907  bool input_side=true,
1908  bool output_side=true)
1909  {
1910  // ----- Go through all states -----
1911  for (iterator it = begin(); it != end(); it++)
1912  {
1913  // Go through all transitions
1914  for (unsigned int i=0; i < it->size(); i++)
1915  {
1916  HfstTransition<C> &tr_it = it->operator[](i);
1917 
1918  // The substituting input and output symbols for the
1919  // current transition.
1920  HfstSymbol substituting_input_symbol
1921  = tr_it.get_input_symbol();
1922  HfstSymbol substituting_output_symbol
1923  = tr_it.get_output_symbol();
1924 
1925  // Whether a substitution will be performed.
1926  bool substitution_made=false;
1927 
1928  if (input_side &&
1929  tr_it.get_input_symbol() == old_symbol) {
1930  substituting_input_symbol = new_symbol;
1931  substitution_made=true;
1932  }
1933  if (output_side &&
1934  tr_it.get_output_symbol() == old_symbol) {
1935  substituting_output_symbol = new_symbol;
1936  substitution_made=true;
1937  }
1938 
1939  // If a substitution is to be performed,
1940  if (substitution_made) {
1941 
1942  add_symbol_to_alphabet(new_symbol);
1943 
1944  // change the current transition accordingly.
1945  HfstTransition<C> tr
1946  (tr_it.get_target_state(),
1947  substituting_input_symbol,
1948  substituting_output_symbol,
1949  tr_it.get_weight());
1950 
1951  it->operator[](i) = tr;
1952  }
1953 
1954  } // all transitions gone through
1955 
1956  } // ----- all states gone through -----
1957 
1958  return;
1959  }
1960 
1961  /* A function that performs in-place substitutions in the graph
1962  as defined in \a substitutions.
1963 
1964  substitutions[from_number] = to_number,
1965  if substitutions[from_number] = no_substitution, no substitution is made */
1966  void substitute_(const HfstNumberVector &substitutions,
1967  unsigned int no_substitution)
1968  {
1969  // ----- Go through all states -----
1970  for (iterator it = begin(); it != end(); it++)
1971  {
1972  // Go through all transitions
1973  for (unsigned int i=0; i < it->size(); i++)
1974  {
1975  HfstTransition<C> &tr_it = it->operator[](i);
1976 
1977  HfstNumber old_inumber = tr_it.get_input_number();
1978  HfstNumber old_onumber = tr_it.get_output_number();
1979 
1980  HfstNumber new_inumber = substitutions.at(old_inumber);
1981  HfstNumber new_onumber = substitutions.at(old_onumber);
1982 
1983  // If a substitution is to be performed,
1984  if (new_inumber != no_substitution ||
1985  new_onumber != no_substitution)
1986  {
1987  if (new_inumber != no_substitution)
1988  add_symbol_to_alphabet(C::get_symbol(new_inumber));
1989  else
1990  new_inumber = old_inumber;
1991 
1992  if (new_onumber != no_substitution)
1993  add_symbol_to_alphabet(C::get_symbol(new_onumber));
1994  else
1995  new_onumber = old_onumber;
1996 
1997  // change the current transition accordingly.
1998  HfstTransition<C> tr
1999  (tr_it.get_target_state(),
2000  new_inumber,
2001  new_onumber,
2002  tr_it.get_weight(), false);
2003 
2004  it->operator[](i) = tr;
2005  }
2006 
2007  } // all transitions gone through
2008 
2009  } // ----- all states gone through -----
2010 
2011  return;
2012  }
2013 
2014  /* A function that performs in-place substitutions in the graph
2015  as defined in \a substitutions. */
2016  void substitute_(const HfstNumberPairSubstitutions &substitutions)
2017  {
2018  // ----- Go through all states -----
2019  for (iterator it = begin(); it != end(); it++)
2020  {
2021  // Go through all transitions
2022  for (unsigned int i=0; i < it->size(); i++)
2023  {
2024  HfstTransition<C> &tr_it = it->operator[](i);
2025 
2026  HfstNumberPair old_number_pair
2027  ( tr_it.get_input_number(),
2028  tr_it.get_output_number() );
2029 
2030  HfstNumberPairSubstitutions::const_iterator subst_it
2031  = substitutions.find(old_number_pair);
2032 
2033  // If a substitution is to be performed,
2034  if (subst_it != substitutions.end()) {
2035 
2036  HfstNumber new_input_number = subst_it->second.first;
2037  HfstNumber new_output_number = subst_it->second.second;
2038 
2039  add_symbol_to_alphabet(HfstTropicalTransducerTransitionData::
2040  get_symbol(new_input_number));
2041  add_symbol_to_alphabet(HfstTropicalTransducerTransitionData::
2042  get_symbol(new_output_number));
2043 
2044  // change the current transition accordingly.
2045  HfstTransition<C> tr
2046  (tr_it.get_target_state(),
2047  new_input_number,
2048  new_output_number,
2049  tr_it.get_weight(), false);
2050 
2051  it->operator[](i) = tr;
2052  }
2053 
2054  } // all transitions gone through
2055 
2056  } // ----- all states gone through -----
2057 
2058  return;
2059  }
2060 
2061  public:
2062 
2063  /* A function that performs in-place removal of all transitions
2064  equivalent to \a sp in the graph. */
2065  HFSTDLL void remove_transitions(const HfstSymbolPair &sp)
2066  {
2067  unsigned int in_match = C::get_number(sp.first);
2068  unsigned int out_match = C::get_number(sp.second);
2069 
2070  bool in_match_used = false;
2071  bool out_match_used = false;
2072 
2073  // ----- Go through all states -----
2074  for (iterator it = begin(); it != end(); it++)
2075  {
2076  // Go through all transitions of the current state
2077  for (unsigned int i=0; i < it->size(); i++)
2078  {
2079  HfstTransition<C> &tr_it = it->operator[](i);
2080 
2081  // If a match was found, remove the transition:
2082  unsigned int in_tr = tr_it.get_input_number();
2083  unsigned int out_tr = tr_it.get_output_number();
2084  if (in_tr == in_match && out_tr == out_match) {
2085  it->erase(it->begin()+i); }
2086  else
2087  {
2088  if (in_tr == in_match || out_tr == in_match) {
2089  in_match_used=true; }
2090  if (in_tr == out_match || out_tr == out_match) {
2091  out_match_used=true; }
2092  }
2093  }
2094  }
2095 
2096  // Handle the alphabet
2097  if (!in_match_used) {
2098  alphabet.erase(sp.first); }
2099  if (!out_match_used) {
2100  alphabet.erase(sp.second); }
2101  }
2102 
2103  protected:
2104 
2105  /* A function that performs in-place-substitution in the graph. */
2106  void substitute_(const HfstSymbolPair &old_sp,
2107  const HfstSymbolPairSet &new_sps)
2108  {
2109  if (new_sps.empty())
2110  {
2111  return remove_transitions(old_sp);
2112  }
2113 
2114  unsigned int old_input_number = C::get_number(old_sp.first);
2115  unsigned int old_output_number = C::get_number(old_sp.second);
2116 
2117  // Whether any substitution was performed
2118  bool substitution_performed=false;
2119 
2120  // ----- Go through all states -----
2121  for (iterator it = begin(); it != end(); it++)
2122  {
2123  // The transitions to be added to the current state
2124  HfstTransitions new_transitions;
2125 
2126  // Go through all transitions of the current state
2127  for (unsigned int i=0; i < it->size(); i++)
2128  {
2129  HfstTransition<C> &tr_it = it->operator[](i);
2130 
2131  // If a match was found, substitute:
2132  if (tr_it.get_input_number() == old_input_number &&
2133  tr_it.get_output_number() == old_output_number)
2134  {
2135  substitution_performed=true;
2136 
2137  // change the current transition so that it is equivalent
2138  // to the first substituting transition in new_sps
2139  typename HfstSymbolPairSet::const_iterator IT
2140  = new_sps.begin();
2141 
2142  HfstTransition<C> tr
2143  (tr_it.get_target_state(),
2144  C::get_number(IT->first),
2145  C::get_number(IT->second),
2146  tr_it.get_weight(),
2147  true);
2148 
2149  it->operator[](i) = tr;
2150 
2151  // and schedule the rest of the substituting transitions
2152  // in new_sps to be added to the current state.
2153  while (IT != new_sps.end())
2154  {
2155  HfstTransition<C> TR
2156  (tr_it.get_target_state(),
2157  C::get_number(IT->first),
2158  C::get_number(IT->second),
2159  tr_it.get_weight(),
2160  true);
2161 
2162  new_transitions.push_back(TR);
2163  IT++;
2164  }
2165 
2166  } // (substitution and scheduling done)
2167 
2168  } // (all transitions of a state gone through)
2169 
2170  // Add the new transitions to the current state
2171  for (typename HfstTransitions
2172  ::const_iterator NIT = new_transitions.begin();
2173  NIT != new_transitions.end(); NIT++)
2174  {
2175  it->push_back(*NIT);
2176  }
2177 
2178  } // ( ----- all states in the graph gone through ----- )
2179 
2180  // If at least one substitution was performed, add all the
2181  // symbols in the substituting transitions to the alphabet of
2182  // the graph.
2183  if (substitution_performed) {
2184  add_symbols_to_alphabet(new_sps);
2185  }
2186 
2187  // Remove symbols that were removed because of substitutions
2188  // (or didn't occur in the graph in the first place)
2189  std::set<unsigned int> syms;
2190  /*for (typename HfstSymbolPairSet::const_iterator it = new_sps.begin();
2191  it != new_sps.end(); it++) {
2192  syms.insert(C::get_number(it->first));
2193  syms.insert(C::get_number(it->second)); ?????????
2194  }*/
2195  syms.insert(old_input_number);
2196  syms.insert(old_output_number);
2197  prune_alphabet_after_substitution(syms);
2198 
2199  return;
2200  }
2201 
2202 
2203  /* A function that performs in-place-substitution in the graph. */
2204  void substitute_(bool (*func)
2205  (const HfstSymbolPair &sp, HfstSymbolPairSet &sps))
2206  {
2207  // ----- Go through all states. -----
2208  for (iterator it = begin(); it != end(); it++)
2209  {
2210  // The transitions to be added to the current state.
2211  HfstTransitions new_transitions;
2212 
2213  // Go through all transitions.
2214  for (unsigned int i=0; i < it->size(); i++)
2215  {
2216  HfstTransition<C> &tr_it = it->operator[](i);
2217 
2218  HfstSymbolPair transition_symbol_pair
2219  (tr_it.get_input_symbol(),
2220  tr_it.get_output_symbol());
2221  HfstSymbolPairSet substituting_transitions;
2222 
2223  // If a substitution is to be performed,
2224  bool perform_substitution=false;
2225  try {
2226  perform_substitution =
2227  (*func)(transition_symbol_pair, substituting_transitions);
2228  }
2229  catch (const HfstException & e)
2230  {
2231  throw e;
2232  }
2233  if (perform_substitution)
2234  {
2235  // change the transition to the first element
2236  // in new_sps
2237  typename HfstSymbolPairSet::const_iterator IT
2238  = substituting_transitions.begin();
2239 
2240  if (! C::is_valid_symbol(IT->first) ||
2241  ! C::is_valid_symbol(IT->second) )
2243  (EmptyStringException,
2244  "HfstTransitionGraph::substitute");
2245 
2246  HfstTransition<C> tr
2247  (tr_it.get_target_state(),
2248  IT->first,
2249  IT->second,
2250  tr_it.get_weight());
2251 
2252  it->operator[](i) = tr;
2253 
2254  add_symbol_to_alphabet(IT->first);
2255  add_symbol_to_alphabet(IT->second);
2256 
2257  // and schedule the rest of the elements in new_sps
2258  // to be added to this state.
2259  while (IT != substituting_transitions.end())
2260  {
2261 
2262  if (! C::is_valid_symbol(IT->first) ||
2263  ! C::is_valid_symbol(IT->second) )
2265  (EmptyStringException,
2266  "HfstTransitionGraph::substitute");
2267 
2268  HfstTransition<C> TR
2269  (tr_it.get_target_state(),
2270  IT->first,
2271  IT->second,
2272  tr_it.get_weight());
2273 
2274  new_transitions.push_back(TR);
2275 
2276  add_symbol_to_alphabet(IT->first);
2277  add_symbol_to_alphabet(IT->second);
2278 
2279  IT++;
2280  }
2281 
2282  } // Substitution and scheduling performed.
2283 
2284  } // All transitions gone through.
2285 
2286  // Add the new transitions.
2287  for (typename HfstTransitions
2288  ::const_iterator NIT = new_transitions.begin();
2289  NIT != new_transitions.end(); NIT++)
2290  {
2291  it->push_back(*NIT);
2292  }
2293 
2294  } // ----- All states gone through. -----
2295 
2296  return;
2297  }
2298 
2299  public:
2300 
2301  /* ----------------------------------------
2302  The public substitution functions.
2303  ---------------------------------------- */
2304 
2308  HFSTDLL HfstTransitionGraph &
2309  substitute(const HfstSymbol &old_symbol,
2310  const HfstSymbol &new_symbol,
2311  bool input_side=true,
2312  bool output_side=true) {
2313 
2314  if (! C::is_valid_symbol(old_symbol) ||
2315  ! C::is_valid_symbol(new_symbol) ) {
2317  (EmptyStringException,
2318  "HfstTransitionGraph::substitute"); }
2319 
2320  // If a symbol is substituted with itself, do nothing.
2321  if (old_symbol == new_symbol)
2322  return *this;
2323  // If the old symbol is not known to the graph, do nothing.
2324  if (alphabet.find(old_symbol) == alphabet.end())
2325  return *this;
2326 
2327  // Remove the symbol to be substituted from the alphabet
2328  // if the substitution is made on both sides.
2329  if (input_side && output_side) {
2330  /* Special symbols are always included in the alphabet */
2331  if (! is_epsilon(old_symbol) &&
2332  ! is_unknown(old_symbol) &&
2333  ! is_identity(old_symbol)) {
2334  alphabet.erase(old_symbol); }
2335  }
2336  // Insert the substituting symbol to the alphabet.
2337  alphabet.insert(new_symbol);
2338 
2339  substitute_(old_symbol, new_symbol, input_side, output_side);
2340 
2341  return *this;
2342  }
2343 
2344  HFSTDLL HfstTransitionGraph &substitute_symbols
2345  (const HfstSymbolSubstitutions &substitutions)
2346  { return this->substitute(substitutions); }
2347 
2350  (const HfstSymbolSubstitutions &substitutions)
2351  {
2352  // add symbols to the global HfstTransition alphabet
2353  for (HfstSymbolSubstitutions::const_iterator it
2354  = substitutions.begin();
2355  it != substitutions.end(); it++)
2356  {
2357  (void)get_symbol_number(it->first);
2358  (void)get_symbol_number(it->second);
2359  }
2360 
2361  // how symbol numbers are substituted:
2362  // substitutions_[from_symbol] = to_symbol
2363  std::vector<unsigned int> substitutions_;
2364  // marker that means that no substitution is made
2365  unsigned int no_substitution = C::get_max_number()+substitutions.size()+1;
2366  substitutions_.resize
2367  (C::get_max_number()+1, no_substitution);
2368  for (HfstSymbolSubstitutions::const_iterator it
2369  = substitutions.begin();
2370  it != substitutions.end(); it++)
2371  {
2372  HfstNumber from_symbol = get_symbol_number(it->first);
2373  HfstNumber to_symbol = get_symbol_number(it->second);
2374 
2375  substitutions_.at(from_symbol) = to_symbol;
2376  }
2377 
2378  substitute_(substitutions_, no_substitution);
2379 
2380  return *this;
2381  }
2382 
2383  HFSTDLL HfstTransitionGraph &substitute_symbol_pairs
2384  (const HfstSymbolPairSubstitutions &substitutions)
2385  { return this->substitute(substitutions); }
2386 
2394  (const HfstSymbolPairSubstitutions &substitutions)
2395  {
2396  // Convert from symbols to numbers
2397  HfstNumberPairSubstitutions substitutions_;
2398  for (HfstSymbolPairSubstitutions::const_iterator it
2399  = substitutions.begin();
2400  it != substitutions.end(); it++)
2401  {
2402  HfstNumberPair from_transition
2403  (get_symbol_number(it->first.first),
2404  get_symbol_number(it->first.second));
2405  HfstNumberPair to_transition
2406  (get_symbol_number(it->second.first),
2407  get_symbol_number(it->second.second));
2408  substitutions_[from_transition] = to_transition;
2409  }
2410 
2411  substitute_(substitutions_);
2412 
2413  return *this;
2414  }
2415 
2419  (const HfstSymbolPair &sp, const HfstSymbolPairSet &sps)
2420  {
2421  if (! C::is_valid_symbol(sp.first) ||
2422  ! C::is_valid_symbol(sp.second) ) {
2424  (EmptyStringException,
2425  "HfstTransitionGraph::substitute"); }
2426 
2427  for (typename HfstSymbolPairSet::const_iterator it = sps.begin();
2428  it != sps.end(); it++)
2429  {
2430  if (! C::is_valid_symbol(it->first) ||
2431  ! C::is_valid_symbol(it->second) ) {
2433  (EmptyStringException,
2434  "HfstTransitionGraph::substitute"); }
2435  }
2436 
2437  substitute_(sp, sps);
2438 
2439  return *this;
2440  }
2441 
2445  (const HfstSymbolPair &old_pair,
2446  const HfstSymbolPair &new_pair)
2447  {
2448  if (! C::is_valid_symbol(old_pair.first) ||
2449  ! C::is_valid_symbol(new_pair.first) ||
2450  ! C::is_valid_symbol(old_pair.second) ||
2451  ! C::is_valid_symbol(new_pair.second) ) {
2453  (EmptyStringException,
2454  "HfstTransitionGraph::substitute"); }
2455 
2456  StringPairSet new_pair_set;
2457  new_pair_set.insert(new_pair);
2458  substitute_(old_pair, new_pair_set);
2459 
2460  return *this;
2461  }
2462 
2471  HFSTDLL HfstTransitionGraph &
2472  substitute(bool (*func)
2473  (const HfstSymbolPair &sp, HfstSymbolPairSet &sps) )
2474  {
2475  substitute_(func);
2476  return *this;
2477  }
2478 
2479 
2480 
2481  /* ----------------------------------------------------
2482  Substitute string pair with a transition graph
2483  ---------------------------------------------------- */
2484 
2485  protected:
2486  /* Used in function
2487  substitute(const StringPair&, HfstTransitionGraph&) */
2488  struct substitution_data
2489  {
2490  HfstState origin_state;
2491  HfstState target_state;
2492  typename C::WeightType weight;
2493  HfstTransitionGraph * substituting_graph;
2494 
2495  substitution_data(HfstState origin,
2496  HfstState target,
2497  typename C::WeightType weight,
2498  HfstTransitionGraph * substituting)
2499  {
2500  origin_state=origin;
2501  target_state=target;
2502  this->weight=weight;
2503  substituting_graph=substituting;
2504  }
2505  };
2506 
2507  /* Used in function substitute(const StringPair&,
2508  HfstTransitionGraph&)
2509  Add a copy of substituting graph with epsilon transitions between
2510  states and with weight as defined in \a sub. */
2511  void add_substitution(const substitution_data &sub) {
2512  // Epsilon transition to initial state of \a graph
2513  HfstState s = add_state();
2514  HfstTransition <C> epsilon_transition
2515  (s, C::get_epsilon(), C::get_epsilon(),
2516  sub.weight);
2517  add_transition(sub.origin_state, epsilon_transition);
2518 
2519  /* Offset between state numbers */
2520  unsigned int offset = s;
2521 
2522  // Copy \a graph
2523  const HfstTransitionGraph * graph = sub.substituting_graph;
2524  HfstState source_state=0;
2525  for (const_iterator it = graph->begin();
2526  it != graph->end(); it++)
2527  {
2528  for (typename HfstTransitions::const_iterator tr_it
2529  = it->begin();
2530  tr_it != it->end(); tr_it++)
2531  {
2532  C data = tr_it->get_transition_data();
2533 
2534  HfstTransition <C> transition
2535  (tr_it->get_target_state() + offset,
2536  data.get_input_symbol(),
2537  data.get_output_symbol(),
2538  data.get_weight());
2539 
2540  add_transition(source_state + offset, transition);
2541  }
2542  source_state++;
2543  }
2544 
2545  // Epsilon transitions from final states of \a graph
2546  for (typename FinalWeightMap::const_iterator it
2547  = graph->final_weight_map.begin();
2548  it != graph->final_weight_map.end(); it++)
2549  {
2550  HfstTransition <C> epsilon_transition
2551  (sub.target_state, C::get_epsilon(), C::get_epsilon(),
2552  it->second);
2553  add_transition(it->first + offset, epsilon_transition);
2554  }
2555  }
2556 
2557 
2558  public:
2559 
2578  HFSTDLL HfstTransitionGraph &
2580  const HfstTransitionGraph &graph) {
2581 
2582  if ( ! ( C::is_valid_symbol(sp.first) &&
2583  C::is_valid_symbol(sp.second) ) ) {
2585  (EmptyStringException,
2586  "HfstTransitionGraph::substitute(const HfstSymbolPair&, "
2587  "const HfstTransitionGraph&)");
2588  }
2589 
2590 
2591  // If neither symbol to be substituted is known to the graph,
2592  // do nothing.
2593  if (alphabet.find(sp.first) == alphabet.end() &&
2594  alphabet.find(sp.second) == alphabet.end())
2595  return *this;
2596 
2597  // Where the substituting copies of substituting graphs
2598  // are inserted (source state, target state, weight)
2599  std::vector<substitution_data> substitutions;
2600 
2601  // Go through all states
2602  HfstState source_state=0;
2603  for (iterator it = begin(); it != end(); it++)
2604  {
2605 
2606  // The transitions that are substituted, i.e. removed
2607  std::vector<typename HfstTransitions::iterator>
2608  old_transitions;
2609 
2610  // Go through all transitions
2611  for (typename HfstTransitions::iterator tr_it
2612  = it->begin();
2613  tr_it != it->end(); tr_it++)
2614  {
2615  C data = tr_it->get_transition_data();
2616 
2617  // Whether there is anything to substitute
2618  // in this transition
2619  if (data.get_input_symbol() == sp.first &&
2620  data.get_output_symbol() == sp.second)
2621  {
2622  // schedule a substitution
2623  substitutions.push_back(substitution_data
2624  (source_state,
2625  tr_it->get_target_state(),
2626  data.get_weight(),
2627  const_cast<HfstTransitionGraph *>(&graph)));
2628  // schedule the old transition to be deleted
2629  old_transitions.push_back(tr_it);
2630  }
2631  // (one transition gone through)
2632  }
2633  // (all transitions in a state gone through)
2634 
2635  // Remove the substituted transitions
2636  for (typename std::vector<typename
2637  HfstTransitions::iterator>::iterator IT =
2638  old_transitions.begin();
2639  IT != old_transitions.end(); IT++) {
2640  it->erase(*IT);
2641  }
2642 
2643  source_state++;
2644  }
2645  // (all states gone trough)
2646 
2647  // Add the substitutions
2648  for (typename std::vector<substitution_data>::iterator IT
2649  = substitutions.begin();
2650  IT != substitutions.end(); IT++)
2651  {
2652  add_substitution(*IT);
2653  }
2654  return *this;
2655  }
2656 
2657 
2658 
2659 
2660 
2661 
2662 
2663 
2664 
2665 
2666 
2667 
2668 
2669 
2670  HFSTDLL std::string weight2marker(float weight)
2671  {
2672  std::ostringstream o;
2673  o << weight;
2674  return std::string("@") + o.str() + std::string("@");
2675  }
2676 
2677  HFSTDLL HfstTransitionGraph & substitute_weights_with_markers() {
2678 
2679  // Go through all current states (we are going to add them)
2680  HfstState limit = state_vector.size();
2681  for (HfstState state = 0; state < limit; state++)
2682  {
2683  // The transitions that are substituted
2684  std::stack<typename HfstTransitions::iterator>
2685  old_transitions;
2686  // The transitions that will substitute
2687  std::vector<HfstTransition <C> > new_transitions;
2688 
2689  // Go through all transitions
2690  for (typename HfstTransitions::iterator tr_it
2691  = state_vector[state].begin();
2692  tr_it != state_vector[state].end(); tr_it++)
2693  {
2694  C data = tr_it->get_transition_data();
2695 
2696  // Whether there is anything to substitute
2697  // in this transition
2698  if (data.get_weight() != 0 )
2699  {
2700  // schedule a substitution
2701  new_transitions.push_back
2702  (HfstTransition <C> (tr_it->get_target_state(),
2703  data.get_input_symbol(),
2704  data.get_output_symbol(),
2705  data.get_weight()));
2706  // schedule the old transition to be deleted
2707  old_transitions.push(tr_it);
2708  }
2709  // (one transition gone through)
2710  }
2711  // (all transitions in a state gone through)
2712 
2713  // Remove the substituted transitions
2714  while (! old_transitions.empty()) {
2715  state_vector[state].erase(old_transitions.top());
2716  old_transitions.pop();
2717  }
2718 
2719  // Add the substituting transitions
2720  for (typename std::vector<HfstTransition <C> >::iterator IT
2721  = new_transitions.begin();
2722  IT != new_transitions.end(); IT++)
2723  {
2724  HfstState new_state = add_state();
2725  std::string marker = weight2marker(IT->get_weight());
2726  //std::cerr << "got marker '" << marker << "'" << std::endl;
2727  HfstTransition <C> marker_transition(IT->get_target_state(),
2728  marker,
2729  marker,
2730  0);
2731  HfstTransition <C> new_transition(new_state,
2732  IT->get_input_symbol(),
2733  IT->get_output_symbol(),
2734  0);
2735  add_transition(state, new_transition);
2736  add_transition(new_state, marker_transition);
2737  }
2738 
2739  }
2740  // (all states gone trough)
2741 
2742  // Go through the final states
2743  std::set<HfstState> final_states_to_remove;
2744 
2745  for (typename FinalWeightMap::iterator fin_it = final_weight_map.begin();
2746  fin_it != final_weight_map.end(); fin_it++)
2747  {
2748  if (fin_it->second != 0)
2749  {
2750  HfstState new_state = add_state();
2751  set_final_weight(new_state, 0);
2752  std::string marker = weight2marker(fin_it->second);
2753  HfstTransition <C> epsilon_transition(new_state,
2754  marker,
2755  marker,
2756  0);
2757  add_transition(fin_it->first, epsilon_transition);
2758  final_states_to_remove.insert(fin_it->first);
2759  }
2760  }
2761  for (std::set<HfstState>::iterator it = final_states_to_remove.begin();
2762  it != final_states_to_remove.end(); it++)
2763  {
2764  final_weight_map.erase(*it);
2765  }
2766 
2767  return *this;
2768  }
2769 
2770  // ####
2771  // another version of substitute for internal use..
2772  // ####
2773  typedef std::map<HfstSymbol, HfstTransitionGraph> SubstMap;
2774 
2775  HFSTDLL HfstTransitionGraph &
2776  substitute(SubstMap & substitution_map,
2777  bool harmonize) {
2778 
2779  bool symbol_found = false;
2780  for (typename SubstMap::const_iterator it = substitution_map.begin();
2781  it != substitution_map.end(); it++)
2782  {
2783  if ( ! ( C::is_valid_symbol(it->first) ))
2784  {
2785  HFST_THROW_MESSAGE(EmptyStringException,
2786  "HfstTransitionGraph::substitute "
2787  "(const std::map<HfstSymbol, HfstTransitionGraph> &)");
2788  }
2789  if (!symbol_found && alphabet.find(it->first) != alphabet.end())
2790  {
2791  symbol_found = true;
2792  }
2793  }
2794 
2795  // If none of the symbols to be substituted is known to the graph,
2796  // do nothing.
2797  if (!symbol_found)
2798  {
2799  return *this;
2800  }
2801 
2802  std::set<String> substitutions_performed_for_symbols;
2803 
2804  // Where the substituting copies of graphs
2805  // are inserted (source state, target state, weight)
2806  std::vector<substitution_data> substitutions;
2807 
2808  // Go through all states
2809  HfstState source_state=0;
2810  for (iterator it = begin(); it != end(); it++)
2811  {
2812 
2813  // The transitions that are substituted, i.e. removed
2814  std::stack<typename HfstTransitions::iterator>
2815  old_transitions;
2816 
2817  // Go through all transitions
2818  for (typename HfstTransitions::iterator tr_it
2819  = it->begin();
2820  tr_it != it->end(); tr_it++)
2821  {
2822  C data = tr_it->get_transition_data();
2823 
2824  // Whether there is anything to substitute
2825  // in this transition
2826  String istr = data.get_input_symbol();
2827  String ostr = data.get_output_symbol();
2828  typename SubstMap::iterator map_it_input = substitution_map.find(istr);
2829  typename SubstMap::iterator map_it_output = substitution_map.find(ostr);
2830 
2831  if (map_it_input == substitution_map.end() &&
2832  map_it_output == substitution_map.end())
2833  {
2834  ;
2835  }
2836  else if (istr != ostr)
2837  {
2838  std::string msg("symbol to be substituted must not occur only on one side of transition");
2840  }
2841  else
2842  {
2843  // schedule a substitution
2844  substitution_data sd
2845  (source_state,
2846  tr_it->get_target_state(),
2847  data.get_weight(),
2848  &(map_it_input->second));
2849  substitutions.push_back(sd);
2850  // schedule the old transition to be deleted
2851  old_transitions.push(tr_it);
2852  // ...
2853  substitutions_performed_for_symbols.insert(istr);
2854  }
2855  // (one transition gone through)
2856  }
2857  // (all transitions in a state gone through)
2858 
2859  // Remove the substituted transitions
2860  while (!old_transitions.empty())
2861  {
2862  it->erase(old_transitions.top());
2863  old_transitions.pop();
2864  }
2865 
2866  source_state++;
2867  }
2868  // (all states gone trough)
2869 
2870  // Remove all symbols that were substituted
2871  for (StringSet::const_iterator sym_it = substitutions_performed_for_symbols.begin();
2872  sym_it != substitutions_performed_for_symbols.end(); sym_it++)
2873  {
2874  if (*sym_it != "@_EPSILON_SYMBOL_@" && *sym_it != "@_UNKNOWN_SYMBOL_@" && *sym_it != "@_IDENTITY_SYMBOL_@")
2875  this->remove_symbol_from_alphabet(*sym_it);
2876  }
2877 
2878  // Harmonize the resulting and the substituting graphs, if needed
2879  if (harmonize)
2880  {
2881  for (StringSet::iterator sym_it = substitutions_performed_for_symbols.begin();
2882  sym_it != substitutions_performed_for_symbols.end(); sym_it++)
2883  {
2884  this->harmonize(substitution_map.at(*sym_it));
2885  }
2886  }
2887 
2888  // Add the substitutions
2889  for (typename std::vector<substitution_data>::iterator IT
2890  = substitutions.begin();
2891  IT != substitutions.end(); IT++)
2892  {
2893  add_substitution(*IT);
2894  }
2895 
2896  return *this;
2897  }
2898 
2899 
2900 
2901  HFSTDLL bool marker2weight(const std::string & str, float & weight)
2902  {
2903  if (str.size() < 3)
2904  return false;
2905  if (str.at(0) != '@' || str.at(str.size()-1) != '@')
2906  return false;
2907 
2908  std::string weight_string = str.substr(1, str.size()-2);
2909  std::stringstream sstream(weight_string);
2910  sstream >> weight;
2911  if (sstream.fail()) {
2912  return false;
2913  }
2914  return true;
2915  }
2916 
2917  HFSTDLL HfstTransitionGraph & substitute_markers_with_weights() {
2918 
2919  // Go through all states
2920  HfstState limit = state_vector.size();
2921  for (HfstState state = 0; state < limit; state++)
2922  {
2923  // The transitions that are substituted
2924  std::stack<typename HfstTransitions::iterator>
2925  old_transitions;
2926  // The transitions that will substitute
2927  std::vector<HfstTransition <C> > new_transitions;
2928 
2929  // Go through all transitions
2930  for (typename HfstTransitions::iterator tr_it
2931  = state_vector[state].begin();
2932  tr_it != state_vector[state].end(); tr_it++)
2933  {
2934  C data = tr_it->get_transition_data();
2935 
2936  float weight = 0;
2937  // Whether there is anything to substitute
2938  // in this transition
2939  if ( (!marker2weight(data.get_input_symbol(), weight)) &&
2940  marker2weight(data.get_output_symbol(), weight) )
2941  {
2942  //std::cerr << "got weight '" << weight << "'" << std::endl;
2943  // schedule a substitution
2944  new_transitions.push_back
2945  (HfstTransition <C> (tr_it->get_target_state(),
2946  data.get_input_symbol(),
2947  hfst::internal_epsilon,
2948  weight));
2949  // schedule the old transition to be deleted
2950  old_transitions.push(tr_it);
2951  }
2952  // or the transition must be deleted
2953  else if (marker2weight(data.get_input_symbol(), weight) &&
2954  marker2weight(data.get_output_symbol(), weight) )
2955  {
2956  //std::cerr << "got weight '" << weight << "'" << std::endl;
2957  // schedule the old transition to be deleted
2958  old_transitions.push(tr_it);
2959  }
2960  else {}
2961  // (one transition gone through)
2962  }
2963  // (all transitions in a state gone through)
2964 
2965  // Remove the substituted transitions
2966  while (! old_transitions.empty()) {
2967  state_vector[state].erase(old_transitions.top());
2968  old_transitions.pop();
2969  }
2970 
2971  // Add the substituting transitions
2972  for (typename std::vector<HfstTransition <C> >::iterator IT
2973  = new_transitions.begin();
2974  IT != new_transitions.end(); IT++)
2975  {
2976  state_vector[state].push_back(*IT);
2977  }
2978 
2979  }
2980  // (all states gone trough)
2981 
2982  std::stack<StringSet::iterator> weight_markers;
2983  for (StringSet::iterator it = alphabet.begin();
2984  it != alphabet.end(); it++)
2985  {
2986  float foo;
2987  if (marker2weight(*it, foo))
2988  {
2989  weight_markers.push(it);
2990  }
2991  }
2992  while (! weight_markers.empty())
2993  {
2994  alphabet.erase(weight_markers.top());
2995  weight_markers.pop();
2996  }
2997 
2998  return *this;
2999  }
3000 
3001 
3002  // aliases
3003  HFSTDLL HfstTransitionGraph & substitute_symbol(const std::string &old_symbol, const std::string &new_symbol, bool input_side=true, bool output_side=true)
3004  { return this->substitute(old_symbol, new_symbol, input_side, output_side); }
3005 
3006  HFSTDLL HfstTransitionGraph & substitute_symbol_pair(const StringPair &old_symbol_pair, const StringPair &new_symbol_pair)
3007  { return this->substitute(old_symbol_pair, new_symbol_pair); }
3008 
3009  HFSTDLL HfstTransitionGraph & substitute_symbol_pair_with_set(const StringPair &old_symbol_pair, const hfst::StringPairSet &new_symbol_pair_set)
3010  { return this->substitute(old_symbol_pair, new_symbol_pair_set); }
3011 
3012  HFSTDLL HfstTransitionGraph & substitute_symbol_pair_with_transducer(const StringPair &symbol_pair, HfstTransitionGraph &transducer)
3013  { return this->substitute(symbol_pair, transducer); }
3014 
3015 
3016  /* ----------------------------
3017  Insert freely functions
3018  ---------------------------- */
3019 
3020 
3024  (const HfstSymbolPair &symbol_pair, typename C::WeightType weight)
3025  {
3026  if ( ! ( C::is_valid_symbol(symbol_pair.first) &&
3027  C::is_valid_symbol(symbol_pair.second) ) ) {
3029  (EmptyStringException,
3030  "HfstTransitionGraph::insert_freely"
3031  "(const HfstSymbolPair&, W)");
3032  }
3033 
3034  alphabet.insert(symbol_pair.first);
3035  alphabet.insert(symbol_pair.second);
3036 
3037  HfstState source_state=0;
3038  for (iterator it = begin(); it != end(); it++) {
3039  HfstTransition <C> tr( source_state, symbol_pair.first,
3040  symbol_pair.second, weight );
3041  it->push_back(tr);
3042  source_state++;
3043  }
3044 
3045  return *this;
3046  }
3047 
3051  (const HfstSymbolPairSet &symbol_pairs,
3052  typename C::WeightType weight)
3053  {
3054  for (typename HfstSymbolPairSet::const_iterator symbols_it
3055  = symbol_pairs.begin();
3056  symbols_it != symbol_pairs.end(); symbols_it++)
3057  {
3058  if ( ! ( C::is_valid_symbol(symbols_it->first) &&
3059  C::is_valid_symbol(symbols_it->second) ) ) {
3061  (EmptyStringException,
3062  "HfstTransitionGraph::insert_freely"
3063  "(const HfstSymbolPairSet&, W)");
3064  }
3065 
3066  alphabet.insert(symbols_it->first);
3067  alphabet.insert(symbols_it->second);
3068  }
3069 
3070  HfstState source_state=0;
3071  for (iterator it = begin(); it != end(); it++)
3072  {
3073  for (typename HfstSymbolPairSet::const_iterator symbols_it
3074  = symbol_pairs.begin();
3075  symbols_it != symbol_pairs.end(); symbols_it++)
3076  {
3077  HfstTransition <C> tr( source_state, symbols_it->first,
3078  symbols_it->second, weight );
3079  it->push_back(tr);
3080  }
3081  source_state++;
3082  }
3083 
3084  return *this;
3085  }
3086 
3090  (const HfstTransitionGraph &graph)
3091  {
3092  HfstSymbol marker_this = C::get_marker(alphabet);
3093  HfstSymbol marker_graph = C::get_marker(alphabet);
3094  HfstSymbol marker = marker_this;
3095  if (marker_graph > marker)
3096  marker = marker_graph;
3097 
3098  HfstSymbolPair marker_pair(marker, marker);
3099  insert_freely(marker_pair, 0);
3100  substitute(marker_pair, graph);
3101  alphabet.erase(marker); // TODO: fix
3102 
3103  return *this;
3104  }
3105 
3106 
3107  /* -------------------------------
3108  Harmonization function
3109  ------------------------------- */
3110 
3138  {
3139  HarmonizeUnknownAndIdentitySymbols foo(*this, another);
3140  return *this;
3141  }
3142 
3143 
3144  /* -------------------------------
3145  Disjunction functions
3146  ------------------------------- */
3147 
3148  protected:
3149  /* Disjunct the transition of path \a spv pointed by \a it
3150  to state \a s. If the transition does not exist in the graph,
3151  it is created as well as its target state.
3152 
3153  @return The final state of path \a spv, when \a it is at end. */
3154  HfstState disjunct(const StringPairVector &spv,
3155  StringPairVector::const_iterator &it,
3156  HfstState s)
3157  {
3158  // Path inserted, return the final state on this path
3159  if (it == spv.end()) {
3160  return s;
3161  }
3162 
3163  HfstTransitions tr = state_vector[s];
3164  bool transition_found=false;
3165  /* The target state of the transition followed or added */
3166  HfstState next_state;
3167 
3168  // Find the transition
3169  // (Searching is slow?)
3170  for (typename HfstTransitions::iterator tr_it = tr.begin();
3171  tr_it != tr.end(); tr_it++)
3172  {
3173  C data = tr_it->get_transition_data();
3174  if (data.get_input_symbol().compare(it->first) == 0 &&
3175  data.get_output_symbol().compare(it->second) == 0)
3176  {
3177  transition_found=true;
3178  next_state = tr_it->get_target_state();
3179  break;
3180  }
3181  }
3182 
3183  // If not found, create the transition
3184  if (! transition_found)
3185  {
3186  next_state = add_state();
3187  HfstTransition <C> transition(next_state, it->first,
3188  it->second, 0);
3189  add_transition(s, transition);
3190  }
3191 
3192  // Advance to the next transition on path
3193  it++;
3194  return disjunct(spv, it, next_state);
3195  }
3196 
3197  public:
3198 
3218  HFSTDLL HfstTransitionGraph &disjunct
3219  (const StringPairVector &spv, typename C::WeightType weight)
3220  {
3221  StringPairVector::const_iterator it = spv.begin();
3222  HfstState final_state = disjunct(spv, it, INITIAL_STATE);
3223 
3224  // Set the weight of final state
3225  if (is_final_state(final_state))
3226  {
3227  float old_weight = get_final_weight(final_state);
3228  if (old_weight < weight)
3229  return *this; /* The same path with smaller weight remains */
3230  }
3231  set_final_weight(final_state, weight);
3232  return *this;
3233  }
3234 
3235  HFSTDLL bool is_special_symbol(const std::string & symbol)
3236  {
3237  if (symbol.size() < 2)
3238  return false;
3239  if (symbol[0] == '@' && symbol[1] == '_')
3240  return true;
3241  return false;
3242  }
3243 
3244  HFSTDLL HfstTransitionGraph &complete()
3245  {
3246  HfstState failure_state = add_state();
3247  HfstState current_state = 0;
3248 
3249  for (iterator it = begin(); it != end(); it++)
3250  {
3251  std::set<HfstSymbol> symbols_present;
3252 
3253  for (typename HfstTransitions::iterator tr_it
3254  = it->begin();
3255  tr_it != it->end(); tr_it++)
3256  {
3257  C data = tr_it->get_transition_data();
3258 
3259  if (data.get_input_symbol() != data.get_output_symbol())
3260  {
3262  }
3263  symbols_present.insert(data.get_input_symbol());
3264  }
3265  for (std::set<std::string>::const_iterator alpha_it = alphabet.begin();
3266  alpha_it != alphabet.end(); alpha_it++)
3267  {
3268  if (symbols_present.find(*alpha_it) ==
3269  symbols_present.end() &&
3270  ! is_special_symbol(*alpha_it))
3271  {
3273  (current_state,
3274  HfstBasicTransition(failure_state, *alpha_it, *alpha_it, 0));
3275  }
3276  }
3277  current_state++;
3278  }
3279  return *this;
3280  }
3281 
3282  HFSTDLL StringSet get_flags() const
3283  {
3284  StringSet flags;
3285  for (StringSet::const_iterator it = alphabet.begin();
3286  it != alphabet.end(); it++)
3287  {
3288  if (FdOperation::is_diacritic(*it)) {
3289  flags.insert(*it);
3290  }
3291  }
3292  return flags;
3293  }
3294 
3295  // Whether symbol \a symbol must be purged from transitions and alphabet
3296  // of a transducer after \a flag has been eliminated from the transducer.
3297  // If \a flag is the empty string, all flags have been eliminated.
3298  HFSTDLL bool purge_symbol(const std::string & symbol, const std::string & flag)
3299  {
3300  if (! FdOperation::is_diacritic(symbol))
3301  return false;
3302  if (flag == "")
3303  return true;
3304  else if (FdOperation::get_feature(symbol) == flag)
3305  return true;
3306  return false;
3307  }
3308 
3309  // Replace arcs in \a transducer that use flag \a flag with epsilon arcs
3310  // and remove \a flag from alphabet of \a transducer. If \a flag is the empty
3311  // string, replace/remove all flags.
3312  HFSTDLL void flag_purge(const std::string & flag)
3313  {
3314  // (1) Go through all states and transitions
3315  for (iterator it = begin(); it != end(); it++)
3316  {
3317  for (unsigned int i=0; i < it->size(); i++)
3318  {
3319  HfstTransition<C> &tr_it = it->operator[](i);
3320 
3321  if ( purge_symbol(tr_it.get_input_symbol(), flag) ||
3322  purge_symbol(tr_it.get_output_symbol(), flag) )
3323  {
3324  // change the current transition
3325  HfstTransition<C> tr
3326  (tr_it.get_target_state(), "@_EPSILON_SYMBOL_@",
3327  "@_EPSILON_SYMBOL_@", tr_it.get_weight());
3328  it->operator[](i) = tr;
3329  }
3330  }
3331  }
3332  // (2) Go through the alphabet
3333  StringSet extra_symbols;
3334  for (StringSet::const_iterator it = alphabet.begin();
3335  it != alphabet.end(); it++)
3336  {
3337  if (purge_symbol(*it, flag))
3338  extra_symbols.insert(*it);
3339  }
3340  // remove symbols
3341  remove_symbols_from_alphabet(extra_symbols);
3342  }
3343 
3344  /* A topological sort. */
3345  struct TopologicalSort
3346  {
3347  std::vector<int> distance_of_state;
3348  std::vector<std::set<HfstState> > states_at_distance;
3349 
3350  /* Initialize the TopologicalSort by reserving space for a transducer
3351  with biggest state number \a biggest_state_number, */
3352  HFSTDLL void set_biggest_state_number(unsigned int biggest_state_number)
3353  {
3354  distance_of_state = std::vector<int>(biggest_state_number+1, -1);
3355  }
3356 
3357  /* Set the maximum distance of \a state to \a distance, */
3358  HFSTDLL void set_state_at_distance(HfstState state, unsigned int distance,
3359  bool overwrite)
3360  {
3361  // see that 'state' does not exceed the maximum state number given in initialization
3362  if (state > (distance_of_state.size() - 1))
3363  {
3364  std::cerr << "ERROR in TopologicalSort::set_state_at_distance: first argument ("
3365  << state << ") is out of range (should be < " << distance_of_state.size()
3366  << ")" << std::endl;
3367  }
3368  // if there is nothing on index 'state',
3369  // push back empty sets of states up to index 'state', including
3370  while (distance + 1 > (unsigned int)states_at_distance.size())
3371  {
3372  std::set<HfstState> empty_set;
3373  states_at_distance.push_back(empty_set);
3374  }
3375  // if there was previous distance defined for 'state', erase it, if needed
3376  int previous_distance = distance_of_state.at(state);
3377  if (previous_distance != -1 && previous_distance != (int)distance && overwrite)
3378  {
3379  states_at_distance.at(previous_distance).erase(state);
3380  }
3381  // set state and distance
3382  states_at_distance.at(distance).insert(state);
3383  distance_of_state.at(state) = distance;
3384  }
3385 
3386  /* The states that have a maximum distance of \a distance. */
3387  HFSTDLL const std::set<HfstState> & get_states_at_distance(unsigned int distance)
3388  {
3389  // if there is nothing on index 'state',
3390  // push back empty sets of states up to index 'state', including
3391  while (distance > (states_at_distance.size() - 1))
3392  {
3393  std::set<HfstState> empty_set;
3394  states_at_distance.push_back(empty_set);
3395  }
3396  return states_at_distance.at(distance);
3397  }
3398  };
3399 
3400  enum SortDistance { MaximumDistance, MinimumDistance };
3401 
3402  /*
3403  Get a topological (maximum distance) sort of this graph.
3404  @return A vector of sets of states. At each vector index ind, the
3405  result contains the set of all states whose (maximum) distance from
3406  the start state is ind.
3407  */
3408  HFSTDLL std::vector<std::set<HfstState> > topsort(SortDistance dist) const
3409  {
3410  typedef std::set<HfstState>::const_iterator StateIt;
3411  unsigned int current_distance = 0; // topological distance
3412  TopologicalSort TopSort;
3413  TopSort.set_biggest_state_number(state_vector.size()-1);
3414  TopSort.set_state_at_distance(0,current_distance,(dist == MaximumDistance));
3415  bool new_states_found = false; // end condition for do-while loop
3416 
3417  do
3418  {
3419  new_states_found = false;
3420  // states that are accessible from the current set of states
3421  std::set<HfstState> new_states;
3422 
3423  // go through all states at current distance
3424  const std::set<HfstState> & states =
3425  TopSort.get_states_at_distance(current_distance);
3426  for (StateIt state_it = states.begin();
3427  state_it != states.end(); state_it++)
3428  {
3429  // go through all transitions of each state
3430  const HfstTransitions & transitions
3431  = this->state_vector.at(*state_it);
3432  for (typename HfstTransitions::const_iterator transition_it
3433  = transitions.begin();
3434  transition_it != transitions.end(); transition_it++)
3435  {
3436  new_states_found = true;
3437  new_states.insert(transition_it->get_target_state());
3438  }
3439  // all transitions gone through
3440  }
3441  // all states gone through
3442 
3443  // set each accessible state at distance one higher than the
3444  // current distance
3445  for (StateIt it = new_states.begin();
3446  it != new_states.end(); it++)
3447  {
3448  TopSort.set_state_at_distance(*it, current_distance + 1, (dist == MaximumDistance));
3449  }
3450  current_distance++;
3451  }
3452  while (new_states_found);
3453 
3454  return TopSort.states_at_distance;
3455  }
3456 
3459  HFSTDLL int longest_path_size()
3460  {
3461  // get topological maximum distance sort
3462  std::vector<std::set<HfstState> > states_sorted = this->topsort(MaximumDistance);
3463  // go through all sets of states in descending order
3464  for (int distance = states_sorted.size() - 1; distance >= 0; distance--)
3465  {
3466  const std::set<HfstState> & states
3467  = states_sorted.at((unsigned int)distance);
3468  // go through all states in a set
3469  for (std::set<HfstState>::const_iterator it = states.begin();
3470  it != states.end(); it++)
3471  {
3472  // if a final state is encountered, return the distance
3473  // of that state
3474  if (is_final_state(*it))
3475  {
3476  return distance;
3477  }
3478  }
3479  }
3480  // if no final states were encountered, return a negative value
3481  return -1;
3482  }
3483 
3486  HFSTDLL std::vector<unsigned int> path_sizes()
3487  {
3488  std::vector<unsigned int> result;
3489  // get topological maximum distance sort
3490  std::vector<std::set<HfstState> > states_sorted = this->topsort(MinimumDistance);
3491  // go through all sets of states in descending order
3492  for (int distance = states_sorted.size() - 1; distance >= 0; distance--)
3493  {
3494  const std::set<HfstState> & states
3495  = states_sorted.at((unsigned int)distance);
3496  // go through all states in a set
3497  for (std::set<HfstState>::const_iterator it = states.begin();
3498  it != states.end(); it++)
3499  {
3500  // if a final state is encountered, add its distance
3501  // to result
3502  if (is_final_state(*it))
3503  {
3504  result.push_back((unsigned int)distance);
3505  break; // go to next set of states
3506  }
3507  }
3508  }
3509  return result;
3510  }
3511 
3512  bool has_negative_epsilon_cycles
3513  (HfstState state,
3514  float total_weight,
3515  std::map<HfstState, float> & state_weights)
3516  {
3517  std::map<HfstState, float>::const_iterator it = state_weights.find(state);
3518  if (it != state_weights.end()) // cycle detected
3519  {
3520  if (total_weight - it->second < 0)
3521  {
3522  return true; // cycle with negative weight detected
3523  }
3524  return false; // cycle with positive weight
3525  }
3526  state_weights[state] = total_weight;
3527 
3528  // Go through all transitions in this state
3529  const HfstBasicTransducer::HfstTransitions &transitions
3530  = this->operator[](state);
3531  for (HfstBasicTransducer::HfstTransitions::const_iterator it
3532  = transitions.begin();
3533  it != transitions.end(); it++)
3534  {
3535  if (is_epsilon(it->get_input_symbol()) && is_epsilon(it->get_output_symbol()))
3536  {
3537  if (has_negative_epsilon_cycles
3538  (it->get_target_state(), total_weight + it->get_weight(), state_weights))
3539  return true;
3540  }
3541  }
3542  state_weights.erase(state);
3543  return false;
3544  }
3545 
3546  bool has_negative_epsilon_cycles()
3547  {
3548  bool has_negative_epsilon_transitions = false;
3549  for (iterator it = begin(); it != end(); it++)
3550  {
3551  for (typename HfstTransitions::iterator tr_it
3552  = it->begin();
3553  tr_it != it->end(); tr_it++)
3554  {
3555  if (is_epsilon(tr_it->get_input_symbol()) && is_epsilon(tr_it->get_output_symbol()) && tr_it->get_weight() < 0)
3556  {
3557  has_negative_epsilon_transitions = true;
3558  break;
3559  }
3560  }
3561  }
3562  if (! has_negative_epsilon_transitions)
3563  {
3564  return false;
3565  }
3566 
3567  std::map<HfstState, float> state_weights;
3568  for (unsigned int state = INITIAL_STATE; state < (this->get_max_state()+1); state++)
3569  {
3570  if (has_negative_epsilon_cycles(state, 0, state_weights))
3571  return true;
3572  }
3573  return false;
3574  }
3575 
3576  HFSTDLL bool is_infinitely_ambiguous
3577  (HfstState state,
3578  std::set<HfstState> &epsilon_path_states,
3579  std::vector<unsigned int> &states_handled)
3580  {
3581  if (states_handled[state] != 0)
3582  return false;
3583 
3584  // Go through all transitions in this state
3585  const HfstBasicTransducer::HfstTransitions &transitions
3586  = this->operator[](state);
3587  for (HfstBasicTransducer::HfstTransitions::const_iterator it
3588  = transitions.begin();
3589  it != transitions.end(); it++)
3590  {
3591  // (Diacritics are also treated as epsilons, although it might cause false
3592  // positive results, because loops with diacritics can be invalidated by
3593  // other diacritics.)
3594  if ( is_epsilon(it->get_input_symbol()) ||
3595  FdOperation::is_diacritic(it->get_input_symbol()) )
3596  {
3597  epsilon_path_states.insert(state);
3598  if (epsilon_path_states.find(it->get_target_state())
3599  != epsilon_path_states.end())
3600  {
3601  return true;
3602  }
3603  if (is_infinitely_ambiguous
3604  (it->get_target_state(), epsilon_path_states, states_handled))
3605  {
3606  return true;
3607  }
3608  epsilon_path_states.erase(state);
3609  }
3610  }
3611  // mark state as handled
3612  states_handled[state] = 1;
3613  return false;
3614  }
3615 
3616  HFSTDLL bool is_infinitely_ambiguous()
3617  {
3618  std::set<HfstState> epsilon_path_states;
3619  HfstState max_state = this->get_max_state();
3620  std::vector<unsigned int> states_handled(max_state+1, 0);
3621 
3622  for (unsigned int state = INITIAL_STATE; state < (max_state+1); state++)
3623  {
3624  if (is_infinitely_ambiguous(state, epsilon_path_states, states_handled))
3625  return true;
3626  }
3627  return false;
3628  }
3629 
3630  HFSTDLL bool is_lookup_infinitely_ambiguous
3631  (const HfstOneLevelPath& s,
3632  unsigned int& index, HfstState state,
3633  std::set<HfstState> &epsilon_path_states
3634  )
3635  {
3636  // Whether the end of the lookup path s has been reached
3637  bool only_epsilons=false;
3638  if ((unsigned int)s.second.size() == index)
3639  {
3640  only_epsilons=true;
3641  }
3642 
3643  // Go through all transitions in this state
3644  const HfstBasicTransducer::HfstTransitions &transitions
3645  = this->operator[](state);
3646  for (HfstBasicTransducer::HfstTransitions::const_iterator it
3647  = transitions.begin();
3648  it != transitions.end(); it++)
3649  {
3650  // CASE 1: Input epsilons do not consume a symbol in the lookup path s,
3651  // so they can be added freely.
3652  // (Diacritics are also treated as epsilons, although it might cause false
3653  // positive results, because loops with diacritics can be invalidated by
3654  // other diacritics.)
3655  if ( is_epsilon(it->get_input_symbol()) ||
3656  FdOperation::is_diacritic(it->get_input_symbol()) )
3657  {
3658  epsilon_path_states.insert(state);
3659  if (epsilon_path_states.find(it->get_target_state())
3660  != epsilon_path_states.end())
3661  {
3662  return true;
3663  }
3664  if (is_lookup_infinitely_ambiguous
3665  (s, index, it->get_target_state(), epsilon_path_states))
3666  {
3667  return true;
3668  }
3669  epsilon_path_states.erase(state);
3670  }
3671 
3672  /* CASE 2: Other input symbols consume a symbol in the lookup path s,
3673  so they can be added only if the end of the lookup path s has not
3674  been reached. */
3675  else if (! only_epsilons)
3676  {
3677  bool continu = false;
3678  if (it->get_input_symbol().compare(s.second.at(index)) == 0)
3679  continu = true;
3680  else if ((it->get_input_symbol().compare("@_UNKNOWN_SYMBOL_@") ||
3681  it->get_input_symbol().compare("@_IDENTITY_SYMBOL_@"))
3682  &&
3683  (alphabet.find(s.second.at(index)) == alphabet.end()))
3684  {
3685  continu = true;
3686  }
3687 
3688  if (continu)
3689  {
3690  index++; // consume an input symbol in the lookup path s
3691  std::set<HfstState> empty_set;
3692  if (is_lookup_infinitely_ambiguous
3693  (s, index, it->get_target_state(), empty_set))
3694  {
3695  return true;
3696  }
3697  index--; // add the input symbol back to the lookup path s.
3698  }
3699  }
3700  }
3701  return false;
3702  }
3703 
3704  HFSTDLL bool is_lookup_infinitely_ambiguous(const HfstOneLevelPath & s)
3705  {
3706  std::set<HfstState> epsilon_path_states;
3707  epsilon_path_states.insert(0);
3708  unsigned int index=0;
3709 
3710  return is_lookup_infinitely_ambiguous(s, index, INITIAL_STATE,
3711  epsilon_path_states);
3712  }
3713 
3714  HFSTDLL bool is_lookup_infinitely_ambiguous(const StringVector & s)
3715  {
3716  std::set<HfstState> epsilon_path_states;
3717  epsilon_path_states.insert(0);
3718  unsigned int index=0;
3719  HfstOneLevelPath path((float)0, s);
3720 
3721  return is_lookup_infinitely_ambiguous(path, index, INITIAL_STATE,
3722  epsilon_path_states);
3723  }
3724 
3725 
3726 
3727  HFSTDLL static void push_back_to_two_level_path
3728  (HfstTwoLevelPath &path,
3729  const StringPair &sp,
3730  const float &weight)
3731  {
3732  path.second.push_back(sp);
3733  path.first = path.first + weight;
3734  }
3735 
3736  HFSTDLL static void pop_back_from_two_level_path
3737  (HfstTwoLevelPath &path,
3738  const float &weight)
3739  {
3740  path.second.pop_back();
3741  path.first = path.first - weight;
3742  }
3743 
3744  HFSTDLL static void add_to_results
3745  (HfstTwoLevelPaths &results,
3746  HfstTwoLevelPath &path_so_far,
3747  const float &final_weight,
3748  float * max_weight)
3749  {
3750  path_so_far.first = path_so_far.first + final_weight;
3751 
3752  if (max_weight == NULL) // no weight limitation given
3753  {
3754  results.insert(path_so_far);
3755  }
3756  else if (!(path_so_far.first > *max_weight)) // weight limitation not exceeded
3757  {
3758  results.insert(path_so_far);
3759  }
3760  else // weight limitation exceeded
3761  {
3762  ;
3763  }
3764  path_so_far.first = path_so_far.first - final_weight;
3765  }
3766 
3767  HFSTDLL static bool is_possible_transition
3768  (const HfstBasicTransition &transition,
3769  const StringVector &lookup_path,
3770  const unsigned int &lookup_index,
3771  const StringSet &alphabet,
3772  bool &input_symbol_consumed)
3773  {
3774  std::string isymbol = transition.get_input_symbol();
3775 
3776  // If we are not at the end of lookup_path,
3777  if (! (lookup_index == (unsigned int)lookup_path.size()))
3778  {
3779  // we can go further if the current symbol in lookup_path
3780  // matches to the input symbol of the transition, i.e
3781  // either the input symbol is the same as the current symbol
3782  if ( isymbol.compare(lookup_path.at(lookup_index)) == 0 ||
3783  // or the input symbol is the identity or unknown symbol and
3784  // the current symbol is not found in the alphabet
3785  // of the transducer.
3786  ( (is_identity(isymbol) || is_unknown(isymbol)) &&
3787  (alphabet.find(lookup_path.at(lookup_index))
3788  == alphabet.end()) )
3789  )
3790  {
3791  input_symbol_consumed=true;
3792  return true;
3793  }
3794  }
3795  // Whether there are more symbols in lookup_path or not,
3796  // we can always go further if the input symbol of the transition
3797  // is an epsilon or a flag diacritic.
3798  if ( is_epsilon(isymbol) ||
3799  FdOperation::is_diacritic(isymbol) )
3800  {
3801  input_symbol_consumed=false;
3802  return true;
3803  }
3804 
3805  // No matches.
3806  return false;
3807  }
3808 
3809  HFSTDLL void lookup_fd
3810  (const StringVector &lookup_path,
3811  HfstTwoLevelPaths &results,
3812  HfstState state,
3813  unsigned int lookup_index, // an iterator instead?
3814  HfstTwoLevelPath &path_so_far,
3815  StringSet &alphabet,
3816  HfstEpsilonHandler Eh,
3817  size_t infinite_cutoff,
3818  float * max_weight = NULL)
3819  {
3820  // Check whether the number of input epsilon cycles is exceeded
3821  if (! Eh.can_continue(state)) {
3822  return;
3823  }
3824  // Check whether the maximum weight is exceeded
3825  if (max_weight != NULL && path_so_far.first > *max_weight) {
3826  return;
3827  }
3828 
3829  // If we are at the end of lookup_path,
3830  if (lookup_index == lookup_path.size())
3831  {
3832  // and if the current state is final,
3833  if (this->is_final_state(state))
3834  {
3835  // path_so_far is a valid result if max_weight is not exceeded
3836  add_to_results
3837  (results, path_so_far, this->get_final_weight(state), max_weight);
3838  }
3839  }
3840 
3841  // Whether there are more symbols in lookup_path or not,
3842  // go through all transitions in the current state.
3843  const HfstBasicTransducer::HfstTransitions &transitions
3844  = this->operator[](state);
3845  for (HfstBasicTransducer::HfstTransitions::const_iterator it
3846  = transitions.begin();
3847  it != transitions.end(); it++)
3848  {
3849  bool input_symbol_consumed=false;
3850  if ( is_possible_transition
3851  (*it, lookup_path, lookup_index, alphabet,
3852  input_symbol_consumed) )
3853  {
3854  // update path_so_far and lookup_index
3855  std::string istr;
3856  std::string ostr;
3857 
3858  // identity symbol is replaced with the lookup symbol
3859  if (is_identity(it->get_input_symbol()))
3860  {
3861  istr = lookup_path.at(lookup_index);
3862  ostr = istr;
3863  }
3864  else
3865  {
3866  if (is_unknown(it->get_input_symbol()))
3867  istr = lookup_path.at(lookup_index);
3868  else
3869  istr = it->get_input_symbol();
3870 
3871  /*if (is_unknown(it->get_output_symbol()))
3872  ostr = std::string("?");
3873  else*/
3874  ostr = it->get_output_symbol();
3875  }
3876 
3877  push_back_to_two_level_path
3878  (path_so_far,
3879  StringPair(istr, ostr),
3880  it->get_weight());
3881 
3882  HfstEpsilonHandler * Ehp = NULL;
3883  if (input_symbol_consumed) {
3884  lookup_index++;
3885  Ehp = new HfstEpsilonHandler(infinite_cutoff);
3886  }
3887  else {
3888  Eh.push_back(state);
3889  Ehp = &Eh;
3890  }
3891 
3892  // call lookup for the target state of the transition
3893  lookup_fd(lookup_path, results, it->get_target_state(),
3894  lookup_index, path_so_far, alphabet, *Ehp, infinite_cutoff, max_weight);
3895 
3896  // return to the original values of path_so_far
3897  // and lookup_index
3898  if (input_symbol_consumed) {
3899  lookup_index--;
3900  delete Ehp;
3901  }
3902  else {
3903  // Eh.pop_back(); not needed because the destructor
3904  // of Eh is automatically called next
3905  }
3906 
3907  pop_back_from_two_level_path(path_so_far, it->get_weight());
3908  }
3909  }
3910 
3911  }
3912 
3913  HFSTDLL void lookup_fd
3914  (const StringVector &lookup_path,
3915  HfstTwoLevelPaths &results,
3916  size_t * infinite_cutoff = NULL,
3917  float * max_weight = NULL)
3918  {
3919  HfstState state = 0;
3920  unsigned int lookup_index = 0;
3921  HfstTwoLevelPath path_so_far;
3922  StringSet alphabet = this->get_alphabet();
3923  if (infinite_cutoff != NULL)
3924  {
3925  HfstEpsilonHandler Eh(*infinite_cutoff);
3926  lookup_fd(lookup_path, results, state, lookup_index, path_so_far,
3927  alphabet, Eh, *infinite_cutoff, max_weight);
3928  }
3929  else
3930  {
3931  HfstEpsilonHandler Eh(100000);
3932  lookup_fd(lookup_path, results, state, lookup_index, path_so_far,
3933  alphabet, Eh, 100000, max_weight);
3934  }
3935  }
3936 
3937 
3938  HFSTDLL void check_regexp_state_for_cycle(HfstState s, const std::set<HfstState> & states_visited)
3939  {
3940  if (states_visited.find(s) != states_visited.end())
3941  {
3942  throw "error: loop detected inside compile-replace regular expression";
3943  }
3944  }
3945 
3946  // Returns whether tr is "^]":"^]". If tr is not allowed, throws an error message.
3947  HFSTDLL bool check_regexp_transition_end(const HfstBasicTransition & tr, bool input_side)
3948  {
3949  std::string istr = tr.get_input_symbol();
3950  std::string ostr = tr.get_output_symbol();
3951 
3952  if (input_side && is_epsilon(istr))
3953  {}
3954  else if (!input_side && is_epsilon(ostr))
3955  {}
3956  else if ((input_side && is_special_symbol(istr)) || (!input_side && is_special_symbol(ostr)))
3957  {
3958  throw "error: special symbol detected in compile-replace regular expression";
3959  }
3960  else
3961  {}
3962 
3963  if ((input_side && ("^[" == istr)) || (!input_side && ("^[" == ostr)))
3964  {
3965  throw "error: ^[ detected inside compile-replace regular expression";
3966  }
3967  if ((input_side && ("^]" == istr)) || (!input_side && ("^]" == ostr)))
3968  {
3969  /*if (istr != ostr)
3970  {
3971  throw "error: ^] detected on only one side of transition inside compile-replace regular expression";
3972  }*/
3973  return true;
3974  }
3975  return false;
3976  // weights?
3977  // flag diacritics?
3978  }
3979 
3980  // If there is a "^[":"^[" transition leading to state \a s from some state
3981  // S and state S is included in \a states_visited and \a path and \a full_paths
3982  // are empty, this function can be used to find all (sub-)paths of form
3983  // [x:y]* "^]" (x and y cannot be "^]" or "^[") starting from state \a s. The resulting
3984  // paths are stored in \a full_paths. \a path is used to keep track of each path so
3985  // far. Weights are currently ignored.
3986  HFSTDLL void find_regexp_paths
3987  (HfstState s,
3988  std::set<HfstState> & states_visited,
3989  std::vector<std::pair<std::string, std::string> > & path,
3990  HfstReplacements & full_paths, bool input_side)
3991  {
3992  // no cycles allowed inside "^[" and "^]"
3993  check_regexp_state_for_cycle(s, states_visited);
3994  states_visited.insert(s);
3995 
3996  // go through all transitions
3997  const HfstBasicTransducer::HfstTransitions &transitions
3998  = this->operator[](s);
3999  for (HfstBasicTransducer::HfstTransitions::const_iterator it
4000  = transitions.begin();
4001  it != transitions.end(); it++)
4002  {
4003  // closing bracket..
4004  if (check_regexp_transition_end(*it, input_side)) // throws error message if *it is not a valid transition
4005  {
4006  // ..cannot lead to a state already visited..
4007  check_regexp_state_for_cycle(it->get_target_state(), states_visited);
4008  // ..but else we can add the expression that it ends to the results
4009  path.push_back(std::pair<std::string, std::string>(it->get_input_symbol(), it->get_output_symbol()));
4010  full_paths.push_back
4011  (HfstReplacement(it->get_target_state(), path));
4012  path.pop_back(); // remove closing bracket as we are not going to proceed to next state
4013  }
4014  // add transition to path and call function again for its target state
4015  else
4016  {
4017  path.push_back(StringPair(it->get_input_symbol(), it->get_output_symbol()));
4018  find_regexp_paths
4019  (it->get_target_state(),
4020  states_visited,
4021  path,
4022  full_paths, input_side);
4023  path.pop_back();
4024  }
4025  }
4026  // all transitions gone through
4027  states_visited.erase(s);
4028  }
4029 
4030  // For each "^[":"^[" transition in state \a s, find continuing paths of form [x:y]* "^]":"^]"
4031  // (where x and y can freely be any symbols except "^]" or "^[") and store those paths in \a full_paths
4032  // vector where the first member of each element is the state where the ending "^]":"^]" transition
4033  // leads to and the second element is a vector of transitions (i.e. string pairs) without the ending
4034  // "^]":"^]" transition.
4035  // An error is thrown if mismatched "^[" or "^]" symbols, special symbols (epsilon, unknown, identity),
4036  // or loops are encountered on a regexp path. Final states are allowed on regexp paths as they are also
4037  // allowed by Xerox tools.
4038  // Weights are currently ignored.
4039  HFSTDLL void find_regexp_paths
4040  (HfstState s,
4041  std::vector<std::pair<HfstState, std::vector<std::pair<std::string, std::string> > > > & full_paths,
4042  bool input_side)
4043  {
4044  // go through all transitions
4045  const HfstBasicTransducer::HfstTransitions &transitions
4046  = this->operator[](s);
4047  for (HfstBasicTransducer::HfstTransitions::const_iterator it
4048  = transitions.begin();
4049  it != transitions.end(); it++)
4050  {
4051  std::string istr = it->get_input_symbol();
4052  std::string ostr = it->get_output_symbol();
4053  if ((input_side && ("^[" == istr)) || (!input_side && ("^[" == ostr)))
4054  {
4055  /*if (istr != ostr)
4056  {
4057  throw "error: ^[ detected on only one side of transition";
4058  }*/
4059  std::set<HfstState> states_visited;
4060  states_visited.insert(s);
4061  std::vector<std::pair<std::string, std::string> > path;
4062  path.push_back(std::pair<std::string, std::string>(istr, ostr));
4063  find_regexp_paths(it->get_target_state(), states_visited, path, full_paths, input_side);
4064  //fprintf(stderr, "%u regexp paths found for state %u\n", (unsigned int)full_paths.size(), s); // debug
4065  }
4066  }
4067  }
4068 
4069  // Find all subpaths of form "^[" [x:y]* "^]" (x and y cannot be "^[" or "^]") and return them.
4070  // retval[start_state] == vector(pair(end_state, vector(pair(isymbol,osymbol) ) ) )
4071  // Weights are currently ignored.
4072  HFSTDLL HfstReplacementsMap find_replacements(bool input_side)
4073  {
4074  HfstReplacementsMap replacements;
4075  unsigned int state = 0;
4076  for (iterator it = begin(); it != end(); it++)
4077  {
4078  //fprintf(stderr, "state %u......\n", state); // debug
4079  HfstReplacements full_paths;
4080  find_regexp_paths(state, full_paths, input_side);
4081  if (full_paths.size() > 0)
4082  {
4083  replacements[state] = full_paths;
4084  }
4085  state++;
4086  }
4087  return replacements;
4088  }
4089 
4090  // Attach a copy of \a graph between states \a state1 and \a state2 with epsilon transitions.
4091  // There will be an epsilon transition with weight zero from state \a state1 to the
4092  // initial state of \a graph and one epsilon transition from each final state of
4093  // \a graph to state \a state2 with a weight of that state's final weight. Final states of
4094  // \a graph as well as its initial state are made normal non-final states when copying \a graph.
4095  // Todo: copy alphabet? harmonize graphs?
4096  HFSTDLL void insert_transducer(HfstState state1, HfstState state2, const HfstTransitionGraph & graph)
4097  {
4098  HfstState offset = add_state();
4099  HfstState source_state=0;
4100  for (const_iterator it = graph.begin();
4101  it != graph.end(); it++)
4102  {
4103  for (typename HfstTransitions::const_iterator tr_it
4104  = it->begin();
4105  tr_it != it->end(); tr_it++)
4106  {
4107  C data = tr_it->get_transition_data();
4108 
4109  HfstTransition <C> transition
4110  (tr_it->get_target_state() + offset,
4111  data.get_input_symbol(),
4112  data.get_output_symbol(),
4113  data.get_weight());
4114 
4115  add_transition(source_state + offset, transition);
4116  }
4117  source_state++;
4118  }
4119 
4120  // Epsilon transitions
4121  for (typename FinalWeightMap::const_iterator it
4122  = graph.final_weight_map.begin();
4123  it != graph.final_weight_map.end(); it++)
4124  {
4125  HfstTransition <C> epsilon_transition
4126  (state2, C::get_epsilon(), C::get_epsilon(),
4127  it->second);
4128  add_transition(it->first + offset, epsilon_transition);
4129  }
4130 
4131  // Initial transition
4132  HfstTransition <C> epsilon_transition
4133  (offset, C::get_epsilon(), C::get_epsilon(), 0);
4134  add_transition(state1, epsilon_transition);
4135  }
4136 
4137  typedef std::pair<HfstState, HfstState> StatePair;
4138  typedef std::map<StatePair, HfstState> StateMap;
4139 
4140  // Find target state corresponding to state pair \a target1, \a target2 in \a state_map and return that state.
4141  // If not found, add a new state to \a intersection, add it to \a state_map and return it.
4142  // \a was_new_state specifies whether a new state was added.
4143  static HfstState find_target_state
4144  (HfstState target1, HfstState target2, StateMap & state_map,
4145  HfstTransitionGraph & intersection, bool & was_new_state)
4146  {
4147  StatePair state_pair(target1, target2);
4148  StateMap::const_iterator it = state_map.find(state_pair);
4149  if (it != state_map.end())
4150  {
4151  was_new_state=false;
4152  return it->second;
4153  }
4154  HfstState retval = intersection.add_state();
4155  state_map[state_pair] = retval;
4156  was_new_state=true;
4157  return retval;
4158  }
4159 
4160  // A function used by find_matches.
4161  // Copy matching transition tr1/tr2 to state \a state in \a intersection and return
4162  // the target state of that transition. Also make that state final, if needed.
4163  HFSTDLL static HfstState handle_match(const HfstTransitionGraph & graph1, const HfstTransition <C> & tr1,
4164  const HfstTransitionGraph & graph2, const HfstTransition <C> & tr2,
4165  HfstTransitionGraph & intersection, HfstState state, StateMap & state_map)
4166 
4167  {
4168  HfstState target1 = tr1.get_target_state();
4169  HfstState target2 = tr2.get_target_state();
4170  bool was_new_state = false;
4171  HfstState retval = find_target_state
4172  (target1, target2, state_map, intersection, was_new_state);
4173  // The sum of weight is copied to the resulting intersection.
4174  float transition_weight = tr1.get_weight() + tr2.get_weight();
4175  intersection.add_transition
4176  (state, HfstTransition <C>
4177  (retval, tr1.get_input_symbol(), tr1.get_output_symbol(), transition_weight));
4178  // For each new state added, check if the corresponding states in \a graph1 and \a graph2
4179  // are final. If they are, make the new state final with the sum of final weights.
4180  if (was_new_state && (graph1.is_final_state(target1) && graph2.is_final_state(target2)))
4181  {
4182  float final_weight = graph1.get_final_weight(target1) + graph2.get_final_weight(target2);
4183  intersection.set_final_weight(retval, final_weight);
4184  }
4185  return retval;
4186  }
4187 
4188 
4189  // A recursive function used by function intersect.
4190  //
4191  // @param graph1 The first transducer argument of intersection.
4192  // @param state1 The current state of \a graph1.
4193  // @param graph2 The second transducer argument of intersection.
4194  // @param state2 The current state of \a graph2.
4195  // @param intersection The intersection of \a graph1 and \a graph2.
4196  // @param state The current state of \a intersection.
4197  // @param state_map State pairs from \a graph1 and \a graph2 mapped to states in \a intersection.
4198  // @param agenda States in \a intersection already handled or scheduled to be handled.
4199  //
4200  // @pre \a graph1 and \a graph2 must be arc-sorted (via sort_arcs()) to make transition matching faster.
4201  // @pre \a graph1 and \a graph2 must be deterministic. (todo: handle equivalent transitions, maybe even epsilons?)
4202  HFSTDLL static void find_matches
4203  (HfstTransitionGraph & graph1, HfstState state1, HfstTransitionGraph & graph2, HfstState state2,
4204  HfstTransitionGraph & intersection, HfstState state, StateMap & state_map, std::set<HfstState> & agenda)
4205  {
4206  agenda.insert(state); // do not handle \a state twice
4207  HfstTransitions & tr1 = graph1.state_vector[state1]; // transitions of graph1
4208  HfstTransitions & tr2 = graph2.state_vector[state2]; // transitions of graph2
4209 
4210  if (tr1.size() == 0 || tr2.size() == 0)
4211  {
4212  return; // we cannot proceed as no matches are possible
4213  }
4214  unsigned int start_search_from=0; // where to start search in tr2
4215 
4216  // *** Go through all transitions of state \a state1 in \a graph1 and try to find a match in
4217  // transitions of state \a state2 in \a graph2. As transitions are sorted, we know that
4218  // if a match is found for tr1[i] in tr2[j], a match for tr1[i+1] can be searched starting from
4219  // tr2[j+1]. If no match is found for tr1[i] in tr2 but tr2[j] is the first element that is bigger
4220  // than tr1[i], a match for tr1[i+1] can be searched starting from tr2[j]. ***
4221  for (unsigned int i=0; i < tr1.size(); i++)
4222  {
4223  HfstTransition <C> & transition1 = tr1[i];
4224  // Transition data (input and output symbols) to be compared.
4225  const C & transition_data1 = transition1.get_transition_data();
4226 
4227  // --- Go through tr2 starting from start_search_from. ---
4228  for(unsigned int j=start_search_from; j < tr2.size(); j++)
4229  {
4230  HfstTransition <C> & transition2 = tr2[j];
4231  // Transition data (input and output symbols) to be compared.
4232  const C & transition_data2 = transition2.get_transition_data();
4233  // todo: input:output duplicates with different weights? (lower weight is chosen always?)
4234  if (transition_data2.less_than_ignore_weight(transition_data1))
4235  {
4236  // no match found, continue searching
4237  }
4238  else if (transition_data1.less_than_ignore_weight(transition_data2))
4239  {
4240  // No match can be found, start searching for next item in tr1
4241  // starting from current item of tr2.
4242  start_search_from=j;
4243  break;
4244  }
4245  else
4246  {
4247  // Match found, copy transitions and define target state in intersection
4248  HfstState target = handle_match(graph1, transition1, graph2, transition2, intersection, state, state_map);
4249  // Recursive call for target state, if it is not already on the agenda.
4250  if (agenda.find(target) == agenda.end())
4251  {
4252  find_matches(graph1, transition1.get_target_state(), graph2, transition2.get_target_state(), intersection, target, state_map, agenda);
4253  }
4254  // Start searching for next item in tr1 starting from next item of tr2.
4255  start_search_from=j+1;
4256  break;
4257  }
4258  }
4259  // --- A transition in tr1 compared for all possible transitions in tr2, compare next transition. ---
4260  }
4261  // *** All transitions in tr1 gone through. ***
4262  return;
4263  }
4264 
4265  HFSTDLL static HfstTransitionGraph intersect
4266  (HfstTransitionGraph & graph1, HfstTransitionGraph & graph2)
4267  {
4268  HfstTransitionGraph retval;
4269  StateMap state_map;
4270  std::set<HfstState> agenda;
4271  graph1.sort_arcs();
4272  graph2.sort_arcs();
4273  state_map[StatePair(0, 0)] = 0; // initial states
4274 
4275  if (graph1.is_final_state(0) && graph2.is_final_state(0))
4276  {
4277  float final_weight = std::min(graph1.get_final_weight(0), graph2.get_final_weight(0));
4278  retval.set_final_weight(0, final_weight);
4279  }
4280 
4281  find_matches(graph1, 0, graph2, 0, retval, 0, state_map, agenda);
4282 
4283  return retval;
4284  }
4285 
4286  // A function used by find_matches_for_merge
4287  // Copy matching transition graph_tr/merger_tr to state \a result_state in \a result and return
4288  // the target state of that transition. Also make that state final, if needed.
4289  HFSTDLL static HfstState handle_non_list_match(const HfstTransitionGraph & graph, const HfstTransition <C> & graph_transition,
4290  const HfstTransitionGraph & merger, HfstState merger_target,
4291  HfstTransitionGraph & result, HfstState result_state, StateMap & state_map)
4292 
4293  {
4294  HfstState graph_target = graph_transition.get_target_state();
4295  bool was_new_state = false;
4296  HfstState retval = find_target_state
4297  (graph_target, merger_target, state_map, result, was_new_state);
4298  result.add_transition
4299  (result_state, HfstTransition <C>
4300  (retval, graph_transition.get_input_symbol(), graph_transition.get_output_symbol(), graph_transition.get_weight()));
4301  // For each new state added, check if the corresponding states in \a graph and \a merger
4302  // are final. If they are, make the new state final with the sum of final weights.
4303  if (was_new_state && (graph.is_final_state(graph_target) && merger.is_final_state(merger_target)))
4304  {
4305  float final_weight = graph.get_final_weight(graph_target) + merger.get_final_weight(merger_target);
4306  result.set_final_weight(retval, final_weight);
4307  }
4308  return retval;
4309  }
4310 
4311 
4312  // A function used by find_matches_for_merge
4313  // Copy matching transition graph_tr/merger_tr to state \a result_state in \a result and return
4314  // the target state of that transition. Also make that state final, if needed.
4315  HFSTDLL static HfstState handle_list_match(const HfstTransitionGraph & graph, const HfstTransition <C> & graph_transition,
4316  const HfstTransitionGraph & merger, const HfstTransition <C> & merger_transition,
4317  HfstTransitionGraph & result, HfstState result_state, StateMap & state_map, std::set<std::string> & markers_added)
4318  {
4319  HfstState graph_target = graph_transition.get_target_state();
4320  HfstState merger_target = merger_transition.get_target_state();
4321  bool was_new_state = false;
4322  HfstState retval = find_target_state
4323  (graph_target, merger_target, state_map, result, was_new_state);
4324  // The sum of weight is copied to the resulting intersection.
4325  float transition_weight = graph_transition.get_weight() + merger_transition.get_weight();
4326 
4327  // testing: add a marker
4328  HfstState extra_state = result.add_state();
4329  result.add_transition
4330  (result_state, HfstTransition <C>
4331  (extra_state, "@" + graph_transition.get_input_symbol() + "@", "@" + graph_transition.get_output_symbol() + "@", 0));
4332  markers_added.insert("@" + graph_transition.get_input_symbol() + "@");
4333 
4334  result.add_transition
4335  (extra_state /*result_state*/, HfstTransition <C>
4336  (retval, merger_transition.get_input_symbol(), merger_transition.get_output_symbol(), transition_weight));
4337  // For each new state added, check if the corresponding states in \a graph1 and \a graph2
4338  // are final. If they are, make the new state final with the sum of final weights.
4339  if (was_new_state && (graph.is_final_state(graph_target) && merger.is_final_state(merger_target)))
4340  {
4341  float final_weight = graph.get_final_weight(graph_target) + merger.get_final_weight(merger_target);
4342  result.set_final_weight(retval, final_weight);
4343  }
4344  return retval;
4345  }
4346 
4347 
4348 
4349  HFSTDLL static bool is_list_symbol(const C & transition_data, const std::map<std::string, std::set<std::string> > & list_symbols)
4350  {
4351  std::string isymbol = transition_data.get_input_symbol();
4352  std::string osymbol = transition_data.get_output_symbol();
4353 
4354  if (isymbol != osymbol)
4355  {
4356  throw "is_list_symbol: input and output symbols must be the same";
4357  }
4358  return (list_symbols.find(isymbol) != list_symbols.end());
4359  }
4360 
4361  /*
4362  // @pre \a transition_data is a list symbol
4363  // @pre list symbols cannot contain '_' or '@'
4364  static std::set<std::string> get_list_symbols(const std::string & list_symbol)
4365  {
4366  std::set<std::string> result;
4367  unsigned int i = 6;
4368 
4369  // skip list name
4370  while(list_symbol[i] != '_')
4371  {
4372  i++;
4373  }
4374  i++;
4375 
4376  // extract symbols
4377  std::string symbol("");
4378  while (list_symbol[i] != '@')
4379  {
4380  if (list_symbol[i] == '_')
4381  {
4382  result.insert(symbol);
4383  symbol = std::string("");
4384  }
4385  else
4386  {
4387  symbol.append(1, list_symbol[i]);
4388  }
4389  i++;
4390  }
4391  result.insert(symbol);
4392 
4393  return result;
4394  }*/
4395 
4396  // A recursive function used by function intersect.
4397  //
4398  // @param graph The first transducer argument of intersection.
4399  // @param graph_state The current state of \a graph1.
4400  // @param merger The second transducer argument of intersection.
4401  // @param merger_state The current state of \a graph2.
4402  // @param result The intersection of \a graph1 and \a graph2.
4403  // @param result_state The current state of \a intersection.
4404  // @param state_map State pairs from \a graph and \a merger mapped to states in \a result.
4405  // @param agenda States in \a result already handled or scheduled to be handled.
4406  //
4407  // @pre \a graph and \a merger must be arc-sorted (via sort_arcs()) to make transition matching faster.
4408  // @pre \a graph and \a merger must be deterministic. (todo: handle equivalent transitions, maybe even epsilons?)
4409  HFSTDLL static void find_matches_for_merge
4410  (HfstTransitionGraph & graph, HfstState graph_state, HfstTransitionGraph & merger, HfstState merger_state,
4411  HfstTransitionGraph & result, HfstState result_state, StateMap & state_map, std::set<HfstState> & agenda,
4412  const std::map<std::string, std::set<std::string> > & list_symbols, std::set<std::string> & markers_added)
4413  {
4414  agenda.insert(result_state); // do not handle \a result_state twice
4415  HfstTransitions & graph_transitions = graph.state_vector[graph_state]; // transitions of graph
4416  HfstTransitions & merger_transitions = merger.state_vector[merger_state]; // transitions of merger
4417 
4418  if (graph_transitions.size() == 0)
4419  {
4420  return; // we cannot proceed as no matches are possible
4421  }
4422 
4423  // Go through all transitions in state \a graph_state of \a graph.
4424  for (unsigned int i=0; i < graph_transitions.size(); i++)
4425  {
4426  HfstTransition <C> & graph_transition = graph_transitions[i];
4427  const C & graph_transition_data = graph_transition.get_transition_data();
4428 
4429  // List symbols must be checked separately
4430  if (is_list_symbol(graph_transition_data, list_symbols))
4431  {
4432  const std::set<std::string> & symbol_list = list_symbols.find(graph_transition_data.get_input_symbol())->second;
4433  bool list_match_found=false;
4434  // Find all matches
4435  for(unsigned int j=0; j < merger_transitions.size(); j++)
4436  {
4437  HfstTransition <C> & merger_transition = merger_transitions[j];
4438  const C & merger_transition_data = merger_transition.get_transition_data();
4439  const std::string & isymbol = merger_transition_data.get_input_symbol();
4440  const std::string & osymbol = merger_transition_data.get_output_symbol();
4441 
4442  if (isymbol != osymbol)
4443  {
4444  throw "find_matches_for_merge: input and output symbols must be the same";
4445  }
4446 
4447  if (symbol_list.find(isymbol) != symbol_list.end())
4448  {
4449  list_match_found=true;
4450  HfstState target = handle_list_match(graph, graph_transition, merger, merger_transition, result, result_state, state_map, markers_added);
4451  if (agenda.find(target) == agenda.end())
4452  {
4453  find_matches_for_merge(graph, graph_transition.get_target_state(), merger, merger_transition.get_target_state(), result, target, state_map, agenda, list_symbols, markers_added);
4454  }
4455  }
4456  }
4457  if (list_match_found)
4458  {
4459  continue;
4460  }
4461  }
4462  // If symbol is not a list symbol or no match was found for it, copy symbol as such
4463  // We use merger_state as merger transition target state
4464  HfstState target = handle_non_list_match(graph, graph_transition, merger, merger_state, result, result_state, state_map);
4465  if (agenda.find(target) == agenda.end())
4466  {
4467  find_matches_for_merge(graph, graph_transition.get_target_state(), merger, /*merger_transition.get_target_state()*/ merger_state, result, target, state_map, agenda, list_symbols, markers_added);
4468  }
4469  // --- A transition in graph compared for all corresponding transitions in merger, compare next transition. ---
4470  }
4471  // *** All transitions in state gone through. ***
4472  return;
4473  }
4474 
4475  HFSTDLL static HfstTransitionGraph merge
4476  (HfstTransitionGraph & graph, HfstTransitionGraph & merger, const std::map<std::string, std::set<std::string> > & list_symbols, std::set<std::string> & markers_added)
4477  {
4478  HfstTransitionGraph result;
4479  StateMap state_map;
4480  std::set<HfstState> agenda;
4481  graph.sort_arcs();
4482  merger.sort_arcs();
4483  state_map[StatePair(0, 0)] = 0; // initial states
4484 
4485  if (graph.is_final_state(0) && merger.is_final_state(0))
4486  {
4487  float final_weight = graph.get_final_weight(0) + merger.get_final_weight(0);
4488  result.set_final_weight(0, final_weight);
4489  }
4490 
4491  try
4492  {
4493  find_matches_for_merge(graph, 0, merger, 0, result, 0, state_map, agenda, list_symbols, markers_added);
4494  }
4495  catch (const char * msg)
4496  {
4498  }
4499 
4500  return result;
4501  }
4502 
4503 
4504 
4505 
4506 
4507 
4508 
4509 
4510 
4511 /* /\** @brief Determine whether this graph has input-epsilon cycles. */
4512 /* *\/ */
4513 /* bool has_input_epsilon_cycles(void) */
4514 /* { */
4515 /* typedef std::map<HfstState, */
4516 /* std::set<HfstTransition<C> > > */
4517 /* HfstStates; */
4518 /* HfstStates state_map; */
4519 
4520 /* std::set<HfstState> total_seen; */
4521 /* for (state_vector::iterator it = state_vector.begin(); */
4522 /* it != state_vector.end(); ++it) { */
4523 /* if (total_seen.count(*it) != 0) { */
4524 /* continue; */
4525 /* } */
4526 
4527 /* } */
4528 /* } */
4529 
4530 
4531  // --- Friends ---
4532 
4533  friend class ConversionFunctions;
4534  friend class hfst::HarmonizeUnknownAndIdentitySymbols;
4535  };
4536 
4541  typedef HfstTransitionGraph <HfstTropicalTransducerTransitionData>
4543 
4545  //typedef HfstTransitionGraph <HfstFastTransitionData>
4546  // HfstFastTransducer;
4547 
4548  }
4549 
4550  }
4551 
4552 #endif // #ifndef _HFST_TRANSITION_GRAPH_H_
HFSTDLL C::WeightType get_weight() const
Get the weight of the transition.
Definition: HfstTransition.h:124
HFSTDLL HfstTransitionGraph(const hfst::HfstTransducer &transducer)
Create an HfstTransitionGraph equivalent to HfstTransducer transducer. FIXME: move to a separate file...
Definition: HfstTransitionGraph.h:250
HFSTDLL void add_symbol_to_alphabet(const HfstSymbol &symbol)
Explicitly add symbol to the alphabet of the graph.
Definition: HfstTransitionGraph.h:348
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:70
HFSTDLL void write_in_xfst_format(FILE *file, bool write_weights=true)
Write the graph in xfst text format to FILE file. write_weights defines whether weights are printed (...
Definition: HfstTransitionGraph.h:1522
HFSTDLL void set_final_weight(HfstState s, const typename C::WeightType &weight)
Set the final weight of state s in this graph to weight.
Definition: HfstTransitionGraph.h:639
HfstBasicStates states_and_transitions() const
The states of the graph and their transitions.
Definition: HfstTransitionGraph.h:198
HFSTDLL void add_transition(HfstState s, const HfstTransition< C > &transition, bool add_symbols_to_alphabet=true)
Add a transition transition to state s.
Definition: HfstTransitionGraph.h:556
std::string String
A UTF-8 symbol in a transition.
Definition: HfstSymbolDefs.h:59
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:105
unsigned int HfstState
The number of a state in an HfstTransitionGraph.
Definition: HfstDataTypes.h:119
A synchronous finite-state transducer.
Definition: HfstTransducer.h:253
HFSTDLL void write_in_xfst_format(std::ostream &os, bool write_weights=true)
Write the graph in xfst text format to ostream os. write_weights defines whether weights are printed ...
Definition: HfstTransitionGraph.h:906
HFSTDLL HfstTransitions & transitions(HfstState s)
Get mutable transitions.
Definition: HfstTransitionGraph.h:704
HFSTDLL const_iterator begin() const
Get a const iterator to the beginning of states in the graph.
Definition: HfstTransitionGraph.h:668
HFSTDLL HfstTransitionGraph & sort_arcs(void)
Sort the arcs of this transducer according to input and output symbols.
Definition: HfstTransitionGraph.h:647
HFSTDLL void remove_symbol_from_alphabet(const HfstSymbol &symbol)
Remove symbol symbol from the alphabet of the graph.
Definition: HfstTransitionGraph.h:356
HFSTDLL HfstTransitionGraph & substitute(const HfstSymbol &old_symbol, const HfstSymbol &new_symbol, bool input_side=true, bool output_side=true)
Substitute old_symbol with new_symbol in all transitions. input_side and output_side define whether t...
Definition: HfstTransitionGraph.h:2309
HFSTDLL iterator begin()
Get an iterator to the beginning of the states in the graph.
Definition: HfstTransitionGraph.h:664
State is not final (and cannot have a final weight).
Definition: HfstExceptionDefs.h:262
HFSTDLL void add_symbols_to_alphabet(const HfstSymbolSet &symbols)
Same as add_symbol_to_alphabet for each symbol in symbols.
Definition: HfstTransitionGraph.h:370
std::set< HfstSymbol > HfstSymbolSet
A set of symbol pairs.
Definition: HfstTransitionGraph.h:139
HFSTDLL HfstTransitionGraph & insert_freely(const HfstSymbolPair &symbol_pair, typename C::WeightType weight)
Insert freely any number of symbol_pair in the graph with weight weight.
Definition: HfstTransitionGraph.h:3024
HFSTDLL C::WeightType get_final_weight(HfstState s) const
Definition: HfstTransitionGraph.h:627
HFSTDLL const_iterator end() const
Get a const iterator to the end of states (last state + 1) in the graph.
Definition: HfstTransitionGraph.h:676
Class HfstTransition.
Base class for HfstExceptions. Holds its own name and the file and line number where it was thrown...
Definition: HfstExceptionDefs.h:26
HFSTDLL HfstState get_target_state() const
Get the target state of the transition.
Definition: HfstTransition.h:94
HFSTDLL HfstTransitionGraph & substitute(const HfstSymbolPair &sp, const HfstTransitionGraph &graph)
Substitute all transitions old_symbol : new_symbol with a copy of graph.
Definition: HfstTransitionGraph.h:2579
HFSTDLL const HfstTransitions & transitions(HfstState s) const
Alternative name for operator[].
Definition: HfstTransitionGraph.h:697
A simple transition graph format that consists of states and transitions between those states...
Definition: HfstDataTypes.h:112
std::vector< HfstTransition< C > > HfstTransitions
Datatype for the states of a transition in a graph.
Definition: HfstTransitionGraph.h:146
HFSTDLL void write_in_att_format_number(FILE *file, bool write_weights=true)
Write the graph in AT&T format to FILE file using numbers instead of symbol names. write_weights defines whether weights are printed.
Definition: HfstTransitionGraph.h:1713
HfstStates::const_iterator const_iterator
A const iterator type that points a state in a graph.
Definition: HfstTransitionGraph.h:184
HFSTDLL void write_in_att_format(std::ostream &os, bool write_weights=true)
Write the graph in AT&T format to ostream os. write_weights defines whether weights are printed...
Definition: HfstTransitionGraph.h:1563
HFSTDLL HfstTransitionGraph & operator=(const HfstTransitionGraph &graph)
The assignment operator.
Definition: HfstTransitionGraph.h:224
#define HFST_THROW(E)
Macro to throw an exception of type E. Use THROW instead of regular throw with subclasses of HfstExce...
Definition: HfstExceptionDefs.h:40
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:109
std::string name
The name of the graph.
Definition: HfstTransitionGraph.h:187
The stream is at end.
Definition: HfstExceptionDefs.h:161
std::vector< HfstSymbolPair > HfstSymbolPairVector
A vector of symbol pairs.
Definition: HfstTransitionGraph.h:141
The StateId argument is not valid.
Definition: HfstExceptionDefs.h:316
HFSTDLL void write_in_prolog_format(std::ostream &os, const std::string &name, bool write_weights=true)
Write the graph in prolog format to ostream os. write_weights defines whether weights are printed (to...
Definition: HfstTransitionGraph.h:1072
HFSTDLL HfstState add_state(void)
Add a new state to this graph and return its number.
Definition: HfstTransitionGraph.h:528
HFSTDLL HfstState get_max_state() const
Get the biggest state number in use.
Definition: HfstTransitionGraph.h:549
HFSTDLL void prune_alphabet(bool force=true)
Remove all symbols that do not occur in transitions of the graph from its alphabet.
Definition: HfstTransitionGraph.h:450
HFSTDLL const C & get_transition_data() const
Get the transition data of the transition.
Definition: HfstTransition.h:99
HFSTDLL int longest_path_size()
Definition: HfstTransitionGraph.h:3459
HFSTDLL C::SymbolType get_output_symbol() const
Get the output symbol of the transition.
Definition: HfstTransition.h:109
C::SymbolType HfstSymbol
Datatype for a symbol in a transition.
Definition: HfstTransitionGraph.h:132
HFSTDLL HfstTransitionGraph & harmonize(HfstTransitionGraph &another)
Harmonize this HfstTransitionGraph and another.
Definition: HfstTransitionGraph.h:3137
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:82
HFSTDLL bool is_final_state(HfstState s) const
Whether state s is final. FIXME: return positive infinity instead if not final.
Definition: HfstTransitionGraph.h:622
HFSTDLL void write_in_att_format(FILE *file, bool write_weights=true)
Write the graph in AT&T format to FILE file. write_weights defines whether weights are printed...
Definition: HfstTransitionGraph.h:1610
HFSTDLL C::SymbolType get_input_symbol() const
Get the input symbol of the transition.
Definition: HfstTransition.h:104
HFSTDLL std::vector< unsigned int > path_sizes()
Definition: HfstTransitionGraph.h:3486
std::pair< float, StringVector > HfstOneLevelPath
A path of one level of arcs with collected weight.
Definition: HfstDataTypes.h:96
Transducers are not automata.
Definition: HfstExceptionDefs.h:303
std::pair< HfstSymbol, HfstSymbol > HfstSymbolPair
Datatype for a symbol pair in a transition.
Definition: HfstTransitionGraph.h:135
Declarations of functions for converting between transducer backend formats.
HFSTDLL HfstTransitionGraph(const HfstTransitionGraph &graph)
Create a deep copy of HfstTransitionGraph graph.
Definition: HfstTransitionGraph.h:241
std::vector< HfstState > states() const
The states of the graph.
Definition: HfstTransitionGraph.h:190
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:47
std::map< String, String > HfstSymbolSubstitutions
A map of substitutions used when performing multiple symbol-to-symbol substitutions.
Definition: HfstSymbolDefs.h:88
std::pair< float, StringPairVector > HfstTwoLevelPath
A path of two level of arcs with collected weight.
Definition: HfstDataTypes.h:107
HfstTransition< HfstTropicalTransducerTransitionData > HfstBasicTransition
An HfstTransition with transition data of type HfstTropicalTransducerTransitionData.
Definition: HfstDataTypes.h:121
HFSTDLL void remove_transition(HfstState s, const HfstTransition< C > &transition, bool remove_symbols_from_alphabet=false)
Remove transition transition from state s. remove_symbols_from_alphabet defines whether symbols in tr...
Definition: HfstTransitionGraph.h:576
std::set< HfstSymbol > HfstTransitionGraphAlphabet
Datatype for the alphabet of a graph.
Definition: HfstTransitionGraph.h:143
HfstTransitionGraph< HfstTropicalTransducerTransitionData > HfstBasicTransducer
An HfstTransitionGraph with transitions of type HfstTropicalTransducerTransitionData and weight type ...
Definition: HfstDataTypes.h:113
A transition that consists of a target state and transition data represented by class C...
Definition: HfstDataTypes.h:121
HFSTDLL void write_in_prolog_format(FILE *file, const std::string &name, bool write_weights=true)
Write the graph in prolog format to FILE file. write_weights defines whether weights are printed (tod...
Definition: HfstTransitionGraph.h:1010
std::map< StringPair, StringPair > HfstSymbolPairSubstitutions
A map of substitutions used when performing multiple symbol pair-to-symbol pair substitutions.
Definition: HfstSymbolDefs.h:94
HFSTDLL HfstTransitionGraph & substitute(bool(*func)(const HfstSymbolPair &sp, HfstSymbolPairSet &sps))
Substitute all transitions with a set of transitions as defined by function func. ...
Definition: HfstTransitionGraph.h:2472
HFSTDLL iterator end()
Get an iterator to the end of states (last state + 1) in the graph.
Definition: HfstTransitionGraph.h:672
HFSTDLL HfstTransitionGraph(void)
Create a graph with one initial state that has state number zero and is not a final state...
Definition: HfstTransitionGraph.h:208
HFSTDLL const HfstTransitions & operator[](HfstState s) const
Get the set of transitions of state s in this graph.
Definition: HfstTransitionGraph.h:684
HFSTDLL HfstState add_state(HfstState s)
Add a state s to this graph.
Definition: HfstTransitionGraph.h:540
std::set< HfstSymbolPair > HfstSymbolPairSet
A set of symbol pairs.
Definition: HfstTransitionGraph.h:137
The stream is not in valid AT&T format.
Definition: HfstExceptionDefs.h:241
HFSTDLL const HfstTransitionGraphAlphabet & get_alphabet() const
Get the set of HfstSymbols in the alphabet of the graph.
Definition: HfstTransitionGraph.h:498