10 #ifndef _HFST_TRANSITION_GRAPH_H_
11 #define _HFST_TRANSITION_GRAPH_H_
24 #include "../HfstSymbolDefs.h"
25 #include "../HfstExceptionDefs.h"
26 #include "../HfstDataTypes.h"
27 #include "../HarmonizeUnknownAndIdentitySymbols.h"
28 #include "../HfstFlagDiacritics.h"
29 #include "../HfstEpsilonHandler.h"
32 #include "HfstTropicalTransducerTransitionData.h"
35 #include "../hfstdll.h"
47 namespace implementations {
52 typedef std::pair<HfstState, std::vector<std::pair<std::string, std::string> > > HfstReplacement;
53 typedef std::vector<HfstReplacement> HfstReplacements;
54 typedef std::map<HfstState, HfstReplacements > HfstReplacementsMap;
56 typedef std::vector<std::vector<hfst::implementations::HfstBasicTransition> > HfstBasicStates;
125 template <
class C>
class HfstTransitionGraph
134 typedef std::pair<HfstSymbol, HfstSymbol>
151 typedef std::vector<HfstTransitions> HfstStates;
154 HfstStates state_vector;
158 static const HfstState INITIAL_STATE = 0;
161 typedef std::map<HfstState,typename C::WeightType> FinalWeightMap;
163 FinalWeightMap final_weight_map;
169 typedef unsigned int HfstNumber;
170 typedef std::vector<HfstNumber> HfstNumberVector;
171 typedef std::pair<HfstNumber, HfstNumber> HfstNumberPair;
172 typedef std::map<HfstNumberPair, HfstNumberPair> HfstNumberPairSubstitutions;
178 typedef typename HfstStates::iterator iterator;
209 initialize_alphabet(alphabet);
211 state_vector.push_back(tr);
215 initialize_alphabet(alphabet);
217 state_vector.push_back(tr);
218 unsigned int linecount=0;
219 this->assign(read_in_att_format(file,
"@0@", linecount));
228 state_vector = graph.state_vector;
229 final_weight_map = graph.final_weight_map;
230 alphabet = graph.alphabet;
242 state_vector = graph.state_vector;
243 final_weight_map = graph.final_weight_map;
244 alphabet = graph.alphabet;
252 *fsm = ConversionFunctions::
253 hfst_transducer_to_hfst_basic_transducer(transducer);
254 state_vector = fsm->state_vector;
255 final_weight_map = fsm->final_weight_map;
256 alphabet = fsm->alphabet;
269 alpha.insert(C::get_epsilon());
270 alpha.insert(C::get_unknown());
271 alpha.insert(C::get_identity());
276 bool check_alphabet()
278 for (iterator it =
begin(); it !=
end(); it++)
280 for (
typename HfstTransitions::iterator tr_it
282 tr_it != it->end(); tr_it++)
284 C data = tr_it->get_transition_data();
286 if(alphabet.find(data.get_input_symbol())
290 if(alphabet.find(data.get_output_symbol())
301 HFSTDLL
void print_alphabet()
const
303 for (
typename HfstTransitionGraphAlphabet::const_iterator it
304 = alphabet.begin(); it != alphabet.end(); it++)
306 if (it != alphabet.begin())
310 std::cerr << std::endl;
315 unsigned int get_symbol_number
317 return C::get_number(symbol);
322 void initialize_state_vector
323 (
unsigned int number_of_states)
325 state_vector.reserve(number_of_states);
331 void initialize_transition_vector
332 (
unsigned int state_number,
unsigned int number_of_transitions)
335 state_vector[state_number].reserve(number_of_transitions);
349 alphabet.insert(symbol);
357 alphabet.erase(symbol);
360 HFSTDLL
void remove_symbols_from_alphabet(
const HfstSymbolSet &symbols) {
361 for (
typename HfstSymbolSet::const_iterator it = symbols.begin();
362 it != symbols.end(); it++)
372 for (
typename HfstSymbolSet::const_iterator it = symbols.begin();
373 it != symbols.end(); it++)
375 alphabet.insert(*it);
381 for (
typename HfstSymbolPairSet::const_iterator it = symbols.begin();
382 it != symbols.end(); it++)
384 alphabet.insert(it->first);
385 alphabet.insert(it->second);
391 HFSTDLL
void prune_alphabet_after_substitution(
const std::set<unsigned int> &symbols)
393 if (symbols.size() == 0)
396 std::vector<bool> symbols_found;
398 (C::get_max_number()+1,
false);
401 for (iterator it =
begin(); it !=
end(); it++)
403 for (
typename HfstTransitions::iterator tr_it
405 tr_it != it->end(); tr_it++)
407 const C & data = tr_it->get_transition_data();
408 symbols_found.at(data.get_input_number()) =
true;
409 symbols_found.at(data.get_output_number()) =
true;
415 for (std::set<unsigned int>::const_iterator it = symbols.begin();
416 it != symbols.end(); it++)
418 if (! symbols_found.at(*it))
419 alphabet.erase(C::get_symbol(*it));
427 for (iterator it =
begin(); it !=
end(); it++)
429 for (
typename HfstTransitions::iterator tr_it
431 tr_it != it->end(); tr_it++)
433 C data = tr_it->get_transition_data();
435 retval.insert(data.get_input_symbol());
436 retval.insert(data.get_output_symbol());
456 bool unknowns_or_identities_used =
457 ( (symbols_found.find(
"@_UNKNOWN_SYMBOL_@") != symbols_found.end()) ||
458 (symbols_found.find(
"@_IDENTITY_SYMBOL_@") != symbols_found.end()) );
462 if (!force && unknowns_or_identities_used)
466 symbols_found.insert(
"@_EPSILON_SYMBOL_@");
467 symbols_found.insert(
"@_UNKNOWN_SYMBOL_@");
468 symbols_found.insert(
"@_IDENTITY_SYMBOL_@");
474 for (
typename HfstTransitionGraphAlphabet::iterator it
476 it != alphabet.end(); it++)
478 if (symbols_found.find(*it) == symbols_found.end())
479 symbols_not_found.insert(*it);
484 for (
typename HfstTransitionGraphAlphabet::iterator it
485 = symbols_not_found.begin();
486 it != symbols_not_found.end(); it++)
507 for (
typename HfstTransitions::const_iterator tr_it
509 tr_it != it->end(); tr_it++)
511 C data = tr_it->get_transition_data();
513 retval.insert(
StringPair(data.get_input_symbol(),
514 data.get_output_symbol()));
530 state_vector.push_back(tr);
531 return state_vector.size()-1;
541 while(state_vector.size() <= s) {
543 state_vector.push_back(tr);
550 return state_vector.size()-1;
564 alphabet.insert(data.get_input_symbol());
565 alphabet.insert(data.get_output_symbol());
567 state_vector[s].push_back(transition);
577 bool remove_symbols_from_alphabet=
false)
579 if (! (state_vector.size() > s))
588 std::stack<typename HfstTransitions::iterator> elements_to_remove;
591 for (
typename HfstTransitions::iterator it = transitions.begin();
592 it != transitions.end(); it++)
600 elements_to_remove.push(it);
604 while (!elements_to_remove.empty())
606 state_vector[s].erase(elements_to_remove.top());
607 elements_to_remove.pop();
610 if (remove_symbols_from_alphabet)
623 return (final_weight_map.find(s) != final_weight_map.end());
630 if (final_weight_map.find(s) != final_weight_map.end())
631 return final_weight_map.find(s)->second;
640 const typename C::WeightType & weight) {
642 final_weight_map[s] = weight;
649 for (
typename HfstStates::iterator it = state_vector.begin();
650 it != state_vector.end();
654 std::sort<typename HfstTransitions::iterator>
655 (transitions.begin(),transitions.end());
664 HFSTDLL iterator
begin() {
return state_vector.begin(); }
672 HFSTDLL iterator
end() {
return state_vector.end(); }
686 if (s >= state_vector.size()) {
688 return state_vector[s];
706 if (s >= state_vector.size()) {
708 return state_vector[s];
720 state_vector[s1] = state_vector[s2];
721 state_vector[s2] = s1_copy;
724 for (iterator it =
begin(); it !=
end(); it++)
727 for (
unsigned int i=0; i < it->size(); i++)
745 it->operator[](i) = tr;
753 typename FinalWeightMap::iterator s1_it = final_weight_map.find(s1);
754 typename FinalWeightMap::iterator s2_it = final_weight_map.find(s2);
755 typename FinalWeightMap::iterator end_it = final_weight_map.end();
757 if (s1_it != end_it && s2_it != end_it) {
758 typename C::WeightType s1_weight = s1_it->second;
759 final_weight_map[s1] = s2_it->second;
760 final_weight_map[s2] = s1_weight;
762 if (s1_it != end_it) {
763 typename C::WeightType w = s1_it->second;
764 final_weight_map.erase(s1);
765 final_weight_map[s2] = w;
767 if (s2_it != end_it) {
768 typename C::WeightType w = s2_it->second;
769 final_weight_map.erase(s2);
770 final_weight_map[s1] = w;
777 static void write_weight(FILE * file,
float weight)
782 fprintf(file,
"%f", weight);
785 static void write_weight(std::ostream & os,
float weight)
794 static void replace_all(std::string & symbol,
795 const std::string &str1,
796 const std::string &str2)
798 size_t pos = symbol.find(str1);
799 while (pos != std::string::npos)
801 symbol.erase(pos, str1.size());
802 symbol.insert(pos, str2);
804 (str1, pos+str2.size());
809 static void xfstize(std::string & symbol)
811 std::string escaped_symbol;
812 for (
size_t pos = 0; pos < symbol.size(); pos++)
814 if (symbol[pos] ==
'%')
815 escaped_symbol.append(
"\"%\"");
816 else if (symbol[pos] ==
'"')
817 escaped_symbol.append(
"%\"");
818 else if (symbol[pos] ==
'?')
819 escaped_symbol.append(
"\"?\"");
821 escaped_symbol.append(1, symbol[pos]);
823 symbol = escaped_symbol;
826 static void xfstize_symbol(std::string & symbol)
829 replace_all(symbol,
"@_EPSILON_SYMBOL_@",
"0");
830 replace_all(symbol,
"@_UNKNOWN_SYMBOL_@",
"?");
831 replace_all(symbol,
"@_IDENTITY_SYMBOL_@",
"?");
832 replace_all(symbol,
"\t",
"@_TAB_@");
835 void print_xfst_state(std::ostream & os,
HfstState state)
837 if (state == INITIAL_STATE) { os <<
"S"; }
842 void print_xfst_state(FILE * file,
HfstState state)
844 if (state == INITIAL_STATE) { fprintf(file,
"S"); }
846 fprintf(file,
"s%i", state);
849 void print_xfst_arc(std::ostream & os, C data)
852 if (data.get_input_symbol() !=
853 data.get_output_symbol())
857 std::string s = data.get_input_symbol();
860 if (data.get_input_symbol() !=
861 data.get_output_symbol() ||
862 data.get_output_symbol() ==
"@_UNKNOWN_SYMBOL_@")
864 s = data.get_output_symbol();
868 if (data.get_input_symbol() !=
869 data.get_output_symbol())
875 void print_xfst_arc(FILE * file, C data)
877 if (data.get_input_symbol() !=
878 data.get_output_symbol())
883 std::string s = data.get_input_symbol();
885 fprintf(file,
"%s", s.c_str());
887 if (data.get_input_symbol() !=
888 data.get_output_symbol() ||
889 data.get_output_symbol() ==
"@_UNKNOWN_SYMBOL_@")
891 s = data.get_output_symbol();
893 fprintf(file,
":%s", s.c_str());
895 if (data.get_input_symbol() !=
896 data.get_output_symbol())
909 unsigned int source_state=0;
910 for (iterator it =
begin(); it !=
end(); it++)
912 print_xfst_state(os, source_state);
915 if (it->begin() == it->end())
921 for (
typename HfstTransitions::iterator tr_it
923 tr_it != it->end(); tr_it++)
925 if (tr_it != it->begin())
930 print_xfst_arc(os, data);
936 os <<
"." << std::endl;
942 HFSTDLL
static std::string prologize_symbol(
const std::string & symbol)
948 if (symbol ==
"@_EPSILON_SYMBOL_@")
950 if (symbol ==
"@_UNKNOWN_SYMBOL_@")
952 if(symbol ==
"@_IDENTITY_SYMBOL_@")
955 std::string retval(symbol);
956 replace_all(retval,
"\\",
"\\\\");
957 replace_all(retval,
"\"",
"\\\"");
962 HFSTDLL
static std::string deprologize_symbol(
const std::string & symbol)
969 return "@_EPSILON_SYMBOL_@";
971 return "@_UNKNOWN_SYMBOL_@";
974 std::string retval(symbol);
975 replace_all(retval,
"\\\"",
"\"");
976 replace_all(retval,
"\\\\",
"\\");
980 HFSTDLL
static void print_prolog_arc_symbols(FILE * file, C data)
982 std::string symbol = prologize_symbol(data.get_input_symbol());
983 fprintf(file,
"\"%s\"", symbol.c_str());
985 if (data.get_input_symbol() !=
986 data.get_output_symbol() ||
987 data.get_input_symbol() ==
"@_UNKNOWN_SYMBOL_@")
989 symbol = prologize_symbol(data.get_output_symbol());
990 fprintf(file,
":\"%s\"", symbol.c_str());
994 HFSTDLL
static void print_prolog_arc_symbols(std::ostream & os, C data)
996 std::string symbol = prologize_symbol(data.get_input_symbol());
997 os <<
"\"" << symbol <<
"\"";
999 if (data.get_input_symbol() !=
1000 data.get_output_symbol() ||
1001 data.get_input_symbol() ==
"@_UNKNOWN_SYMBOL_@")
1003 symbol = prologize_symbol(data.get_output_symbol());
1004 os <<
":\"" << symbol <<
"\"";
1011 bool write_weights=
true)
1013 unsigned int source_state=0;
1014 const char * identifier = name.c_str();
1016 if (name.find(
",") != std::string::npos)
1018 std::string msg(
"no commas allowed in the name of prolog networks");
1021 fprintf(file,
"network(%s).\n", identifier);
1025 initialize_alphabet(symbols_used_);
1026 for (
typename HfstTransitionGraphAlphabet::const_iterator it
1027 = alphabet.begin(); it != alphabet.end(); it++)
1029 if (symbols_used_.find(*it) == symbols_used_.end())
1031 fprintf(file,
"symbol(%s, \"%s\").\n", identifier, prologize_symbol(it->c_str()).c_str());
1036 for (iterator it =
begin(); it !=
end(); it++)
1038 for (
typename HfstTransitions::iterator tr_it
1040 tr_it != it->end(); tr_it++)
1042 fprintf(file,
"arc(%s, %i, %i, ",
1045 print_prolog_arc_symbols(file, data);
1046 if (write_weights) {
1047 fprintf(file,
", ");
1048 write_weight(file, data.get_weight());
1050 fprintf(file,
").\n");
1056 for (
typename FinalWeightMap::const_iterator it
1057 = this->final_weight_map.begin();
1058 it != this->final_weight_map.end(); it++)
1060 fprintf(file,
"final(%s, %i", identifier, it->first);
1063 fprintf(file,
", ");
1064 write_weight(file, it->second);
1066 fprintf(file,
").\n");
1073 bool write_weights=
true)
1075 unsigned int source_state=0;
1078 if (name.find(
",") != std::string::npos)
1080 std::string msg(
"no commas allowed in the name of prolog networks");
1083 os <<
"network(" << name <<
")." << std::endl;
1087 initialize_alphabet(symbols_used_);
1088 for (
typename HfstTransitionGraphAlphabet::const_iterator it
1089 = alphabet.begin(); it != alphabet.end(); it++)
1091 if (symbols_used_.find(*it) == symbols_used_.end())
1093 os <<
"symbol(" << name <<
", \"" << prologize_symbol(*it) <<
"\")." << std::endl;
1098 for (iterator it =
begin(); it !=
end(); it++)
1100 for (
typename HfstTransitions::iterator tr_it
1102 tr_it != it->end(); tr_it++)
1104 os <<
"arc(" << name <<
", " << source_state <<
", " << tr_it->
get_target_state() <<
", ";
1106 print_prolog_arc_symbols(os, data);
1107 if (write_weights) {
1109 write_weight(os, data.get_weight());
1111 os <<
")." << std::endl;
1117 for (
typename FinalWeightMap::const_iterator it
1118 = this->final_weight_map.begin();
1119 it != this->final_weight_map.end(); it++)
1121 os <<
"final(" << name <<
", " << it->first;
1122 if (write_weights) {
1124 write_weight(os, it->second);
1126 os <<
")." << std::endl;
1132 HFSTDLL
static bool strip_quotes_from_both_sides(std::string & str)
1136 if (str[0] !=
'"' || str[str.length()-1] !=
'"')
1139 str.erase(str.length()-1, 1);
1145 HFSTDLL
static bool strip_ending_parenthesis_and_comma(std::string & str)
1149 if (str[str.length()-2] !=
')' || str[str.length()-1] !=
'.')
1151 str.erase(str.length()-2);
1155 HFSTDLL
static bool parse_prolog_network_line(
const std::string & line, std::string &
name)
1159 int n = sscanf(line.c_str(),
"network(%s", namearr);
1163 std::string namestr(namearr);
1165 if (!strip_ending_parenthesis_and_comma(namestr))
1174 HFSTDLL
static std::vector<unsigned int> get_positions_of_unescaped_char
1175 (
const std::string & str,
char c,
char esc)
1177 std::vector<unsigned int> retval;
1178 for (
size_t i=0; i < str.length(); i++)
1183 retval.push_back(i);
1184 else if (str[i-1] == esc)
1187 retval.push_back(i);
1197 HFSTDLL
static bool get_prolog_arc_symbols
1198 (
const std::string & str, std::string & isymbol, std::string & osymbol)
1201 std::vector<unsigned int> quote_positions
1202 = get_positions_of_unescaped_char(str,
'"',
'\\');
1205 if (quote_positions.size() == 2)
1207 if (quote_positions[0] != 0 ||
1208 quote_positions[1] != str.length()-1)
1212 else if (quote_positions.size() == 4)
1214 if (quote_positions[0] != 0 ||
1215 quote_positions[3] != str.length()-1)
1219 if (quote_positions[2] - quote_positions[1] != 2)
1223 if (str[quote_positions[1] + 1] !=
':')
1235 if (quote_positions.size() == 2)
1238 std::string symbol(str, quote_positions[0]+1, quote_positions[1]-quote_positions[0]-1);
1239 isymbol = deprologize_symbol(symbol);
1240 if (isymbol ==
"@_UNKNOWN_SYMBOL_@")
1241 isymbol =
"@_IDENTITY_SYMBOL_@";
1248 std::string insymbol(str, quote_positions[0]+1, quote_positions[1]-quote_positions[0]-1);
1249 std::string outsymbol(str, quote_positions[2]+1, quote_positions[3]-quote_positions[2]-1);
1250 isymbol = deprologize_symbol(insymbol);
1251 osymbol = deprologize_symbol(outsymbol);
1257 HFSTDLL
static bool extract_weight(std::string & symbol,
float & weight)
1259 size_t last_double_quote = symbol.find_last_of(
'"');
1260 size_t last_space = symbol.find_last_of(
' ');
1263 if (last_double_quote == std::string::npos)
1266 if (last_space == std::string::npos) {
1269 else if (last_double_quote > last_space) {
1272 else if (last_double_quote + 2 == last_space && last_space < symbol.size()-1)
1274 std::istringstream buffer(symbol.substr(last_space+1));
1278 symbol.resize(last_space-1);
1286 HFSTDLL
static bool parse_prolog_arc_line(
const std::string & line,
HfstTransitionGraph & graph)
1289 char namestr[100];
char sourcestr[100];
1290 char targetstr[100];
char symbolstr[100];
1292 int n = sscanf(line.c_str(),
"arc(%[^,], %[^,], %[^,], %[^\t\n]",
1293 namestr, sourcestr, targetstr, symbolstr);
1295 std::string symbol(symbolstr);
1298 if (!strip_ending_parenthesis_and_comma(symbol))
1303 if (std::string(namestr) != graph.name)
1306 unsigned int source = atoi(sourcestr);
1307 unsigned int target = atoi(targetstr);
1311 if (! extract_weight(symbol, weight))
1314 std::string isymbol =
"";
1315 std::string osymbol =
"";
1317 if (!get_prolog_arc_symbols(symbol, isymbol, osymbol))
1320 graph.add_transition(source, HfstTransition<C>(target, isymbol, osymbol, weight));
1324 HFSTDLL
static bool parse_prolog_final_line(
const std::string & line,
HfstTransitionGraph & graph)
1329 char weightstr[100];
1332 unsigned int number_of_commas = 0;
1333 size_t pos = line.find(
',');
1334 while (pos != std::string::npos)
1337 pos = line.find(
',', pos+1);
1340 if (number_of_commas == 1)
1342 int n = sscanf(line.c_str(),
"final(%[^,], %[^)]).", namestr, finalstr);
1346 else if (number_of_commas == 2)
1348 int n = sscanf(line.c_str(),
"final(%[^,], %[^,], %[^)]).", namestr, finalstr, weightstr);
1351 std::istringstream buffer(weightstr);
1361 if (std::string(namestr) != graph.name)
1364 graph.set_final_weight(atoi(finalstr), weight);
1368 HFSTDLL
static bool parse_prolog_symbol_line(
const std::string & line,
HfstTransitionGraph & graph)
1372 char symbolarr[100];
1373 int n = sscanf(line.c_str(),
"symbol(%[^,], %s", namearr, symbolarr);
1378 std::string namestr(namearr);
1379 std::string symbolstr(symbolarr);
1381 if (namestr != graph.name)
1384 if (! strip_ending_parenthesis_and_comma(symbolstr))
1387 if (! strip_quotes_from_both_sides(symbolstr))
1390 graph.add_symbol_to_alphabet(deprologize_symbol(symbolstr));
1395 HFSTDLL
static std::string strip_newlines(std::string & str)
1397 for (
signed int i=(
signed int)str.length()-1; i >= 0; --i)
1399 if (str[i] ==
'\n' || str[i] ==
'\r')
1411 HFSTDLL
static std::string get_stripped_line
1412 (std::istream & is, FILE * file,
unsigned int & linecount)
1417 if (! is.getline(line,255).eof())
1421 if (NULL == fgets(line, 255, file))
1426 std::string linestr(line);
1427 return strip_newlines(linestr);
1439 (std::istream &is, FILE *file,
unsigned int & linecount)
1443 std::string linestr;
1449 linestr = get_stripped_line(is, file, linecount);
1456 if (linestr.length() != 0 && linestr[0] ==
'#')
1467 if (! parse_prolog_network_line(linestr, retval.name))
1469 std::string message(
"first line not valid prolog: ");
1470 message.append(linestr);
1478 linestr = get_stripped_line(is, file, linecount);
1489 if (! (parse_prolog_arc_line(linestr, retval) ||
1490 parse_prolog_final_line(linestr, retval) ||
1491 parse_prolog_symbol_line(linestr, retval)) )
1493 std::string message(
"line not valid prolog: ");
1494 message.append(linestr);
1503 unsigned int & linecount)
1505 return read_in_prolog_format
1512 unsigned int & linecount)
1514 return read_in_prolog_format
1524 (void)write_weights;
1525 unsigned int source_state=0;
1526 for (iterator it =
begin(); it !=
end(); it++)
1528 print_xfst_state(file, source_state);
1529 fprintf(file,
":\t");
1531 if (it->begin() == it->end())
1533 fprintf(file,
"(no arcs)");
1537 for (
typename HfstTransitions::iterator tr_it
1539 tr_it != it->end(); tr_it++)
1541 if (tr_it != it->begin())
1543 fprintf(file,
", ");
1547 print_xfst_arc(file, data);
1549 fprintf(file,
" -> ");
1553 fprintf(file,
".\n");
1565 unsigned int source_state=0;
1566 for (iterator it =
begin(); it !=
end(); it++)
1568 for (
typename HfstTransitions::iterator tr_it
1570 tr_it != it->end(); tr_it++)
1574 std::string isymbol = data.get_input_symbol();
1575 replace_all(isymbol,
" ",
"@_SPACE_@");
1576 replace_all(isymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1577 replace_all(isymbol,
"\t",
"@_TAB_@");
1579 std::string osymbol = data.get_output_symbol();
1580 replace_all(osymbol,
" ",
"@_SPACE_@");
1581 replace_all(osymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1582 replace_all(osymbol,
"\t",
"@_TAB_@");
1584 os << source_state <<
"\t"
1589 if (write_weights) {
1591 write_weight(os, data.get_weight());
1598 if (write_weights) {
1612 unsigned int source_state=0;
1613 for (iterator it =
begin(); it !=
end(); it++)
1615 for (
typename HfstTransitions::iterator tr_it
1617 tr_it != it->end(); tr_it++)
1621 std::string isymbol = data.get_input_symbol();
1622 replace_all(isymbol,
" ",
"@_SPACE_@");
1623 replace_all(isymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1624 replace_all(isymbol,
"\t",
"@_TAB_@");
1626 std::string osymbol = data.get_output_symbol();
1627 replace_all(osymbol,
" ",
"@_SPACE_@");
1628 replace_all(osymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1629 replace_all(osymbol,
"\t",
"@_TAB_@");
1631 fprintf(file,
"%i\t%i\t%s\t%s",
1637 if (write_weights) {
1638 fprintf(file,
"\t");
1639 write_weight(file, data.get_weight());
1641 fprintf(file,
"\n");
1645 fprintf(file,
"%i", source_state);
1646 if (write_weights) {
1647 fprintf(file,
"\t");
1650 fprintf(file,
"\n");
1658 unsigned int source_state=0;
1661 for (iterator it =
begin(); it !=
end(); it++)
1663 for (
typename HfstTransitions::iterator tr_it
1665 tr_it != it->end(); tr_it++)
1669 std::string isymbol = data.get_input_symbol();
1670 replace_all(isymbol,
" ",
"@_SPACE_@");
1671 replace_all(isymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1672 replace_all(isymbol,
"\t",
"@_TAB_@");
1674 std::string osymbol = data.get_output_symbol();
1675 replace_all(osymbol,
" ",
"@_SPACE_@");
1676 replace_all(osymbol,
"@_EPSILON_SYMBOL_@",
"@0@");
1677 replace_all(osymbol,
"\t",
"@_TAB_@");
1679 cw = sprintf(ptr + cwt,
"%i\t%i\t%s\t%s",
1688 cw = sprintf(ptr + cwt,
"\t%f",
1691 cw = sprintf(ptr + cwt,
"\n");
1696 cw = sprintf(ptr + cwt,
"%i", source_state);
1699 cw = sprintf(ptr + cwt,
"\t%f",
1702 cw = sprintf(ptr + cwt,
"\n");
1715 unsigned int source_state=0;
1716 for (iterator it =
begin(); it !=
end(); it++)
1718 for (
typename HfstTransitions::iterator tr_it
1720 tr_it != it->end(); tr_it++)
1724 fprintf(file,
"%i\t%i\t%i\t%i",
1727 tr_it->get_input_number(),
1728 tr_it->get_output_number());
1731 fprintf(file,
"\t%f",
1733 fprintf(file,
"\n");
1737 fprintf(file,
"%i", source_state);
1739 fprintf(file,
"\t%f",
1741 fprintf(file,
"\n");
1761 std::string epsilon_symbol,
1762 unsigned int & linecount) {
1780 if (! is.getline(line,255).eof())
1784 if (NULL == fgets(line, 255, file))
1794 (line[0] ==
'\0') ||
1795 (line[0] ==
'\n' && line[1] ==
'\0') ||
1797 (line[0] ==
'\r' && line[1] ==
'\n' && line[2] ==
'\0')
1812 char a1 [100];
char a2 [100];
char a3 [100];
1813 char a4 [100];
char a5 [100];
1815 int n = sscanf(line,
"%s%s%s%s%s", a1, a2, a3, a4, a5);
1824 if (n == 1 || n == 2)
1825 retval.set_final_weight( atoi(a1), weight );
1827 else if (n == 4 || n == 5) {
1828 std::string input_symbol=std::string(a3);
1829 std::string output_symbol=std::string(a4);
1833 replace_all(input_symbol,
"@_SPACE_@",
" ");
1834 replace_all(input_symbol,
"@0@",
"@_EPSILON_SYMBOL_@");
1835 replace_all(input_symbol,
"@_TAB_@",
"\t");
1836 replace_all(input_symbol,
"@_COLON_@",
":");
1838 replace_all(output_symbol,
"@_SPACE_@",
" ");
1839 replace_all(output_symbol,
"@0@",
"@_EPSILON_SYMBOL_@");
1840 replace_all(output_symbol,
"@_TAB_@",
"\t");
1841 replace_all(output_symbol,
"@_COLON_@",
":");
1843 if (epsilon_symbol.compare(input_symbol) == 0)
1844 input_symbol=
"@_EPSILON_SYMBOL_@";
1845 if (epsilon_symbol.compare(output_symbol) == 0)
1846 output_symbol=
"@_EPSILON_SYMBOL_@";
1848 HfstTransition <C> tr( atoi(a2), input_symbol,
1849 output_symbol, weight );
1850 retval.add_transition( atoi(a1), tr );
1854 std::string message(line);
1872 std::string epsilon_symbol,
1873 unsigned int & linecount)
1875 return read_in_att_format
1877 epsilon_symbol, linecount);
1888 std::string epsilon_symbol,
1889 unsigned int & linecount)
1891 return read_in_att_format
1893 file, epsilon_symbol, linecount);
1907 bool input_side=
true,
1908 bool output_side=
true)
1911 for (iterator it =
begin(); it !=
end(); it++)
1914 for (
unsigned int i=0; i < it->size(); i++)
1926 bool substitution_made=
false;
1930 substituting_input_symbol = new_symbol;
1931 substitution_made=
true;
1935 substituting_output_symbol = new_symbol;
1936 substitution_made=
true;
1940 if (substitution_made) {
1945 HfstTransition<C> tr
1947 substituting_input_symbol,
1948 substituting_output_symbol,
1951 it->operator[](i) = tr;
1966 void substitute_(
const HfstNumberVector &substitutions,
1967 unsigned int no_substitution)
1970 for (iterator it =
begin(); it !=
end(); it++)
1973 for (
unsigned int i=0; i < it->size(); i++)
1975 HfstTransition<C> &tr_it = it->operator[](i);
1977 HfstNumber old_inumber = tr_it.get_input_number();
1978 HfstNumber old_onumber = tr_it.get_output_number();
1980 HfstNumber new_inumber = substitutions.at(old_inumber);
1981 HfstNumber new_onumber = substitutions.at(old_onumber);
1984 if (new_inumber != no_substitution ||
1985 new_onumber != no_substitution)
1987 if (new_inumber != no_substitution)
1990 new_inumber = old_inumber;
1992 if (new_onumber != no_substitution)
1995 new_onumber = old_onumber;
1998 HfstTransition<C> tr
1999 (tr_it.get_target_state(),
2002 tr_it.get_weight(),
false);
2004 it->operator[](i) = tr;
2016 void substitute_(
const HfstNumberPairSubstitutions &substitutions)
2019 for (iterator it =
begin(); it !=
end(); it++)
2022 for (
unsigned int i=0; i < it->size(); i++)
2024 HfstTransition<C> &tr_it = it->operator[](i);
2026 HfstNumberPair old_number_pair
2027 ( tr_it.get_input_number(),
2028 tr_it.get_output_number() );
2030 HfstNumberPairSubstitutions::const_iterator subst_it
2031 = substitutions.find(old_number_pair);
2034 if (subst_it != substitutions.end()) {
2036 HfstNumber new_input_number = subst_it->second.first;
2037 HfstNumber new_output_number = subst_it->second.second;
2040 get_symbol(new_input_number));
2042 get_symbol(new_output_number));
2045 HfstTransition<C> tr
2046 (tr_it.get_target_state(),
2049 tr_it.get_weight(),
false);
2051 it->operator[](i) = tr;
2067 unsigned int in_match = C::get_number(sp.first);
2068 unsigned int out_match = C::get_number(sp.second);
2070 bool in_match_used =
false;
2071 bool out_match_used =
false;
2074 for (iterator it =
begin(); it !=
end(); it++)
2077 for (
unsigned int i=0; i < it->size(); i++)
2079 HfstTransition<C> &tr_it = it->operator[](i);
2082 unsigned int in_tr = tr_it.get_input_number();
2083 unsigned int out_tr = tr_it.get_output_number();
2084 if (in_tr == in_match && out_tr == out_match) {
2085 it->erase(it->begin()+i); }
2088 if (in_tr == in_match || out_tr == in_match) {
2089 in_match_used=
true; }
2090 if (in_tr == out_match || out_tr == out_match) {
2091 out_match_used=
true; }
2097 if (!in_match_used) {
2098 alphabet.erase(sp.first); }
2099 if (!out_match_used) {
2100 alphabet.erase(sp.second); }
2109 if (new_sps.empty())
2111 return remove_transitions(old_sp);
2114 unsigned int old_input_number = C::get_number(old_sp.first);
2115 unsigned int old_output_number = C::get_number(old_sp.second);
2118 bool substitution_performed=
false;
2121 for (iterator it =
begin(); it !=
end(); it++)
2127 for (
unsigned int i=0; i < it->size(); i++)
2129 HfstTransition<C> &tr_it = it->operator[](i);
2132 if (tr_it.get_input_number() == old_input_number &&
2133 tr_it.get_output_number() == old_output_number)
2135 substitution_performed=
true;
2139 typename HfstSymbolPairSet::const_iterator IT
2142 HfstTransition<C> tr
2143 (tr_it.get_target_state(),
2144 C::get_number(IT->first),
2145 C::get_number(IT->second),
2149 it->operator[](i) = tr;
2153 while (IT != new_sps.end())
2155 HfstTransition<C> TR
2156 (tr_it.get_target_state(),
2157 C::get_number(IT->first),
2158 C::get_number(IT->second),
2162 new_transitions.push_back(TR);
2173 NIT != new_transitions.end(); NIT++)
2175 it->push_back(*NIT);
2183 if (substitution_performed) {
2189 std::set<unsigned int> syms;
2195 syms.insert(old_input_number);
2196 syms.insert(old_output_number);
2197 prune_alphabet_after_substitution(syms);
2204 void substitute_(
bool (*func)
2208 for (iterator it =
begin(); it !=
end(); it++)
2214 for (
unsigned int i=0; i < it->size(); i++)
2216 HfstTransition<C> &tr_it = it->operator[](i);
2219 (tr_it.get_input_symbol(),
2220 tr_it.get_output_symbol());
2224 bool perform_substitution=
false;
2226 perform_substitution =
2227 (*func)(transition_symbol_pair, substituting_transitions);
2233 if (perform_substitution)
2237 typename HfstSymbolPairSet::const_iterator IT
2238 = substituting_transitions.begin();
2240 if (! C::is_valid_symbol(IT->first) ||
2241 ! C::is_valid_symbol(IT->second) )
2243 (EmptyStringException,
2244 "HfstTransitionGraph::substitute");
2246 HfstTransition<C> tr
2247 (tr_it.get_target_state(),
2250 tr_it.get_weight());
2252 it->operator[](i) = tr;
2259 while (IT != substituting_transitions.end())
2262 if (! C::is_valid_symbol(IT->first) ||
2263 ! C::is_valid_symbol(IT->second) )
2265 (EmptyStringException,
2266 "HfstTransitionGraph::substitute");
2268 HfstTransition<C> TR
2269 (tr_it.get_target_state(),
2272 tr_it.get_weight());
2274 new_transitions.push_back(TR);
2289 NIT != new_transitions.end(); NIT++)
2291 it->push_back(*NIT);
2311 bool input_side=
true,
2312 bool output_side=
true) {
2314 if (! C::is_valid_symbol(old_symbol) ||
2315 ! C::is_valid_symbol(new_symbol) ) {
2317 (EmptyStringException,
2318 "HfstTransitionGraph::substitute"); }
2321 if (old_symbol == new_symbol)
2324 if (alphabet.find(old_symbol) == alphabet.end())
2329 if (input_side && output_side) {
2331 if (! is_epsilon(old_symbol) &&
2332 ! is_unknown(old_symbol) &&
2333 ! is_identity(old_symbol)) {
2334 alphabet.erase(old_symbol); }
2337 alphabet.insert(new_symbol);
2339 substitute_(old_symbol, new_symbol, input_side, output_side);
2353 for (HfstSymbolSubstitutions::const_iterator it
2354 = substitutions.begin();
2355 it != substitutions.end(); it++)
2357 (void)get_symbol_number(it->first);
2358 (void)get_symbol_number(it->second);
2363 std::vector<unsigned int> substitutions_;
2365 unsigned int no_substitution = C::get_max_number()+substitutions.size()+1;
2366 substitutions_.resize
2367 (C::get_max_number()+1, no_substitution);
2368 for (HfstSymbolSubstitutions::const_iterator it
2369 = substitutions.begin();
2370 it != substitutions.end(); it++)
2372 HfstNumber from_symbol = get_symbol_number(it->first);
2373 HfstNumber to_symbol = get_symbol_number(it->second);
2375 substitutions_.at(from_symbol) = to_symbol;
2378 substitute_(substitutions_, no_substitution);
2397 HfstNumberPairSubstitutions substitutions_;
2398 for (HfstSymbolPairSubstitutions::const_iterator it
2399 = substitutions.begin();
2400 it != substitutions.end(); it++)
2402 HfstNumberPair from_transition
2403 (get_symbol_number(it->first.first),
2404 get_symbol_number(it->first.second));
2405 HfstNumberPair to_transition
2406 (get_symbol_number(it->second.first),
2407 get_symbol_number(it->second.second));
2408 substitutions_[from_transition] = to_transition;
2411 substitute_(substitutions_);
2421 if (! C::is_valid_symbol(sp.first) ||
2422 ! C::is_valid_symbol(sp.second) ) {
2424 (EmptyStringException,
2425 "HfstTransitionGraph::substitute"); }
2427 for (
typename HfstSymbolPairSet::const_iterator it = sps.begin();
2428 it != sps.end(); it++)
2430 if (! C::is_valid_symbol(it->first) ||
2431 ! C::is_valid_symbol(it->second) ) {
2433 (EmptyStringException,
2434 "HfstTransitionGraph::substitute"); }
2437 substitute_(sp, sps);
2448 if (! C::is_valid_symbol(old_pair.first) ||
2449 ! C::is_valid_symbol(new_pair.first) ||
2450 ! C::is_valid_symbol(old_pair.second) ||
2451 ! C::is_valid_symbol(new_pair.second) ) {
2453 (EmptyStringException,
2454 "HfstTransitionGraph::substitute"); }
2457 new_pair_set.insert(new_pair);
2458 substitute_(old_pair, new_pair_set);
2488 struct substitution_data
2492 typename C::WeightType weight;
2497 typename C::WeightType weight,
2500 origin_state=origin;
2501 target_state=target;
2502 this->weight=weight;
2503 substituting_graph=substituting;
2511 void add_substitution(
const substitution_data &sub) {
2514 HfstTransition <C> epsilon_transition
2515 (s, C::get_epsilon(), C::get_epsilon(),
2520 unsigned int offset = s;
2526 it != graph->end(); it++)
2528 for (
typename HfstTransitions::const_iterator tr_it
2530 tr_it != it->end(); tr_it++)
2532 C data = tr_it->get_transition_data();
2534 HfstTransition <C> transition
2535 (tr_it->get_target_state() + offset,
2536 data.get_input_symbol(),
2537 data.get_output_symbol(),
2546 for (
typename FinalWeightMap::const_iterator it
2547 = graph->final_weight_map.begin();
2548 it != graph->final_weight_map.end(); it++)
2550 HfstTransition <C> epsilon_transition
2551 (sub.target_state, C::get_epsilon(), C::get_epsilon(),
2582 if ( ! ( C::is_valid_symbol(sp.first) &&
2583 C::is_valid_symbol(sp.second) ) ) {
2585 (EmptyStringException,
2586 "HfstTransitionGraph::substitute(const HfstSymbolPair&, "
2587 "const HfstTransitionGraph&)");
2593 if (alphabet.find(sp.first) == alphabet.end() &&
2594 alphabet.find(sp.second) == alphabet.end())
2599 std::vector<substitution_data> substitutions;
2603 for (iterator it =
begin(); it !=
end(); it++)
2607 std::vector<typename HfstTransitions::iterator>
2611 for (
typename HfstTransitions::iterator tr_it
2613 tr_it != it->end(); tr_it++)
2615 C data = tr_it->get_transition_data();
2619 if (data.get_input_symbol() == sp.first &&
2620 data.get_output_symbol() == sp.second)
2623 substitutions.push_back(substitution_data
2625 tr_it->get_target_state(),
2629 old_transitions.push_back(tr_it);
2636 for (
typename std::vector<
typename
2637 HfstTransitions::iterator>::iterator IT =
2638 old_transitions.begin();
2639 IT != old_transitions.end(); IT++) {
2648 for (
typename std::vector<substitution_data>::iterator IT
2649 = substitutions.begin();
2650 IT != substitutions.end(); IT++)
2652 add_substitution(*IT);
2670 HFSTDLL std::string weight2marker(
float weight)
2672 std::ostringstream o;
2674 return std::string(
"@") + o.str() + std::string(
"@");
2681 for (
HfstState state = 0; state < limit; state++)
2684 std::stack<typename HfstTransitions::iterator>
2687 std::vector<HfstTransition <C> > new_transitions;
2690 for (
typename HfstTransitions::iterator tr_it
2691 = state_vector[state].
begin();
2692 tr_it != state_vector[state].end(); tr_it++)
2694 C data = tr_it->get_transition_data();
2698 if (data.get_weight() != 0 )
2701 new_transitions.push_back
2702 (HfstTransition <C> (tr_it->get_target_state(),
2703 data.get_input_symbol(),
2704 data.get_output_symbol(),
2705 data.get_weight()));
2707 old_transitions.push(tr_it);
2714 while (! old_transitions.empty()) {
2715 state_vector[state].erase(old_transitions.top());
2716 old_transitions.pop();
2720 for (
typename std::vector<HfstTransition <C> >::iterator IT
2721 = new_transitions.begin();
2722 IT != new_transitions.end(); IT++)
2725 std::string marker = weight2marker(IT->get_weight());
2727 HfstTransition <C> marker_transition(IT->get_target_state(),
2731 HfstTransition <C> new_transition(new_state,
2732 IT->get_input_symbol(),
2733 IT->get_output_symbol(),
2743 std::set<HfstState> final_states_to_remove;
2745 for (
typename FinalWeightMap::iterator fin_it = final_weight_map.begin();
2746 fin_it != final_weight_map.end(); fin_it++)
2748 if (fin_it->second != 0)
2752 std::string marker = weight2marker(fin_it->second);
2753 HfstTransition <C> epsilon_transition(new_state,
2758 final_states_to_remove.insert(fin_it->first);
2761 for (std::set<HfstState>::iterator it = final_states_to_remove.begin();
2762 it != final_states_to_remove.end(); it++)
2764 final_weight_map.erase(*it);
2773 typedef std::map<HfstSymbol, HfstTransitionGraph> SubstMap;
2779 bool symbol_found =
false;
2780 for (
typename SubstMap::const_iterator it = substitution_map.begin();
2781 it != substitution_map.end(); it++)
2783 if ( ! ( C::is_valid_symbol(it->first) ))
2786 "HfstTransitionGraph::substitute "
2787 "(const std::map<HfstSymbol, HfstTransitionGraph> &)");
2789 if (!symbol_found && alphabet.find(it->first) != alphabet.end())
2791 symbol_found =
true;
2802 std::set<String> substitutions_performed_for_symbols;
2806 std::vector<substitution_data> substitutions;
2810 for (iterator it =
begin(); it !=
end(); it++)
2814 std::stack<typename HfstTransitions::iterator>
2818 for (
typename HfstTransitions::iterator tr_it
2820 tr_it != it->end(); tr_it++)
2822 C data = tr_it->get_transition_data();
2826 String istr = data.get_input_symbol();
2827 String ostr = data.get_output_symbol();
2828 typename SubstMap::iterator map_it_input = substitution_map.find(istr);
2829 typename SubstMap::iterator map_it_output = substitution_map.find(ostr);
2831 if (map_it_input == substitution_map.end() &&
2832 map_it_output == substitution_map.end())
2836 else if (istr != ostr)
2838 std::string msg(
"symbol to be substituted must not occur only on one side of transition");
2844 substitution_data sd
2846 tr_it->get_target_state(),
2848 &(map_it_input->second));
2849 substitutions.push_back(sd);
2851 old_transitions.push(tr_it);
2853 substitutions_performed_for_symbols.insert(istr);
2860 while (!old_transitions.empty())
2862 it->erase(old_transitions.top());
2863 old_transitions.pop();
2871 for (StringSet::const_iterator sym_it = substitutions_performed_for_symbols.begin();
2872 sym_it != substitutions_performed_for_symbols.end(); sym_it++)
2874 if (*sym_it !=
"@_EPSILON_SYMBOL_@" && *sym_it !=
"@_UNKNOWN_SYMBOL_@" && *sym_it !=
"@_IDENTITY_SYMBOL_@")
2881 for (StringSet::iterator sym_it = substitutions_performed_for_symbols.begin();
2882 sym_it != substitutions_performed_for_symbols.end(); sym_it++)
2884 this->
harmonize(substitution_map.at(*sym_it));
2889 for (
typename std::vector<substitution_data>::iterator IT
2890 = substitutions.begin();
2891 IT != substitutions.end(); IT++)
2893 add_substitution(*IT);
2901 HFSTDLL
bool marker2weight(
const std::string & str,
float & weight)
2905 if (str.at(0) !=
'@' || str.at(str.size()-1) !=
'@')
2908 std::string weight_string = str.substr(1, str.size()-2);
2909 std::stringstream sstream(weight_string);
2911 if (sstream.fail()) {
2921 for (
HfstState state = 0; state < limit; state++)
2924 std::stack<typename HfstTransitions::iterator>
2927 std::vector<HfstTransition <C> > new_transitions;
2930 for (
typename HfstTransitions::iterator tr_it
2931 = state_vector[state].
begin();
2932 tr_it != state_vector[state].end(); tr_it++)
2934 C data = tr_it->get_transition_data();
2939 if ( (!marker2weight(data.get_input_symbol(), weight)) &&
2940 marker2weight(data.get_output_symbol(), weight) )
2944 new_transitions.push_back
2945 (HfstTransition <C> (tr_it->get_target_state(),
2946 data.get_input_symbol(),
2947 hfst::internal_epsilon,
2950 old_transitions.push(tr_it);
2953 else if (marker2weight(data.get_input_symbol(), weight) &&
2954 marker2weight(data.get_output_symbol(), weight) )
2958 old_transitions.push(tr_it);
2966 while (! old_transitions.empty()) {
2967 state_vector[state].erase(old_transitions.top());
2968 old_transitions.pop();
2972 for (
typename std::vector<HfstTransition <C> >::iterator IT
2973 = new_transitions.begin();
2974 IT != new_transitions.end(); IT++)
2976 state_vector[state].push_back(*IT);
2982 std::stack<StringSet::iterator> weight_markers;
2983 for (StringSet::iterator it = alphabet.begin();
2984 it != alphabet.end(); it++)
2987 if (marker2weight(*it, foo))
2989 weight_markers.push(it);
2992 while (! weight_markers.empty())
2994 alphabet.erase(weight_markers.top());
2995 weight_markers.pop();
3003 HFSTDLL
HfstTransitionGraph & substitute_symbol(
const std::string &old_symbol,
const std::string &new_symbol,
bool input_side=
true,
bool output_side=
true)
3004 {
return this->
substitute(old_symbol, new_symbol, input_side, output_side); }
3006 HFSTDLL
HfstTransitionGraph & substitute_symbol_pair(
const StringPair &old_symbol_pair,
const StringPair &new_symbol_pair)
3007 {
return this->
substitute(old_symbol_pair, new_symbol_pair); }
3010 {
return this->
substitute(old_symbol_pair, new_symbol_pair_set); }
3013 {
return this->
substitute(symbol_pair, transducer); }
3026 if ( ! ( C::is_valid_symbol(symbol_pair.first) &&
3027 C::is_valid_symbol(symbol_pair.second) ) ) {
3029 (EmptyStringException,
3030 "HfstTransitionGraph::insert_freely"
3031 "(const HfstSymbolPair&, W)");
3034 alphabet.insert(symbol_pair.first);
3035 alphabet.insert(symbol_pair.second);
3038 for (iterator it =
begin(); it !=
end(); it++) {
3040 symbol_pair.second, weight );
3052 typename C::WeightType weight)
3054 for (
typename HfstSymbolPairSet::const_iterator symbols_it
3055 = symbol_pairs.begin();
3056 symbols_it != symbol_pairs.end(); symbols_it++)
3058 if ( ! ( C::is_valid_symbol(symbols_it->first) &&
3059 C::is_valid_symbol(symbols_it->second) ) ) {
3061 (EmptyStringException,
3062 "HfstTransitionGraph::insert_freely"
3063 "(const HfstSymbolPairSet&, W)");
3066 alphabet.insert(symbols_it->first);
3067 alphabet.insert(symbols_it->second);
3071 for (iterator it =
begin(); it !=
end(); it++)
3073 for (
typename HfstSymbolPairSet::const_iterator symbols_it
3074 = symbol_pairs.begin();
3075 symbols_it != symbol_pairs.end(); symbols_it++)
3078 symbols_it->second, weight );
3092 HfstSymbol marker_this = C::get_marker(alphabet);
3093 HfstSymbol marker_graph = C::get_marker(alphabet);
3095 if (marker_graph > marker)
3096 marker = marker_graph;
3101 alphabet.erase(marker);
3139 HarmonizeUnknownAndIdentitySymbols foo(*
this, another);
3155 StringPairVector::const_iterator &it,
3159 if (it == spv.end()) {
3164 bool transition_found=
false;
3170 for (
typename HfstTransitions::iterator tr_it = tr.begin();
3171 tr_it != tr.end(); tr_it++)
3173 C data = tr_it->get_transition_data();
3174 if (data.get_input_symbol().compare(it->first) == 0 &&
3175 data.get_output_symbol().compare(it->second) == 0)
3177 transition_found=
true;
3178 next_state = tr_it->get_target_state();
3184 if (! transition_found)
3187 HfstTransition <C> transition(next_state, it->first,
3194 return disjunct(spv, it, next_state);
3221 StringPairVector::const_iterator it = spv.begin();
3222 HfstState final_state = disjunct(spv, it, INITIAL_STATE);
3228 if (old_weight < weight)
3235 HFSTDLL
bool is_special_symbol(
const std::string & symbol)
3237 if (symbol.size() < 2)
3239 if (symbol[0] ==
'@' && symbol[1] ==
'_')
3249 for (iterator it =
begin(); it !=
end(); it++)
3251 std::set<HfstSymbol> symbols_present;
3253 for (
typename HfstTransitions::iterator tr_it
3255 tr_it != it->end(); tr_it++)
3257 C data = tr_it->get_transition_data();
3259 if (data.get_input_symbol() != data.get_output_symbol())
3263 symbols_present.insert(data.get_input_symbol());
3265 for (std::set<std::string>::const_iterator alpha_it = alphabet.begin();
3266 alpha_it != alphabet.end(); alpha_it++)
3268 if (symbols_present.find(*alpha_it) ==
3269 symbols_present.end() &&
3270 ! is_special_symbol(*alpha_it))
3282 HFSTDLL StringSet get_flags()
const
3285 for (StringSet::const_iterator it = alphabet.begin();
3286 it != alphabet.end(); it++)
3288 if (FdOperation::is_diacritic(*it)) {
3298 HFSTDLL
bool purge_symbol(
const std::string & symbol,
const std::string & flag)
3300 if (! FdOperation::is_diacritic(symbol))
3304 else if (FdOperation::get_feature(symbol) == flag)
3312 HFSTDLL
void flag_purge(
const std::string & flag)
3315 for (iterator it =
begin(); it !=
end(); it++)
3317 for (
unsigned int i=0; i < it->size(); i++)
3319 HfstTransition<C> &tr_it = it->operator[](i);
3321 if ( purge_symbol(tr_it.get_input_symbol(), flag) ||
3322 purge_symbol(tr_it.get_output_symbol(), flag) )
3325 HfstTransition<C> tr
3326 (tr_it.get_target_state(),
"@_EPSILON_SYMBOL_@",
3327 "@_EPSILON_SYMBOL_@", tr_it.get_weight());
3328 it->operator[](i) = tr;
3333 StringSet extra_symbols;
3334 for (StringSet::const_iterator it = alphabet.begin();
3335 it != alphabet.end(); it++)
3337 if (purge_symbol(*it, flag))
3338 extra_symbols.insert(*it);
3341 remove_symbols_from_alphabet(extra_symbols);
3345 struct TopologicalSort
3347 std::vector<int> distance_of_state;
3348 std::vector<std::set<HfstState> > states_at_distance;
3352 HFSTDLL
void set_biggest_state_number(
unsigned int biggest_state_number)
3354 distance_of_state = std::vector<int>(biggest_state_number+1, -1);
3358 HFSTDLL
void set_state_at_distance(
HfstState state,
unsigned int distance,
3362 if (state > (distance_of_state.size() - 1))
3364 std::cerr <<
"ERROR in TopologicalSort::set_state_at_distance: first argument ("
3365 << state <<
") is out of range (should be < " << distance_of_state.size()
3366 <<
")" << std::endl;
3370 while (distance + 1 > (
unsigned int)states_at_distance.size())
3372 std::set<HfstState> empty_set;
3373 states_at_distance.push_back(empty_set);
3376 int previous_distance = distance_of_state.at(state);
3377 if (previous_distance != -1 && previous_distance != (
int)distance && overwrite)
3379 states_at_distance.at(previous_distance).erase(state);
3382 states_at_distance.at(distance).insert(state);
3383 distance_of_state.at(state) = distance;
3387 HFSTDLL
const std::set<HfstState> & get_states_at_distance(
unsigned int distance)
3391 while (distance > (states_at_distance.size() - 1))
3393 std::set<HfstState> empty_set;
3394 states_at_distance.push_back(empty_set);
3396 return states_at_distance.at(distance);
3400 enum SortDistance { MaximumDistance, MinimumDistance };
3408 HFSTDLL std::vector<std::set<HfstState> > topsort(SortDistance dist)
const
3410 typedef std::set<HfstState>::const_iterator StateIt;
3411 unsigned int current_distance = 0;
3412 TopologicalSort TopSort;
3413 TopSort.set_biggest_state_number(state_vector.size()-1);
3414 TopSort.set_state_at_distance(0,current_distance,(dist == MaximumDistance));
3415 bool new_states_found =
false;
3419 new_states_found =
false;
3421 std::set<HfstState> new_states;
3424 const std::set<HfstState> &
states =
3425 TopSort.get_states_at_distance(current_distance);
3426 for (StateIt state_it = states.begin();
3427 state_it != states.end(); state_it++)
3431 = this->state_vector.at(*state_it);
3432 for (
typename HfstTransitions::const_iterator transition_it
3433 = transitions.begin();
3434 transition_it != transitions.end(); transition_it++)
3436 new_states_found =
true;
3437 new_states.insert(transition_it->get_target_state());
3445 for (StateIt it = new_states.begin();
3446 it != new_states.end(); it++)
3448 TopSort.set_state_at_distance(*it, current_distance + 1, (dist == MaximumDistance));
3452 while (new_states_found);
3454 return TopSort.states_at_distance;
3462 std::vector<std::set<HfstState> > states_sorted = this->topsort(MaximumDistance);
3464 for (
int distance = states_sorted.size() - 1; distance >= 0; distance--)
3466 const std::set<HfstState> & states
3467 = states_sorted.at((
unsigned int)distance);
3469 for (std::set<HfstState>::const_iterator it = states.begin();
3470 it != states.end(); it++)
3488 std::vector<unsigned int> result;
3490 std::vector<std::set<HfstState> > states_sorted = this->topsort(MinimumDistance);
3492 for (
int distance = states_sorted.size() - 1; distance >= 0; distance--)
3494 const std::set<HfstState> & states
3495 = states_sorted.at((
unsigned int)distance);
3497 for (std::set<HfstState>::const_iterator it = states.begin();
3498 it != states.end(); it++)
3504 result.push_back((
unsigned int)distance);
3512 bool has_negative_epsilon_cycles
3515 std::map<HfstState, float> & state_weights)
3517 std::map<HfstState, float>::const_iterator it = state_weights.find(state);
3518 if (it != state_weights.end())
3520 if (total_weight - it->second < 0)
3526 state_weights[state] = total_weight;
3531 for (HfstBasicTransducer::HfstTransitions::const_iterator it
3532 = transitions.begin();
3533 it != transitions.end(); it++)
3535 if (is_epsilon(it->get_input_symbol()) && is_epsilon(it->get_output_symbol()))
3537 if (has_negative_epsilon_cycles
3538 (it->get_target_state(), total_weight + it->get_weight(), state_weights))
3542 state_weights.erase(state);
3546 bool has_negative_epsilon_cycles()
3548 bool has_negative_epsilon_transitions =
false;
3549 for (iterator it =
begin(); it !=
end(); it++)
3551 for (
typename HfstTransitions::iterator tr_it
3553 tr_it != it->end(); tr_it++)
3555 if (is_epsilon(tr_it->get_input_symbol()) && is_epsilon(tr_it->get_output_symbol()) && tr_it->get_weight() < 0)
3557 has_negative_epsilon_transitions =
true;
3562 if (! has_negative_epsilon_transitions)
3567 std::map<HfstState, float> state_weights;
3568 for (
unsigned int state = INITIAL_STATE; state < (this->
get_max_state()+1); state++)
3570 if (has_negative_epsilon_cycles(state, 0, state_weights))
3576 HFSTDLL
bool is_infinitely_ambiguous
3578 std::set<HfstState> &epsilon_path_states,
3579 std::vector<unsigned int> &states_handled)
3581 if (states_handled[state] != 0)
3587 for (HfstBasicTransducer::HfstTransitions::const_iterator it
3588 = transitions.begin();
3589 it != transitions.end(); it++)
3594 if ( is_epsilon(it->get_input_symbol()) ||
3595 FdOperation::is_diacritic(it->get_input_symbol()) )
3597 epsilon_path_states.insert(state);
3598 if (epsilon_path_states.find(it->get_target_state())
3599 != epsilon_path_states.end())
3603 if (is_infinitely_ambiguous
3604 (it->get_target_state(), epsilon_path_states, states_handled))
3608 epsilon_path_states.erase(state);
3612 states_handled[state] = 1;
3616 HFSTDLL
bool is_infinitely_ambiguous()
3618 std::set<HfstState> epsilon_path_states;
3620 std::vector<unsigned int> states_handled(max_state+1, 0);
3622 for (
unsigned int state = INITIAL_STATE; state < (max_state+1); state++)
3624 if (is_infinitely_ambiguous(state, epsilon_path_states, states_handled))
3630 HFSTDLL
bool is_lookup_infinitely_ambiguous
3633 std::set<HfstState> &epsilon_path_states
3637 bool only_epsilons=
false;
3638 if ((
unsigned int)s.second.size() == index)
3646 for (HfstBasicTransducer::HfstTransitions::const_iterator it
3647 = transitions.begin();
3648 it != transitions.end(); it++)
3655 if ( is_epsilon(it->get_input_symbol()) ||
3656 FdOperation::is_diacritic(it->get_input_symbol()) )
3658 epsilon_path_states.insert(state);
3659 if (epsilon_path_states.find(it->get_target_state())
3660 != epsilon_path_states.end())
3664 if (is_lookup_infinitely_ambiguous
3665 (s, index, it->get_target_state(), epsilon_path_states))
3669 epsilon_path_states.erase(state);
3675 else if (! only_epsilons)
3677 bool continu =
false;
3678 if (it->get_input_symbol().compare(s.second.at(index)) == 0)
3680 else if ((it->get_input_symbol().compare(
"@_UNKNOWN_SYMBOL_@") ||
3681 it->get_input_symbol().compare(
"@_IDENTITY_SYMBOL_@"))
3683 (alphabet.find(s.second.at(index)) == alphabet.end()))
3691 std::set<HfstState> empty_set;
3692 if (is_lookup_infinitely_ambiguous
3693 (s, index, it->get_target_state(), empty_set))
3706 std::set<HfstState> epsilon_path_states;
3707 epsilon_path_states.insert(0);
3708 unsigned int index=0;
3710 return is_lookup_infinitely_ambiguous(s, index, INITIAL_STATE,
3711 epsilon_path_states);
3714 HFSTDLL
bool is_lookup_infinitely_ambiguous(
const StringVector & s)
3716 std::set<HfstState> epsilon_path_states;
3717 epsilon_path_states.insert(0);
3718 unsigned int index=0;
3721 return is_lookup_infinitely_ambiguous(path, index, INITIAL_STATE,
3722 epsilon_path_states);
3727 HFSTDLL
static void push_back_to_two_level_path
3729 const StringPair &sp,
3730 const float &weight)
3732 path.second.push_back(sp);
3733 path.first = path.first + weight;
3736 HFSTDLL
static void pop_back_from_two_level_path
3738 const float &weight)
3740 path.second.pop_back();
3741 path.first = path.first - weight;
3744 HFSTDLL
static void add_to_results
3747 const float &final_weight,
3750 path_so_far.first = path_so_far.first + final_weight;
3752 if (max_weight == NULL)
3754 results.insert(path_so_far);
3756 else if (!(path_so_far.first > *max_weight))
3758 results.insert(path_so_far);
3764 path_so_far.first = path_so_far.first - final_weight;
3767 HFSTDLL
static bool is_possible_transition
3769 const StringVector &lookup_path,
3770 const unsigned int &lookup_index,
3771 const StringSet &alphabet,
3772 bool &input_symbol_consumed)
3774 std::string isymbol = transition.get_input_symbol();
3777 if (! (lookup_index == (
unsigned int)lookup_path.size()))
3782 if ( isymbol.compare(lookup_path.at(lookup_index)) == 0 ||
3786 ( (is_identity(isymbol) || is_unknown(isymbol)) &&
3787 (alphabet.find(lookup_path.at(lookup_index))
3788 == alphabet.end()) )
3791 input_symbol_consumed=
true;
3798 if ( is_epsilon(isymbol) ||
3799 FdOperation::is_diacritic(isymbol) )
3801 input_symbol_consumed=
false;
3809 HFSTDLL
void lookup_fd
3810 (
const StringVector &lookup_path,
3813 unsigned int lookup_index,
3815 StringSet &alphabet,
3816 HfstEpsilonHandler Eh,
3817 size_t infinite_cutoff,
3818 float * max_weight = NULL)
3821 if (! Eh.can_continue(state)) {
3825 if (max_weight != NULL && path_so_far.first > *max_weight) {
3830 if (lookup_index == lookup_path.size())
3845 for (HfstBasicTransducer::HfstTransitions::const_iterator it
3846 = transitions.begin();
3847 it != transitions.end(); it++)
3849 bool input_symbol_consumed=
false;
3850 if ( is_possible_transition
3851 (*it, lookup_path, lookup_index, alphabet,
3852 input_symbol_consumed) )
3859 if (is_identity(it->get_input_symbol()))
3861 istr = lookup_path.at(lookup_index);
3866 if (is_unknown(it->get_input_symbol()))
3867 istr = lookup_path.at(lookup_index);
3869 istr = it->get_input_symbol();
3874 ostr = it->get_output_symbol();
3877 push_back_to_two_level_path
3882 HfstEpsilonHandler * Ehp = NULL;
3883 if (input_symbol_consumed) {
3885 Ehp =
new HfstEpsilonHandler(infinite_cutoff);
3888 Eh.push_back(state);
3893 lookup_fd(lookup_path, results, it->get_target_state(),
3894 lookup_index, path_so_far, alphabet, *Ehp, infinite_cutoff, max_weight);
3898 if (input_symbol_consumed) {
3907 pop_back_from_two_level_path(path_so_far, it->get_weight());
3913 HFSTDLL
void lookup_fd
3914 (
const StringVector &lookup_path,
3916 size_t * infinite_cutoff = NULL,
3917 float * max_weight = NULL)
3920 unsigned int lookup_index = 0;
3923 if (infinite_cutoff != NULL)
3925 HfstEpsilonHandler Eh(*infinite_cutoff);
3926 lookup_fd(lookup_path, results, state, lookup_index, path_so_far,
3927 alphabet, Eh, *infinite_cutoff, max_weight);
3931 HfstEpsilonHandler Eh(100000);
3932 lookup_fd(lookup_path, results, state, lookup_index, path_so_far,
3933 alphabet, Eh, 100000, max_weight);
3938 HFSTDLL
void check_regexp_state_for_cycle(
HfstState s,
const std::set<HfstState> & states_visited)
3940 if (states_visited.find(s) != states_visited.end())
3942 throw "error: loop detected inside compile-replace regular expression";
3949 std::string istr = tr.get_input_symbol();
3950 std::string ostr = tr.get_output_symbol();
3952 if (input_side && is_epsilon(istr))
3954 else if (!input_side && is_epsilon(ostr))
3956 else if ((input_side && is_special_symbol(istr)) || (!input_side && is_special_symbol(ostr)))
3958 throw "error: special symbol detected in compile-replace regular expression";
3963 if ((input_side && (
"^[" == istr)) || (!input_side && (
"^[" == ostr)))
3965 throw "error: ^[ detected inside compile-replace regular expression";
3967 if ((input_side && (
"^]" == istr)) || (!input_side && (
"^]" == ostr)))
3986 HFSTDLL
void find_regexp_paths
3988 std::set<HfstState> & states_visited,
3989 std::vector<std::pair<std::string, std::string> > & path,
3990 HfstReplacements & full_paths,
bool input_side)
3993 check_regexp_state_for_cycle(s, states_visited);
3994 states_visited.insert(s);
3999 for (HfstBasicTransducer::HfstTransitions::const_iterator it
4000 = transitions.begin();
4001 it != transitions.end(); it++)
4004 if (check_regexp_transition_end(*it, input_side))
4007 check_regexp_state_for_cycle(it->get_target_state(), states_visited);
4009 path.push_back(std::pair<std::string, std::string>(it->get_input_symbol(), it->get_output_symbol()));
4010 full_paths.push_back
4011 (HfstReplacement(it->get_target_state(), path));
4017 path.push_back(
StringPair(it->get_input_symbol(), it->get_output_symbol()));
4019 (it->get_target_state(),
4022 full_paths, input_side);
4027 states_visited.erase(s);
4039 HFSTDLL
void find_regexp_paths
4041 std::vector<std::pair<
HfstState, std::vector<std::pair<std::string, std::string> > > > & full_paths,
4047 for (HfstBasicTransducer::HfstTransitions::const_iterator it
4048 = transitions.begin();
4049 it != transitions.end(); it++)
4051 std::string istr = it->get_input_symbol();
4052 std::string ostr = it->get_output_symbol();
4053 if ((input_side && (
"^[" == istr)) || (!input_side && (
"^[" == ostr)))
4059 std::set<HfstState> states_visited;
4060 states_visited.insert(s);
4061 std::vector<std::pair<std::string, std::string> > path;
4062 path.push_back(std::pair<std::string, std::string>(istr, ostr));
4063 find_regexp_paths(it->get_target_state(), states_visited, path, full_paths, input_side);
4072 HFSTDLL HfstReplacementsMap find_replacements(
bool input_side)
4074 HfstReplacementsMap replacements;
4075 unsigned int state = 0;
4076 for (iterator it =
begin(); it !=
end(); it++)
4079 HfstReplacements full_paths;
4080 find_regexp_paths(state, full_paths, input_side);
4081 if (full_paths.size() > 0)
4083 replacements[state] = full_paths;
4087 return replacements;
4101 it != graph.end(); it++)
4103 for (
typename HfstTransitions::const_iterator tr_it
4105 tr_it != it->end(); tr_it++)
4107 C data = tr_it->get_transition_data();
4109 HfstTransition <C> transition
4110 (tr_it->get_target_state() + offset,
4111 data.get_input_symbol(),
4112 data.get_output_symbol(),
4121 for (
typename FinalWeightMap::const_iterator it
4122 = graph.final_weight_map.begin();
4123 it != graph.final_weight_map.end(); it++)
4125 HfstTransition <C> epsilon_transition
4126 (state2, C::get_epsilon(), C::get_epsilon(),
4132 HfstTransition <C> epsilon_transition
4133 (offset, C::get_epsilon(), C::get_epsilon(), 0);
4137 typedef std::pair<HfstState, HfstState> StatePair;
4138 typedef std::map<StatePair, HfstState> StateMap;
4147 StatePair state_pair(target1, target2);
4148 StateMap::const_iterator it = state_map.find(state_pair);
4149 if (it != state_map.end())
4151 was_new_state=
false;
4154 HfstState retval = intersection.add_state();
4155 state_map[state_pair] = retval;
4168 HfstState target1 = tr1.get_target_state();
4169 HfstState target2 = tr2.get_target_state();
4170 bool was_new_state =
false;
4172 (target1, target2, state_map, intersection, was_new_state);
4174 float transition_weight = tr1.get_weight() + tr2.get_weight();
4175 intersection.add_transition
4176 (state, HfstTransition <C>
4177 (retval, tr1.get_input_symbol(), tr1.get_output_symbol(), transition_weight));
4180 if (was_new_state && (graph1.is_final_state(target1) && graph2.is_final_state(target2)))
4182 float final_weight = graph1.get_final_weight(target1) + graph2.get_final_weight(target2);
4183 intersection.set_final_weight(retval, final_weight);
4202 HFSTDLL
static void find_matches
4206 agenda.insert(state);
4210 if (tr1.size() == 0 || tr2.size() == 0)
4214 unsigned int start_search_from=0;
4221 for (
unsigned int i=0; i < tr1.size(); i++)
4223 HfstTransition <C> & transition1 = tr1[i];
4225 const C & transition_data1 = transition1.get_transition_data();
4228 for(
unsigned int j=start_search_from; j < tr2.size(); j++)
4230 HfstTransition <C> & transition2 = tr2[j];
4232 const C & transition_data2 = transition2.get_transition_data();
4234 if (transition_data2.less_than_ignore_weight(transition_data1))
4238 else if (transition_data1.less_than_ignore_weight(transition_data2))
4242 start_search_from=j;
4248 HfstState target = handle_match(graph1, transition1, graph2, transition2, intersection, state, state_map);
4250 if (agenda.find(target) == agenda.end())
4252 find_matches(graph1, transition1.get_target_state(), graph2, transition2.get_target_state(), intersection, target, state_map, agenda);
4255 start_search_from=j+1;
4270 std::set<HfstState> agenda;
4273 state_map[StatePair(0, 0)] = 0;
4275 if (graph1.is_final_state(0) && graph2.is_final_state(0))
4277 float final_weight = std::min(graph1.get_final_weight(0), graph2.get_final_weight(0));
4278 retval.set_final_weight(0, final_weight);
4281 find_matches(graph1, 0, graph2, 0, retval, 0, state_map, agenda);
4294 HfstState graph_target = graph_transition.get_target_state();
4295 bool was_new_state =
false;
4297 (graph_target, merger_target, state_map, result, was_new_state);
4298 result.add_transition
4299 (result_state, HfstTransition <C>
4300 (retval, graph_transition.get_input_symbol(), graph_transition.get_output_symbol(), graph_transition.get_weight()));
4303 if (was_new_state && (graph.is_final_state(graph_target) && merger.is_final_state(merger_target)))
4305 float final_weight = graph.get_final_weight(graph_target) + merger.get_final_weight(merger_target);
4306 result.set_final_weight(retval, final_weight);
4319 HfstState graph_target = graph_transition.get_target_state();
4320 HfstState merger_target = merger_transition.get_target_state();
4321 bool was_new_state =
false;
4323 (graph_target, merger_target, state_map, result, was_new_state);
4325 float transition_weight = graph_transition.get_weight() + merger_transition.get_weight();
4328 HfstState extra_state = result.add_state();
4329 result.add_transition
4330 (result_state, HfstTransition <C>
4331 (extra_state,
"@" + graph_transition.get_input_symbol() +
"@",
"@" + graph_transition.get_output_symbol() +
"@", 0));
4332 markers_added.insert(
"@" + graph_transition.get_input_symbol() +
"@");
4334 result.add_transition
4335 (extra_state , HfstTransition <C>
4336 (retval, merger_transition.get_input_symbol(), merger_transition.get_output_symbol(), transition_weight));
4339 if (was_new_state && (graph.is_final_state(graph_target) && merger.is_final_state(merger_target)))
4341 float final_weight = graph.get_final_weight(graph_target) + merger.get_final_weight(merger_target);
4342 result.set_final_weight(retval, final_weight);
4349 HFSTDLL
static bool is_list_symbol(
const C & transition_data,
const std::map<std::string, std::set<std::string> > & list_symbols)
4351 std::string isymbol = transition_data.get_input_symbol();
4352 std::string osymbol = transition_data.get_output_symbol();
4354 if (isymbol != osymbol)
4356 throw "is_list_symbol: input and output symbols must be the same";
4358 return (list_symbols.find(isymbol) != list_symbols.end());
4409 HFSTDLL
static void find_matches_for_merge
4412 const std::map<std::string, std::set<std::string> > & list_symbols, std::set<std::string> & markers_added)
4414 agenda.insert(result_state);
4415 HfstTransitions & graph_transitions = graph.state_vector[graph_state];
4416 HfstTransitions & merger_transitions = merger.state_vector[merger_state];
4418 if (graph_transitions.size() == 0)
4424 for (
unsigned int i=0; i < graph_transitions.size(); i++)
4426 HfstTransition <C> & graph_transition = graph_transitions[i];
4427 const C & graph_transition_data = graph_transition.get_transition_data();
4430 if (is_list_symbol(graph_transition_data, list_symbols))
4432 const std::set<std::string> & symbol_list = list_symbols.find(graph_transition_data.get_input_symbol())->second;
4433 bool list_match_found=
false;
4435 for(
unsigned int j=0; j < merger_transitions.size(); j++)
4437 HfstTransition <C> & merger_transition = merger_transitions[j];
4438 const C & merger_transition_data = merger_transition.get_transition_data();
4439 const std::string & isymbol = merger_transition_data.get_input_symbol();
4440 const std::string & osymbol = merger_transition_data.get_output_symbol();
4442 if (isymbol != osymbol)
4444 throw "find_matches_for_merge: input and output symbols must be the same";
4447 if (symbol_list.find(isymbol) != symbol_list.end())
4449 list_match_found=
true;
4450 HfstState target = handle_list_match(graph, graph_transition, merger, merger_transition, result, result_state, state_map, markers_added);
4451 if (agenda.find(target) == agenda.end())
4453 find_matches_for_merge(graph, graph_transition.get_target_state(), merger, merger_transition.get_target_state(), result, target, state_map, agenda, list_symbols, markers_added);
4457 if (list_match_found)
4464 HfstState target = handle_non_list_match(graph, graph_transition, merger, merger_state, result, result_state, state_map);
4465 if (agenda.find(target) == agenda.end())
4467 find_matches_for_merge(graph, graph_transition.get_target_state(), merger, merger_state, result, target, state_map, agenda, list_symbols, markers_added);
4480 std::set<HfstState> agenda;
4483 state_map[StatePair(0, 0)] = 0;
4485 if (graph.is_final_state(0) && merger.is_final_state(0))
4487 float final_weight = graph.get_final_weight(0) + merger.get_final_weight(0);
4488 result.set_final_weight(0, final_weight);
4493 find_matches_for_merge(graph, 0, merger, 0, result, 0, state_map, agenda, list_symbols, markers_added);
4495 catch (
const char * msg)
4533 friend class ConversionFunctions;
4534 friend class hfst::HarmonizeUnknownAndIdentitySymbols;
4541 typedef HfstTransitionGraph <HfstTropicalTransducerTransitionData>
4552 #endif // #ifndef _HFST_TRANSITION_GRAPH_H_
HFSTDLL C::WeightType get_weight() const
Get the weight of the transition.
Definition: HfstTransition.h:124
HFSTDLL HfstTransitionGraph(const hfst::HfstTransducer &transducer)
Create an HfstTransitionGraph equivalent to HfstTransducer transducer. FIXME: move to a separate file...
Definition: HfstTransitionGraph.h:250
HFSTDLL void add_symbol_to_alphabet(const HfstSymbol &symbol)
Explicitly add symbol to the alphabet of the graph.
Definition: HfstTransitionGraph.h:348
std::pair< String, String > StringPair
A symbol pair in a transition.
Definition: HfstSymbolDefs.h:70
HFSTDLL void write_in_xfst_format(FILE *file, bool write_weights=true)
Write the graph in xfst text format to FILE file. write_weights defines whether weights are printed (...
Definition: HfstTransitionGraph.h:1522
HFSTDLL void set_final_weight(HfstState s, const typename C::WeightType &weight)
Set the final weight of state s in this graph to weight.
Definition: HfstTransitionGraph.h:639
HfstBasicStates states_and_transitions() const
The states of the graph and their transitions.
Definition: HfstTransitionGraph.h:198
HFSTDLL void add_transition(HfstState s, const HfstTransition< C > &transition, bool add_symbols_to_alphabet=true)
Add a transition transition to state s.
Definition: HfstTransitionGraph.h:556
std::string String
A UTF-8 symbol in a transition.
Definition: HfstSymbolDefs.h:59
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:105
unsigned int HfstState
The number of a state in an HfstTransitionGraph.
Definition: HfstDataTypes.h:119
A synchronous finite-state transducer.
Definition: HfstTransducer.h:253
HFSTDLL void write_in_xfst_format(std::ostream &os, bool write_weights=true)
Write the graph in xfst text format to ostream os. write_weights defines whether weights are printed ...
Definition: HfstTransitionGraph.h:906
HFSTDLL HfstTransitions & transitions(HfstState s)
Get mutable transitions.
Definition: HfstTransitionGraph.h:704
HFSTDLL const_iterator begin() const
Get a const iterator to the beginning of states in the graph.
Definition: HfstTransitionGraph.h:668
HFSTDLL HfstTransitionGraph & sort_arcs(void)
Sort the arcs of this transducer according to input and output symbols.
Definition: HfstTransitionGraph.h:647
HFSTDLL void remove_symbol_from_alphabet(const HfstSymbol &symbol)
Remove symbol symbol from the alphabet of the graph.
Definition: HfstTransitionGraph.h:356
HFSTDLL HfstTransitionGraph & substitute(const HfstSymbol &old_symbol, const HfstSymbol &new_symbol, bool input_side=true, bool output_side=true)
Substitute old_symbol with new_symbol in all transitions. input_side and output_side define whether t...
Definition: HfstTransitionGraph.h:2309
HFSTDLL iterator begin()
Get an iterator to the beginning of the states in the graph.
Definition: HfstTransitionGraph.h:664
State is not final (and cannot have a final weight).
Definition: HfstExceptionDefs.h:262
HFSTDLL void add_symbols_to_alphabet(const HfstSymbolSet &symbols)
Same as add_symbol_to_alphabet for each symbol in symbols.
Definition: HfstTransitionGraph.h:370
std::set< HfstSymbol > HfstSymbolSet
A set of symbol pairs.
Definition: HfstTransitionGraph.h:139
HFSTDLL HfstTransitionGraph & insert_freely(const HfstSymbolPair &symbol_pair, typename C::WeightType weight)
Insert freely any number of symbol_pair in the graph with weight weight.
Definition: HfstTransitionGraph.h:3024
HFSTDLL C::WeightType get_final_weight(HfstState s) const
Definition: HfstTransitionGraph.h:627
HFSTDLL const_iterator end() const
Get a const iterator to the end of states (last state + 1) in the graph.
Definition: HfstTransitionGraph.h:676
Base class for HfstExceptions. Holds its own name and the file and line number where it was thrown...
Definition: HfstExceptionDefs.h:26
HFSTDLL HfstState get_target_state() const
Get the target state of the transition.
Definition: HfstTransition.h:94
HFSTDLL HfstTransitionGraph & substitute(const HfstSymbolPair &sp, const HfstTransitionGraph &graph)
Substitute all transitions old_symbol : new_symbol with a copy of graph.
Definition: HfstTransitionGraph.h:2579
HFSTDLL const HfstTransitions & transitions(HfstState s) const
Alternative name for operator[].
Definition: HfstTransitionGraph.h:697
A simple transition graph format that consists of states and transitions between those states...
Definition: HfstDataTypes.h:112
std::vector< HfstTransition< C > > HfstTransitions
Datatype for the states of a transition in a graph.
Definition: HfstTransitionGraph.h:146
HFSTDLL void write_in_att_format_number(FILE *file, bool write_weights=true)
Write the graph in AT&T format to FILE file using numbers instead of symbol names. write_weights defines whether weights are printed.
Definition: HfstTransitionGraph.h:1713
HfstStates::const_iterator const_iterator
A const iterator type that points a state in a graph.
Definition: HfstTransitionGraph.h:184
HFSTDLL void write_in_att_format(std::ostream &os, bool write_weights=true)
Write the graph in AT&T format to ostream os. write_weights defines whether weights are printed...
Definition: HfstTransitionGraph.h:1563
HFSTDLL HfstTransitionGraph & operator=(const HfstTransitionGraph &graph)
The assignment operator.
Definition: HfstTransitionGraph.h:224
#define HFST_THROW(E)
Macro to throw an exception of type E. Use THROW instead of regular throw with subclasses of HfstExce...
Definition: HfstExceptionDefs.h:40
std::set< HfstTwoLevelPath > HfstTwoLevelPaths
A set of two-level weighted paths.
Definition: HfstDataTypes.h:109
std::string name
The name of the graph.
Definition: HfstTransitionGraph.h:187
The stream is at end.
Definition: HfstExceptionDefs.h:161
std::vector< HfstSymbolPair > HfstSymbolPairVector
A vector of symbol pairs.
Definition: HfstTransitionGraph.h:141
The StateId argument is not valid.
Definition: HfstExceptionDefs.h:316
HFSTDLL void write_in_prolog_format(std::ostream &os, const std::string &name, bool write_weights=true)
Write the graph in prolog format to ostream os. write_weights defines whether weights are printed (to...
Definition: HfstTransitionGraph.h:1072
HFSTDLL HfstState add_state(void)
Add a new state to this graph and return its number.
Definition: HfstTransitionGraph.h:528
HFSTDLL HfstState get_max_state() const
Get the biggest state number in use.
Definition: HfstTransitionGraph.h:549
HFSTDLL void prune_alphabet(bool force=true)
Remove all symbols that do not occur in transitions of the graph from its alphabet.
Definition: HfstTransitionGraph.h:450
HFSTDLL const C & get_transition_data() const
Get the transition data of the transition.
Definition: HfstTransition.h:99
HFSTDLL int longest_path_size()
Definition: HfstTransitionGraph.h:3459
HFSTDLL C::SymbolType get_output_symbol() const
Get the output symbol of the transition.
Definition: HfstTransition.h:109
C::SymbolType HfstSymbol
Datatype for a symbol in a transition.
Definition: HfstTransitionGraph.h:132
HFSTDLL HfstTransitionGraph & harmonize(HfstTransitionGraph &another)
Harmonize this HfstTransitionGraph and another.
Definition: HfstTransitionGraph.h:3137
std::set< StringPair > StringPairSet
A set of symbol pairs used in substituting symbol pairs and in rule functions.
Definition: HfstSymbolDefs.h:82
HFSTDLL bool is_final_state(HfstState s) const
Whether state s is final. FIXME: return positive infinity instead if not final.
Definition: HfstTransitionGraph.h:622
HFSTDLL void write_in_att_format(FILE *file, bool write_weights=true)
Write the graph in AT&T format to FILE file. write_weights defines whether weights are printed...
Definition: HfstTransitionGraph.h:1610
HFSTDLL C::SymbolType get_input_symbol() const
Get the input symbol of the transition.
Definition: HfstTransition.h:104
HFSTDLL std::vector< unsigned int > path_sizes()
Definition: HfstTransitionGraph.h:3486
std::pair< float, StringVector > HfstOneLevelPath
A path of one level of arcs with collected weight.
Definition: HfstDataTypes.h:96
Transducers are not automata.
Definition: HfstExceptionDefs.h:303
std::pair< HfstSymbol, HfstSymbol > HfstSymbolPair
Datatype for a symbol pair in a transition.
Definition: HfstTransitionGraph.h:135
HFSTDLL HfstTransitionGraph(const HfstTransitionGraph &graph)
Create a deep copy of HfstTransitionGraph graph.
Definition: HfstTransitionGraph.h:241
std::vector< HfstState > states() const
The states of the graph.
Definition: HfstTransitionGraph.h:190
#define HFST_THROW_MESSAGE(E, M)
Macro to throw an exception of type E with message M. Use THROW instead of regular throw with subclas...
Definition: HfstExceptionDefs.h:47
std::map< String, String > HfstSymbolSubstitutions
A map of substitutions used when performing multiple symbol-to-symbol substitutions.
Definition: HfstSymbolDefs.h:88
std::pair< float, StringPairVector > HfstTwoLevelPath
A path of two level of arcs with collected weight.
Definition: HfstDataTypes.h:107
HfstTransition< HfstTropicalTransducerTransitionData > HfstBasicTransition
An HfstTransition with transition data of type HfstTropicalTransducerTransitionData.
Definition: HfstDataTypes.h:121
HFSTDLL void remove_transition(HfstState s, const HfstTransition< C > &transition, bool remove_symbols_from_alphabet=false)
Remove transition transition from state s. remove_symbols_from_alphabet defines whether symbols in tr...
Definition: HfstTransitionGraph.h:576
std::set< HfstSymbol > HfstTransitionGraphAlphabet
Datatype for the alphabet of a graph.
Definition: HfstTransitionGraph.h:143
HfstTransitionGraph< HfstTropicalTransducerTransitionData > HfstBasicTransducer
An HfstTransitionGraph with transitions of type HfstTropicalTransducerTransitionData and weight type ...
Definition: HfstDataTypes.h:113
A transition that consists of a target state and transition data represented by class C...
Definition: HfstDataTypes.h:121
HFSTDLL void write_in_prolog_format(FILE *file, const std::string &name, bool write_weights=true)
Write the graph in prolog format to FILE file. write_weights defines whether weights are printed (tod...
Definition: HfstTransitionGraph.h:1010
std::map< StringPair, StringPair > HfstSymbolPairSubstitutions
A map of substitutions used when performing multiple symbol pair-to-symbol pair substitutions.
Definition: HfstSymbolDefs.h:94
HFSTDLL HfstTransitionGraph & substitute(bool(*func)(const HfstSymbolPair &sp, HfstSymbolPairSet &sps))
Substitute all transitions with a set of transitions as defined by function func. ...
Definition: HfstTransitionGraph.h:2472
HFSTDLL iterator end()
Get an iterator to the end of states (last state + 1) in the graph.
Definition: HfstTransitionGraph.h:672
HFSTDLL HfstTransitionGraph(void)
Create a graph with one initial state that has state number zero and is not a final state...
Definition: HfstTransitionGraph.h:208
HFSTDLL const HfstTransitions & operator[](HfstState s) const
Get the set of transitions of state s in this graph.
Definition: HfstTransitionGraph.h:684
HFSTDLL HfstState add_state(HfstState s)
Add a state s to this graph.
Definition: HfstTransitionGraph.h:540
std::set< HfstSymbolPair > HfstSymbolPairSet
A set of symbol pairs.
Definition: HfstTransitionGraph.h:137
HFSTDLL const HfstTransitionGraphAlphabet & get_alphabet() const
Get the set of HfstSymbols in the alphabet of the graph.
Definition: HfstTransitionGraph.h:498