HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstXeroxRules.h
Go to the documentation of this file.
1 // Copyright (c) 2016 University of Helsinki
2 
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
10 #ifndef GUARD_hfst_xerox_rules_h
11 #define GUARD_hfst_xerox_rules_h
12 
13 //#include "HfstDataTypes.h"
14 //#include "HfstSymbolDefs.h"
15 #include "HfstTransducer.h"
16 
21 namespace hfst
22 {
24  namespace xeroxRules
25  {
// Direction of a replace rule's context: up, down, right or left.
// A Rule that has a context stores one of these values (see Rule::replType).
26  enum ReplaceType {REPL_UP, REPL_DOWN, REPL_RIGHT, REPL_LEFT};
27 
28  // This enum is used in xre_parse.yy for the regex2pfst tool.
29  // It is declared here rather than in xre_parse.yy because we couldn't make it work there.
// NOTE(review): RTL/LTR presumably stand for right-to-left / left-to-right
// matching direction -- confirm against the parser.
30  enum ReplaceArrow { E_REPLACE_RIGHT,
31  E_OPTIONAL_REPLACE_RIGHT,
32  E_REPLACE_LEFT,
33  E_OPTIONAL_REPLACE_LEFT,
34  E_REPLACE_RIGHT_MARKUP,
35  E_RTL_LONGEST_MATCH,
36  E_RTL_SHORTEST_MATCH,
37  E_LTR_LONGEST_MATCH,
38  E_LTR_SHORTEST_MATCH
39  };
// A rule that contains a mapping, a context and a replace type (if any),
// e.g. a rule written as  A -> B || L _ R .
// NOTE(review): the member declarations that the "cross product of mapping
// transducers" and "context" comments describe are not visible in this
// listing (listing lines 48 and 50 are absent); only replType is shown.
45  class Rule
46  {
47  /* cross product of mapping transducers */
49  /* context */
51  /* if there is a context, it needs to have a direction (up, left, down or right) */
52  ReplaceType replType;
53 
54  public:
// Construct from a single vector of transducer pairs.
// NOTE(review): presumably the mapping, with no context -- parameter is unnamed; confirm.
55  Rule ( const HfstTransducerPairVector& );
// Construct from two vectors of transducer pairs and a ReplaceType.
// NOTE(review): presumably mapping, context and context direction, in that
// order -- parameters are unnamed; confirm in HfstXeroxRules.cc.
56  Rule ( const HfstTransducerPairVector&, const HfstTransducerPairVector&, ReplaceType );
57 
58  // copy constructor
59  Rule ( const Rule& );
60 
// Accessors; each returns its value by copy and does not modify the rule.
61  HfstTransducerPairVector get_mapping() const;
62  HfstTransducerPairVector get_context() const;
63  ReplaceType get_replType() const;
64 
// NOTE(review): presumably encodes flag diacritics in the rule's transducers
// (cf. encodeFlagDiacritics) -- confirm in HfstXeroxRules.cc.
65  void encodeFlags();
66 
// Stream output operator for a Rule.
67  friend std::ostream& operator<<(std::ostream &out, const Rule &r);
68  };
69 
76 
84 
85 
86  // Disjunct all transducers in trVector.
// NOTE(review): presumably returns the resulting disjunction as a single transducer -- confirm.
87  HfstTransducer disjunctVectorMembers( const HfstTransducerVector &trVector );
88 
94 
95  // Helper used when changing weights to zero (in the constraintComposition function).
// NOTE(review): presumably maps any weight f to 0 -- confirm in HfstXeroxRules.cc.
96  float zero_weight(float f);
97 
// Generalized Lenient Composition (by Anssi Yli-Jyrä) of t and Constraint.
102  HfstTransducer constraintComposition( const HfstTransducer &t, const HfstTransducer &Constraint );
103 
// Freely inserts bracket symbols into t (t is taken by non-const reference
// and nothing is returned). If optional is true, a further set of symbols is
// inserted as well.
// NOTE(review): the concrete symbols are not recoverable from this listing --
// see the definition in HfstXeroxRules.cc.
107  void insertFreelyAllTheBrackets( HfstTransducer &t, bool optional );
108 
115  const HfstTransducer &mappingWithBracketsAndTmpBoundary,
116  const HfstTransducer &identityExpanded,
117  ReplaceType replType,
118  bool optional);
119 
120 
// Bracketed replace for a single rule: unconditional replace, in multiple contexts.
// NOTE(review): `optional` presumably selects the optional variant of the
// replacement -- confirm in HfstXeroxRules.cc.
133  HfstTransducer bracketedReplace( const Rule &rule, bool optional);
134 
// Bracketed replace for parallel rules (all rules in ruleVector).
136  HfstTransducer parallelBracketedReplace( const std::vector<Rule> &ruleVector, bool optional);
137 
138 
139 
140  //---------------------------------
141  // CONSTRAINTS
142  //---------------------------------
143 
144  // (help function)
145  // returns: [ B:0 | 0:B | ?-B ]*
146  // which is used in some constraints
147  HfstTransducer constraintsRightPart( ImplementationType type );
148 
149  // .#. ?* <:0 0:> ?* .#.
150  // filters out empty string
151  HfstTransducer oneBetterthanNoneConstraint( const HfstTransducer &uncondidtionalTr );
152 
153 
154  // ?* <:0 [B:0]* [I-B] [ B:0 | 0:B | ?-B ]*
155  HfstTransducer leftMostConstraint( const HfstTransducer &uncondidtionalTr );
156 
157  // [ B:0 | 0:B | ?-B ]* [I-B]+ >:0 [ ?-B ]*
158  HfstTransducer rightMostConstraint( const HfstTransducer &uncondidtionalTr );
159 
160  // Longest match
161  // it should be composed to left most transducer........
162  // ?* < [?-B]+ 0:> [ ? | 0:< | <:0 | 0:> | B ] [ B:0 | 0:B | ?-B ]*
163  HfstTransducer longestMatchLeftMostConstraint( const HfstTransducer &uncondidtionalTr );
164 
165 
166  // Longest match RIGHT most
167  HfstTransducer longestMatchRightMostConstraint(const HfstTransducer &uncondidtionalTr );
168 
169 
170  // Shortest match
171  // it should be composed to left most transducer........
172  // ?* < [?-B]+ >:0
173  // [?-B] or [ ? | 0:< | <:0 | >:0 | B ][?-B]+
174  // [ B:0 | 0:B | ?-B ]*
175  HfstTransducer shortestMatchLeftMostConstraint( const HfstTransducer &uncondidtionalTr );
176 
177 
178  // Shortest match
179  // it should be composed to left most transducer........
180  //[ B:0 | 0:B | ?-B ]*
181  // [?-B] or [?-B]+ [ ? | 0:> | >:0 | <:0 | B ]
182  // <:0 [?-B]+ > ?*
183  HfstTransducer shortestMatchRightMostConstraint( const HfstTransducer &uncondidtionalTr );
184 
185 
186  // ?* [ BL:0 (?-B)+ BR:0 ?* ]+
187  HfstTransducer mostBracketsPlusConstraint( const HfstTransducer &uncondidtionalTr );
188 
189 
190  // ?* [ BL:0 (?-B)* BR:0 ?* ]+
191  HfstTransducer mostBracketsStarConstraint( const HfstTransducer &uncondidtionalTr );
192 
193  // ?* B2 ?*
194  HfstTransducer removeB2Constraint( const HfstTransducer &t );
195 
196  // to avoid repetition in empty replace rule
197  HfstTransducer noRepetitionConstraint( const HfstTransducer &t );
198 
211 
212 
213  //---------------------------------
214  // INTERFACE HELPING FUNCTIONS
215  //---------------------------------
216  //used by hfst-regexp parser
217  HfstTransducerPair create_mapping_for_mark_up_replace( const HfstTransducerPair &mappingPair,
218  const HfstTransducerPair &marks );
219  // HfstTransducerPairVector create_mapping_for_mark_up_replace( const HfstTransducerPairVector &mappingPairVector,
220  // const StringPair &marks );
221 
222  // HfstTransducerPairVector create_mapping_for_mark_up_replace( const HfstTransducerPairVector &mappingPairVector,
223  // const HfstTransducerPair &marks );
224  //---------------------------------
225  // REPLACE FUNCTIONS - INTERFACE
226  //---------------------------------
227 
228  // replace up, left, right, down
229  HfstTransducer replace( const Rule &rule, bool optional);
230  // for parallel rules
231  HfstTransducer replace( const std::vector<Rule> &ruleVector, bool optional);
232  // replace up, left, right, down
233  HfstTransducer replace_left( const Rule &rule, bool optional);
234  // for parallel rules
235  HfstTransducer replace_left( const std::vector<Rule> &ruleVector, bool optional);
236  // left to right
237  HfstTransducer replace_leftmost_longest_match( const Rule &rule );
238  // left to right
239  HfstTransducer replace_leftmost_longest_match( const std::vector<Rule> &ruleVector );
240  // right to left
241  HfstTransducer replace_rightmost_longest_match( const Rule &rule );
242 
243  // right to left
244  HfstTransducer replace_rightmost_longest_match( const std::vector<Rule> &ruleVector );
245 
246  HfstTransducer replace_leftmost_shortest_match( const Rule &rule);
247 
248  HfstTransducer replace_leftmost_shortest_match(const std::vector<Rule> &ruleVector );
249  HfstTransducer replace_rightmost_shortest_match( const Rule &rule );
250  HfstTransducer replace_rightmost_shortest_match( const std::vector<Rule> &ruleVector );
251 
252 
253  // the problem is that the mark-up rules can be mixed with ordinary rules,
254  // i.e. a -> b ... c , a -> d ;
255  // this is why each markup mapping should be accessed separately
256  // HfstTransducer mark_up_replace( const Rule &rule,
257  // const StringPair &marks,
258  // bool optional);
259 
260  // HfstTransducer mark_up_replace(const Rule &rule,
261  // const HfstTransducerPair &marks,
262  // bool optional);
263 
264 
265  // HfstTransducer mark_up_replace( const std::vector<MarkUpRule> &markUpRuleVector,
266  // bool optional);
267 
268  // replace up, left, right, down
269  HfstTransducer replace_epenthesis( const Rule &rule, bool optional);
270  // replace up, left, right, down
271  HfstTransducer replace_epenthesis( const std::vector<Rule> &ruleVector, bool optional);
272 
273 
274  //---------------------------------
275  // RESTRICTION FUNCTIONS
276  //---------------------------------
277 
278  // create marks for given i
279  //static StringPair restrictionMarks( int i);
280 
281  /*
282  * define AA1a [ [. 0 .] -> LEFT_MARK || _ center ];
283  * define AA1b [ [. 0 .] -> RIGHT_MARK || center _ ];
284  * retval = AA1 .o. AA2
285  */
286  //static HfstTransducer surroundCenterWithBrackets( const HfstTransducer &center,
287  // const HfstTransducer &leftMark,
288  // const HfstTransducer &rightMark);
289  // Contexts
290  // define NOS1 [ %[ -> 0 || b / B _ ];
291  // define NOF1 [ %] -> 0 || _ c / B ];
292  static HfstTransducer removeBracketsInContext( const HfstTransducerPairVector &context,
293  const HfstTransducer &leftMark,
294  const HfstTransducer &rightMark,
295  int i);
296  // Restriction function "=>"
297  HfstTransducer restriction( const HfstTransducer &automata, const HfstTransducerPairVector &context);
298  HfstTransducer before( const HfstTransducer &left, const HfstTransducer &right);
299  HfstTransducer after( const HfstTransducer &left, const HfstTransducer &right);
300  }
301 }
302 
303 // define guard
304 #endif
HfstTransducer expandContextsWithMapping(const HfstTransducerPairVector &ContextVector, const HfstTransducer &mappingWithBracketsAndTmpBoundary, const HfstTransducer &identityExpanded, ReplaceType replType, bool optional)
It is used in bracketedReplace, when the replace expression has context. Cr' = (Rc ...
Definition: HfstXeroxRules.cc:387
HfstTransducer decodeFlagDiacritics(const HfstTransducer &tr)
In the transducer tr, change back all "non-special" flag diacritics to normal, functional flag diacriti...
Definition: HfstXeroxRules.cc:228
Declarations of HFST API functions and datatypes.
HfstTransducer removeMarkers(const HfstTransducer &tr)
Remove markers used in replace functions from tr. Additionally, decode flag diacritics.
Definition: HfstXeroxRules.cc:288
HfstTransducer parallelBracketedReplace(const std::vector< Rule > &ruleVector, bool optional)
Bracketed replace for parallel rules.
Definition: HfstXeroxRules.cc:851
HfstTransducer encodeFlagDiacritics(const HfstTransducer &tr)
In the transducer tr, change all flag diacritics to "non-special" multichar symbols. It means that @ sig...
Definition: HfstXeroxRules.cc:184
A synchronous finite-state transducer.
Definition: HfstTransducer.h:253
HfstTransducer applyBoundaryMark(const HfstTransducer &t)
It applies boundary marker from contexts (.#.) to t.
Definition: HfstXeroxRules.cc:2187
ImplementationType
The type of an HfstTransducer.
Definition: HfstDataTypes.h:41
std::vector< HfstTransducer > HfstTransducerVector
a vector of transducers for methods applying a cascade of automata
Definition: HfstDataTypes.h:33
A rule that contains mapping and context and replace type (if any). If rule is A -> B || L _ R ...
Definition: HfstXeroxRules.h:45
void insertFreelyAllTheBrackets(HfstTransducer &t, bool optional)
If optional is false, the function freely inserts in t @ and If it is true, it also inserts @ and ...
Definition: HfstXeroxRules.cc:355
std::pair< HfstTransducer, HfstTransducer > HfstTransducerPair
A pair of transducers.
Definition: HfstDataTypes.h:78
std::vector< HfstTransducerPair > HfstTransducerPairVector
A vector of transducer pairs.
Definition: HfstDataTypes.h:82
HfstTransducer constraintComposition(const HfstTransducer &t, const HfstTransducer &Constraint)
Generalized Lenient Composition (by Anssi Yli-Jyrä) of t and Constraint. More about this composi...
Definition: HfstXeroxRules.cc:324
HfstTransducer bracketedReplace(const Rule &rule, bool optional)
Unconditional replace, in multiple contexts first: (.* TT .*) - [( .* L1 TT R1 ...
Definition: HfstXeroxRules.cc:575