HFST - Helsinki Finite-State Transducer Technology - C++ API  version 3.9.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HfstInputStream.h
Go to the documentation of this file.
1 // Copyright (c) 2016 University of Helsinki
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 3 of the License, or (at your option) any later version.
7 // See the file COPYING included with this distribution for more
8 // information.
9 
10 #ifndef _HFST_INPUTSTREAM_H_
11 #define _HFST_INPUTSTREAM_H_
12 
13 #if HAVE_CONFIG_H
14 # include <config.h>
15 #endif
16 
17 #include "HfstDataTypes.h"
18 
19 #include "hfstdll.h"
20 
25 namespace hfst
26 {
27 
28  namespace implementations {  // forward declarations of the backend input-stream classes
29 #if HAVE_OPENFST
30  class TropicalWeightInputStream;  // same guard as the tropical_ofst member of StreamImplementation
31 #if HAVE_OPENFST_LOG
32  class LogWeightInputStream;  // same guards as the log_ofst member of StreamImplementation
33 #endif
34 #endif
35 #if HAVE_SFST
36  class SfstInputStream;
37 #endif
38 #if HAVE_FOMA
39  class FomaInputStream;
40 #endif
41 #if HAVE_XFSM
42  class XfsmInputStream;
43 #endif
44 #if HAVE_MY_TRANSDUCER_LIBRARY
45  class MyTransducerLibraryInputStream;
46 #endif
47  class HfstOlInputStream;  // declared unconditionally, like the hfst_ol member
48  }
49 
50 
97  class HfstInputStream
98  {
99  protected:
100 
101  union StreamImplementation  // one pointer member per backend that is compiled in
102  {
103 #if HAVE_SFST
104  hfst::implementations::SfstInputStream * sfst;
105 #endif
106 #if HAVE_OPENFST
107  hfst::implementations::TropicalWeightInputStream * tropical_ofst;
108 #if HAVE_OPENFST_LOG
109  hfst::implementations::LogWeightInputStream * log_ofst;
110 #endif
111 #endif
112 #if HAVE_FOMA
113  hfst::implementations::FomaInputStream * foma;
114 #endif
115 #if HAVE_XFSM
116  hfst::implementations::XfsmInputStream * xfsm;
117 #endif
118 
119 #if HAVE_MY_TRANSDUCER_LIBRARY
120  hfst::implementations::MyTransducerLibraryInputStream *
121  my_transducer_library;
122 #endif
123 
124  hfst::implementations::HfstOlInputStream * hfst_ol;  // always available, not guarded by a HAVE_ macro
125  };
126 
127  /* The backend implementation */
128  StreamImplementation implementation;
129  /* Implementation type */
130  ImplementationType type;
131  /* Name of the next transducer, as given in the hfst header */
132  std::string name;
133  std::map<std::string,std::string> props;  // NOTE(review): string-to-string properties; presumably read from the hfst header - confirm
134  /* How many bytes have already been read while
135  processing the hfst header */
136  unsigned int bytes_to_skip;
137  /* The name of the file; "" if the stream reads from stdin */
138  std::string filename;
139  /* Whether the current transducer has an hfst header */
140  bool has_hfst_header;
141 
142  /* A special case where an OpenFst transducer has no symbol tables but an
143  SFST alphabet is appended at the end. Should not occur very often, but is
144  possible when converting old transducers into version 3.0 transducers */
145  bool hfst_version_2_weighted_transducer;
146 
147  /* The stream that the reading operations use when reading the first
148  transducer. At that point the type of the transducer is not known, so there
149  is no backend implementation whose reading functions could be used.
150  If input_stream==NULL, the backend implementation is used */
151  std::istream * input_stream;
152 
153  /* Basic stream operators. They work on input_stream (if it is not NULL)
154  or otherwise on the stream implementation. */
155 
156  /* Extract one character from the stream */
157  char stream_get();
158 
159  /* Extract one character from the stream and store it in @a c. */
160  char &stream_get(char &c);
161 
162  /* Extract one short from the stream and store it in @a i. */
163  short &stream_get(short &i);
164 
165  /* Extract one unsigned short from the stream and store it in @a i. */
166  unsigned short &stream_get(unsigned short &i);
167 
168  /* Put character @a c back into the stream */
169  void stream_unget(char c);
170  /* Whether the stream is at end */
171  bool stream_eof();
172  /* Get a string from the stream */
173  std::string stream_getstring();
174  /* Return the next character in the stream without extracting it */
175  char stream_peek();
176  /* Make the stream implementation ignore @a n bytes. */
177  void ignore(unsigned int n);
178 
179  /* The type of a transducer not supported directly by HFST version 3.0
180  but which can occur in conversion functions. */
181  enum TransducerType {
182  /* The special case flagged by hfst_version_2_weighted_transducer. */
183  HFST_VERSION_2_WEIGHTED,
184  /* An SFST transducer with no alphabet, not supported. */
185  HFST_VERSION_2_UNWEIGHTED_WITHOUT_ALPHABET,
186  /* Old header + ordinary SFST transducer. */
187  HFST_VERSION_2_UNWEIGHTED,
188  /* An OpenFst transducer; can cause problems if it does not have
189  symbol tables. */
190  OPENFST_TROPICAL_,
191  OPENFST_LOG_,
192  /* An SFST transducer. */
193  SFST_,
194  /* A foma transducer. */
195  FOMA_,
196  /* An xfsm transducer. */
197  XFSM_,
198  /* Your transducer type (placeholder, currently commented out) */
199  //MY_TRANSDUCER_LIBRARY_,
200  /* Transducer type not recognized. */
201  ERROR_TYPE_
202  };
203 
204  /* Read a transducer from the stream and store it in @a t. */
205  void read_transducer(HfstTransducer &t);
206  /* Type of the next transducer in the stream. */
207  ImplementationType stream_fst_type();
208 
209  // Methods used by function stream_fst_type. NOTE(review): bytes_read is taken by reference; presumably it tracks the bytes consumed so far - confirm.
210  TransducerType guess_fst_type(int &bytes_read);
211  bool read_hfst_header(int &bytes_read);
212  bool read_library_header(int &bytes_read);
213  int get_header_size(int &bytes_read);
214  StringPairVector get_header_data(int header_size);
215  void process_header_data
216  (StringPairVector &header_data, bool warnings=false);
217  bool set_implementation_specific_header_data
218  (StringPairVector &data, unsigned int index);
219 
220  // NOTE(review): the _old variants below presumably handle an older header format - confirm against HfstInputStream.cc.
221  bool read_library_header_old(int &bytes_read);
222  ImplementationType get_fst_type_old(int &bytes_read);
223 
224  public:
225 
234  HFSTDLL HfstInputStream(void);  // Create a stream to standard input for reading binary transducers.
235 
247  HFSTDLL HfstInputStream(const std::string &filename);  // NOTE(review): presumably creates a stream that reads from the file named filename - confirm
248 
250  HFSTDLL ~HfstInputStream(void);  // Destructor.
251 
255  HFSTDLL void close(void);  // Close the stream.
256 
258  HFSTDLL bool is_eof(void);  // Whether the stream is at end.
260  HFSTDLL bool is_bad(void);  // Whether badbit is set.
262  HFSTDLL bool is_good(void);  // Whether the state of the stream is good for input operations.
263 
269  HFSTDLL ImplementationType get_type(void) const;  // The type of the first transducer in the stream.
270 
271  friend class HfstTransducer;  // grants HfstTransducer access to the protected members of this class
272  };
273 
274 
275 }
276 
277 
278 
279 #endif
HFSTDLL void close(void)
Close the stream.
Definition: HfstInputStream.cc:1127
std::vector< std::pair< std::string, std::string > > StringPairVector
A vector of string pairs.
Definition: HfstDataTypes.h:105
A synchronous finite-state transducer.
Definition: HfstTransducer.h:253
ImplementationType
The type of an HfstTransducer.
Definition: HfstDataTypes.h:41
HFSTDLL bool is_eof(void)
Whether the stream is at end.
Definition: HfstInputStream.cc:1170
HFSTDLL bool is_bad(void)
Whether badbit is set.
Definition: HfstInputStream.cc:1213
HFSTDLL HfstInputStream(void)
Create a stream to standard input for reading binary transducers.
Definition: HfstInputStream.cc:926
A skeleton class for reading a new type of binary transducers from a stream.
Definition: MyTransducerLibraryTransducer.h:56
HFSTDLL ImplementationType get_type(void) const
The type of the first transducer in the stream.
Definition: HfstInputStream.cc:1301
Datatypes that are needed when using the HFST API.
A stream for reading HFST binary transducers.
Definition: HfstInputStream.h:97
HFSTDLL ~HfstInputStream(void)
Destructor.
Definition: HfstInputStream.cc:1082
HFSTDLL bool is_good(void)
Whether the state of the stream is good for input operations.
Definition: HfstInputStream.cc:1257