tesseract  4.1.1
renderer.h
Go to the documentation of this file.
1 // File: renderer.h
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifndef TESSERACT_API_RENDERER_H_
19 #define TESSERACT_API_RENDERER_H_
20 
21 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
22 // complexity of includes here. Use forward declarations wherever possible
23 // and hide includes of complex types in baseapi.cpp.
24 #include <string> // for std::string
25 #include "genericvector.h"
26 #include "platform.h"
27 #include "strngs.h" // for STRING
28 
29 struct Pix;
30 
31 namespace tesseract {
32 
33 class TessBaseAPI;
34 
49  public:
50  virtual ~TessResultRenderer();
51 
52  // Takes ownership of the pointer, so it must be a new'd instance.
53  // Renderers aren't ordered, but this joins the sequence passed as next
54  // with the existing next() sequence. The renderers should be unique across both lists.
55  void insert(TessResultRenderer* next);
56 
57  // Returns the next renderer or nullptr.
59  return next_;
60  }
61 
67  bool BeginDocument(const char* title);
68 
77  bool AddImage(TessBaseAPI* api);
78 
83  bool EndDocument();
84 
85  const char* file_extension() const {
86  return file_extension_;
87  }
88  const char* title() const {
89  return title_.c_str();
90  }
91 
92  // Is everything fine? Otherwise something went wrong.
93  bool happy() {
94  return happy_;
95  }
96 
106  int imagenum() const {
107  return imagenum_;
108  }
109 
110  protected:
121  TessResultRenderer(const char* outputbase, const char* extension);
122 
123  // Hook for specialized handling in BeginDocument()
124  virtual bool BeginDocumentHandler();
125 
126  // This must be overridden to render the OCR'd results
127  virtual bool AddImageHandler(TessBaseAPI* api) = 0;
128 
129  // Hook for specialized handling in EndDocument()
130  virtual bool EndDocumentHandler();
131 
132  // Renderers can call this to append '\0' terminated strings into
133  // the output string returned by GetOutput.
134  // This method will grow the output buffer if needed.
135  void AppendString(const char* s);
136 
137  // Renderers can call this to append binary byte sequences into
138  // the output string returned by GetOutput. Note that s is not necessarily
139  // '\0' terminated (and can contain '\0' within it).
140  // This method will grow the output buffer if needed.
141  void AppendData(const char* s, int len);
142 
143  private:
144  const char* file_extension_; // standard extension for generated output
145  STRING title_; // title of document being rendered
146  int imagenum_; // index of last image added
147 
148  FILE* fout_; // output file pointer
149  TessResultRenderer* next_; // Can link multiple renderers together
150  bool happy_; // I get grumpy when the disk fills up, etc.
151 };
152 
157  public:
158  explicit TessTextRenderer(const char* outputbase);
159 
160  protected:
161  bool AddImageHandler(TessBaseAPI* api) override;
162 };
163 
168  public:
169  explicit TessHOcrRenderer(const char* outputbase, bool font_info);
170  explicit TessHOcrRenderer(const char* outputbase);
171 
172  protected:
173  bool BeginDocumentHandler() override;
174  bool AddImageHandler(TessBaseAPI* api) override;
175  bool EndDocumentHandler() override;
176 
177  private:
178  bool font_info_; // whether to print font information
179 };
180 
185  public:
186  explicit TessAltoRenderer(const char* outputbase);
187 
188  protected:
189  bool BeginDocumentHandler() override;
190  bool AddImageHandler(TessBaseAPI* api) override;
191  bool EndDocumentHandler() override;
192 };
193 
198  public:
199  explicit TessTsvRenderer(const char* outputbase, bool font_info);
200  explicit TessTsvRenderer(const char* outputbase);
201 
202  protected:
203  bool BeginDocumentHandler() override;
204  bool AddImageHandler(TessBaseAPI* api) override;
205  bool EndDocumentHandler() override;
206 
207  private:
208  bool font_info_; // whether to print font information
209 };
210 
215  public:
216  // datadir is the location of the TESSDATA. We need it because
217  // we load a custom PDF font from this location.
218  TessPDFRenderer(const char* outputbase, const char* datadir,
219  bool textonly = false);
220 
221  protected:
222  bool BeginDocumentHandler() override;
223  bool AddImageHandler(TessBaseAPI* api) override;
224  bool EndDocumentHandler() override;
225 
226  private:
227  // We don't want to have every image in memory at once,
228  // so we store some metadata as we go along producing
229  // PDFs one page at a time. At the end, that metadata is
230  // used to make everything that isn't easily handled in a
231  // streaming fashion.
232  long int obj_; // counter for PDF objects
233  GenericVector<long int> offsets_; // offset of every PDF object in bytes
234  GenericVector<long int> pages_; // object number for every /Page object
235  std::string datadir_; // where to find the custom font
236  bool textonly_; // skip images if set
237  // Bookkeeping only. DIY = Do It Yourself.
238  void AppendPDFObjectDIY(size_t objectsize);
239  // Bookkeeping + emit data.
240  void AppendPDFObject(const char* data);
241  // Create the /Contents object for an entire page.
242  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
243  // Turn an image into a PDF object. Only transcode if we have to.
244  static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
245  char** pdf_object, long int* pdf_object_size,
246  int jpg_quality);
247 };
248 
253  public:
254  explicit TessUnlvRenderer(const char* outputbase);
255 
256  protected:
257  bool AddImageHandler(TessBaseAPI* api) override;
258 };
259 
264  public:
265  explicit TessLSTMBoxRenderer(const char* outputbase);
266 
267  protected:
268  bool AddImageHandler(TessBaseAPI* api) override;
269 };
270 
275  public:
276  explicit TessBoxTextRenderer(const char* outputbase);
277 
278  protected:
279  bool AddImageHandler(TessBaseAPI* api) override;
280 };
281 
286  public:
287  explicit TessWordStrBoxRenderer(const char* outputbase);
288 
289  protected:
290  bool AddImageHandler(TessBaseAPI* api) override;
291 };
292 
293 #ifndef DISABLED_LEGACY_ENGINE
294 
299  public:
300  explicit TessOsdRenderer(const char* outputbase);
301 
302  protected:
303  bool AddImageHandler(TessBaseAPI* api) override;
304 };
305 
306 #endif // ndef DISABLED_LEGACY_ENGINE
307 
308 } // namespace tesseract.
309 
310 #endif // TESSERACT_API_RENDERER_H_
const char * title() const
Definition: renderer.h:88
void insert(LIST list, void *node)
Definition: oldlist.cpp:172
struct TessHOcrRenderer TessHOcrRenderer
Definition: capi.h:89
struct TessBaseAPI TessBaseAPI
Definition: capi.h:93
struct TessBoxTextRenderer TessBoxTextRenderer
Definition: capi.h:92
struct TessTextRenderer TessTextRenderer
Definition: capi.h:88
struct TessResultRenderer TessResultRenderer
Definition: capi.h:87
const char * file_extension() const
Definition: renderer.h:85
#define TESS_API
Definition: platform.h:54
TessResultRenderer * next()
Definition: renderer.h:58
struct TessUnlvRenderer TessUnlvRenderer
Definition: capi.h:91
struct TessPDFRenderer TessPDFRenderer
Definition: capi.h:90
Definition: strngs.h:45