tesseract 5.2.0
Loading...
Searching...
No Matches
imagefind.cpp
Go to the documentation of this file.
1
2// File: imagefind.cpp
3// Description: Function to find image and drawing regions in an image
4// and create a corresponding list of empty blobs.
5// Author: Ray Smith
6//
7// (C) Copyright 2008, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#ifdef HAVE_CONFIG_H
21# include "config_auto.h"
22#endif
23
24#include "imagefind.h"
25
26#include "colpartitiongrid.h"
27#include "linlsq.h"
28#include "params.h"
29#include "statistc.h"
30
31#include <allheaders.h>
32
33#include <algorithm>
34
35namespace tesseract {
36
37static INT_VAR(textord_tabfind_show_images, false, "Show image blobs");
38
39// Fraction of width or height of on pixels that can be discarded from a
40// roughly rectangular image.
41const double kMinRectangularFraction = 0.125;
42// Fraction of width or height to consider image completely used.
43const double kMaxRectangularFraction = 0.75;
44// Fraction of width or height to allow transition from kMinRectangularFraction
45// to kMaxRectangularFraction, equivalent to a dy/dx skew.
46const double kMaxRectangularGradient = 0.1; // About 6 degrees.
47// Minimum image size to be worth looking for images on.
48const int kMinImageFindSize = 100;
49// Pixel padding for noise blobs and partitions when rendering on the image
50// mask to encourage them to join together. Make it too big and images
51// will fatten out too much and have to be clipped to text.
52const int kNoisePadding = 4;
53
54// Scans horizontally on x=[x_start,x_end), starting with y=*y_start,
55// stepping y+=y_step, until y=y_end. *ystart is input/output.
56// If the number of black pixels in a row, pix_count fits this pattern:
57// 0 or more rows with pix_count < min_count then
58// <= mid_width rows with min_count <= pix_count <= max_count then
59// a row with pix_count > max_count then
60// true is returned, and *y_start = the first y with pix_count >= min_count.
61static bool HScanForEdge(uint32_t *data, int wpl, int x_start, int x_end, int min_count,
62 int mid_width, int max_count, int y_end, int y_step, int *y_start) {
63 int mid_rows = 0;
64 for (int y = *y_start; y != y_end; y += y_step) {
65 // Need pixCountPixelsInRow(pix, y, &pix_count, nullptr) to count in a
66 // subset.
67 int pix_count = 0;
68 uint32_t *line = data + wpl * y;
69 for (int x = x_start; x < x_end; ++x) {
70 if (GET_DATA_BIT(line, x)) {
71 ++pix_count;
72 }
73 }
74 if (mid_rows == 0 && pix_count < min_count) {
75 continue; // In the min phase.
76 }
77 if (mid_rows == 0) {
78 *y_start = y; // Save the y_start where we came out of the min phase.
79 }
80 if (pix_count > max_count) {
81 return true; // Found the pattern.
82 }
83 ++mid_rows;
84 if (mid_rows > mid_width) {
85 break; // Middle too big.
86 }
87 }
88 return false; // Never found max_count.
89}
90
91// Scans vertically on y=[y_start,y_end), starting with x=*x_start,
92// stepping x+=x_step, until x=x_end. *x_start is input/output.
93// If the number of black pixels in a column, pix_count fits this pattern:
94// 0 or more cols with pix_count < min_count then
95// <= mid_width cols with min_count <= pix_count <= max_count then
96// a column with pix_count > max_count then
97// true is returned, and *x_start = the first x with pix_count >= min_count.
98static bool VScanForEdge(uint32_t *data, int wpl, int y_start, int y_end, int min_count,
99 int mid_width, int max_count, int x_end, int x_step, int *x_start) {
100 int mid_cols = 0;
101 for (int x = *x_start; x != x_end; x += x_step) {
102 int pix_count = 0;
103 uint32_t *line = data + y_start * wpl;
104 for (int y = y_start; y < y_end; ++y, line += wpl) {
105 if (GET_DATA_BIT(line, x)) {
106 ++pix_count;
107 }
108 }
109 if (mid_cols == 0 && pix_count < min_count) {
110 continue; // In the min phase.
111 }
112 if (mid_cols == 0) {
113 *x_start = x; // Save the place where we came out of the min phase.
114 }
115 if (pix_count > max_count) {
116 return true; // found the pattern.
117 }
118 ++mid_cols;
119 if (mid_cols > mid_width) {
120 break; // Middle too big.
121 }
122 }
123 return false; // Never found max_count.
124}
125
126// Returns true if there is a rectangle in the source pix, such that all
127// pixel rows and column slices outside of it have less than
128// min_fraction of the pixels black, and within max_skew_gradient fraction
129// of the pixels on the inside, there are at least max_fraction of the
130// pixels black. In other words, the inside of the rectangle looks roughly
131// rectangular, and the outside of it looks like extra bits.
132// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
133// Note: the algorithm is iterative, allowing it to slice off pixels from
134// one edge, allowing it to then slice off more pixels from another edge.
135static bool pixNearlyRectangular(Image pix, double min_fraction, double max_fraction,
136 double max_skew_gradient, int *x_start, int *y_start,
137 int *x_end, int *y_end) {
138 ASSERT_HOST(pix != nullptr);
139 *x_start = 0;
140 *x_end = pixGetWidth(pix);
141 *y_start = 0;
142 *y_end = pixGetHeight(pix);
143
144 uint32_t *data = pixGetData(pix);
145 int wpl = pixGetWpl(pix);
146 bool any_cut = false;
147 bool left_done = false;
148 bool right_done = false;
149 bool top_done = false;
150 bool bottom_done = false;
151 do {
152 any_cut = false;
153 // Find the top/bottom edges.
154 int width = *x_end - *x_start;
155 int min_count = static_cast<int>(width * min_fraction);
156 int max_count = static_cast<int>(width * max_fraction);
157 int edge_width = static_cast<int>(width * max_skew_gradient);
158 if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, max_count, *y_end, 1,
159 y_start) &&
160 !top_done) {
161 top_done = true;
162 any_cut = true;
163 }
164 --(*y_end);
165 if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, max_count, *y_start, -1,
166 y_end) &&
167 !bottom_done) {
168 bottom_done = true;
169 any_cut = true;
170 }
171 ++(*y_end);
172
173 // Find the left/right edges.
174 int height = *y_end - *y_start;
175 min_count = static_cast<int>(height * min_fraction);
176 max_count = static_cast<int>(height * max_fraction);
177 edge_width = static_cast<int>(height * max_skew_gradient);
178 if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, max_count, *x_end, 1,
179 x_start) &&
180 !left_done) {
181 left_done = true;
182 any_cut = true;
183 }
184 --(*x_end);
185 if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, max_count, *x_start, -1,
186 x_end) &&
187 !right_done) {
188 right_done = true;
189 any_cut = true;
190 }
191 ++(*x_end);
192 } while (any_cut);
193
194 // All edges must satisfy the condition of sharp gradient in pixel density
195 // in order for the full rectangle to be present.
196 return left_done && right_done && top_done && bottom_done;
197}
198
199// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
200// analogous to pixConnComp, except that connected components which are nearly
201// rectangular are replaced with solid rectangles.
202// The returned boxa, pixa may be nullptr, meaning no images found.
203// If not nullptr, they must be destroyed by the caller.
204// Resolution of pix should match the source image (Tesseract::pix_binary_)
205// so the output coordinate systems match.
206static void ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa,
207 Pixa **pixa) {
208 *boxa = nullptr;
209 *pixa = nullptr;
210
211 if (textord_tabfind_show_images && pixa_debug != nullptr) {
212 pixa_debug->AddPix(pix, "Conncompimage");
213 }
214 // Find the individual image regions in the mask image.
215 *boxa = pixConnComp(pix, pixa, 8);
216 // Rectangularize the individual images. If a sharp edge in vertical and/or
217 // horizontal occupancy can be found, it indicates a probably rectangular
218 // image with unwanted bits merged on, so clip to the approximate rectangle.
219 int npixes = 0;
220 if (*boxa != nullptr && *pixa != nullptr) {
221 npixes = pixaGetCount(*pixa);
222 }
223 for (int i = 0; i < npixes; ++i) {
224 int x_start, x_end, y_start, y_end;
225 Image img_pix = pixaGetPix(*pixa, i, L_CLONE);
226 if (textord_tabfind_show_images && pixa_debug != nullptr) {
227 pixa_debug->AddPix(img_pix, "A component");
228 }
229 if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction,
230 kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) {
231 Image simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
232 pixSetAll(simple_pix);
233 img_pix.destroy();
234 // pixaReplacePix takes ownership of the simple_pix.
235 pixaReplacePix(*pixa, i, simple_pix, nullptr);
236 img_pix = pixaGetPix(*pixa, i, L_CLONE);
237 // Fix the box to match the new pix.
238 l_int32 x, y, width, height;
239 boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
240 Box *simple_box = boxCreate(x + x_start, y + y_start, x_end - x_start, y_end - y_start);
241 boxaReplaceBox(*boxa, i, simple_box);
242 }
243 img_pix.destroy();
244 }
245}
246
247// Finds image regions within the BINARY source pix (page image) and returns
248// the image regions as a mask image.
249// The returned pix may be nullptr, meaning no images found.
250// If not nullptr, it must be PixDestroyed by the caller.
251// If textord_tabfind_show_images, debug images are appended to pixa_debug.
253 // Not worth looking at small images.
254 if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) {
255 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
256 }
257
258 // Reduce by factor 2.
259 Image pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
260 if (textord_tabfind_show_images && pixa_debug != nullptr) {
261 pixa_debug->AddPix(pixr, "CascadeReduced");
262 }
263
264 // Get the halftone mask directly from Leptonica.
265 //
266 // Leptonica will print an error message and return nullptr if we call
267 // pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we
268 // want to bypass that.
269 if (pixGetWidth(pixr) < kMinImageFindSize || pixGetHeight(pixr) < kMinImageFindSize) {
270 pixr.destroy();
271 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
272 }
273 // Get the halftone mask.
274 l_int32 ht_found = 0;
275 Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? pixaCreate(0) : nullptr;
276 Image pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
277 if (pixadb) {
278 Image pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
279 if (textord_tabfind_show_images && pixa_debug != nullptr) {
280 pixa_debug->AddPix(pixdb, "HalftoneMask");
281 }
282 pixdb.destroy();
283 pixaDestroy(&pixadb);
284 }
285 pixr.destroy();
286 if (!ht_found && pixht2 != nullptr) {
287 pixht2.destroy();
288 }
289 if (pixht2 == nullptr) {
290 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
291 }
292
293 // Expand back up again.
294 Image pixht = pixExpandReplicate(pixht2, 2);
295 if (textord_tabfind_show_images && pixa_debug != nullptr) {
296 pixa_debug->AddPix(pixht, "HalftoneReplicated");
297 }
298 pixht2.destroy();
299
300 // Fill to capture pixels near the mask edges that were missed
301 Image pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
302 pixht |= pixt;
303 pixt.destroy();
304
305 // Eliminate lines and bars that may be joined to images.
306 Image pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
307 pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
308 if (textord_tabfind_show_images && pixa_debug != nullptr) {
309 pixa_debug->AddPix(pixfinemask, "FineMask");
310 }
311 Image pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
312 Image pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
313 pixreduced.destroy();
314 pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
315 Image pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
316 pixreduced2.destroy();
317 if (textord_tabfind_show_images && pixa_debug != nullptr) {
318 pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
319 }
320 // Combine the coarse and fine image masks.
321 pixcoarsemask &= pixfinemask;
322 pixfinemask.destroy();
323 // Dilate a bit to make sure we get everything.
324 pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
325 Image pixmask = pixExpandReplicate(pixcoarsemask, 16);
326 pixcoarsemask.destroy();
327 if (textord_tabfind_show_images && pixa_debug != nullptr) {
328 pixa_debug->AddPix(pixmask, "MaskDilated");
329 }
330 // And the image mask with the line and bar remover.
331 pixht &= pixmask;
332 pixmask.destroy();
333 if (textord_tabfind_show_images && pixa_debug != nullptr) {
334 pixa_debug->AddPix(pixht, "FinalMask");
335 }
336 // Make the result image the same size as the input.
337 Image result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
338 result |= pixht;
339 pixht.destroy();
340 return result;
341}
342
343// Given an input pix, and a bounding rectangle, the sides of the rectangle
344// are shrunk inwards until they bound any black pixels found within the
345// original rectangle. Returns false if the rectangle contains no black
346// pixels at all.
347bool ImageFind::BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end) {
348 Box *input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start);
349 Box *output_box = nullptr;
350 pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
351 bool result = output_box != nullptr;
352 if (result) {
353 l_int32 x, y, width, height;
354 boxGetGeometry(output_box, &x, &y, &width, &height);
355 *x_start = x;
356 *y_start = y;
357 *x_end = x + width;
358 *y_end = y + height;
359 boxDestroy(&output_box);
360 }
361 boxDestroy(&input_box);
362 return result;
363}
364
365// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
366// of the point from the given line, defined by a pair of points in the 3-D
367// (RGB) space, line1 and line2.
368double ImageFind::ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2,
369 const uint8_t *point) {
370 int line_vector[kRGBRMSColors];
371 int point_vector[kRGBRMSColors];
372 for (int i = 0; i < kRGBRMSColors; ++i) {
373 line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]);
374 point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]);
375 }
376 line_vector[L_ALPHA_CHANNEL] = 0;
377 // Now the cross product in 3d.
378 int cross[kRGBRMSColors];
379 cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE] -
380 line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN];
381 cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED] -
382 line_vector[COLOR_RED] * point_vector[COLOR_BLUE];
383 cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN] -
384 line_vector[COLOR_GREEN] * point_vector[COLOR_RED];
385 cross[L_ALPHA_CHANNEL] = 0;
386 // Now the sums of the squares.
387 double cross_sq = 0.0;
388 double line_sq = 0.0;
389 for (int j = 0; j < kRGBRMSColors; ++j) {
390 cross_sq += static_cast<double>(cross[j]) * cross[j];
391 line_sq += static_cast<double>(line_vector[j]) * line_vector[j];
392 }
393 if (line_sq == 0.0) {
394 return 0.0;
395 }
396 return cross_sq / line_sq; // This is the squared distance.
397}
398
399// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
400// The following functions are responsible for cutting a polygonal image from
401// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
402// with DivideImageIntoParts as the master.
403// Problem statement:
404// We start with a single connected component from the image mask: we get
405// a Pix of the component, and its location on the page (im_box).
406// The objective of cutting a polygonal image from its rectangle is to avoid
407// interfering text, but not text that completely overlaps the image.
408// ------------------------------ ------------------------------
409// | Single input partition | | 1 Cut up output partitions |
410// | | ------------------------------
411// Av|oid | Avoid | |
412// | | |________________________|
413// Int|erfering | Interfering | |
414// | | _____|__________________|
415// T|ext | Text | |
416// | Text-on-image | | Text-on-image |
417// ------------------------------ --------------------------
418// DivideImageIntoParts does this by building a ColPartition_LIST (not in the
419// grid) with each ColPartition representing one of the rectangles needed,
420// starting with a single rectangle for the whole image component, and cutting
421// bits out of it with CutChunkFromParts as needed to avoid text. The output
422// ColPartitions are supposed to be ordered from top to bottom.
423
424// The problem is complicated by the fact that we have rotated the coordinate
425// system to make text lines horizontal, so if we need to look at the component
426// image, we have to rotate the coordinates. Throughout the functions in this
427// section im_box is the rectangle representing the image component in the
428// rotated page coordinates (where we are building our output ColPartitions),
429// rotation is the rotation that we used to get there, and rerotation is the
430// rotation required to get back to original page image coordinates.
431// To get to coordinates in the component image, pix, we rotate the im_box,
432// the point we want to locate, and subtract the rotated point from the top-left
433// of the rotated im_box.
434// im_box is therefore essential to calculating coordinates within the pix.
435
436// Returns true if there are no black pixels in between the boxes.
437// The im_box must represent the bounding box of the pix in tesseract
438// coordinates, which may be negative, due to rotations to make the textlines
439// horizontal. The boxes are rotated by rotation, which should undo such
440// rotations, before mapping them onto the pix.
441bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box,
442 const FCOORD &rotation, Image pix) {
443 TBOX search_box(box1);
444 search_box += box2;
445 if (box1.x_gap(box2) >= box1.y_gap(box2)) {
446 if (box1.x_gap(box2) <= 0) {
447 return true;
448 }
449 search_box.set_left(std::min(box1.right(), box2.right()));
450 search_box.set_right(std::max(box1.left(), box2.left()));
451 } else {
452 if (box1.y_gap(box2) <= 0) {
453 return true;
454 }
455 search_box.set_top(std::max(box1.bottom(), box2.bottom()));
456 search_box.set_bottom(std::min(box1.top(), box2.top()));
457 }
458 return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0;
459}
460
461// Returns the number of pixels in box in the pix.
462// rotation, pix and im_box are defined in the large comment above.
463int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation,
464 Image pix) {
465 // Intersect it with the image box.
466 box &= im_box; // This is in-place box intersection.
467 if (box.null_box()) {
468 return 0;
469 }
470 box.rotate(rotation);
471 TBOX rotated_im_box(im_box);
472 rotated_im_box.rotate(rotation);
473 Image rect_pix = pixCreate(box.width(), box.height(), 1);
474 pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix,
475 box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top());
476 l_int32 result;
477 pixCountPixels(rect_pix, &result, nullptr);
478 rect_pix.destroy();
479 return result;
480}
481
482// The box given by slice contains some black pixels, but not necessarily
483// over the whole box. Shrink the x bounds of slice, but not the y bounds
484// until there is at least one black pixel in the outermost columns.
485// rotation, rerotation, pix and im_box are defined in the large comment above.
486static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box,
487 Image pix, TBOX *slice) {
488 TBOX rotated_box(*slice);
489 rotated_box.rotate(rerotation);
490 TBOX rotated_im_box(im_box);
491 rotated_im_box.rotate(rerotation);
492 int left = rotated_box.left() - rotated_im_box.left();
493 int right = rotated_box.right() - rotated_im_box.left();
494 int top = rotated_im_box.top() - rotated_box.top();
495 int bottom = rotated_im_box.top() - rotated_box.bottom();
496 ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom);
497 top = rotated_im_box.top() - top;
498 bottom = rotated_im_box.top() - bottom;
499 left += rotated_im_box.left();
500 right += rotated_im_box.left();
501 rotated_box.set_to_given_coords(left, bottom, right, top);
502 rotated_box.rotate(rotation);
503 slice->set_left(rotated_box.left());
504 slice->set_right(rotated_box.right());
505}
506
507// The meat of cutting a polygonal image around text.
508// This function covers the general case of cutting a box out of a box
509// as shown:
510// Input Output
511// ------------------------------ ------------------------------
512// | Single input partition | | 1 Cut up output partitions |
513// | | ------------------------------
514// | ---------- | --------- ----------
515// | | box | | | 2 | box | 3 |
516// | | | | | | is cut | |
517// | ---------- | --------- out ----------
518// | | ------------------------------
519// | | | 4 |
520// ------------------------------ ------------------------------
521// In the context that this function is used, at most 3 of the above output
522// boxes will be created, as the overlapping box is never contained by the
523// input.
524// The above cutting operation is executed for each element of part_list that
525// is overlapped by the input box. Each modified ColPartition is replaced
526// in place in the list by the output of the cutting operation in the order
527// shown above, so iff no holes are ever created, the output will be in
528// top-to-bottom order, but in extreme cases, hole creation is possible.
529// In such cases, the output order may cause strange block polygons.
530// rotation, rerotation, pix and im_box are defined in the large comment above.
531static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD &rotation,
532 const FCOORD &rerotation, Image pix, ColPartition_LIST *part_list) {
533 ASSERT_HOST(!part_list->empty());
534 ColPartition_IT part_it(part_list);
535 do {
536 ColPartition *part = part_it.data();
537 TBOX part_box = part->bounding_box();
538 if (part_box.overlap(box)) {
539 // This part must be cut and replaced with the remains. There are
540 // up to 4 pieces to be made. Start with the first one and use
541 // add_before_stay_put. For each piece if it has no black pixels
542 // left, just don't make the box.
543 // Above box.
544 if (box.top() < part_box.top()) {
545 TBOX slice(part_box);
546 slice.set_bottom(box.top());
547 if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) {
548 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
549 part_it.add_before_stay_put(
551 }
552 }
553 // Left of box.
554 if (box.left() > part_box.left()) {
555 TBOX slice(part_box);
556 slice.set_right(box.left());
557 if (box.top() < part_box.top()) {
558 slice.set_top(box.top());
559 }
560 if (box.bottom() > part_box.bottom()) {
561 slice.set_bottom(box.bottom());
562 }
563 if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) {
564 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
565 part_it.add_before_stay_put(
567 }
568 }
569 // Right of box.
570 if (box.right() < part_box.right()) {
571 TBOX slice(part_box);
572 slice.set_left(box.right());
573 if (box.top() < part_box.top()) {
574 slice.set_top(box.top());
575 }
576 if (box.bottom() > part_box.bottom()) {
577 slice.set_bottom(box.bottom());
578 }
579 if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) {
580 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
581 part_it.add_before_stay_put(
583 }
584 }
585 // Below box.
586 if (box.bottom() > part_box.bottom()) {
587 TBOX slice(part_box);
588 slice.set_top(box.bottom());
589 if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, pix) > 0) {
590 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
591 part_it.add_before_stay_put(
593 }
594 }
595 part->DeleteBoxes();
596 delete part_it.extract();
597 }
598 part_it.forward();
599 } while (!part_it.at_first());
600}
601
602// Starts with the bounding box of the image component and cuts it up
603// so that it doesn't intersect text where possible.
604// Strong fully contained horizontal text is marked as text on image,
605// and does not cause a division of the image.
606// For more detail see the large comment above on cutting polygonal images
607// from a rectangle.
608// rotation, rerotation, pix and im_box are defined in the large comment above.
609static void DivideImageIntoParts(const TBOX &im_box, const FCOORD &rotation,
610 const FCOORD &rerotation, Image pix,
611 ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list) {
612 // Add the full im_box partition to the list to begin with.
613 ColPartition *pix_part =
615 ColPartition_IT part_it(part_list);
616 part_it.add_after_then_move(pix_part);
617
618 rectsearch->StartRectSearch(im_box);
619 ColPartition *part;
620 while ((part = rectsearch->NextRectSearch()) != nullptr) {
621 TBOX part_box = part->bounding_box();
622 if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) {
623 // This image is completely covered by an existing text partition.
624 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
625 ColPartition *pix_part = part_it.extract();
626 pix_part->DeleteBoxes();
627 delete pix_part;
628 }
629 } else if (part->flow() == BTFT_STRONG_CHAIN) {
630 // Text intersects the box.
631 TBOX overlap_box = part_box.intersection(im_box);
632 // Intersect it with the image box.
633 int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box, rerotation, pix);
634 if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) {
635 // Eat a piece out of the image.
636 // Pad it so that pieces eaten out look decent.
637 int padding = part->blob_type() == BRT_VERT_TEXT ? part_box.width() : part_box.height();
638 part_box.set_top(part_box.top() + padding / 2);
639 part_box.set_bottom(part_box.bottom() - padding / 2);
640 CutChunkFromParts(part_box, im_box, rotation, rerotation, pix, part_list);
641 } else {
642 // Strong overlap with the black area, so call it text on image.
643 part->set_flow(BTFT_TEXT_ON_IMAGE);
644 }
645 }
646 if (part_list->empty()) {
647 break;
648 }
649 }
650}
651
652// Search for the rightmost text that overlaps vertically and is to the left
653// of the given box, but within the given left limit.
654static int ExpandImageLeft(const TBOX &box, int left_limit, ColPartitionGrid *part_grid) {
656 ColPartition *part;
657 // Search right to left for any text that overlaps.
658 search.StartSideSearch(box.left(), box.bottom(), box.top());
659 while ((part = search.NextSideSearch(true)) != nullptr) {
660 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
661 const TBOX &part_box(part->bounding_box());
662 if (part_box.y_gap(box) < 0) {
663 if (part_box.right() > left_limit && part_box.right() < box.left()) {
664 left_limit = part_box.right();
665 }
666 break;
667 }
668 }
669 }
670 if (part != nullptr) {
671 // Search for the nearest text up to the one we already found.
672 TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
673 search.StartRectSearch(search_box);
674 while ((part = search.NextRectSearch()) != nullptr) {
675 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
676 const TBOX &part_box(part->bounding_box());
677 if (part_box.y_gap(box) < 0) {
678 if (part_box.right() > left_limit && part_box.right() < box.left()) {
679 left_limit = part_box.right();
680 }
681 }
682 }
683 }
684 }
685 return left_limit;
686}
687
688// Search for the leftmost text that overlaps vertically and is to the right
689// of the given box, but within the given right limit.
690static int ExpandImageRight(const TBOX &box, int right_limit, ColPartitionGrid *part_grid) {
692 ColPartition *part;
693 // Search left to right for any text that overlaps.
694 search.StartSideSearch(box.right(), box.bottom(), box.top());
695 while ((part = search.NextSideSearch(false)) != nullptr) {
696 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
697 const TBOX &part_box(part->bounding_box());
698 if (part_box.y_gap(box) < 0) {
699 if (part_box.left() < right_limit && part_box.left() > box.right()) {
700 right_limit = part_box.left();
701 }
702 break;
703 }
704 }
705 }
706 if (part != nullptr) {
707 // Search for the nearest text up to the one we already found.
708 TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
709 search.StartRectSearch(search_box);
710 while ((part = search.NextRectSearch()) != nullptr) {
711 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
712 const TBOX &part_box(part->bounding_box());
713 if (part_box.y_gap(box) < 0) {
714 if (part_box.left() < right_limit && part_box.left() > box.right()) {
715 right_limit = part_box.left();
716 }
717 }
718 }
719 }
720 }
721 return right_limit;
722}
723
724// Search for the topmost text that overlaps horizontally and is below
725// the given box, but within the given bottom limit.
726static int ExpandImageBottom(const TBOX &box, int bottom_limit, ColPartitionGrid *part_grid) {
728 ColPartition *part;
729 // Search right to left for any text that overlaps.
730 search.StartVerticalSearch(box.left(), box.right(), box.bottom());
731 while ((part = search.NextVerticalSearch(true)) != nullptr) {
732 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
733 const TBOX &part_box(part->bounding_box());
734 if (part_box.x_gap(box) < 0) {
735 if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) {
736 bottom_limit = part_box.top();
737 }
738 break;
739 }
740 }
741 }
742 if (part != nullptr) {
743 // Search for the nearest text up to the one we already found.
744 TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom());
745 search.StartRectSearch(search_box);
746 while ((part = search.NextRectSearch()) != nullptr) {
747 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
748 const TBOX &part_box(part->bounding_box());
749 if (part_box.x_gap(box) < 0) {
750 if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) {
751 bottom_limit = part_box.top();
752 }
753 }
754 }
755 }
756 }
757 return bottom_limit;
758}
759
760// Search for the bottommost text that overlaps horizontally and is above
761// the given box, but within the given top limit.
762static int ExpandImageTop(const TBOX &box, int top_limit, ColPartitionGrid *part_grid) {
764 ColPartition *part;
765 // Search right to left for any text that overlaps.
766 search.StartVerticalSearch(box.left(), box.right(), box.top());
767 while ((part = search.NextVerticalSearch(false)) != nullptr) {
768 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
769 const TBOX &part_box(part->bounding_box());
770 if (part_box.x_gap(box) < 0) {
771 if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) {
772 top_limit = part_box.bottom();
773 }
774 break;
775 }
776 }
777 }
778 if (part != nullptr) {
779 // Search for the nearest text up to the one we already found.
780 TBOX search_box(box.left(), box.top(), box.right(), top_limit);
781 search.StartRectSearch(search_box);
782 while ((part = search.NextRectSearch()) != nullptr) {
783 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
784 const TBOX &part_box(part->bounding_box());
785 if (part_box.x_gap(box) < 0) {
786 if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) {
787 top_limit = part_box.bottom();
788 }
789 }
790 }
791 }
792 }
793 return top_limit;
794}
795
796// Expands the image box in the given direction until it hits text,
797// limiting the expansion to the given limit box, returning the result
798// in the expanded box, and
799// returning the increase in area resulting from the expansion.
800static int ExpandImageDir(BlobNeighbourDir dir, const TBOX &im_box, const TBOX &limit_box,
801 ColPartitionGrid *part_grid, TBOX *expanded_box) {
802 *expanded_box = im_box;
803 switch (dir) {
804 case BND_LEFT:
805 expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(), part_grid));
806 break;
807 case BND_RIGHT:
808 expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(), part_grid));
809 break;
810 case BND_ABOVE:
811 expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid));
812 break;
813 case BND_BELOW:
814 expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(), part_grid));
815 break;
816 default:
817 return 0;
818 }
819 return expanded_box->area() - im_box.area();
820}
821
822// Expands the image partition into any non-text until it touches text.
823// The expansion proceeds in the order of increasing increase in area
824// as a heuristic to find the best rectangle by expanding in the most
825// constrained direction first.
826static void MaximalImageBoundingBox(ColPartitionGrid *part_grid, TBOX *im_box) {
827 bool dunnit[BND_COUNT];
828 memset(dunnit, 0, sizeof(dunnit));
829 TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(), part_grid->tright().x(),
830 part_grid->tright().y());
831 TBOX text_box(*im_box);
832 for (int iteration = 0; iteration < BND_COUNT; ++iteration) {
833 // Find the direction with least area increase.
834 int best_delta = -1;
835 BlobNeighbourDir best_dir = BND_LEFT;
836 TBOX expanded_boxes[BND_COUNT];
837 for (int dir = 0; dir < BND_COUNT; ++dir) {
838 auto bnd = static_cast<BlobNeighbourDir>(dir);
839 if (!dunnit[bnd]) {
840 TBOX expanded_box;
841 int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid, &expanded_boxes[bnd]);
842 if (best_delta < 0 || area_delta < best_delta) {
843 best_delta = area_delta;
844 best_dir = bnd;
845 }
846 }
847 }
848 // Run the best and remember the direction.
849 dunnit[best_dir] = true;
850 text_box = expanded_boxes[best_dir];
851 }
852 *im_box = text_box;
853}
854
855// Helper deletes the given partition but first marks up all the blobs as
856// noise, so they get deleted later, and disowns them.
857// If the initial type of the partition is image, then it actually deletes
858// the blobs, as the partition owns them in that case.
859static void DeletePartition(ColPartition *part) {
860 BlobRegionType type = part->blob_type();
861 if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
862 // The partition owns the boxes of these types, so just delete them.
863 part->DeleteBoxes(); // From a previous iteration.
864 } else {
865 // Once marked, the blobs will be swept up by TidyBlobs.
866 part->set_flow(BTFT_NONTEXT);
867 part->set_blob_type(BRT_NOISE);
868 part->SetBlobTypes();
869 part->DisownBoxes(); // Created before FindImagePartitions.
870 }
871 delete part;
872}
873
874// The meat of joining fragmented images and consuming ColPartitions of
875// uncertain type.
876// *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be
877// expanded to consume overlapping and nearby ColPartitions of uncertain type
878// and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond
879// max_image_box. *part_ptr is NOT in the part_grid.
880// rectsearch is already constructed on the part_grid, and is used for
881// searching for overlapping and nearby ColPartitions.
882// ExpandImageIntoParts is called iteratively until it returns false. Each
883// time it absorbs the nearest non-contained candidate, and everything that
884// is fully contained within part_ptr's bounding box.
885// TODO(rays) what if it just eats everything inside max_image_box in one go?
886static bool ExpandImageIntoParts(const TBOX &max_image_box, ColPartitionGridSearch *rectsearch,
887 ColPartitionGrid *part_grid, ColPartition **part_ptr) {
888 ColPartition *image_part = *part_ptr;
889 TBOX im_part_box = image_part->bounding_box();
890 if (textord_tabfind_show_images > 1) {
891 tprintf("Searching for merge with image part:");
892 im_part_box.print();
893 tprintf("Text box=");
894 max_image_box.print();
895 }
896 rectsearch->StartRectSearch(max_image_box);
897 ColPartition *part;
898 ColPartition *best_part = nullptr;
899 int best_dist = 0;
900 while ((part = rectsearch->NextRectSearch()) != nullptr) {
901 if (textord_tabfind_show_images > 1) {
902 tprintf("Considering merge with part:");
903 part->Print();
904 if (im_part_box.contains(part->bounding_box())) {
905 tprintf("Fully contained\n");
906 } else if (!max_image_box.contains(part->bounding_box())) {
907 tprintf("Not within text box\n");
908 } else if (part->flow() == BTFT_STRONG_CHAIN) {
909 tprintf("Too strong text\n");
910 } else {
911 tprintf("Real candidate\n");
912 }
913 }
914 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_TEXT_ON_IMAGE ||
915 part->blob_type() == BRT_POLYIMAGE) {
916 continue;
917 }
918 TBOX box = part->bounding_box();
919 if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) {
920 if (im_part_box.contains(box)) {
921 // Eat it completely.
922 rectsearch->RemoveBBox();
923 DeletePartition(part);
924 continue;
925 }
926 int x_dist = std::max(0, box.x_gap(im_part_box));
927 int y_dist = std::max(0, box.y_gap(im_part_box));
928 int dist = x_dist * x_dist + y_dist * y_dist;
929 if (dist > box.area() || dist > im_part_box.area()) {
930 continue; // Not close enough.
931 }
932 if (best_part == nullptr || dist < best_dist) {
933 // We keep the nearest qualifier, which is not necessarily the nearest.
934 best_part = part;
935 best_dist = dist;
936 }
937 }
938 }
939 if (best_part != nullptr) {
940 // It needs expanding. We can do it without touching text.
941 TBOX box = best_part->bounding_box();
942 if (textord_tabfind_show_images > 1) {
943 tprintf("Merging image part:");
944 im_part_box.print();
945 tprintf("with part:");
946 box.print();
947 }
948 im_part_box += box;
950 DeletePartition(image_part);
951 part_grid->RemoveBBox(best_part);
952 DeletePartition(best_part);
953 rectsearch->RepositionIterator();
954 return true;
955 }
956 return false;
957}
958
959// Helper function to compute the overlap area between the box and the
960// given list of partitions.
961static int IntersectArea(const TBOX &box, ColPartition_LIST *part_list) {
962 int intersect_area = 0;
963 ColPartition_IT part_it(part_list);
964 // Iterate the parts and subtract intersecting area.
965 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
966 ColPartition *image_part = part_it.data();
967 TBOX intersect = box.intersection(image_part->bounding_box());
968 intersect_area += intersect.area();
969 }
970 return intersect_area;
971}
972
973// part_list is a set of ColPartitions representing a polygonal image, and
974// im_box is the union of the bounding boxes of all the parts in part_list.
975// Tests whether part is to be consumed by the polygonal image.
976// Returns true if part is weak text and more than half of its area is
977// intersected by parts from the part_list, and it is contained within im_box.
978static bool TestWeakIntersectedPart(const TBOX &im_box, ColPartition_LIST *part_list,
979 ColPartition *part) {
980 if (part->flow() < BTFT_STRONG_CHAIN) {
981 // A weak partition intersects the box.
982 const TBOX &part_box = part->bounding_box();
983 if (im_box.contains(part_box)) {
984 int area = part_box.area();
985 int intersect_area = IntersectArea(part_box, part_list);
986 if (area < 2 * intersect_area) {
987 return true;
988 }
989 }
990 }
991 return false;
992}
993
994// A rectangular or polygonal image has been completed, in part_list, bounding
995// box in im_box. We want to eliminate weak text or other uncertain partitions
996// (basically anything that is not BRT_STRONG_CHAIN or better) from both the
997// part_grid and the big_parts list that are contained within im_box and
998// overlapped enough by the possibly polygonal image.
999static void EliminateWeakParts(const TBOX &im_box, ColPartitionGrid *part_grid,
1000 ColPartition_LIST *big_parts, ColPartition_LIST *part_list) {
1001 ColPartitionGridSearch rectsearch(part_grid);
1002 ColPartition *part;
1003 rectsearch.StartRectSearch(im_box);
1004 while ((part = rectsearch.NextRectSearch()) != nullptr) {
1005 if (TestWeakIntersectedPart(im_box, part_list, part)) {
1006 BlobRegionType type = part->blob_type();
1007 if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) {
1008 rectsearch.RemoveBBox();
1009 DeletePartition(part);
1010 } else {
1011 // The part is mostly covered, so mark it. Non-image partitions are
1012 // kept hanging around to mark the image for pass2
1013 part->set_flow(BTFT_NONTEXT);
1014 part->set_blob_type(BRT_NOISE);
1015 part->SetBlobTypes();
1016 }
1017 }
1018 }
1019 ColPartition_IT big_it(big_parts);
1020 for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
1021 part = big_it.data();
1022 if (TestWeakIntersectedPart(im_box, part_list, part)) {
1023 // Once marked, the blobs will be swept up by TidyBlobs.
1024 DeletePartition(big_it.extract());
1025 }
1026 }
1027}
1028
1029// Helper scans for good text partitions overlapping the given box.
1030// If there are no good text partitions overlapping an expanded box, then
1031// the box is expanded, otherwise, the original box is returned.
1032// If good text overlaps the box, true is returned.
1033static bool ScanForOverlappingText(ColPartitionGrid *part_grid, TBOX *box) {
1034 ColPartitionGridSearch rectsearch(part_grid);
1035 TBOX padded_box(*box);
1036 padded_box.pad(kNoisePadding, kNoisePadding);
1037 rectsearch.StartRectSearch(padded_box);
1038 ColPartition *part;
1039 bool any_text_in_padded_rect = false;
1040 while ((part = rectsearch.NextRectSearch()) != nullptr) {
1041 if (part->flow() == BTFT_CHAIN || part->flow() == BTFT_STRONG_CHAIN) {
1042 // Text intersects the box.
1043 any_text_in_padded_rect = true;
1044 const TBOX &part_box = part->bounding_box();
1045 if (box->overlap(part_box)) {
1046 return true;
1047 }
1048 }
1049 }
1050 if (!any_text_in_padded_rect) {
1051 *box = padded_box;
1052 }
1053 return false;
1054}
1055
1056// Renders the boxes of image parts from the supplied list onto the image_pix,
1057// except where they interfere with existing strong text in the part_grid,
1058// and then deletes them.
1059// Box coordinates are rotated by rerotate to match the image.
1060static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *part_grid,
1061 ColPartition_LIST *image_parts, Image image_pix) {
1062 if (image_pix == nullptr) {
1063 return;
1064 }
1065 int imageheight = pixGetHeight(image_pix);
1066 ColPartition_IT part_it(image_parts);
1067 for (; !part_it.empty(); part_it.forward()) {
1068 ColPartition *part = part_it.extract();
1069 TBOX part_box = part->bounding_box();
1070 BlobRegionType type = part->blob_type();
1071 if (!ScanForOverlappingText(part_grid, &part_box) || type == BRT_RECTIMAGE ||
1072 type == BRT_POLYIMAGE) {
1073 // Mark the box on the image.
1074 // All coords need to be rotated to match the image.
1075 part_box.rotate(rerotate);
1076 int left = part_box.left();
1077 int top = part_box.top();
1078 pixRasterop(image_pix, left, imageheight - top, part_box.width(), part_box.height(), PIX_SET,
1079 nullptr, 0, 0);
1080 }
1081 DeletePartition(part);
1082 }
1083}
1084
1085// Locates all the image partitions in the part_grid, that were found by a
1086// previous call to FindImagePartitions, marks them in the image_mask,
1087// removes them from the grid, and deletes them. This makes it possible to
1088// call FindImagePartitions again to produce less broken-up and less
1089// overlapping image partitions.
1090// rerotation specifies how to rotate the partition coords to match
1091// the image_mask, since this function is used after orientation correction.
1093 Image image_mask) {
1094 // Extract the noise parts from the grid and put them on a temporary list.
1095 ColPartition_LIST parts_list;
1096 ColPartition_IT part_it(&parts_list);
1097 ColPartitionGridSearch gsearch(part_grid);
1098 gsearch.StartFullSearch();
1099 ColPartition *part;
1100 while ((part = gsearch.NextFullSearch()) != nullptr) {
1101 BlobRegionType type = part->blob_type();
1102 if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1103 part_it.add_after_then_move(part);
1104 gsearch.RemoveBBox();
1105 }
1106 }
1107 // Render listed noise partitions to the image mask.
1108 MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
1109}
1110
1111// Removes and deletes all image partitions that are too small to be worth
1112// keeping. We have to do this as a separate phase after creating the image
1113// partitions as the small images are needed to join the larger ones together.
1114static void DeleteSmallImages(ColPartitionGrid *part_grid) {
1115 if (part_grid != nullptr) {
1116 return;
1117 }
1118 ColPartitionGridSearch gsearch(part_grid);
1119 gsearch.StartFullSearch();
1120 ColPartition *part;
1121 while ((part = gsearch.NextFullSearch()) != nullptr) {
1122 // Only delete rectangular images, since if it became a poly image, it
1123 // is more evidence that it is somehow important.
1124 if (part->blob_type() == BRT_RECTIMAGE) {
1125 const TBOX &part_box = part->bounding_box();
1126 if (part_box.width() < kMinImageFindSize || part_box.height() < kMinImageFindSize) {
1127 // It is too small to keep. Just make it disappear.
1128 gsearch.RemoveBBox();
1129 DeletePartition(part);
1130 }
1131 }
1132 }
1133}
1134
1135// Runs a CC analysis on the image_pix mask image, and creates
1136// image partitions from them, cutting out strong text, and merging with
1137// nearby image regions such that they don't interfere with text.
1138// Rotation and rerotation specify how to rotate image coords to match
1139// the blob and partition coords and back again.
1140// The input/output part_grid owns all the created partitions, and
1141// the partitions own all the fake blobs that belong in the partitions.
1142// Since the other blobs in the other partitions will be owned by the block,
1143// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
1144// situation and collect the image blobs.
1145void ImageFind::FindImagePartitions(Image image_pix, const FCOORD &rotation,
1146 const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid,
1147 DebugPixa *pixa_debug, ColPartitionGrid *part_grid,
1148 ColPartition_LIST *big_parts) {
1149 int imageheight = pixGetHeight(image_pix);
1150 Boxa *boxa;
1151 Pixa *pixa;
1152 ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa);
1153 // Iterate the connected components in the image regions mask.
1154 int nboxes = 0;
1155 if (boxa != nullptr && pixa != nullptr) {
1156 nboxes = boxaGetCount(boxa);
1157 }
1158 for (int i = 0; i < nboxes; ++i) {
1159 l_int32 x, y, width, height;
1160 boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
1161 Image pix = pixaGetPix(pixa, i, L_CLONE);
1162 TBOX im_box(x, imageheight - y - height, x + width, imageheight - y);
1163 im_box.rotate(rotation); // Now matches all partitions and blobs.
1164 ColPartitionGridSearch rectsearch(part_grid);
1165 rectsearch.SetUniqueMode(true);
1166 ColPartition_LIST part_list;
1167 DivideImageIntoParts(im_box, rotation, rerotation, pix, &rectsearch, &part_list);
1168 if (textord_tabfind_show_images && pixa_debug != nullptr) {
1169 pixa_debug->AddPix(pix, "ImageComponent");
1170 tprintf("Component has %d parts\n", part_list.length());
1171 }
1172 pix.destroy();
1173 if (!part_list.empty()) {
1174 ColPartition_IT part_it(&part_list);
1175 if (part_list.singleton()) {
1176 // We didn't have to chop it into a polygon to fit around text, so
1177 // try expanding it to merge fragmented image parts, as long as it
1178 // doesn't touch strong text.
1179 ColPartition *part = part_it.extract();
1180 TBOX text_box(im_box);
1181 MaximalImageBoundingBox(part_grid, &text_box);
1182 while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)) {
1183 ;
1184 }
1185 part_it.set_to_list(&part_list);
1186 part_it.add_after_then_move(part);
1187 im_box = part->bounding_box();
1188 }
1189 EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
1190 // Iterate the part_list and put the parts into the grid.
1191 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
1192 ColPartition *image_part = part_it.extract();
1193 im_box = image_part->bounding_box();
1194 part_grid->InsertBBox(true, true, image_part);
1195 if (!part_it.at_last()) {
1196 ColPartition *neighbour = part_it.data_relative(1);
1197 image_part->AddPartner(false, neighbour);
1198 neighbour->AddPartner(true, image_part);
1199 }
1200 }
1201 }
1202 }
1203 boxaDestroy(&boxa);
1204 pixaDestroy(&pixa);
1205 DeleteSmallImages(part_grid);
1206#ifndef GRAPHICS_DISABLED
1207 if (textord_tabfind_show_images) {
1208 ScrollView *images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
1209 part_grid->DisplayBoxes(images_win_);
1210 }
1211#endif
1212}
1213
1214} // namespace tesseract.
#define INT_VAR(name, val, comment)
Definition: params.h:356
#define ASSERT_HOST(x)
Definition: errcode.h:54
@ TBOX
const double kMinRectangularFraction
Definition: imagefind.cpp:41
BlobRegionType
Definition: blobbox.h:74
@ BRT_NOISE
Definition: blobbox.h:75
@ BRT_POLYIMAGE
Definition: blobbox.h:79
@ BRT_VERT_TEXT
Definition: blobbox.h:81
@ BRT_RECTIMAGE
Definition: blobbox.h:78
const int kNoisePadding
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:115
@ BTFT_CHAIN
Definition: blobbox.h:114
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:116
@ BTFT_NONTEXT
Definition: blobbox.h:112
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:211
const int kRGBRMSColors
Definition: colpartition.h:36
const double kMaxRectangularGradient
Definition: imagefind.cpp:46
const int kMinImageFindSize
Definition: imagefind.cpp:48
const double kMaxRectangularFraction
Definition: imagefind.cpp:43
BlobNeighbourDir
Definition: blobbox.h:89
@ BND_LEFT
Definition: blobbox.h:89
@ BND_RIGHT
Definition: blobbox.h:89
@ BND_BELOW
Definition: blobbox.h:89
@ BND_ABOVE
Definition: blobbox.h:89
@ BND_COUNT
Definition: blobbox.h:89
void AddPix(const Image pix, const char *caption)
Definition: debugpixa.h:32
void destroy()
Definition: image.cpp:32
TDimension left() const
Definition: rect.h:82
int y_gap(const TBOX &box) const
Definition: rect.h:245
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
void set_right(int x)
Definition: rect.h:92
void set_left(int x)
Definition: rect.h:85
void rotate(const FCOORD &vec)
Definition: rect.h:210
TDimension top() const
Definition: rect.h:68
int x_gap(const TBOX &box) const
Definition: rect.h:238
bool null_box() const
Definition: rect.h:60
void set_bottom(int y)
Definition: rect.h:78
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
void set_top(int y)
Definition: rect.h:71
void SetUniqueMode(bool mode)
Definition: bbgrid.h:249
void StartFullSearch()
Definition: bbgrid.h:701
BBC * NextFullSearch()
Definition: bbgrid.h:711
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:649
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:529
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:633
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
BlobRegionType blob_type() const
Definition: colpartition.h:147
const TBOX & bounding_box() const
Definition: colpartition.h:108
void AddPartner(bool upper, ColPartition *partner)
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Image pix)
Definition: imagefind.cpp:441
static bool BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end)
Definition: imagefind.cpp:347
static void FindImagePartitions(Image image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Definition: imagefind.cpp:1145
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, Image image_mask)
Definition: imagefind.cpp:1092
static Image FindImages(Image pix, DebugPixa *pixa_debug)
Definition: imagefind.cpp:252
static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation, Image pix)
Definition: imagefind.cpp:463
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:368