tesseract  4.1.1
underlin.cpp File Reference
#include "underlin.h"

Go to the source code of this file.

Functions

void restore_underlined_blobs (TO_BLOCK *block)
 
TO_ROW * most_overlapping_row (TO_ROW_LIST *rows, BLOBNBOX *blob)
 
void find_underlined_blobs (BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
 
void vertical_cunderline_projection (C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
 

Variables

double textord_underline_offset = 0.1
 
bool textord_restore_underlines = true
 

Function Documentation

◆ find_underlined_blobs()

void find_underlined_blobs ( BLOBNBOX *  u_line,
QSPLINE *  baseline,
float  xheight,
float  baseline_offset,
ICOORDELT_LIST *  chop_cells 
)

Definition at line 164 of file underlin.cpp.

170  { //Projects the outlines of u_line against the baseline band and appends one (start x, end x) cell to chop_cells for every run of columns with a non-empty middle projection.
171  int16_t x, y; //start and end column of the current run
172  ICOORD blob_chop; //current run packed as (start x, end x)
173  TBOX blob_box = u_line->bounding_box ();
174  //iterator used to append cells to chop_cells
175  ICOORDELT_IT cell_it = chop_cells;
176  STATS upper_proj (blob_box.left (), blob_box.right () + 1); //filled below but not read in this function
177  STATS middle_proj (blob_box.left (), blob_box.right () + 1); //the only projection consulted when building cells
178  STATS lower_proj (blob_box.left (), blob_box.right () + 1); //filled below but not read in this function
179  C_OUTLINE_IT out_it; //outlines of blob
180 
181  ASSERT_HOST (u_line->cblob () != nullptr);
182 
183  out_it.set_to_list (u_line->cblob ()->out_list ());
184  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
185  vertical_cunderline_projection (out_it.data (),
186  baseline, xheight, baseline_offset,
187  &lower_proj, &middle_proj, &upper_proj);
188  }
189 
190  for (x = blob_box.left (); x < blob_box.right (); x++) { //scan the columns of the bounding box, left to right
191  if (middle_proj.pile_count (x) > 0) {
192  for (y = x + 1;
193  y < blob_box.right () && middle_proj.pile_count (y) > 0; y++); //empty body: only advances y to the end of the run
194  blob_chop = ICOORD (x, y);
195  cell_it.add_after_then_move (new ICOORDELT (blob_chop));
196  x = y; //resume after the run; column y is either empty or the right edge, so the loop increment may skip it
197  }
198  }
199 }
integer coordinate
Definition: points.h:31
const TBOX & bounding_box() const
Definition: blobbox.h:230
int16_t left() const
Definition: rect.h:72
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:209
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
C_BLOB * cblob() const
Definition: blobbox.h:268
Definition: rect.h:34
Definition: statistc.h:31
int16_t right() const
Definition: rect.h:79
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ most_overlapping_row()

TO_ROW* most_overlapping_row ( TO_ROW_LIST *  rows,
BLOBNBOX *  blob 
)

Definition at line 107 of file underlin.cpp.

110  { //Returns the row from rows that best overlaps blob vertically, measured at the blob's horizontal midpoint; returns nullptr when rows is empty.
111  int16_t x = (blob->bounding_box ().left ()
112  + blob->bounding_box ().right ()) / 2; //x at which every row baseline is evaluated
113  TO_ROW_IT row_it = rows; //row iterator
114  TO_ROW *row; //current row
115  TO_ROW *best_row; //output row
116  float overlap; //of blob & row
117  float bestover; //best overlap
118 
119  best_row = nullptr;
120  bestover = static_cast<float>(-INT32_MAX); //sentinel: any computed overlap compares greater
121  if (row_it.empty ())
122  return nullptr;
123  row = row_it.data ();
124  row_it.mark_cycle_pt ();
125  while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top () //phase 1: rows whose baseline + descdrop exceeds the blob's top
126  && !row_it.cycled_list ()) {
127  best_row = row; //remember the last such row as a fallback
128  bestover =
129  blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop; //NOTE(review): descdrop is added here but the loop test compares against baseline + descdrop - confirm the sign is intended
130  row_it.forward ();
131  row = row_it.data ();
132  }
133  while (row->baseline.y (x) + row->xheight + row->ascrise //phase 2: rows whose baseline + xheight + ascrise reaches the blob's bottom
134  >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
135  overlap = row->baseline.y (x) + row->xheight + row->ascrise; //upper limit: min(row upper edge, blob top)
136  if (blob->bounding_box ().top () < overlap)
137  overlap = blob->bounding_box ().top ();
138  if (blob->bounding_box ().bottom () > //lower limit: max(blob bottom, baseline + descdrop)
139  row->baseline.y (x) + row->descdrop)
140  overlap -= blob->bounding_box ().bottom ();
141  else
142  overlap -= row->baseline.y (x) + row->descdrop;
143  if (overlap > bestover) {
144  bestover = overlap;
145  best_row = row;
146  }
147  row_it.forward ();
148  row = row_it.data ();
149  }
150  if (bestover < 0 //nothing overlapped: consider the row the iterator stopped on
151  && row->baseline.y (x) + row->xheight + row->ascrise
152  - blob->bounding_box ().bottom () > bestover) //NOTE(review): if the list was fully cycled, row is whatever data() yields after the last forward() - confirm it is a valid candidate
153  best_row = row;
154  return best_row;
155 }
float xheight
Definition: blobbox.h:657
const TBOX & bounding_box() const
Definition: blobbox.h:230
int16_t left() const
Definition: rect.h:72
float descdrop
Definition: blobbox.h:660
int16_t bottom() const
Definition: rect.h:65
double y(double x) const
Definition: quspline.cpp:209
QSPLINE baseline
Definition: blobbox.h:670
float ascrise
Definition: blobbox.h:659
int16_t right() const
Definition: rect.h:79
int16_t top() const
Definition: rect.h:58

◆ restore_underlined_blobs()

void restore_underlined_blobs ( TO_BLOCK *  block)

Definition at line 30 of file underlin.cpp.

32  {
33  int16_t chop_coord; //chop boundary
34  TBOX blob_box; //of underline
35  BLOBNBOX *u_line; //underline bit
36  TO_ROW *row; //best row for blob
37  ICOORDELT_LIST chop_cells; //blobs to cut out
38  //real underlines
39  BLOBNBOX_LIST residual_underlines;
40  C_OUTLINE_LIST left_coutlines;
41  C_OUTLINE_LIST right_coutlines;
42  ICOORDELT_IT cell_it = &chop_cells;
43  //under lines
44  BLOBNBOX_IT under_it = &block->underlines;
45  BLOBNBOX_IT ru_it = &residual_underlines;
46 
47  if (block->get_rows()->empty())
48  return; // Don't crash if there are no rows.
49  for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
50  under_it.forward ()) {
51  u_line = under_it.extract ();
52  blob_box = u_line->bounding_box ();
53  row = most_overlapping_row (block->get_rows (), u_line);
54  if (row == nullptr)
55  return; // Don't crash if there is no row.
56  find_underlined_blobs (u_line, &row->baseline, row->xheight,
58  &chop_cells);
59  cell_it.set_to_list (&chop_cells);
60  for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
61  cell_it.forward ()) {
62  chop_coord = cell_it.data ()->x ();
63  if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
64  split_to_blob (u_line, chop_coord,
66  &left_coutlines,
67  &right_coutlines);
68  if (!left_coutlines.empty()) {
69  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
70  }
71  chop_coord = cell_it.data ()->y ();
72  split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5,
73  &left_coutlines, &right_coutlines);
74  if (!left_coutlines.empty()) {
75  row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
76  }
77  u_line = nullptr; //no more blobs to add
78  }
79  delete cell_it.extract();
80  }
81  if (!right_coutlines.empty ()) {
82  split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5,
83  &left_coutlines, &right_coutlines);
84  if (!left_coutlines.empty())
85  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
86  }
87  if (u_line != nullptr) {
88  delete u_line->cblob();
89  delete u_line;
90  }
91  }
92  if (!ru_it.empty()) {
93  ru_it.move_to_first();
94  for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
95  under_it.add_after_then_move(ru_it.extract());
96  }
97  }
98 }
TO_ROW_LIST * get_rows()
Definition: blobbox.h:704
void split_to_blob(BLOBNBOX *blob, int16_t chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
Definition: fpchop.cpp:236
void find_underlined_blobs(BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
Definition: underlin.cpp:164
float xheight
Definition: blobbox.h:657
const TBOX & bounding_box() const
Definition: blobbox.h:230
BLOBNBOX_LIST underlines
Definition: blobbox.h:773
int textord_fp_chop_error
Definition: fpchop.cpp:32
TO_ROW * most_overlapping_row(TO_ROW_LIST *rows, BLOBNBOX *blob)
Definition: underlin.cpp:107
double textord_underline_offset
Definition: underlin.cpp:21
void insert_blob(BLOBNBOX *blob)
Definition: blobbox.cpp:769
C_BLOB * cblob() const
Definition: blobbox.h:268
Definition: rect.h:34
QSPLINE baseline
Definition: blobbox.h:670
int16_t right() const
Definition: rect.h:79

◆ vertical_cunderline_projection()

void vertical_cunderline_projection ( C_OUTLINE *  outline,
QSPLINE *  baseline,
float  xheight,
float  baseline_offset,
STATS *  lower_proj,
STATS *  middle_proj,
STATS *  upper_proj 
)

Definition at line 209 of file underlin.cpp.

217  { //Walks every edge step of outline and accumulates signed per-column counts into the three projections, split at lower_y and upper_y; then recurses into the outline's children.
218  ICOORD pos; //current point
219  ICOORD step; //edge step
220  int16_t lower_y, upper_y; //region limits
221  int32_t length; //of outline
222  int16_t stepindex; //current step
223  C_OUTLINE_IT out_it = outline->child (); //iterator over child outlines, used by the recursion at the end
224 
225  pos = outline->start_pos ();
226  length = outline->pathlength ();
227  for (stepindex = 0; stepindex < length; stepindex++) {
228  step = outline->step (stepindex);
229  if (step.x () > 0) { //step in +x: contributes negative counts at column pos.x ()
230  lower_y =
231  static_cast<int16_t>(floor (baseline->y (pos.x ()) + baseline_offset + 0.5)); //baseline + offset, rounded to nearest
232  upper_y =
233  static_cast<int16_t>(floor (baseline->y (pos.x ()) + baseline_offset +
234  xheight + 0.5)); //baseline + offset + xheight, rounded to nearest
235  if (pos.y () >= lower_y) {
236  lower_proj->add (pos.x (), -lower_y);
237  if (pos.y () >= upper_y) {
238  middle_proj->add (pos.x (), lower_y - upper_y); //edge at or above upper_y: whole middle band counted
239  upper_proj->add (pos.x (), upper_y - pos.y ());
240  }
241  else
242  middle_proj->add (pos.x (), lower_y - pos.y ()); //edge inside the middle band
243  }
244  else
245  lower_proj->add (pos.x (), -pos.y ()); //edge below lower_y: only the lower projection changes
246  }
247  else if (step.x () < 0) { //step in -x: same split with positive counts, at column pos.x () - 1
248  lower_y =
249  static_cast<int16_t>(floor (baseline->y (pos.x () - 1) + baseline_offset +
250  0.5));
251  upper_y =
252  static_cast<int16_t>(floor (baseline->y (pos.x () - 1) + baseline_offset +
253  xheight + 0.5));
254  if (pos.y () >= lower_y) {
255  lower_proj->add (pos.x () - 1, lower_y);
256  if (pos.y () >= upper_y) {
257  middle_proj->add (pos.x () - 1, upper_y - lower_y);
258  upper_proj->add (pos.x () - 1, pos.y () - upper_y);
259  }
260  else
261  middle_proj->add (pos.x () - 1, pos.y () - lower_y);
262  }
263  else
264  lower_proj->add (pos.x () - 1, pos.y ());
265  }
266  pos += step; //steps with no x component only move pos
267  }
268 
269  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { //add the child outlines to the same projections
270  vertical_cunderline_projection (out_it.data (),
271  baseline, xheight, baseline_offset,
272  lower_proj, middle_proj, upper_proj);
273  }
274 }
integer coordinate
Definition: points.h:31
C_OUTLINE_LIST * child()
Definition: coutln.h:108
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
int32_t pathlength() const
Definition: coutln.h:135
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:209
int16_t x() const
access function
Definition: points.h:52
const ICOORD & start_pos() const
Definition: coutln.h:148
ICOORD step(int index) const
Definition: coutln.h:144
int16_t y() const
access_function
Definition: points.h:56

Variable Documentation

◆ textord_restore_underlines

bool textord_restore_underlines = true

"Chop underlines & put back"

Definition at line 22 of file underlin.cpp.

◆ textord_underline_offset

double textord_underline_offset = 0.1

"Fraction of x to ignore"

Definition at line 21 of file underlin.cpp.