tesseract 5.2.0
Loading...
Searching...
No Matches
tesseract::ParagraphModelSmearer Class Reference

#include <paragraphs_internal.h>

Public Member Functions

 ParagraphModelSmearer (std::vector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
 
void Smear ()
 

Detailed Description

Definition at line 239 of file paragraphs_internal.h.

Constructor & Destructor Documentation

◆ ParagraphModelSmearer()

tesseract::ParagraphModelSmearer::ParagraphModelSmearer ( std::vector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)

Definition at line 1363 of file paragraphs.cpp.

1365 : theory_(theory), rows_(rows), row_start_(row_start), row_end_(row_end) {
1366 if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
1367 row_start_ = 0;
1368 row_end_ = 0;
1369 return;
1370 }
1371 open_models_.resize(open_models_.size() + row_end - row_start + 2);
1372}

Member Function Documentation

◆ Smear()

void tesseract::ParagraphModelSmearer::Smear ( )

Definition at line 1407 of file paragraphs.cpp.

1407 {
1408 CalculateOpenModels(row_start_, row_end_);
1409
1410 // For each row which we're unsure about (that is, it is LT_UNKNOWN or
1411 // we have multiple LT_START hypotheses), see if there's a model that
1412 // was recently used (an "open" model) which might model it well.
1413 for (int i = row_start_; i < row_end_; i++) {
1414 RowScratchRegisters &row = (*rows_)[i];
1415 if (row.ri_->num_words == 0) {
1416 continue;
1417 }
1418
1419 // Step One:
1420 // Figure out if there are "open" models which are left-aligned or
1421 // right-aligned. This is important for determining whether the
1422 // "first" word in a row would fit at the "end" of the previous row.
1423 bool left_align_open = false;
1424 bool right_align_open = false;
1425 for (auto &m : OpenModels(i)) {
1426 switch (m->justification()) {
1427 case JUSTIFICATION_LEFT:
1428 left_align_open = true;
1429 break;
1430 case JUSTIFICATION_RIGHT:
1431 right_align_open = true;
1432 break;
1433 default:
1434 left_align_open = right_align_open = true;
1435 }
1436 }
1437 // Step Two:
1438 // Use that knowledge to figure out if this row is likely to
1439 // start a paragraph.
1440 bool likely_start;
1441 if (i == 0) {
1442 likely_start = true;
1443 } else {
1444 if ((left_align_open && right_align_open) || (!left_align_open && !right_align_open)) {
1445 likely_start = LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_LEFT) ||
1446 LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_RIGHT);
1447 } else if (left_align_open) {
1448 likely_start = LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_LEFT);
1449 } else {
1450 likely_start = LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_RIGHT);
1451 }
1452 }
1453
1454 // Step Three:
1455 // If this text line seems like an obvious first line of an
1456 // open model, or an obvious continuation of an existing
1457 // modelled paragraph, mark it up.
1458 if (likely_start) {
1459 // Add Start Hypotheses for all Open models that fit.
1460 for (unsigned m = 0; m < OpenModels(i).size(); m++) {
1461 if (ValidFirstLine(rows_, i, OpenModels(i)[m])) {
1462 row.AddStartLine(OpenModels(i)[m]);
1463 }
1464 }
1465 } else {
1466 // Add relevant body line hypotheses.
1467 SetOfModels last_line_models;
1468 if (i > 0) {
1469 (*rows_)[i - 1].StrongHypotheses(&last_line_models);
1470 } else {
1471 theory_->NonCenteredModels(&last_line_models);
1472 }
1473 for (auto model : last_line_models) {
1474 if (ValidBodyLine(rows_, i, model)) {
1475 row.AddBodyLine(model);
1476 }
1477 }
1478 }
1479
1480 // Step Four:
1481 // If we're still quite unsure about this line, go through all
1482 // models in our theory and see if this row could be the start
1483 // of any of our models.
1484 if (row.GetLineType() == LT_UNKNOWN ||
1485 (row.GetLineType() == LT_START && !row.UniqueStartHypothesis())) {
1486 SetOfModels all_models;
1487 theory_->NonCenteredModels(&all_models);
1488 for (auto &all_model : all_models) {
1489 if (ValidFirstLine(rows_, i, all_model)) {
1490 row.AddStartLine(all_model);
1491 }
1492 }
1493 }
1494 // Step Five:
1495 // Since we may have updated the hypotheses about this row, we need
1496 // to recalculate the Open models for the rest of rows[i + 1, row_end)
1497 if (row.GetLineType() != LT_UNKNOWN) {
1498 CalculateOpenModels(i + 1, row_end_);
1499 }
1500 }
1501}
@ JUSTIFICATION_LEFT
Definition: capi.h:121
@ JUSTIFICATION_RIGHT
Definition: capi.h:123
std::vector< const ParagraphModel * > SetOfModels
bool ValidBodyLine(const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool ValidFirstLine(const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
void NonCenteredModels(SetOfModels *models)

The documentation for this class was generated from the following files: