tesseract 5.2.0
Loading...
Searching...
No Matches
tesseract::TatweelTest Class Reference
Inheritance diagram for tesseract::TatweelTest:

Protected Member Functions

void SetUp () override
 
 TatweelTest ()
 
std::string TestDataNameToPath (const std::string &name)
 

Protected Attributes

UNICHARSET unicharset_
 

Detailed Description

Definition at line 35 of file tatweel_test.cc.

Constructor & Destructor Documentation

◆ TatweelTest()

tesseract::TatweelTest::TatweelTest ( )
inline protected

Definition at line 42 of file tatweel_test.cc.

42 {
43 std::string filename = TestDataNameToPath("ara.wordlist");
44 if (file_exists(filename.c_str())) {
45 std::string wordlist("\u0640");
46 CHECK_OK(file::GetContents(filename, &wordlist, file::Defaults()));
47 // Put all the unicodes in the unicharset_.
48 UnicodeText text;
49 text.PointToUTF8(wordlist.data(), wordlist.size());
50 int num_tatweel = 0;
51 for (auto it = text.begin(); it != text.end(); ++it) {
52 std::string utf8 = it.get_utf8_string();
53 if (utf8.find("\u0640") != std::string::npos)
54 ++num_tatweel;
55 unicharset_.unichar_insert(utf8.c_str());
56 }
57 LOG(INFO) << "Num tatweels in source data=" << num_tatweel;
58 EXPECT_GT(num_tatweel, 0);
59 }
60 }
@ INFO
Definition: log.h:28
#define CHECK_OK(test)
Definition: include_gunit.h:84
@ LOG
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
Definition: unicharset.cpp:654
static int Defaults()
Definition: include_gunit.h:61
static bool GetContents(const std::string &filename, std::string *out, int)
Definition: include_gunit.h:52
std::string TestDataNameToPath(const std::string &name)
Definition: tatweel_test.cc:62
const_iterator end() const
Definition: unicodetext.cc:412
UnicodeText & PointToUTF8(const char *utf8_buffer, int byte_length)
Definition: unicodetext.cc:254
const_iterator begin() const
Definition: unicodetext.cc:408

Member Function Documentation

◆ SetUp()

void tesseract::TatweelTest::SetUp ( )
inline override protected

Definition at line 37 of file tatweel_test.cc.

37 {
38 static std::locale system_locale("");
39 std::locale::global(system_locale);
40 }

◆ TestDataNameToPath()

std::string tesseract::TatweelTest::TestDataNameToPath ( const std::string &  name)
inline protected

Definition at line 62 of file tatweel_test.cc.

62 {
63 return file::JoinPath(TESTDATA_DIR, name);
64 }
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65

Member Data Documentation

◆ unicharset_

UNICHARSET tesseract::TatweelTest::unicharset_
protected

Definition at line 65 of file tatweel_test.cc.


The documentation for this class was generated from the following file: