tesseract 5.2.0
Loading...
Searching...
No Matches
unicharset_extractor.cpp File Reference
#include <cstdlib>
#include "boxread.h"
#include "commandlineflags.h"
#include "commontraining.h"
#include "lang_model_helpers.h"
#include "normstrngs.h"
#include "unicharset.h"
#include "unicharset_training_utils.h"

Go to the source code of this file.

Namespaces

namespace  tesseract
 

Functions

int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main (int argc, char **argv)

Definition at line 96 of file unicharset_extractor.cpp.

96 {
97 tesseract::CheckSharedLibraryVersion();
98 if (argc > 1) {
99 tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
100 }
101 if (argc < 2) {
102 tprintf(
103 "Usage: %s [--output_unicharset filename] [--norm_mode mode]"
104 " box_or_text_file [...]\n",
105 argv[0]);
106 tprintf("Where mode means:\n");
107 tprintf(" 1=combine graphemes (use for Latin and other simple scripts)\n");
108 tprintf(" 2=split graphemes (use for Indic/Khmer/Myanmar)\n");
109 tprintf(" 3=pure unicode (use for Arabic/Hebrew/Thai/Tibetan)\n");
110 tprintf("Reads box or plain text files to extract the unicharset.\n");
111 return EXIT_FAILURE;
112 }
113 return tesseract::Main(argc, argv);
114 }
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
void tprintf(const char *format,...)
Definition: tprintf.cpp:41