Package nltk_lite :: Package contrib :: Package classifier :: Module commandline
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.classifier.commandline

# Natural Language Toolkit CommandLine
#     understands the command line interaction
# Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com>
#
# URL: <http://nltk.sf.net>
# This software is distributed under GPL, for license information see LICENSE.TXT
  7  from optparse import OptionParser 
  8  from nltk_lite.contrib.classifier.exceptions import filenotfounderror as fnf, invaliddataerror as inv 
  9  from nltk_lite.contrib.classifier import format 
 10   
 11  D_help = "Used to specify the data format.                      " \ 
 12          + "Options: C45 for C4.5 format.                        " \ 
 13          + "Default: C45.                                        " 
 14   
 15   
 16  ALGORITHM = 'algorithm' 
 17  FILES = 'files' 
 18  TRAINING = 'training' 
 19  TEST = 'test' 
 20  GOLD = 'gold' 
 21  DATA_FORMAT = 'data_format' 
 22   
 23  C45_FORMAT = 'C45'  
 24   
 25  DATA_FORMAT_MAPPINGS = {C45_FORMAT: format.C45_FORMAT} 
 26   
class CommandLineInterface(OptionParser):
    """
    Option parser that registers the options shared by the classifier command
    line tools (algorithm, files, training/test/gold files and data format)
    and stores their parsed values as attributes.
    """

    def __init__(self, alg_choices, alg_default, a_help, f_help, t_help, T_help, g_help):
        """
        Registers the common options on the parser.

        alg_choices -- allowed values for -a/--algorithm
        alg_default -- default value for -a/--algorithm
        a_help, f_help, t_help, T_help, g_help -- help texts for the
            algorithm, files, training-file, test-file and gold-file options
        """
        OptionParser.__init__(self)
        self.add_option("-a", "--algorithm", dest=ALGORITHM, type="choice",
                        choices=alg_choices, default=alg_default, help=a_help)
        self.add_option("-f", "--files", dest=FILES, type="string", help=f_help)
        self.add_option("-t", "--training-file", dest=TRAINING, type="string", help=t_help)
        self.add_option("-T", "--test-file", dest=TEST, type="string", help=T_help)
        self.add_option("-g", "--gold-file", dest=GOLD, type="string", help=g_help)

        # optparse only accepts a list or tuple for ``choices``.  On Python 3
        # dict.keys() is a view object and is rejected with an OptionError, so
        # the keys are materialised into a list (a no-op change on Python 2).
        self.add_option("-D", "--data-format", dest=DATA_FORMAT, type="choice",
                        choices=list(DATA_FORMAT_MAPPINGS),
                        default=C45_FORMAT, help=D_help)

    def get_value(self, name):
        """
        Returns the parsed value stored under ``name``, or None when the
        option has no value.  Reads ``self.values``, so the arguments must
        have been parsed first.
        """
        return self.values.ensure_value(name, None)

    def parse(self, args):
        """
        method to aid testing
        """
        self.parse_args(args, None)

    def execute(self):
        """
        Stores values from arguments which are common to all command line interfaces
        """
        self.algorithm = self.get_value(ALGORITHM)
        self.files = self.get_value(FILES)
        self.training_path = self.get_value(TRAINING)
        self.test_path = self.get_value(TEST)
        self.gold_path = self.get_value(GOLD)
        # Translate the command line spelling into the data format object.
        self.data_format = DATA_FORMAT_MAPPINGS[self.get_value(DATA_FORMAT)]

    def run(self, args):
        """
        Main method which delegates all the work
        """
        self.parse(args)
        self.execute()

    # NOTE(review): the ``def`` line of this method is missing from the
    # extracted listing; the name is reconstructed -- confirm against the
    # original module.
    def validate_basic_arguments_are_present(self):
        """
        Reports an error unless an algorithm is given together with either
        the files argument or a training file plus a test or gold file.
        """
        no_test_or_gold = self.test_path is None and self.gold_path is None
        no_separate_files = self.training_path is None or no_test_or_gold
        # Parenthesised to make the original ``or``/``and`` precedence explicit.
        if self.algorithm is None or (self.files is None and no_separate_files):
            self.required_arguments_not_present_error()

    # NOTE(review): the ``def`` line of this method is missing from the
    # extracted listing; the name is reconstructed -- confirm against the
    # original module.
    def validate_files_arg_is_exclusive(self):
        """
        Reports an error when the files argument is combined with any of the
        training, test or gold arguments.
        """
        separate_file_given = (self.training_path is not None
                               or self.test_path is not None
                               or self.gold_path is not None)
        if self.files is not None and separate_file_given:
            self.error("Invalid arguments. The files argument cannot exist with training, test or gold arguments.")

    def get_instances(self, training_path, test_path, gold_path, ignore_missing = False):
        """
        Loads everything the data format can read for the given paths.

        Returns the list [training, attributes, klass, test, gold].  ``test``
        and ``gold`` are None when their path is None, or when
        ``ignore_missing`` is true and the file is not found.
        """
        training = self.data_format.get_training_instances(training_path)
        attributes = self.data_format.get_attributes(training_path)
        klass = self.data_format.get_klass(training_path)
        test = self.__get_instance(self.data_format.get_test_instances, test_path, ignore_missing)
        gold = self.__get_instance(self.data_format.get_gold_instances, gold_path, ignore_missing)
        return [training, attributes, klass, test, gold]

    def __get_instance(self, method, path, ignore_if_missing):
        """
        Calls ``method(path)`` and returns its result; returns None when
        ``path`` is None.  When ``ignore_if_missing`` is true a
        FileNotFoundError (from the project's exceptions package) raised by
        ``method`` is swallowed and None is returned instead.
        """
        if path is None:
            return None
        if not ignore_if_missing:
            return method(path)
        try:
            return method(path)
        except fnf.FileNotFoundError:
            return None

    # The ``def`` line of this method is missing from the extracted listing;
    # the name is taken from the call in the argument validation above.
    def required_arguments_not_present_error(self):
        """
        Reports the standard "required arguments missing" error through
        OptionParser.error.
        """
        self.error("Invalid arguments. One or more required arguments are not present.")

    def write_to_file(self, suffix, training, attributes, klass, test, gold):
        """
        Writes the training data, the optional test and gold data and the
        metadata through the data format, appending ``suffix`` to the stored
        paths.  Returns the list of values returned by the individual write
        calls, in the order training, test, gold, metadata.
        """
        files_written = [self.data_format.write_training_to_file(training, self.training_path + suffix)]
        if test is not None:
            files_written.append(self.data_format.write_test_to_file(test, self.test_path + suffix))
        if gold is not None:
            files_written.append(self.data_format.write_gold_to_file(gold, self.gold_path + suffix))
        files_written.append(self.data_format.write_metadata_to_file(attributes, klass, self.training_path + suffix))
        return files_written
104
def as_integers(name, str_array):
    """
    Converts a comma separated string such as "1, 2,3" into a list of ints.

    name -- label used in the error message to identify the offending value
    str_array -- the comma separated string, or None

    Returns an empty list when ``str_array`` is None.  Raises
    InvalidDataError (from the project's exceptions package) when any element
    is not an integer, which includes an empty string.
    """
    if str_array is None:
        return []
    indices = []
    for element in str_array.split(','):
        try:
            # Surrounding whitespace around each element is tolerated.
            indices.append(int(element.strip()))
        except ValueError:
            raise inv.InvalidDataError('Invalid Data. ' + name + ' should be integers.')
    return indices
114