Package nltk_lite :: Package contrib :: Package classifier_tests :: Module attributestests
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.classifier_tests.attributestests

  1  # Natural Language Toolkit 
  2  # 
  3  # Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com> 
  4  # 
  5  # URL: <http://nltk.sf.net> 
  6  # This software is distributed under GPL, for license information see LICENSE.TXT 
  7   
  8  from nltk_lite.contrib.classifier import attribute as a, discretisedattribute as da, numrange as nr, format 
  9  from nltk_lite.contrib.classifier_tests import * 
 10   
class AttributesTestCase(unittest.TestCase):
    """Tests for the attribute collection returned by the C4.5 format reader.

    Every test reads attribute definitions through
    ``format.C45_FORMAT.get_attributes`` from fixture datasets located
    under ``datasetsDir(self)``.

    NOTE(review): in the listing this class was recovered from, only the
    ``setUp`` and ``test_remove_attributes`` headers were present; the
    remaining method names below are reconstructed from what each body
    asserts and should be confirmed against the upstream file.
    """

    def _dataset_path(self, folder, name):
        """Return the path prefix of the fixture dataset ``name`` in ``folder``."""
        return datasetsDir(self) + folder + SEP + name

    def setUp(self):
        """Load the attributes of the 'phoney' dataset used by most tests."""
        self.attrs = format.C45_FORMAT.get_attributes(
            self._dataset_path('test_phones', 'phoney'))

    def test_counts_correct_number_of_attributes(self):
        """The phoney dataset declares five attributes."""
        self.assertEqual(5, len(self.attrs), 'there should be 5 attributes')

    def test_attributes_have_values(self):
        """A row of known attribute values is accepted by has_values."""
        self.assertTrue(
            self.attrs.has_values(['dual', 'big', 'symbian', 'y', 'y']))

    def test_attributes_are_in_order(self):
        """Attributes are exposed in the expected positional order."""
        expected_names = ['band', 'size', 'os', 'pda', 'mp3']
        for position, expected in enumerate(expected_names):
            self.assertEqual(expected, self.attrs[position].name)

    def test_attributes_contain_an_attribute(self):
        """Membership testing finds an equal Attribute instance."""
        band = a.Attribute('band', ['dual', 'tri', 'quad'], 0)
        self.assertTrue(band in self.attrs)

    def test_attributes_are_equal(self):
        """Collections built from equal attributes compare equal."""
        attrs = a.Attributes([
            a.Attribute('band', ['dual', 'tri', 'quad'], 0),
            a.Attribute('size', ['big', 'small', 'medium'], 1),
        ])
        same = a.Attributes([
            a.Attribute('band', ['dual', 'tri', 'quad'], 0),
            a.Attribute('size', ['big', 'small', 'medium'], 1),
        ])
        self.assertEqual(attrs, same, 'they should be the same')

        other = a.Attributes([
            a.Attribute('band', ['dual', 'tri', 'quad'], 0),
            a.Attribute('pda', ['y', 'n'], 1),
        ])
        # NOTE(review): this compares the five-attribute fixture
        # (self.attrs), not the local two-attribute ``attrs``, against
        # ``other`` -- kept as in the original; confirm which was intended.
        self.assertNotEqual(self.attrs, other, 'shouldnt be the same')

    def test_index_stored_in_attribute(self):
        """Each attribute's index matches its position in the collection."""
        # Indexed access is deliberate: position-based lookup is what is
        # being verified here.
        for i in range(len(self.attrs)):
            self.assertEqual(i, self.attrs[i].index)

    def test_has_continuous_attributes(self):
        """Continuous attributes are reported for weather but not phoney."""
        has_cont = format.C45_FORMAT.get_attributes(
            self._dataset_path('numerical', 'weather'))
        self.assertTrue(has_cont.has_continuous_attributes())

        all_disc = format.C45_FORMAT.get_attributes(
            self._dataset_path('test_phones', 'phoney'))
        self.assertFalse(all_disc.has_continuous_attributes())

    def test_has_values_with_continuous_attribute(self):
        """has_values accepts a weather row containing the value '21'."""
        has_cont = format.C45_FORMAT.get_attributes(
            self._dataset_path('numerical', 'weather'))
        self.assertTrue(
            has_cont.has_values(['sunny', '21', 'normal', 'true']))

    def test_subset_of_attributes(self):
        """subset keeps the requested attributes and their original indices."""
        attrs = format.C45_FORMAT.get_attributes(
            self._dataset_path('numerical', 'person'))
        subset = attrs.subset([2, 4, 5])
        self.assertEqual(3, len(subset))
        self.assertEqual(2, subset[0].index)
        self.assertEqual(4, subset[1].index)
        self.assertEqual(5, subset[2].index)

    def test_discretise_attributes(self):
        """discretise replaces only the attributes at the given indices."""
        attrs = format.C45_FORMAT.get_attributes(
            self._dataset_path('numerical', 'person'))
        self.assertTrue(attrs[0].is_continuous())
        self.assertTrue(attrs[4].is_continuous())
        self.assertTrue(attrs[6].is_continuous())
        self.assertTrue(attrs[7].is_continuous())

        attrs.discretise([
            da.DiscretisedAttribute(
                'dependents', nr.Range(0, 2, True).split(2), 4),
            da.DiscretisedAttribute(
                'annualincome', nr.Range(0, 120000, True).split(5), 6),
        ])

        # The two discretised positions are no longer continuous ...
        self.assertFalse(attrs[4].is_continuous())
        self.assertFalse(attrs[6].is_continuous())

        # ... while the untouched ones still are.
        self.assertTrue(attrs[0].is_continuous())
        self.assertTrue(attrs[7].is_continuous())

        # One value per range segment: 2 segments and 5 segments.
        self.assertEqual(['a', 'b'], attrs[4].values)
        self.assertEqual(['a', 'b', 'c', 'd', 'e'], attrs[6].values)

    def test_empty_decision_stumps(self):
        """empty_decision_stumps yields 8 stumps, or 6 when given two attributes."""
        path = self._dataset_path('numerical', 'person')
        attrs = format.C45_FORMAT.get_attributes(path)
        klass = format.C45_FORMAT.get_klass(path)

        decision_stumps = attrs.empty_decision_stumps([], klass)
        self.assertEqual(8, len(decision_stumps))

        # Presumably the first argument lists attributes to leave out --
        # TODO confirm against Attributes.empty_decision_stumps.
        decision_stumps = attrs.empty_decision_stumps(
            [attrs[0], attrs[3]], klass)
        self.assertEqual(6, len(decision_stumps))

    def test_remove_attributes(self):
        """remove_attributes drops the given attributes and shifts the rest."""
        path = self._dataset_path('numerical', 'person')
        attrs = format.C45_FORMAT.get_attributes(path)

        self.assertEqual(8, len(attrs))
        attr1 = attrs[1]
        attrs.remove_attributes([attrs[0], attrs[6]])
        self.assertEqual(6, len(attrs))
        # With the former first attribute gone, the old second one leads.
        self.assertEqual(attr1, attrs[0])
if __name__ == '__main__':
    # Run only this module's test case with the plain-text runner.
    # loadTestsFromTestCase replaces unittest.makeSuite, which is deprecated
    # and no longer available in recent Python releases; the loader already
    # returns a TestSuite, so no extra wrapping is needed.
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(AttributesTestCase)
    unittest.TextTestRunner().run(suite)