10 """
11 Utility functions for evaluating processing modules.
12 """
13
14 import sets, math
15
17 """
18 Given a list of reference values and a corresponding list of test
19 values, return the percentage of corresponding values that are
20 equal. In particular, return the percentage of indices
21 C{0<i<=len(test)} such that C{test[i] == reference[i]}.
22
23 @type reference: C{list}
24 @param reference: An ordered list of reference values.
25 @type test: C{list}
26 @param test: A list of values to compare against the corresponding
27 reference values.
28 @raise ValueError: If C{reference} and C{length} do not have the
29 same length.
30 """
31 if len(reference) != len(test):
32 raise ValueError("Lists must have the same length.")
33 num_correct = [1 for x,y in zip(reference, test) if x==y]
34 return float(len(num_correct)) / len(reference)
35
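# Illustrative check for accuracy() (a hypothetical helper, not part of
# the original module): two of the three positions match, so the score
# is 2/3.
def _accuracy_example():
    ref = 'DET NN VB'.split()
    test = 'DET NN NN'.split()
    assert abs(accuracy(ref, test) - 2.0/3) < 1e-9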
37 """
38 Given a set of reference values and a set of test values, return
39 the percentage of test values that appear in the reference set.
40 In particular, return |C{reference}S{cap}C{test}|/|C{test}|.
41 If C{test} is empty, then return C{None}.
42
43 @type reference: C{Set}
44 @param reference: A set of reference values.
45 @type test: C{Set}
46 @param test: A set of values to compare against the reference set.
47 @rtype: C{float} or C{None}
48 """
49 if len(test) == 0:
50 return None
51 else:
52 return float(len(reference.intersection(test)))/len(test)
53
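# Illustrative check for precision() (hypothetical helper): both test
# values appear in the reference set, so precision is 1.0.
def _precision_example():
    reference = sets.Set(['a', 'b', 'c', 'd'])
    test = sets.Set(['c', 'd'])
    assert precision(reference, test) == 1.0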
55 """
56 Given a set of reference values and a set of test values, return
57 the percentage of reference values that appear in the test set.
58 In particular, return |C{reference}S{cap}C{test}|/|C{reference}|.
59 If C{reference} is empty, then return C{None}.
60
61 @type reference: C{Set}
62 @param reference: A set of reference values.
63 @type test: C{Set}
64 @param test: A set of values to compare against the reference set.
65 @rtype: C{float} or C{None}
66 """
67 if len(reference) == 0:
68 return None
69 else:
70 return float(len(reference.intersection(test)))/len(reference)
71
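# Illustrative check for recall() (hypothetical helper): only two of the
# four reference values appear in the test set, so recall is 0.5.
def _recall_example():
    reference = sets.Set(['a', 'b', 'c', 'd'])
    test = sets.Set(['c', 'd'])
    assert recall(reference, test) == 0.5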
73 """
74 Given a set of reference values and a set of test values, return
75 the f-measure of the test values, when compared against the
76 reference values. The f-measure is the harmonic mean of the
77 L{precision} and L{recall}, weighted by C{alpha}. In particular,
78 given the precision M{p} and recall M{r} defined by:
79 - M{p} = |C{reference}S{cap}C{test}|/|C{test}|
80 - M{r} = |C{reference}S{cap}C{test}|/|C{reference}|
81 The f-measure is:
82 - 1/(C{alpha}/M{p} + (1-C{alpha})/M{r})
83
84 If either C{reference} or C{test} is empty, then C{f_measure}
85 returns C{None}.
86
87 @type reference: C{Set}
88 @param reference: A set of reference values.
89 @type test: C{Set}
90 @param test: A set of values to compare against the reference set.
91 @rtype: C{float} or C{None}
92 """
93 p = precision(reference, test)
94 r = recall(reference, test)
95 if p is None or r is None:
96 return None
97 if p == 0 or r == 0:
98 return 0
99 return 1.0/(alpha/p + (1-alpha)/r)
100
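# Illustrative check for f_measure() (hypothetical helper): with p = 1.0
# and r = 0.5, the balanced (alpha=0.5) harmonic mean is
# 1/(0.5/1.0 + 0.5/0.5) = 2/3.
def _f_measure_example():
    reference = sets.Set(['a', 'b', 'c', 'd'])
    test = sets.Set(['c', 'd'])
    assert abs(f_measure(reference, test) - 2.0/3) < 1e-9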
102 """
103 Given a list of reference values and a corresponding list of test
104 probability distributions, return the average log likelihood of
105 the reference values, given the probability distributions.
106
107 @param reference: A list of reference values
108 @type reference: C{list}
109 @param test: A list of probability distributions over values to
110 compare against the corresponding reference values.
111 @type test: C{list} of L{ProbDist}
112 """
113 if len(reference) != len(test):
114 raise ValueError("Lists must have the same length.")
115
116
117 total_likelihood = sum(dist.logprob(val)
118 for (val, dist) in zip(reference, test))
119 return total_likelihood/len(reference)
120
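# Illustrative check for log_likelihood() (hypothetical helper).  The
# _Uniform class below is a minimal stand-in for a ProbDist: the only
# interface log_likelihood() relies on is a logprob() method.  Base-2
# logs are an assumption here, modeled on NLTK's ProbDist convention.
def _log_likelihood_example():
    class _Uniform:
        def __init__(self, n):
            self._n = n
        def logprob(self, val):
            return math.log(1.0 / self._n, 2)
    dists = [_Uniform(2), _Uniform(4)]
    # The average of log2(1/2) = -1 and log2(1/4) = -2 is -1.5.
    assert abs(log_likelihood(['a', 'b'], dists) + 1.5) < 1e-9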
122 """
123 The confusion matrix between a list of reference values and a
124 corresponding list of test values. Entry [M{r},M{t}] of this
125 matrix is a count of the number of times that the reference value
126 M{r} corresponds to the test value M{t}. E.g.:
127
128 >>> ref = 'DET NN VB DET JJ NN NN IN DET NN'.split()
129 >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split()
130 >>> cm = ConfusionMatrix(ref, test)
131 >>> print cm['NN', 'NN']
132 3
133
134 Note that the diagonal entries (M{Ri}=M{Tj}) of this matrix
135 corresponds to correct values; and the off-diagonal entries
136 correspond to incorrect values.
137 """
139 """
140 Construct a new confusion matrix from a list of reference
141 values and a corresponding list of test values.
142
143 @type reference: C{list}
144 @param reference: An ordered list of reference values.
145 @type test: C{list}
146 @param test: A list of values to compare against the
147 corresponding reference values.
148 @raise ValueError: If C{reference} and C{length} do not have
149 the same length.
150 """
151 if len(reference) != len(test):
152 raise ValueError('Lists must have the same length.')
153
154
155 values = sorted(set(reference+test))
156
157
158 indices = dict((val,i) for (i,val) in enumerate(values))
159
160
161 confusion = [[0 for val in values] for val in values]
162 max_conf = 0
163 for w,g in zip(reference, test):
164 confusion[indices[w]][indices[g]] += 1
165 max_conf = max(max_conf, confusion[indices[w]][indices[g]])
166
167
168 self._values = values
169
170 self._indices = indices
171
172 self._confusion = confusion
173
174 self._max_conf = 0
175
176 self._total = len(reference)
177
178 self._correct = sum(confusion[i][i] for i in range(len(values)))
179
181 """
182 @return: The number of times that value C{li} was expected and
183 value C{lj} was given.
184 @rtype: C{int}
185 """
186 i = self._indices[li]
187 j = self._indices[lj]
188 return self._confusion[i][j]
189
    def __repr__(self):
        return '<ConfusionMatrix: %s/%s correct>' % (self._correct,
                                                     self._total)

    def __str__(self):
        return self.pp()

    def pp(self, show_percents=False, values_in_chart=True):
        """
        @return: A multi-line string representation of this confusion
            matrix.
        @todo: add marginals?
        """
        confusion = self._confusion

        # Decide whether to label rows and columns with the values
        # themselves or with numeric indices (see the value key).
        if values_in_chart:
            values = self._values
        else:
            values = range(len(self._values))

        # Construct a format string for row labels, and one for matrix
        # entries, sized to the widest value and the largest count.
        valuelen = max(len(str(val)) for val in values)
        value_format = '%' + `valuelen` + 's |'
        if show_percents:
            entrylen = 6
            entry_format = '%5.1f%%'
        else:
            entrylen = len(`self._max_conf`)
            entry_format = '%' + `entrylen` + 'd'

        # Write the column headers, one character per line, so that
        # each column value reads vertically.
        value_strings = [str(val) for val in values]
        s = ''
        for i in range(valuelen):
            s += (' '*valuelen)+' |'
            for val in value_strings:
                if i >= valuelen-len(val):
                    s += val[i-valuelen+len(val)].rjust(entrylen+1)
                else:
                    s += ' '*(entrylen+1)
            s += ' |\n'

        # Write a dividing line.
        s += '%s-+-%s+\n' % ('-'*valuelen, '-'*((entrylen+1)*len(values)))

        # Write the matrix entries, one row per reference value.
        for i in range(len(values)):
            s += value_format % values[i]
            for j in range(len(values)):
                s += ' '
                if show_percents:
                    s += entry_format % (100.0*confusion[i][j]/self._total)
                else:
                    s += entry_format % confusion[i][j]
            s += ' |\n'

        # Write a dividing line.
        s += '%s-+-%s+\n' % ('-'*valuelen, '-'*((entrylen+1)*len(values)))

        # Write a key, if requested.
        s += '(row = reference; col = test)\n'
        if not values_in_chart:
            s += 'Value key:\n'
            for i, value in enumerate(self._values):
                s += '%6d: %s\n' % (i, value)

        return s

    def key(self):
        """
        @return: A multi-line string listing each value with its
            numeric index, for use with C{values_in_chart=False}.
        @rtype: C{string}
        """
        values = self._values
        s = 'Value key:\n'
        indexlen = len(`len(values)-1`)
        key_format = '  %'+`indexlen`+'d: %s\n'
        for i in range(len(values)):
            s += key_format % (i, values[i])

        return s

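# Illustrative use of ConfusionMatrix (a hypothetical helper, not part of
# the original module): the counts below follow from the two tag
# sequences, e.g. reference 'NN' is tagged 'NN' three times.
def _confusion_matrix_example():
    ref  = 'DET NN VB DET JJ NN NN IN DET NN'.split()
    test = 'DET VB VB DET NN NN NN IN DET NN'.split()
    cm = ConfusionMatrix(ref, test)
    assert cm['NN', 'NN'] == 3
    assert cm['NN', 'VB'] == 1   # one 'NN' mistagged as 'VB'
    assert cm['JJ', 'NN'] == 1   # the lone 'JJ' mistagged as 'NN'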
def demo():
    print '-'*75
    reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
    test = 'DET VB VB DET NN NN NN IN DET NN'.split()
    print 'Reference =', reference
    print 'Test      =', test
    print 'Confusion matrix:'
    print ConfusionMatrix(reference, test)
    print 'Accuracy:', accuracy(reference, test)

    print '-'*75
    reference_set = sets.Set(reference)
    test_set = sets.Set(test)
    print 'Reference =', reference_set
    print 'Test      =', test_set
    print 'Precision:', precision(reference_set, test_set)
    print '   Recall:', recall(reference_set, test_set)
    print 'F-Measure:', f_measure(reference_set, test_set)
    print '-'*75

if __name__ == '__main__':
    demo()