1
2
3
4
5
6
7
8
9
10
11
12
13 -def pr(data, start=0, end=None):
14 """
15 Pretty print a sequence of data items
16
17 @param data: the data stream to print
18 @type data: C{sequence} or C{iterator}
19 @param start: the start position
20 @type start: C{int}
21 @param end: the end position
22 @type end: C{int}
23 """
24 from pprint import pprint
25 from itertools import islice
26 pprint(list(islice(data, start, end)))
27
29 """
30 Pretty print a string, breaking lines on whitespace
31
32 @param s: the string to print, consisting of words and spaces
33 @type s: C{string}
34 @param width: the display width
35 @type width: C{int}
36 """
37 import re
38 while s:
39 s = s.strip()
40 try:
41 i = s[:width].rindex(' ')
42 except ValueError:
43 print s
44 return
45 print s[:i]
46 s = s[i:]
47
49 """
50 A very rudamentary sorted dictionary, whose main purpose is to
51 allow dictionaries to be displayed in a consistent order in
52 regression tests. keys(), items(), values(), iter*(), and
53 __repr__ all sort their return values before returning them.
54 (note that the sort order for values() does *not* correspond to
55 the sort order for keys(). I.e., zip(d.keys(), d.values()) is not
56 necessarily equal to d.items().
57 """
66 items = ['%s=%s' % t for t in sorted(self.items())]
67 return '{%s}' % ', '.join(items)
68
69
70
71
73 """
74 This implementation of a dictionary keeps track of the order
75 in which keys were inserted.
76 """
77
81
85
94
98
100 for i in self._keys:
101 yield i, self[i]
102
105
107 if len(self._keys) == 0:
108 raise KeyError('dictionary is empty')
109 else:
110 key = self._keys[-1]
111 val = self[key]
112 del self[key]
113 return key, val
114
119
125
127 for i in self._keys:
128 yield self[i]
129
130 - def move(self, key, index):
131
132 """ Move the specified to key to *before* the specified index. """
133
134 try:
135 cur = self._keys.index(key)
136 except ValueError:
137 raise KeyError(key)
138 self._keys.insert(index, key)
139
140 if cur >= index: cur = cur + 1
141 del self._keys[cur]
142
147
148
149
150
151
152
153
154
156 lev = []
157 for i in range(len1):
158 lev.append([0] * len2)
159 for i in range(len1):
160 lev[i][0] = i
161 for j in range(len2):
162 lev[0][j] = j
163 return lev
164
166 a = lev[i-1][j ] + 1
167 b = lev[i-1][j-1] + (c1 != c2)
168 c = lev[i ][j-1] + 1
169 lev[i][j] = min(a,b,c)
170
172 """
173 Calculate the Levenshtein edit-distance between two strings.
174 The edit distance is the number of characters that need to be
175 substituted, inserted, or deleted, to transform s1 into s2. For
176 example, transforming "rain" to "shine" requires three steps,
177 consisting of two substitutions and one insertion:
178 "rain" -> "sain" -> "shin" -> "shine". These operations could have
179 been done in other orders, but at least three steps are needed.
180
181 @param s1, s2: The strings to be analysed
182 @type s1, s2: C{string}
183 @rtype C{int}
184 """
185
186 len1 = len(s1); len2 = len(s2)
187 lev = _edit_dist_init(len1+1, len2+1)
188
189
190 for i in range(len1):
191 for j in range (len2):
192 _edit_dist_step(lev, i+1, j+1, s1[i], s2[j])
193 return lev[len1][len2]
194
195
196
197
198
199
201 """
202 Find contexts where more than one possible target value can
203 appear. E.g. if targets are word-initial letters, and contexts
204 are the remainders of words, then we would like to find cases like
205 "fat" vs "cat", and "training" vs "draining". If targets are
206 parts-of-speech and contexts are words, then we would like to find
207 cases like wind (noun) 'air in rapid motion', vs wind (verb)
208 'coil, wrap'.
209 """
211 """
212 Create a new minimal set.
213
214 @param parameters: The (context, target, display) tuples for the item
215 @type parameters: C{list} of C{tuple} of C{string}
216 """
217 self._targets = set()
218 self._contexts = set()
219 self._seen = {}
220 self._displays = {}
221
222 if parameters:
223 for context, target, display in parameters:
224 self.add(context, target, display)
225
226 - def add(self, context, target, display):
227 """
228 Add a new item to the minimal set, having the specified
229 context, target, and display form.
230
231 @param context: The context in which the item of interest appears
232 @type context: C{string}
233 @param target: The item of interest
234 @type target: C{string}
235 @param display: The information to be reported for each item
236 @type display: C{string}
237 """
238
239 if context not in self._seen:
240 self._seen[context] = set()
241 self._seen[context].add(target)
242
243
244 self._contexts.add(context)
245 self._targets.add(target)
246
247
248 self._displays[(context, target)] = display
249
250 - def contexts(self, minimum=2):
251 """
252 Determine which contexts occurred with enough distinct targets.
253
254 @param minimum: the minimum number of distinct target forms
255 @type minimum: C(int)
256 @rtype C(list)
257 """
258 return [c for c in self._contexts if len(self._seen[c]) >= minimum]
259
260 - def display(self, context, target, default=""):
261 if self._displays.has_key((context, target)):
262 return self._displays[(context, target)]
263 else:
264 return default
265
267 result = []
268 for target in self._targets:
269 x = self.display(context, target)
270 if x: result.append(x)
271 return result
272
275
276
277
278
279
280
281 import re
283 """
284 Search C{string} for substrings matching C{regexp} and wrap
285 the matches with braces. This is convenient for learning about
286 regular expressions.
287
288 @param regexp: The regular expression.
289 @param string: The string being matched.
290 @rtype: C{string}
291 @return: A string with braces surrounding the matched substrings.
292 """
293 print re.compile(regexp, re.M).sub("{\g<0>}", string.rstrip())
294
295
296
297
298
299
300
302 if hasattr(f, 'read'):
303 return f.read()
304 elif isinstance(f, basestring):
305 return open(f).read()
306 else:
307 raise ValueError, "Must be called with a filename or file-like object"
308
309
310
311
312
314 """
315 A counter that auto-increments each time its value is read.
316 """
318 self._value = initial_value
320 self._value += 1
321 return self._value
322
323
324
325
326
327
328
329
330
332 """A Trie is like a dictionary in that it maps keys to
333 values. However, because of the way keys are stored, it allows
334 look up based on the longest prefix that matches. Keys must be
335 strings.
336 """
337
339 if trie is None:
340 self._root = [None, {}, 0]
341 else:
342 self._root = trie
343
345 self._root = [None, {}, 0]
346
348 """Return True if the key is present and it's a leaf of the
349 Trie, False otherwise."""
350
351 curr_node = self._root
352 for char in key:
353 curr_node_1 = curr_node[1]
354 if char in curr_node_1:
355 curr_node = curr_node_1[char]
356 else:
357 return False
358 return curr_node[0] is not None
359
361 """Find as much of the key as one can, by using the longest
362 prefix that has a value. Return (value, remainder) where
363 remainder is the rest of the given string."""
364
365 curr_node = self._root
366 remainder = key
367 for char in key:
368 if char in curr_node[1]:
369 curr_node = curr_node[1][char]
370 else:
371 return curr_node[0], remainder
372 remainder = remainder[1:]
373 return curr_node[0], remainder
374
376 curr_node = self._root
377 for char in key:
378 curr_node = curr_node[1][char]
379 return Trie(trie=curr_node)
380
383
385 return self._root == other._root
386
388 curr_node = self._root
389 for char in key:
390 curr_node[2] += 1
391 curr_node = curr_node[1].setdefault(char, [None, {}, 0])
392 curr_node[0] = value
393 curr_node[2] += 1
394
396 """Return the value for the given key if it is present, raises
397 a KeyError if key not found, and return None if it is present
398 a key2 that starts with key."""
399
400 curr_node = self._root
401 for char in key:
402 curr_node = curr_node[1][char]
403 return curr_node[0]
404
406 """Return True if the key is present or if it is present a
407 key2 string that starts with key."""
408
409 curr_node = self._root
410 for char in key:
411 curr_node_1 = curr_node[1]
412 if char in curr_node_1:
413 curr_node = curr_node_1[char]
414 else:
415 return False
416 return True
417
419 return str(self._root)
420
422 return "Trie(%r)" % self._root
423
424
425
426
427
428
430 """Traverse the nodes of a tree in breadth-first order.
431 The first argument should be the tree root; children
432 should be a function taking as argument a tree node and
433 returning an iterator of the node's children.
434 """
435 yield tree
436 last = tree
437 if depth != 0:
438 for node in breadth_first(tree, children, depth-1):
439 for child in children(node):
440 yield child
441 last = child
442 if last == node:
443 return
444
445
446
447
448
449
450
451
452 import locale
454 """
455 Given a byte string, attempt to decode it.
456 Tries the standard 'UTF8' and 'latin-1' encodings,
457 Plus several gathered from locale information.
458
459 The calling program *must* first call
460 locale.setlocale(locale.LC_ALL, '')
461
462 If successful it returns
463 (decoded_unicode, successful_encoding)
464 If unsuccessful it raises a ``UnicodeError``
465 """
466 successful_encoding = None
467
468 encodings = ['utf-8']
469
470
471 try:
472 encodings.append(locale.nl_langinfo(locale.CODESET))
473 except AttributeError:
474 pass
475 try:
476 encodings.append(locale.getlocale()[1])
477 except (AttributeError, IndexError):
478 pass
479 try:
480 encodings.append(locale.getdefaultlocale()[1])
481 except (AttributeError, IndexError):
482 pass
483
484
485 encodings.append('latin-1')
486 for enc in encodings:
487
488
489 if not enc:
490 continue
491 try:
492 decoded = unicode(data, enc)
493 successful_encoding = enc
494
495 except (UnicodeError, LookupError):
496 pass
497 else:
498 break
499 if not successful_encoding:
500 raise UnicodeError(
501 'Unable to decode input data. Tried the following encodings: %s.'
502 % ', '.join([repr(enc) for enc in encodings if enc]))
503 else:
504 return (decoded, successful_encoding)
505