1
2
3
4
5
6 """
7 This module provides code to work with the prosite.doc file from
8 Prosite.
9 http://www.expasy.ch/prosite/
10
11 Tested with:
12 Release 15.0, July 1998
13 Release 16.0, July 1999
14 Release 20.22, 13 November 2007
15
16
17 Functions:
18 parse Iterates over entries in a Prodoc file.
19 index_file Index a Prodoc file for a Dictionary.
20 _extract_record Extract Prodoc data from a web page.
21
22
23 Classes:
24 Record Holds Prodoc data.
25 Reference Holds data from a Prodoc reference.
26 Dictionary Accesses a Prodoc file using a dictionary interface.
27 RecordParser Parses a Prodoc record into a Record object.
28
29 _Scanner Scans Prodoc-formatted data.
30 _RecordConsumer Consumes Prodoc data to a Record object.
31 Iterator Iterates over entries in a Prodoc file; DEPRECATED.
32 """
33
34 from types import *
35 import os
36 import sgmllib
37 from Bio import File
38 from Bio import Index
39 from Bio.ParserSupport import *
40
52
61
62
63
64
65
67 """Holds information from a Prodoc record.
68
69 Members:
70 accession Accession number of the record.
71 prosite_refs List of tuples (prosite accession, prosite name).
72 text Free format text.
73 references List of reference objects.
74
75 """
77 self.accession = ''
78 self.prosite_refs = []
79 self.text = ''
80 self.references = []
81
83 """Holds information from a Prodoc citation.
84
85 Members:
86 number Number of the reference. (string)
87 authors Names of the authors.
88 citation Describes the citation.
89
90 """
92 self.number = ''
93 self.authors = ''
94 self.citation = ''
95
97 """Returns one record at a time from a Prodoc file.
98
99 Methods:
100 next Return the next record from the stream, or None.
101
102 """
def __init__(self, handle, parser=None):
    """__init__(self, handle, parser=None)

    Create a new iterator over a Prodoc file.

    handle is a file-like object; it must provide a readline method.
    parser is an optional Parser object to change the results into
    another form.  If set to None, then the raw contents of the file
    will be returned.

    Raises ValueError if handle is not file-like.  Always emits a
    DeprecationWarning, since this class is deprecated in favour of
    the Bio.Prosite.Prodoc.parse function.

    """
    import warnings
    warnings.warn("Bio.Prosite.Prodoc.Iterator is deprecated; we recommend using the function Bio.Prosite.Prodoc.parse instead. Please contact the Biopython developers at biopython-dev@biopython.org if you cannot use Bio.Prosite.Prodoc.parse instead of Bio.Prosite.Prodoc.Iterator.",
                  DeprecationWarning)
    # Duck-type the handle instead of comparing exact types: the old
    # FileType/InstanceType test rejected perfectly good file-like
    # objects implemented as new-style classes.
    if not hasattr(handle, "readline"):
        raise ValueError("I expected a file handle or file-like object")
    # UndoHandle wraps the stream; records are later read from it
    # line by line.
    self._uhandle = File.UndoHandle(handle)
    self._parser = parser
118
120 """next(self) -> object
121
122 Return the next Prodoc record from the file. If no more records,
123 return None.
124
125 """
126 lines = []
127 while 1:
128 line = self._uhandle.readline()
129 if not line:
130 break
131 lines.append(line)
132 if line[:5] == '{END}':
133 break
134
135 if not lines:
136 return None
137
138 data = "".join(lines)
139 if self._parser is not None:
140 return self._parser.parse(File.StringHandle(data))
141 return data
142
144 return iter(self.next, None)
145
147 """Accesses a Prodoc file using a dictionary interface.
148
149 """
150 __filename_key = '__filename'
151
def __init__(self, indexname, parser=None):
    """__init__(self, indexname, parser=None)

    Open a Prodoc Dictionary backed by an existing index.

    indexname names the index, which should have been built with the
    index_file function.  parser is an optional Parser object used to
    convert results into another form; when it is None the raw
    contents of the file are returned.

    """
    self._index = Index.Index(indexname)
    # index_file records the path of the indexed Prodoc file inside the
    # index itself, under a reserved key; reopen that file for reading.
    data_path = self._index[Dictionary.__filename_key]
    self._handle = open(data_path)
    self._parser = parser
165
167 return len(self._index)
168
176
178 return getattr(self._index, name)
179
181 """Access PRODOC at ExPASy using a read-only dictionary interface.
182
183 """
def __init__(self, delay=5.0, parser=None):
    """__init__(self, delay=5.0, parser=None)

    Set up a read-only Dictionary for accessing PRODOC.

    delay is the number of seconds to wait between each query.
    parser is an optional parser (e.g. Prodoc.RecordParser) object used
    to change the results into another form; with None the raw
    contents are returned.

    Always emits a DeprecationWarning.

    """
    from warnings import warn
    warn("Bio.Prosite.Prodoc.ExPASyDictionary is deprecated. Please use the function Bio.ExPASy.get_prosite_raw instead.",
         DeprecationWarning)

    # No query has been made yet, so there is nothing to throttle against.
    self.last_query_time = None
    self.parser = parser
    self.delay = delay
201
203 raise NotImplementedError("Prodoc contains lots of entries")
205 raise NotImplementedError("This is a read-only dictionary")
207 raise NotImplementedError("This is a read-only dictionary")
209 raise NotImplementedError("This is a read-only dictionary")
211 raise NotImplementedError("You don't need to do this...")
213 raise NotImplementedError("You don't really want to do this...")
215 raise NotImplementedError("You don't really want to do this...")
217 raise NotImplementedError("You don't really want to do this...")
218
220 """has_key(self, id) -> bool"""
221 try:
222 self[id]
223 except KeyError:
224 return 0
225 return 1
226
def get(self, id, failobj=None):
    """get(self, id, failobj=None) -> object

    Look up id through item access on self.  If that lookup raises
    KeyError, return failobj instead.

    """
    try:
        entry = self[id]
    except KeyError:
        entry = failobj
    return entry
232
234 """__getitem__(self, id) -> object
235
236 Return a Prodoc entry. id is either the id or accession
237 for the entry. Raises a KeyError if there's an error.
238
239 """
240 import time
241 from Bio import ExPASy
242
243
244 if self.last_query_time is not None:
245 delay = self.last_query_time + self.delay - time.time()
246 if delay > 0.0:
247 time.sleep(delay)
248 self.last_query_time = time.time()
249
250 try:
251 handle = ExPASy.get_prodoc_entry(id)
252 except IOError:
253 raise KeyError(id)
254 try:
255 handle = File.StringHandle(_extract_record(handle))
256 except ValueError:
257 raise KeyError(id)
258
259 if self.parser is not None:
260 return self.parser.parse(handle)
261 return handle.read()
262
264 """Parses Prodoc data into a Record object.
265
266 """
270
def parse(self, handle):
    """parse(self, handle) -> object

    Feed handle through the stored scanner, which reports to the
    stored consumer, then return the consumer's data attribute.

    """
    consumer = self._consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
274
276 """Scans Prodoc-formatted data.
277
278 Tested with:
279 Release 15.0, July 1998
280
281 """
def feed(self, handle, consumer):
    """feed(self, handle, consumer)

    Scan Prodoc data.  handle is a file-like object that contains
    prosite data; consumer is a Consumer object that receives events
    as the data is scanned.

    """
    # Reuse an existing UndoHandle; wrap anything else so that lines
    # can be peeked at without consuming them.
    if not isinstance(handle, File.UndoHandle):
        handle = File.UndoHandle(handle)
    uhandle = handle

    upcoming = uhandle.peekline()
    while upcoming:
        if is_blank_line(upcoming):
            # Blank lines between records carry no data; discard them.
            uhandle.readline()
        else:
            self._scan_record(uhandle, consumer)
        upcoming = uhandle.peekline()
305
318
321
326
def _scan_text(self, uhandle, consumer):
    """_scan_text(self, uhandle, consumer)

    Pass free-text lines to consumer.text, one line at a time.

    Lines are read from uhandle with safe_readline.  Scanning stops at
    the first line that looks like the start of a reference ('[', two
    characters, then '] ') or that begins with '{END}'.  That line is
    pushed back onto uhandle with saveline so the caller can handle it.

    """
    while 1:
        line = safe_readline(uhandle)
        # Compare slices rather than single indexes: a short line that
        # starts with '[' (e.g. "[a\n") used to raise IndexError here,
        # whereas an out-of-range slice simply compares unequal.
        starts_reference = line[:1] == '[' and line[3:5] == '] '
        if starts_reference or line[:5] == '{END}':
            uhandle.saveline(line)
            break
        consumer.text(line)
335
343
345
346
347 read_and_call_while(uhandle, consumer.noevent, blank=1)
348 if attempt_read_and_call(uhandle, consumer.noevent, start='+----'):
349 read_and_call_until(uhandle, consumer.noevent, start='+----')
350 read_and_call(uhandle, consumer.noevent, start='+----')
351 read_and_call_while(uhandle, consumer.noevent, blank=1)
352
354 """Consumer that converts a Prodoc record to a Record object.
355
356 Members:
357 data Record with Prodoc data.
358
359 """
362
365
368
370 line = line.rstrip()
371 if line[0] != '{' or line[-1] != '}':
372 raise ValueError("I don't understand accession line\n%s" % line)
373 acc = line[1:-1]
374 if acc[:4] != 'PDOC':
375 raise ValueError("Invalid accession in line\n%s" % line)
376 self.data.accession = acc
377
379 line = line.rstrip()
380 if line[0] != '{' or line[-1] != '}':
381 raise ValueError("I don't understand accession line\n%s" % line)
382 acc, name = line[1:-1].split('; ')
383 self.data.prosite_refs.append((acc, name))
384
def text(self, line):
    """Append line to the free-format text accumulated on self.data."""
    record = self.data
    record.text += line
387
389 if line[0] == '[' and line[3] == ']':
390 self._ref = Reference()
391 self._ref.number = line[1:3].strip()
392 if line[1] == 'E':
393
394
395 self._ref.citation = line[4:].strip()
396 else:
397 self._ref.authors = line[4:].strip()
398 self.data.references.append(self._ref)
399 elif line[:4] == ' ':
400 if not self._ref:
401 raise ValueError("Unnumbered reference lines\n%s" % line)
402 self._ref.citation = self._ref.citation + line[5:]
403 else:
404 raise Exception("I don't understand the reference line\n%s" % line)
405
411
def index_file(filename, indexname, rec2key=None):
    """index_file(filename, indexname, rec2key=None)

    Index a Prodoc file.

    filename is the name of the file.  indexname is the name of the
    index to create; it is opened with truncate=1.  rec2key is an
    optional callback that takes a Record and generates a unique key
    for it.  If not specified, the record's accession is used.

    For every record the index maps its key to a (start, length) tuple
    giving the record's position in the file.  The file name itself is
    stored in the index under a reserved key.

    Raises ValueError if filename does not exist, and KeyError if a
    record produces an empty key or a key that is already indexed.

    """
    import os
    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    try:
        # Plain integers suffice: Python promotes them to long as needed,
        # so the Python-2-only 0L literal and long() call are not required.
        end = 0
        for record in parse(handle):
            # Each record spans from where the previous one ended to the
            # current file position.
            start = end
            end = handle.tell()
            length = end - start

            if rec2key is not None:
                key = rec2key(record)
            else:
                key = record.accession

            if not key:
                raise KeyError("empty key was produced")
            elif key in index:
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        # Close the data file even when indexing fails part-way through.
        handle.close()
448
449
450
452 """_extract_record(handle) -> str
453
454 Extract PRODOC data from a web page. Raises a ValueError if no
455 data was found in the web page.
456
457 """
458
459
460
class parser(sgmllib.SGMLParser):
    """SGML parser that collects the text found inside <pre> elements.

    Collected pieces are appended to the data list; a <br> inside a
    <pre> element contributes a newline.

    """

    def __init__(self):
        sgmllib.SGMLParser.__init__(self)
        self.data = []
        # Flag: true while the parser is between <pre> and </pre>.
        self._in_pre = False

    def start_pre(self, attrs):
        self._in_pre = True

    def end_pre(self):
        self._in_pre = False

    def handle_data(self, data):
        # Text outside <pre> is ignored.
        if not self._in_pre:
            return
        self.data.append(data)

    def do_br(self, attrs):
        # A line break only matters inside <pre>.
        if not self._in_pre:
            return
        self.data.append('\n')
476 p = parser()
477 p.feed(handle.read())
478 data = ''.join(p.data).lstrip()
479 if not data:
480 raise ValueError("No data found in web page.")
481 return data
482