1
2
3
4
5 """Parser for the MASE/IntelliGenetics alignment file format (DEPRECATED).
6
7 Please use Bio.SeqIO with the "ig" format instead."""
8
9 import warnings
# Emitted once at import time so that anyone still importing this module is
# pointed at the replacement parser in Bio.SeqIO.  Adjacent string literals
# are joined by the compiler, so no "+" or backslash continuation is needed.
warnings.warn(
    "Bio.IntelliGenetics is deprecated."
    " We hope the new 'ig' format support in Bio.SeqIO will be"
    " suitable for most users. Please get in touch on the"
    " mailing lists if this (or its removal) causes any problems"
    " for you.",
    DeprecationWarning)
16
17
18
19 import string
20 import array
21 import os
22 import re
23 import sgmllib
24 import urlparse
25
26
27 from xml.sax import handler
28
29
30 import Martel
31 from Martel import RecordReader
32
33 from Bio.ParserSupport import EventGenerator
34 from Bio.ParserSupport import AbstractConsumer
35 from Bio import File
36 import intelligenetics_format
37 import Record
39 """Iterator interface to move over a file of IntelliGenetics entries one at a time.
40 """
def __init__(self, handle, parser=None):
    """Set up iteration over a handle of IntelliGenetics entries.

    Arguments:
    o handle - A handle with IntelliGenetics entries to iterate through.
    It is wrapped in a File.UndoHandle, and that wrapper is what the
    record reader is given.
    o parser - An optional parser to pass the entries through before
    returning them.  If None, then the raw entry will be returned.
    """
    undo_handle = File.UndoHandle(handle)
    self.handle = undo_handle
    self._reader = IntelliGeneticsReader(undo_handle)
    self._parser = parser
52
def next(self):
    """Return the next IntelliGenetics record from the handle.

    The raw entry is read from the underlying reader.  If a parser was
    supplied, the entry is wrapped in a File.StringHandle and the parser's
    result is returned; otherwise the raw entry itself is returned.

    Returns None when the reader has nothing more to give.  None is the
    sentinel that iter(self.next, None) waits for, so it must be returned
    whether or not a parser is set.
    """
    data = self._reader.next()

    # An exhausted reader may hand back an empty string rather than None.
    # Normalise every "no data" result to None so the documented contract
    # holds and sentinel-based iteration terminates.
    if not data:
        return None

    if self._parser is None:
        return data
    return self._parser.parse(File.StringHandle(data))
65
def __iter__(self):
    """Iterate over the records, stopping when next() returns None."""
    fetch_record = self.next
    return iter(fetch_record, None)
68
"""Start up Martel to do the scanning of the file.

This initializes the Martel-based parser and connects it to a handler
that will generate events for a Feature Consumer.
"""
def __init__(self, debug=0):
    """Initialize the scanner by building and caching its Martel parser.

    Creating the parser takes a long time, so it is built once here and
    kept on the instance to reduce parsing time.

    Arguments:
    o debug - The level of debugging that the parser should
    display.  Level 0 is no debugging, Level 2 displays the most
    debugging info (but is much slower).  See Martel documentation
    for more info on this.
    """
    # Names of the tagged elements that events are generated for.
    self.interest_tags = ["comment", "title_line", "sequence"]

    # Restrict the record expression to those names before building the
    # parser from it.
    record_format = intelligenetics_format.intelligenetics_record
    expression = Martel.select_names(record_format, self.interest_tags)
    self._parser = expression.make_parser(debug_level=debug)
94
def feed(self, handle, consumer):
    """Feed a set of data into the scanner.

    Arguments:
    o handle - A handle with the information to parse.
    o consumer - The consumer that should be informed of events.
    """
    # Route parser events for the tags of interest to the consumer.
    event_handler = EventGenerator(consumer, self.interest_tags)
    self._parser.setContentHandler(event_handler)

    self._parser.parseFile(handle)
107
109 """Create an IntelliGenetics Record object from scanner generated information.
110 """
113
114
117
121
125
127 """Parse IntelliGenetics files into Record objects
128 """
def __init__(self, debug_level=0):
    """Initialize the parser by creating the scanner it delegates to.

    Arguments:
    o debug_level - An optional argument that specifies the amount of
    debugging information Martel should spit out.  By default we have
    no debugging info (the fastest way to do things), but if you want
    you can set this as high as two and see exactly where a parse fails.
    """
    scanner = _Scanner(debug_level)
    self._scanner = scanner
139
def parse(self, handle):
    """Parse the specified handle into an IntelliGenetics record.

    A fresh record consumer is created for each call, the scanner feeds
    it the contents of the handle, and the consumer's data attribute is
    returned.  The consumer is also kept on the instance as _consumer.
    """
    consumer = _RecordConsumer()
    self._consumer = consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
146
148
151
def next(self):
    """Read one record's worth of lines from self.infile.

    A record is gathered in two phases.  In the comment phase every line
    is kept, and the first line that does not start with ';' switches to
    the sequence phase.  In the sequence phase lines are kept until one
    starting with ';' is seen; that line is not part of this record and
    is handed back to the handle with saveline() (presumably so that the
    next call reads it again - confirm against the handle class).

    Returns the collected lines joined into one string, or '' when the
    handle was already at end of input.
    """
    infile = self.infile
    collected = []      # joined once at the end; avoids repeated str copies
    in_sequence = False

    while True:
        line = infile.readline()
        if not line:
            # End of input: return whatever has been gathered so far.
            break
        if line.startswith(';'):
            if in_sequence:
                # First line of the following record - give it back.
                infile.saveline(line)
                break
        else:
            in_sequence = True
        collected.append(line)

    return ''.join(collected)
174