1
2
3
4
5 """
6 Bio.SeqIO support module (not for general use).
7
8 Unless you are writing a new parser or writer for Bio.SeqIO, you should not
9 use this module. It provides base classes to try and simplify things.
10 """
11
12 from Bio.Alphabet import generic_alphabet
13
15 """Base class for building SeqRecord iterators.
16
17 You should write a next() method to return SeqRecord
18 objects. You may wish to redefine the __init__
19 method as well.
20 """
22 """Create a SequenceIterator object.
23
24 handle - input file
25 alphabet - optional, e.g. Bio.Alphabet.generic_protein
26
27 Note when subclassing:
28 - there should be a single non-optional argument,
29 the handle.
30 - you do not have to require an alphabet.
31 - you can add additional optional arguments."""
32 self.handle = handle
33 self.alphabet = alphabet
34
35
36
37
38
39
41 """Return the next record in the file.
42
43 This method should be replaced by any derived class to do something useful."""
44 raise NotImplementedError("This object should be subclassed")
45
46
47
48
49
50
52 """Iterate over the entries as SeqRecord objects.
53
54 Example usage for Fasta files:
55
56 myFile = open("example.fasta","r")
57 myFastaReader = FastaIterator(myFile)
58 for record in myFastaReader :
59 print record.id
60 print record.seq
61 myFile.close()"""
62 return iter(self.next, None)
63
65 """Base class for any iterator of a non-sequential file type.
66
67 This object is not intended for use directly.
68
69 When writing a parser for any interlaced sequence file where the whole
70 file must be read in order to extract any single record, then you should
71 subclass this object.
72
73 All you need to do is to define your own:
74 (1) __init__ method to parse the file and call self.move_start()
75 (2) __len__ method to return the number of records
76 (3) __getitem__ to return any requested record.
77
78 This class will then provide the iterator methods including next(), but relies
79 on knowing the total number of records and tracking the pending record index
80 as self._n.
81
82 It is up to the subclassed object to decide if it wants to generate a cache of
83 SeqRecords when initialised, or simply use its own lists and dicts and create
84 SeqRecords on request.
85 """
86
88 """Create the object.
89
90 This method should be replaced by any derived class to do something useful."""
91
92 self.move_start()
93 raise NotImplementedError("This object method should be subclassed")
94
95
96
97
99 """Return the number of records.
100
101 This method should be replaced by any derived class to do something useful."""
102 raise NotImplementedError("This object method should be subclassed")
103
104
105
106
108 """Return the requested record.
109
110 This method should be replaced by any derived class to do something
111 useful.
112
113 It should NOT touch the value of self._n"""
114 raise NotImplementedError("This object method should be subclassed")
115
116
117
118
121
123 next_record = self._n
124 if next_record < len(self) :
125 self._n = next_record+1
126 return self[next_record]
127 else :
128
129 return None
130
132 return iter(self.next, None)
133
135 """This class should be subclassed.
136
137 Interlaced file formats (e.g. Clustal) should subclass directly.
138
139 Sequential file formats (e.g. Fasta, GenBank) should subclass
140 the SequentialSequenceWriter class instead.
141 """
143 """Creates the writer object.
144
145 Use the method write_file() to actually record your sequence records."""
146 self.handle = handle
147
149 """Use this to avoid getting newlines in the output."""
150 answer = text
151 for x in ["\n", "\r"] :
152 answer = answer.replace(x, " ")
153 return answer.replace(" ", " ")
154
156 """Use this to write an entire file containing the given records.
157
158 records - A list or iterator returning SeqRecord objects
159
160 Should return the number of records (as an integer).
161
162 This method can only be called once."""
163
164 raise NotImplementedError("This object should be subclassed")
165
166
167
168
170 """This class should be subclassed.
171
172 It is intended for sequential file formats with an (optional)
173 header, repeated records, and an (optional) footer.
174
175 In this case (as with interlaced file formats), the user may
176 simply call the write_file() method and be done.
177
178 However, they may also call the write_header(), followed
179 by multiple calls to write_record() and/or write_records()
180 followed finally by write_footer().
181
182 Users must call write_header() and write_footer() even when
183 the file format concerned doesn't have a header or footer.
184 This is to try and make life as easy as possible when
185 switching the output format.
186
187 Note that write_header() cannot make any assumptions about
188 the number of records.
189 """
195
197 assert not self._header_written, "You have aleady called write_header()"
198 assert not self._record_written, "You have aleady called write_record() or write_records()"
199 assert not self._footer_written, "You have aleady called write_footer()"
200 self._header_written = True
201
207
209 """Write a single record to the output file.
210
211 record - a SeqRecord object
212
213 Once you have called write_header() you can call write_record()
214 and/or write_records() as many times as needed. Then call
215 write_footer() and close()."""
216 assert self._header_written, "You must call write_header() first"
217 assert not self._footer_written, "You have already called write_footer()"
218 self._record_written = True
219 raise NotImplementedError("This object should be subclassed")
220
221
222
223
225 """Write multiple records to the output file.
226
227 records - A list or iterator returning SeqRecord objects
228
229 Once you have called write_header() you can call write_record()
230 and/or write_records() as many times as needed. Then call
231 write_footer() and close().
232
233 Returns the number of records written.
234 """
235
236 assert self._header_written, "You must call write_header() first"
237 assert not self._footer_written, "You have already called write_footer()"
238 count = 0
239 for record in records :
240 self.write_record(record)
241 count += 1
242
243 self._record_written = True
244 return count
245
247 """Use this to write an entire file containing the given records.
248
249 records - A list or iterator returning SeqRecord objects
250
251 This method can only be called once. Returns the number of records
252 written.
253 """
254 self.write_header()
255 count = self.write_records(records)
256 self.write_footer()
257 return count
258