Package Bio :: Package SeqIO :: Module TabIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqIO.TabIO

 1  # Copyright 2008 by Peter Cock.  All rights reserved. 
 2  # This code is part of the Biopython distribution and governed by its 
 3  # license.  Please see the LICENSE file that should have been included 
 4  # as part of this package. 
 5   
 6  """Bio.SeqIO support for the "tab" (simple tab separated) file format. 
 7   
 8  You are expected to use this module via the Bio.SeqIO functions. 
 9   
10  The "tab" format is an ad-hoc plain text file format where each sequence is 
11  on one (long) line.  Each line contains the identifier/description, followed 
12  by a tab, followed by the sequence.  For example, consider the following 
13  short FASTA format file: 
14   
15  >ID123456 possible binding site? 
16  CATCNAGATGACACTACGACTACGACTCAGACTAC 
17  >ID123457 random sequence 
18  ACACTACGACTACGACTCAGACTACAAN 
19   
20  Apart from the descriptions, this can be represented in the simple two column 
21  tab separated format as follows: 
22   
23  ID123456(tab)CATCNAGATGACACTACGACTACGACTCAGACTAC 
24  ID123457(tab)ACACTACGACTACGACTCAGACTACAAN 
25   
26  When reading this file, "ID123456" or "ID123457" will be taken as the record's 
27  .id and .name property.  There is no other information to record. 
28   
29  Similarly, when writing to this format, Biopython will ONLY record the record's 
30  .id and .seq (and not the description or any other information) as in the example 
31  above. 
32  """ 
33   
34  from Bio.Alphabet import single_letter_alphabet 
35  from Bio.Seq import Seq 
36  from Bio.SeqRecord import SeqRecord 
37  from Interfaces import SequentialSequenceWriter 
38   
39  #This is a generator function! 
def TabIterator(handle, alphabet=single_letter_alphabet):
    """Iterates over tab separated lines (as SeqRecord objects).

    Each line of the file should contain one tab only, dividing the line
    into an identifier and the full sequence.

    handle - input file
    alphabet - optional alphabet

    The first field is taken as the record's .id and .name (regardless of
    any spaces within the text) and the second field is the sequence.

    A line which cannot be split into exactly two fields is skipped if it
    is blank (whitespace only), otherwise a ValueError is raised reporting
    how many tabs the line contained and the line itself.
    """
    for line in handle:
        try:
            title, seq = line.split("\t")
        except ValueError:
            # The unpacking fails when the line holds zero tabs or more
            # than one.  Blank lines (typically a trailing new line at the
            # end of the file) are harmless, so ignore them rather than
            # abort the whole parse.
            if not line.strip():
                continue
            raise ValueError("Each line should have one tab separating the "
                             "title and sequence, this line has %i tabs: %r"
                             % (line.count("\t"), line))
        title = title.strip()
        seq = seq.strip()  # removes the trailing new line
        yield SeqRecord(Seq(seq, alphabet), id=title, name=title)
57
class TabWriter(SequentialSequenceWriter):
    """Writer for the simple two column tab separated format.

    Every record is written as one line of the form "id(tab)sequence".

    The description, name and any other annotation are not written.
    """

    def write_record(self, record):
        """Write one record to the handle as a single tab separated line."""
        assert self._header_written
        assert not self._footer_written
        self._record_written = True

        identifier = self.clean(record.id)
        sequence = record.seq.tostring()
        # Neither column may contain the field separator or a line break,
        # otherwise the output could not be read back one record per line.
        for column in (identifier, sequence):
            for separator in ("\t", "\n", "\r"):
                assert separator not in column
        output_line = "%s\t%s\n" % (identifier, sequence)
        self.handle.write(output_line)
80