Package Bio :: Package SeqIO :: Module IgIO
[hide private]
[frames | no frames]

Source Code for Module Bio.SeqIO.IgIO

 1  # Copyright 2008 by Peter Cock.  All rights reserved. 
 2  # This code is part of the Biopython distribution and governed by its 
 3  # license.  Please see the LICENSE file that should have been included 
 4  # as part of this package. 
 5  # 
 6  # This module is for reading and writing IntelliGenetics format files as 
 7  # SeqRecord objects.  This file format appears to be the same as the MASE 
 8  # multiple sequence alignment format. 
 9   
10  """Bio.SeqIO support for the "ig" (IntelliGenetics or MASE) file format. 
11   
12  You are expected to use this module via the Bio.SeqIO functions.""" 
13   
14  from Bio.Alphabet import single_letter_alphabet 
15  from Bio.Seq import Seq 
16  from Bio.SeqRecord import SeqRecord 
17   
18  #This is a generator function! 
def IgIterator(handle, alphabet=single_letter_alphabet):
    """Iterate over IntelliGenetics records (as SeqRecord objects).

    handle - input file
    alphabet - optional alphabet

    The optional free format file header lines (which start with two
    semi-colons) are ignored.

    The free format commentary lines at the start of each record (which
    start with a semi-colon) are recorded as a single string with embedded
    new line characters in the SeqRecord's annotations dictionary under the
    key 'comment'.

    The first line after a record's commentary is used (with trailing
    whitespace removed) as both the id and the name of the SeqRecord.  The
    remaining lines, up to the next line starting with a semi-colon or the
    end of the file, are joined to form the sequence after removing
    trailing whitespace and any spaces.  If the joined sequence ends with
    the digit one (the optional IntelliGenetics sequence terminator), that
    digit is dropped rather than stored as part of the sequence.

    Raises ValueError if a record does not start with a semi-colon.
    """
    #Skip any file header text before the first record (;; lines)
    while True:
        line = handle.readline()
        if not line:
            break  #Premature end of file, or just empty?
        if not line.startswith(";;"):
            break

    #Now iterate over the records.  At the top of each pass, line holds the
    #first line of the next record; an empty string means end of file.
    while line:
        if line[0] != ";":
            raise ValueError(
                "Records should start with ';' and not:\n%s" % repr(line))

        #Try and agree with SeqRecord convention from the GenBank parser,
        #(and followed in the SwissProt parser) which stores the comments
        #as a long string with newlines under annotations key 'comment'.

        #Note some examples use "; ..." and others ";..."
        comment_lines = []
        while line.startswith(";"):
            #TODO - Extract identifier from lines like "LOCUS\tB_SF2"?
            comment_lines.append(line[1:].strip())
            line = handle.readline()
        title = line.rstrip()

        seq_lines = []
        while True:
            line = handle.readline()
            if not line:
                break  #End of file, which also ends this record
            if line[0] == ";":
                break  #Start of the next record's commentary
            #Remove trailing whitespace, and any internal spaces
            seq_lines.append(line.rstrip().replace(" ", ""))

        seq_str = "".join(seq_lines)
        if seq_str.endswith("1"):
            #Remove the optional terminator (digit one), which is not
            #part of the sequence itself.
            seq_str = seq_str[:-1]

        #Return the record and then continue...
        record = SeqRecord(Seq(seq_str, alphabet),
                           id=title, name=title)
        record.annotations['comment'] = "\n".join(comment_lines)
        yield record

    #The loop above only finishes once readline() has returned an empty
    #string, so we are at the end of the file here.
if __name__ == "__main__":
    #Quick self test: parse every ".txt" file in the IntelliGenetics test
    #directory and print the id and length of each record found.
    #
    #The print calls below each take a single parenthesised argument, so
    #they produce the same output whether print is a statement or a
    #function.
    print("Running quick self test")

    import os
    test_dir = "../../Tests/Intelligenetics/"
    for filename in os.listdir(test_dir):
        if os.path.splitext(filename)[-1] == ".txt":
            print("")
            print(filename)
            print("-" * len(filename))
            handle = open(os.path.join(test_dir, filename))
            try:
                for record in IgIterator(handle):
                    print("%s %s" % (record.id, len(record)))
            finally:
                #Close the file even if parsing fails part way through
                handle.close()
    print("Done")