Package Bio :: Package NBRF :: Module nbrf_format
[hide private]
[frames | no frames]

Source Code for Module Bio.NBRF.nbrf_format

 1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
 2  # This code is part of the Biopython distribution and governed by its 
 3  # license.  Please see the LICENSE file that should have been included 
 4  # as part of this package. 
 5   
 6  """Martel based parser to read NBRF formatted files. 
 7   
 8  This is a huge regular expression for NBRF, built using 
 9  the 'regular expressions on steroids' capabilities of Martel. 
10   
11  http://www-nbrf.georgetown.edu/pirwww/pirhome.shtml 
12  """ 
13   
14  # Martel 
15  import Martel 
16  from Martel import Str 
17  from Martel import AnyEol, UntilEol 
18  from Martel import Group 
19  from Martel import Alt 
20  from Martel import Rep 
21  from Martel import Rep1 
22  from Martel import AnyBut 
23  from Martel import UntilSep 
24   
25  from Bio.NBRF.ValSeq import valid_sequence_dict 
26   
# One literal matcher per key of valid_sequence_dict; the type code on a
# record's header line must be one of these alternatives.
sequence_types = map(Str, valid_sequence_dict.keys())
sequence_type = Group("sequence_type", Alt(*sequence_types))

# Header line layout: ">" <type code> ";" <remainder of the line> <line ending>.
# The remainder of the line is tagged "sequence_name".
name_line = Group(
    "name_line",
    Str(">") + sequence_type + Str(";") + UntilEol("sequence_name") + AnyEol())

# The line following the header; its text is tagged "comment".
comment_line = UntilEol("comment") + AnyEol()
37   
# Characters that may not appear inside ordinary sequence text:
# '*' (0x2a) ends the record, '\n' (10) and '\r' (13) end the line.
excluded_chars = "*" + "\n" + "\r"

# A line carrying sequence only: one or more permitted characters,
# then the line ending.
sequence_text = Group("sequence_text", Rep1(AnyBut(excluded_chars)))
sequence_line = Group("sequence_line", sequence_text + AnyEol())

# The closing line of a record: potentially some sequence, then the '*'
# terminator, then one or more line endings.
sequence_final_line = Group(
    "sequence_final_line",
    UntilSep("sequence_final_text", "*") + Str("*") + Rep1(AnyEol()))
50   
# The run of sequence-only lines (Rep: possibly none) that precedes the
# final, '*'-terminated line.
sequence_block = Group("sequence_block", Rep(sequence_line))

# A complete record, in order: header line, comment line, the block of
# sequence lines, and the final line.
nbrf_record = (
    name_line
    + comment_line
    + sequence_block
    + sequence_final_line
)
53