Package Bio :: Package Saf
[hide private]
[frames] | no frames]

Source Code for Package Bio.Saf

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Parser for SAF (Simple Alignment Format). 
  6   
  7  This is a fairly liberal multiple sequence alignment format, where 
  8  record names may contain up to 14 characters and no blanks.  Lines 
  9  beginning with a hash (#) are ignored. 
 10   
 11  SAF has been described as a simplified subset of MSF, dropping the 
 12  checksum and with more flexibility in terms of line length. 
 13   
 14  A current URL describing this file format is: 
 15  http://www.predictprotein.org/Dexa/optin_saf.html 
 16   
 17  This appears to replace the old URL of: 
 18  http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html 
 19  """ 
 20   
 21  import warnings 
 22  warnings.warn("Bio.Saf has been deprecated, due to problems with Martel"\ 
 23                +" and recent versions of mxTextTools. If you want to"\ 
 24                +" continue to use this module (or read this file format),"\ 
 25                +" please get touch to avoid permanent removal of this"\ 
 26                +" module from Biopython.", DeprecationWarning) 
 27   
 28  # Martel 
 29  import Martel 
 30  from Martel import RecordReader 
 31  from Martel import Dispatch 
 32   
 33   
 34  from Bio import File 
 35  import saf_format 
 36  import Record 
 37   
 38   
class Iterator:
    """Step through a file of Saf entries, handing back one entry per call.
    """

    def __init__(self, handle, parser = None):
        """Set up the iterator.

        Arguments:
        o handle - A handle with Saf entries to iterate through.
        o parser - An optional parser; when supplied, each non-empty entry
        is passed through its parse() method before being returned.  When
        None, the raw entry is returned unchanged.
        """
        self._parser = parser
        self.handle = File.UndoHandle(handle)
        self._reader = RecordReader.Everything(self.handle)

    def next(self):
        """Fetch the next Saf record from the handle.

        Returns whatever the underlying reader returns when there is no
        parser or the entry is empty (None signals that the records have
        run out); otherwise returns the parser's result for that entry.
        """
        raw = self._reader.next()

        # Nothing to parse, or nobody to parse it: hand the entry back as is.
        if self._parser is None or not raw:
            return raw

        return self._parser.parse(File.StringHandle(raw))

    def __iter__(self):
        # Keep calling next() until it returns the None sentinel.
        return iter(self.next, None)
69
70 -class _Scanner:
71 """Start up Martel to do the scanning of the file. 72 73 This initialzes the Martel based parser and connects it to a handler 74 that will generate events for a Feature Consumer. 75 """
76 - def __init__(self, debug = 0):
77 """Initialize the scanner by setting up our caches. 78 79 Creating the parser takes a long time, so we want to cache it 80 to reduce parsing time. 81 82 Arguments: 83 o debug - The level of debugging that the parser should 84 display. Level 0 is no debugging, Level 2 displays the most 85 debugging info (but is much slower). See Martel documentation 86 for more info on this. 87 """ 88 # a listing of all tags we are interested in scanning for 89 # in the MartelParser 90 self.interest_tags = [ 'candidate_line', 'saf_record' ] 91 92 # make a parser that returns only the tags we are interested in 93 expression = Martel.select_names( saf_format.saf_record, self.interest_tags) 94 self._parser = expression.make_parser(debug_level = debug)
95
96 - def feed(self, handle, consumer):
97 """Feed a set of data into the scanner. 98 99 Arguments: 100 o handle - A handle with the information to parse. 101 o consumer - The consumer that should be informed of events. 102 """ 103 consumer.set_interest_tags( self.interest_tags ) 104 self._parser.setContentHandler( consumer ) 105 # self._parser.setErrorHandler(handle.ErrorHandler()) 106 107 self._parser.parseFile(handle)
108
class _RecordConsumer( Dispatch.Dispatcher ):
    """Create a Saf Record object from scanner generated information.

    Each 'candidate_line' event contributes a name and a chunk of sequence
    text.  When the 'saf_record' event ends, the accumulated sequences are
    added to self.data.alignment in the order in which their names were
    first seen.
    """

    def __init__(self):
        Dispatch.Dispatcher.__init__(self)
        self.data = Record.Record()
        self._refresh()

    def _refresh(self):
        """Reset all of the per-record accumulation state."""
        self._sequences = {}    # name -> concatenated sequence text
        self._names = {}        # ordinal (first-seen position) -> name
        self._history = []      # names that already own an ordinal
        self._guide = ''        # name found on the first candidate line
        # Length of the guide's most recent chunk (spaces removed, measured
        # before the final strip of the chunk).
        self._ref_length = 0
        self._ordinal = 0       # next ordinal to hand out

    def set_interest_tags(self, interest_tags):
        """Remember the tags the scanner will report events for."""
        self.interest_tags = interest_tags

    def startDocument(self):
        """Begin a new document with a fresh Record and empty state."""
        self.data = Record.Record()
        self._refresh()

    def start_candidate_line(self, name, attrs):
        """Start collecting the characters of a candidate line."""
        self.save_characters()

    def end_candidate_line(self, candidate_lines):
        """Split a finished candidate line into name and sequence chunk.

        The name is everything up to the first space; the remainder, with
        all spaces removed and surrounding whitespace stripped, is appended
        to whatever sequence text has already been stored for that name.
        """
        candidate_line = self.get_characters()
        name = candidate_line.split(' ')[0]
        sequence = candidate_line[len(name):]
        name = name.strip()
        sequence = sequence.replace(" ", "")
        if self._guide == '':
            # First candidate line seen: its name becomes the guide.
            self._guide = name
            self._ref_length = len(sequence)
        elif name == self._guide:
            self._ref_length = len(sequence)
        if name not in self._history:
            # First appearance of this name: record its output position.
            self._names[self._ordinal] = name
            self._ordinal = self._ordinal + 1
            self._history.append(name)
        sequence = sequence.strip()
        self._sequences[name] = self._sequences.get(name, '') + sequence

    def start_saf_record(self, sequence, attrs):
        """Begin a record with an empty sequence table."""
        self._sequences = {}

    def end_saf_record(self, saf_record):
        """Add the collected sequences to the alignment, then reset state."""
        # sorted() works on both a key list and a key view, unlike calling
        # .sort() on the result of keys().
        for ordinal in sorted(self._names):
            name = self._names[ordinal]
            self.data.alignment.add_sequence(name, self._sequences[name])
        self._refresh()
172
class RecordParser:
    """Turn Saf files into Record objects.
    """

    def __init__(self, debug_level = 0):
        """Create the parser and its underlying scanner.

        Arguments:
        o debug_level - An optional argument that specifies the amount of
        debugging information Martel should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        """
        self._scanner = _Scanner(debug_level)

    def parse(self, handle):
        """Read the given handle and return the resulting SAF record.
        """
        # A fresh consumer is used for every parse; it is also kept on the
        # instance as self._consumer.
        consumer = _RecordConsumer()
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
193