Package Bio :: Module FilteredReader
[hide private]
[frames | no frames]

Source Code for Module Bio.FilteredReader

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code for more fancy file handles. 
  7   
  8  Classes: 
  9  Filtered is a decorator for File that allows the user to filter the output 
 10  on a line by line basis. 
 11   
 12  The FilteredReader module reads a file and applies a sequence of filters to the input. 
 13  The constructor sets a default filter chain, but the user can select another filter by setting 
 14  Bio.FilteredReader.filter_chain. 
 15   
 16  handle = open( "filename" ) 
 17  filtered_reader = Bio.FilteredReader.FilteredReader( handle ) 
 18  filtered_reader.filter_chain = [ remove_asterisks, replace_dot_with_dash ] 
 19  filtered_reader.read() 
 20   
 21  All filters in the chain must provide the same interface with a line of text as the single 
 22  input parameter and altered text as the return value. 
 23   
 24  """ 
 25   
 26  import os 
 27  import string 
 28  import copy 
 29  from File import UndoHandle 
 30   
 31   
 32   
 33  """Used for debugging""" 
def dump_saved(name, text, j):
    """Write text to a numbered dump file, 80 characters per line.

    Debugging aid.  The output file name is ``name`` with the decimal value
    of ``j`` appended.  The file is opened in write mode, so an existing file
    with that name is overwritten.

    Arguments:
     - name - path prefix of the dump file
     - text - the string to dump
     - j    - integer appended to ``name`` to form the file name

    Returns None.
    """
    # The with-block guarantees the file is closed even if a write fails.
    with open(name + '%d' % j, "w") as dump_file:
        for start in range(0, len(text), 80):
            dump_file.write('%s\n' % text[start:start + 80])
40
def remove_leading_whitespace(line):
    """Filter: return line with any whitespace at its start removed.

    Whitespace at the end of the line (including a line terminator) is kept.
    """
    trimmed = line.lstrip()
    return trimmed
43 44
def remove_empty_line(line):
    """Filter: drop lines that consist solely of whitespace.

    Returns the empty string when nothing is left of line after stripping
    whitespace; otherwise returns the line with its original whitespace and
    line terminator intact.
    """
    is_blank = not line.strip()
    return '' if is_blank else line[:]
51
def remove_useless_dot(line):
    """Filter: blank out tab-delimited fields that contain only a dot.

    Every occurrence of tab, dot, tab is reduced to two tabs.  Afterwards,
    if the very last character of the text is a dot, that dot is removed.
    Returns the resulting string.
    """
    dot_field = "\t.\t"
    text = line
    # One replace() pass does not catch overlapping matches such as
    # "\t.\t.\t", so keep substituting until no dot-only field remains.
    while dot_field in text:
        text = text.replace(dot_field, "\t\t")
    if text.endswith('.'):
        text = text[:-1]
    return text
62
def fix_punctuation(line):
    """Filter: normalise quoting, separators and the 'entryname' keyword.

    Applied in order: single quotes are removed, double quotes are removed,
    each semicolon becomes a tab, and each occurrence of 'entryname' becomes
    'id'.  Letter case is left untouched.  Returns the resulting string.
    """
    substitutions = (
        ("'", ''),
        ('"', ''),
        (';', '\t'),
        ('entryname', 'id'),
    )
    cleaned = line
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    if not cleaned:
        return ''
    return cleaned[:]
73 74 75
class FilteredReader:
    """File-handle wrapper whose read() passes each line through filters.

    Text read from the wrapped handle is split into lines (terminators
    kept) and every line is passed through the callables in
    ``filter_chain``, in order.  Each filter takes one line of text and
    returns the altered text; returning '' removes the line.  Attributes
    that are not defined here are looked up on the wrapped handle.
    """

    def __init__(self, handle):
        """Wrap handle and install the default filter chain."""
        self._handle = handle
        # Trailing, possibly incomplete line held back by read_block().
        self._start_line = ''
        self._debug_count = 0
        self.filter_chain = [remove_empty_line, remove_useless_dot, fix_punctuation]

    def __getattr__(self, attr):
        """Delegate attributes not found on the reader to the wrapped handle."""
        # __getattr__ only runs when normal lookup fails.  If _handle itself
        # is missing (an instance created without __init__), looking it up
        # below would re-enter this method without end.
        if attr == '_handle':
            raise AttributeError(attr)
        return getattr(self._handle, attr)

    def close(self, *args, **keywds):
        """Close the wrapped handle, forwarding all arguments to it."""
        return self._handle.close(*args, **keywds)

    def read(self, *args, **keywds):
        """Read from the wrapped handle and return the filtered text.

        The size is taken from the first positional argument or, failing
        that, from the ``size`` keyword.  With no size, a size of None, or a
        negative size, everything up to end of file is read.  Otherwise the
        request is handled by read_block(); a size of 0 returns ''.
        """
        len_expected = self._get_len_expected(args, keywds)
        if len_expected is None:
            return self.read_to_end()
        return self.read_block(len_expected)

    def read_block(self, len_expected):
        """Return filtered whole lines until len_expected characters are reached.

        Only lines that have been followed by further input are filtered and
        returned; the trailing line of each chunk is kept in
        ``self._start_line`` until a later read or end of file.  Because
        output is produced a whole line at a time, the result can be longer
        than len_expected, and it is shorter when end of file comes first.
        """
        filtered_text = ''
        while len(filtered_text) < len_expected:
            still_needed = len_expected - len(filtered_text)
            # Discount the text already buffered, but always ask for at least
            # one character: read(0) returns '' and would look like EOF.
            chunk_size = max(still_needed - len(self._start_line), 1)
            text_read = self._handle.read(chunk_size)
            lines = (self._start_line + text_read).splitlines(True)
            if text_read == '':
                # End of file: nothing more can extend the pending line.
                self._start_line = ''
                filtered_text += self.filter(lines)
                break
            # The final line may continue in the next chunk; hold it back.
            self._start_line = lines[-1]
            filtered_text += self.filter(lines[:-1])
        return filtered_text

    def read_to_end(self):
        """Read the rest of the wrapped handle and return it filtered.

        Text held back by an earlier read_block() call is emitted first and
        then cleared so that it cannot be returned a second time.
        """
        text_read = self._handle.read()
        full_text = self._start_line + text_read
        self._start_line = ''
        return self.filter(full_text.splitlines(True))

    def _get_len_expected(self, args, keywds):
        """Return the size requested by a read() call, or None for no limit.

        The first positional argument takes precedence over the ``size``
        keyword.  A negative size means "no limit" and is returned as None.
        """
        if args:
            len_expected = args[0]
        else:
            len_expected = keywds.get('size')
        if len_expected is not None and len_expected < 0:
            len_expected = None
        return len_expected

    def filter(self, lines):
        """Apply every filter in filter_chain to each line; return the joined text."""
        filtered_lines = []
        for line in lines:
            for line_filter in self.filter_chain:
                line = line_filter(line)
            filtered_lines.append(line)
        return ''.join(filtered_lines)
150
def has_trailing_linefeed(line):
    """Return 1 if line ends with a carriage return or a line feed, else 0."""
    line_endings = (chr(13), chr(10))
    if line.endswith(line_endings):
        return 1
    return 0
157