Package Martel :: Module IterParser
[hide private]
[frames | no frames]

Source Code for Module Martel.IterParser

  1  # Copyright 2000-2001, Dalke Scientific Software, LLC 
  2  # Distributed under the Biopython License Agreement (see the LICENSE file). 
  3   
  4  """Implement Martel parsers. 
  5   
  6  The classes in this module are used by other Martel modules and not 
  7  typically by external users. 
  8   
  9  There are two major parsers, 'Parser' and 'RecordParser.'  The first 
 10  is the standard one, which parses the file as one string in memory 
 11  then generates the SAX events.  The other reads a record at a time 
 12  using a RecordReader and generates events after each read.  The 
 13  generated event callbacks are identical. 
 14   
 15  At some level, both parsers use "_do_callback" to convert mxTextTools 
 16  tags into SAX events. 
 17   
 18  XXX finish this documentation 
 19   
 20  XXX need a better way to get closer to the likely error position when 
 21  parsing. 
 22   
 23  XXX need to implement Locator 
 24   
 25  """ 
 26  import urllib, traceback, sys 
 27  from xml.sax import handler, saxutils 
 28  import Parser, RecordReader 
 29   
 30  try: 
 31      from cStringIO import StringIO 
 32  except ImportError: 
 33      from StringIO import StringIO 
 34   
 35   
class IterRecords:
    """Iterate over an input made only of records, one record at a time.

    A reader created by ``make_reader`` hands back successive records;
    each one is parsed with ``record_parser`` and the content handler is
    yielded to the caller after every record that parses.
    """

    def __init__(self, record_parser, make_reader, reader_args, marker_tag):
        """Remember the collaborators used by the iterate* methods.

        record_parser -- object providing copy(), setContentHandler()
            and parseString()
        make_reader -- callable invoked as make_reader(fileobj, *reader_args);
            its result must provide next() and remainder()
        reader_args -- extra positional arguments passed to make_reader
        marker_tag -- kept on the instance; not used by the methods below
        """
        self.record_parser = record_parser
        self.make_reader = make_reader
        self.reader_args = reader_args
        self.marker_tag = marker_tag

    def copy(self):
        """Return a new IterRecords with its own copy of the record parser.

        The reader factory, reader arguments and marker tag are shared
        with the original, not copied.
        """
        return IterRecords(self.record_parser.copy(),
                           self.make_reader,
                           self.reader_args,
                           self.marker_tag)

    def iterate(self, source, cont_handler=None):
        """Iterate over whatever saxutils.prepare_input_source() accepts.

        The character stream is preferred; the byte stream is used when
        there is no character stream.
        """
        source = saxutils.prepare_input_source(source)
        # Named 'stream' so the builtin 'file' is not shadowed.
        stream = source.getCharacterStream() or source.getByteStream()
        return self.iterateFile(stream, cont_handler)

    def iterateString(self, s, cont_handler=None):
        """Iterate over the records contained in the string `s`."""
        return self.iterateFile(StringIO(s), cont_handler)

    def iterateFile(self, fileobj, cont_handler=None):
        """Generate `cont_handler` once for every record parsed from `fileobj`.

        This is a generator, so nothing is read until iteration starts.
        When `cont_handler` is None a new LAX.LAX() instance is used.

        While iterating, self.start_position and self.end_position hold
        the offsets (summed record lengths) of the start and end of the
        record most recently read.

        Raises Parser.ParserPositionException when the reader raises
        RecordReader.ReaderError, when a record fails to parse, or when
        unread data is left after the reader reports no more records.
        """
        # Set both offsets up front so end_position exists even when the
        # input holds no records (same as IterHeaderFooter.iterateFile).
        self.start_position = self.end_position = 0
        if cont_handler is None:
            import LAX
            cont_handler = LAX.LAX()
        self.record_parser.setContentHandler(cont_handler)

        reader = self.make_reader(fileobj, *self.reader_args)
        while 1:
            try:
                rec = reader.next()
            except RecordReader.ReaderError:
                raise Parser.ParserPositionException(self.start_position)
            if rec is None:
                # The reader has no more records to offer.
                break
            self.end_position = self.start_position + len(rec)
            try:
                self.record_parser.parseString(rec)
            except Parser.ParserPositionException as exc:
                # Shift the reported position by the record's offset in
                # the input (relies on the exception supporting +=).
                exc += self.start_position
                raise

            yield cont_handler
            self.start_position = self.end_position

        # Anything still unread means the input was not fully consumed.
        fileobj, lookahead = reader.remainder()
        if lookahead or fileobj.read(1):
            raise Parser.ParserPositionException(self.start_position)
85 86
class IterHeaderFooter:
    """Iterate over records that may sit between a header and a footer.

    The header and footer are each optional (their parser may be None).
    Both are parsed but never yielded; the content handler is yielded
    once per record only.
    """

    def __init__(self,
                 header_parser, make_header_reader, header_args,
                 record_parser, make_record_reader, record_args,
                 footer_parser, make_footer_reader, footer_args,
                 marker_tag):
        """Remember the parser / reader-factory / reader-args triples.

        Each make_*_reader is invoked as
        make_reader(fileobj, *args, lookahead=text) and must return an
        object providing next() and remainder().  header_parser and
        footer_parser may be None when that section is absent.
        marker_tag is kept on the instance; not used by the methods below.
        """
        self.header_parser = header_parser
        self.make_header_reader = make_header_reader
        self.header_args = header_args

        self.record_parser = record_parser
        self.make_record_reader = make_record_reader
        self.record_args = record_args

        self.footer_parser = footer_parser
        self.make_footer_reader = make_footer_reader
        self.footer_args = footer_args

        self.marker_tag = marker_tag

    def copy(self):
        """Return a new IterHeaderFooter with copies of the parsers.

        A parser that is None stays None; reader factories, reader
        arguments and the marker tag are shared with the original.
        """
        header_parser = self.header_parser
        if header_parser is not None:
            header_parser = header_parser.copy()
        record_parser = self.record_parser.copy()
        footer_parser = self.footer_parser
        if footer_parser is not None:
            footer_parser = footer_parser.copy()

        return IterHeaderFooter(
            header_parser, self.make_header_reader, self.header_args,
            record_parser, self.make_record_reader, self.record_args,
            footer_parser, self.make_footer_reader, self.footer_args,
            self.marker_tag)

    def iterate(self, source, cont_handler=None):
        """Parse using the URL or file handle.

        `source` is passed through saxutils.prepare_input_source(); the
        character stream is preferred over the byte stream.
        """
        source = saxutils.prepare_input_source(source)
        # Named 'stream' so the builtin 'file' is not shadowed.
        stream = source.getCharacterStream() or source.getByteStream()
        return self.iterateFile(stream, cont_handler)

    def iterateString(self, s, cont_handler=None):
        """Iterate over the records contained in the string `s`."""
        return self.iterateFile(StringIO(s), cont_handler)

    def iterateFile(self, fileobj, cont_handler=None):
        """Generate `cont_handler` once for every record parsed from `fileobj`.

        This is a generator, so nothing is read until iteration starts.
        When `cont_handler` is None a new LAX.LAX() instance is used.

        self.start_position and self.end_position track offsets (summed
        lengths of the pieces read so far) and are what the raised
        position exceptions report.

        Raises Parser.ParserPositionException when the header or footer
        cannot be read, when a record cannot be read or parsed in the
        no-footer case, or when the footer fails to parse; raises
        Parser.ParserIncompleteException when data is left over after
        the footer.
        """
        self.start_position = self.end_position = 0
        if cont_handler is None:
            import LAX
            cont_handler = LAX.LAX()
        self.record_parser.setContentHandler(cont_handler)

        lookahead = ""

        # By construction, we never need events from the header
        # nor from the footer
        if self.header_parser is not None:
            reader = self.make_header_reader(fileobj, *self.header_args,
                                             lookahead=lookahead)
            try:
                rec = reader.next()
            except RecordReader.ReaderError:
                raise Parser.ParserPositionException(self.start_position)
            if rec is None:
                # No header could be read (e.g. empty input).  Report it
                # the same way as a missing footer rather than letting
                # len(None) fail with a TypeError.
                raise Parser.ParserPositionException(self.start_position)
            self.end_position = self.start_position + len(rec)
            self.header_parser.parseString(rec)
            self.start_position = self.end_position
            # Text the header reader read ahead but did not use is
            # handed on to the record reader.
            fileobj, lookahead = reader.remainder()

        reader = self.make_record_reader(fileobj, *self.record_args,
                                         lookahead=lookahead)

        if not self.footer_parser:
            # No footer: every remaining record must read and parse.
            while 1:
                try:
                    rec = reader.next()
                except RecordReader.ReaderError:
                    raise Parser.ParserPositionException(self.start_position)
                if rec is None:
                    break
                self.end_position = self.start_position + len(rec)
                try:
                    self.record_parser.parseString(rec)
                except Parser.ParserPositionException as exc:
                    # Shift the reported position by the record's offset
                    # in the input (relies on the exception supporting +=).
                    exc += self.start_position
                    raise
                yield cont_handler
                self.start_position = self.end_position
            # NOTE(review): unlike the footer path below, nothing here
            # checks for unread trailing data -- confirm this is intended.
            return

        # This one is tedious: a failure to read or parse a record is
        # not an error yet, because it may simply be the footer.
        unparsed = ""
        while 1:
            try:
                rec = reader.next()
            except RecordReader.ReaderError:
                # we may have stumbled into the footer
                break

            if not rec:
                # maybe there's a footer left
                break

            try:
                self.record_parser.parseString(rec)
            except Parser.ParserException:
                # we may have tried to parse the footer; give the text
                # back so the footer reader sees it first
                unparsed = rec
                break
            self.end_position = self.start_position + len(rec)
            yield cont_handler
            self.start_position = self.end_position

        fileobj, lookahead = reader.remainder()
        lookahead = unparsed + lookahead

        # Try to read the footer
        reader = self.make_footer_reader(fileobj, *self.footer_args,
                                         lookahead=lookahead)
        try:
            rec = reader.next()
        except RecordReader.ReaderError:
            raise Parser.ParserPositionException(self.start_position)

        if rec is None:
            # Couldn't read any footer
            raise Parser.ParserPositionException(self.start_position)

        try:
            self.footer_parser.parseString(rec)
        except Parser.ParserPositionException as exc:
            exc += self.start_position
            raise
        self.end_position = self.start_position + len(rec)
        self.start_position = self.end_position

        # Nothing may follow the footer.
        fileobj, lookahead = reader.remainder()
        if lookahead or fileobj.read(1):
            raise Parser.ParserIncompleteException(self.start_position)
223