1
2
3
4
5
6
7 import sys
8 from string import split
9
10 import numpy
11
12
13 from StructureBuilder import StructureBuilder
14 from PDBExceptions import PDBConstructionException
15 from parse_pdb_header import _parse_pdb_header_list
16
17 __doc__="Parser for PDB files."
18
19
20
21
22
24 """
25 Parse a PDB file and return a Structure object.
26 """
27
28 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
29 """
30 The PDB parser call a number of standard methods in an aggregated
31 StructureBuilder object. Normally this object is instanciated by the
32 PDBParser object itself, but if the user provides his own StructureBuilder
33 object, the latter is used instead.
34
35 Arguments:
36 o PERMISSIVE - int, if this is 0 exceptions in constructing the
37 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are
38 caught, but some residues or atoms will be missing. THESE EXCEPTIONS
39 ARE DUE TO PROBLEMS IN THE PDB FILE!.
40 o structure_builder - an optional user implemented StructureBuilder class.
41 """
42 if structure_builder!=None:
43 self.structure_builder=structure_builder
44 else:
45 self.structure_builder=StructureBuilder()
46 self.header=None
47 self.trailer=None
48 self.line_counter=0
49 self.PERMISSIVE=PERMISSIVE
50
51
52
54 """Return the structure.
55
56 Arguments:
57 o id - string, the id that will be used for the structure
58 o file - name of the PDB file OR an open filehandle
59 """
60 self.header=None
61 self.trailer=None
62
63 self.structure_builder.init_structure(id)
64 if isinstance(file, basestring):
65 file=open(file)
66 self._parse(file.readlines())
67 self.structure_builder.set_header(self.header)
68
69 return self.structure_builder.get_structure()
70
72 "Return the header."
73 return self.header
74
76 "Return the trailer."
77 return self.trailer
78
79
80
81 - def _parse(self, header_coords_trailer):
87
89 "Get the header of the PDB file, return the rest."
90 structure_builder=self.structure_builder
91 for i in range(0, len(header_coords_trailer)):
92 structure_builder.set_line_counter(i+1)
93 line=header_coords_trailer[i]
94 record_type=line[0:6]
95 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '):
96 break
97 header=header_coords_trailer[0:i]
98
99 self.line_counter=i
100 coords_trailer=header_coords_trailer[i:]
101 header_dict=_parse_pdb_header_list(header)
102 return header_dict, coords_trailer
103
105 "Parse the atomic data in the PDB file."
106 local_line_counter=0
107 structure_builder=self.structure_builder
108 current_model_id=0
109
110 model_open=0
111 current_chain_id=None
112 current_segid=None
113 current_residue_id=None
114 current_resname=None
115 for i in range(0, len(coords_trailer)):
116 line=coords_trailer[i]
117 record_type=line[0:6]
118 global_line_counter=self.line_counter+local_line_counter+1
119 structure_builder.set_line_counter(global_line_counter)
120 if(record_type=='ATOM ' or record_type=='HETATM'):
121
122 if not model_open:
123 structure_builder.init_model(current_model_id)
124 current_model_id+=1
125 model_open=1
126 fullname=line[12:16]
127
128 split_list=split(fullname)
129 if len(split_list)!=1:
130
131
132 name=fullname
133 else:
134
135 name=split_list[0]
136 altloc=line[16:17]
137 resname=line[17:20]
138 chainid=line[21:22]
139 try:
140 serial_number=int(line[6:11])
141 except:
142 serial_number=0
143 resseq=int(split(line[22:26])[0])
144 icode=line[26:27]
145 if record_type=='HETATM':
146 if resname=="HOH" or resname=="WAT":
147 hetero_flag="W"
148 else:
149 hetero_flag="H"
150 else:
151 hetero_flag=" "
152 residue_id=(hetero_flag, resseq, icode)
153
154 x=float(line[30:38])
155 y=float(line[38:46])
156 z=float(line[46:54])
157 coord=numpy.array((x, y, z), 'f')
158
159 occupancy=float(line[54:60])
160 bfactor=float(line[60:66])
161 segid=line[72:76]
162 if current_segid!=segid:
163 current_segid=segid
164 structure_builder.init_seg(current_segid)
165 if current_chain_id!=chainid:
166 current_chain_id=chainid
167 structure_builder.init_chain(current_chain_id)
168 current_residue_id=residue_id
169 current_resname=resname
170 try:
171 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
172 except PDBConstructionException, message:
173 self._handle_PDB_exception(message, global_line_counter)
174 elif current_residue_id!=residue_id or current_resname!=resname:
175 current_residue_id=residue_id
176 current_resname=resname
177 try:
178 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
179 except PDBConstructionException, message:
180 self._handle_PDB_exception(message, global_line_counter)
181
182 try:
183 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number)
184 except PDBConstructionException, message:
185 self._handle_PDB_exception(message, global_line_counter)
186 elif(record_type=='ANISOU'):
187 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70]))
188
189 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f')
190 structure_builder.set_anisou(anisou_array)
191 elif(record_type=='MODEL '):
192 structure_builder.init_model(current_model_id)
193 current_model_id+=1
194 model_open=1
195 current_chain_id=None
196 current_residue_id=None
197 elif(record_type=='END ' or record_type=='CONECT'):
198
199 self.line_counter=self.line_counter+local_line_counter
200 return coords_trailer[local_line_counter:]
201 elif(record_type=='ENDMDL'):
202 model_open=0
203 current_chain_id=None
204 current_residue_id=None
205 elif(record_type=='SIGUIJ'):
206
207 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70]))
208
209 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f')
210 structure_builder.set_siguij(siguij_array)
211 elif(record_type=='SIGATM'):
212
213 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
214 sigatm_array=numpy.array(sigatm, 'f')
215 structure_builder.set_sigatm(sigatm_array)
216 local_line_counter=local_line_counter+1
217
218 self.line_counter=self.line_counter+local_line_counter
219 return []
220
222 """
223 This method catches an exception that occurs in the StructureBuilder
224 object (if PERMISSIVE==1), or raises it again, this time adding the
225 PDB line number to the error message.
226 """
227 message="%s at line %i." % (message, line_counter)
228 if self.PERMISSIVE:
229
230 print "PDBConstructionException: %s" % message
231 print "Exception ignored.\nSome atoms or residues will be missing in the data structure."
232 else:
233
234 raise PDBConstructionException(message)
235
236
237 if __name__=="__main__":
238
239 import sys
240
241 p=PDBParser(PERMISSIVE=1)
242
243 s=p.get_structure("scr", sys.argv[1])
244
245 for m in s:
246 p=m.get_parent()
247 assert(p is s)
248 for c in m:
249 p=c.get_parent()
250 assert(p is m)
251 for r in c:
252 print r
253 p=r.get_parent()
254 assert(p is c)
255 for a in r:
256 p=a.get_parent()
257 if not p is r:
258 print p, r
259