Package Bio :: Package PDB :: Module Polypeptide
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.Polypeptide

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  from types import StringType 
  7   
  8  from Bio.Alphabet import ProteinAlphabet 
  9  from Bio.Seq import Seq 
 10  from Bio.SCOP.Raf import to_one_letter_code 
 11  from Bio.PDB.PDBExceptions import PDBException 
 12  from Bio.PDB.Residue import Residue, DisorderedResidue 
 13  from Vector import calc_dihedral, calc_angle 
 14   
 15  __doc__=""" 
 16  Polypeptide related classes (construction and representation). 
 17   
 18  Example: 
 19   
 20      >>> ppb=PPBuilder() 
 21      >>> for pp in ppb.build_peptides(structure): 
 22      >>>     print pp.get_sequence() 
 23  """ 
 24   
 25  standard_aa_names=["ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS",  
 26                     "LEU", "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", 
 27                     "TRP", "TYR"] 
 28   
 29   
 30  aa1="ACDEFGHIKLMNPQRSTVWY" 
 31  aa3=standard_aa_names 
 32   
 33  d1_to_index={} 
 34  dindex_to_1={} 
 35  d3_to_index={} 
 36  dindex_to_3={} 
 37   
 38  # Create some lookup tables 
 39  for i in range(0, 20): 
 40      n1=aa1[i] 
 41      n3=aa3[i] 
 42      d1_to_index[n1]=i 
 43      dindex_to_1[i]=n1 
 44      d3_to_index[n3]=i 
 45      dindex_to_3[i]=n3 
 46   
47 -def index_to_one(index):
48 """ 49 Index to corresponding one letter amino acid name. 50 For example: 0 to A. 51 """ 52 return dindex_to_1[index]
53
54 -def one_to_index(s):
55 """ 56 One letter code to index. 57 For example: A to 0. 58 """ 59 return d1_to_index[s]
60
61 -def index_to_three(i):
62 """ 63 Index to corresponding three letter amino acid name. 64 For example: 0 to ALA. 65 """ 66 return dindex_to_3[i]
67
68 -def three_to_index(s):
69 """ 70 Three letter code to index. 71 For example: ALA to 0. 72 """ 73 return d3_to_index[s]
74
75 -def three_to_one(s):
76 """ 77 Three letter code to one letter code. 78 For example: ALA to A. 79 """ 80 i=d3_to_index[s] 81 return dindex_to_1[i]
82
83 -def one_to_three(s):
84 """ 85 One letter code to three letter code. 86 For example: A to ALA. 87 """ 88 i=d1_to_index[s] 89 return dindex_to_3[i]
90
91 -def is_aa(residue, standard=0):
92 """ 93 Return 1 if residue object/string is an amino acid. 94 95 @param residue: a L{Residue} object OR a three letter amino acid code 96 @type residue: L{Residue} or string 97 98 @param standard: flag to check for the 20 AA (default false) 99 @type standard: boolean 100 """ 101 if not type(residue)==StringType: 102 residue=residue.get_resname() 103 residue=residue.upper() 104 if standard: 105 return d3_to_index.has_key(residue) 106 else: 107 return to_one_letter_code.has_key(residue)
108 109
110 -class Polypeptide(list):
111 """ 112 A polypeptide is simply a list of L{Residue} objects. 113 """
114 - def get_ca_list(self):
115 """ 116 @return: the list of C-alpha atoms 117 @rtype: [L{Atom}, L{Atom}, ...] 118 """ 119 ca_list=[] 120 for res in self: 121 ca=res["CA"] 122 ca_list.append(ca) 123 return ca_list
124
125 - def get_phi_psi_list(self):
126 """ 127 Return the list of phi/psi dihedral angles 128 """ 129 ppl=[] 130 lng=len(self) 131 for i in range(0, lng): 132 res=self[i] 133 try: 134 n=res['N'].get_vector() 135 ca=res['CA'].get_vector() 136 c=res['C'].get_vector() 137 except: 138 # Some atoms are missing 139 # Phi/Psi cannot be calculated for this residue 140 ppl.append((None, None)) 141 res.xtra["PHI"]=None 142 res.xtra["PSI"]=None 143 continue 144 # Phi 145 if i>0: 146 rp=self[i-1] 147 try: 148 cp=rp['C'].get_vector() 149 phi=calc_dihedral(cp, n, ca, c) 150 except: 151 phi=None 152 else: 153 # No phi for residue 0! 154 phi=None 155 # Psi 156 if i<(lng-1): 157 rn=self[i+1] 158 try: 159 nn=rn['N'].get_vector() 160 psi=calc_dihedral(n, ca, c, nn) 161 except: 162 psi=None 163 else: 164 # No psi for last residue! 165 psi=None 166 ppl.append((phi, psi)) 167 # Add Phi/Psi to xtra dict of residue 168 res.xtra["PHI"]=phi 169 res.xtra["PSI"]=psi 170 return ppl
171
172 - def get_tau_list(self):
173 """ 174 Return list of tau torsions angles for all 4 consecutive 175 Calpha atoms. 176 """ 177 ca_list=self.get_ca_list() 178 tau_list=[] 179 for i in range(0, len(ca_list)-3): 180 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3]] 181 vector_list=map(lambda a: a.get_vector(), atom_list) 182 v1, v2, v3, v4=vector_list 183 tau=calc_dihedral(v1, v2, v3, v4) 184 tau_list.append(tau) 185 # Put tau in xtra dict of residue 186 res=ca_list[i+2].get_parent() 187 res.xtra["TAU"]=tau 188 return tau_list
189
190 - def get_theta_list(self):
191 """ 192 Return list of theta angles for all 3 consecutive 193 Calpha atoms. 194 """ 195 theta_list=[] 196 ca_list=self.get_ca_list() 197 for i in range(0, len(ca_list)-2): 198 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2]] 199 vector_list=map(lambda a: a.get_vector(), atom_list) 200 v1, v2, v3=vector_list 201 theta=calc_angle(v1, v2, v3) 202 theta_list.append(theta) 203 # Put tau in xtra dict of residue 204 res=ca_list[i+1].get_parent() 205 res.xtra["THETA"]=theta 206 return theta_list
207
208 - def get_sequence(self):
209 """ 210 Return the AA sequence. 211 212 @return: polypeptide sequence 213 @rtype: L{Seq} 214 """ 215 s="" 216 for res in self: 217 resname=res.get_resname() 218 if to_one_letter_code.has_key(resname): 219 resname=to_one_letter_code[resname] 220 else: 221 resname='X' 222 s=s+resname 223 seq=Seq(s, ProteinAlphabet) 224 return seq
225
226 - def __repr__(self):
227 """ 228 Return <Polypeptide start=START end=END>, where START 229 and END are sequence identifiers of the outer residues. 230 """ 231 start=self[0].get_id()[1] 232 end=self[-1].get_id()[1] 233 s="<Polypeptide start=%s end=%s>" % (start, end) 234 return s
235
236 -class _PPBuilder:
237 """ 238 Base class to extract polypeptides. 239 It checks if two consecutive residues in a chain 240 are connected. The connectivity test is implemented by a 241 subclass. 242 """
243 - def __init__(self, radius):
244 """ 245 @param radius: distance 246 @type radius: float 247 """ 248 self.radius=radius
249
250 - def _accept(self, residue):
251 "Check if the residue is an amino acid." 252 if is_aa(residue): 253 # not a standard AA so skip 254 return 1 255 else: 256 return 0
257
258 - def build_peptides(self, entity, aa_only=1):
259 """ 260 Build and return a list of Polypeptide objects. 261 262 @param entity: polypeptides are searched for in this object 263 @type entity: L{Structure}, L{Model} or L{Chain} 264 265 @param aa_only: if 1, the residue needs to be a standard AA 266 @type aa_only: int 267 """ 268 is_connected=self._is_connected 269 accept=self._accept 270 level=entity.get_level() 271 # Decide wich entity we are dealing with 272 if level=="S": 273 model=entity[0] 274 chain_list=model.get_list() 275 elif level=="M": 276 chain_list=entity.get_list() 277 elif level=="C": 278 chain_list=[entity] 279 else: 280 raise PDBException("Entity should be Structure, Model or Chain.") 281 pp_list=[] 282 for chain in chain_list: 283 chain_it=iter(chain) 284 prev=chain_it.next() 285 pp=None 286 for next in chain_it: 287 if aa_only and not accept(prev): 288 prev=next 289 continue 290 if is_connected(prev, next): 291 if pp is None: 292 pp=Polypeptide() 293 pp.append(prev) 294 pp_list.append(pp) 295 pp.append(next) 296 else: 297 pp=None 298 prev=next 299 return pp_list
300 301
302 -class CaPPBuilder(_PPBuilder):
303 """ 304 Use CA--CA distance to find polypeptides. 305 """
306 - def __init__(self, radius=4.3):
307 _PPBuilder.__init__(self, radius)
308
309 - def _is_connected(self, prev, next):
310 for r in [prev, next]: 311 if not r.has_id("CA"): 312 return 0 313 n=next["CA"] 314 p=prev["CA"] 315 # Unpack disordered 316 if n.is_disordered(): 317 nlist=n.disordered_get_list() 318 else: 319 nlist=[n] 320 if p.is_disordered(): 321 plist=p.disordered_get_list() 322 else: 323 plist=[p] 324 for nn in nlist: 325 for pp in plist: 326 if (nn-pp)<self.radius: 327 return 1 328 return 0
329 330
331 -class PPBuilder(_PPBuilder):
332 """ 333 Use C--N distance to find polypeptides. 334 """
335 - def __init__(self, radius=1.8):
336 _PPBuilder.__init__(self, radius)
337
338 - def _is_connected(self, prev, next):
339 if not prev.has_id("C"): 340 return 0 341 if not next.has_id("N"): 342 return 0 343 test_dist=self._test_dist 344 c=prev["C"] 345 n=next["N"] 346 # Test all disordered atom positions! 347 if c.is_disordered(): 348 clist=c.disordered_get_list() 349 else: 350 clist=[c] 351 if n.is_disordered(): 352 nlist=n.disordered_get_list() 353 else: 354 nlist=[n] 355 for nn in nlist: 356 for cc in clist: 357 # To form a peptide bond, N and C must be 358 # within radius and have the same altloc 359 # identifier or one altloc blanc 360 n_altloc=nn.get_altloc() 361 c_altloc=cc.get_altloc() 362 if n_altloc==c_altloc or n_altloc==" " or c_altloc==" ": 363 if test_dist(nn, cc): 364 # Select the disordered atoms that 365 # are indeed bonded 366 if c.is_disordered(): 367 c.disordered_select(c_altloc) 368 if n.is_disordered(): 369 n.disordered_select(n_altloc) 370 return 1 371 return 0
372
373 - def _test_dist(self, c, n):
374 "Return 1 if distance between atoms<radius" 375 if (c-n)<self.radius: 376 return 1 377 else: 378 return 0
379 380 381 if __name__=="__main__": 382 383 import sys 384 385 from Bio.PDB.PDBParser import PDBParser 386 387 p=PDBParser(PERMISSIVE=1) 388 389 s=p.get_structure("scr", sys.argv[1]) 390 391 ppb=PPBuilder() 392 393 print "C-N" 394 for pp in ppb.build_peptides(s): 395 print pp.get_sequence() 396 for pp in ppb.build_peptides(s[0]): 397 print pp.get_sequence() 398 for pp in ppb.build_peptides(s[0]["A"]): 399 print pp.get_sequence() 400 401 for pp in ppb.build_peptides(s): 402 for phi, psi in pp.get_phi_psi_list(): 403 print phi, psi 404 405 ppb=CaPPBuilder() 406 407 print "CA-CA" 408 for pp in ppb.build_peptides(s): 409 print pp.get_sequence() 410 for pp in ppb.build_peptides(s[0]): 411 print pp.get_sequence() 412 for pp in ppb.build_peptides(s[0]["A"]): 413 print pp.get_sequence() 414