1
2
3
4
5
6
7
8
9
10 """Implementations of Biopython-like Seq objects on top of BioSQL.
11
This allows retrieval of items stored in a BioSQL database using
13 a biopython-like Seq interface.
14 """
15
16 from Bio import Alphabet
17 from Bio.Seq import Seq
18 from Bio.SeqRecord import SeqRecord
19 from Bio import SeqFeature
20
def __init__(self, primary_id, adaptor, alphabet, start, length):
    """Set up a DBSeq that refers to a sequence held in BioSQL.

    You would not normally build a DBSeq yourself; one is made for you
    when a DBSeqRecord object is retrieved from the database.

    The arguments are stored unchanged: primary_id, adaptor, alphabet
    and start under their own names, and length as self._length.
    """
    # This constructor only records its arguments on the instance.
    self.start, self._length = start, length
    self.alphabet = alphabet
    self.adaptor = adaptor
    self.primary_id = primary_id
33
36
38
39
40
def __getitem__(self, index):
    """Return one letter (int index) or a sub-sequence (slice index).

    * An integer returns a single-character string fetched through
      the adaptor.  Negative values count from the end; a position
      outside the sequence raises IndexError.
    * A slice with step 1 (or no step) returns a new object of this
      same class describing just that window, without calling the
      adaptor here.
    * A slice with any other step fetches the spanned region as a
      string and returns the stepped letters as a Seq.
    * A slice that selects nothing returns an empty Seq.

    Slice bounds are normalised with slice.indices(), so out-of-range
    bounds are clamped and negative steps walk backwards, matching the
    behaviour of built-in sequences.

    Raises ValueError for any index that is neither an int nor a
    slice, and for a slice whose step is zero.
    """
    if isinstance(index, int):
        pos = index
        if pos < 0:
            pos += self._length
        if not 0 <= pos < self._length:
            raise IndexError(index)
        return self.adaptor.get_subseq_as_string(self.primary_id,
                                                 self.start + pos,
                                                 self.start + pos + 1)
    if not isinstance(index, slice):
        raise ValueError("Unexpected index type")

    first, stop, step = index.indices(self._length)
    if step > 0:
        low, high = first, stop
    else:
        # A backwards walk visits first, first+step, ... while > stop,
        # all of which lie in the half-open window [stop + 1, first + 1).
        low, high = stop + 1, first + 1

    if low >= high:
        return Seq("", self.alphabet)
    if step == 1:
        # Contiguous window: hand back another lazy object of this class.
        return self.__class__(self.primary_id, self.adaptor, self.alphabet,
                              self.start + low, high - low)
    # Any other stride: fetch the covered region once, then step through
    # it in memory (a negative step starts from its last letter).
    full = self.adaptor.get_subseq_as_string(self.primary_id,
                                             self.start + low,
                                             self.start + high)
    return Seq(full[::step], self.alphabet)
96
def tostring(self):
    """Return the complete sequence as a plain Python string.

    Although not formally deprecated, str(my_seq) is now the
    encouraged spelling of my_seq.tostring().
    """
    # Ask the adaptor for the window [start, start + length).
    begin = self.start
    finish = begin + self._length
    return self.adaptor.get_subseq_as_string(self.primary_id, begin, finish)
110
111 data = property(tostring, doc="Sequence as string (DEPRECATED)")
112
def toseq(self):
    """Return the full sequence as a Seq object.

    The Seq is built from str(self) together with this object's
    alphabet.
    """
    text = str(self)
    return Seq(text, self.alphabet)
117
119
def __add__(self, other):
    """Return self.toseq() + other (this object on the left of "+")."""
    as_seq = self.toseq()
    return as_seq + other
121
123
def __radd__(self, other):
    """Return other + self.toseq() (this object on the right of "+")."""
    as_seq = self.toseq()
    return other + as_seq
125
126
152
def _retrieve_dbxrefs(adaptor, primary_id):
    """Retrieve the database cross references for the sequence.

    Runs a single query through adaptor.execute_and_fetchall() for the
    (dbname, accession, version) rows linked to bioentry primary_id,
    ordered by rank, and returns them as a list of "dbname:accession"
    strings.  ".version" is appended to the accession unless the
    version is false-like or equal to the string "0".
    """
    rows = adaptor.execute_and_fetchall(
        "SELECT dbname, accession, version"
        " FROM bioentry_dbxref join dbxref using (dbxref_id)"
        " WHERE bioentry_id = %s"
        " ORDER BY rank", (primary_id,))
    return [
        "%s:%s" % (
            dbname,
            "%s.%s" % (accession, version)
            if version and version != "0" else accession,
        )
        for dbname, accession, version in rows
    ]
168
def _retrieve_features(adaptor, primary_id):
    """Return the list of SeqFeature objects stored for one bioentry.

    The seqfeature rows of bioentry primary_id are read in rank order.
    For each one, four more queries are issued through
    adaptor.execute_and_fetchall():

    * qualifier values, collected as {name: [values...]};
    * feature cross references, appended to the "db_xref" qualifier
      as "dbname:accession" strings;
    * locations (id, start, end, strand), in rank order;
    * locations joined to dbxref, giving a per-location
      (dbname, accession[.version]) remote reference.

    A feature with exactly one location has its location, strand,
    ref_db, ref and location_operator set directly.  A feature with
    several locations gets one sub-feature per location (with the
    operator defaulting to "join" when none is stored); the parent then
    spans from the first location's start to the last location's end
    and copies operator and strand from the first sub-feature.  A
    feature with no locations is returned without any of those
    attributes being assigned here.
    """
    feature_rows = adaptor.execute_and_fetchall(
        "SELECT seqfeature_id, type.name, rank"
        " FROM seqfeature join term type on (type_term_id = type.term_id)"
        " WHERE bioentry_id = %s"
        " ORDER BY rank", (primary_id,))
    seq_feature_list = []
    for seqfeature_id, seqfeature_type, seqfeature_rank in feature_rows:
        # --- qualifiers stored as name/value pairs -------------------
        qualifier_rows = adaptor.execute_and_fetchall(
            "SELECT name, value"
            " FROM seqfeature_qualifier_value join term using (term_id)"
            " WHERE seqfeature_id = %s"
            " ORDER BY rank", (seqfeature_id,))
        qualifiers = {}
        for qualifier_name, qualifier_value in qualifier_rows:
            qualifiers.setdefault(qualifier_name, []).append(qualifier_value)

        # --- cross references, folded into the "db_xref" qualifier ---
        xref_rows = adaptor.execute_and_fetchall(
            "SELECT dbxref.dbname, dbxref.accession"
            " FROM dbxref join seqfeature_dbxref using (dbxref_id)"
            " WHERE seqfeature_dbxref.seqfeature_id = %s"
            " ORDER BY rank", (seqfeature_id,))
        for xref_db, xref_accession in xref_rows:
            qualifiers.setdefault("db_xref", []).append(
                "%s:%s" % (xref_db, xref_accession))

        # --- locations -----------------------------------------------
        location_rows = adaptor.execute_and_fetchall(
            "SELECT location_id, start_pos, end_pos, strand"
            " FROM location"
            " WHERE seqfeature_id = %s"
            " ORDER BY rank", (seqfeature_id,))
        locations = []
        for location_id, start, end, strand in location_rows:
            # A truthy start is shifted down by one -- presumably from the
            # database's 1-based start to a 0-based one; TODO confirm
            # against the code that writes these rows.
            if start:
                start -= 1
            # A stored strand of 0 is reported as None.
            if strand == 0:
                strand = None
            locations.append((location_id, start, end, strand))

        # --- remote references, keyed by location_id -----------------
        remote_rows = adaptor.execute_and_fetchall(
            "SELECT location_id, dbname, accession, version"
            " FROM location join dbxref using (dbxref_id)"
            " WHERE seqfeature_id = %s", (seqfeature_id,))
        lookup = {}
        for location_id, dbname, accession, version in remote_rows:
            if version and version != "0":
                remote_ref = "%s.%s" % (accession, version)
            else:
                remote_ref = accession
            # An empty database name is reported as None.
            lookup[location_id] = (None if dbname == "" else dbname,
                                   remote_ref)

        # --- assemble the feature ------------------------------------
        feature = SeqFeature.SeqFeature(type=seqfeature_type)
        feature._seqfeature_id = seqfeature_id
        feature.qualifiers = qualifiers
        if len(locations) == 1:
            location_id, start, end, strand = locations[0]
            feature.location_operator = \
                _retrieve_location_qualifier_value(adaptor, location_id)
            ref_db, ref = lookup.get(location_id, (None, None))
            feature.location = SeqFeature.FeatureLocation(start, end)
            feature.strand = strand
            feature.ref_db = ref_db
            feature.ref = ref
        elif locations:
            # Two or more locations: one sub-feature for each.
            assert feature.sub_features == []
            for location_id, start, end, strand in locations:
                ref_db, ref = lookup.get(location_id, (None, None))
                subfeature = SeqFeature.SeqFeature()
                subfeature.type = seqfeature_type
                subfeature.location_operator = \
                    _retrieve_location_qualifier_value(adaptor, location_id)
                # Fall back to "join" when no operator was stored.
                if not subfeature.location_operator:
                    subfeature.location_operator = "join"
                subfeature.location = SeqFeature.FeatureLocation(start, end)
                subfeature.strand = strand
                subfeature.ref_db = ref_db
                subfeature.ref = ref
                feature.sub_features.append(subfeature)
            # The parent copies the first sub-feature's operator and strand.
            first_sub = feature.sub_features[0]
            feature.location_operator = first_sub.location_operator
            # The parent spans first location's start to last location's end
            # (taken in rank order, not sorted by coordinate).
            feature.location = SeqFeature.FeatureLocation(locations[0][1],
                                                          locations[-1][2])
            feature.strand = first_sub.strand

        seq_feature_list.append(feature)

    return seq_feature_list
280
def _retrieve_location_qualifier_value(adaptor, location_id):
    """Return the first stored qualifier value of a location, or "".

    Queries location_qualifier_value through
    adaptor.execute_and_fetch_col0() and returns element 0 of the
    result; an empty result yields the empty string.
    """
    matches = adaptor.execute_and_fetch_col0(
        "SELECT value FROM location_qualifier_value"
        " WHERE location_id = %s", (location_id,))
    try:
        first = matches[0]
    except IndexError:
        # No row for this location.
        first = ""
    return first
289
296
298 qvs = adaptor.execute_and_fetchall(
299 "SELECT name, value" \
300 " FROM bioentry_qualifier_value JOIN term USING (term_id)" \
301 " WHERE bioentry_id = %s" \
302 " ORDER BY rank", (primary_id,))
303 qualifiers = {}
304 for name, value in qvs:
305 if name == "keyword": name = "keywords"
306 elif name == "date_changed": name = "dates"
307 elif name == "secondary_accession": name = "accessions"
308 qualifiers.setdefault(name, []).append(value)
309 return qualifiers
310
312
313
314 refs = adaptor.execute_and_fetchall(
315 "SELECT start_pos, end_pos, " \
316 " location, title, authors," \
317 " dbname, accession" \
318 " FROM bioentry_reference" \
319 " JOIN reference USING (reference_id)" \
320 " LEFT JOIN dbxref USING (dbxref_id)" \
321 " WHERE bioentry_id = %s" \
322 " ORDER BY rank", (primary_id,))
323 references = []
324 for start, end, location, title, authors, dbname, accession in refs:
325 reference = SeqFeature.Reference()
326 if start: start -= 1
327 reference.location = [SeqFeature.FeatureLocation(start, end)]
328
329 if authors : reference.authors = authors
330 if title : reference.title = title
331 reference.journal = location
332 if dbname == 'PUBMED':
333 reference.pubmed_id = accession
334 elif dbname == 'MEDLINE':
335 reference.medline_id = accession
336 references.append(reference)
337 return {'references': references}
338
340 a = {}
341 common_names = adaptor.execute_and_fetch_col0(
342 "SELECT name FROM taxon_name WHERE taxon_id = %s" \
343 " AND name_class = 'genbank common name'", (taxon_id,))
344 if common_names:
345 a['source'] = common_names[0]
346 scientific_names = adaptor.execute_and_fetch_col0(
347 "SELECT name FROM taxon_name WHERE taxon_id = %s" \
348 " AND name_class = 'scientific name'", (taxon_id,))
349 if scientific_names:
350 a['organism'] = scientific_names[0]
351 ncbi_taxids = adaptor.execute_and_fetch_col0(
352 "SELECT ncbi_taxon_id FROM taxon WHERE taxon_id = %s", (taxon_id,))
353 if ncbi_taxids and ncbi_taxids[0] and ncbi_taxids[0] != "0":
354 a['ncbi_taxid'] = ncbi_taxids[0]
355
356
357
358
359
360
361
362
363
364
365 taxonomy = []
366 while taxon_id :
367 name, rank, parent_taxon_id = adaptor.execute_one(
368 "SELECT taxon_name.name, taxon.node_rank, taxon.parent_taxon_id" \
369 " FROM taxon, taxon_name" \
370 " WHERE taxon.taxon_id=taxon_name.taxon_id" \
371 " AND taxon_name.name_class='scientific name'" \
372 " AND taxon.taxon_id = %s", (taxon_id,))
373 if taxon_id == parent_taxon_id :
374
375
376
377 break
378 if rank != "no rank" :
379
380
381
382 taxonomy.insert(0, name)
383 taxon_id = parent_taxon_id
384
385 if taxonomy:
386 a['taxonomy'] = taxonomy
387 return a
388
390 """BioSQL equivalent of the biopython SeqRecord object.
391 """
392
def __init__(self, adaptor, primary_id):
    """Load the core bioentry columns for primary_id through adaptor.

    One row is read with adaptor.execute_one() and spread over
    _biodatabase_id, _taxon_id, name, _identifier, _division and
    description.  The record id is "accession.version", or just the
    accession when the version is false-like or the string "0".
    """
    self._adaptor = adaptor
    self._primary_id = primary_id

    row = self._adaptor.execute_one(
        "SELECT biodatabase_id, taxon_id, name, accession, version,"
        " identifier, division, description"
        " FROM bioentry"
        " WHERE bioentry_id = %s", (self._primary_id,))
    # Unpack fully before touching the instance, so a malformed row
    # leaves no partially assigned attributes behind.
    (biodatabase_id, taxon_id, name, accession, version,
     identifier, division, description) = row
    self._biodatabase_id = biodatabase_id
    self._taxon_id = taxon_id
    self.name = name
    self._identifier = identifier
    self._division = division
    self.description = description

    has_version = version and version != "0"
    self.id = "%s.%s" % (accession, version) if has_version else accession
408
def __get_seq(self):
    """Return the record's sequence, loading it on first access.

    The value from _retrieve_seq() is stored on the instance as _seq
    and handed back unchanged on later calls.
    """
    if hasattr(self, "_seq"):
        return self._seq
    self._seq = _retrieve_seq(self._adaptor, self._primary_id)
    return self._seq
415 seq = property(__get_seq, __set_seq, __del_seq, "Seq object")
416
def __get_dbxrefs(self):
    """Return the record's cross references, loading them on first access.

    The list from _retrieve_dbxrefs() is stored on the instance as
    _dbxrefs and handed back unchanged on later calls.
    """
    if hasattr(self, "_dbxrefs"):
        return self._dbxrefs
    self._dbxrefs = _retrieve_dbxrefs(self._adaptor, self._primary_id)
    return self._dbxrefs
423 dbxrefs = property(__get_dbxrefs, __set_dbxrefs, __del_dbxrefs,
424 "Database cross references")
425
def __get_features(self):
    """Return the record's features, loading them on first access.

    The list from _retrieve_features() is stored on the instance as
    _features and handed back unchanged on later calls.
    """
    if hasattr(self, "_features"):
        return self._features
    self._features = _retrieve_features(self._adaptor, self._primary_id)
    return self._features
433 features = property(__get_features, __set_features, __del_features,
434 "Features")
435
def __get_annotations(self):
    """Return the record's annotations, loading them on first access.

    On the first call the dictionary from _retrieve_annotations() is
    stored as _annotations, then extended with "gi" (from _identifier)
    and "data_file_division" (from _division) when those are truthy.
    Later calls return the stored dictionary as-is.
    """
    if hasattr(self, "_annotations"):
        return self._annotations
    self._annotations = _retrieve_annotations(self._adaptor,
                                              self._primary_id,
                                              self._taxon_id)
    # Extra keys taken from columns already read into the instance.
    extras = (("gi", self._identifier),
              ("data_file_division", self._division))
    for key, value in extras:
        if value:
            self._annotations[key] = value
    return self._annotations
448 annotations = property(__get_annotations, __set_annotations,
449 __del_annotations, "Annotations")
450