1
2
3
4
5
6
"""Provides code to access NCBI over the WWW.

The main Entrez web page is available at:
http://www.ncbi.nlm.nih.gov/Entrez/

A list of the Entrez utilities is available at:
http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html


Functions:
efetch       Retrieves records in the requested format from a list of one or
             more primary IDs or from the user's environment
epost        Posts a file containing a list of primary IDs for future use in
             the user's environment to use with subsequent search strategies
esearch      Searches and retrieves primary IDs (for use in EFetch, ELink,
             and ESummary) and term translations and optionally retains
             results for future use in the user's environment.
elink        Checks for the existence of an external or Related Articles link
             from a list of one or more primary IDs.  Retrieves primary IDs
             and relevancy scores for links to Entrez databases or Related
             Articles;  creates a hyperlink to the primary LinkOut provider
             for a specific ID and database, or lists LinkOut URLs
             and Attributes for multiple IDs.
einfo        Provides field index term counts, last update, and available
             links for each database.
esummary     Retrieves document summaries from a list of primary IDs or from
             the user's environment.
egquery      Provides Entrez database counts in XML for a single search
             using Global Query.
espell       Retrieves spelling suggestions.

read         Parses the XML results returned by any of the above functions.
             Typical usage is:
             >>> handle = Entrez.einfo() # or esearch, efetch, ...
             >>> record = Entrez.read(handle)
             where record is now a Python dictionary or list.

_open        Internally used function.

"""
47 import urllib, time, warnings
48 import os.path
49 from Bio import File
50
51
# Optional contact e-mail address.  When this is set to something other than
# None, _open() adds it to each request as the "email" parameter, unless the
# caller already supplied an "email" parameter of their own.
email = None
53
def query(cmd, db, cgi='http://www.ncbi.nlm.nih.gov/sites/entrez',
          **keywds):
    """Warn that Bio.Entrez.query is deprecated (DEPRECATED).

    This function used to query Entrez and hand back the HTML results.
    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp

    All arguments are accepted for backwards compatibility, but the only
    thing this function does is issue a DeprecationWarning.

    NOTE(review): older documentation promised a handle to the results and
    an IOError on network problems; as written no request is made and the
    return value is None - confirm this is intended.
    """
    import warnings
    message = ("Bio.Entrez.query is deprecated, since it breaks NCBI's rule "
               "to only use the E-Utilities URL.")
    warnings.warn(message, DeprecationWarning)
67
68
def epost(db, cgi=None, **keywds):
    """Post a file of identifiers for future use.

    Posts a file containing a list of UIs for future use in the user's
    environment to use with subsequent search strategies.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html

    db is sent as the "db" parameter; any further keyword arguments are
    sent as additional parameters.  Supplying cgi is deprecated: it only
    triggers a DeprecationWarning, and NCBI's EPost URL is used regardless.

    Return a handle to the results.

    Raises an IOError exception if there's a network error.
    """
    if cgi:
        import warnings
        message = ("Using a URL other than NCBI's main url for the "
                   "E-Utilities is deprecated.")
        warnings.warn(message, DeprecationWarning)
    # Whatever the caller passed, the request always goes to NCBI.
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi'
    query_params = dict(db=db)
    query_params.update(keywds)
    return _open(url, query_params)
89
def efetch(db, cgi=None, **keywds):
    """Fetches Entrez results which are returned as a handle.

    EFetch retrieves records in the requested format from a list of one or
    more UIs or from user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html

    db is sent as the "db" parameter; any further keyword arguments are
    sent as additional parameters.  Supplying cgi is deprecated: it only
    triggers a DeprecationWarning, and NCBI's EFetch URL is used regardless.

    Return a handle to the results.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    handle = Entrez.efetch(db="nucleotide", id="57240072", rettype="genbank")
    print handle.read()
    """
    if cgi:
        import warnings
        message = ("Using a URL other than NCBI's main url for the "
                   "E-Utilities is deprecated.")
        warnings.warn(message, DeprecationWarning)
    # Whatever the caller passed, the request always goes to NCBI.
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    query_params = dict(db=db)
    query_params.update(keywds)
    return _open(url, query_params)
116
def esearch(db, term, cgi=None, **keywds):
    """ESearch runs an Entrez search and returns a handle to the results.

    ESearch searches and retrieves primary IDs (for use in EFetch, ELink
    and ESummary) and term translations, and optionally retains results
    for future use in the user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html

    db and term are sent as the "db" and "term" parameters; any further
    keyword arguments are sent as additional parameters.  Supplying cgi is
    deprecated: it only triggers a DeprecationWarning, and NCBI's ESearch
    URL is used regardless.

    Return a handle to the results which are always in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    handle = Entrez.esearch(db="nucleotide", retmax=10, term="Opuntia")
    record = Entrez.read(handle)
    print record["Count"]
    print record["IdList"]
    """
    if cgi:
        import warnings
        message = ("Using a URL other than NCBI's main url for the "
                   "E-Utilities is deprecated.")
        warnings.warn(message, DeprecationWarning)
    # Whatever the caller passed, the request always goes to NCBI.
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    query_params = dict(db=db, term=term)
    query_params.update(keywds)
    return _open(url, query_params)
147
def elink(cgi=None, **keywds):
    """ELink checks for linked external articles and returns a handle.

    ELink checks for the existence of an external or Related Articles link
    from a list of one or more primary IDs;  retrieves IDs and relevancy
    scores for links to Entrez databases or Related Articles; creates a
    hyperlink to the primary LinkOut provider for a specific ID and
    database, or lists LinkOut URLs and attributes for multiple IDs.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html

    All keyword arguments are sent as request parameters.  Supplying cgi
    is deprecated: it only triggers a DeprecationWarning, and NCBI's ELink
    URL is used regardless.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.
    """
    if cgi:
        import warnings
        message = ("Using a URL other than NCBI's main url for the "
                   "E-Utilities is deprecated.")
        warnings.warn(message, DeprecationWarning)
    # Whatever the caller passed, the request always goes to NCBI.
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'
    query_params = dict(keywds)
    return _open(url, query_params)
171
def einfo(cgi=None, **keywds):
    """EInfo returns a summary of the Entrez databases as a results handle.

    EInfo provides field names, index term counts, last update, and
    available links for each Entrez database.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html

    All keyword arguments are sent as request parameters.  Supplying cgi
    is deprecated: it only triggers a DeprecationWarning, and NCBI's EInfo
    URL is used regardless.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    record = Entrez.read(Entrez.einfo())
    print record['DbList']
    """
    if cgi:
        import warnings
        message = ("Using a URL other than NCBI's main url for the "
                   "E-Utilities is deprecated.")
        warnings.warn(message, DeprecationWarning)
    # Whatever the caller passed, the request always goes to NCBI.
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi'
    query_params = dict(keywds)
    return _open(url, query_params)
198
def esummary(cgi=None, **keywds):
    """ESummary retrieves document summaries as a results handle.

    ESummary retrieves document summaries from a list of primary IDs or
    from the user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esummary_help.html

    All keyword arguments are sent as request parameters.  Supplying cgi
    is deprecated: it only triggers a DeprecationWarning, and NCBI's
    ESummary URL is used regardless.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.
    """
    if cgi:
        import warnings
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    # Whatever the caller passed, the request always goes to NCBI.
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
219
def egquery(cgi=None, **keywds):
    """EGQuery provides Entrez database counts for a global search.

    EGQuery provides Entrez database counts in XML for a single search
    using Global Query.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/egquery_help.html

    All keyword arguments are sent as request parameters.  Supplying cgi
    is deprecated: it only triggers a DeprecationWarning, and NCBI's
    EGQuery URL is used regardless.

    Return a handle to the results in XML format.

    Raises an IOError exception if there's a network error.
    """
    if cgi:
        import warnings
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    # Whatever the caller passed, the request always goes to NCBI.
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
240
def espell(cgi=None, **keywds):
    """ESpell retrieves spelling suggestions, returned in a results handle.

    ESpell retrieves spelling suggestions, if available.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/espell_help.html

    All keyword arguments are sent as request parameters.  Supplying cgi
    is deprecated: it only triggers a DeprecationWarning, and NCBI's ESpell
    URL is used regardless.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    record = Entrez.read(Entrez.espell(term="biopythooon"))
    print record["Query"]
    print record["CorrectedQuery"]
    """
    if cgi:
        import warnings
        message = ("Using a URL other than NCBI's main url for the "
                   "E-Utilities is deprecated.")
        warnings.warn(message, DeprecationWarning)
    # Whatever the caller passed, the request always goes to NCBI.
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/espell.fcgi'
    query_params = dict(keywds)
    return _open(url, query_params)
267
def read(handle):
    """Parses an XML file from the NCBI Entrez Utilities into python objects.

    This function parses an XML file created by NCBI's Entrez Utilities,
    returning a multilevel data structure of Python lists and dictionaries.
    Most XML files returned by NCBI's Entrez Utilities can be parsed by
    this function, provided its DTD is available. Biopython includes the
    DTDs for most commonly used Entrez Utilities.

    Whereas the data structure seems to consist of generic Python lists,
    dictionaries, strings, and so on, each of these is actually a class
    derived from the base type. This allows us to store the attributes
    (if any) of each element in a dictionary my_element.attributes, and
    the tag name in my_element.tag.

    handle is passed unchanged to the parser's run() method, and whatever
    that returns is returned to the caller.
    """
    from Parser import DataHandler
    # The DTDs are looked up in the "DTDs" directory found under the first
    # entry of this package's __path__.
    DTDs = os.path.join(__path__[0], "DTDs")
    handler = DataHandler(DTDs)
    record = handler.run(handle)
    return record
288
def _open(cgi, params=None):
    """Helper function to build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.  cgi is the URL for the cgi script to access.
    params is a dictionary with the options to pass to it (None or omitted
    means no options).  Options whose value is None are left out of the
    request.  The dictionary passed in is never modified; the request is
    built from a private copy.

    A "tool" option of "biopython" is added unless the caller supplied one,
    and the module-level email address, when it is not None, is added as
    the "email" option unless the caller supplied one.

    Does some simple error checking on the first five lines of the reply,
    and will raise an IOError if it encounters one of the known error
    messages.  On success the returned File.UndoHandle still yields those
    five lines, since they are pushed back after being inspected.

    This function also enforces the "three second rule" to avoid abusing
    the NCBI servers.
    """
    # Rate limiting: leave at least `delay` seconds between the start of
    # consecutive requests, sleeping for the remainder when called too soon.
    delay = 3.0
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current

    # Build a fresh dictionary instead of deleting from / adding to the
    # caller's one.  This avoids mutating a shared default argument and
    # avoids changing a dictionary's size while iterating over it.
    if params is None:
        params = {}
    query = dict([(key, value) for key, value in params.items()
                  if value is not None])

    # Tell Entrez that we are using Biopython.
    if "tool" not in query:
        query["tool"] = "biopython"

    # Tell Entrez who we are, if the user configured an address.
    if "email" not in query and email is not None:
        query["email"] = email

    # Open a handle to Entrez.
    options = urllib.urlencode(query, doseq=True)
    url = cgi + "?" + options
    handle = urllib.urlopen(url)

    # Wrap the handle inside an UndoHandle so lines can be pushed back.
    uhandle = File.UndoHandle(handle)

    # Peek at the first five lines for known error messages, then push
    # them back in reverse order so the caller sees the reply from its
    # first line.
    lines = []
    for i in range(5):
        lines.append(uhandle.readline())
    for i in range(4, -1, -1):
        uhandle.saveline(lines[i])
    data = ''.join(lines)

    if "500 Proxy Error" in data:
        raise IOError("500 Proxy Error (NCBI busy?)")
    elif "502 Proxy Error" in data:
        raise IOError("502 Proxy Error (NCBI busy?)")
    elif "WWW Error 500 Diagnostic" in data:
        raise IOError("WWW Error 500 Diagnostic (NCBI busy?)")
    elif data.startswith("Error:"):
        # The reply itself carries the error text, so pass it on.
        raise IOError(data.strip())
    elif data.startswith("The resource is temporarily unavailable"):
        raise IOError("The resource is temporarily unavailable")
    elif data.startswith("download dataset is empty"):
        raise IOError("download dataset is empty")
    elif data[:5] == "ERROR":
        raise IOError("ERROR, possibly because id not available?")

    return uhandle

# Time (as given by time.time()) of the most recent request; starting at 0
# means the very first call never has to wait.
_open.previous = 0
362