Package Bio :: Package EUtils :: Module Datatypes
[hide private]
[frames] | [no frames]

Source Code for Module Bio.EUtils.Datatypes

  1  """Various EUtils datatypes.""" 
  2   
  3  import re, types 
  4   
class EUtilsError(Exception):
    """Base class for all EUtils-specific errors.

    Contains a single error string -- use str(err) to get it.
    """


class EUtilsSearchError(EUtilsError):
    """Used when the ESearch XML says there is an ERROR.

    The main error is in err.errmsg but more information may be
    available in err.errors or err.warnings.  Eg, the error message
    is often "Can't run executor" but you can get more information
    from the list of errors.
    """

    def __init__(self, errmsg, errors=None, warnings=None):
        """errmsg, errors = None, warnings = None

        'errmsg' -- the main error message
        'errors' -- list of additional error details (default: empty list)
        'warnings' -- list of additional warning details (default: empty list)
        """
        EUtilsError.__init__(self, errmsg)

        # Use fresh lists so instances never share a default container.
        if errors is None:
            errors = []
        if warnings is None:
            warnings = []

        self.errmsg = errmsg
        self.errors = errors
        self.warnings = warnings

    def __repr__(self):
        return "%s(%r, %r, %r)" % (self.__class__.__name__,
                                   self.errmsg, self.errors, self.warnings)

    def __str__(self):
        """Return errmsg followed by any errors and warnings, on one line."""
        text = self.errmsg
        if self.errors:
            text = text + "; ERRORS: " + ", ".join(map(str, self.errors))
        if self.warnings:
            text = text + "; WARNINGS: " + ", ".join(map(str, self.warnings))
        # __str__ has to return the native 'str' type.  Only a Python 2
        # unicode message needs encoding; encoding unconditionally would
        # hand back bytes on Python 3 and make str(err) itself fail.
        # "replace" keeps str(err) from raising on non-latin1 characters.
        if not isinstance(text, str):
            text = text.encode("latin1", "replace")
        return text
40 41 42 43 ####################################
class DBIds:
    """Store a list of identifiers for a database.

    This is used as input for the '*_using_dbids' functions.

    Constructed with the database name and list of identifier strings.
    """

    def __init__(self, db, ids):
        """db, ids

        'db' -- the database for those identifiers
        'ids' -- a list of identifiers for the given database
        """
        self.db = db
        self.ids = ids

    def __len__(self):
        """number of identifiers"""
        return len(self.ids)

    def __getitem__(self, i):
        """get an identifier or a subset of the DBIds

        An integer index returns the bare identifier.  A slice returns a
        new instance of the same class holding the selected identifiers.
        """
        if isinstance(i, slice):
            # Pass the slice object straight through so omitted bounds
            # and negative steps behave exactly as they do on the
            # underlying sequence (e.g. dbids[::-1] reverses).
            return self.__class__(self.db, self.ids[i])
        # XXX Should this return a DBIds as well?  Because it does not,
        # the 'item' method is needed.
        return self.ids[i]

    def item(self, i):
        """Get a DBIds containing the item at position i

        Can't use dbids[i] since that returns only the identifier.
        This returns a DBIds, which can be used for another request.
        """
        return self.__class__(self.db, [self.ids[i]])

    def __iter__(self):
        """Iterate over the list of identifiers"""
        return iter(self.ids)

    def __repr__(self):
        return "DBIds(%r, %r)" % (self.db, self.ids)

    def __eq__(self, other):
        """does this DBIds equal the other?

        The database names must match, but the identifiers
        themselves can be in any order (duplicates are ignored).
        """
        if self.ids == other.ids:
            return self.db == other.db
        if self.db != other.db:
            return False
        # Could be in a different order, and there may be non-unique
        # keys, so compare the sets of identifiers.
        return set(self.ids) == set(other.ids)

    def __ne__(self, other):
        """check if this isn't equal to the other DBIds"""
        return not (self == other)

    def __sub__(self, other):
        """DBIds of the identifiers in this set which aren't in the other

        Raises TypeError if the two objects are for different databases.
        The order of the remaining identifiers is preserved.
        """
        if self.db != other.db:
            raise TypeError("Different databases: %r and %r" % (
                self.db, other.db))
        excluded = set(other.ids)
        remaining = [x for x in self.ids if x not in excluded]
        # Same class as self, matching __getitem__ and item.
        return self.__class__(self.db, remaining)
127
class WithinNDays:
    """Limit a search to records from the most recent N days.

    Eg, to see what's been published in PubMed about rabies
    in the last 20 days:

      client.search("rabies", daterange = WithinNDays(20, "pdat"))
    """

    def __init__(self, ndays, datetype=None):
        """ndays, datetype = None

        'ndays' -- within this many days of now (the 'reldate' field
              of a search)
        'datetype' -- the date field to use (per the original notes,
              leaving it unset means Entrez date, which is "edat")
        """
        self.datetype = datetype
        self.ndays = ndays

    def get_query_params(self):
        """Return the fields to add to the EUtils query, as a dict.

        This is an internal implementation feature you can ignore.
        """
        return dict(reldate=self.ndays, datetype=self.datetype)
153 154 # Could actually check the month and day fields... 155 _date_re_match = re.compile(r"\d{4}(/\d\d(/\d\d)?)?$").match 156
class DateRange:
    """Limit a search to records falling inside a date range.

    Some examples:
      matches between 1995 and 2000 -- DateRange("1995", "1999/12/31")
      matches before 1990 -- DateRange(maxdate = "1990/01/01")
      matches in 2002 or later -- DateRange(mindate = "2002/01/01")
      matches in June or July of 2001 -- DateRange("2001/06", "2001/07")
    """

    def __init__(self, mindate=None, maxdate=None, datetype=None):
        """mindate = None, maxdate = None, datetype = None

        'mindate' -- matches must be on or after this date
        'maxdate' -- matches must be on or before this date
        'datetype' -- the date field to use for the search (per the
              original notes, unset means Entrez date, which is "edat")

        At least one of mindate or maxdate must be specified.
        If mindate is omitted, all results on or before maxdate are returned.
        If maxdate is omitted, all results on or after mindate are returned.

        Dates must be formatted as 'YYYY/MM/DD', 'YYYY/MM', or 'YYYY'.
        Raises TypeError if both dates are missing or one is malformed.
        """
        if mindate is None and maxdate is None:
            raise TypeError("Must specify at least one of mindate or maxdate")

        # mindate is checked before maxdate; only the first malformed
        # value is reported.
        for label, value in (("mindate", mindate), ("maxdate", maxdate)):
            if value is not None and _date_re_match(value) is None:
                raise TypeError(
                    "%s is not in YYYY/MM/DD format (month and "
                    "day are optional): %r" % (label, value))

        self.mindate = mindate
        self.maxdate = maxdate
        self.datetype = datetype

    def get_query_params(self):
        """Return the fields to add to the EUtils query, as a dict.

        This is an internal implementation feature you can ignore.
        """
        # NOTE(review): str() turns an omitted bound into the literal
        # text "None" -- confirm that is what the query builder expects.
        low = str(self.mindate)
        high = str(self.maxdate)
        return {"mindate": low,
                "maxdate": high,
                "datetype": self.datetype}
205 206 #################################### 207
class Expression:
    """Base class for the Expression given in the eSearch output.

    NCBI does some processing on the request.  They return the
    translated expression as part of the search results.  To get the
    expression as an Entrez string, use str(expression).

    iter(expression) traverses the expression tree in postfix order.
    """

    def __and__(self, other):
        """expr & other -- intersection of two expressions"""
        combined = And(self, other)
        return combined

    def __or__(self, other):
        """expr | other -- union of two expressions"""
        combined = Or(self, other)
        return combined

    def __iter__(self):
        """Traverse the tree in postfix order (left to the subclasses)"""
        raise NotImplementedError
226
class Term(Expression):
    """A single Expression Term, which is the leaf node of the tree.

    The fields are:
      term -- a word from the search term
      field -- the field searched by this term
      count -- the number of records matching this word
      explode -- stored as given (the original notes say: no idea)
    """

    def __init__(self, term, field, count, explode):
        self.explode = explode
        self.count = count
        self.field = field
        self.term = term

    def __str__(self):
        # The Entrez text of a leaf is just its term.
        text = self.term
        return text

    def __iter__(self):
        """Traverse the tree in postfix order; a leaf yields only itself"""
        yield self
246
class BinaryOp(Expression):
    """Common base for two-operand expressions: a left and a right child."""

    def __init__(self, left, right):
        self.right = right
        self.left = left

    def __iter__(self):
        """Traverse the tree in postfix order: left, right, then this node"""
        for node in self.left:
            yield node
        for node in self.right:
            yield node
        yield self

# NCBI processes booleans left to right (no precedence).
# I'm not going to worry about using minimal parens;
# I'll just always put parentheses around each subexpression.
class And(BinaryOp):
    """Intersection of two subexpressions."""

    def __str__(self):
        # Always parenthesized; see the left-to-right evaluation note.
        operands = (self.left, self.right)
        return "(%s AND %s)" % operands
267
class Or(BinaryOp):
    """Union of two subexpressions."""

    def __str__(self):
        # Always parenthesized, whether or not it is strictly needed.
        operands = (self.left, self.right)
        return "(%s OR %s)" % operands
272 273 # NOT and BUTNOT
class Not(BinaryOp):
    """The set of the left child without elements from the right child.

    This is used for something like "poliovirus NOT polio".
    """

    def __str__(self):
        # Always parenthesized, whether or not it is strictly needed.
        operands = (self.left, self.right)
        return "(%s NOT %s)" % operands
281
class Range(BinaryOp):
    """Used to store a date range."""

    def __init__(self, left, right):
        """left, right -- the two end points; both must share one field

        Raises TypeError when the 'field' attributes differ.
        """
        fields = (left.field, right.field)
        if fields[0] != fields[1]:
            raise TypeError("dates must have the same field: %r and %r" %
                            fields)
        BinaryOp.__init__(self, left, right)

    def __str__(self):
        """Format as 'low:high[field]' using the left child's field"""
        def without_field(term):
            # Drop a trailing "[...]" qualifier, if the term has one.
            cut = term.rfind("[")
            if cut == -1:
                return term
            return term[:cut]

        low = without_field(self.left.term)
        high = without_field(self.right.term)
        return "%s:%s[%s]" % (low, high, self.left.field)
302 303 ################## 304
class SearchResult:
    """Store results from a database search.

    Attributes are:
      count -- total number of matches to the query
      retmax -- total number of identifiers requested
      retstart -- a search can return a portion of the total
             number of results.  retstart is the offset into this list
      ids -- matching identifiers (may be a subset of the full list)
      translation_set -- dict mapping an input name to the canonical
             form preferred by NCBI
      expression -- the full query as understood by NCBI
      webenv -- the WebEnv string (if use_history is set)
      query_key -- the query_key (if use_history is set)
      errors -- list of Problems in the ErrorList
      warnings -- list of Problems in the WarningList
      timestamp -- timestamp (from time.time()) when this record
             was received from the server.

    Holds a list of identifiers instead of a DBIds because the output
    from NCBI's eSearch doesn't include the database name.
    """

    def __init__(self,
                 count, retmax, retstart, ids,
                 translation_set, expression,
                 webenv, query_key, errors,
                 warnings, timestamp):
        # Paging information
        self.count = count
        self.retmax = retmax
        self.retstart = retstart

        # The hits and how NCBI interpreted the query
        self.ids = ids
        self.translation_set = translation_set
        self.expression = expression

        # History-server handles
        self.webenv = webenv
        self.query_key = query_key

        # Diagnostics and bookkeeping
        self.errors = errors
        self.warnings = warnings
        self.timestamp = timestamp
343
class PostResult:
    """Store the results of a Post.

    Attributes are:
      webenv -- the WebEnv string
      query_key -- the query_key
      invalid_ids -- stored as passed in (not described in the
             original notes)
      timestamp -- timestamp (from time.time()) when this record
             was received from the server.
    """

    def __init__(self, webenv, query_key, invalid_ids, timestamp):
        self.timestamp = timestamp
        self.invalid_ids = invalid_ids
        self.query_key = query_key
        self.webenv = webenv
358
class Summary:
    """Store information from calling eSummary.

    Attributes are:
      id -- the identifier string for this record
      dataitems -- an OrderedDictList containing the parsed Item
             elements for this Summary.
    """

    def __init__(self, id, dataitems):
        self.dataitems = dataitems
        self.id = id

    def __repr__(self):
        fields = (self.id, self.dataitems)
        return "Summary(%r, %r)" % fields

    def __str__(self):
        fields = (self.id, self.dataitems)
        return "<Summary id=%s, %s>" % fields
374 375 # XXX Use the new 'datetime' module when 2.3 is out!
class Date:
    """Allow simple Date storage.

    Parameters and attributes are 'year', 'month', and 'day'.
    """

    def __init__(self, year, month, day):
        self.day = day
        self.month = month
        self.year = year

    def __repr__(self):
        parts = (self.__class__.__name__, self.year, self.month, self.day)
        return "%s(%r, %r, %r)" % parts

    def __str__(self):
        # Rendered as YYYY/MM/DD with zero-padded month and day.
        parts = (self.year, self.month, self.day)
        return "%4d/%02d/%02d" % parts

    def timetuple(self):
        """Return the 9-tuple needed by various time functions"""
        # NOTE: the last three fields (day of week, day of year, isDST)
        # are not dealt with yet; they are filled with 0, 0 and -1.
        date_part = (self.year, self.month, self.day)
        return date_part + (0, 0, 0, 0, 0, -1)

    def __eq__(self, other):
        """Are these two dates equal?"""
        # Compared in order, stopping at the first field that differs.
        for name in ("year", "month", "day"):
            if not (getattr(self, name) == getattr(other, name)):
                return False
        return True

    def __ne__(self, other):
        """Are these two dates dissimilar?"""
        return not (self == other)


# Possible errors from eSearch:
#   <!ELEMENT ErrorList (PhraseNotFound*,FieldNotFound*)>
#   <!ELEMENT WarningList (PhraseIgnored*,
#                          QuotedPhraseNotFound*,
#                          OutputMessage*)>

class Problem:
    """Base class for Search Errors or Warnings.

    A problem has:
      text -- the text of the problem
      severity -- either Problem.ERROR or Problem.WARNING
      category -- how NCBI categorizes this problem

    Only 'text' is set here; 'severity' and 'category' are read by
    __eq__ and so must be supplied by the concrete class.
    """
    ERROR = "ERROR"
    WARNING = "WARNING"

    def __init__(self, text):
        self.text = text

    def __eq__(self, other):
        # Compared in order, stopping at the first field that differs.
        for name in ("text", "severity", "category"):
            if not (getattr(self, name) == getattr(other, name)):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        parts = (self.__class__.__name__, self.text)
        return "%s(%r)" % parts

    def __str__(self):
        return str(self.text)
433
class ErrorProblem(Problem):
    """A Problem whose severity is Problem.ERROR."""
    severity = Problem.ERROR
436
class WarningProblem(Problem):
    """A Problem whose severity is Problem.WARNING."""
    severity = Problem.WARNING
439
class PhraseNotFound(ErrorProblem):
    """Error in the "PhraseNotFound" category."""
    category = "PhraseNotFound"
442
class FieldNotFound(ErrorProblem):
    """Error in the "FieldNotFound" category."""
    # Restates the severity that ErrorProblem already supplies.
    severity = Problem.ERROR
    category = "FieldNotFound"
446
class PhraseIgnored(WarningProblem):
    """Warning in the "PhraseIgnored" category."""
    category = "PhraseIgnored"
449
class QuotedPhraseNotFound(WarningProblem):
    """Warning in the "QuotedPhraseNotFound" category."""
    category = "QuotedPhraseNotFound"
452
class OutputMessage(WarningProblem):
    """Warning in the "OutputMessage" category."""
    category = "OutputMessage"
455
def _build_problem_mapping():
    """Internal: make a map from category name (in XML) to the right class

    Scans the module globals and keeps every Problem subclass that has a
    'category' attribute.
    """
    by_category = {}
    for candidate in globals().values():
        try:
            if not issubclass(candidate, Problem):
                continue
            if hasattr(candidate, "category"):
                by_category[candidate.category] = candidate
        except TypeError:
            # issubclass() raises TypeError for globals that are not
            # classes; those are simply skipped.
            continue
    return by_category


problem_category_mapping = _build_problem_mapping()


# elinks with cmd=="neighbor"
# NOTE(review): the listing this file was recovered from jumps from
# line 470 to line 487 here, so some definitions appear to be missing
# (presumably the Link class that LinkSetDb's links refer to) --
# restore them from the upstream module.

class IdCheck:
    """Store results from an lcheck link.

    Attributes are:
      id -- the id of the requested record
      has_linkout -- boolean, either it does or doesn't
      has_neighbor -- boolean, either it does or doesn't
    """

    def __init__(self, id, has_linkout=0, has_neighbor=0):
        self.has_neighbor = has_neighbor
        self.has_linkout = has_linkout
        self.id = id

    def __eq__(self, other):
        # Compared in order, stopping at the first field that differs.
        for name in ("id", "has_linkout", "has_neighbor"):
            if not (getattr(self, name) == getattr(other, name)):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.id, self.has_linkout, self.has_neighbor)
        return "IdCheck(%r, %r, %r)" % fields
508
class LinkSetDb(object):
    """Used in eLink with cmd == neighbor.

    Attributes are:
      dbto -- the links are TO this database name
      linkname -- the name for this set (eg, "pubmed_protein")
      links -- list of Links, one per matching record (includes score)
          List order is the same as the XML, which is ordered from
          most likely to least.  The identifier is from 'dbto'
      info -- not stored; passing it only allows 'links' to be left
          out, in which case 'links' becomes an empty list

    You can also use
      dbids -- get a DBIds of dbto and the identifiers in each Link
    """

    def __init__(self, dbto, linkname, links=None, info=None):
        """Raises TypeError when neither 'links' nor 'info' is given"""
        if links is None and info is None:
            raise TypeError("At least one of 'links' and 'info' must be set")
        if links is None:
            links = []
        self.links = links
        self.linkname = linkname
        self.dbto = dbto

    def _get_dbids(self):
        # Collect the 'id' of every link, in list order.
        link_ids = [link.id for link in self.links]
        return DBIds(self.dbto, link_ids)
    dbids = property(_get_dbids)

    def __eq__(self, other):
        # Compared in order, stopping at the first field that differs.
        for name in ("dbto", "linkname", "links"):
            if not (getattr(self, name) == getattr(other, name)):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.dbto, self.linkname, self.links)
        return "LinkSetDb(%r, %r, %r)" % fields
545
class NeighborLinkSet:
    """Results from an eLink neighbor search.

    Attributes are:
      dbids -- the DBIds of the *REQUESTED* identifiers
      linksetdbs -- an OrderedMultiDict of LinkSetDb objects
    """

    def __init__(self, dbids, linksetdbs):
        self.linksetdbs = linksetdbs
        self.dbids = dbids

    def __eq__(self, other):
        if not (self.dbids == other.dbids):
            return False
        return self.linksetdbs == other.linksetdbs

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.dbids, self.linksetdbs)
        return "NeighborLinkSet(%r, %r)" % fields
565 566 # elinks with cmd in ("ncheck", "lcheck")
class CheckLinkSet(object):
    """Results from 'ncheck' and 'lcheck' searches.

    This is used to check if a set of records has neighbors
    or links.

    Attributes are:
      dbfrom -- the database containing those records
      idchecks -- list of IdCheck objects, one per id

      dbids -- the DBIds made from dbfrom and the idchecks
    """

    def __init__(self, dbfrom, idchecks):
        self.idchecks = idchecks
        self.dbfrom = dbfrom

    def _get_dbids(self):
        # Collect the 'id' of every check, in list order.
        checked_ids = [idcheck.id for idcheck in self.idchecks]
        return DBIds(self.dbfrom, checked_ids)
    dbids = property(_get_dbids)

    def __eq__(self, other):
        if not (self.dbfrom == other.dbfrom):
            return False
        return self.idchecks == other.idchecks

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.dbfrom, self.idchecks)
        return "CheckLinkSet(%r, %r)" % fields
594 595 596 # elinks with cmd == "llinks"
class Provider:
    """The Provider, as listed in 'llinks' (LinkOut).

    Attributes are:
      name -- name of the provider
      name_abbr -- an abbreviated name for the provider
      id -- a unique id for the provider
      url -- where to go for more information about the provider
      icon_url -- a small image to use for the provider
    """

    def __init__(self, name, name_abbr, id,
                 url=None, icon_url=None):
        # Identity
        self.name = name
        self.name_abbr = name_abbr
        self.id = id
        # Optional links
        self.url = url
        self.icon_url = icon_url

    def __eq__(self, other):
        # Compared in order, stopping at the first field that differs.
        for attr in ("name", "name_abbr", "id", "url", "icon_url"):
            if not (getattr(self, attr) == getattr(other, attr)):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.name, self.name_abbr, self.id,
                  self.url, self.icon_url)
        return "Provider(%r, %r, %r, %r, %r)" % fields
626 627
class ObjUrl:
    """The ObjUrl containing LinkOut information for a record.

    Attributes are:
      subject_types -- list of strings describing this link (0 or more)
      provider -- a Provider instance
      linkname -- a name used to categorize this link (optional)
      attributes -- list of attributes (text strings), (0 or more)
      url -- URL of the link (optional)

    The original notes also list an 'iconurl', but the constructor
    neither accepts nor stores one.
    """

    def __init__(self, subject_types, provider,
                 linkname=None, url=None, attributes=None):
        assert isinstance(subject_types, list)
        # A fresh list per instance when no attributes are supplied.
        if attributes is None:
            attributes = []
        self.subject_types = subject_types
        self.provider = provider
        self.linkname = linkname
        self.url = url
        self.attributes = attributes

    def __eq__(self, other):
        # Compared in order, stopping at the first field that differs.
        for attr in ("linkname", "subject_types", "url",
                     "attributes", "provider"):
            if not (getattr(self, attr) == getattr(other, attr)):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.subject_types, self.provider, self.linkname,
                  self.url, self.attributes)
        return "ObjUrl(%r, %r, %r, %r, %r)" % fields
661
class IdUrlSet:
    """Set of ObjUrls for the record with the given 'id'."""

    def __init__(self, id, objurls):
        self.objurls = objurls
        self.id = id

    def __eq__(self, other):
        if not (self.id == other.id):
            return False
        return self.objurls == other.objurls

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.id, self.objurls)
        return "IdUrlSet(%r, %r)" % fields
674
class LinksLinkSet:
    """Results of an 'llink' (LinkOut) search.

    Finds links from records in a given database to external
    resources.

    Fields are:
      dbfrom -- the database in which search started
      idurlset -- a list of IdUrlSet, one for each identifier
    """

    def __init__(self, dbfrom, idurlset):
        self.idurlset = idurlset
        self.dbfrom = dbfrom

    def __eq__(self, other):
        if not (self.dbfrom == other.dbfrom):
            return False
        return self.idurlset == other.idurlset

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.dbfrom, self.idurlset)
        return "LinksLinkSet(%r, %r)" % fields
696