Package Bio :: Module DBXRef
[hide private]
[frames | no frames]

Source Code for Module Bio.DBXRef

  1  # This is a Python module. 
  2  """This module is DEPRECATED. 
  3   
  4  DBXRef was used in building SeqRecords from Martel descriptions (see
  5  Bio.builders.SeqRecord.sequence for more details). 
  6   
  7  Andrew Dalke is no longer maintaining Martel or Bio.Mindy, and these modules 
  8  and associated ones like Bio.DBXRef are now deprecated.  They are no longer
  9  used in any of the current Biopython parsers, and are likely to be removed 
 10  in a future release. 
 11  """ 
 12   
 13  import warnings 
 14  warnings.warn("Martel and those parts of Biopython closely linked to it" \ 
 15                +" (such as Bio.DBXRef which is not used elsewhere) are now" \ 
 16                +" deprecated, and will be removed in a future release of"\ 
 17                +" Biopython.  If you want to continue to use this code,"\ 
 18                +" please get in contact with the Biopython developers via"\ 
 19                +" the mailing lists to avoid its permanent removal from"\ 
 20                +" Biopython.", \ 
 21                DeprecationWarning) 
 22   
class DBXRef:
    """A single cross-reference into another database.

    Attributes:
        dbname: name of the database being referenced.
        dbid: identifier of the entry within that database.
        reftype: optional qualifier for the identifier; None means "no
            qualifier" and is omitted from the string form.
        negate: truthy when the reference is negated.

    The string form is ``dbname/dbid``, or ``dbname/reftype=dbid`` when a
    reftype is set; a negated reference is wrapped as ``not(...)``.
    """

    def __init__(self, dbname, dbid, reftype=None, negate=0):
        """Store the four fields unchanged; no validation is performed."""
        self.dbname = dbname
        self.dbid = dbid
        self.reftype = reftype
        self.negate = negate

    def __str__(self):
        """Return the ``dbname/[reftype=]dbid`` form, negated if required."""
        if self.reftype is None:
            reftype = ""
        else:
            # Only None suppresses the qualifier; an empty string still
            # produces a bare "=" prefix.
            reftype = self.reftype + "="
        s = "%s/%s%s" % (self.dbname, reftype, self.dbid)
        if self.negate:
            s = "not(%s)" % s
        return s

    # repr() and str() deliberately share one implementation.
    __repr__ = __str__
40
class BioformatDBName:
    """Mapping-like object whose lookup returns the key unchanged.

    Supports only ``obj[name]``; every name is accepted as-is, so a lookup
    never raises KeyError.
    """

    def __getitem__(self, name):
        """Return ``name`` itself."""
        return name
class UnknownDBName:
    """Mapping-like object that turns any name into an ``x-unknown-`` name.

    Supports only ``obj[name]``; the result is the lower-cased name with
    the prefix ``x-unknown-``.
    """

    def __getitem__(self, name):
        """Return ``"x-unknown-"`` followed by ``name`` in lower case."""
        return "x-unknown-" + name.lower()
# Maps a naming style to a lookup from that style's database name to the
# name stored on the DBXRef.  Names prefixed with "x-" are the ones not
# marked in the comments as being shared with GO.
dbname_conversions = {
    "bioformat": BioformatDBName(),
    "unknown": UnknownDBName(),
    "sp": {
        "AARHUS/GHENT-2DPAGE": "x-aarhus-ghent-2dpage",
        "CARBBANK": "x-carbbank",
        "DICTYDB": "x-dictydb",
        "ECO2DBASE": "x-eco2dbase",
        "ECOGENE": "x-ecogene",
        "EMBL": "embl",  # EMBL (in GO)
        "FLYBASE": "fb",  # Flybase (in GO)
        "GCRDB": "x-gcrdb",
        "HIV": "x-hiv",
        "HSC-2DPAGE": "x-hsc",
        "HSSP": "x-hssp",
        "MAIZE-2DPAGE": "x-maize",
        "MAIZEDB": "x-maizedb",
        "MENDEL": "x-mendel",
        "MGD": "mgd",  # (in GO)
        "MIM": "x-mim",
        "PDB": "x-pdb",  # Protein Data Bank
        "PFAM": "x-pfam",
        "PIR": "pir",  # GO
        "PROSITE": "x-prosite",
        "REBASE": "x-rebase",
        "SGD": "sgd",  # GO
        "STYGENE": "x-stygene",
        "SUBTILIST": "x-subtilist",
        "SWISS-2DPAGE": "x-swiss",
        "TIGR": "tigr",  # GO
        "TRANSFAC": "x-transfac",
        "WORMPEP": "x-wormpep",
        "YEPD": "x-yepd",
        "ZFIN": "x-zfin",
    },
    "go": {
        "CGEN": "cgen",  # Compugen, Inc.
        "DDB": "ddb",  # DictyBase (Dictyostelium discoideum)
        "DDBJ": "ddbj",  # DNA Database of Japan
        "EC": "ec",  # Enzyme Commission
        "EMBL": "embl",  # EMBL Nucleotide Sequence Data Library
        "ENSEMBL": "ensembl",  # ENSEMBL
        "ENZYME": "enzyme",  # ENZYME
        "FB": "fb",  # FlyBase
        "GB": "gb",  # GenBank
        "GO": "go",  # Gene Ontology
        "GXD": "gxd",  # Gene Expression Database (mouse)
        "IPR": "ipr",  # InterPro
        "ISBN": "isbn",  # International Standard Book Number
        # International Union of Biochemistry and Molecular Biology
        "IUBMB": "iubmb",
        # International Union of Pure and Applied Chemistry
        "IUPAC": "iupac",
        "MEDLINE": "medline",  # MEDLINE
        "MGD": "mgd",  # Mouse Genome Database
        "MGI": "mgi",  # Mouse Genome Informatics
        # Nomenclature Committee of the International Union of
        # Biochemistry and Molecular Biology
        # NOTE(review): unlike its neighbours this value is not lower-cased;
        # left unchanged because callers may depend on it.
        "NC-IUBMB": "NC-IUBMB",
        "PIR": "pir",  # PIR
        "PMID": "pmid",  # PubMed
        "Pombase": "pombase",  # Schizosaccharomyces pombe
        # Schizosaccharomyces pombe Protein Sequence Database
        "Pompep": "pompep",
        # RESID (protein post-translational modifications)
        "RESID": "resid",
        "SGD": "sgd",  # Saccharomyces Genome Database
        "SP": "sp",  # SWISS-PROT
        "SWALL": "swall",  # SWISS-PROT + TrEMBL + TrEMBLnew
        "TAIR": "tair",  # The Arabidopsis Information Resource
        "taxonID": "taxonid",  # Taxonomy ID
        "TC": "tc",  # Transport Commission
        "TIGR": "tigr",  # The Institute of Genome Research
        "TR": "tr",  # TrEMBL
        "WB": "wb",  # WormBase (Caenorhabditis elegans)
    },
    # http://www.ncbi.nlm.nih.gov/collab/db_xref.html
    "genbank": {
        # American Type Culture Collection database
        #   /db_xref="ATCC:123456"
        "ATCC": "x-atcc",
        "ATCC(in host)": "x-atcc-host",  # See above
        "ATCC(dna)": "x-atcc-dna",  # See above

        # Berkeley Drosophila Genome Project EST database
        #   /db_xref="BDGP_EST:123456"
        "BDGP_EST": "x-bdgp-est",

        # Berkeley Drosophila Genome Project database -- Insertion
        #   /db_xref="BDGP_INS:123456"
        "BDGP_INS": "x-bdgp-ins",

        # EST database maintained at the NCBI.
        #   /db_xref="dbEST:123456"
        "dbEST": "x-dbest",

        # Variation database maintained at the NCBI.
        #   /db_xref="dbSNP:4647"
        "dbSNP": "x-dbsnp",

        # STS database maintained at the NCBI.
        #   /db_xref="dbSTS:456789"
        "dbSTS": "x-dbsts",

        # Database of automatically annotated genomic data
        #   /db_xref="ENSEMBL:HUMAN-Clone-AC005612"
        #   /db_xref="ENSEMBL:HUMAN-Gene-ENSG00000007102"
        "ENSEMBL": "ensembl",

        # EBI's EST library identifier
        #   /db_xref="ESTLIB:1200"
        "ESTLIB": "x-estlib",

        # Database of Functional Annotation of Mouse
        #   /db_xref="FANTOM_DB:0610005A07"
        "FANTOM_DB": "x-fantom-db",

        # Database of Genetic and molecular data of Drosophila.
        #   /db_xref="FLYBASE:FBgn0000024"
        "FLYBASE": "fb",

        # Human Genome Database accession numbers.
        #   /db_xref="GDB:G00-128-600"
        "GDB": "x-gdb",

        # GenInfo identifier, used as a unique sequence identifier for
        # nucleotide and proteins.
        #   /db_xref="GI:1234567890"
        "GI": "x-gi",

        # Gene Ontology Database identifier
        #   /db_xref="GO:123"
        "GO": "go",

        # Immunogenetics database, immunoglobulins and T-cell receptors
        #   /db_xref="IMGT/LIGM:U03895"
        "IMGT/LIGM": "x-imgt-ligm",

        # Immunogenetics database, human MHC
        #   /db_xref="IMGT/HLA:HLA00031"
        "IMGT/HLA": "x-imgt-hla",

        # NCBI LocusLink ID.
        #   /db_xref="LocusID:51199"
        "LocusID": "x-locus-id",

        # Maize Genome Database unique identifiers.
        #   /db_xref="MaizeDB:Probe/79847"
        "MaizeDB": "x-maizedb",

        # Mouse Genome Database accession numbers.
        #   /db_xref="MGD:123456"
        "MGD": "mgd",

        # Medicago Genome Initiative
        #   /db_xref="MGI:S:20819"
        # NOTE(review): this yields the same "mgi" name that the "go" table
        # uses for Mouse Genome Informatics -- confirm the overlap is
        # intended before relying on it.
        "MGI": "mgi",

        # Mendelian Inheritance in Man numbers.
        #   /db_xref="MIM:123456"
        "MIM": "x-mim",

        # NIA Mouse cDNA Project
        #   /db_xref="niaEST:L0304H12-3"
        # NOTE(review): unlike its neighbours this value is not lower-cased;
        # left unchanged because callers may depend on it.
        "niaEST": "x-niaEST",

        # Protein Information Resource accession numbers.
        #   /db_xref="PIR:S12345"
        "PIR": "pir",

        # EMBL pseudo protein identifier
        #   /db_xref="PSEUDO:CAC44644.1"
        "PSEUDO": "x-pseudo-embl",

        # Rat Genome Database
        #   /db_xref="RATMAP:5"
        "RATMAP": "x-ratmap",

        # Rice database accession numbers.
        #   /db_xref="RiceGenes:AA231856"
        "RiceGenes": "x-ricegenes",

        # Computer-annotated protein sequence database containing
        # the translations of those codings sequences (CDS) present
        # in the EMBL Nucleotide Sequence Database that won't be
        # included in SWISS-PROT.  These include: immunoglobulins and
        # T-cell receptors, synthetic sequences, patent application
        # sequences, small fragments, CDS not coding for real
        # proteins and truncated proteins.
        #   example: /db_xref="REMTREMBL:CAC01666"
        "REMTREMBL": "x-remtrembl",

        # Resource Centre Primary Database Clone Identifiers
        #   /db_xref="RZPD:IMAGp998I142450Q6"
        "RZPD": "x-rzpd",

        # Saccharomyces Genome Database accession numbers.
        #   /db_xref="SGD:L0000470"
        "SGD": "sgd",

        # Glycine max Genome Database
        #   /db_xref="SoyBase:Satt005"
        "SoyBase": "x-soybase",

        # is this the same as "swall" ?
        # Computer-annotated protein sequence database
        # supplementing SWISS-PROT and containing the
        # translations of all coding sequences (CDS)
        # present in the EMBL Nucleotide Sequence
        # Database not yet integrated in SWISS-PROT.
        #   /db_xref="SPTREMBL:Q00177"
        "SPTREMBL": "x-sptrembl",

        # Swiss-Prot protein database accession numbers.
        #   /db_xref="SWISS-PROT:P12345"
        "SWISS-PROT": "sp",

        # NCBI taxonomic identifier.
        #   /db_xref="taxon:4932"
        "taxon": "taxonid",
    },
}
def from_parser(dbname_style, dbname, idtype, dbid, negate):
    """Build a DBXRef from parser output, normalising the database name.

    Parameters:
        dbname_style: key into ``dbname_conversions`` selecting which
            naming table to use.
        dbname: database name as written in the parsed record.
        idtype: passed to DBXRef as its ``reftype``.
        dbid: identifier within the database.
        negate: passed to DBXRef as its ``negate`` flag.

    Returns:
        A DBXRef whose ``dbname`` is the converted name.  If either the
        style or the name is missing from the table (KeyError), the name
        becomes ``"x-unknown2-<dbname_style>--<dbname>"`` instead of
        raising.
    """
    try:
        dbname = dbname_conversions[dbname_style][dbname]
    except KeyError:
        # Covers both an unrecognised style and an unrecognised name
        # within a known style.
        dbname = "x-unknown2-%s--%s" % (dbname_style, dbname)
    return DBXRef(dbname, dbid, idtype, negate)
248