Package Bio :: Package Mindy :: Module BerkeleyDB
[hide private]
[frames | no frames]

Source Code for Module Bio.Mindy.BerkeleyDB

  1  import os 
  2  from bsddb3 import db 
  3  import Location 
  4  import BaseDB 
  5  import Bio 
  6   
  7  _open = open  # rename for internal use -- gets redefined below 
  8   
  9  INDEX_TYPE = "BerkeleyDB/1" 
 10   
def _create_table(dbenv, filename, mode):
    """Create an empty BTree table called *filename* inside *dbenv*.

    The handle is closed again immediately: the caller only needs the
    table to exist on disk.  Closing in ``finally`` also discards the
    handle when ``open`` itself raises.
    """
    table = db.DB(dbenv)
    try:
        table.open(filename, None, db.DB_BTREE, db.DB_CREATE, mode)
    finally:
        table.close()


def create(dbname, primary_namespace, secondary_namespaces,
           formatname="unknown"):
    """Create a new, empty BerkeleyDB index and return it opened read/write.

    dbname -- directory to create; ``os.mkdir`` raises if it cannot be made
    primary_namespace -- name of the unique-key namespace; its table is
        stored as ``key_<primary_namespace>``
    secondary_namespaces -- iterable of namespace names; one table
        ``id_<namespace>`` is created for each
    formatname -- format name recorded in ``config.dat``

    The function writes ``config.dat`` into the new directory, creates the
    (empty) tables inside a Berkeley DB environment, closes everything and
    finally reopens the index through the module-level ``open`` in "rw" mode.
    """
    os.mkdir(dbname)
    config_filename = os.path.join(dbname, "config.dat")
    BaseDB.write_config(config_filename=config_filename,
                        index_type=INDEX_TYPE,
                        primary_namespace=primary_namespace,
                        secondary_namespaces=secondary_namespaces,
                        fileid_info={},
                        formatname=formatname)

    dbenv = db.DBEnv(0)
    try:
        envflags = db.DB_THREAD | db.DB_INIT_MPOOL
        dbenv.open(dbname, envflags | db.DB_CREATE)

        # 0o660 is the same value as the old Python-2-only literal 0660;
        # this spelling is accepted by Python 2.6+ and Python 3.
        _create_table(dbenv, "key_%s" % (primary_namespace,), 0o660)
        for namespace in secondary_namespaces:
            _create_table(dbenv, "id_%s" % (namespace,), 0)
    finally:
        # Release the environment even if creating a table failed, so no
        # handle is left dangling.
        dbenv.close()

    # ``open`` here is this module's ``open`` (the BerkeleyDB class),
    # not the builtin.
    return open(dbname, "rw")
43 44
class PrimaryNamespace(BaseDB.DictLookup):
    """Dictionary-style lookup over the primary (unique key) namespace.

    ``db`` is the owning index object; this class reads its
    ``primary_namespace``, ``primary_table`` and ``fileid_info`` attributes.
    """

    def __init__(self, db, namespace):
        """Bind to *db*; *namespace* must be the primary namespace of *db*."""
        self.db = db
        self.namespace = namespace
        assert namespace == db.primary_namespace

    def __getitem__(self, name):
        """Return a one-element list holding the Location stored for *name*.

        The stored record is a tab-separated triple: file tag, start
        position and length.  A missing key raises whatever the underlying
        table raises on lookup.
        """
        record = self.db.primary_table[name]
        filetag, offset, size = record.split("\t")
        # The first element of the fileid_info entry is used as the filename.
        source_file = self.db.fileid_info[filetag][0]
        location = Location.Location(self.namespace,
                                     name,
                                     source_file,
                                     long(offset),
                                     long(size))
        return [location]

    def keys(self):
        """Return the keys of the primary table."""
        primary = self.db.primary_table
        return primary.keys()
65
class SecondaryNamespace(BaseDB.DictLookup):
    """Dictionary-style lookup over one secondary (non-unique) namespace.

    A secondary table maps an identifier to a tab-separated list of primary
    keys; each primary key is then resolved through the primary table.
    """

    def __init__(self, db, namespace):
        """Bind to *db*; *namespace* must be one of its secondary namespaces."""
        self.db = db
        self.namespace = namespace
        assert namespace in db.secondary_namespaces

    def __getitem__(self, name):
        """Return a list of Locations, one per primary key filed under *name*.

        Raises KeyError when *name* is not present in this namespace.
        """
        lookup = self.db._load_namespace(self.namespace)
        joined_keys = lookup.get(name, None)
        if joined_keys is None:
            raise KeyError("Cannot find %r key %r" % (self.namespace, name))

        def locate(primary_key):
            # Primary records are "filetag<TAB>start<TAB>length".
            record = self.db.primary_table[primary_key]
            filetag, start, length = record.split("\t")
            filename = self.db.fileid_info[filetag][0]
            # The Location carries the secondary name that was asked for,
            # not the primary key it was resolved through.
            return Location.Location(self.namespace,
                                     name,
                                     filename,
                                     long(start),
                                     long(length))

        return [locate(primary_key)
                for primary_key in joined_keys.split("\t")]

    def keys(self):
        """Return the keys of this namespace's table."""
        return self.db._load_namespace(self.namespace).keys()
93
class BerkeleyDB(BaseDB.OpenDB, BaseDB.WriteDB):
    """Index stored as Berkeley DB BTree tables inside the directory *dbname*.

    The primary table (``key_<primary_namespace>``) maps each unique key to
    a record ``"filetag<TAB>startpos<TAB>length"``.  Each secondary table
    (``id_<namespace>``) maps an identifier to a tab-separated list of
    primary keys.  Secondary tables are opened lazily by
    ``_load_namespace``.
    """

    def __init__(self, dbname, mode="r"):
        """Open the existing index in *dbname*.

        mode -- "r" opens the tables read-only (``DB_RDONLY``); "rw" opens
            them for update.  Any other value raises TypeError.
        """
        # Set first, before anything below can raise, so that __del__ always
        # finds the attribute on a partially constructed instance.
        self.dbenv = None
        if mode not in ("r", "rw"):
            raise TypeError("Unknown mode: %r" % (mode,))
        self.__need_flush = 0
        BaseDB.OpenDB.__init__(self, dbname, INDEX_TYPE)

        dbenv = db.DBEnv()
        envflags = db.DB_THREAD | db.DB_INIT_MPOOL
        dbenv.open(dbname, envflags)
        if mode == "r":
            self._dbopen_flags = db.DB_RDONLY
        else:
            self._dbopen_flags = 0

        self.primary_table = db.DB(dbenv)
        # 0o660 equals the old Python-2-only literal 0660 (Python 2.6+ / 3).
        self.primary_table.open("key_%s" % (self.primary_namespace,),
                                None,
                                db.DB_BTREE, self._dbopen_flags, 0o660)

        self.secondary_tables = {}
        # Published last: a non-None dbenv tells close()/__del__ that the
        # object is fully open.
        self.dbenv = dbenv

    def _load_namespace(self, namespace):
        """Return the table for secondary *namespace*, opening it on first use."""
        try:
            return self.secondary_tables[namespace]
        except KeyError:
            pass
        table = db.DB(self.dbenv)
        table.open("id_%s" % namespace, None,
                   db.DB_BTREE,
                   self._dbopen_flags, 0)
        # Cache only after a successful open, so a failed open never leaves
        # an unusable handle behind for later lookups.
        self.secondary_tables[namespace] = table
        return table

    def add_record(self, filetag, startpos, length, table):
        """Index one record.

        filetag, startpos, length -- where the record lives; stored
            tab-separated under the record's primary key
        table -- mapping from namespace name to a list of identifiers; it
            must contain exactly one identifier for the primary namespace

        Raises TypeError if the primary identifier is not unique within
        *table* or already exists in the index.
        """
        key_list = table[self.primary_namespace]
        if len(key_list) != 1:
            raise TypeError(
                "Field %s has %d entries but must have only one "
                "(must be unique)" % (repr(self.primary_namespace),
                                      len(key_list)))
        key = key_list[0]
        # has_key is kept for the Berkeley DB table objects, matching the
        # API this module already relies on.
        if self.primary_table.has_key(key):
            raise TypeError("Field %r = %r already exists" %
                            (self.primary_namespace, key))
        self.primary_table[key] = "%s\t%s\t%s" % (filetag,
                                                  BaseDB._int_str(startpos),
                                                  BaseDB._int_str(length))

        for namespace in self.secondary_namespaces:
            lookup = self._load_namespace(namespace)
            # Get the list of secondary identifiers for this identifier
            for val in table.get(namespace, ()):
                # Go from secondary identifier to list of primary identifiers
                if lookup.has_key(val):
                    lookup[val] = lookup[val] + "\t" + key
                else:
                    lookup[val] = key
        self.__need_flush = 1

    def flush(self):
        """Rewrite ``config.dat`` if records were added since the last flush."""
        if not self.__need_flush:
            return
        config_filename = os.path.join(self.dbname, "config.dat")
        # NOTE(review): the namespaces written are those of the tables
        # opened so far.  add_record opens every secondary namespace before
        # setting the flag, so the set should be complete, but the order is
        # dictionary order -- confirm that write_config does not care.
        BaseDB.write_config(config_filename=config_filename,
                            index_type=INDEX_TYPE,
                            primary_namespace=self.primary_namespace,
                            secondary_namespaces=self.secondary_tables.keys(),
                            fileid_info=self.fileid_info,
                            formatname=self.formatname)
        self.__need_flush = 0

    def close(self):
        """Flush pending configuration and close all tables and the environment.

        Calling close() on an index that is already closed (or never
        finished opening) does nothing.
        """
        if getattr(self, "dbenv", None) is None:
            return
        self.flush()
        self.primary_table.close()
        for secondary in self.secondary_tables.values():
            secondary.close()
        self.dbenv.close()
        self.dbenv = self.primary_table = self.fileid_info = \
            self.secondary_tables = None

    def __del__(self):
        """Close the index on garbage collection if it is still open."""
        # getattr guards against instances whose __init__ never ran.
        if getattr(self, "dbenv", None) is not None:
            self.close()

    def __getitem__(self, key):
        """Return the lookup object for namespace *key*.

        Returns a PrimaryNamespace for the primary namespace and a
        SecondaryNamespace otherwise; raises KeyError for an unknown name.
        ``keys()`` is not defined in this class -- presumably inherited
        from BaseDB.OpenDB.
        """
        if key not in self.keys():
            raise KeyError(key)
        if key == self.primary_namespace:
            return PrimaryNamespace(self, key)
        return SecondaryNamespace(self, key)
# Module-level entry point: calling ``open(dbname, mode)`` constructs a
# BerkeleyDB instance.  This rebinds the module's ``open`` name; the builtin
# was saved as ``_open`` near the top of the file.
open = BerkeleyDB