Source code for privileged_residues.table

import h5py
import numpy as np
import pandas

class GenericTable:
    """Indexed key-value store implementation.

    Best used on large datasets that do not fit into memory. Every HDF5
    dataset found in the file is treated as a table of records whose first
    field is the lookup key (hash). A ``pandas.Index`` over that key column
    is built lazily, the first time a dataset is queried, and cached.

    The table can be used as a context manager so that the underlying file
    handle is released deterministically::

        with GenericTable(path) as table:
            records = table[key]
    """

    def __init__(self, dbpath):
        """
        Parameters
        ----------
        dbpath : str
            Path to HDF5 database. The file is opened read-only.
        """
        self._table = h5py.File(dbpath, "r")

        labels = []

        def init_labels(name, item):
            # Only datasets hold records; groups are merely containers.
            # Must return None so that visititems keeps walking the file.
            if isinstance(item, h5py.Dataset):
                labels.append(name)

        self._table.visititems(init_labels)

        self._labels = labels
        # Maps dataset label -> pandas.Index over its key column.
        self._indices = {}

    def close(self):
        """Close the underlying HDF5 file and drop the cached indices."""
        self._indices.clear()
        self._table.close()

    def __enter__(self):
        """
        Returns
        -------
        GenericTable
            The table itself.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the database on leaving a ``with`` block.

        Exceptions raised inside the block are not suppressed.
        """
        self.close()
        return False

    def __getitem__(self, key):
        """
        Parameters
        ----------
        key : np.uint64 or tuple(np.uint64, str)
            Either a hash value or a tuple containing a hash and a named
            group to search in.

        Returns
        -------
        np.ndarray
            Concatenated list of matches for a hash (and group) query.
        """
        if isinstance(key, tuple):
            return self.fetch(*key)
        return self.fetch(key)

    def fetch(self, key, findgroup=""):
        """
        Parameters
        ----------
        key : np.uint64
            A hash value.
        findgroup : str, optional
            A named group to search for hashes in. A dataset is searched
            when `findgroup` is a substring of its label. Defaults to "",
            which searches in all named groups.

        Returns
        -------
        np.ndarray
            Concatenated list of matches for a hash and group query. Empty
            when the hash is not present in any searched dataset.
        """
        data = []

        for label in self._labels:
            if findgroup and findgroup not in label:
                continue

            dataset = self._table[label]

            index = self._indices.get(label)
            if index is None:
                # The first field of each record is the lookup key; read
                # that column once and keep the index for later queries.
                index = pandas.Index(dataset[dataset.dtype.names[0]])
                self._indices[label] = index

            if key not in index:
                continue

            matches = dataset[index.get_loc(key)]
            # get_loc returns a plain integer when the key is unique, in
            # which case the selection is a single record rather than an
            # array of records. Extending a list with a lone record would
            # iterate over its *fields*, so promote it to a 1-element array.
            data.extend(np.atleast_1d(matches))

        return np.array(data)

    def __iter__(self):
        """
        Yields
        ------
        np.ndarray
            A record from the database.
        """
        for label in self._labels:
            yield from self._table[label]

    def __len__(self):
        """
        Returns
        -------
        int
            The total number of records in the database.
        """
        return sum(len(self._table[label]) for label in self._labels)