Source code for bronx.datagrip.datastore

"""
A simplified key/value embedded-database.

See the :class:`DataStore` class docstring for an example.
"""
import collections
import functools
import pickle

#: No automatic export
__all__ = []


class _DataStoreEntryKey:
    """The key of any element stored in a DataStore class."""

    def __init__(self, kind, **kwargs):
        """
        :param object kind: The `kind` of data (must be hashable)
        :param dict kwargs: Any key/value pairs that describe the data (values
            must be hashable)
        """
        self._kind = kind
        self._extras = kwargs
        try:
            self._hash = self._compute_hash()
        except TypeError:
            raise TypeError('kind and extra arguments values must be hashable.')

    def _compute_hash(self):
        """Get the hash values (it's precomputed)."""
        return hash(tuple([self._kind, ] +
                          [(k, v) for k, v in sorted(self._extras.items())]))

    def __getstate__(self):
        return dict(_kind=self._kind, _extras=self._extras)

    def __setstate__(self, state):
        self._kind = state['_kind']
        self._extras = state['_extras']
        self._hash = self._compute_hash()

    def __repr__(self):
        """Return a string representation of the present object."""
        return '<{:s} object | {!s}>'.format(self.__class__.__name__, self)

    def __str__(self):
        """Return a string representation of the present object."""
        extras_str = (' ' + ' '.join(['{!s}={!r}'.format(k, v) for k, v in self])).rstrip()
        return 'kind={!r}{:s}'.format(self.kind, extras_str)

    @property
    def kind(self):
        """The kind of the data."""
        return self._kind

    @property
    def extras(self):
        """Dictionary of key/value pairs that describe the data."""
        return self._extras.copy()  # This way, the user won't mess up things

    def __iter__(self):
        """Iterates through *extras*."""
        yield from self._extras.items()

    def __eq__(self, other):
        """Compare to keys."""
        return self.kind == other.kind and self._extras == other.extras

    def __hash__(self):
        """Return the hash value of the present object."""
        return self._hash

    def __getattr__(self, key):
        """Elements of extra are directly accessible."""
        if key.startswith('_'):
            raise AttributeError('Attribute not found')
        if key in self._extras:
            return self._extras[key]
        else:
            raise AttributeError('Attribute not found')


[docs]class DataStore: """An object that can store any pickable data. It acts like a small key/value database. * Keys are of :class:`_DataStoreEntryKey` class. They contain a mandatory `kind` attribute plus key/value pairs that describe the stored data more precisely. * Various methods are provided to access the entries. * Keys are indexed in order to perform fast searches (see the grep method). Data should always be pickalable so that the DataStore could be dumped to disk using the :meth:`pickle_dump` method. :example: Data should be inserted this way:: ds = DataStore() ds.insert('kind_of_data', dict(key1='meaningful'), 'The data themselves...', readonly=True) ds.insert('kind_of_data', dict(key1='meaningful', key2='breathtaking'), 'More date...', readonly=True) ds.insert('kind_of_data', dict(), 'Another One', readonly=True) It could later be accessed:: data = ds.get('kind_of_data', dict(key1='meaningful', key2='breathtaking')) print data More date... A search can be performed:: dict_of_results = ds.grep('kind_of_data', dict(key1='meaningful')) print dict_of_results {<_DataStoreEntryKey object | kind='kind_of_data' key1='meaningful' key2='breathtaking'>: 'More date...', <_DataStoreEntryKey object | kind='kind_of_data' key1='meaningful'>: 'The data themselves...'} Finally the DataStore can be dumped/loaded to/from disk:: ds.pickle_dump() another_ds = DataStore() another_ds.pickle_load() """ _PICKLE_PROTOCOL = pickle.HIGHEST_PROTOCOL def __init__(self, default_picklefile='datastore.pickled'): """ :param str default_picklefile: default name for the pickle dump file """ self._pickle_dumpfile = default_picklefile self._reset_internal_state() def _reset_internal_state(self): self._store = dict() self._lock = dict() self._index = collections.defaultdict(functools.partial(collections.defaultdict, set)) def _index_update(self, key): self._index['kind'][key.kind].add(key) for k, v in key: self._index[k][v].add(key) def _index_remove(self, key): self._index['kind'][key.kind].remove(key) for k, v in key: self._index[k][v].remove(key) def _build_key(self, kind, extras): if not isinstance(extras, dict): raise ValueError("The 'extras' needs to be dictionary of hashables.") return _DataStoreEntryKey(kind, **extras)
[docs] def insert(self, kind, extras, payload, readonly=True): """Insert a new ``payload`` data in the current DataStore. :param object kind: The kind of the ``payload`` data :param dict extras: Any key/value pairs that describe the ``payload`` data :param object payload: The data that will be stored :param bool readonly: Is the data readonly ? """ key = self._build_key(kind, extras) if key in self._store and self._lock[key]: raise RuntimeError("This entry already exists and is read-only.") self._index_update(key) self._store[key] = payload self._lock[key] = readonly return payload
[docs] def check(self, kind, extras): """Check if a data described by ``kind`` and ``extras`` exists in this DataStore. :param object kind: The kind of the expected data :param dict extras: Any key/value pairs that describe the expected data """ key = self._build_key(kind, extras) return key in self._store
[docs] def get(self, kind, extras, default_payload=None, readonly=True): """Retrieve data from the current DataStore. if the desired data is missing and ``default_payload`` is not `None`, a new entry is added to the DataStore using the ``default_payload`` and ``readonly`` arguments. :param object kind: The kind of the expected data :param dict extras: Any key/value pairs that describe the expected data :param object default_payload: Default data that may be stored and returned :param bool readonly: Is the default data readonly ? """ key = self._build_key(kind, extras) try: return self._store[key] except KeyError: if default_payload is None: raise KeyError("No corresponding entry was found in the DataStore for {!r}". format(key)) else: self.insert(kind, extras, default_payload, readonly=readonly) return self._store[key]
[docs] def delete(self, kind, extras, force=False): """Delete data from the current DataStore. :param object kind: The kind of the expected data :param dict extras: Any key/value pairs that describe the expected data """ key = self._build_key(kind, extras) if not self._lock[key] or force: self._index_remove(key) del self._store[key] del self._lock[key] else: raise RuntimeError("This entry already exists and is read-only.")
[docs] def grep(self, kind, extras): """Search for items that matches both ``kind`` and ``extras``. :note: When matching ``extras``, supernumerary attributes are ignored (e.g. ``extras=dict(a=1)`` will match ``dict(a=1, b=2)``) :param object kind: The kind of the expected data :param dict extras: Any key/value pairs that describe the expected data """ if not isinstance(extras, dict): raise ValueError("The 'extras' needs to be dictionary of hashables.") result = self._index['kind'][kind].copy() for k, v in extras.items(): result &= self._index[k][v] return {k: self._store[k] for k in result}
[docs] def grep_delete(self, kind, extras, force=False): """Search for items that matches both ``kind`` and ``extras`` and delete them. The dictionary of the removed key/data is returned. :note: When matching ``extras``, supernumerary attributes are ignored (e.g. ``extras=dict(a=1)`` will match ``dict(a=1, b=2)``) :param object kind: The kind of the expected data :param dict extras: Any key/value pairs that describe the expected data """ grep = self.grep(kind, extras) for k in grep.keys(): if not self._lock[k] or force: self._index_remove(k) del self._store[k] del self._lock[k] else: raise RuntimeError("This entry already exists and is read-only.") return grep
[docs] def pickle_dump(self, dumpfile=None): """Pickle the content of the current DataStore and write it to disk. :param str dumpfile: Path to the dump file (if `None`, the default provided at the object creation time is used). """ thefile = dumpfile or self._pickle_dumpfile with open(thefile, 'wb') as pfh: pickle.dump((self._store, self._lock), pfh, protocol=self._PICKLE_PROTOCOL)
[docs] def pickle_load(self, dumpfile=None): """Read a pickle dump file from disk and refill the current DataStore. :param str dumpfile: Path to the dump file (if `None`, the default provided at the object creation time is used). """ # Get the pickle file contents thefile = dumpfile or self._pickle_dumpfile with open(thefile, 'rb') as pfh: unpickled = pickle.load(pfh) # Build the new store dictionary newstore = dict() for k, v in unpickled[0].items(): if k in self._store and hasattr(self._store[k], 'datastore_inplace_overwrite'): # In some particular cases, we want the an existing object to # reset itself. I guess we could call that an inplace overwrite self._store[k].datastore_inplace_overwrite(v) newstore[k] = self._store[k] else: newstore[k] = v # Update internals and rebuild the index self._reset_internal_state() self._store = newstore self._lock = unpickled[1] for k in self._store.keys(): self._index_update(k)
[docs] def keys(self): """Return the list of available keys in this DataStore.""" return self._store.keys()
def __iter__(self): """Iterate over the DataStore's items.""" yield from self._store.items() def __len__(self): """The number of entries in the present DataStore.""" return len(self._store) def __repr__(self): """A condensed string representation of the present DataStore.""" return '<{:s} object at {!s} | {:d} items>'.format(self.__class__.__name__, hex(id(self)), len(self)) def __str__(self): """A condensed string representation of the present DataStore.""" outstr = '' for k, v in self: outstr += '{:10s} key : {!s}\n'.format('read-only' if self._lock[k] else 'read-write', k) outstr += '{:10s} value: {!r}\n'.format('', v) return outstr