# -*- coding: utf-8 -*-
"""Utility class to read VarBC files.
The :class:`VarbcFile` should be used to read VarBC files (see its documentation
below).
"""
from __future__ import print_function, absolute_import, unicode_literals, division
import six
from collections import namedtuple, OrderedDict
from contextlib import contextmanager
import numpy as np
import re
from bronx.compat.moves import collections_abc
from bronx.datagrip import varbcheaders
#: No automatic export: the empty list means that a wildcard import of this
#: module (``from ... import *``) brings no name into the caller's namespace.
__all__ = []
class _VarbcEntryTypeDescriptor(object):
"""Handle to an object's data of a specific type."""
def __init__(self, attr, objtype, doc='Undocumented footprint attribute'):
"""
Ensures a proper conversion to the **objtype** type for the _**attr**
attribute.
"""
self._attr = attr
self._objtype = objtype
self.__doc__ = doc
def __get__(self, obj, objtype=None): # @UnusedVariable
return getattr(obj, '_' + self._attr)
def __set__(self, obj, value):
setattr(obj, '_' + self._attr, self._objtype(value))
class _VarbcEntryNumpyDescriptor(_VarbcEntryTypeDescriptor):
    """Descriptor whose managed value is always stored as a NumPy array."""

    def __set__(self, obj, value):
        """Store **value** on **obj** as a NumPy array of the configured dtype.

        A string is first split on whitespace and each token is converted
        with the configured type; any other value is handed to
        :func:`numpy.array` as it is.
        """
        dtype = self._objtype
        if isinstance(value, six.string_types):
            value = list(map(dtype, value.split()))
        converted = np.array(value, dtype=dtype)
        setattr(obj, '_' + self._attr, converted)
class VarbcEntry(object):
    """One entry of a VarBC file.

    The comparison operator ``==`` is available between objects of this class
    (the ``ix`` identifier is not part of the comparison).
    """

    def __init__(self):
        """No arguments have to be provided at creation time."""
        self._type = ''
        self._key = ''
        self._ix = None
        self._ndata = -9999
        self._npred = 0
        self._predcs = np.array((), dtype=np.uint8)
        self._params = np.array((), dtype=np.float32)

    type = _VarbcEntryTypeDescriptor("type", str, "The observation type.")
    key = _VarbcEntryTypeDescriptor("key", str, "The entry key.")
    ix = _VarbcEntryTypeDescriptor("ix", int, "The entry identifier.")
    ndata = _VarbcEntryTypeDescriptor("ndata", int, "Number of data.")
    npred = _VarbcEntryTypeDescriptor("npred", int, "Number of predictors.")
    predcs = _VarbcEntryNumpyDescriptor("predcs", np.uint8, "Predictors NumPy array.")
    params = _VarbcEntryNumpyDescriptor("params", np.float32, "Coefficients NumPy array.")

    def __repr__(self):
        # ``ix`` is None until it has been assigned: format it with ``!s`` so
        # that a freshly created entry can still be printed (``:d`` would
        # raise on None). For integers the output is identical.
        return ('{0:s}(type: {1.type:s}, ix={1.ix!s}, key={1.key:s}, ndata={1.ndata:d}, npred={1.npred:d})'
                .format(self.__class__.__name__, self))

    def __str__(self):
        return ('{!r}\n preds = {:s}\n params= {:s}'
                .format(self,
                        ' '.join(['{:7d}'.format(n) for n in self.predcs]),
                        ' '.join(['{:7.3f}'.format(x) for x in self.params])))

    def __eq__(self, other):
        """Compare the key, type, ndata, npred, predictors and coefficients."""
        if not isinstance(other, VarbcEntry):
            return False
        # np.array_equal returns False when shapes differ (an element-wise
        # ``==`` would broadcast or fail) and, unlike np.alltrue, it is still
        # available in NumPy 2.
        return bool(self.key == other.key and self.type == other.type and
                    self.ndata == other.ndata and self.npred == other.npred and
                    np.array_equal(self.predcs, other.predcs) and
                    np.array_equal(self.params, other.params))

    def __ne__(self, other):
        return not self == other

    def valid(self):
        """Check if all the mandatory fields are properly set up.

        :return: ``True`` if the predictors and coefficients arrays both have
            **npred** elements and if both **key** and **type** are non-empty
        :rtype: bool
        """
        return bool(len(self.predcs) == self.npred and
                    len(self.params) == self.npred and
                    self.key and self.type)
#: Holds the **regex** associated with a given **element** attribute of a
#: :class:`VarbcEntry` object.
_VarbcMatchElement = namedtuple('_VarbcMatchElement', ('element', 'regex'))
class _VarbcMatchTool(object):
"""Object that uses regular expressions to parse varbc entry."""
def __init__(self, matches, stack):
"""
:param matches: The list of :class:`_VarbcMatchElement` to look for
:param stack: The list where new :class:`ObsVarbcEntry` will be appended
"""
self._matches = matches
self._stack = stack
self._cur_entry = None
self._imatch = 0
def save_entry(self):
"""Save the current entry into the **stack**."""
if self._cur_entry:
if self._cur_entry.valid():
if self._cur_entry.ix != len(self._stack) + 1:
raise ValueError("Entry numbering inconsistency: {!s}"
.format(self._cur_entry))
self._stack.append(self._cur_entry)
self._cur_entry = None
else:
raise ValueError("Incomplete entry encountered: {!s}"
.format(self._cur_entry))
def __call__(self, line):
"""Process a single VarBC line."""
# New entry starting ?
firstmatch = self._matches[0].regex.match(line)
if firstmatch:
self.save_entry() # Save a previous one
self._cur_entry = VarbcEntry() # Create a new entry for the new run
setattr(self._cur_entry, self._matches[0].element, firstmatch.group(1))
self._imatch = 1
elif self._imatch > 0:
# It's not a first line, look for the next expected regex
curmatch = self._matches[self._imatch].regex.match(line)
if curmatch:
setattr(self._cur_entry, self._matches[self._imatch].element, curmatch.group(1))
self._imatch = (self._imatch + 1) % len(self._matches)
class _VarbcMatchList(object):
"""Object that uses regular expressions to parse varbc entry."""
def __init__(self, matches):
"""
:param matches: The list of :class:`_VarbcMatchElement` to look for
"""
self._matches = matches
@contextmanager
def autorecord(self, entrystatck):
"""Return a :class:`_VarbcMatchTool` that will be able to parse VarBC lines."""
mt = _VarbcMatchTool(self._matches, entrystatck)
yield mt
mt.save_entry()
class VarbcFile(collections_abc.Mapping):
    """Class to handle a full VarBC file.

    Two simple methods give access to the :class:`VarbcEntry` elements:
    one with the ix number (:meth:`getix`), the other with the VarBC 'key'
    (:meth:`getkey`).

    It also behaves like a *Mapping* since the :meth:`__getitem__`,
    :meth:`__iter__`, :meth:`keys`, :meth:`values` and :meth:`items` methods
    are defined. With all of these methods, the values are returned in the same
    order as originally read in the VarBC file.
    """

    # The elements are looked for in this very order within each entry.
    _VBC_MATCH_ELEMENTS = [
        _VarbcMatchElement('ix', re.compile(r'^ix=0*(\d+)$')),
        _VarbcMatchElement('type', re.compile(r'^class=(\w+)$')),
        # The trailing newline is optional so that lines that were stripped
        # of it (e.g. the result of ``str.splitlines()``) are also recognised.
        # The captured key is the same as before for newline-terminated lines.
        _VarbcMatchElement('key', re.compile(r'^key=\s*([^=\n]+)\n?$')),
        _VarbcMatchElement('ndata', re.compile(r'^ndata=(\d+)$')),
        _VarbcMatchElement('npred', re.compile(r'^npred=(\d+)$')),
        _VarbcMatchElement('predcs', re.compile(r'^predcs=([\d ]+)$')),
        # NB: within the character class, ``+-.`` is a range (from '+' to '.')
        # that covers the '+', ',', '-' and '.' characters.
        _VarbcMatchElement('params', re.compile(r'^params=([\dEe+-. ]+)$')),
    ]

    def __init__(self, asciidatas):
        """
        :param asciidatas: Any iterable over lines from a VarBC file.
        :raises ValueError: if an incomplete or wrongly numbered entry is found
        """
        self._datalist = []
        self._metadata = varbcheaders.VarbcHeadersFile(asciidatas)
        mymatchlist = _VarbcMatchList(self._VBC_MATCH_ELEMENTS)
        with mymatchlist.autorecord(self._datalist) as mymatchtool:
            for a_line in asciidatas:
                mymatchtool(a_line)
        # Entries indexed by key (in reading order)
        self._key2entry = OrderedDict()
        for entry in self._datalist:
            self._key2entry[entry.key] = entry

    @property
    def metadata(self):
        """The metadata associated to the varbc file.

        :rtype: :class:`bronx.datagrip.varbcheaders.VarbcHeadersFile`
        """
        return self._metadata

    def __len__(self):
        """The number of entries in the VarBC file."""
        return len(self._datalist)

    def __getitem__(self, item):
        """Return the entry associated with **item**.

        If **item** is an integer, this is equivalent to :meth:`getix`. If
        **item** is a string, this is equivalent to :meth:`getkey`.

        :raises KeyError: if **item** is neither a string nor an integer, or
            if no entry is associated with it
        """
        if isinstance(item, six.string_types):
            return self.getkey(item)
        elif isinstance(item, int):
            try:
                return self.getix(item)
            except IndexError:
                # The Mapping mixin methods (``in``, ``get``, ...) only expect
                # a KeyError when an item is missing.
                raise KeyError('There is no entry with ix={!s} in this VarbcFile object'.format(item))
        else:
            raise KeyError('{!s} is not a valid key for a VarbcFile object'.format(item))

    def keys(self):
        """Iterate over all the keys available in the VarBC file."""
        for k in self._key2entry:
            yield k

    def __iter__(self):
        """Iterate over all the keys available in the VarBC file."""
        return self.keys()

    def values(self):
        """Iterate over all the :class:`VarbcEntry` objects read from file."""
        for entry in self._datalist:
            yield entry

    def items(self):
        """Iterate over all the (key, entry) pairs available in the VarBC file."""
        for k, e in self._key2entry.items():
            yield (k, e)

    def getix(self, ix):
        """Gives the **ix** th entry of the VarBC file

        :rtype: :class:`VarbcEntry`
        :raises KeyError: if **ix** is lower than 1 (numbering starts at 1)
        :raises IndexError: if **ix** is greater than the number of entries
        """
        if ix < 1:
            raise KeyError("The serie of ix numbers starts with 1")
        return self._datalist[ix - 1]

    def getkey(self, key):
        """Returns a VarBC entry given its **key**

        :rtype: :class:`VarbcEntry`
        :raises KeyError: if no entry has this **key**
        :example: ``myobj.getkey('4 3 7')``
        """
        return self._key2entry[key]