# Source code for bronx.fancies.dump

# pylint: disable=unused-argument

"""
Data dumper... The (challenging) idea is to be able to dump any object to many
different formats.

It is mostly used in objects' docstring within the footprints package.

:note: Dumper objects are managed using :mod:`bronx.patterns.getbytag`;
       consequently, they are associated with a ``tag`` and can be re-used.

Example::

    >>> class Foo(object):
    ...     a = 1
    ...
    ...     def __str__(self):
    ...         return str(self.a)

    >>> somelist = [dict(akey=Foo(),bkey=['item1', 'item2'],
    ...                  ckey=dict(other=1, sutff=2)), 'a_string',
    ...             ['another', 'list', tuple([1, 2, 3])]]

    # A Txt Dumper object can be created directly
    >>> tdumper = TxtDumper()
    >>> print(tdumper.tag)
    default

    # Or using get(). Since getbytag is used, the same object is returned
    >>> tdumper_bis = get()
    >>> tdumper_bis is tdumper
    True

    >>> print(tdumper.cleandump(somelist)) # doctest: +ELLIPSIS
          [dict(
                  akey = ...Foo::1,
                  bkey = ['item1', 'item2'],
                  ckey = dict(
                      other = 1,
                      sutff = 2,
                  ),
              ), 'a_string', ['another', 'list', (1, 2, 3)]]

    # The Jsonable Dumper will produce something that can safely be dumped to
    # a JSON File
    >>> jdumper = JsonableDumper(tag='testdumper')
    >>> jdumper.cleandump(somelist) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    [{'akey': ...'...Foo::1',
      'bkey': [...'item1', ...'item2'],
      'ckey': {'other': 1, 'sutff': 2}}, ...
     'a_string', [...'another', ...'list', [1, 2, 3]]]

    # The XML Dumper returns a xml.dom's Document object
    >>> xdumper = XmlDomDumper()
    >>> xd = xdumper.cleandump(somelist, 'testxml')
    >>> xd # doctest: +ELLIPSIS
    <xml.dom.minidom.Document object at 0x...>
    >>> print(xd.toprettyxml(indent='  ', encoding='utf-8').decode('utf-8')) # doctest: +ELLIPSIS
    <?xml version="1.0" encoding="utf-8"?>
    <testxml>
      <generic_item>
        <akey>
          <generic_object>
            <overview>1</overview>
            <type>...Foo</type>
          </generic_object>
        </akey>
        <bkey>item1</bkey>
        <bkey>item2</bkey>
        <ckey>
          <other>1</other>
          <sutff>2</sutff>
        </ckey>
      </generic_item>
      <generic_item>a_string</generic_item>
      <generic_item>
        <generic_item>another</generic_item>
        <generic_item>list</generic_item>
        <generic_item>
          <generic_item>1</generic_item>
          <generic_item>2</generic_item>
          <generic_item>3</generic_item>
        </generic_item>
      </generic_item>
    </testxml>
    <BLANKLINE>

    # Interface functions can be used to obtain quickly a text dump
    >>> print(fulldump(somelist)) # doctest: +ELLIPSIS
          [dict(
                  akey = ...Foo::1,
                  bkey = ['item1', 'item2'],
                  ckey = dict(
                      other = 1,
                      sutff = 2,
                  ),
              ), 'a_string', ['another', 'list', (1, 2, 3)]]

"""

import re
from xml.dom import minidom

from bronx.patterns import getbytag

#: No automatic export
__all__ = []


def _DEBUG(msg, obj=None, level=None):
    """Fake method for debug purpose (then should provide a print statement)."""
    # print(msg, str(obj))
    pass


def is_an_instance(val):
    """Tell whether ``val`` is an instance (as opposed to being a class).

    An object qualifies when it exposes both ``__class__`` and ``__dict__``
    but has no ``__bases__`` attribute. Old-style classes are deliberately
    not detected by this test.

    :param val: The object to analyse
    :return: ``True`` for an instance, ``False`` otherwise
    """
    # Objects without a __dict__ (e.g. int, list) are not reported as instances.
    looks_like_object = hasattr(val, '__class__') and hasattr(val, '__dict__')
    return looks_like_object and not hasattr(val, '__bases__')
def is_class(val):
    """Tell whether ``val`` is a class (as opposed to being an instance).

    The test relies solely on the presence of a ``__bases__`` attribute.

    :param val: The object to analyse
    :return: ``True`` if ``val`` has a ``__bases__`` attribute
    """
    try:
        val.__bases__
    except AttributeError:
        return False
    return True
def get(**kw):
    """Return the :class:`TxtDumper` object matching the description.

    All keyword arguments are forwarded untouched to the :class:`TxtDumper`
    constructor.
    """
    dumper = TxtDumper(**kw)
    return dumper
class _AbstractDumper(getbytag.GetByTag):
    """Could dump almost anything.

    Dispatching is done by :meth:`_recursive_dump`, which looks for a method
    named ``dump_<type name>`` and falls back on :meth:`dump_default`.
    At this level most of the ``dump_*`` hooks return the object unchanged;
    concrete dumpers override them.
    """

    def __init__(self):
        """Takes no argument; the object cache starts empty."""
        self.reset()

    def reset(self):
        """Clear the Dumper's object cache."""
        self.seen = {}

    def _dump_internal_dict(self, obj, level=0, nextline=True):
        """Dump the dictionary returned by an object's ``as_dict`` method."""
        return self.dump_dict(obj, level + 1, nextline)

    def _dump_as_proxy(self, proxy, obj, level=0, nextline=True):
        """Dump ``obj`` using the ``dump_<proxy>`` method (lazy dump if missing)."""
        handler = getattr(self, 'dump_' + proxy, self._lazzy_dump)
        return handler(obj, level + 1, nextline)

    def _unknown_obj_overview(self, obj):
        """Return ``str(obj)`` if it is single-line and differs from ``repr(obj)``.

        In any other case, ``repr(obj)`` is returned.
        """
        as_str, as_repr = str(obj), repr(obj)
        single_line = '\n' not in as_str
        return as_str if single_line and as_str != as_repr else as_repr

    def _dump_unknown_obj(self, obj, level=0, nextline=True):
        """Last resort dump: ``module.class::overview``."""
        kind = type(obj)
        return "{:s}.{:s}::{:s}".format(kind.__module__, kind.__name__,
                                        self._unknown_obj_overview(obj))

    def _dump_class(self, obj, level=0, nextline=True):
        """Dump a class as its fully qualified name."""
        return '{:s}.{:s}'.format(obj.__module__, obj.__name__)

    def _dump_builtin(self, obj, level=0, nextline=True):
        """Dump a builtin class as its bare name."""
        return obj.__name__

    def _dump_obj_shortcut(self, obj, level=0, nextline=True):
        """Dump an object that provides its own ``as_dump`` method."""
        kind = type(obj)
        return "{:s}.{:s}::{:s}".format(kind.__module__, kind.__name__, obj.as_dump())

    def dump_default(self, obj, level=0, nextline=True):
        """Dump an object for which no dedicated ``dump_*`` method exists."""
        _DEBUG('dump_default')
        # Great, obj has an as_dict method: top choice
        # NOTE(review): ``nextline`` is not forwarded in this branch whereas
        # the proxy branches below forward it -- confirm this is intended.
        if hasattr(obj, '__dict__') and hasattr(obj, 'as_dict'):
            return self._dump_internal_dict(obj.as_dict(), level + 1)
        # Rely on parent classes: ok it should work
        for base, proxy in ((dict, 'dict'), (set, 'set'), (list, 'list'), (tuple, 'tuple')):
            if isinstance(obj, base):
                return self._dump_as_proxy(proxy, obj, level + 1, nextline)
        # Can't do anything better, sorry !
        return self._dump_unknown_obj(obj, level, nextline)

    def _lazzy_dump(self, obj, level=0, nextline=True):
        """Return ``obj`` unchanged."""
        return obj

    # By default, these types are returned as they are
    dump_dict = _lazzy_dump
    dump_int = _lazzy_dump
    dump_long = _lazzy_dump
    dump_float = _lazzy_dump
    dump_bool = _lazzy_dump
    dump_str = _lazzy_dump
    dump_unicode = _lazzy_dump

    def _recursive_dump(self, obj, level=0, nextline=True):
        """This routine can be called recursively (if necessary).

        Only the dumps of classes and of instances providing ``as_dump`` are
        stored in the cache (keyed by ``id(obj)``).
        """
        _DEBUG('dump top', obj)
        key = id(obj)
        if key in self.seen:
            return self.seen[key]
        # Instances that know how to dump themselves
        if is_an_instance(obj) and hasattr(obj, 'as_dump'):
            _DEBUG('dump shortcut', obj)
            result = self.seen[key] = self._dump_obj_shortcut(obj, level, nextline)
            return result
        # Classes (builtin ones are dumped differently)
        if is_class(obj):
            if obj.__module__ in ('__builtin__', 'builtins'):
                _DEBUG('builtin')
                class_dumper = self._dump_builtin
            else:
                _DEBUG('class ' + str(obj))
                class_dumper = self._dump_class
            result = self.seen[key] = class_dumper(obj, level, nextline)
            return result
        # Anything else: dispatch on the type's name
        name = type(obj).__name__
        handler = getattr(self, "dump_%s" % name, self.dump_default)
        return handler(obj, level, nextline)

    def dump(self, obj, level=0, nextline=True):
        """Call this method to dump ``obj`` (or at least try to...).

        :param obj: The object that will be dumped
        :param int level: For internal use only.
        :param bool nextline: For internal use only.
        """
        return self._recursive_dump(obj, level=level, nextline=nextline)

    def cleandump(self, obj):
        """Clear cache dump and provide a dump of the provided ``obj``.

        :param obj: The object that will be dumped
        """
        self.reset()
        return self.dump(obj)
class JsonableDumper(_AbstractDumper):
    """Return a dump consisting of a pure mix of dictionaries and lists.

    The resulting dump can be serialised using the standard pickle or json
    module.
    """

    def dump_dict(self, obj, level=0, nextline=True):
        """Return a new dictionary whose keys and values are both dumped."""
        converted = {}
        for key, value in obj.items():
            dumped_key = self._recursive_dump(key, level, nextline)
            converted[dumped_key] = self._recursive_dump(value, level + 1, nextline)
        return converted

    def dump_list(self, obj, level=0, nextline=True):
        """Return a list holding the dump of each item of ``obj``."""
        converted = []
        for item in obj:
            converted.append(self._recursive_dump(item, level + 1, nextline))
        return converted

    # Tuples and sets are converted to lists
    dump_tuple = dump_list
    dump_set = dump_list

    def dump_NoneType(self, obj, level=0, nextline=True):
        """``None`` is dumped as the ``'None'`` string."""
        return 'None'
class XmlDomDumper(JsonableDumper):
    """Return a dump as an XML DOM object (instance of :class:`xml.dom.minidom.Document`).

    The object is first converted to a mix of dictionaries and lists (see
    :class:`JsonableDumper`) that is then translated into XML nodes.
    """

    def __init__(self, named_nodes=()):
        """
        :param tuple named_nodes: List of XML nodes that support a `name`
                                  attribute. For such nodes, a dictionary
                                  will be converted as follows :
                                  ``attr=dict(toto="BlaBla",titi="BlaBla")``
                                  becomes
                                  ``<attr name="toto">BlaBla</attr><attr name="titi">BlaBla</attr>``
        """
        super().__init__()
        self._named_nodes = named_nodes

    def _unknown_obj_overview(self, obj):
        """Same as the parent's overview, without the enclosing ``<`` and ``>``."""
        return re.sub(r'^<(.*)>$', r'\1', super()._unknown_obj_overview(obj))

    def _dump_unknown_obj(self, obj, level=0, nextline=True):
        """Dump an unknown object as a ``generic_object`` (type + overview)."""
        return dict(generic_object=dict(type='{}.{}'.format(type(obj).__module__,
                                                            type(obj).__name__),
                                        overview=self._unknown_obj_overview(obj)))

    def _dump_as_proxy(self, proxy, obj, level=0, nextline=True):
        """Sequence-like proxies and ``FP*`` typed objects become generic objects."""
        if proxy in ('list', 'set', 'tuple') or type(obj).__name__.startswith('FP'):
            return self._dump_unknown_obj(obj, level, nextline)
        else:
            return super()._dump_as_proxy(proxy, obj, level, nextline)

    def _dump_obj_shortcut(self, obj, level=0, nextline=True):
        """Dump an object providing ``as_dump`` as a ``generic_object``."""
        return dict(generic_object=dict(type='{}.{}'.format(type(obj).__module__,
                                                            type(obj).__name__),
                                        overview=obj.as_dump()))

    def _dump_class(self, obj, level=0, nextline=True):
        """Wrap the parent's class dump in a ``class`` entry."""
        return {'class': super()._dump_class(obj, level, nextline)}

    def _dump_builtin(self, obj, level=0, nextline=True):
        """Wrap the parent's builtin dump in a ``builtin`` entry."""
        return {'builtin': super()._dump_builtin(obj, level, nextline)}

    def _xdump_dict(self, xdoc, xroot, obj, myname):
        """Translate a dictionary into XML nodes attached to ``xroot``.

        Entries are processed in sorted key order.
        """
        for k, v in sorted(obj.items(), key=lambda x: x[0]):
            if not isinstance(v, list):
                if myname in self._named_nodes:
                    # The enclosing node is a "named" one: <myname name="k">
                    xnode = xdoc.createElement(myname)
                    xnode.setAttribute('name', str(k))
                else:
                    if str(k) in self._named_nodes:
                        # The nested dump will create the named nodes itself
                        xnode = xroot
                    else:
                        xnode = xdoc.createElement(str(k))
            else:
                # Lists create one node per item, directly below xroot
                xnode = xroot
            self._xdump(xdoc, xnode, v, myname=str(k))
            if xnode is not xroot:
                xroot.appendChild(xnode)

    def _xdump_list(self, xdoc, xroot, obj, myname, topelt=False):
        """Translate a list into XML nodes (one per item) attached to ``xroot``."""
        for v in obj:
            if topelt:
                xnode = xdoc.createElement('generic_item')
            else:
                xnode = xdoc.createElement(myname)
            self._xdump(xdoc, xnode, v, myname=str(v), topelt=True)
            xroot.appendChild(xnode)

    def _xdump(self, xdoc, xroot, obj, myname, topelt=False):
        """Translate ``obj`` (list, dictionary or anything else) into XML."""
        if isinstance(obj, list):
            self._xdump_list(xdoc, xroot, obj, myname, topelt=topelt)
        elif isinstance(obj, dict):
            self._xdump_dict(xdoc, xroot, obj, myname)
        else:
            # Generic case
            xroot.appendChild(xdoc.createTextNode(str(obj)))

    def dump(self, obj, root, rootattr=None, level=0, nextline=True):
        """Call this method to dump ``obj`` (or at least try to...).

        :param obj: The object that will be dumped
        :param str root: Name of the XML root node
        :param dict rootattr: dictionary of attributes that will be added to
                              the XML root element.
        :param int level: For internal use only.
        :param bool nextline: For internal use only.
        :return: The resulting :class:`xml.dom.minidom.Document` object
        """
        parent_dump = self._recursive_dump(obj, level, nextline)
        xdoc = minidom.Document()
        xroot = xdoc.createElement(root)
        if isinstance(rootattr, dict):
            for k, v in rootattr.items():
                # minidom can only serialise string values: coerce them now
                # rather than failing later, when the document is written.
                xroot.setAttribute(str(k), str(v))
        self._xdump(xdoc, xroot, parent_dump, myname=root, topelt=True)
        xdoc.appendChild(xroot)
        return xdoc

    def cleandump(self, obj, root, rootattr=None, level=0, nextline=True):
        """Clear the cache and provide an XML dump of the provided ``obj``.

        The arguments are the same as for :meth:`dump`. The cache is keyed by
        object ids and the dumper object may be re-used, so it has to be
        cleared to honour the "clean" contract.

        :param obj: The object that will be dumped
        :param str root: Name of the XML root node
        :param dict rootattr: dictionary of attributes that will be added to
                              the XML root element.
        :param int level: For internal use only.
        :param bool nextline: For internal use only.
        :return: The resulting :class:`xml.dom.minidom.Document` object
        """
        self.reset()
        return self.dump(obj, root, rootattr=rootattr, level=level, nextline=nextline)
class TxtDumper(_AbstractDumper):
    """Dump a text representation of almost any object...

    The layout is driven by class attributes that may be overridden in
    subclasses (see :class:`OneLineTxtDumper`).
    """

    # Number of ``indent_space`` characters added at the start of every line
    indent_first = 6
    # Number of extra ``indent_space`` characters per nesting level
    indent_size = 4
    # The string used as an indentation unit
    indent_space = ' '

    # Above this nesting level, containers and unknown objects are elided
    max_depth = 32

    # Each ``break_*`` flag tells whether a line break (followed by the
    # indentation) is inserted at the named position.
    break_base = False
    break_string = False
    break_bool = False
    break_default = True
    break_proxies = True

    break_before_list_item = False
    break_before_list_begin = False
    break_after_list_begin = False
    break_before_list_end = False
    break_after_list_end = False

    break_before_set_item = False
    break_before_set_begin = False
    break_after_set_begin = False
    break_before_set_end = False
    break_after_set_end = False

    break_before_tuple_item = False
    break_before_tuple_begin = False
    break_after_tuple_begin = False
    break_before_tuple_end = False
    break_after_tuple_end = False

    break_before_dict_key = True
    break_before_dict_value = False
    break_before_dict_begin = False
    break_after_dict_begin = False
    break_before_dict_end = True
    break_after_dict_end = False

    def _indent(self, level=0, nextline=True):
        """Return a line break plus the indentation for ``level``.

        An empty string is returned when ``nextline`` is false.
        """
        if nextline:
            return "\n" + self.indent_space * (self.indent_first + self.indent_size * level)
        else:
            return ""

    def _dump_internal_dict(self, obj, level=0, nextline=True):
        """Dump an ``as_dict`` dictionary, enclosed in ``<<__dict__:: ...>>``."""
        parent_dump = super()._dump_internal_dict(obj, level + 1,
                                                  nextline and self.break_proxies)
        return "<<{:s}__dict__:: {!s}{:s}>>".format(self._indent(level + 1, self.break_proxies),
                                                    parent_dump,
                                                    self._indent(level, self.break_proxies))

    def _dump_as_proxy(self, proxy, obj, level=0, nextline=True):
        """Dump ``obj`` through a proxy type, enclosed in ``<<as_proxy:: ...>>``."""
        parent_dump = super()._dump_as_proxy(proxy, obj, level + 1,
                                             nextline and self.break_proxies)
        return "<<{:s}as_{:s}:: {!s}{:s}>>".format(self._indent(level + 1, self.break_proxies),
                                                   proxy,
                                                   parent_dump,
                                                   self._indent(level, self.break_proxies))

    def _dump_unknown_obj(self, obj, level=0, nextline=True):
        """Only the overview is returned (the type is added by dump_default)."""
        return self._unknown_obj_overview(obj)

    def dump_default(self, obj, level=0, nextline=True):
        """Dump any object as ``module.class::<parent's dump>``.

        Beyond ``max_depth``, a ``<TypeName...>`` placeholder is returned.
        """
        _DEBUG('dump_default')
        if level + 1 > self.max_depth:
            # Use the object's type name: ``type(obj).__class__`` would be the
            # metaclass, which says nothing about the elided object.
            return " <%s...>" % type(obj).__name__
        else:
            parent_dump = super().dump_default(obj, level, nextline and self.break_default)
            return "{:s}.{:s}::{!s}".format(type(obj).__module__, type(obj).__name__,
                                            parent_dump)

    def dump_base(self, obj, level=0, nextline=True):
        """Dump basic objects using their default string formatting."""
        _DEBUG('dump base ' + type(obj).__name__)
        return "{}{}".format(self._indent(level, self.break_base), obj)

    dump_NoneType = dump_base
    dump_int = dump_base
    dump_long = dump_base
    dump_float = dump_base

    def dump_str(self, obj, level=0, nextline=True):
        """Dump a string enclosed in single quotes."""
        _DEBUG('dump_str', obj)
        return "{}'{}'".format(self._indent(level, self.break_string), obj)

    dump_unicode = dump_str

    def dump_bool(self, obj, level=0, nextline=True):
        """Dump a boolean."""
        _DEBUG('dump_bool', obj)
        return "{}{}".format(self._indent(level, self.break_bool), str(obj))

    def dump_tuple(self, obj, level=0, nextline=True):
        """Dump a tuple as ``(item, ...)``, or ``(...)`` beyond ``max_depth``."""
        _DEBUG('dump_tuple', obj)
        if level + 1 > self.max_depth:
            return "{}(...){}".format(
                self._indent(level, self.break_before_tuple_begin),
                self._indent(level, self.break_after_tuple_end)
            )
        else:
            items = ["{}{}".format(self._indent(level + 1, self.break_before_tuple_item),
                                   self._recursive_dump(x, level + 1))
                     for x in obj]
            return "{}({}{}{}){}".format(
                self._indent(level, nextline and self.break_before_tuple_begin),
                self._indent(level + 1, self.break_after_tuple_begin),
                ', '.join(items),
                self._indent(level, self.break_before_tuple_end),
                self._indent(level, self.break_after_tuple_end)
            )

    def dump_list(self, obj, level=0, nextline=True):
        """Dump a list as ``[item, ...]``, or ``[...]`` beyond ``max_depth``."""
        _DEBUG('dump_list', obj)
        if level + 1 > self.max_depth:
            return "{}[...]{}".format(
                self._indent(level, self.break_before_list_begin),
                self._indent(level, self.break_after_list_end)
            )
        else:
            items = ["{}{}".format(self._indent(level + 1, self.break_before_list_item),
                                   self._recursive_dump(x, level + 1))
                     for x in obj]
            return "{}[{}{}{}]{}".format(
                self._indent(level, nextline and self.break_before_list_begin),
                self._indent(level + 1, self.break_after_list_begin),
                ', '.join(items),
                self._indent(level, self.break_before_list_end),
                self._indent(level, self.break_after_list_end)
            )

    def dump_set(self, obj, level=0, nextline=True):
        """Dump a set as ``set([item, ...])``, or ``set([...])`` beyond ``max_depth``."""
        _DEBUG('dump_set', obj)
        if level + 1 > self.max_depth:
            return "{}set([...]){}".format(
                self._indent(level, self.break_before_set_begin),
                self._indent(level, self.break_after_set_end)
            )
        else:
            items = ["{}{}".format(self._indent(level + 1, self.break_before_set_item),
                                   self._recursive_dump(x, level + 1))
                     for x in obj]
            return "{}set([{}{}{}]){}".format(
                self._indent(level, nextline and self.break_before_set_begin),
                self._indent(level + 1, self.break_after_set_begin),
                ', '.join(items),
                self._indent(level, self.break_before_set_end),
                self._indent(level, self.break_after_set_end)
            )

    def dump_dict(self, obj, level=0, nextline=True):
        """Dump a dictionary as ``dict(key = value, ...)`` (sorted entries).

        Beyond ``max_depth``, ``{...}`` is returned.
        """
        _DEBUG('dump_dict', obj)
        if level + 1 > self.max_depth:
            return "{}{{...}}{}".format(
                self._indent(level, self.break_before_dict_begin),
                self._indent(level, self.break_after_dict_end)
            )
        else:
            items = ["{}{} = {}{},".format(self._indent(level + 1, self.break_before_dict_key),
                                           str(k),
                                           self._indent(level + 2, self.break_before_dict_value),
                                           self._recursive_dump(v, level + 1))
                     for k, v in sorted(obj.items())]
            # No line break before the closing parenthesis of an empty dict
            breakdict = self.break_before_dict_end
            if not len(obj):
                breakdict = False
            return "{}dict({}{}{}){}".format(
                self._indent(level, nextline and self.break_before_dict_begin),
                self._indent(level + 1, self.break_after_dict_begin),
                ' '.join(items),
                self._indent(level, breakdict),
                self._indent(level, self.break_after_dict_end)
            )

    def cleandump(self, obj):
        """Clear cache dump and provide a top indented dump of the provided ``obj``.

        :param obj: The object that will be dumped
        """
        parent_dump = super().cleandump(obj)
        return self.indent_space * self.indent_first + parent_dump
class OneLineTxtDumper(TxtDumper):
    """Dump single-line text representation of almost any object..."""

    # No indentation at all...
    indent_first = 0
    indent_size = 0

    # ... and none of the line breaks that TxtDumper activates by default
    break_default = False
    break_proxies = False
    break_before_dict_key = False
    break_before_dict_end = False

    def _dump_obj_shortcut(self, obj, level=0, nextline=True):
        """Dump an object providing ``as_dump`` (the module name is omitted)."""
        return "{:s}::{:s}".format(type(obj).__name__, obj.as_dump())

    def dump_default(self, obj, level=0, nextline=True):
        """Dump any object as ``class::<generic dump>`` (the module name is omitted).

        Beyond ``max_depth``, a ``<TypeName...>`` placeholder is returned.
        """
        _DEBUG('dump_default')
        if level + 1 > self.max_depth:
            # Use the object's type name: ``type(obj).__class__`` would be the
            # metaclass, which says nothing about the elided object.
            return " <%s...>" % type(obj).__name__
        else:
            # TxtDumper.dump_default is bypassed on purpose: it would add the
            # module name in front of the dump.
            parent_dump = _AbstractDumper.dump_default(self, obj, level,
                                                       nextline and self.break_default)
            return "{:s}::{!s}".format(type(obj).__name__, parent_dump)
def fulldump(obj, startpos=TxtDumper.indent_first, reset=True):
    """Entry point: Return a text dump of the provided ``obj``.

    :param obj: The object that will be dumped
    :param int startpos: Number of blank characters that will be added to the
                         first line of the text dump
    :param bool reset: Reset the TxtDumper object's cache before dumping ``obj``
    :return: The text dump
    """
    dumper = TxtDumper()
    if reset:
        dumper.reset()
    first_line_prefix = TxtDumper.indent_space * startpos
    return first_line_prefix + dumper.dump(obj)
def lightdump(obj, break_before_dict_key=True, break_before_dict_value=False):
    """Entry point: Have a quick glance to an assumed 1-depth dictionary.

    Entries are listed in sorted key order; values are converted with ``str``.

    :param obj: The object that will be dumped
    :param bool break_before_dict_key: Insert a line break before each key
    :param bool break_before_dict_value: Insert a line break before each value
    :return: The text dump
    """
    _DEBUG('dump_dict', obj)
    dumper = TxtDumper()
    chunks = []
    for key, value in sorted(obj.items(), key=lambda x: x[0]):
        key_lead = dumper._indent(0, break_before_dict_key)
        value_lead = dumper._indent(1, break_before_dict_value)
        chunks.append("{}{} = {}{},".format(key_lead, str(key), value_lead, str(value)))
    return ''.join(chunks)
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module's docstrings.
    import doctest
    doctest.testmod()